Required packages
library(WQM)
library(MBC)
#> Loading required package: Matrix
#> Loading required package: energy
#> Loading required package: FNN
library(WaveletComp)
library(tidyr)
#>
#> Attaching package: 'tidyr'
#> The following objects are masked from 'package:Matrix':
#>
#> expand, pack, unpack
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(data.table)
#>
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#>
#> between, first, last
library(scales)
library(ggplot2)
#>
#> Attaching package: 'ggplot2'
#> The following object is masked from 'package:WaveletComp':
#>
#> arrow
Load data
data(NWP.rain)
summary(NWP.rain)
#> Lead Date Station mod
#> Min. :1 Min. :2018-03-20 01:00:00.00 047058 : 5114 Min. : 0.00
#> 1st Qu.:1 1st Qu.:2019-02-02 13:00:00.00 073019 : 5114 1st Qu.: 0.00
#> Median :1 Median :2019-12-21 16:00:00.00 201001 : 5114 Median : 0.00
#> Mean :1 Mean :2019-12-22 03:58:39.04 203005 : 5114 Mean : 0.26
#> 3rd Qu.:1 3rd Qu.:2020-11-08 01:00:00.00 203010 : 5114 3rd Qu.: 0.00
#> Max. :1 Max. :2021-09-25 07:00:00.00 203030 : 5114 Max. :132.37
#> (Other):787556 NA's :39658
#> obs
#> Min. : 0.0000
#> 1st Qu.: 0.0000
#> Median : 0.0000
#> Mean : 0.0733
#> 3rd Qu.: 0.0000
#> Max. :64.4000
#> NA's :2509
station.no <- levels(NWP.rain$Station)
Ind.samp <- c(34,75)[1]; Ind.samp
#> [1] 34
station.samp <- station.no[Ind.samp]; station.samp
#> [1] "210018"
station.samp
#> [1] "210018"
NWP.rain.h <- NWP.rain %>% na.omit() %>% subset(Station==station.samp)
summary(NWP.rain.h)
#> Lead Date Station mod
#> Min. :1 Min. :2018-03-20 01:00:00.00 210018 :5113 Min. : 0.0000
#> 1st Qu.:1 1st Qu.:2019-02-02 13:00:00.00 047058 : 0 1st Qu.: 0.0000
#> Median :1 Median :2019-12-21 13:00:00.00 073019 : 0 Median : 0.0000
#> Mean :1 Mean :2019-12-22 02:07:47.50 201001 : 0 Mean : 0.2541
#> 3rd Qu.:1 3rd Qu.:2020-11-07 19:00:00.00 203005 : 0 3rd Qu.: 0.0000
#> Max. :1 Max. :2021-09-25 07:00:00.00 203010 : 0 Max. :80.1094
#> (Other): 0
#> obs
#> Min. :0.00000
#> 1st Qu.:0.00000
#> Median :0.00000
#> Mean :0.06505
#> 3rd Qu.:0.00000
#> Max. :9.80000
#>
#obs overview
plot(NWP.rain.h$obs, type="l",col=1, ylab="P", xlab=NA)
lines(NWP.rain.h$mod, type="l", col=2)
Amplitudes and Phases
# CWT plot----
# selected decomposition level to plot
level.samp <- c(seq(1,ncol(modulus.o), by=1));level.samp
#> [1] 1 2 3 4 5 6 7 8 9
df.cwt <- data.frame(No=data[[l]]$Date[subset],
obs=data[[l]]$obs[subset],modulus.o) %>%
gather(Group, Value,2:(ncol(modulus.o)+2))
df.cwt_sub <- subset(df.cwt, Group %in% c("obs",paste0("X",level.samp)))
summary(factor(df.cwt_sub$Group))
#> X1 X2 X3 X4 X5 X6 X7 X8 X9 obs
#> 2557 2557 2557 2557 2557 2557 2557 2557 2557 2557
##observations----
p0 <- ggplot(subset(df.cwt_sub,Group=="obs"), aes(x=No, y=Value)) +
geom_line()+
#facet_grid(Group~., switch="both") +
#facet_wrap(Group~., strip.position = "left", ncol=1, labeller = label_parsed) +
scale_y_continuous(breaks = pretty_breaks(n = 4)) +
scale_x_datetime(date_breaks="3 month", date_labels ="%Y-%m", expand = c(0,0)) +
#scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
labs(y="Precipitation(mm/h)") +
theme_bw() +
theme(text = element_text(size = 16),
plot.margin = unit(c(0.5,1,0.5, 1), "cm"),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
strip.text.y = element_text(size=16),
# axis.text.y = element_text(angle = 90, hjust=0.5, size=12),
# axis.title.x = element_text(size=16),
# axis.title.y = element_text(size=16),
strip.background = element_blank(),
strip.placement = "outside",
axis.title.x = element_blank(),
#axis.title.y = element_blank()
)
p0
##amplitudes of observations----
df.cwt_sub$Group <- factor(df.cwt_sub$Group, labels=c("obs",paste('italic(s)[',level.samp-1,']',sep = '')))
p1 <- ggplot(subset(df.cwt_sub,Group!="obs"), aes(x=No, y=Value)) +
geom_line()+
#facet_grid(Group~., switch="both") +
facet_wrap(Group~., strip.position = "left", ncol=1, labeller = label_parsed) +
scale_y_continuous(breaks = pretty_breaks(n = 4)) +
scale_x_datetime(date_breaks="3 month", date_labels ="%Y-%m",expand = c(0,0)) +
#scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
labs(y="Amplitudes", color="temp") +
theme_bw() +
theme(text = element_text(size = 16),
plot.margin = unit(c(0.2,0.4,0.5, 0), "cm"),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
strip.text.y = element_text(size=16),
# axis.text.y = element_text(angle = 90, hjust=0.5, size=12),
# axis.title.x = element_text(size=16),
# axis.title.y = element_text(size=16),
strip.background = element_blank(),
strip.placement = "outside",
axis.title.x = element_blank(),
axis.title.y = element_text(margin = margin(t = 0, r = -2, b = 0, l = 0))
#axis.title.y = element_blank()
)
p1 %>% print()
##phases of observations----
df.cwt <- data.frame(No=data[[l]]$Date[subset],
obs=data[[l]]$obs[subset],phases.o) %>%
gather(Group, Value,2:(ncol(modulus.o)+2))
df.cwt_sub <- subset(df.cwt, Group %in% c("obs",paste0("X",level.samp)))
summary(factor(df.cwt_sub$Group))
#> X1 X2 X3 X4 X5 X6 X7 X8 X9 obs
#> 2557 2557 2557 2557 2557 2557 2557 2557 2557 2557
df.cwt_sub$Group <- factor(df.cwt_sub$Group, labels=c("obs",paste('italic(s)[',level.samp-1,']',sep = '')))
p2 <- ggplot(subset(df.cwt_sub,Group!="obs"), aes(x=No, y=Value)) +
geom_line()+
#facet_grid(Group~., switch="both") +
facet_wrap(Group~., strip.position = "left", ncol=1, labeller = label_parsed) +
scale_y_continuous(limits=c(-pi,pi),breaks = pretty_breaks(n = 4)) +
#scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
scale_x_datetime(date_breaks="3 month", date_labels ="%Y-%m",expand = c(0,0)) +
labs(y="Phases",color="temp") +
theme_bw() +
theme(text = element_text(size = 16),
plot.margin = unit(c(0.2,0.4,0.5, 0), "cm"),
panel.border = element_rect(color = 'black'),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
strip.text.y = element_text(size=16),
# axis.text.y = element_text(angle = 90, hjust=0.5, size=12),
# axis.title.x = element_text(size=16),
# axis.title.y = element_text(size=16),
strip.background = element_blank(),
strip.placement = "outside",
axis.title.x = element_blank(),
axis.title.y = element_text(margin = margin(t = 0, r = -2, b = 0, l = 0))
#axis.title.y = element_blank()
)
p2 %>% print()
Bias correction
if(QM %like% "MBC"){
modulus.tmp <- do.call(QM,list(o.c=modulus.o, m.c=modulus.m,
m.p=modulus.p, ratio.seq=rep(TRUE, ncol(modulus.m)), #trace=TRUE,
silent=TRUE,n.tau=100
))
modulus.bcc <- modulus.tmp$mhat.c
modulus.bcf <- modulus.tmp$mhat.p
} else if(QM=="MRS") {
modulus.tmp <- do.call(QM,list(o.c=modulus.o, m.c=modulus.m, m.p=modulus.p))
modulus.bcc <- modulus.tmp$mhat.c
modulus.bcf <- modulus.tmp$mhat.p
} else if(QM=="QDM") {
#cat("QDM with ratio=T \n")
modulus.tmp <- lapply(1:ncol(modulus.o), function(i)
QDM(o.c=modulus.o[,i], m.c=modulus.m[,i], m.p=modulus.p[,i], ratio=FALSE))
modulus.bcc <- sapply(modulus.tmp, function(ls) ls$mhat.c)
modulus.bcf <- sapply(modulus.tmp, function(ls) ls$mhat.p)
}
sum(modulus.bcc<0)
#> [1] 0
sum(modulus.bcf<0)
#> [1] 28
modulus.bcf[modulus.bcf<0] <-0
modulus.bcc[modulus.bcc<0] <-0
phases.tmp <- lapply(1:ncol(phases.o), function(i)
QDM(o.c=phases.o[,i], m.c=phases.m[,i], m.p=phases.p[,i]))
phases.bcc <- sapply(phases.tmp, function(ls) ls$mhat.c)
phases.bcf <- sapply(phases.tmp, function(ls) ls$mhat.p)
## modulus----
df.modulus <- rbind(data.frame(mod="obs",no=data[[l]]$Date[subset],modulus.o),
data.frame(mod="cur",no=data[[l]]$Date[subset],modulus.m),
data.frame(mod="bcc",no=data[[l]]$Date[subset],modulus.bcc)) %>%
gather(lev, amp, 3:((ncol(modulus.o)+2))) #%>% mutate(amp=as.numeric(amp), subset=as.numeric(subset))
summary(df.modulus$mod)
#> Length Class Mode
#> 69039 character character
df.modulus$mod <- factor(df.modulus$mod, levels = c("obs","cur","bcc"),
labels = c("obs"="Observed","mod"="Raw","QM"))
df.modulus_sub <- subset(df.modulus, lev %in% c(paste0("X",level.samp)))
summary(factor(df.modulus_sub$lev))
#> X1 X2 X3 X4 X5 X6 X7 X8 X9
#> 7671 7671 7671 7671 7671 7671 7671 7671 7671
df.modulus_sub$lev <- factor(df.modulus_sub$lev, labels=c(paste('italic(s)[',level.samp-1,']',sep = '')))
p3 <- ggplot(df.modulus_sub, aes(x=no, y=amp, color=mod)) +
geom_line(linewidth=0.5)+
#facet_grid(Group~., switch="both") +
facet_wrap(lev~., strip.position = "left", ncol=1, labeller = label_parsed) +
scale_color_manual(values=c("black","red","blue"))+
#scale_y_continuous(limits=c(-pi,pi),breaks = scales::pretty_breaks(n = 3)) +
#scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
scale_x_datetime(date_breaks="3 month", date_labels ="%Y-%m", expand = c(0,0)) +
labs(color=NULL, x="Date") +
theme_bw() +
theme(text = element_text(size = 12),
plot.margin = unit(c(0.2,0.1,0.5, 0), "cm"),
panel.border = element_rect(color = 'black'),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
strip.text.y = element_text(size=16),
# axis.text.y = element_text(angle = 90, hjust=0.5, size=12),
# axis.title.x = element_text(size=16),
# axis.title.y = element_text(size=16),
#legend.position = 'none',
legend.position = c(0.68,0.98),
legend.direction = "horizontal",
legend.background = element_rect(fill = "transparent"),
strip.background = element_blank(),
strip.placement = "outside",
#axis.title.x = element_blank(),
axis.title.y = element_blank()
)
#> Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
#> 3.5.0.
#> ℹ Please use the `legend.position.inside` argument of `theme()` instead.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.
summary(df.modulus_sub)
#> mod no lev
#> Observed:23013 Min. :2018-03-20 01:00:00.00 italic(s)[0]: 7671
#> Raw :23013 1st Qu.:2018-08-26 19:00:00.00 italic(s)[1]: 7671
#> QM :23013 Median :2019-02-02 13:00:00.00 italic(s)[2]: 7671
#> Mean :2019-02-02 23:27:24.43 italic(s)[3]: 7671
#> 3rd Qu.:2019-07-12 19:00:00.00 italic(s)[4]: 7671
#> Max. :2019-12-21 13:00:00.00 italic(s)[5]: 7671
#> (Other) :23013
#> amp
#> Min. : 0.0000
#> 1st Qu.: 0.0811
#> Median : 0.4642
#> Mean : 0.6524
#> 3rd Qu.: 0.9022
#> Max. :14.8106
#>
p4 <- ggplot(df.modulus_sub) +
stat_ecdf(aes(x=amp, color=mod, size=mod), geom = "step",pad = TRUE) +
facet_wrap(lev~., strip.position = "left", ncol=1, labeller = label_parsed)+
scale_x_continuous(trans="log2",limits = c(1/2^10,16), breaks=c(0.001,0.05,1,6)) +
#scale_x_continuous(trans="sqrt",limits = c(0,4), breaks=c(0,1,2,4)) +
scale_color_manual(values=c("black","red","blue"))+
scale_size_manual(values=c(1,0.5,0.5))+
#guides(color=guide_legend(override.aes = list(size=5))) +
labs(color=NULL, size=NULL, x="log2") +
theme_bw() +
theme(text = element_text(size = 12),
plot.margin = unit(c(0.2,0.1,0.5, 0), "cm"),
panel.border = element_rect(color = 'black'),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
strip.text.y = element_text(size=16),
# axis.text.y = element_text(angle = 90, hjust=0.5, size=12),
# axis.title.x = element_text(size=16),
# axis.title.y = element_text(size=16),
legend.position = 'none',
strip.background = element_blank(),
strip.placement = "outside",
#axis.title.x = element_blank(),
axis.title.y = element_blank()
)
#> Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
#> ℹ Please use `linewidth` instead.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.
p4
#> Warning: Removed 6195 rows containing non-finite outside the scale range
#> (`stat_ecdf()`).
Phase Randomization
# Phase random - validation same for calibration
noise_mat <- list()
for (r in 1:num.sim) {
data.obs <- as.vector(sapply(data, function(ls) ls$obs[subset]))
ts_wn <- sample(data.obs, size=length(data[[1]]$obs[-subset]), replace=T)
wt_noise <- t(WaveletComp::WaveletTransform(x=ts_wn,dt=dt,dj=dj)$Wave)
noise_mat[[r]] <- as.matrix(wt_noise)
}
phases.rand <- lapply(1:num.sim, function(r) {
tmp <- Arg(noise_mat[[r]])
ord.phases <- apply(phases.p, 2, order)
#ord.phases <- apply(phases.p, 2, order)
tmp.rank <- apply(tmp, 2, sort)
tmp.n <- sapply(1:ncol(tmp), function(ii) {tmp[ord.phases[,ii],ii] <- tmp.rank[,ii];
return(tmp[,ii])})
return(tmp.n)
})
## bcf----
mat_new_val <- matrix(complex(modulus=modulus.bcf,argument=phases.p),ncol=ncol(phases.p))
rec_val <- fun_icwt(x.wave=mat_new_val,dt=dt,dj=dj)
if(variable=="prep") rec_val[rec_val<=theta] <- 0
### apply wavelet reconstruction to randomized signal----
mat_val_r <- prsim(modulus.bcf, phases.p, noise_mat)
data_sim_val <- sapply(1:num.sim, function(r) fun_icwt(x.wave=mat_val_r[[r]], dt=dt, dj=dj))
if(variable=="prep") data_sim_val[data_sim_val<=theta] <- 0
colnames(data_sim_val) <- paste0("r",seq(1:num.sim))
data.val <- data[[l]][-subset,]
data.val$bcf <- rec_val
data.val <- data.frame(data.val, data_sim_val)
summary(data.val)
#> Lead Date Station mod
#> Min. :1 Min. :2019-12-21 19:00:00.00 210018 :2556 Min. : 0.0000
#> 1st Qu.:1 1st Qu.:2020-05-31 23:30:00.00 047058 : 0 1st Qu.: 0.0000
#> Median :1 Median :2020-11-07 22:00:00.00 073019 : 0 Median : 0.0000
#> Mean :1 Mean :2020-11-08 07:49:38.86 201001 : 0 Mean : 0.3163
#> 3rd Qu.:1 3rd Qu.:2021-04-18 14:30:00.00 203005 : 0 3rd Qu.: 0.0000
#> Max. :1 Max. :2021-09-25 07:00:00.00 203010 : 0 Max. :32.3750
#> (Other): 0
#> obs index bcf r1
#> Min. :0.00000 Min. : 1.0 Min. :0.0000 Min. :0.0000
#> 1st Qu.:0.00000 1st Qu.: 82.0 1st Qu.:0.0000 1st Qu.:0.0000
#> Median :0.00000 Median :162.5 Median :0.0000 Median :0.0000
#> Mean :0.08904 Mean :166.7 Mean :0.1721 Mean :0.1372
#> 3rd Qu.:0.00000 3rd Qu.:242.2 3rd Qu.:0.0000 3rd Qu.:0.0000
#> Max. :9.80000 Max. :366.0 Max. :5.9194 Max. :5.8685
#>
#> r2 r3 r4 r5
#> Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
#> 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
#> Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
#> Mean :0.1429 Mean :0.1388 Mean :0.1471 Mean :0.1638
#> 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
#> Max. :5.6116 Max. :4.9396 Max. :5.8413 Max. :5.9287
#>
Compare
df_fut <- cbind(data.val[,c('Date',"obs","mod","bcf",paste0("r",1:num.sim))],qm=data.bcf) %>%
gather(Group, Value,2:(4+num.sim+1))
summary(factor(df_fut$Group))
#> bcf mod obs qm r1 r2 r3 r4 r5
#> 2556 2556 2556 2556 2556 2556 2556 2556 2556
summary(df_fut)
#> Date Group Value
#> Min. :2019-12-21 19:00:00.00 Length:23004 Min. : 0.0000
#> 1st Qu.:2020-05-31 23:30:00.00 Class :character 1st Qu.: 0.0000
#> Median :2020-11-07 22:00:00.00 Mode :character Median : 0.0000
#> Mean :2020-11-08 07:49:38.86 Mean : 0.1521
#> 3rd Qu.:2021-04-18 14:30:00.00 3rd Qu.: 0.0000
#> Max. :2021-09-25 07:00:00.00 Max. :32.3750
p5 <- ggplot(data=df_fut,aes(x=Value,color = Group, size=Group)) +
stat_ecdf(aes(color = Group, size=Group), geom = "step", pad = FALSE) +
#stat_density(geom='line', position='identity') +
#facet_grid(W~Station, scales = "free") + #, labeller = labeller(Grid = Predictor.labs)) +
# scale_color_manual(values=c("black","red","green","blue"))+#,
# scale_size_manual(values=c(1,1,0.5,0.5))+
scale_color_manual(values = c("Black",alpha("Red",0.6),alpha("green",0.6),
alpha("blue",0.6), rep('grey',5))) +
scale_size_manual(values=c(1,1,0.5,0.5, rep(0.1,5))) +
#labels=c("obs","mod.c","mod.bcc")) +
scale_x_continuous(limits=c(0, 20), breaks = pretty_breaks(n = 3)) +
#labs(color=paste0(variable.ncep[i], "-level: ",level[j])) +
guides(color=guide_legend(override.aes = list(alpha=1)),
size=guide_legend(override.aes = list(size=1))) +
theme_bw() +
theme(text = element_text(size = 16),
plot.margin = unit(c(1,1,0.5, 1), "cm"),
# panel.grid.minor = element_blank(),
# panel.grid.major = element_blank(),
# axis.text.y = element_text(angle = 90, hjust=0.5, size=12),
# axis.title.x = element_text(size=16),
# axis.title.y = element_text(size=16),
legend.title=element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank()
)
p5
#> Warning: Removed 4 rows containing non-finite outside the scale range
#> (`stat_ecdf()`).