-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathplot_trends_design.R
156 lines (154 loc) · 7.2 KB
/
plot_trends_design.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
################################################################################
#This is based off of plot_trend_ducks_dev.R, but for ACP
# include it here just for dev purposes
##Now lets loop through all species and write figures and data to a directory
#will try to make this a function at some point
library(AKaerial)
library(tidyverse)
library(mgcv)
#write error and messages to log file
zz <- file("plot_trends/data/log.txt", open="wt")
sink(zz ,type = "output")
sink(zz, type = "message")
path = NULL
exclude.spp = c("SWANN", "UNGR", "UNME", "UNEI", "BAEA", "COME")
spp = "ARTE"
area = "ACP"
override = NULL
if(area == "ACP") data <- ACPHistoric$output.table
if(area == "YKD") data <- YKDHistoric$output.table
df <- data |>
filter(! Species %in% c("START", "END", exclude.spp)) |>
mutate(Species = factor(Species))
#start loop
for(spp in levels(df$Species)){
print(spp)
#select species and do sims
#spp <- "SPEI"
if(is.null(override)){
what = ifelse(spp %in% c("SNGO", "SWAN", "UNSC", "RNGR", "JAEG", "SAGU", "GLGU",
"ARTE", "RTLO", "PALO", "YBLO", "GOEA", "SEOW", "SNOW",
"CORA"), "total.est",
ifelse(spp %in% c("GWFG", "BRAN", "CCGO", "NSHO", "AMWI", "MALL",
"NOPI", "GWTE", "STEI", "SPEI", "KIEI", "COEI",
"SUSC", "WWSC", "BLSC", "LTDU", "RBME", "SACR"),
"itotal.est", "ibbtotal.est"))
}
se.what = ifelse(what == "ibbtotal.est", "SE.ibb",
ifelse(what == "total.est", "SE",
ifelse(what == "itotal.est", "SE.i", NULL)))
sppdf <- df |> filter(Species == spp) |>
mutate(index = .data[[what]], se.index = .data[[se.what]]) |>
mutate(index = units::drop_units(index), cv = se.index/index) |>
select(Year, Species, Observer, index, se.index, cv) |>
drop_na() |>
mutate(upper = index + 2*se.index, lower = index - 2*se.index,
Observer = factor(Observer))
yearobs <- length(unique(sppdf$Year))
NumObs <- length(unique(sppdf$Observer))
kyear <- ifelse(length(sppdf$Year) > yearobs - 1 + NumObs,
yearobs - 1, yearobs - 1 - NumObs)
# print(data.frame(Species = spp, yearobs = yearobs, NumObs = NumObs,
# kyear = kyear, nobs = dim(sppdf)[1]))
if(kyear <= 0 | NumObs + kyear > dim(sppdf)[1] ){
message(paste("Not enough data to fit to", spp))
next()
}
if(kyear < yearobs -1) message(paste("Number of temporal knots for", spp,"restricted to", kyear))
nreps = 300
results <- w <- vector("list", nreps)
newdf <- sppdf |> mutate(Observer = "HMW") |> select(Year, Species, Observer) |>
distinct()
for(i in 1:nreps){
#need to catch NaNs for rare or poorly estimated species
sppdf$est <- log(rnorm(dim(sppdf)[1], mean = sppdf$index, sd = sppdf$se.index))
sppdf$est <- if_else(is.na(sppdf$est), 1e-10, sppdf$est)
bootfit <- gam(est ~ s(Year, bs = "tp", k = kyear) +
s(Observer, bs = "re"),
method = "REML", data = sppdf, family = scat)
#sample from posterior
b <- rmvn(1, coef(bootfit), vcov(bootfit))
Xp <- predict(bootfit, type="lpmatrix", newdata=newdf, exclude = "s(Observer)",
newdata.guaranteed=TRUE)
# w[[i]] <- warnings()
# print(paste("iteration", i))
# print(paste("Number of iterations for fit", bootfit$iter))
# print(warnings())
results[[i]] <- exp(Xp%*%b)
}
names(results) <- paste0(rep("rep", length(results)), 1:nreps)
repdf <- cbind(newdf, as.data.frame(results)) |>
pivot_longer(4:(4+nreps-1), names_to = "Rep", values_to = "results")
# ggplot(data = repdf) +
# geom_line(aes(x = Year, y = results, group = Rep), alpha = 0.25) +
# geom_pointrange(data = sppdf, aes(x = Year, y = index, ymin = lower, ymax = upper,
# col = Observer))
#fit at point estimate to compare
# sppdf$logest <- log(sppdf$index)
# fit1 <- gam(logest ~ s(Year, bs = "tp", k = kyear) +
# s(Observer, bs = "re"),
# method = "REML", data = sppdf, family = scat)
# preds <- predict(fit1, newdata = newdf, exclude = "s(Observer)", se.fit = TRUE)
# fitdf <- cbind(newdf, preds) |> mutate(upper = fit + 2*se.fit, lower = fit - 2*se.fit) |>
# mutate(fit = exp(fit), upper = exp(upper), lower = exp(lower))
# ggplot() +
# geom_ribbon(data = fitdf, aes(x=Year, ymin=lower, ymax = upper, fill = "orange")) +
# geom_line(data = repdf, aes(x = Year, y = results, group = Rep), alpha = 0.25) +
# geom_line(data = fitdf, aes(x = Year, y = fit), col = "white", lwd = 1) +
# geom_pointrange(data = sppdf, aes(x = Year, y = index, ymin = lower, ymax = upper,
# col = Observer))
#Now use results to find mean and SD at each time point
sumdf <- repdf |> group_by(Year) |>
summarise(mean = mean(results), sd = sd(results)) |>
mutate(index = what, upper = mean + 2*sd, lower = mean - 2*sd)
write_csv(sumdf, file = paste0(path, "plot_trends/data/", spp, "_bootfit.csv"))
p1 <- ggplot() +
geom_ribbon(data = sumdf, aes(x=Year, ymin=lower, ymax = upper, fill = "orange"),
show.legend = FALSE) +
geom_line(data = repdf, aes(x = Year, y = results, group = Rep), alpha = 0.25) +
geom_line(data = sumdf, aes(x = Year, y = mean), col = "white", lwd = 1) +
geom_pointrange(data = sppdf, aes(x = Year, y = index, ymin = lower, ymax = upper,
col = Observer), show.legend = FALSE) +
labs(title = paste(spp, "on", area), y = what)
png(filename = paste0(path, "plot_trends/figures/", spp, "_index.png"))
print(p1)
dev.off()
#Now make a data frame of the posterior "trend" for each lag time period
#here x-year trend is measured as [N_a/N_b]^(1/(a - b)), where a is the current year
# and b is a any other year, b < a
trendDF <- data.frame(NULL)
for(i in 1:(length(unique(repdf$Year)) - 1)){
temp <- repdf |> arrange(Rep, Year) |> group_by(Rep) |>
mutate(Lag = i,
temp = lag(results, i, order_by = Year),
Trend = (results/temp)^(1/Lag)) |>
filter(Year == max(repdf$Year)) |>
ungroup() |>
select(Year, Species, Lag, Trend)
trendDF <- rbind(trendDF, temp)
}
#hist(filter(trendDF, Lag == 35)$Trend)
#summarize and plot
sumTrend <- trendDF |>
mutate(P = Trend < 1) |>
group_by(Lag) |>
summarise(index = what, mean = mean(Trend),
median = quantile(Trend, probs = 0.5),
lower = quantile(Trend, probs = 0.025),
upper = quantile(Trend, probs = 0.975),
P = mean(P))
write_csv(sumTrend, file = paste0(path, "plot_trends/data/", spp, "_trend.csv"))
p2 <- ggplot(data = sumTrend) +
geom_ribbon(aes(x=Lag, ymin=lower, ymax = upper), fill = "orange") +
geom_line(aes(x = Lag, y = median), col = "darkgray", lwd = 1) +
geom_line(aes(x = Lag, y = mean), col = "red", lwd = 1) +
geom_hline(yintercept = 1) +
labs(y = "Growth rate", title = paste(spp, "on", area))
png(filename = paste0(path, "plot_trends/figures/", spp, "_trend.png"))
print(p2)
dev.off()
}
print(warnings())
sink(type = "output")
sink(type="message")
close(zz)