-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSMSCG Zoop Script for DWR Reports_2022.R
342 lines (216 loc) · 12.5 KB
/
SMSCG Zoop Script for DWR Reports_2022.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
## Script dedicated to updating the SMSCG zoop graphs and analysis for the Summer- Fall Action Report each year
#Nick's quick update to plot 2021 data
library(tidyverse)
library(readxl)
library(ggthemes)
library(ggplot2)
library(car)
#read in data-----------------
#read in CPUE data from the ftp site. This also includes EMP data for the specified stations in the SMSCG footprint
#found path to data by right clicking file in the project file, saying import dataset, and copying path
#SMSCG_CPUE <- read.csv("D:/Data/Suisun Marsh Salinity Gate Project/2021 Report/SMSCG Zoop 2021 Report/Data/SMSCG_CBNet_2018to2020CPUE_16Oct2021.csv")
#read in file to convert to biomass
#zoop_biomass <- read.csv("D:/Data/Suisun Marsh Salinity Gate Project/2021 Report/SMSCG Zoop 2021 Report/Data/Copepod and Cladoceran Biomass Values.csv")
#read in zoop abundance data from SMSCG data package on EDI
#already includes 2021 data
SMSCG_CPUE <- read_csv("https://portal.edirepository.org/nis/dataviewer?packageid=edi.876.4&entityid=98795b108d5c1b34ebce4d8567a962d1")
#read in mass data from GitHub
#Note: there is a newer version of mass data; two files on the GitHub repo
#zoop_Biomass conversions_CEB Updated 2021.csv
#zoop_Copepod and Cladoceran Biomass Values.csv
zoop_biomass <- read_csv("./Data/zoop_Copepod and Cladoceran Biomass Values.csv")
#Editing file------
#filter the CPUE file so it only has the stations we're interested in
#look at all stations names in data set
unique(SMSCG_CPUE$Station)
stations <- data.frame(Station = as.character(c(602, "Grizz", "NZ028", 519, "Honk", 605, 606, "NZ032", "NZS42", 609, 610, "Mont", 508, 513, 520, 801, 802, 704, 706, "NZ054", "NZ060", "NZ064")),
Region = c(rep("Grizzly", 3), rep("Honker", 2), rep("West Marsh", 4), rep("East Marsh", 3), rep("River", 10)))
stations <- mutate(stations, Region = factor(Region, levels = c("Grizzly", "Honker", "West Marsh", "East Marsh", "River")))
#look at df structure
glimpse(stations)
#edited file with regions designated and added to csv
SMSCG_CPUE2 <- filter(merge(select(SMSCG_CPUE, -Region), stations, by = "Station"), Year %in% c(2018, 2019, 2020,2021), Month == 7 | Month ==8 | Month==9| Month==10) %>%
arrange(Year,Month,Station)
#Converting to BPUE------
Zoop_BPUE <- pivot_longer(SMSCG_CPUE2, cols = ACARTELA:CUMAC,
names_to = "taxon", values_to = "CPUE") %>%
left_join(zoop_biomass) %>%
mutate(BPUE = CPUE*mass_indiv_ug) %>%
filter(!is.na(BPUE)) %>%
pivot_wider(id_cols = c(Station:Volume, Region), names_from = taxon, values_from = BPUE)
#Pick out just the columns we are interested in.
#look at all taxon names
names(Zoop_BPUE)
zoop_BPUE2= mutate(Zoop_BPUE
, Acartiella = ACARTELA + ASINEJUV
,Tortanus = TORTANUS + TORTJUV
,Limnoithona = LIMNOSPP + LIMNOSINE + LIMNOTET + LIMNOJUV
,Pseudodiaptomus = PDIAPFOR + PDIAPMAR + PDIAPJUV + PDIAPNAUP
,`Other Calanoids` = ACARTIA + DIAPTOM + EURYTEM + OTHCALAD +
SINOCAL + EURYJUV + OTHCALJUV + SINOCALJUV + ACARJUV + DIAPTJUV + SINONAUP + EURYNAUP
,`Other Cyclopoids` = OITHDAV + OITHSIM + OITHSPP + OTHCYCAD + OITHJUV + OTHCYCJUV + OITHSPP + ACANTHO
,Other = BOSMINA + DAPHNIA + DIAPHAN + OTHCLADO + HARPACT + OTHCOPNAUP)
#zoop_BPUE2a = zoop_BPUE2[,c(1,3,5, 6, 22, 60:66)]
zoop_BPUE2a <- zoop_BPUE2 %>%
select(Station,Year,Month,Date,Region,Acartiella:Other)
#create a sample ID
zoop_BPUE2a$sample = 1:nrow(zoop_BPUE2a)
#turn month into a factor
zoop_BPUE2a$Month = factor(zoop_BPUE2a$Month, labels = c("Jul", "Aug", "Sep", "Oct"))
#write a csv
#write.csv(zoop_BPUE2a, "SMSCG_2018to2020_BPUE.csv")
#Convert to long format
zooplong = gather(zoop_BPUE2a, key = "Taxa", value = "BPUE", -Station, -Month, - Year, -Region, -Date, -sample)
#File with summary stats
zoopsummary = group_by(zooplong, Region, Year, Month, Taxa) %>% summarize(meanB = mean(BPUE), sdB = sd(BPUE), n = length(BPUE))
#Reorder taxa so more abundant taxa on bottom and can put n in there
zoopsummary2 = zoopsummary %>% mutate(Taxa = factor(Taxa, levels = c("Other","Limnoithona", "Other Cyclopoids","Other Calanoids",
"Pseudodiaptomus","Tortanus", "Acartiella",
ordered = TRUE)))
#Making plot
bpue1 = ggplot(zoopsummary2, aes(x = Month, y = meanB))
bpue2 = bpue1+ geom_bar(stat = "identity", aes(fill = Taxa)) +
facet_wrap(Year~Region, ncol = 5)+(coord_cartesian (ylim = c(1, 15000))) +
scale_fill_brewer(palette = "Set3", name = NULL) +
ylab("Mean BPUE (µgC/m3)") +
geom_label(aes(x = Month, y = 100, label = paste(n)))+
theme_few() + theme(text = element_text(family = "sans", size = 10),
legend.text = element_text(face = "italic"))
#ggsave(plot = bpue2, filename = "smscg_zoop_bpue_2018to2021.tiff", device = "tiff", width = 6, height =5, units = "in", dpi = 300)
##Removing n from graph b/c its super big and can't figure out how to reduce the text
bpue1_noN = ggplot(zoopsummary2, aes(x = Month, y = meanB))
bpue2_noN = bpue1_noN+ geom_bar(stat = "identity", aes(fill = Taxa)) +
facet_wrap(Year~Region, ncol = 5)+ (coord_cartesian (ylim = c(1, 15000))) +
scale_fill_brewer(palette = "Set3", name = NULL) +
ylab("Mean BPUE (µgC/m3)") +
theme_few() + theme(text = element_text(family = "sans", size = 9),
legend.text = element_text(face = "italic"))
#ggsave(plot = bpue2_noN, filename = "smscg_zoop_bpue_2018to2021_noN.tiff", device = "tiff", width = 6, height =4, units = "in", dpi = 300)
##Analysis----
#GLM of total BPUE
zooptots = group_by(zooplong, Region, Year, Month, Station, sample) %>%
summarize(BPUE = sum(BPUE), logBPUE = log(BPUE))
zooptots$Month = factor(zooptots$Month, labels = c("July", "August", "Sept", "Oct"))
hist(zooptots$BPUE)
hist(zooptots$logBPUE)
#pretty skewed without transforming, but doing shapiro test to confirm
shapiro.test(zooptots$BPUE)
#way significant usin non transformed data
shapiro.test(zooptots$logBPUE)
#need log transformed data, but still pretty significant----- transform again?
#based on plots, maybe start will full model with all interactions included
zlm = glm(logBPUE~Region*Month*Year, data = zooptots)
summary(zlm)
Anova(zlm) #region, month, year all significant but not interactions
plot(zlm)
#No interaction terms
zlm2 = glm(logBPUE~Region + Month + Year, data= zooptots)
summary(zlm2)
Anova(zlm2)
plot(zlm2)
######Combining Grizzly and Honker into Suisun------
stations_SB = data.frame(Station = as.character(c(602, "Grizz", "NZ028", 519, "Honk", 605, 606, "NZ032", "NZS42", 609, 610, "Mont", 508, 513, 520, 801, 802, 704, 706, "NZ054", "NZ060", "NZ064")),
Region = c(rep("Suisun Bay", 5), rep("West Marsh", 4), rep("East Marsh", 3), rep("River", 10)))
stations_SB = mutate(stations_SB, Region = factor(Region, levels = c("Suisun Bay", "West Marsh", "East Marsh", "River")))
SMSCG_CPUE_SB = filter(merge(select(SMSCG_CPUE, -Region), stations_SB, by = "Station"), Year %in% c(2018, 2019, 2020,2021), Month == 7 | Month ==8 | Month==9| Month==10)
zoop_BPUE_SB = pivot_longer(SMSCG_CPUE_SB, cols = ACARTELA:CUMAC,
names_to = "taxon", values_to = "CPUE") %>%
left_join(zoop_biomass) %>%
mutate(BPUE = CPUE*mass_indiv_ug) %>%
filter(!is.na(BPUE)) %>%
pivot_wider(id_cols = c(Station:Volume, Region), names_from = taxon, values_from = BPUE)
zoop_BPUE_SB= mutate(zoop_BPUE_SB, Acartiella = ACARTELA + ASINEJUV,
Tortanus = TORTANUS + TORTJUV,
Limnoithona = LIMNOSPP + LIMNOSINE + LIMNOTET + LIMNOJUV,
Pseudodiaptomus = PDIAPFOR + PDIAPMAR + PDIAPJUV + PDIAPNAUP,
`Other Calanoids` = ACARTIA + DIAPTOM + EURYTEM + OTHCALAD +
SINOCAL + EURYJUV + OTHCALJUV + SINOCALJUV + ACARJUV + DIAPTJUV + SINONAUP + EURYNAUP,
`Other Cyclopoids` = OITHDAV + OITHSIM + OITHSPP + OTHCYCAD + OITHJUV + OTHCYCJUV + OITHSPP + ACANTHO,
Other = BOSMINA + DAPHNIA + DIAPHAN + OTHCLADO + HARPACT + OTHCOPNAUP)
#zoop_BPUE_SBa = zoop_BPUE_SB[,c(1,3,5, 6, 22, 60:66)]
zoop_BPUE_SBa <- zoop_BPUE_SB %>%
select(Station,Year,Month,Date,Region,Acartiella:Other)
#create a sample ID
zoop_BPUE_SBa$sample = 1:nrow(zoop_BPUE_SBa)
#turn month into a factor
zoop_BPUE_SBa$Month = factor(zoop_BPUE_SBa$Month, labels = c("Jul", "Aug", "Sep", "Oct"))
#write a csv
#write.csv(zoop_BPUE_SBa, "SMSCG_2018to2021_BPUE2.csv")
#Convert to long format
zooplong_SB = gather(zoop_BPUE_SBa, key = "Taxa", value = "BPUE", -Station, -Month, - Year, -Region, -Date, -sample)
#File with summary stats
zoopsummary_SB = group_by(zooplong_SB, Region, Year, Month, Taxa) %>% summarize(meanB = mean(BPUE), sdB = sd(BPUE), n = length(BPUE))
#Reorder taxa so more abundant taxa on bottom and can put n in there
zoopsummary_SB2 = zoopsummary_SB %>% mutate(Taxa = factor(Taxa, levels = c("Other","Limnoithona", "Other Cyclopoids","Other Calanoids",
"Pseudodiaptomus","Tortanus", "Acartiella",
ordered = TRUE)))
#Making plot
bpue_SB = ggplot(zoopsummary_SB2, aes(x = Month, y = meanB))
bpue_SB2 = bpue_SB+ geom_bar(stat = "identity", aes(fill = Taxa)) +
facet_wrap(Year~Region, ncol = 4)+ (coord_cartesian (ylim = c(1, 15000))) +
scale_fill_brewer(palette = "Set3", name = NULL) +
ylab("Mean BPUE (µgC/m3)") +
theme_few() + theme(text = element_text(family = "sans", size = 9),
legend.text = element_text(face = "italic"))
#ggsave(plot = bpue_SB2, filename = "./Plots/smscg_zoop_bpue_2018to2021_SB.tiff", device = "tiff", width = 6.5, height =5, units = "in", dpi = 300)
#Combining to SB looks better, and not big changes in trend
##Analysis with Suisun Bay Region----
#GLM of total BPUE
zooptots_SB = group_by(zooplong_SB, Region, Year, Month, Station, sample) %>%
summarize(BPUE = sum(BPUE), logBPUE = log(BPUE))
zooptots_SB$Month = factor(zooptots_SB$Month, labels = c("July", "August", "Sept", "Oct"))
hist(zooptots_SB$BPUE)
hist(zooptots_SB$logBPUE)
#start with full model
zlm_SB = glm(logBPUE~Region*Month*Year, data = zooptots_SB)
summary(zlm_SB)
Anova(zlm_SB) #no significant interaction terms
plot(zlm_SB)
#No interaction terms
zlm_SB2 = glm(logBPUE~Region + Month + Year, data= zooptots_SB)
summary(zlm_SB2)
Anova(zlm_SB2) #all predictors are significant
plot(zlm_SB2) #normality could be better but other plots look pretty good
##Anovas with Suisun Bay Region------
anov_SB = aov(logBPUE~Region + Month + Year, data = zooptots_SB)
summary(anov_SB)
Anova(anov_SB) #basically same as GLM unsurprisingly
TukeyHSD(anov_SB)
##Turn year into a factor-----
#problem with year. it says its a number, need to be a factor?
#turn year into a factor
zoop_BPUE_SBa$Year = factor(zoop_BPUE_SBa$Year, labels = c("2018", "2019", "2020","2021"))
#Convert to long format
zooplong_SB_Y = gather(zoop_BPUE_SBa, key = "Taxa", value = "BPUE", -Station, -Month, - Year, -Region, -Date, -sample)
#File with summary stats
zoopsummary_SB_Y = group_by(zooplong_SB_Y, Region, Year, Month, Taxa) %>% summarize(meanB = mean(BPUE), sdB = sd(BPUE), n = length(BPUE))
#write.csv(zoopsummary_SB_Y, "ZoopSummarySB.csv")
zooptots_SB_Y = group_by(zooplong_SB_Y, Region, Year, Month, Station, sample) %>%
summarize(BPUE = sum(BPUE), logBPUE = log(BPUE))
#With Region*Month interaction. Region*Month not significant, don't need interaction
zlm_SB_Y = glm(logBPUE~Region*Month + Year, data = zooptots_SB_Y)
summary(zlm_SB_Y)
Anova(zlm_SB_Y)
plot(zlm_SB)
#No interaction terms
zlm_SB_Y2 = glm(logBPUE~Region + Month + Year, data= zooptots_SB_Y)
summary(zlm_SB_Y2)
Anova(zlm_SB_Y2)
plot(zlm_SB_Y2)
##Anovas with Suisun Bay Region------
anov_SB_Y = aov(logBPUE~Region + Month + Year, data = zooptots_SB_Y)
summary(anov_SB_Y)
TukeyHSD(anov_SB_Y)
#summary stats---------
#regions
zoopsummary_regions <- zooplong_SB_Y %>%
group_by(Region) %>%
summarize(meanB = mean(BPUE), sdB = sd(BPUE), n = length(BPUE))
#months
zoopsummary_months <- zooplong_SB_Y %>%
group_by(Month) %>%
summarize(meanB = mean(BPUE), sdB = sd(BPUE), n = length(BPUE))
#years
zoopsummary_years <- zooplong_SB_Y %>%
group_by(Year) %>%
summarize(meanB = mean(BPUE), sdB = sd(BPUE), n = length(BPUE))