-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSMSCG Zoops for Report2.R
399 lines (270 loc) · 16.9 KB
/
SMSCG Zoops for Report2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
## Script dedicated to updating the SMSCG zoop graphs and analysis for the Summer- Fall Action Report each year
library(tidyverse)
library(readxl)
#library(lubridate)
library(ggthemes)
#library(ggplot2)
library(devtools)
library(zooper)
library(sf) #for the regional data
library(lme4) #for linear models
library(lmerTest) #for getting p values with the linear models
library(emmeans)
library(effects)
library(car)
library(broom) #need this for pretty tables of the outputs of the model
#devtools::install_github("InteragencyEcologicalProgram/zooper") #install zooper to grab the integrated data with DOP
#Zooper Data-----
zooper = Zoopsynther(Data_type = "Community",
Sources = c("EMP", "FMWT", "STN", "DOP"),
Size_class = "Meso",
Date_range = c("2017-06-01", NA)) %>%
mutate("Month" = month(Date)) %>% #make month column so we can filter only the ones we want
filter(Month >5, Month <11) %>%
filter(Order == "Calanoida") %>% #only including calanoids since thats what's in our hypothesis
mutate(Date2 = ymd(Date))%>% # change date format to same as SMSCG
mutate(Date = Date2) %>%
select(Source, Station, Latitude, Longitude, Year, Date, TowType, SampleID, Taxlifestage, CPUE)
#DOP does multiple tows at a station (surface and bottom), plus added a special study for oblique in 2017, so want to average the cpue of these at the same date/ station.... but first need to add the regions in so we can use the lat/ longs
####SMSCG Region designations----
#add in the stations locations/ regions for the zooper dataset before taking averages
load("Data/SMSCGRegions.RData")
na_coords = zooper %>%
subset(is.na(zooper$Latitude)) #need to check which coordinates have NAs, looks like its from a 2018 LSC station, which don't need anyway
zoopstations = zooper %>%
filter(!is.na(zooper$Latitude)) %>% #remove stations with no locs
st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326, remove = FALSE) %>%
st_transform(crs = st_crs(Regions)) %>%
st_join(Regions) %>%
st_drop_geometry() %>%
filter(Region != "NA") %>% #remove any of the stations not included in the SMSCG footprint
select(Source:CPUE, Region)
#now take the station averages with the regions assigned
#Avg Zooper CPUE----
avgzoopCPUE = zoopstations %>%
group_by(Source, Region, Station, Date, Taxlifestage) %>%
summarize(meanCPUE = mean(CPUE)) %>%
mutate(SampleID = paste (Source, Region, Station, Date, Taxlifestage, sep = ' '))#create a sample ID for each combo so that when I combine I can eliminate duplicates
#Zooper BPUE------
#read in the biomass file
biomass = read.csv("Data/Mesomicrotaxa Biomass.csv")
zooper_bpue = left_join(avgzoopCPUE, biomass) %>%
mutate(BPUE = meanCPUE*Carbon_weight_ug) %>%
filter(!is.na(BPUE)) %>%
#mutate(taxa = str_remove(Taxlifestage, "_UnID")) %>%
select(c(-meanCPUE, -Carbon_weight_ug, -EMP_Code,))
#Add unpubed SMSCG data-----
#read in SMSCG data which has EMP that hasn't been published yet
SMSCG_CPUE = read.csv("Data/SMSCG_CBNet_2018to2023CPUE_07Feb2024.csv") %>%
select(Project, Year, Date, Station, ACARTELA:CUMAC ) %>%
rename(Source = Project) %>% #renaming Project so that the survey matches the source column in zooper
mutate(Date2 = mdy(Date)) %>% #change the date column to be a date format since its a character now
select(-starts_with("ALL")) %>% #remove the total columns
select(-Date) %>%
rename(Date = Date2) #changing it so we only have the right date column
###Station locations-----
#read in station locations file for lat and long
SMSCG_CPUE_Loc = read.csv("Data/SMSCG_Station_Coords.csv") %>%
select(Project, Station, longitude, latitude) %>%
rename(Source = Project, Longitude = longitude, Latitude = latitude) #renaming so can combine down the line
#merge the two files together so have coordinates in the SMSCG file
SMSCG_CPUE_new = merge(SMSCG_CPUE, SMSCG_CPUE_Loc, by = c("Source", "Station"))
#convert the SMSCG to long format so can convert to biomass and combine both file later
SMSCG_long = pivot_longer(SMSCG_CPUE_new, cols = (ACARTELA:CUMAC),
names_to = "EMP_Code",
values_to = "CPUE")
###SMSCG Regions----
unpubstations = SMSCG_long %>%
st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326, remove = FALSE) %>%
st_transform(crs = st_crs(Regions)) %>%
st_join(Regions) %>%
st_drop_geometry() %>%
filter(Region != "NA") %>% #remove any of the stations not included in the SMSCG footprint
select(Source:Station, Date, EMP_Code, CPUE, Region)
###SMSCG biomass--------------
#convert SMSCG file to biomass via the codes in the biomass csv
#full join so have the tax life stage column so I can combine the two files
SMSCG_bpue = full_join(unpubstations, biomass, by= "EMP_Code") %>%
mutate(BPUE = CPUE*Carbon_weight_ug) %>%
filter(!is.na(BPUE)) %>%
group_by(Source, Region, Station, Date, Taxlifestage) %>%
mutate(SampleID = paste (Source, Region, Station, Date, Taxlifestage, sep = ' ')) %>% #create sample ID so can check for duplicates between zooper and this dataset
select(-Carbon_weight_ug, -CPUE, -EMP_Code)
#check the number of rows for emp 2022 only to see how many records should be added to the zooper set
emp2022 = SMSCG_bpue %>%
mutate(Year = year(Date)) %>%
filter(Source == "EMP" & Year ==2022)
#Zooper and SMSCG files-----
#combine datasets but first need to filter out any duplicates
nodups = filter(SMSCG_bpue, !(SampleID %in% zooper_bpue$SampleID))
all_data = rbind(nodups, zooper_bpue) %>% #use rbind because just adding rows and has the same columns
#select(SampleID) %>% #filter out for just calanoids
#filter(Taxlifestage %in% c('Acartiella sinensis Adult', 'Acartiella sinensis Juvenile', 'Tortanus_UnID Adult', 'Tortanus_UnID
#Juvenile', 'Pseudodiaptomus forbesi Juvenile' , 'Pseudodiaptomus forbesi Adult' ,
#'Pseudodiaptomus_UnID Larva' , 'Pseudodiaptomus_UnID Adult' , 'Pseudodiaptomus_UnID Juvenile',
#'Eurytemora affinis Larva' , 'Eurytemora affinis Adult' , 'Eurytemora affinis Juvenile' ,
#'Eurytemora_UnID Larva', 'Sinocalanus doerrii Juvenile' , 'Sinocalanus doerrii Adult' ,'Sinocalanus
#doerrii Larva' , 'Sinocalanus_UnID Juvenile' , 'Acartia_UnID Adult' , 'Acartia_UnID Juvenile' ,
#'Diaptomidae_UnID Adult' , 'Diaptomidae_UnID Juvenile' , 'Pseudodiaptomus marinus Adult' ,
#'Calanoida_UnID Adult' , 'Calanoida_UnID Juvenile'))
mutate(Year = as.factor(year(Date))) %>% #add month and year plus adding a factor
mutate(Month = as.factor(month(Date)))
dups = rbind(zooper_bpue, SMSCG_bpue) %>% #checking to make the numbers match up with combining and number of dups
group_by(SampleID) %>%
summarize(N= n()) %>%
filter(N ==2)
#numbers work out with combing two datasets and removing the duplicates
#now need to move back to wide format so can group taxa, then will need to move back to long for the rest (e.g. plots)
all_data_wide = all_data %>%
pivot_wider(id_cols = c(Source: Region, Year, Month),
names_from = Taxlifestage,
values_from = BPUE,
values_fill = 0)
#combine taxa groups for plots
#the all data file has all zoop groups because of the SMSCG file
zoopgroups= mutate(all_data_wide,
Acartiella = `Acartiella sinensis Adult` + `Acartiella sinensis Juvenile`,
Tortanus = `Tortanus_UnID Adult` + `Tortanus_UnID Juvenile`,
Pseudodiaptomus = `Pseudodiaptomus forbesi Juvenile` + `Pseudodiaptomus forbesi Adult` +
`Pseudodiaptomus_UnID Larva` + `Pseudodiaptomus_UnID Adult` + `Pseudodiaptomus_UnID Juvenile`,
Eurytemora = `Eurytemora affinis Larva` + `Eurytemora affinis Adult` + `Eurytemora affinis Juvenile` +
`Eurytemora_UnID Larva`,
`Other Calanoids` = `Sinocalanus doerrii Juvenile` + `Sinocalanus doerrii Adult` +
`Sinocalanus doerrii Larva` + `Sinocalanus_UnID Juvenile` + `Acartia_UnID Adult` + `Acartia_UnID Juvenile`
+ `Diaptomidae_UnID Adult` + `Diaptomidae_UnID Juvenile` + `Pseudodiaptomus marinus Adult` +
`Calanoida_UnID Adult` + `Calanoida_UnID Juvenile`) %>%
select(Source:Month, Acartiella, Tortanus, Pseudodiaptomus, Eurytemora, 'Other Calanoids') %>% #remove all the other taxa files
mutate(SampleID = paste (Source, Region, Year, Month, Station, sep = ' ')) %>% #create a sample ID b/c things get funky with the total graph later
pivot_longer(., cols = (c(Acartiella: Eurytemora, 'Other Calanoids')), names_to = "Taxa_Group", values_to = "BPUE") #move back to long format for combining
####Plots------
#need graphs with years, not doing month for this report since the time hasn't been consistent and not doing BACI...
#still need month though for the random effect for the stats later
#File with summary stats
#first need to calc the mean by month
zoopsummary_month = group_by(zoopgroups, Region, Year, Month, Taxa_Group) %>%
mutate(Region = factor(Region, levels = c("Suisun Bay", "Suisun Marsh", "River"))) %>% #change levels so geographically organized from W to E
summarize(mean_BPUE = mean(BPUE),
n = length(BPUE),
se_BPUE = (sd(BPUE))/n)
#then do the mean by year for the graph.
zoopsummary_yr = group_by(zoopsummary_month, Region, Year, Taxa_Group) %>%
mutate(Region = factor(Region, levels = c("Suisun Bay", "Suisun Marsh", "River"))) %>% #change levels so geographically organized from W to E
summarize(mean_BPUE2 = mean(mean_BPUE))
#need to create a sample size based on the number of actual samples since if we do it in the summary files it won't calc by samples but by month
zoopsampsize = group_by(zoopgroups, Region, Year) %>%
summarize(N = length(unique(SampleID))) %>%
mutate(Region = factor(Region, levels = c("Suisun Bay", "Suisun Marsh", "River")))
#Making plot
bpue1 = ggplot(zoopsummary_yr, aes(x = Year, y = mean_BPUE2))
bpue2 = bpue1+ geom_bar(stat = "identity", aes(fill = Taxa_Group)) +
facet_wrap(facets= vars(Region)) +
scale_fill_brewer(palette = "Set3", name = NULL) +
ylab("Mean BPUE (µgC/m3)") +
geom_label(data = zoopsampsize, aes(x = Year, y = 100, label = paste(N)), size = 2, nudge_y = -500) +
theme_few() + theme(text = element_text(family = "sans", size = 10),
legend.text = element_text(face = "italic"),
axis.text.x = element_text(angle=45, hjust=1)) #angles x axis so the years fit
bpue2
ggsave(plot = bpue2, filename = "SMSCG_2017to2023_Zoop.tiff", device = "tiff", width = 6, height =5, units = "in", dpi = 300)
####Analysis------------------------------------
#want to do an analysis with years and regions with the total bpue----- but this is still just for calanoids only
#first need to do the same thing with summing by sample, then by month, then by year
zooptots_samp = group_by(zoopgroups, Region, Year, Month, SampleID) %>% #need to include sample ID so that it sums it by sample, and then take the mean of that since we have
summarize(BPUE_samp = sum(BPUE),
logBPUE_samp = (log(BPUE_samp+1))) #need to add 1 since there is a zero or two in there
zooptots_mon = group_by(zooptots_samp, Region, Year, Month) %>%
summarize(BPUE_mon = mean(BPUE_samp),
logBPUE = (log(BPUE_mon))) %>%
mutate(Region = factor(Region, levels = c("Suisun Bay", "Suisun Marsh", "River"))) #change levels so geographically organized from W to E
zooptots_yr = zooptots_mon %>%
group_by(Region, Year) %>%
summarize(BPUE_yr = mean(BPUE_mon))
tots1 = ggplot(zooptots_yr, aes(x = Year, y = BPUE_yr))
tots2 = tots1 + geom_bar(stat = "identity") +
#facet_wrap(facets= vars(Region)) +
ylab("Mean BPUE (µgC/m3)") +
theme_few() + theme(text = element_text(family = "sans", size = 10),
legend.text = element_text(face = "italic"),
axis.text.x = element_text(angle=45, hjust=1)) #angles x axis so the years fit
tots2
hist(zooptots_samp$BPUE_samp)
hist(zooptots_samp$logBPUE_samp) #skewed so need to use log bpue
#doing a linear mixed model with month as a random effect so it removes the seasonality aspect
#need to use the package lm4
#want to do this by sample
zlmer = lmer(logBPUE_samp~ Region*Year + (1|Month), data = zooptots_samp)
summary(zlmer)
plot(zlmer)
#plot the output of the linear model. Need the effects package for that. This is just an effects plot
plot(allEffects(zlmer), x.var = "Year")
#this one is a prettier effects plot than using the
effs = allEffects(zlmer)[[1]]
effs_df = data.frame(effs$x, SE = effs$se, Fit = effs$fit) %>%
mutate(Region = factor(Region, levels = c("Suisun Bay", "Suisun Marsh", "River"))) #change levels so geographically organized from W to E
ggplot(effs_df, aes(x = Region, y = Fit)) + geom_point()+
facet_wrap(~Year)+
geom_errorbar(aes(ymin = Fit-SE, ymax = Fit+SE))+ theme_bw()
#could also do an anova but the aov code in R is just for a type I anova which you can only do with an equal sample size which we definitely don't have
#need to do a type III which we can do in the package car
Anova(zlmer, type = "III") #region and year signif but not the interaction
#stick with linear model since we can use a random effect for month
#the lmer above shows River and SM signifi diff, and 2017/2023 are signif diff, but thats it.
#need to do pairwise comparisons to break out exactly what it is
#can do that by region and year combos
#pairwise comparisons to look at which region and year combos are signif
regbyyear = emmeans(zlmer, pairwise ~ "Region", by = "Year") #looking at how the regions differ to each other in the same year
yearbyreg = emmeans(zlmer, pairwise ~ "Year", by = "Region") #how the years differ in the same region
yearonly = emmeans(zlmer, pairwise ~ "Year") #just how the years differ
#post hoc results table
tablereg_yr= as.data.frame (regbyyear$contrasts) #making a pretty table for the report
write.csv(tablereg_yr, "region_by_year.csv")
tableyrby_reg= as.data.frame (yearbyreg$contrasts)
write.csv(tableyrby_reg, "year_by_region.csv")
tableyronly= as.data.frame (yearonly$contrasts)
write.csv(tableyronly, "year_only.csv")
#table with main glm results for main report, not just post hoc
main_glm= as.data.frame(summary(zlmer)$coefficients)
#doing analysis with pseudo only
#Pseudo----
pseudo = zoopgroups %>%
filter(Taxa_Group == "Pseudodiaptomus")
#File with summary stats for just pseduo
#first need to calc the mean by month
pseudosum_month = group_by(pseudo, Region, Year, Month, Taxa_Group) %>%
mutate(Region = factor(Region, levels = c("Suisun Bay", "Suisun Marsh", "River"))) %>% #change levels so geographically organized from W to E
summarize(mean_BPUE = mean(BPUE),
n = length(BPUE),
se_BPUE = (sd(BPUE))/n)
#then do the mean by year for the graph.
pseudosum_yr = group_by(pseudosum_month, Region, Year, Taxa_Group) %>%
mutate(Region = factor(Region, levels = c("Suisun Bay", "Suisun Marsh", "River"))) %>% #change levels so geographically organized from W to E
summarize(mean_BPUE2 = mean(mean_BPUE),
logBPUE = (log(mean_BPUE2)))
#Making plot
p1 = ggplot(pseudosum_yr, aes(x = Year, y = mean_BPUE2))
p2 = tots1 + geom_bar(stat = "identity") +
#facet_wrap(facets= vars(Region)) +
ylab("Mean BPUE (µgC/m3)") +
theme_few() + theme(text = element_text(family = "sans", size = 10),
legend.text = element_text(face = "italic"),
axis.text.x = element_text(angle=45, hjust=1)) #angles x axis so the years fit
p2
ggsave(plot = bpue2, filename = "SMSCG_Pseudo.tiff", device = "tiff", width = 6, height =5, units = "in", dpi = 300)
####Analysis------------------------------------
#doing a linear mixed model with month as a random effect so it removes the seasonality aspect
#need to use the package lm4
#want to do this by sample
pseudo_lmer = lmer((log(BPUE+1))~ Region*Year + (1|Month), data = pseudo)
summary(pseudo_lmer)
#this one is a prettier effects plot than using the
p_effs = allEffects(pseudo_lmer)[[1]]
p_effs_df = data.frame(p_effs$x, SE = p_effs$se, Fit = p_effs$fit) %>%
mutate(Region = factor(Region, levels = c("Suisun Bay", "Suisun Marsh", "River"))) #change levels so geographically organized from W to E
ggplot(p_effs_df, aes(x = Region, y = Fit)) + geom_point()+
facet_wrap(~Year)+
geom_errorbar(aes(ymin = Fit-SE, ymax = Fit+SE))+ theme_bw()
#pairwise comparisons to look at which region and year combos are signif
pseudobyyear = emmeans(pseudo_lmer, pairwise ~ "Region", by = "Year") #looking at how the regions differ to each other in the same year
#post hoc results table
tablereg_yr= as.data.frame (regbyyear$contrasts) #making a pretty table for the report
write.csv(tablereg_yr, "region_by_year.csv")