Compare SOE aggregate survey bio indices to VAST forage index. Caveats: may not contain same species, wont be on same scale

Compare indices scaled to mean for each EPU

Use Kim B’s code to extract planktivores from ecodata::aggregate_biomass, modify code to calculate time series sd, rescale

planktivores <- ecodata::aggregate_biomass %>%
  dplyr::filter(Var %in% c("Planktivore Fall Biomass Index",
                           "Planktivore Spring Biomass Index",
                           "Planktivore Fall Biomass Standard Error",
                           "Planktivore Spring Biomass Standard Error"))%>%
  tidyr::separate(Var, c("feeding.guild", "season", "Biomass", "Var1", "Null", "Area"), sep = " ") %>%
  tidyr::unite("Var", feeding.guild:season, sep = " ") %>%
  dplyr::mutate(stat = recode(Var1, Index = "Mean",
                      Standard = "SD")) %>%
  dplyr::select(-Biomass, -Var1, -Null, -Units) %>%
  # dplyr::filter(!Area == "-") %>%
  dplyr::group_by(Var, Time, EPU, Area) %>%
  tidyr::pivot_wider(id_cols = c(Var, Time, EPU, Area),names_from =  stat, values_from =  Value) %>%
  dplyr::filter(!Mean == "NULL") %>%
  dplyr::ungroup() %>%
  dplyr::mutate(Mean = as.numeric(Mean))


agg_bio<-planktivores %>% dplyr::filter(EPU %in% c("MAB", "GB", "GOM"),
         Time >= 1968) %>%
  dplyr::group_by(Var, EPU) %>%
  dplyr::mutate(hline = mean(Mean, na.rm = T), 
                tssd = sd(SD, na.rm = T),
                 upper = Mean + (2*SD),
                lower = Mean - (2*SD)) %>%
  dplyr::ungroup()  

agg_bio_std <- agg_bio %>%
  dplyr::mutate(stdPlank = (Mean-hline)/tssd,
                #cv = SD/Mean,
                stdPlankupper = (upper-hline)/tssd,
                stdPlanklower = (lower-hline)/tssd)

agg_bio_std$Var <- factor(agg_bio_std$Var,levels = c("Planktivore Spring",
                                                    "Planktivore Fall"))
agg_bio_std$EPU <- factor(agg_bio_std$EPU, levels = c("GOM", "GB", "MAB"))
series.col <- rep("black",length(unique(agg_bio_std$Var)))
facet_names <- list("Planktivores" = expression("Planktivores")
                    )

Test plotting

Original SOE plot scale

#Time series constants
shade.alpha <- 0.3
shade.fill <- "lightgrey"
lwd <- 1
pcex <- 2
trend.alpha <- 0.5
trend.size <- 2
hline.size <- 1
hline.alpha <- 0.35
hline.lty <- "dashed"
label.size <- 5
hjust.label <- 1.5
letter_size <- 4
feeding.guilds <- c("Apex Predator","Piscivore","Planktivore","Benthivore","Benthos")
x.shade.min <- 2011
x.shade.max <- 2021
series.col <- c("indianred","black")
#Function for custom ggplot facet labels
label <- function(variable,value){
  return(facet_names[value])
}

p3<-agg_bio_std %>%
  dplyr::filter(str_detect(Var,"Planktivore")) %>%
  ggplot2::ggplot() +

  #Highlight last ten years
  #ggplot2::annotate("rect", fill = shade.fill, alpha = shade.alpha,
  #    xmin = x.shade.min , xmax = x.shade.max ,
  #    ymin = -Inf, ymax = Inf) +
  #Test for trend and add lines
  ecodata::geom_gls(aes(x = Time, y = Mean,
               color = Var),
             alpha = trend.alpha, size = trend.size) +
  ecodata::geom_lm(aes(x = Time, y = Mean,
               color = Var),
             alpha = trend.alpha, size = trend.size) +
  # #ecodata::geom_lm(aes(x = Time, y = Mean))+

  #Add time series
  ggplot2::geom_ribbon(aes(x = Time, ymin = pmax(lower,0), ymax = upper),
              alpha = 0.5,
              fill = "grey") +
  ggplot2::geom_line(aes(x = Time, y = Mean),size = lwd-0.5) +
  ggplot2::geom_point(aes(x = Time, y = Mean),size = pcex-0.5) +
  ggplot2::scale_color_manual(values = series.col, aesthetics = "color")+
  ggplot2::guides(color = FALSE) +
  ggplot2::geom_hline(aes(yintercept = hline,
                 group = Var),
             size = hline.size,
             alpha = hline.alpha,
             linetype = hline.lty)+
  ggplot2::facet_wrap(EPU ~ Var~.,ncol = 2, scales = "free_y") +
       #Add NEAMAP
  # ggplot2::geom_ribbon(data = neamap.3, aes(ymax = pmax(upper, 0), ymin = lower, x = Time),
  #               fill = "pink", alpha = 0.5) +
  # ggplot2::geom_line(data = neamap.3, aes(x = Time, y = Value),
  #           color = "#ca0020")+
  # ggplot2::geom_point(data = neamap.3, aes(x = Time, y = Value),
  #            size = pcex-0.5,
  #            color = "#ca0020")+

  #Axis and theme
  ggplot2::scale_x_continuous(breaks = seq(1970, 2020, by = 10), expand = c(0.01, 0.01)) +
  #ylim(0, 600)+
  ggplot2::ylab(expression("Biomass (kg tow"^-1*")")) +
  ecodata::theme_facet()+
  ggplot2::theme(strip.text=element_text(hjust=0),
        axis.title.x=element_blank())

p3

Rescaled SOE plots

p4<-agg_bio_std %>%
  dplyr::filter(str_detect(Var,"Planktivore")) %>%
  ggplot2::ggplot() +

  #Highlight last ten years
  #ggplot2::annotate("rect", fill = shade.fill, alpha = shade.alpha,
  #    xmin = x.shade.min , xmax = x.shade.max ,
  #    ymin = -Inf, ymax = Inf) +
  #Test for trend and add lines
  ecodata::geom_gls(aes(x = Time, y = stdPlank,
               color = Var),
             alpha = trend.alpha, size = trend.size) +
  ecodata::geom_lm(aes(x = Time, y = stdPlank,
               color = Var),
             alpha = trend.alpha, size = trend.size) +
  # #ecodata::geom_lm(aes(x = Time, y = Mean))+

  #Add time series
  ggplot2::geom_ribbon(aes(x = Time, ymin = pmax(stdPlanklower), ymax = stdPlankupper),
              alpha = 0.5,
              fill = "grey") +
  ggplot2::geom_line(aes(x = Time, y = stdPlank),size = lwd-0.5) +
  ggplot2::geom_point(aes(x = Time, y = stdPlank),size = pcex-0.5) +
  ggplot2::scale_color_manual(values = series.col, aesthetics = "color")+
  ggplot2::guides(color = FALSE) +
  ggplot2::geom_hline(aes(yintercept = 0,
                 group = Var),
             size = hline.size,
             alpha = hline.alpha,
             linetype = hline.lty)+
  ggplot2::facet_wrap(EPU ~ Var~.,ncol = 2, scales = "free_y") +
       #Add NEAMAP
  # ggplot2::geom_ribbon(data = neamap.3, aes(ymax = pmax(upper, 0), ymin = lower, x = Time),
  #               fill = "pink", alpha = 0.5) +
  # ggplot2::geom_line(data = neamap.3, aes(x = Time, y = Value),
  #           color = "#ca0020")+
  # ggplot2::geom_point(data = neamap.3, aes(x = Time, y = Value),
  #            size = pcex-0.5,
  #            color = "#ca0020")+

  #Axis and theme
  ggplot2::scale_x_continuous(breaks = seq(1970, 2020, by = 10), expand = c(0.01, 0.01)) +
  #ylim(0, 600)+
  ggplot2::ylab(expression("Standardized Biomass Index")) +
  ecodata::theme_facet()+
  ggplot2::theme(strip.text=element_text(hjust=0),
        axis.title.x=element_blank())

p4

Rescale forage indices and add to rescaled SOE plot

forage <- ecodata::forage_index %>%
  dplyr::filter(Var %in% c(#"Annual Forage Fish Biomass Estimate",
                           #"Annual Forage Fish Biomass Estimate SE",
                           "Fall Forage Fish Biomass Estimate",
                           "Fall Forage Fish Biomass Estimate SE",
                           "Spring Forage Fish Biomass Estimate",
                           "Spring Forage Fish Biomass Estimate SE"),
                EPU %in% c("GOM", "GB", "MAB"))%>%
  tidyr::separate(Var, c("season", "feeding.guild", NA,"Biomass", NA, "Var1"), sep = " ", extra = "drop", fill = "right") %>% 
  tidyr::unite("Var", feeding.guild:season, sep = " ") %>%
  dplyr::mutate(stat = recode(Var1, .missing = "Mean",
                      SE = "SD")) %>%
  dplyr::select(-Biomass, -Var1, -Units) %>%
  # dplyr::filter(!Area == "-") %>%
  dplyr::group_by(Var, Time, EPU) %>%
  tidyr::pivot_wider(id_cols = c(Var, Time, EPU),names_from =  stat, values_from =  Value) %>%
  dplyr::filter(!Mean == "NULL") %>%
  dplyr::ungroup() %>%
  dplyr::mutate(Mean = as.numeric(Mean))

forage_std <- forage %>%
  dplyr::group_by(Var, EPU) %>%
  dplyr::mutate(hline = mean(Mean, na.rm = T), 
                tssd = sd(SD, na.rm = T),
                 upper = Mean + (2*SD),
                lower = Mean - (2*SD)) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(stdForage = (Mean-hline)/tssd,
                #cv = SD/Mean,
                stdForageupper = (upper-hline)/tssd,
                stdForagelower = (lower-hline)/tssd)

forage_std$Var <- factor(forage_std$Var,levels = c("Forage Spring",
                                                    "Forage Fall"))
forage_std$EPU <- factor(forage_std$EPU, levels = c("GOM", "GB", "MAB"))
series.col <- rep("black",length(unique(forage_std$Var)))
facet_names <- list("Forage" = expression("Forage")
                    )

Plot just forage indices

p5<-forage_std %>%
  #dplyr::filter(str_detect(Var,"Forage")) %>%
  ggplot2::ggplot() +

  #Highlight last ten years
  #ggplot2::annotate("rect", fill = shade.fill, alpha = shade.alpha,
  #    xmin = x.shade.min , xmax = x.shade.max ,
  #    ymin = -Inf, ymax = Inf) +
  #Test for trend and add lines
  ecodata::geom_gls(aes(x = Time, y = stdForage,
               color = Var),
             alpha = trend.alpha, size = trend.size) +
  ecodata::geom_lm(aes(x = Time, y = stdForage,
               color = Var),
             alpha = trend.alpha, size = trend.size) +
  # #ecodata::geom_lm(aes(x = Time, y = Mean))+

  #Add time series
  ggplot2::geom_ribbon(aes(x = Time, ymin = pmax(stdForagelower), ymax = stdForageupper),
              alpha = 0.5,
              fill = "grey") +
  ggplot2::geom_line(aes(x = Time, y = stdForage),size = lwd-0.5) +
  ggplot2::geom_point(aes(x = Time, y = stdForage),size = pcex-0.5) +
  ggplot2::scale_color_manual(values = series.col, aesthetics = "color")+
  ggplot2::guides(color = FALSE) +
  ggplot2::geom_hline(aes(yintercept = 0,
                 group = Var),
             size = hline.size,
             alpha = hline.alpha,
             linetype = hline.lty)+
  ggplot2::facet_wrap(EPU ~ Var~.,ncol = 2, scales = "free_y") +
       #Add NEAMAP
  # ggplot2::geom_ribbon(data = neamap.3, aes(ymax = pmax(upper, 0), ymin = lower, x = Time),
  #               fill = "pink", alpha = 0.5) +
  # ggplot2::geom_line(data = neamap.3, aes(x = Time, y = Value),
  #           color = "#ca0020")+
  # ggplot2::geom_point(data = neamap.3, aes(x = Time, y = Value),
  #            size = pcex-0.5,
  #            color = "#ca0020")+

  #Axis and theme
  ggplot2::scale_x_continuous(breaks = seq(1970, 2020, by = 10), expand = c(0.01, 0.01)) +
  #ylim(0, 600)+
  ggplot2::ylab(expression("Standardized Biomass Index")) +
  ecodata::theme_facet()+
  ggplot2::theme(strip.text=element_text(hjust=0),
        axis.title.x=element_blank())

p5

Same plot standardized SOE planktivores and Forage index

forage_std <- forage_std %>%
  mutate(CompVar = recode(Var, "Forage Spring" = "Spring",
                         "Forage Fall" = "Fall")) 
 
forage_std$CompVar <- factor(forage_std$CompVar,levels = c("Spring",
                                                    "Fall"))

agg_bio_std <- agg_bio_std %>%
  mutate(CompVar = recode(Var, "Planktivore Spring" = "Spring",
                         "Planktivore Fall" = "Fall")) 
  

agg_bio_std$CompVar <- factor(agg_bio_std$CompVar,levels = c("Spring",
                                                    "Fall"))


p6<-agg_bio_std %>%
  #dplyr::filter(str_detect(Var,"Planktivore")) %>%
  ggplot2::ggplot() +

  #Highlight last ten years
  #ggplot2::annotate("rect", fill = shade.fill, alpha = shade.alpha,
  #    xmin = x.shade.min , xmax = x.shade.max ,
  #    ymin = -Inf, ymax = Inf) +
  #Test for trend and add lines
  ecodata::geom_gls(aes(x = Time, y = stdPlank,
               color = CompVar),
             alpha = trend.alpha, size = trend.size) +
  ecodata::geom_lm(aes(x = Time, y = stdPlank,
               color = CompVar),
             alpha = trend.alpha, size = trend.size) +
  # #ecodata::geom_lm(aes(x = Time, y = Mean))+

  #Add time series
  ggplot2::geom_ribbon(aes(x = Time, ymin = pmax(stdPlanklower), ymax = stdPlankupper),
              alpha = 0.5,
              fill = "grey") +
  ggplot2::geom_line(aes(x = Time, y = stdPlank),size = lwd-0.5) +
  ggplot2::geom_point(aes(x = Time, y = stdPlank),size = pcex-0.5) +
  ggplot2::scale_color_manual(values = series.col, aesthetics = "color")+
  ggplot2::guides(color = FALSE) +
  ggplot2::geom_hline(aes(yintercept = 0,
                 group = CompVar),
             size = hline.size,
             alpha = hline.alpha,
             linetype = hline.lty)+
       #Add Forage Index
  ggplot2::geom_ribbon(data = forage_std, aes(ymin = stdForagelower, ymax = stdForageupper, x = Time),
                fill = "pink", alpha = 0.5) +
  ggplot2::geom_line(data = forage_std, aes(x = Time, y = stdForage),
            color = "#ca0020")+
  ggplot2::geom_point(data = forage_std, aes(x = Time, y = stdForage),
             size = pcex-0.5,
             color = "#ca0020")+
  # trend test forage indices
  ecodata::geom_gls(data = forage_std, aes(x = Time, y = stdForage,
               color = CompVar),
             alpha = trend.alpha, size = trend.size) +
  ecodata::geom_lm(data = forage_std, aes(x = Time, y = stdForage,
               color = CompVar),
             alpha = trend.alpha, size = trend.size) +
  
  ggplot2::facet_wrap(EPU ~ CompVar~.,ncol = 2, scales = "free_y") +

  #Axis and theme
  ggplot2::scale_x_continuous(breaks = seq(1970, 2020, by = 10), expand = c(0.01, 0.01)) +
  #ylim(0, 600)+
  ggplot2::ylab(expression("Standardized Biomass Index")) +
  ecodata::theme_facet()+
  ggplot2::theme(strip.text=element_text(hjust=0),
        axis.title.x=element_blank())

p6

SOE Planktivores show increasing trends for GB and GOM in Fall and GOM in Spring. No trends in the MAB or GB Spring.

Forage index agrees with no trends in MAB or GB Spring, and a positive trend in GOM Spring.

Forage index disagrees with SOE Planktivores showing a significant decreasing trend in GB Fall and no trend in GOM Fall.

Error bars (supposedly 2XSE for both) are larger for the Forage index overall.

Really these may be tracking different things.

Compare species lists

Bluefish prey list in forage index:

load(here("fhdat/allfh.Rdata"))

# October 8 2022: add NEFSC 2021 data
load(here("fhdat/allfh21.Rdata"))

allfh <- allfh %>%
  dplyr::bind_rows(allfh21) 

preycount <- allfh %>%
   group_by(pdcomnam, pynam) %>%
   summarise(count = n()) %>%
   filter(pdcomnam != "") %>%
   #arrange(desc(count))
   pivot_wider(names_from = pdcomnam, values_from = count) 

gencomlist <- allfh %>%  
  select(pynam, pycomnam2, gencom2) %>%
  distinct()

blueprey <- preycount %>%
  filter(BLUEFISH > 9) %>%
  filter(!pynam %in% c("EMPTY", "BLOWN",
                       "FISH", "OSTEICHTHYES",
                       "ANIMAL REMAINS",
                       "FISH SCALES")) %>%
  #filter(!str_detect(pynam, "SHRIMP|CRAB")) %>%
  left_join(gencomlist) %>%
  filter(!gencom2 %in% c("ARTHROPODA", "ANNELIDA",
                         "CNIDARIA", "UROCHORDATA",
                         "ECHINODERMATA", "WORMS",
                         "BRACHIOPODA", "COMB JELLIES",
                         "BRYOZOA", "SPONGES",
                         "MISCELLANEOUS", "OTHER")) %>%
  arrange(desc(BLUEFISH))

# kableExtra::kable(blueprey[,c('pynam','BLUEFISH')],
#                   col.names = c('Prey name', 'Bluefish stomachs (n)'),
#                   caption = "Table 1. Prey identified in bluefish stomachs, NEFSC diet database, 1973-2020.")

flextable::flextable(blueprey[,c('pynam', 'pycomnam2','BLUEFISH')]) %>%
  flextable::set_header_labels(pynam = "Prey name", pycomnam2 = "Prey common name", BLUEFISH = "Bluefish stomachs (n)") %>%
  flextable::set_caption("Prey identified in bluefish stomachs, NEFSC diet database, 1973-2021.") %>%
  #flextable::autofit()
  flextable::width(width = c(2.5,3.5,0.5))

Planktivores in SOE:

SOEplank <- ecodata::species_groupings %>% 
  dplyr::select(SCINAME, COMNAME, SOE.18, SOE.20) %>% 
  dplyr::filter(SOE.18 == "Planktivore")

flextable::flextable(SOEplank) %>%
  flextable::set_header_labels(SCINAME = "Name", COMNAME = "Common name", SOE.18 = "2018-2019 SOE", SOE.20 = "2020 to present SOE") %>%
  flextable::set_caption("Planktivore category used in State of the Ecosystem (SOE) reports, 2018-present.") %>%
  flextable::autofit()

SOE Planktivores includes sculpins, cusk, searobins, lumpfish, and blackbelly rosefish, which are not in the Forage Index. SOE Planktivores is currently missing both squids and all species of anchovies which are common prey species in the Forage Index.

Can we redo the survdat pull with the bluefish prey list? or close to it

New Survdat Aggbio with Forage species

Need the script that does the aggregation with survey-based EPUs. This script uses original EPUs and older species groupings: https://github.com/NOAA-EDAB/ecodata/blob/master/data-raw/get_nefsc_survey.R

#object name is survdat
#load(url("https://github.com/NOAA-EDAB/ms-keyrun/raw/master/data-raw/data/survdat.RData"))

MAB <- c(1010:1080, 1100:1120, 1600:1750, 3010:3450, 3470, 3500, 3510)
GB  <- c(1090, 1130:1210, 1230, 1250, 3460, 3480, 3490, 3520:3550)
GOM <- c(1220, 1240, 1260:1290, 1360:1400, 3560:3830)

MABgrid <-  FishStatsUtils::northwest_atlantic_grid %>%
  filter(stratum_number %in% c(MAB))

GBgrid <-  FishStatsUtils::northwest_atlantic_grid %>%
  filter(stratum_number %in% c(GB))

GOMgrid <-  FishStatsUtils::northwest_atlantic_grid %>%
  filter(stratum_number %in% c(GOM))


ggplot(data = ecodata::coast) +
  geom_sf() +   
  geom_point(data = MABgrid, aes(x = Lon, y = Lat), colour = "red", size=0.05, alpha=0.1) +
  geom_point(data = GBgrid, aes(x = Lon, y = Lat), colour = "green", size=0.05, alpha=0.1) +
  geom_point(data = GOMgrid, aes(x = Lon, y = Lat), colour = "blue", size=0.05, alpha=0.1) +
  coord_sf(xlim = c(-79, -65.5), ylim = c(33, 45))+
  ggtitle("Survey Strata Based EPUs")

# shows builtin epus are not survey strata based
#plot(sf::st_geometry(ecodata::epu_sf))

Mapping survey species to prey species. Unfortunately not easy.

Alter ecodata::species_groupings with a new column

# see https://stackoverflow.com/questions/32914357/dplyr-inner-join-with-a-partial-string-match
# https://stackoverflow.com/questions/60993676/join-data-frames-based-fuzzy-matching-of-strings

library(fuzzyjoin)

bluepreyonly <- blueprey %>%
  dplyr::select(pynam)

my_match_fun <- Vectorize(function(x,y) agrepl(x, y, ignore.case=TRUE))#, max.distance = 0.12, useBytes = TRUE))

foragelist <- ecodata::species_groupings %>%
  mutate(charsciname = as.character(SCINAME)) %>%
  #regex_inner_join(blueprey, by = c(charsciname = "pynam"))
  #fuzzy_inner_join(blueprey, by = c(charsciname = "pynam"), match_fun = str_detect)
  fuzzy_inner_join(bluepreyonly, by = c(charsciname = "pynam"), match_fun = my_match_fun)

# missing specific sandlances, I want all three of these
sandlances <- ecodata::species_groupings %>% filter(str_detect(SCINAME, "AMMO"))

# missing Illex
illex <- ecodata::species_groupings %>% filter(str_detect(SCINAME, "ILLEX"),
                                               !str_detect(SCINAME, "EGG"))

forageindex <- foragelist %>%
  dplyr::select(-charsciname, -pynam) %>%
  bind_rows(sandlances, illex) %>%
  distinct() %>%
  mutate(ForageIndex = "ForageIndex")

species_groupings_forage <- ecodata::species_groupings %>%
  left_join(forageindex)

Sean’s SOE_fish_resource_using_strata.R code from here

modified for local survdat.Rdata and local species list

#remotes::install_github("NOAA-EDAB/survdat",build_vignettes = TRUE)

#SML
#Required packages----
library(data.table); library(survdat); library(here)

# #Pull Data----
# channel <- dbutils::connect_to_database(server="sole",uid="slucey")
# 
# survey <- survdat::get_survdat_data(channel, getLengths = F)
# survdat <- survey$survdat
#object name is survdat
#load(url("https://github.com/NOAA-EDAB/ms-keyrun/raw/master/data-raw/data/survdat.RData"))
#load(url("https://github.com/slucey/SOE_data/raw/main/data_raw/Survdat.RData")) #newer
load(here("survdat/Survdat.Rdata")) # from repo above, can't pull directly

pulldate <- survey$pullDate
functioncall <- survey$functionCall
survdat <- survey$survdat

#Aggregate species----
#Grab species list
#load(here('data_raw/SOE_species_list.RData'))
species <- as.data.table(species_groupings_forage)

#Merge to get species group and fed managed status
survdat <- merge(survdat, unique(species[, list(SVSPP, ForageIndex)]), 
                 by = 'SVSPP', all.x = T)
#Combine agg group and manage status
#survdat[, SOE.Managed := paste(SOE.20, Fed.Managed)]

#Change seasons from all caps
survdat[SEASON == 'FALL',   SEASON := 'Fall']
survdat[SEASON == 'SPRING', SEASON := 'Spring']

#Calculate stratified means
#Strata sets
EPU <- c('MAB', 'GB', 'GOM', 'SS')
MAB <- c(1010:1080, 1100:1120, 1600:1750, 3010:3450, 3470, 3500, 3510)
GB  <- c(1090, 1130:1210, 1230, 1250, 3460, 3480, 3490, 3520:3550)
GOM <- c(1220, 1240, 1260:1290, 1360:1400, 3560:3830)
SS  <- c(1300:1352, 3840:3990)

survey.data <- c()

for(iepu in 1:length(EPU)){
  epu.strata <- get(EPU[iepu])
  #Separate inshore/offshore
  epu.inshore  <- epu.strata[which(epu.strata >= 2000)]
  epu.offshore <- epu.strata[which(epu.strata <  2000)]
  
  #Calculate stratified means
  all <- survdat::calc_stratified_mean(survdat, filterByArea = epu.strata,
                                       filterBySeason = c('Fall', 'Spring'),
                                       groupDescription = 'ForageIndex', tidy = T)
  # all.fmc <- survdat::calc_stratified_mean(survdat, filterByArea = epu.strata, 
  #                                          filterBySeason = c('Fall', 'Spring'),
  #                                          groupDescription = 'SOE.Managed', tidy = T)
  # inshore.all <- survdat::calc_stratified_mean(survdat, filterByArea = epu.inshore,
  #                                              filterBySeason = c('Fall', 'Spring'),
  #                                              groupDescription = 'SOE.20',
  #                                              tidy = T)
  # offshore.all <- survdat::calc_stratified_mean(survdat, filterByArea = epu.offshore,
  #                                               filterBySeason = c('Fall', 'Spring'),
  #                                               groupDescription = 'SOE.20',
  #                                               tidy = T)
  # inshore.fmc <- survdat::calc_stratified_mean(survdat, filterByArea = epu.inshore,
  #                                              filterBySeason = c('Fall', 'Spring'),
  #                                              groupDescription = 'SOE.Managed',
  #                                              tidy = T)
  # offshore.fmc <- survdat::calc_stratified_mean(survdat, filterByArea = epu.offshore,
  #                                               filterBySeason = c('Fall', 'Spring'),
  #                                               groupDescription = 'SOE.Managed',
  #                                               tidy = T)
  # 
  #Correct variable names before combining into one dataset
  #stratified biomass
  all[variable == 'strat.biomass', Var := paste(ForageIndex, SEASON, 'Biomass Index')]
  # all.fmc[variable == 'strat.biomass', Var := paste(SOE.Managed, 'managed species -',
  #                                                   SEASON, 'Biomass Index')]
  # inshore.all[variable == 'strat.biomass', Var := paste(SOE.20, SEASON, 
  #                                                       'Biomass Index - inshore')]
  # inshore.fmc[variable == 'strat.biomass', Var := paste(SOE.Managed, 'managed species -',
  #                                                       SEASON, 
  #                                                       'Biomass Index - inshore')]
  # offshore.all[variable == 'strat.biomass', Var := paste(SOE.20, SEASON, 
  #                                                        'Biomass Index - offshore')]
  # offshore.fmc[variable == 'strat.biomass', Var := paste(SOE.Managed, 'managed species -',
  #                                                        SEASON, 
  #                                                        'Biomass Index - offshore')]
  
  #Standard Error
  all[variable == 'biomass.SE', Var := paste(ForageIndex, SEASON, 
                                                     'Biomass Standard Error')]
  # all.fmc[variable == 'biomass.SE', Var := paste(SOE.Managed, 'managed species -',
  #                                                    SEASON, 
  #                                                    'Biomass Standard Error')]
  # inshore.all[variable == 'biomass.SE', Var := paste(SOE.20, SEASON, 
  #                                                    'Biomass Standard Error - inshore')]
  # inshore.fmc[variable == 'biomass.SE', Var := paste(SOE.Managed, 'managed species -',
  #                                                    SEASON, 
  #                                                    'Biomass Standard Error - inshore')]
  # offshore.all[variable == 'biomass.SE', Var := paste(SOE.20, SEASON, 
  #                                                     'Biomass Standard Error - offshore')]
  # offshore.fmc[variable == 'biomass.SE', Var := paste(SOE.Managed, 'managed species -',
  #                                                     SEASON, 
  #                                                     'Biomass Standard Error - offshore')]
  # 
  #Combine into one dataset
  epu.all <- rbindlist(list(all#, all.fmc, inshore.all, inshore.fmc, offshore.all, 
                             #offshore.fmc
                            ), use.names = F)
  epu.all[, Region := EPU[iepu]]
  
  #Combine with other EPUs
  survey.data <- rbindlist(list(survey.data, epu.all))
}

#Remove unnecessary columns/rows
survey.data[, c('ForageIndex', 'SEASON', 'variable') := NULL]
survey.data <- survey.data[!is.na(Var), ]

#Rename columns and add new columns
setnames(survey.data, c('YEAR', 'value', 'units'), c('Time', 'Value', 'Units'))
survey.data[, Source := 'NEFSC bottom trawl survey (survdat)']

#Set column order
setcolorder(survey.data, c('Time', 'Value', 'Var', 'Units', 'Region', 'Source'))

save(survey.data, file = here('survdat/Aggregate_ForageIndex_Survey.RData'))
load(here('survdat/Aggregate_ForageIndex_Survey.RData'))

survforage <- survey.data %>%
  dplyr::filter(Var %in% c("ForageIndex Fall Biomass Index",
                           "ForageIndex Spring Biomass Index",
                           "ForageIndex Fall Biomass Standard Error",
                           "ForageIndex Spring Biomass Standard Error"),
                Region %in% c("MAB", "GB", "GOM")) %>%
  tidyr::separate(Var, c("feeding.guild", "season", "Biomass", "Var1", NA), sep = " ") %>%
  tidyr::unite("Var", feeding.guild:season, sep = " ") %>%
  dplyr::mutate(stat = recode(Var1, Index = "Mean",
                      Standard = "SD")) %>%
  dplyr::select(-Biomass, -Var1,  -Units) %>%
  # dplyr::filter(!Area == "-") %>%
  dplyr::group_by(Var, Time, Region) %>%
  tidyr::pivot_wider(id_cols = c(Var, Time, Region),names_from =  stat, values_from =  Value) %>%
  dplyr::filter(!Mean == "NULL") %>%
  dplyr::ungroup() %>%
  dplyr::mutate(Mean = as.numeric(Mean))


survforage <- survforage %>%
  dplyr::group_by(Var, Region) %>%
  dplyr::mutate(hline = mean(Mean, na.rm = T), 
                tssd = sd(SD, na.rm = T),
                 upper = Mean + (2*SD),
                lower = Mean - (2*SD)) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(stdsurvForage = (Mean-hline)/tssd,
                #cv = SD/Mean,
                stdsurvForageupper = (upper-hline)/tssd,
                stdsurvForagelower = (lower-hline)/tssd)

survforage$Var <- factor(survforage$Var,levels = c("ForageIndex Spring",
                                                    "ForageIndex Fall"))
survforage$Region <- factor(survforage$Region, levels = c("GOM", "GB", "MAB"))

series.col <- rep("black",length(unique(survforage$Var)))
facet_names <- list("Forage" = expression("Forage")
                    )
p7<-survforage %>%
  #dplyr::filter(str_detect(Var,"Planktivore")) %>%
  ggplot2::ggplot() +

  #Highlight last ten years
  #ggplot2::annotate("rect", fill = shade.fill, alpha = shade.alpha,
  #    xmin = x.shade.min , xmax = x.shade.max ,
  #    ymin = -Inf, ymax = Inf) +
  #Test for trend and add lines
  ecodata::geom_gls(aes(x = Time, y = Mean,
               color = Var),
             alpha = trend.alpha, size = trend.size) +
  ecodata::geom_lm(aes(x = Time, y = Mean,
               color = Var),
             alpha = trend.alpha, size = trend.size) +
  # #ecodata::geom_lm(aes(x = Time, y = Mean))+

  #Add time series
  ggplot2::geom_ribbon(aes(x = Time, ymin = pmax(lower, 0), ymax = upper),
              alpha = 0.5,
              fill = "grey") +
  ggplot2::geom_line(aes(x = Time, y = Mean),size = lwd-0.5) +
  ggplot2::geom_point(aes(x = Time, y = Mean),size = pcex-0.5) +
  ggplot2::scale_color_manual(values = series.col, aesthetics = "color")+
  ggplot2::guides(color = FALSE) +
  ggplot2::geom_hline(aes(yintercept = 0,
                 group = Var),
             size = hline.size,
             alpha = hline.alpha,
             linetype = hline.lty)+
  ggplot2::facet_wrap(Region ~ Var~.,ncol = 2, scales = "free_y") +
       #Add NEAMAP
  # ggplot2::geom_ribbon(data = neamap.3, aes(ymax = pmax(upper, 0), ymin = lower, x = Time),
  #               fill = "pink", alpha = 0.5) +
  # ggplot2::geom_line(data = neamap.3, aes(x = Time, y = Value),
  #           color = "#ca0020")+
  # ggplot2::geom_point(data = neamap.3, aes(x = Time, y = Value),
  #            size = pcex-0.5,
  #            color = "#ca0020")+

  #Axis and theme
  ggplot2::scale_x_continuous(breaks = seq(1970, 2020, by = 10), expand = c(0.01, 0.01)) +
  #ylim(0, 600)+
  ggplot2::ylab(expression("Biomass (kg tow"^-1*")")) +
  ecodata::theme_facet()+
  ggplot2::theme(strip.text=element_text(hjust=0),
        axis.title.x=element_blank())

p7

Now plot together

forage_std <- forage_std %>%
  mutate(CompVar = recode(Var, "Forage Spring" = "Spring",
                         "Forage Fall" = "Fall")) 
 
forage_std$CompVar <- factor(forage_std$CompVar,levels = c("Spring",
                                                    "Fall"))

survforage <- survforage %>%
  mutate(CompVar = recode(Var, "ForageIndex Spring" = "Spring",
                         "ForageIndex Fall" = "Fall"))  %>%
  rename(EPU = Region)
  

survforage$CompVar <- factor(survforage$CompVar,levels = c("Spring",
                                                    "Fall"))


p8<-survforage %>%
  #dplyr::filter(str_detect(Var,"Planktivore")) %>%
  ggplot2::ggplot() +

  #Highlight last ten years
  #ggplot2::annotate("rect", fill = shade.fill, alpha = shade.alpha,
  #    xmin = x.shade.min , xmax = x.shade.max ,
  #    ymin = -Inf, ymax = Inf) +
  #Test for trend and add lines
  ecodata::geom_gls(aes(x = Time, y = stdsurvForage,
               color = CompVar),
             alpha = trend.alpha, size = trend.size) +
  ecodata::geom_lm(aes(x = Time, y = stdsurvForage,
               color = CompVar),
             alpha = trend.alpha, size = trend.size) +
  # #ecodata::geom_lm(aes(x = Time, y = Mean))+

  #Add time series
  ggplot2::geom_ribbon(aes(x = Time, ymin = pmax(stdsurvForagelower), ymax = stdsurvForageupper),
              alpha = 0.5,
              fill = "grey") +
  ggplot2::geom_line(aes(x = Time, y = stdsurvForage),size = lwd-0.5) +
  ggplot2::geom_point(aes(x = Time, y = stdsurvForage),size = pcex-0.5) +
  ggplot2::scale_color_manual(values = series.col, aesthetics = "color")+
  ggplot2::guides(color = FALSE) +
  ggplot2::geom_hline(aes(yintercept = 0,
                 group = CompVar),
             size = hline.size,
             alpha = hline.alpha,
             linetype = hline.lty)+
       #Add Forage Index
  ggplot2::geom_ribbon(data = forage_std, aes(ymin = stdForagelower, ymax = stdForageupper, x = Time),
                fill = "pink", alpha = 0.5) +
  ggplot2::geom_line(data = forage_std, aes(x = Time, y = stdForage),
            color = "#ca0020")+
  ggplot2::geom_point(data = forage_std, aes(x = Time, y = stdForage),
             size = pcex-0.5,
             color = "#ca0020")+
  # trend test forage indices
  ecodata::geom_gls(data = forage_std, aes(x = Time, y = stdForage,
               color = CompVar),
             alpha = trend.alpha, size = trend.size) +
  ecodata::geom_lm(data = forage_std, aes(x = Time, y = stdForage,
               color = CompVar),
             alpha = trend.alpha, size = trend.size) +
  
  ggplot2::facet_wrap(EPU ~ CompVar~.,ncol = 2, scales = "free_y") +

  #Axis and theme
  ggplot2::scale_x_continuous(breaks = seq(1970, 2020, by = 10), expand = c(0.01, 0.01)) +
  #ylim(0, 600)+
  ggplot2::ylab(expression("Standardized Biomass Index")) +
  ecodata::theme_facet()+
  ggplot2::theme(strip.text=element_text(hjust=0),
        axis.title.x=element_blank())

p8

Direct comparison, VAST diet based forage index x, survey same species list y

survforage_comp <- survforage %>%
  tidyr::separate(Var, c(NA, "Season"), sep = " ") %>%
  dplyr::select(Time, Season, EPU, stdsurvForage, stdsurvForageupper, stdsurvForagelower) %>%
  dplyr::mutate(Time = as.integer(Time))

compareforageagg <- forage_std %>%
  tidyr::separate(Var, c(NA, "Season"), sep = " ") %>%
  dplyr::select(Time, Season, EPU, stdForage, stdForageupper, stdForagelower) %>%
  left_join(survforage_comp) 

ggplot2::ggplot(compareforageagg, aes(x=stdForage, y=stdsurvForage)) +
  geom_point(color="blue", alpha=0.1)+
  geom_abline(intercept = 0, slope = 1, lty=3) +
  geom_smooth(method=lm) +
  stat_cor(method="pearson") +
  ecodata::theme_facet() +
  facet_grid(fct_relevel(EPU,  "GOM", "GB", "MAB")~
               fct_relevel(Season, "Spring", "Fall")) 
Diet based forage index (stdForage) compared with survey based index (stdsurvForage) by season and region (GOM = Gulf of Maine, GB = Georges Bank, MAB = Mid Atlantic Bight). Pearson correlation coefficients (*R*) and correlation significance (*p*) are shown for each season/region.

Diet based forage index (stdForage) compared with survey based index (stdsurvForage) by season and region (GOM = Gulf of Maine, GB = Georges Bank, MAB = Mid Atlantic Bight). Pearson correlation coefficients (R) and correlation significance (p) are shown for each season/region.

Pretty well correlated in GOM spring, half correlated in GB spring, otherwise not, even with the same species list.

Hypothesis: survey and diet based index species composition should match well in GOM spring but not other seasons. A dominant species may be shared in GB spring.

What proportion of each bluefish prey species in prey disaggregated diet dataset by %FO and weight? Just saved interim dataset bluepyall_stn.rds Compare with the same once aggregate survey index done.

Define constant colors, similar by family:

#  unique(forageindex$SCINAME)
#  [1] ENGRAULIDAE           LOLIGO PLEI           CLUPEA HARENGUS       SCOMBER SCOMBRUS     
#  [5] ANCHOA MITCHILLI      POMATOMUS SALTATRIX   PEPRILUS TRIACANTHUS  PLEURONECTIFORMES    
#  [9] MERLUCCIUS            PEPRILUS              CLUPEIDAE             AMMODYTES            
# [13] LOLIGO PEALEII        BREVOORTIA            ETRUMEUS TERES        STENOTOMUS CHRYSOPS  
# [17] ENGRAULIS EURYSTOLE   MERLUCCIUS BILINEARIS CEPHALOPODA           ANCHOA HEPSETUS      
# [21] CYNOSCION REGALIS     AMMODYTES AMERICANUS  AMMODYTES DUBIUS      ILLEX ILLECEBROSUS

#  unique(blueprey$pynam)
#  [1] "LOLIGO SP"             "ENGRAULIDAE"           "ANCHOA MITCHILLI"      "PEPRILUS TRIACANTHUS" 
#  [5] "CEPHALOPODA"           "ANCHOA HEPSETUS"       "ETRUMEUS TERES"        "AMMODYTES SP"         
#  [9] "STENOTOMUS CHRYSOPS"   "MERLUCCIUS BILINEARIS" "ILLEX SP"              "CLUPEA HARENGUS"      
# [13] "CLUPEIDAE"             "POMATOMUS SALTATRIX"   "ENGRAULIS EURYSTOLE"   "LOLIGO PEALEII"       
# [17] "SCOMBER SCOMBRUS"      "PLEURONECTIFORMES"     "CYNOSCION REGALIS"     "BREVOORTIA TYRANNUS"  

# colors from http://medialab.github.io/iwanthue/ 12 hard(Force vector) colorblind friendly full range H C L
# sorted by diff

# unmanaged "#890058",
# managed "#afe5ff"

sandlances <- data.frame(prey=c("AMMODYTES",
                                "AMMODYTES SP",
                                "AMMODYTES AMERICANUS",  
                                "AMMODYTES DUBIUS"),
                         preycol=c("#3b0062",
                                   "#3b0062",
                                   "#3b0062",
                                   "#3b0062"),
                         mgtcol=c("#890058",
                                  "#890058",
                                  "#890058",
                                  "#890058"))
  
anchovies <- data.frame(prey=c("ENGRAULIDAE", 
                               "ANCHOA MITCHILLI",
                               "ANCHOA HEPSETUS",
                               "ENGRAULIS EURYSTOLE"),
                         preycol=c("#004d15",
                                   "#004d15",
                                   "#004d15",
                                   "#004d15"),
                         mgtcol=c("#890058",
                                  "#890058",
                                  "#890058",
                                  "#890058"))  
  
herrings <- data.frame(prey=c("CLUPEA HARENGUS",
                              "CLUPEIDAE",
                              "ETRUMEUS TERES"),
                         preycol=c("#ff3c98",
                                   "#ff3c98",
                                   "#ff3c98"),
                         mgtcol=c("#afe5ff",
                                  "#afe5ff",
                                  "#890058")) 
  
squids <- data.frame(prey=c("LOLIGO SP",
                            "LOLIGO PLEI",
                            "LOLIGO PEALEII",
                            "ILLEX SP",
                            "ILLEX ILLECEBROSUS",
                            "CEPHALOPODA"),
                         preycol=c("#a9f2ff",
                                   "#a9f2ff",
                                   "#a9f2ff",
                                   "#a9f2ff",
                                   "#a9f2ff",
                                   "#a9f2ff"),
                         mgtcol=c("#afe5ff",
                                  "#890058",
                                  "#afe5ff",
                                  "#afe5ff",
                                  "#afe5ff",
                                  "#890058")) 
  
silverhake <- data.frame(prey=c("MERLUCCIUS",
                                "MERLUCCIUS BILINEARIS"),
                         preycol=c("#be5000",
                                   "#be5000"),
                         mgtcol=c("#afe5ff",
                                  "#afe5ff"))

butterfish <- data.frame(prey=c("PEPRILUS",
                                "PEPRILUS TRIACANTHUS"),
                         preycol=c("#d6b8ff",
                                   "#d6b8ff"),
                         mgtcol=c("#afe5ff",
                                  "#afe5ff"))

bluefish <- data.frame(prey=c("POMATOMUS SALTATRIX"),
                         preycol=c("#2eff7d"),
                         mgtcol=c("#afe5ff"))

mackerel <- data.frame(prey=c("SCOMBER SCOMBRUS"),
                         preycol=c("#ffb4b5"),
                         mgtcol=c("#afe5ff"))

scup <- data.frame(prey=c("STENOTOMUS CHRYSOPS"),
                         preycol=c("#1810b8"),
                         mgtcol=c("#afe5ff"))

weakfish <- data.frame(prey=c("CYNOSCION REGALIS"),
                         preycol=c("#759700"),
                         mgtcol=c("#afe5ff"))

menhaden <- data.frame(prey=c("BREVOORTIA",
                              "BREVOORTIA TYRANNUS"),
                         preycol=c("#547aff",
                                   "#547aff"),
                         mgtcol=c("#afe5ff",
                                  "#afe5ff"))

otherflats <- data.frame(prey = c("PLEURONECTIFORMES"),
                         preycol=c("#48beff"),
                         mgtcol=c("#890058"))

preycolcode <- rbind(sandlances,
                     anchovies,
                     herrings,
                     squids,
                     silverhake,
                     butterfish,
                     bluefish,
                     mackerel,
                     scup,
                     weakfish,
                     menhaden,
                     otherflats)

preycols <- preycolcode$preycol
names(preycols) <- as.factor(preycolcode$prey)

mgtcols <- preycolcode$mgtcol
names(mgtcols) <- as.factor(preycolcode$prey)

Dominant species in diet based index. species comp (raw)

bluepyall_stn <- readRDS(here("fhdat/bluepyall_stn.rds"))

 MAB <- data.frame(stratum = c(1010:1080, 1100:1120, 1600:1750, 3010:3450, 3470, 3500, 3510),
                      EPU = "MAB")
 
 GB <- data.frame(stratum = c(1090, 1130:1210, 1230, 1250, 3460, 3480, 3490, 3520:3550),
                  EPU = "GB")
 
 GOM <- data.frame(stratum = c(1220, 1240, 1260:1290, 1360:1400, 3560:3830),
                   EPU = "GOM")
  
EPUlook <- dplyr::bind_rows(MAB, GB, GOM)

foragesppwt <- bluepyall_stn %>%
  dplyr::left_join(EPUlook) %>%
  dplyr::filter(blueprey=="blueprey",
                !is.na(EPU)) %>%
  dplyr::group_by(year, season_ng, EPU, bluepynam) %>%
  dplyr::summarise(pysum = sum(bluepywt))

plotDietCompBar <- function(dat, metric, plotcol, title=NULL){   #defines the name of the function and the requied inputs
  
dat %>% 
  filter(!is.na(.data[[metric]])) %>%          #take out the NA values (literally "not is NA")
  ggplot(aes(x=year, y=.data[[metric]], fill=bluepynam)) +     #year on x axis, %diet comp on y
  geom_bar(width = 1, stat = "identity", position="fill") +    #stacked bar chart
  scale_fill_manual(values=plotcol) +         #custom colors
  ecodata::theme_facet() +                    #simplify
  facet_grid(fct_relevel(EPU,  "GOM", "GB", "MAB")~
               fct_relevel(season_ng, "SPRING", "FALL")) + #separate by ordered epu and season
  labs(y = "% composition",              #add sensible labels
       title = title) 
    
}   

addSmallLegend <- function(myPlot, pointSize = 2, textSize = 6, spaceLegend = 0.5) {
    myPlot +
        guides(shape = guide_legend(override.aes = list(size = pointSize)),
               color = guide_legend(override.aes = list(size = pointSize)),
               fill = guide_legend(ncol = 1)) +
        theme(legend.title = element_text(size = textSize), 
              legend.text  = element_text(size = textSize),
              legend.key.size = unit(spaceLegend, "lines"))
}

pall <- plotDietCompBar(dat=foragesppwt, 
                          metric="pysum", 
                        plotcol=preycols,
                          title="Diet composition")  

addSmallLegend(pall)

Dominant species in survey forage index

load(here("survdat/Survdat.Rdata")) # from repo above, can't pull directly

pulldate <- survey$pullDate
functioncall <- survey$functionCall
survdat <- survey$survdat

#Aggregate species----
#Grab species list
#load(here('data_raw/SOE_species_list.RData'))
species <- as.data.table(species_groupings_forage)

#Merge to get species group and fed managed status
survdat <- merge(survdat, unique(species[, list(SVSPP, SCINAME, ForageIndex)]), 
                 by = 'SVSPP', all.x = T)

foragesurv <- survdat %>%
  tibble::as.tibble() %>%
  dplyr::rename_with(tolower, .cols = everything()) %>%
  dplyr::mutate(stratum = as.integer(stratum)) %>%
  dplyr::left_join(EPUlook) %>%
  dplyr::filter(!is.na(forageindex),
                !is.na(EPU)) %>%
  dplyr::group_by(year, season, EPU, sciname) %>%
  dplyr::summarise(pysum = sum(biomass)) %>%
  dplyr::rename(season_ng = season, bluepynam = sciname)

psurv <- plotDietCompBar(dat=foragesurv, 
                          metric="pysum", 
                          plotcol=preycols,
                          title="Survey composition")  

addSmallLegend(psurv)

Managed (light blue) or unmanaged (burgundy)? Diet based forage

pall <- plotDietCompBar(dat=foragesppwt, 
                          metric="pysum", 
                        plotcol=mgtcols,
                          title="Diet composition")  

addSmallLegend(pall)

Managed (light blue) or unmanaged (burgundy)? Survey based forage

psurv <- plotDietCompBar(dat=foragesurv, 
                          metric="pysum", 
                          plotcol=mgtcols,
                          title="Survey composition")  

addSmallLegend(psurv)

So we see a higher proportion of unmanaged forage species in the diet based raw data than we do the survey based raw data. As hypothesized.

Also, silver hake dominate GOM spring in both datasets.

Composition comparison, aka rabbithole

This is still all raw data so probably too much detail for naught. The NMDS plots show what we can see from the time series of composition: spring is different from fall and the three regions are different. MAB is more different from GB and GOM than they are from each other but GB and GOM still separate. Fall diet forage plotted below. The NMDS with 2 dimensions isn’t great since stress is > 0.2 unless I only look at fall as below. Still borderline at 0.19.

dietforagemat <- foragesppwt %>%
  mutate(source = "diet") %>%
  filter(season_ng == "FALL") %>%
  pivot_wider(names_from = bluepynam, values_from = pysum, names_sort = TRUE) 
  

survforagemat <- foragesurv %>%
  mutate(source = "surv") %>%
  pivot_wider(names_from = bluepynam, values_from = pysum, names_sort = TRUE) 
  
comdiet <- dietforagemat[,5:length(dietforagemat)] %>% replace(is.na(.), 0)
envdiet <- dietforagemat[,1:4]

library(vegan)
#convert com to a matrix
m_com = as.matrix(comdiet)

#nmds code
set.seed(123)
nmds = metaMDS(m_com, distance = "bray", k=2)
nmds

stressplot(nmds)

plot(nmds)

en = envfit(nmds, envdiet, permutations = 999, na.rm = TRUE)

en

plot(nmds)
plot(en)

data.scores = as.data.frame(scores(nmds)$sites)
data.scores$season = dietforagemat$season_ng
data.scores$region = dietforagemat$EPU

gg = ggplot(data = data.scores, aes(x = NMDS1, y = NMDS2)) + 
     geom_point(data = data.scores, aes(colour = region), size = 3, alpha = 0.5) + 
     scale_colour_manual(values = c("green", "orange", "steelblue")) 

gg + ggtitle("Fall diet based forage composition NMDS, points are years")

Looking at survey data we find similar patterns, but stress is better for Fall only with 2 dimensions, 0.14, and separation by region is even more pronounced.

survforagemat <- foragesurv %>%
  mutate(source = "surv") %>%
  filter(season_ng == "FALL") %>%
  pivot_wider(names_from = bluepynam, values_from = pysum, names_sort = TRUE) 
  
comsurv <- survforagemat[,5:length(survforagemat)] %>% replace(is.na(.), 0)
envsurv <- survforagemat[,1:4]

library(vegan)
#convert com to a matrix
m_com = as.matrix(comsurv)

#nmds code
set.seed(123)
nmds = metaMDS(m_com, distance = "bray", k=2)
nmds

stressplot(nmds)

plot(nmds)

en = envfit(nmds, envsurv, permutations = 999, na.rm = TRUE)

en

plot(nmds)
plot(en)

data.scores = as.data.frame(scores(nmds)$sites)
data.scores$season = survforagemat$season_ng
data.scores$region = survforagemat$EPU

gg = ggplot(data = data.scores, aes(x = NMDS1, y = NMDS2)) + 
     geom_point(data = data.scores, aes(colour = region), size = 3, alpha = 0.5) + 
     scale_colour_manual(values = c("green", "orange", "steelblue")) 

gg + ggtitle("Fall survey based forage composition NMDS, points are years")

To compare survey and diet derived compositions (the goal) I need to standardize the species.