Working group decision after the February 2022 Data Meeting was to update the predator list based on the most recent diet similarity matrix by Smith posted on the NEFSC shiny app, which results in a new Piscivore guild relative to our initial models based on Garrison and Link (2000).
We investigated alternative cluster algorithms here.
Input NEFSC food habits overlap matrix:
dietoverlap <- read_csv(here("datfromshiny/tgmat.2022-02-15.csv"))Get NEFSC food habits data (as of October 8 2022 contains only 1973-2020 data, check years):
# object is called `allfh`
#load(url("https://github.com/Laurels1/Condition/raw/master/data/allfh.RData"))
load(here("fhdat/allfh.Rdata"))
# as of October 8 2022 contains only 1973-2020 data
unique(allfh$year)##  [1] 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987
## [16] 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002
## [31] 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012   NA 2013 2014 2015 2016
## [46] 2017 2018 2019 2020Load 2021 NEFSC food habits data (October 8 2022, check years):
#object is called allfh21
load(here("fhdat/allfh21.Rdata"))
# check years
unique(allfh21$year)## [1] 2021Make new NEFSC dataset 1973-2021:
allfh <- allfh %>%
  dplyr::bind_rows(allfh21)Generate the Piscivore list based on clustering with the “complete” algorithm. Identify which species cluster with all 3 sizes of bluefish:
# follows example here https://cran.r-project.org/web/packages/dendextend/vignettes/Cluster_Analysis.html
library(dendextend)
d_dietoverlap <- dist(dietoverlap)
guilds <- hclust(d_dietoverlap)
#plot(guilds)
dend <- as.dendrogram(guilds)
dend <- rotate(dend, 1:136)
dend <- color_branches(dend, k=6) # Brian uses 6 categories
labels(dend) <- paste(as.character(names(dietoverlap[-1]))[order.dendrogram(dend)],
                           "(",labels(dend),")", 
                           sep = "")
dend <- hang.dendrogram(dend,hang_height=0.1)
# reduce the size of the labels:
# dend <- assign_values_to_leaves_nodePar(dend, 0.5, "lab.cex")
dend <- set(dend, "labels_cex", 0.5)
# And plot:
par(mar = c(3,3,3,7))
plot(dend, 
     main = "Clustered NEFSC diet data, (complete)
     (the labels give the predator species/size)", 
     horiz =  TRUE,  nodePar = list(cex = .007))#legend("topleft", legend = iris_species, fill = rainbow_hcl(3))This is the list of predators and sizes:
# list of species in node with all three bluefish sizes
pisccomplete <- partition_leaves(dend)[[
  which_node(dend, c("Bluefish..S(37)", "Bluefish..M(36)", "Bluefish..L(35)"))
]]
pisccomplete##  [1] "Striped bass..M(106)"           "Sea raven..L(89)"              
##  [3] "Striped bass..L(105)"           "Bluefish..S(37)"               
##  [5] "Weakfish..M(117)"               "Fourspot flounder..L(50)"      
##  [7] "Northern shortfin squid..M(71)" "Longfin squid..M(63)"          
##  [9] "Longfin squid..S(64)"           "Northern shortfin squid..S(72)"
## [11] "Pollock..L(78)"                 "White hake..M(120)"            
## [13] "Red hake..L(82)"                "Silver hake..M(93)"            
## [15] "Atlantic halibut..M(17)"        "Pollock..XL(81)"               
## [17] "White hake..L(119)"             "Silver hake..L(92)"            
## [19] "Cusk..L(45)"                    "Goosefish..XL(56)"             
## [21] "Bluefish..L(35)"                "Goosefish..L(53)"              
## [23] "Goosefish..M(54)"               "Goosefish..S(55)"              
## [25] "Spiny dogfish..M(100)"          "Thorny skate..XL(116)"         
## [27] "Atlantic halibut..L(16)"        "Sea raven..M(90)"              
## [29] "Sea raven..S(91)"               "Atlantic cod..XL(13)"          
## [31] "Spiny dogfish..L(99)"           "Spotted hake..M(103)"          
## [33] "Summer flounder..M(111)"        "Summer flounder..L(110)"       
## [35] "Bluefish..M(36)"                "Buckler dory..M(38)"First filter to the predators and identify the bluefish prey (new prey list 2023):
pisccompletedf <- data.frame("COMNAME" = toupper(str_remove(pisccomplete, "\\..*")),
                              "SizeCat" = str_remove(str_extract(pisccomplete, "\\..*[:upper:]+"), "\\.."),
                              "feedguild" = "pisccomplete")
 
 fh.nefsc.pisc.pisccomplete <- allfh %>%
  #filter(pynam != "EMPTY") %>%
  left_join(pisccompletedf, by = c("pdcomnam" = "COMNAME",
                               "sizecat" = "SizeCat")) %>%
  filter(!is.na(feedguild))
 
preycount  <- fh.nefsc.pisc.pisccomplete %>%
   #group_by(year, season, pdcomnam, pynam) %>%
   group_by(pdcomnam, pynam) %>%
   summarise(count = n()) %>%
   #arrange(desc(count))
   pivot_wider(names_from = pdcomnam, values_from = count)
 
gencomlist <- allfh %>%
  select(pynam, pycomnam2, gencom2) %>%
  distinct()
NEFSCblueprey <- preycount %>%
  #filter(BLUEFISH > 9) %>%
  filter(!pynam %in% c("EMPTY", "BLOWN",
                       "FISH", "OSTEICHTHYES",
                       "ANIMAL REMAINS",
                       "FISH SCALES")) %>%
  #filter(!str_detect(pynam, "SHRIMP|CRAB")) %>%
  left_join(gencomlist) %>%
  filter(!gencom2 %in% c("ARTHROPODA", "ANNELIDA",
                         "CNIDARIA", "UROCHORDATA",
                         "ECHINODERMATA", "WORMS",
                         "BRACHIOPODA", "COMB JELLIES",
                         "BRYOZOA", "SPONGES",
                         "MISCELLANEOUS", "OTHER")) %>%
  arrange(desc(BLUEFISH))
# March 2023, formally add NEAMAP to prey decisions
NEAMAPblueprey <- read.csv(here("fhdat/Full Prey List_Common Names.csv")) %>%
  #filter(BLUEFISH > 9) %>%
  filter(!SCIENTIFIC.NAME %in% c("Actinopterygii", "fish scales",
                                 "Decapoda (megalope)", 
                                 "unidentified material",
                                 "Plantae",
                                 "unidentified animal"))
NEAMAPprey <- NEAMAPblueprey %>%
  dplyr::select(COMMON.NAME, SCIENTIFIC.NAME, BLUEFISH) %>%
  dplyr::filter(!is.na(BLUEFISH)) %>%
  dplyr::mutate(pynam2 = tolower(SCIENTIFIC.NAME),
                pynam2 = stringr::str_replace(pynam2, "spp.", "sp")) %>%
  dplyr::rename(NEAMAP = BLUEFISH)
NEFSCprey <- NEFSCblueprey %>%
  dplyr::select(pycomnam2, pynam, BLUEFISH) %>%
  dplyr::filter(!is.na(BLUEFISH)) %>%
  dplyr::mutate(pynam2 = tolower(pynam)) %>%
  dplyr::rename(NEFSC = BLUEFISH)
# new criteria March 2023, >20 observations NEAMAP+NEFSC, but keep mackerel
# removes the flatfish order (too broad) and unid Urophycis previously in NEAMAP
blueprey <- NEFSCprey %>% 
  dplyr::full_join(NEAMAPprey) %>%
  dplyr::mutate(NEAMAP = ifelse(is.na(NEAMAP), 0, NEAMAP),
                NEFSC = ifelse(is.na(NEFSC), 0, NEFSC),
                total = NEFSC + NEAMAP,
                PREY = ifelse(is.na(SCIENTIFIC.NAME), pynam, SCIENTIFIC.NAME),
                COMMON = ifelse(is.na(COMMON.NAME), pycomnam2, COMMON.NAME),
                pynam = ifelse(is.na(pynam), toupper(pynam2), pynam)) %>%
  dplyr::arrange(desc(total)) %>%
  dplyr::filter(total>20 | pynam=="SCOMBER SCOMBRUS") %>% # >20 leaves out mackerel
  dplyr::mutate(COMMON = case_when(pynam=="ILLEX SP" ~ "Shortfin squids",
                                   pynam2=="teuthida" ~ "Unidentified squids",
                                   TRUE ~ COMMON)) %>%
  dplyr::mutate(PREY = stringr::str_to_sentence(PREY),
                COMMON = stringr::str_to_sentence(COMMON))
fh.nefsc.pisc.pisccomplete.blueprey <- fh.nefsc.pisc.pisccomplete %>%
  mutate(blueprey = case_when(pynam %in% blueprey$pynam ~ "blueprey",
                              TRUE ~ "othprey"))
preystn.pisccomplete <- fh.nefsc.pisc.pisccomplete.blueprey %>%
  group_by(year, season, station) %>%
  count(blueprey) %>%
  pivot_wider(names_from = blueprey, values_from = n) %>%
  filter(year>1984)
#dim(preystn)[1]
bluepreystn.pisccomplete <- preystn.pisccomplete %>% 
  arrange(desc(blueprey)) %>%
  filter(!is.na(blueprey))
#dim(bluepreystn)[1]
flextable::flextable(blueprey[,c('PREY', 'COMMON', 'NEFSC', 'NEAMAP', 'total')]) %>%
  flextable::set_header_labels(PREY = "Prey",
                               COMMON = "Prey common name",
                               total = "Bluefish stomachs (n)") %>%
  flextable::set_caption("Prey identified in bluefish stomachs, NEFSC (1973-2021) and NEAMAP (2007-2021) diet databases.") | Prey | Prey common name | NEFSC | NEAMAP | Bluefish stomachs (n) | 
| Anchoa mitchilli | Bay anchovy | 321 | 1,378 | 1,699 | 
| Peprilus triacanthus | Butterfish | 307 | 356 | 663 | 
| Engraulidae | Anchovy (unidentified) | 408 | 61 | 469 | 
| Anchoa hepsetus | Striped anchovy | 171 | 266 | 437 | 
| Loligo spp. | Loligo squid | 423 | 8 | 431 | 
| Cephalopoda | Unidentified cephalopoda | 262 | 8 | 270 | 
| Ammodytes spp. | Sand lances | 96 | 139 | 235 | 
| Loligo pealeii | Longfin inshore squid | 17 | 171 | 188 | 
| Stenotomus chrysops | Scup | 69 | 108 | 177 | 
| Etrumeus teres | Round herring | 126 | 15 | 141 | 
| Teuthida | Unidentified squids | 0 | 95 | 95 | 
| Merluccius bilinearis | Silver hake | 53 | 10 | 63 | 
| Anchoa spp. | Common anchovies | 7 | 52 | 59 | 
| Clupeidae | Unidentified herring | 30 | 26 | 56 | 
| Pomatomus saltatrix | Bluefish | 22 | 28 | 50 | 
| Brevoortia tyrannus | Atlantic menhaden | 10 | 36 | 46 | 
| Clupea harengus | Atlantic herring | 37 | 4 | 41 | 
| Illex sp | Shortfin squids | 40 | 0 | 40 | 
| Cynoscion regalis | Weakfish | 12 | 26 | 38 | 
| Engraulis eurystole | Silver anchovy | 18 | 8 | 26 | 
| Scomber scombrus | Atlantic mackerel | 14 | 0 | 14 | 
Chunks below here save datasets so these are all set to eval=F
Assign station id, change the months for spring and fall to align with the assessment, and calculate mean bluefish prey per station in NEFSC:
bluepyall_stn <- fh.nefsc.pisc.pisccomplete.blueprey %>%
  #create id linking cruise6_station
  #create season_ng spring and fall Spring=Jan-May, Fall=June-Dec
  mutate(id = paste0(cruise6, "_", station),
         year = as.numeric(year),
         month = as.numeric(month),
         season_ng = case_when(month <= 6 ~ "SPRING",
                               month >= 7 ~ "FALL",
                               TRUE ~ as.character(NA))
         ) %>%
  select(year, season_ng, id, stratum,
         pynam, pyamtw, pywgti, pyvoli, blueprey, 
         pdcomnam, pdid, pdlen, pdsvol, pdswgt, 
         beglat, beglon, declat, declon, 
         bottemp, surftemp, setdepth) %>%
  group_by(id) %>%
  #mean blueprey g per stomach per tow: sum all blueprey g/n stomachs in tow
  mutate(bluepywt = case_when(blueprey == "blueprey" ~ pyamtw,
                               TRUE ~ 0.0),
         bluepynam = case_when(blueprey == "blueprey" ~ pynam,
                               TRUE ~ NA_character_)) 
# save at prey disaggregated stage for paper
saveRDS(bluepyall_stn, here("fhdat/bluepyall_stn.rds"))
stndat <- bluepyall_stn %>%
  select(year, season_ng, id, 
         beglat, beglon, declat, declon, 
         bottemp, surftemp, setdepth) %>%
  distinct()
#pisc stomachs in tow count pdid for each pred and sum
piscstom <- bluepyall_stn %>%
  group_by(id, pdcomnam) %>%
  summarise(nstompd = n_distinct(pdid)) %>%
  group_by(id) %>%
  summarise(nstomtot = sum(nstompd))
#mean and var pred length per tow
pisclen <- bluepyall_stn %>%
  summarise(meanpisclen = mean(pdlen),
            varpisclen = var(pdlen))
bluepyagg_stn <- bluepyall_stn %>%
  summarise(sumbluepywt = sum(bluepywt),
            nbluepysp = n_distinct(bluepynam, na.rm = T),
            npreysp = n_distinct(pynam),
            npiscsp = n_distinct(pdcomnam)) %>%
  left_join(piscstom) %>%
  mutate(meanbluepywt = sumbluepywt/nstomtot) %>%
  left_join(pisclen) %>%
  left_join(stndat)
# save at same stage as before, writing over old file
saveRDS(bluepyagg_stn, here("fhdat/bluepyagg_stn.rds"))
# current dataset, fix declon, add vessel
nefsc_bluepyagg_stn <- readRDS(here("fhdat/bluepyagg_stn.rds")) %>%
  mutate(declon = -declon,
         vessel = case_when(year<2009 ~ "AL",
                            year>=2009 ~ "HB", 
                            TRUE ~ as.character(NA)))NEAMAP inputs were similarly updated based on this predator size category list. NEAMAP includes the following predators, adding two not captured by the NEFSC survey offshore and leaving out those from NEFSC not captured inshore:
Read in new inputs, align columns, merge datasets, correct single station with wrong data in original dataset, and save:
# new prey list 2023
#neamap_bluepreyagg_stn <- read_csv(here("fhdat/NEAMAP_Mean stomach weights_Bluefish PreyWQ2.csv")) %>%
neamap_bluepreyagg_stn <- read_csv(here("fhdat/NEAMAP_Mean stomach weights_Bluefish Prey_wWQ2023.csv")) %>%
  mutate(vessel = "NEAMAP") %>%
  rename(id = station,
         sumbluepywt = sumbluepreywt,
         nbluepysp = nbfpreyspp,
         #npreysp = ,
         npiscsp = npiscspp,
         nstomtot = nstomtot, 
         meanbluepywt = meanbluepreywt,
         meanpisclen = meanpisclen.simple, 
         #varpisclen = ,
         season_ng = season,
         declat  = lat,
         declon = lon,
         bottemp = bWT,
         #surftemp = , 
         setdepth = depthm) 
  
# combine  
bluepyagg_stn <-  nefsc_bluepyagg_stn %>%
  bind_rows(neamap_bluepreyagg_stn) 
# check for incorrect NEAMAP station
bluepyagg_stn %>% filter(id == "NM20070901011") # has this station
# if sumbluepywt is 106564.2, this is incorrect
# corrected by Jim Gartland in September 2022
# correct single NEAMAP station if needed
#bluepyagg_stn$sumbluepywt[bluepyagg_stn$id == "NM20070901011"] <- 4.8404
#bluepyagg_stn$meanbluepywt[bluepyagg_stn$id == "NM20070901011"] <- 0.186169231
saveRDS(bluepyagg_stn, here("fhdat/bluepyagg_stn_all.rds"))Now we take this dataset and add back the OISST data for stations missing surface temperature to apply our catchability covariates in VAST.
New prey list may have new stations so do all this over.
First add dates to NEFSC station data
bluepyagg_stn_all <- readRDS(here("fhdat/bluepyagg_stn_all.rds"))
diethauls <- bluepyagg_stn_all %>%
  dplyr::select(id, declat, declon)
NEFSCstations <- allfh %>%
  dplyr::mutate(id = paste0(cruise6, "_", station),
         year = as.numeric(year),
         month = as.numeric(month),
         day = as.numeric(day),
         declon = -declon) %>%
  dplyr::select(id, year, month, day, declat, declon) %>%
  dplyr::distinct()Then add SST to NEAMAP pull
NEAMAPstationSST <- read.csv(here("fhdat/NEAMAP SST_2007_2021.csv"))
NEAMAPstations <- NEAMAPstationSST %>%
  dplyr::mutate(id = station,
         year = as.numeric(year),
         month = as.numeric(month),
         day = as.numeric(day),
         declat = latitude,
         declon = longitude) %>%
  dplyr::select(id, year, month, day, declat, declon) %>%
  dplyr::distinct()Now combine NEAMAP and NEFSC and join with diet stations
Allstations <- bind_rows(NEFSCstations, NEAMAPstations)
diethauls <- left_join(diethauls, Allstations)There are still mismatches, see SSTmethods for details.
Since we know the NEAMAP lat and lon in the original diet dataset are correct, we will merge only the station id number, day, month, year, and surface temperature into the diet dataset to avoid the mismatch with 34 stations. We will also add the SST field as surftemp to be in the same column as in-situ measured temperature for the NEFSC survey.
NEAMAPstations <- NEAMAPstationSST %>%
  dplyr::mutate(id = station,
         year = as.numeric(year),
         month = as.numeric(month),
         day = as.numeric(day)) %>%
  dplyr::select(id, year, month, day) %>%
  dplyr::distinct()
# remake diethauls
diethauls <- bluepyagg_stn_all %>%
  dplyr::select(id, declat, declon)
NEFSCstations <- dplyr::select(NEFSCstations, c(-declat, -declon))
Allstations <- bind_rows(NEFSCstations, NEAMAPstations)
#station id, lat lon, year month day
diethauls <- left_join(diethauls, Allstations)
#add year month day to diet data
bluepyagg_stn_all <- left_join(bluepyagg_stn_all, diethauls)
# add NEAMAP SST to surftemp field
NEAMAPidSST <- NEAMAPstationSST %>%
  mutate(id = station) %>%
  dplyr::select(id, SST)
  
bluepyagg_stn_all <- left_join(bluepyagg_stn_all, NEAMAPidSST, by="id") %>%
  mutate(surftemp = coalesce(surftemp, SST)) %>%
  dplyr::select(-SST)
# save merged dataset with day, month, and NEAMAP surftemp, same name
saveRDS(bluepyagg_stn_all, here("fhdat/bluepyagg_stn_all.rds"))missing <- bluepyagg_stn_all %>%
  group_by(vessel) %>%
  summarise(missingSST = sum(is.na(surftemp)))Now match stations to OISST
#read in diet data with month day fields
library(sf)
library(raster)
library(terra)
library(nngeo)
bluepyagg_stn_all <- readRDS(here("fhdat/bluepyagg_stn_all.rds"))
stations <- bluepyagg_stn_all %>%
  dplyr::mutate(day = str_pad(day, 2, pad='0'),
                month = str_pad(month, 2, pad='0'),
                yrmody = as.numeric(paste0(year, month, day))) %>%
  dplyr::select(id, declon, declat, year, yrmody) %>%
  na.omit() %>%
  sf::st_as_sf(coords=c("declon","declat"), crs=4326, remove=FALSE) 
#list of SST dataframes
SSTdfs <- list.files(here("data-raw/gridded/sst_data/"), pattern = "*.rds")
dietstn_OISST <- tibble()
for(df in SSTdfs){
  sstdf <- readRDS(paste0(here("data-raw/gridded/sst_data/", df)))
  
  # keep only bluefish dates in SST year
  stationsyr <- stations %>%
    filter(year == unique(sstdf$year))
  
  # keep only sst days in bluefish dataset
  sstdf_survdays <- sstdf %>%
    dplyr::mutate(yrmody = as.numeric(paste0(year, month, day)) )%>%
    dplyr::filter(yrmody %in% unique(stationsyr$yrmody)) %>%
    dplyr::mutate(year = as.numeric(year),
           month = as.numeric(month),
           day = as.numeric(day),
           declon = Lon,
           declat = Lat) %>%
    dplyr::select(-Lon, -Lat) %>%
    sf::st_as_sf(coords=c("declon","declat"), crs=4326, remove=FALSE)  
  
  # now join by nearest neighbor and date
    
#https://stackoverflow.com/questions/71959927/spatial-join-two-data-frames-by-nearest-feature-and-date-in-r      
  
yrdietOISST <- do.call('rbind', lapply(split(stationsyr, 1:nrow(stationsyr)), function(x) {
   sf::st_join(x, sstdf_survdays[sstdf_survdays$yrmody == unique(x$yrmody),],
           #join = st_nearest_feature
           join = st_nn, k = 1, progress = FALSE
           )
 }))
  
#   #datatable solution--works but doesnt seem faster?
#    df1 <- data.table(stationsyr)
#   
#  .nearest_samedate <- function(x) {
#    st_join(st_as_sf(x), sstdf_survdays[sstdf_survdays$yrmody == unique(x$yrmody),], join = st_nearest_feature)
#  }
# # 
#  yrdietOISST <- df1[, .nearest_samedate(.SD), by = list(1:nrow(df1))]
dietstn_OISST <- rbind(dietstn_OISST, yrdietOISST)
}
saveRDS(dietstn_OISST, here("data-raw/dietstn_OISST.rds"))Now join OISST with dataset
#read in diet data and station-OISST
bluepyagg_stn_all <- readRDS(here("fhdat/bluepyagg_stn_all.rds"))
dietstn_OISST <- readRDS(here("data-raw/dietstn_OISST.rds"))
dietstn_OISST_merge <- dietstn_OISST %>%
  dplyr::rename(declon = declon.x,
         declat = declat.x,
         year = year.x,
         oisst = sst) %>%
  dplyr::select(id, oisst) %>%
  sf::st_drop_geometry()
bluepyagg_stn_all_OISST <- left_join(bluepyagg_stn_all, dietstn_OISST_merge)
saveRDS(bluepyagg_stn_all_OISST, here("fhdat/bluepyagg_stn_all_OISST.rds"))