PiscPESC from Scott

This is Scott’s code that runs the Predator Expanded Stomach Contents (PESC) VAST model.

# remotes::install_github("James-Thorson-NOAA/FishStatsUtils")
# remotes::install_github("James-Thorson-NOAA/VAST")

library(TMB)

## Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
## TMB was built with Matrix version 1.3.4
## Current Matrix version is 1.3.2
## Please re-install 'TMB' from source using install.packages('TMB', type = 'source') or ask CRAN for a binary version of 'TMB' matching CRAN's 'Matrix' package

library(VAST)

## ###########################################################################################

## Loading package VAST version 3.8.2

## For information and examples, please see http://github.com/james-thorson/VAST/

## ###########################################################################################

## Loading package `FishStatsUtils` version 2.10.2

## Loading required package: units

## udunits database from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/units/share/udunits/udunits2.xml

library(FishStatsUtils)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)

packageVersion("VAST")

## [1] '3.8.2'

##3.8.2
packageVersion("FishStatsUtils")

## [1] '2.10.2'

##2.10.2

Scott’s complete code below. Compare the predator guilds here with the ecodata predator guilds–they should be the same!

# 1 Prepare data -----------------------------------------------------------

### Fish community diet paper: Garrison and Link 2000 (https://www.int-res.com/articles/meps/202/m202p231.pdf)

# ## NOTE: On my W10 machine, I needed to make sure Rtools is found in the PATH... every time.
# Sys.setenv(PATH = paste("C:/Rtools/bin", Sys.getenv("PATH"), sep=";"))
# Sys.setenv(BINPREF = "C:/Rtools/mingw_$(WIN)/bin/")

## 1.1 Download data -----------------------------------------------------------
load(url("https://github.com/Laurels1/Condition/raw/master/data/allfh.RData"))
# survdat <- readRDS(url("https://github.com/Laurels1/Condition/raw/master/data/NEFSC_survey_data_02-13-20.rds"))
## I have no idea where I found this file... check with Sean/Andy to see if there is a better survdatBio source
#survdat <- readRDS("analysis/data/derived_data/survdatBio.rds")$survdat
survdat <- readRDS("fhdat/survdatBio.rds")$survdat

## I think this is from Wigley's LW stuff, probably should use survdat (e.g., https://noaa-edab.github.io/survdat/reference/get_length_weight.html)
lw_dat <- read.csv("https://raw.githubusercontent.com/Laurels1/Condition/master/data/lw_parameters_Condition.csv",
                   stringsAsFactors = FALSE)

# guild_dat <- read.csv(file = "analysis/data/derived_data/predator_guilds.csv", stringsAsFactors = FALSE)
# prey_dat <- read.csv(file = "analysis/data/derived_data/prey_categories.csv", stringsAsFactors = FALSE)
# guild_diet <- read.csv(file = "analysis/data/derived_data/guild_diets.csv", stringsAsFactors = FALSE)

guild_dat <- read.csv("https://raw.githubusercontent.com/NOAA-EDAB/neusvast/master/analysis/data/derived_data/predator_guilds.csv?token=AB6LGDWTW4MZVH63YYUWNPDBS2XPC")
prey_dat <- read.csv("https://raw.githubusercontent.com/NOAA-EDAB/neusvast/master/analysis/data/derived_data/prey_categories.csv?token=AB6LGDRDO5TWRVDZ7RBE7RLBS2XQ2")
guild_diet <- read.csv("https://raw.githubusercontent.com/NOAA-EDAB/neusvast/master/analysis/data/derived_data/guild_diets.csv?token=AB6LGDS662FGTPK3VOFR37LBS2XMQ")

## Clean up the input data and reorganize so min and max length per guild are meaningful
### Note -- I removed the size groupings for each species within a feeding guild (e.g., if S and M cod are in the same guild, the range of S & M is reported)
guild_clean <- guild_dat %>%
  mutate(comnam = tolower(comnam),
         spp = tolower(spp),
         guild = gsub(" ", "_", guild),
         guild = gsub("/", "-and-", guild),
         max_val = ifelse(is.na(max_val) & grepl(pattern = ">", min_val),
                          Inf,
                          max_val),
         max_val = as.numeric(max_val),
         min_val = as.numeric(gsub(">", "", min_val))) %>%
  group_by(comnam, guild, spp) %>%
  summarize(min_val = min(min_val),
            max_val = max(max_val)) %>%
  arrange(guild)

## Select all of the predator spp from allfh to add svspp number to guild names
allfh_spp <- allfh %>%
  select(pdscinam, pdcomnam, svspp) %>%
  mutate(pdscinam = tolower(pdscinam),
         pdcomnam = tolower(pdcomnam)) %>%
  distinct()

## Look-up for all prey names/categories
prey_cat <- allfh %>%
  select(pynam, pycomnam2, gencat, gensci, analcat, analsci, collcat) %>%
  distinct()

## grab distinct predator spp and join with svspp
guild_spp <- guild_clean %>%
  select(spp, comnam) %>%
  distinct() %>%
  left_join(allfh_spp, by = c("spp" = "pdscinam", "comnam" = "pdcomnam"))

## add svspp back onto the guild data
guild_len <- guild_clean %>%
  left_join(guild_spp)


## Here's the spot to start iterating over guilds if one were so inclined
fish_guild <- c("amphipod-and-shrimp", "benthivore", "crab_eater", "piscivore", "planktivore", "shrimp-and-small_fish")[4]


working_dir <- here::here(sprintf("PESC/pesc_%s/", fish_guild))
plot_dir <- here::here(sprintf("PESC/pesc_%s//", fish_guild))

if(!dir.exists(working_dir)) {
  dir.create(working_dir)
}

if(!dir.exists(plot_dir)) {
  dir.create(plot_dir)
}

## 1.2 Length-weight parameters--------------------------------------------------

### Clean up LW to get a and b for each svspp
lw_par <-  lw_dat %>%
  tidyr::pivot_longer(cols = c(-SPECIES_GROUP_ID, -LW_SVSPP,
                               -SPECIES, -SEXMF),
                      names_to = "term",
                      values_to = "value") %>%
  mutate(season = case_when(grepl("SEASONLESS", term) ~ "all",
                            grepl("IND_WEIGHT_TOLERANCE$", term) ~ "all",
                            grepl("_SPRING$", term) ~ "spring",
                            grepl("_FALL$", term) ~ "fall",
                            TRUE ~ NA_character_),
         term = gsub("SEASONLESS_|_FALL$|_SPRING$", "", term)) %>%
  dplyr::filter(!term %in% c("IND_WEIGHT_TOLERANCE", "CONTAINER_WEIGHT_TOLERANCE")) %>%
  tidyr::pivot_wider(names_from = term, values_from = value) %>%
  mutate(SEXMF = ifelse(SEXMF %in% c("M", "F"),
                        SEXMF,
                        NA_character_)) %>%
  filter(is.na(SEXMF),
         season == "all") %>%
  select(svspp = LW_SVSPP,
         lw_b = EXPONENT,
         lw_a = COEFFICIENT)

### add lw parameters for each species
guild_lw <- guild_spp %>%
  left_join(lw_par) %>%
  left_join(guild_len)


# 1.3 Survey data ---------------------------------------------

surv_raw <- survdat %>%
  filter(!is.na(LON),
         !is.na(LAT),
         SEASON %in% c("SPRING", "FALL"),
         YEAR >= 1973,
         YEAR <= 2018) %>%
  mutate(YEAR = ifelse(is.na(YEAR),
                       gsub("\\d{2}$", "", CRUISE6),
                       YEAR),
         YEAR = as.numeric(YEAR),
         id = paste0(CRUISE6, "_", STATION))

## total biomass of functional group per haul, if INDWT is missing, infill from LW relationship
### Note -- i'm not sure if it's a good idea to add missing weights using lengths. I did it, though.
pred_raw <- surv_raw %>%
  left_join(guild_lw, by = c("SVSPP" = "svspp")) %>%
  filter(LENGTH >= min_val,
         LENGTH <= max_val) %>%
  mutate(marker = ifelse(is.na(INDWT),
                         "LW",
                         "measured"),
         Catch_KG = ifelse(!is.na(INDWT),
                           INDWT,
                           (exp(lw_a))*LENGTH**lw_b)) %>%
  select(id,
         Year = YEAR,
         Lat = LAT,
         Lon = LON,
         guild,
         Catch_KG) %>%
  group_by(id, Year, guild) %>%
  summarize(Catch_KG = sum(Catch_KG))

# 1.4 Functional group diet data -----------------------------------------------

## Filter out only good stomachs for strata and species, add predator functional group LW info, guild diet categories,
## and if predator weight isn't available use LW relationships
### Note -- Again, i'm not sure if it's a good idea to add missing weights using lengths. I did it, though.
prey_raw <- allfh %>%
  filter(pynam != 'BLOWN',
         pynam != 'PRESERVED',
         pynam != ' ',
         purcode == 10) %>%
  left_join(guild_lw, by = c("svspp" = "svspp")) %>%
  filter(pdlen >= min_val,
         pdlen <= max_val) %>%
  right_join(guild_diet, by = c("guild", "collcat" = "prey_taxa"))

  ## The Garrison and Link guild diets categories might be too much for VAST, as a proof of concept, I'll simplify
  ## the piscivores... this is kinda random and should be revisited

prey_group <- prey_raw %>%
  mutate(collcat = case_when(guild == "piscivore" & grepl("^URO|^MERB", collcat) ~ "hakes",
                             guild == "piscivore" & grepl("^CLU|^AMMO|^ENG|^PEP|^SCO", collcat) ~ "small_silvers",
                             guild == "piscivore" & grepl("^LOL|^ILL|^CEPH", collcat) ~ "cephalopods", ## Quite a few missing years for illex and loligo, so I lumped together
                             # guild == "piscivore" & grepl("^ILL", collcat) ~ "illex",
                             guild == "piscivore" & grepl("^CAN|^COP|^PAN|^POLY", collcat) ~ "other_inverts",
                             guild == "piscivore" & grepl("^GAD|^OTH|^PLE|^UNID", collcat) ~ "other_fish",
                             guild == "piscivore" & grepl("^AR", collcat) ~ "other_unid",
                             TRUE ~ collcat)) %>%
  mutate(id = paste0(cruise6, "_", station),
         year = as.numeric(year),
         pyamtw = pyamtw/1000,
         pdwgt = ifelse(pdwgt <= 0,
                        NA,
                        pdwgt/1000),
         pd_kg = ifelse(!is.na(pdwgt),
                        pdwgt,
                        (exp(lw_a))*pdlen**lw_b)) %>%
  select(Year = year,
         id,
         guild,
         spp = collcat,
         pyamtw,
         pd_kg)

## calculate prey biomass per predator biomass
prey_bm <- prey_group %>%
  group_by(Year, id, spp, guild) %>%
  summarize(sum_pyamtw = sum(pyamtw),
         sum_pd_kg = sum(pd_kg),
         Catch_KG = sum_pyamtw/sum_pd_kg) %>%
  ungroup() %>%
  select(id,
         Year,
         guild,
         Catch_KG,
         spp) %>%
  distinct(.keep_all = TRUE) %>%
  filter(!is.na(spp))

## Calculate the mean biomass for each size_class and haul, which becomes the area swept
prey_swept <- prey_group %>%
  select(-spp, -pyamtw) %>%
  group_by(id, Year, guild) %>%
  mutate(AreaSwept_km2 = mean(pd_kg)) %>%
  select(-pd_kg) %>%
  distinct(.keep_all = TRUE)


# 1.5 Station data --------------------------------------------------------

## Select all stations and expand to the prey and size groups
prey_station <- prey_bm %>%
  select(-Catch_KG, -spp, -guild) %>%
  distinct(.keep_all = TRUE) %>%
  group_by(id) %>%
  rowwise() %>%
  mutate(guild = list(unique(prey_bm$guild)),
         spp = list(unique(prey_bm$spp))) %>%
  tidyr::unnest(guild) %>%
  tidyr::unnest(spp)

# prey_station <- prey_raw %>%
#   select(-Catch_KG, -spp, -size_class) %>%
#   distinct(.keep_all = TRUE) %>%
#   group_by(id) %>%
#   rowwise() %>%
#   mutate(size_class = list(c("small", "medium", "large")),
#          spp = list(unique(prey_raw$spp))) %>%
#   tidyr::unnest(size_class) %>%
#   tidyr::unnest(spp)

## Select all stations and expand to the prey groups of interest
station_dat <- surv_raw %>%
  dplyr::select(id,
                Year = YEAR,
                Lat = LAT,
                Lon = LON) %>%
  distinct(.keep_all = TRUE)

# 1.6 Join data -----------------------------------------------------------

# prey_dat <- station_dat %>%
#   left_join(prey_raw, by = c("id", "Year", "size_class", "spp")) %>%
#   mutate(Catch_KG = ifelse(is.na(Catch_KG),
#                            0,
#                            Catch_KG))
# prey_dat <- prey_raw %>%
#   right_join(prey_station) %>%
#   right_join(station_dat) %>%
#   group_by(id, size_class) %>%
#   filter(!all(is.na(Catch_KG))) %>%  # gets rid of hauls where all size_class are NA
#   filter(size_class == "medium") %>%
#   select(-size_class) %>%
#   mutate(Catch_KG = ifelse(is.na(Catch_KG),
#                            0,
#                            Catch_KG))

# head(prey_bm) ## 15107 - catch KG for each spp and size_class
# head(prey_station) ## 349284
# head(station_dat) ##39584
# head(prey_swept) ## 18196 - area swept for each size_class

# prey_dat <- prey_bm %>%
#   right_join(prey_station) %>%
#   right_join(station_dat) %>%
#   right_join(prey_swept) %>%
#   filter(size_class == "medium") %>%
#   select(-size_class) %>%
#   mutate(Catch_KG = ifelse(is.na(Catch_KG),
#                            0,
#                            Catch_KG))

prey_station <- station_dat %>% ##1543776
  # distinct(.keep_all = TRUE) %>%
  group_by(id) %>%
  rowwise() %>%
  mutate(guild = list(unique(prey_bm$guild)),
         spp = list(unique(prey_bm$spp))) %>%
  tidyr::unnest(guild) %>%
  tidyr::unnest(spp)

prey_dat <- prey_station %>%
  left_join(prey_bm) %>%
  left_join(prey_swept) %>%
  # filter(!is.na(AreaSwept_km2)) %>%
  filter(guild == "piscivore",
         spp %in% unique(prey_group$spp[prey_group$guild == "piscivore"])) %>%
  select(-guild) %>%
  mutate(Catch_KG = ifelse(is.na(Catch_KG),
                           0,
                           Catch_KG)) %>%
  ungroup()

### need to
missing_c_t <- prey_dat %>%
  group_by(Year, spp) %>%
  summarize(zero_cases = sum(Catch_KG != 0)) %>%
  filter(zero_cases == 0)

ggplot(missing_c_t, aes(x = Year, y = zero_cases))+
  geom_point() +
  facet_wrap(~spp)


# tb <- prey_dat %>%
#   group_by(Year, spp) %>%
#   mutate(new_kg = ifelse(all(Catch_KG == 0),
#                          NA,
#                          Catch_KG),
#          another_try = case_when(is.na(new_kg) ~ NA_real_,
#                               TRUE ~ Catch_KG))
#
#
# td <- tb %>%
#   group_by(Year, spp) %>%
#   summarize(sum2 = sum(is.na(new_kg))/n())

pred_dat <- pred_raw %>%
  filter(guild == "piscivore") %>%
  # dplyr::mutate(spp = "piscivore") %>%
  dplyr::right_join(station_dat) %>%
  dplyr::mutate(spp = "piscivore",
    # spp = ifelse(is.na(spp),
    #                          "piscivore",
    #                          spp),
                Catch_KG = ifelse(is.na(Catch_KG),
                                  0,
                                  Catch_KG),
                AreaSwept_km2 = 0.0384) %>%
  select(-guild) %>%
  ungroup()

all_dat <- bind_rows(pred_dat,
                     prey_dat) %>%
  distinct(.keep_all = TRUE) %>%
  mutate(spp = factor(spp, levels = c("piscivore", unique(prey_dat$spp)))) %>%
  group_by(Year, spp) %>%
  mutate(new_kg = ifelse(all(Catch_KG == 0),
                  NA,
                  Catch_KG),
         Catch_KG = case_when(is.na(new_kg) ~ NA_real_,
                        TRUE ~ Catch_KG),
         AreaSwept_km2 = 0.0384) %>%
  ungroup() %>%
  select(-id, -new_kg) %>%
  distinct() %>%
  data.frame(stringsAsFactors = FALSE)

# ggplot(data = all_dat, aes(x = Lon, y = Lat, color = spp, size = log(Catch_KG + 1)), alpha = 0.2)+
#   facet_wrap(~Year) +
#   geom_point()

######## Make settings
settings = make_settings(n_x = 50,
                         # bias.correct = FALSE,
                         Region = "northwest_atlantic",
                         purpose = "index2",
                         strata.limits = list('All_areas' = 1:1e5)) #list('Georges_Bank' = strata_limits))

######## Change settings from defaults
settings$ObsModel = c( 2, 1 ) #make_data()
settings$Options[2:4] = FALSE
settings$use_anisotropy = FALSE
settings$fine_scale = TRUE
# settings$FieldConfig = c("Omega1"="IID", "Epsilon1"="IID", "Omega2"="IID", "Epsilon2"="IID")

## Expansion_cz should look like this (first row of pred, 0,0, and the prey spp levels should be 1,0 )
Expansion_cz <- matrix(c(0, 0, rep(c(1,0), nlevels(all_dat$spp) - 1)),
                       ncol = 2,
                       byrow = TRUE)

# Expansion_cz = matrix(c(0, 1, 1, 1, 1, 1, 1,
#                         1, 1, 1, 1, 1, 1, 1,
#                         0, 0, 0, 0, 0, 0, 0,
#                         0, 0, 0, 0, 0, 0, 0), nrow = nlevels(all_dat$spp), ncol = 2)

# #       [,1]  [,2]
# # [1,]    0     0
# # [2,]    1     0
# # [3,]    1     0
# # [4,]    1     0
# # [5,]    1     0
# # [6,]    1     0
# # [7,]    1     0
# # [8,]    1     0


######## Run model
fit = fit_model( "settings" = settings,
                 # "Lat_i"=all_dat[,'Y'],
                 # "Lon_i"=all_dat[,'X'],
                 "Lat_i" = all_dat[,'Lat'],
                 "Lon_i" = all_dat[,'Lon'],
                 "t_i" = all_dat[,'Year'],
                 "c_i" = as.numeric(all_dat[,"spp"])-1,
                 "b_i" = as_units(all_dat[,'Catch_KG'], "kg"),
                 "a_i" = as_units(all_dat[,'AreaSwept_km2'], "km^2"),
                 "Expansion_cz" = Expansion_cz,
                 working_dir = paste0(working_dir, "/"),
                 # "input_grid"=example$input_grid,
                 "knot_method"="grid", "Npool"=20, "newtonsteps"=1, "getsd"=TRUE, test_fit=FALSE)

saveRDS(fit, file = paste0(working_dir, "/fit.rds"))
saveRDS(all_dat, file = paste0(working_dir, "/all_dat.rds"))

Started this at 3:20 pm Nov 18 on my (new) laptop. Finished the run at 11:59 pm Nov 18.

Output messages:

Bias correcting 322 derived quantities ######################### The model is likely not converged #########################

PiscPESC from Scott

Sarah Gaichas

11/18/2021