Having built an initial dataset of haul-specific mean bluefish prey weight per stomach for the updated piscivore list from both NEFSC and NEAMAP, now we’ll run a few configurations to see if spatial and spatio-temporal random effects make sense to estimate the prey index in VAST.
Following what Ng et al. (2021) did for herring, we apply a Poisson-link delta model to estimate expected prey mass per predator stomach. However, we use 500 knots, estimated by k-means clustering of the data, to define the spatial dimensions of each seasonal model.
Model selection follows Ng et al. (2021) using REML.
I updated the script each time for index standardization model, using 500 knots to speed things up (run from script VASTunivariate__bfp_allsurvs_modsel.R
). This is the most recent version:
# VAST attempt 2 univariate model as a script
# modified from https://github.com/James-Thorson-NOAA/VAST/wiki/Index-standardization
# Load packages
library(here)
library(dplyr)
library(VAST)
#Read in data, separate spring and fall, and rename columns for VAST:
# this dataset created in UpdateVAST_Inputs.Rmd
bluepyagg_stn <- readRDS(here("fhdat/bluepyagg_stn_all.rds"))
# filter to assessment years at Tony's suggestion
# code Vessel as AL=0, HB=1, NEAMAP=2
bluepyagg_stn_fall <- bluepyagg_stn %>%
#ungroup() %>%
filter(season_ng == "FALL",
year > 1984) %>%
mutate(AreaSwept_km2 = 1, #Elizabeth's code
#declon = -declon already done before neamap merge
Vessel = as.numeric(as.factor(vessel))-1
) %>%
select(Catch_g = meanbluepywt, #use bluepywt for individuals input in example
Year = year,
Vessel,
AreaSwept_km2,
Lat = declat,
Lon = declon) %>%
na.omit() %>%
as.data.frame()
bluepyagg_stn_spring <- bluepyagg_stn %>%
filter(season_ng == "SPRING",
year > 1984) %>%
mutate(AreaSwept_km2 =1, #Elizabeth's code
#declon = -declon already done before neamap merge
Vessel = as.numeric(as.factor(vessel))-1
) %>%
select(Catch_g = meanbluepywt,
Year = year,
Vessel,
AreaSwept_km2,
Lat = declat,
Lon = declon)%>%
na.omit() %>%
as.data.frame()
# Make settings (turning off bias.correct to save time for example)
# NEFSC strata limits https://github.com/James-Thorson-NOAA/VAST/issues/302
# use only MAB, GB, GOM, SS EPUs
# leave out south of Cape Hatteras at Elizabeth's suggestion
# could also leave out SS?
# CHECK if these EPUs match what we use in SOE
MABGBGOM <- northwest_atlantic_grid %>%
filter(EPU %in% c("Mid_Atlantic_Bight", "Georges_Bank", "Gulf_of_Maine")) %>%
select(MAB_GB_GOM = stratum_number) %>%
distinct()
MABGBGOMSS <- northwest_atlantic_grid %>%
filter(EPU %in% c("Mid_Atlantic_Bight", "Georges_Bank", "Gulf_of_Maine",
"Scotian_Shelf")) %>%
select(MAB_GB_GOM_SS = stratum_number) %>%
distinct()
MABGB <- northwest_atlantic_grid %>%
filter(EPU %in% c("Mid_Atlantic_Bight", "Georges_Bank")) %>%
select(MAB_GB_GOM_SS = stratum_number) %>%
distinct()
GB <- northwest_atlantic_grid %>%
filter(EPU %in% c("Georges_Bank")) %>%
select(MAB_GB_GOM_SS = stratum_number) %>%
distinct()
MAB <- northwest_atlantic_grid %>%
filter(EPU %in% c("Mid_Atlantic_Bight")) %>%
select(MAB_GB_GOM_SS = stratum_number) %>%
distinct()
# configs
FieldConfig <- c(
"Omega1" = 0, # number of spatial variation factors (0, 1, AR1)
"Epsilon1" = 0, # number of spatio-temporal factors
"Omega2" = 0,
"Epsilon2" = 0
)
# Model selection options and naming:
# Season_knots + suffix below
# FieldConfig default (all IID), comment out FieldConfig below
# _noaniso FieldConfig default (all IID) and use_anistropy = FALSE
# _noomeps2 FieldConfig 0 for Omega2, Epsilon2
# _noomeps2_noaniso FieldConfig 0 for Omega2, Epsilon2 and use_anistropy = FALSE
# _noomeps2_noeps1 FieldConfig 0 for Omega2, Epsilon2, Epsilon1
# _noomeps2_noeps1_noaniso FieldConfig 0 for Omega2, Epsilon2, Epsilon1 and use_anistropy = FALSE
# _noomeps12 FieldConfig all 0
# _noomeps12_noaniso FieldConfig all 0 and use_anistropy = FALSE
RhoConfig <- c(
"Beta1" = 0, # temporal structure on years (intercepts)
"Beta2" = 0,
"Epsilon1" = 0, # temporal structure on spatio-temporal variation
"Epsilon2" = 0
)
# 0 off (fixed effects)
# 1 independent
# 2 random walk
# 3 constant among years (fixed effect)
# 4 AR1
OverdispersionConfig <- c("eta1"=0, "eta2"=0)
# eta1 = vessel effects on prey encounter rate
# eta2 = vessel effects on prey weight
strata.limits <- as.list(MABGBGOMSS)
settings = make_settings( n_x = 500,
Region = "northwest_atlantic",
#strata.limits = list('All_areas' = 1:1e5), full area
strata.limits = strata.limits,
purpose = "index2",
bias.correct = FALSE,
use_anisotropy = FALSE,
#fine_scale = FALSE,
FieldConfig = FieldConfig,
#RhoConfig = RhoConfig,
#OverdispersionConfig = OverdispersionConfig
)
# select dataset and set directory for output
#########################################################
# Run model fall
season <- c("fall_500_noomeps12_noaniso")
working_dir <- here::here(sprintf("pyindex/allagg_%s/", season))
if(!dir.exists(working_dir)) {
dir.create(working_dir)
}
fit <- fit_model(
settings = settings,
Lat_i = bluepyagg_stn_fall$Lat,
Lon_i = bluepyagg_stn_fall$Lon,
t_i = bluepyagg_stn_fall$Year,
b_i = as_units(bluepyagg_stn_fall[,'Catch_g'], 'g'),
a_i = rep(1, nrow(bluepyagg_stn_fall)),
v_i = bluepyagg_stn_fall$Vessel,
Use_REML = TRUE,
working_dir = paste0(working_dir, "/"))
# Plot results
plot( fit,
working_dir = paste0(working_dir, "/"))
######################################################
# Run model spring
season <- c("spring_500_noomeps12_noaniso")
working_dir <- here::here(sprintf("pyindex/allagg_%s/", season))
if(!dir.exists(working_dir)) {
dir.create(working_dir)
}
fit = fit_model( settings = settings,
Lat_i = bluepyagg_stn_spring[,'Lat'],
Lon_i = bluepyagg_stn_spring[,'Lon'],
t_i = bluepyagg_stn_spring[,'Year'],
b_i = as_units(bluepyagg_stn_spring[,'Catch_g'], 'g'),
a_i = as_units(bluepyagg_stn_spring[,'AreaSwept_km2'], 'km^2'),
v_i = bluepyagg_stn_spring$Vessel,
Use_REML = TRUE,
working_dir = paste0(working_dir, "/"))
# Plot results
plot( fit,
working_dir = paste0(working_dir, "/"))
We’ll scrape the settings and parameter estimates from each folder to compare AICs:
# from each output folder in pyindex,
outdir <- here("pyindex")
moddirs <- list.dirs(outdir)
moddirs <- moddirs[-1]
# keep folder name
modnames <- list.dirs(outdir, full.names = FALSE)
# function to apply extracting info
getmodinfo <- function(d.name){
# read settings
modpath <- stringr::str_split(d.name, "/", simplify = TRUE)
modname <- modpath[length(modpath)]
settings <- read.table(file.path(d.name, "settings.txt"), comment.char = "",
fill = TRUE, header = FALSE)
n_x <- as.numeric(as.character(settings[(which(settings[,1]=="$n_x")+1),2]))
grid_size_km <- as.numeric(as.character(settings[(which(settings[,1]=="$grid_size_km")+1),2]))
max_cells <- as.numeric(as.character(settings[(which(settings[,1]=="$max_cells")+1),2]))
use_anisotropy <- as.character(settings[(which(settings[,1]=="$use_anisotropy")+1),2])
fine_scale <- as.character(settings[(which(settings[,1]=="$fine_scale")+1),2])
bias.correct <- as.character(settings[(which(settings[,1]=="$bias.correct")+1),2])
#FieldConfig
if(settings[(which(settings[,1]=="$FieldConfig")+1),1]=="Component_1"){
omega1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+2),2])
omega2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),1])
epsilon1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),2])
epsilon2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+5),1])
beta1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+6),2])
beta2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+7),1])
}
if(settings[(which(settings[,1]=="$FieldConfig")+1),1]=="Omega1"){
omega1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),1])
omega2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),1])
epsilon1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),2])
epsilon2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),2])
beta1 <- "IID"
beta2 <- "IID"
}
#RhoConfig
rho_beta1 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+3),1]))
rho_beta2 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+3),2]))
rho_epsilon1 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+4),1]))
rho_epsilon2 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+4),2]))
# read parameter estimates, object is called parameter_Estimates
load(file.path(d.name, "parameter_estimates.RData"))
AIC <- parameter_estimates$AIC[1]
converged <- parameter_estimates$Convergence_check[1]
fixedcoeff <- unname(parameter_estimates$number_of_coefficients[2])
randomcoeff <- unname(parameter_estimates$number_of_coefficients[3])
# return model atributes as a dataframe
out <- data.frame(modname = modname,
n_x = n_x,
grid_size_km = grid_size_km,
max_cells = max_cells,
use_anisotropy = use_anisotropy,
fine_scale = fine_scale,
bias.correct = bias.correct,
omega1 = omega1,
omega2 = omega2,
epsilon1 = epsilon1,
epsilon2 = epsilon2,
beta1 = beta1,
beta2 = beta2,
rho_epsilon1 = rho_epsilon1,
rho_epsilon2 = rho_epsilon2,
rho_beta1 = rho_beta1,
rho_beta2 = rho_beta2,
AIC = AIC,
converged = converged,
fixedcoeff = fixedcoeff,
randomcoeff = randomcoeff
)
return(out)
}
# combine into one table for comparison
modselect <- purrr::map_dfr(moddirs, getmodinfo)
Now compare the AIC for the 500 knot models to see if including the spatial and spatio-temporal random effects in the first and second linear predictors improved the model fit. Model structures tested include with and without anistropy (2 fixed parameters), and with and without spatial and spatio-temporal random effects in the second linear predictor or both linear predictors. This follows the model selection process outlined in Ng et al. (2021).
NEAMAP is now included in the analysis. We have only NEAMAP data for 2020-2021 in Fall and for 2021 in Spring.
# only compare AIC for the 500 knot models
modselect.500 <- modselect %>%
filter(n_x == 500) %>%
mutate(season = case_when(str_detect(modname, "fall") ~ "Fall",
str_detect(modname, "spring") ~ "Spring",
TRUE ~ as.character(NA))) %>%
mutate(converged2 = case_when(str_detect(converged, "no evidence") ~ "likely",
str_detect(converged, "is likely not") ~ "unlikely",
TRUE ~ as.character(NA))) %>%
group_by(season) %>%
mutate(deltaAIC = AIC-min(AIC)) %>%
select(modname, season, deltaAIC, fixedcoeff,
randomcoeff, use_anisotropy,
omega1, omega2, epsilon1, epsilon2,
beta1, beta2, AIC, converged2) %>%
arrange(AIC)
DT::datatable(modselect.500, rownames = FALSE,
options= list(pageLength = 25, scrollX = TRUE))
All of the models converged. Similar to the preliminary run with only NEFSC diet data and the previous predator list, models with anistopy, spatial and spatio-temporal random effects are most supported by the data.
Note that these do not have catchability coefficients applied yet, so are not final models.
Fall Index
Spring Index
Fall ln-density
All results in the respective pyindex/allagg_fall_500 and allagg_spring_500 folders.
The full results are on google drive rather than github to save space.