After discussion with both the working group and Elizabeth Ng, we will limit the years to assessment years (1985 to present) and limit the areas to those north of Cape Hatteras because of patchy sampling further south.
A key point is ensuring that the negative sign is included with longitude (decimal degrees west) in the input data!
Having built an initial dataset of haul-specific mean bluefish prey weight per piscivore stomach, we now run a few configurations to see whether spatial and spatio-temporal random effects make sense for estimating the prey index in VAST.
We again follow what Ng et al. (2021) did for herring, but use 500 knots, estimated by k-means clustering of the data, to define the spatial dimensions of each seasonal model. We similarly apply a Poisson-link delta model to estimate expected prey mass per predator stomach. Model selection follows Ng et al. (2021) using REML.
I updated the index standardization script for each model configuration, using 500 knots to speed things up (run from the script VASTunivariate_bluefishprey.R, not within this rmd). This is the most recent version:
# VAST attempt 1 univariate model as a script
# modified from https://github.com/James-Thorson-NOAA/VAST/wiki/Index-standardization
# and https://github.com/NOAA-EDAB/neusvast/blob/master/analysis/models/pisc_pesc.R
# may need to downgrade TMB to match VAST's Matrix package version?
# see https://github.com/James-Thorson-NOAA/VAST/issues/289
# Load packages
library(here)
library(dplyr)
library(VAST)
#Read in data, separate spring and fall, and rename columns for VAST:
bluepyagg_stn <- readRDS(here("fhdat/bluepyagg_stn.rds"))
#bluepyagg_stn <- bluepyagg_pred_stn
# filter to assessment years at Tony's suggestion
# try datasets with subset of predators (start with 1)
bluepyagg_stn_fall <- bluepyagg_stn %>%
  #ungroup() %>%
  filter(season_ng == "FALL",
         year > 1984) %>%
  mutate(Vessel = "NEFSC",
         #AreaSwept_km2 = 0.0384) %>%
         AreaSwept_km2 = 1, # Elizabeth's code
         declon = -declon) %>%
  select(Catch_g = meanbluepywt, # use bluepywt for individuals input as in example
         Year = year,
         Vessel,
         AreaSwept_km2,
         Lat = declat,
         Lon = declon) %>%
  na.omit() %>%
  as.data.frame()
bluepyagg_stn_spring <- bluepyagg_stn %>%
  filter(season_ng == "SPRING",
         year > 1984) %>%
  mutate(Vessel = "NEFSC",
         #AreaSwept_km2 = 0.0384) %>%
         AreaSwept_km2 = 1, # Elizabeth's code
         declon = -declon) %>%
  select(Catch_g = meanbluepywt,
         Year = year,
         Vessel,
         AreaSwept_km2,
         Lat = declat,
         Lon = declon) %>%
  na.omit() %>%
  as.data.frame()
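# quick sanity checks (not in the original script; added here as a sketch):
# confirm rows per year and that all longitudes are negative (decimal degrees west)
table(bluepyagg_stn_fall$Year)
stopifnot(all(bluepyagg_stn_fall$Lon < 0),
          all(bluepyagg_stn_spring$Lon < 0))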
# Make settings (turning off bias.correct to save time for example)
# NEFSC strata limits https://github.com/James-Thorson-NOAA/VAST/issues/302
# use only MAB, GB, GOM, SS EPUs
# leave out south of Cape Hatteras at Elizabeth's suggestion
# could also leave out SS?
# CHECK if these EPUs match what we use in SOE
MABGBGOM <- northwest_atlantic_grid %>%
  filter(EPU %in% c("Mid_Atlantic_Bight", "Georges_Bank", "Gulf_of_Maine")) %>%
  select(MAB_GB_GOM = stratum_number) %>%
  distinct()

MABGBGOMSS <- northwest_atlantic_grid %>%
  filter(EPU %in% c("Mid_Atlantic_Bight", "Georges_Bank", "Gulf_of_Maine",
                    "Scotian_Shelf")) %>%
  select(MAB_GB_GOM_SS = stratum_number) %>%
  distinct()

MABGB <- northwest_atlantic_grid %>%
  filter(EPU %in% c("Mid_Atlantic_Bight", "Georges_Bank")) %>%
  select(MAB_GB = stratum_number) %>%
  distinct()

GB <- northwest_atlantic_grid %>%
  filter(EPU %in% c("Georges_Bank")) %>%
  select(GB = stratum_number) %>%
  distinct()

MAB <- northwest_atlantic_grid %>%
  filter(EPU %in% c("Mid_Atlantic_Bight")) %>%
  select(MAB = stratum_number) %>%
  distinct()
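# optional check (not in the original script): number of strata in each EPU subset
sapply(list(MABGBGOMSS = MABGBGOMSS, MABGBGOM = MABGBGOM,
            MABGB = MABGB, GB = GB, MAB = MAB), nrow)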
# configs
FieldConfig <- c(
  "Omega1"   = 0, # number of spatial variation factors (0 = off, or "IID"/"AR1")
  "Epsilon1" = 0, # number of spatio-temporal factors
  "Omega2"   = 0,
  "Epsilon2" = 0
)
# Model selection options:
# Season_knots + suffix below
#   (no suffix)              FieldConfig default (all IID)
#   _noaniso                 FieldConfig default (all IID) and use_anisotropy = FALSE
#   _noomeps2                FieldConfig 0 for Omega2, Epsilon2
#   _noomeps2_noaniso        FieldConfig 0 for Omega2, Epsilon2 and use_anisotropy = FALSE
#   _noomeps2_noeps1         FieldConfig 0 for Omega2, Epsilon2, Epsilon1
#   _noomeps2_noeps1_noaniso FieldConfig 0 for Omega2, Epsilon2, Epsilon1 and use_anisotropy = FALSE
#   _noomeps12               FieldConfig all 0
#   _noomeps12_noaniso       FieldConfig all 0 and use_anisotropy = FALSE
RhoConfig <- c(
  "Beta1"    = 0, # temporal structure on years (intercepts)
  "Beta2"    = 0,
  "Epsilon1" = 0, # temporal structure on spatio-temporal variation
  "Epsilon2" = 0
)
# 0 off (fixed effects)
# 1 independent
# 2 random walk
# 3 constant among years (fixed effect)
# 4 AR1
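# for example (not used in these runs; RhoConfig_rw_example is just an
# illustrative object), a random walk on the annual intercepts of the first
# linear predictor would be specified as:
RhoConfig_rw_example <- RhoConfig
RhoConfig_rw_example["Beta1"] <- 2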
strata.limits <- as.list(MABGBGOMSS)
settings <- make_settings(n_x = 500,
                          Region = "northwest_atlantic",
                          #strata.limits = list('All_areas' = 1:1e5), # full area
                          strata.limits = strata.limits,
                          purpose = "index2",
                          bias.correct = FALSE,
                          use_anisotropy = FALSE,
                          #fine_scale = FALSE,
                          FieldConfig = FieldConfig
                          #RhoConfig = RhoConfig
                          )
# Aniso = FALSE,              # corrects ln_H_input at bound
# FieldConfig["Epsilon1"] = 0 # corrects L_epsilon1_z approaching 0
# FieldConfig["Epsilon2"] = 0 # corrects L_epsilon2_z approaching 0
# increase knots to address bounds for logkappa?
# https://github.com/James-Thorson-NOAA/VAST/issues/300
# or try fine_scale = FALSE
# then Omegas hit bounds, had to turn them off too
# select dataset and set directory for output
season <- c("fall_500_noomeps12_noaniso")
working_dir <- here::here(sprintf("pyindex/allagg_%s/", season))
if(!dir.exists(working_dir)) {
  dir.create(working_dir)
}
# Run model fall
# fit <- fit_model(settings = settings,
#                  Lat_i = bluepyagg_stn_fall[,'Lat'],
#                  Lon_i = bluepyagg_stn_fall[,'Lon'],
#                  t_i = bluepyagg_stn_fall[,'Year'],
#                  b_i = as_units(bluepyagg_stn_fall[,'Catch_g'], 'g'),
#                  a_i = as_units(bluepyagg_stn_fall[,'AreaSwept_km2'], 'km^2'),
#                  working_dir = paste0(working_dir, "/"))
fit <- fit_model(
  settings = settings,
  Lat_i = bluepyagg_stn_fall$Lat,
  Lon_i = bluepyagg_stn_fall$Lon,
  t_i = bluepyagg_stn_fall$Year,
  b_i = as_units(bluepyagg_stn_fall[,'Catch_g'], 'g'),
  a_i = rep(1, nrow(bluepyagg_stn_fall)),
  Use_REML = TRUE,
  working_dir = paste0(working_dir, "/"))
# Plot results
plot(fit,
     working_dir = paste0(working_dir, "/"))
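# optional (not in the original script): quick convergence check on the fall fit,
# assuming the fitted object retains the optimizer summary from fit_tmb, as it
# typically does in recent VAST versions
fit$parameter_estimates$Convergence_check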
# Run model spring
season <- c("spring_500_noomeps12_noaniso")
working_dir <- here::here(sprintf("pyindex/allagg_%s/", season))
if(!dir.exists(working_dir)) {
  dir.create(working_dir)
}
fit <- fit_model(settings = settings,
                 Lat_i = bluepyagg_stn_spring[,'Lat'],
                 Lon_i = bluepyagg_stn_spring[,'Lon'],
                 t_i = bluepyagg_stn_spring[,'Year'],
                 b_i = as_units(bluepyagg_stn_spring[,'Catch_g'], 'g'),
                 a_i = as_units(bluepyagg_stn_spring[,'AreaSwept_km2'], 'km^2'),
                 Use_REML = TRUE,
                 working_dir = paste0(working_dir, "/"))
# Plot results
plot(fit,
     working_dir = paste0(working_dir, "/"))
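# optional (not in the original script): save the fitted object alongside the
# other outputs so it can be reloaded without refitting; the file name here is
# just an illustrative choice
saveRDS(fit, file.path(working_dir, "fit.rds"))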
We’ll scrape the settings and parameter estimates from each folder to compare AICs:
# from each output folder in pyindex,
outdir <- here("pyindex")
moddirs <- list.dirs(outdir)
moddirs <- moddirs[-1] # drop the parent directory itself
# keep folder name
modnames <- list.dirs(outdir, full.names = FALSE)
# function to extract settings and parameter estimates from each model folder
getmodinfo <- function(d.name){
  # read settings
  modpath <- stringr::str_split(d.name, "/", simplify = TRUE)
  modname <- modpath[length(modpath)]

  settings <- read.table(file.path(d.name, "settings.txt"), comment.char = "",
                         fill = TRUE, header = FALSE)

  n_x <- as.numeric(as.character(settings[(which(settings[,1]=="$n_x")+1),2]))
  grid_size_km <- as.numeric(as.character(settings[(which(settings[,1]=="$grid_size_km")+1),2]))
  max_cells <- as.numeric(as.character(settings[(which(settings[,1]=="$max_cells")+1),2]))
  use_anisotropy <- as.character(settings[(which(settings[,1]=="$use_anisotropy")+1),2])
  fine_scale <- as.character(settings[(which(settings[,1]=="$fine_scale")+1),2])
  bias.correct <- as.character(settings[(which(settings[,1]=="$bias.correct")+1),2])

  # FieldConfig
  if(settings[(which(settings[,1]=="$FieldConfig")+1),1]=="Component_1"){
    omega1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+2),2])
    omega2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),1])
    epsilon1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),2])
    epsilon2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+5),1])
    beta1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+6),2])
    beta2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+7),1])
  }

  if(settings[(which(settings[,1]=="$FieldConfig")+1),1]=="Omega1"){
    omega1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),1])
    omega2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),1])
    epsilon1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),2])
    epsilon2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),2])
    beta1 <- "IID"
    beta2 <- "IID"
  }

  # RhoConfig
  rho_beta1 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+3),1]))
  rho_beta2 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+3),2]))
  rho_epsilon1 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+4),1]))
  rho_epsilon2 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+4),2]))

  # read parameter estimates; the loaded object is called parameter_estimates
  load(file.path(d.name, "parameter_estimates.RData"))

  AIC <- parameter_estimates$AIC[1]
  converged <- parameter_estimates$Convergence_check[1]
  fixedcoeff <- unname(parameter_estimates$number_of_coefficients[2])
  randomcoeff <- unname(parameter_estimates$number_of_coefficients[3])

  # return model attributes as a dataframe
  out <- data.frame(modname = modname,
                    n_x = n_x,
                    grid_size_km = grid_size_km,
                    max_cells = max_cells,
                    use_anisotropy = use_anisotropy,
                    fine_scale = fine_scale,
                    bias.correct = bias.correct,
                    omega1 = omega1,
                    omega2 = omega2,
                    epsilon1 = epsilon1,
                    epsilon2 = epsilon2,
                    beta1 = beta1,
                    beta2 = beta2,
                    rho_epsilon1 = rho_epsilon1,
                    rho_epsilon2 = rho_epsilon2,
                    rho_beta1 = rho_beta1,
                    rho_beta2 = rho_beta2,
                    AIC = AIC,
                    converged = converged,
                    fixedcoeff = fixedcoeff,
                    randomcoeff = randomcoeff
                    )

  return(out)
}
# combine into one table for comparison
modselect <- purrr::map_dfr(moddirs, getmodinfo)
Now compare AIC for the 500-knot models to see whether including spatial and spatio-temporal random effects in the first and second linear predictors improved model fit. Model structures tested include with and without anisotropy (2 fixed parameters), and with and without spatial and spatio-temporal random effects in the second linear predictor or in both linear predictors. This follows the model selection process outlined in Ng et al. (2021).
These results are preliminary because we have not yet included NEAMAP data in the analysis.
# only compare AIC for the 500 knot models
modselect.500 <- modselect %>%
  filter(n_x == 500) %>%
  mutate(season = case_when(stringr::str_detect(modname, "fall") ~ "Fall",
                            stringr::str_detect(modname, "spring") ~ "Spring",
                            TRUE ~ as.character(NA))) %>%
  mutate(converged2 = case_when(stringr::str_detect(converged, "no evidence") ~ "likely",
                                stringr::str_detect(converged, "is likely not") ~ "unlikely",
                                TRUE ~ as.character(NA))) %>%
  group_by(season) %>%
  mutate(deltaAIC = AIC - min(AIC)) %>%
  select(modname, season, deltaAIC, fixedcoeff,
         randomcoeff, use_anisotropy,
         omega1, omega2, epsilon1, epsilon2,
         beta1, beta2, AIC, converged2) %>%
  arrange(AIC)

DT::datatable(modselect.500, rownames = FALSE,
              options = list(pageLength = 25, scrollX = TRUE))
All of the models converged. For both the spring and fall NEFSC survey datasets, models including spatial and spatio-temporal random effects were supported.
Figures (not reproduced here): Fall Index, Spring Index, Fall ln-density.
All results are in the respective pyindex/allagg_fall_500 and pyindex/allagg_spring_500 folders. The full results are stored on Google Drive rather than GitHub to save space.