Model comparisons led us to the best model fit using mean predator length, number of predator species, and SST at a survey station as catchability covariates. For the stock assessment inputs, the model domain has been partitioned into several definitions of “inshore” and “offshore.” First we define a partition comprising the MAB and GB areas only, since the GOM is not relevant to the bluefish assessment (yet). This partition is called MABGB, while the full model domain is AllEPU. Within this partition, we further split state waters (within 3 nautical miles of shore) from federal waters.
Survey strata definitions are already built into VAST. The results presented here use a newly defined strata set with all splits in one model. This requires the dev version of VAST, because the 3 mile split cuts across survey stratum boundaries.
remotes::install_github("james-thorson/VAST@dev")
# ALSO NEEDED dev version of FishStatsUtils
# re-installed after additions by Jim addressing plotting issues
remotes::install_github("james-thorson/FishStatsUtils@dev")
# I did not update any packages, we'll see how this goes
# These were the choices
# 4: FishStats... (c8bf4df1b... -> 305b3845e...) [GitHub]
# 5: pillar (1.8.0 -> 1.8.1 ) [CRAN]
# 6: evaluate (0.15 -> 0.16 ) [CRAN]
# 7: callr (3.7.1 -> 3.7.2 ) [CRAN]
# 8: xfun (0.31 -> 0.32 ) [CRAN]
# 9: stringr (1.4.0 -> 1.4.1 ) [CRAN]
# 10: knitr (1.39 -> 1.40 ) [CRAN]
# 11: tinytex (0.40 -> 0.41 ) [CRAN]
# 12: rmarkdown (2.14 -> 2.16 ) [CRAN]
# 13: httr (1.4.3 -> 1.4.4 ) [CRAN]
# 14: rstudioapi (0.13 -> 0.14 ) [CRAN]
# 15: gert (1.6.0 -> 1.7.1 ) [CRAN]
# 16: insight (0.18.0 -> 0.18.2 ) [CRAN]
# 17: TMB (1.9.0 -> 1.9.1 ) [CRAN]
# confirmed this version of VAST worked by running the simple example
library(VAST)
# load data set
# see `?load_example` for list of stocks with example data
# that are installed automatically with `FishStatsUtils`.
example = load_example( data_set="EBS_pollock" )
# Make settings (turning off bias.correct to save time for example)
settings = make_settings( n_x = 100,
Region = example$Region,
purpose = "index2",
Version = "VAST_v14_0_1", #needed to prevent error from newer dev version number
bias.correct = FALSE )
# Run model
fit = fit_model( settings = settings,
Lat_i = example$sampling_data[,'Lat'],
Lon_i = example$sampling_data[,'Lon'],
t_i = example$sampling_data[,'Year'],
b_i = example$sampling_data[,'Catch_KG'],
a_i = example$sampling_data[,'AreaSwept_km2'] )
# Plot results
plot( fit )
We have defined splits in the inshore strata at the 3 nautical mile boundary (inside 3 miles vs. offshore of 3 miles) and renumbered those strata. All strata are then included in a single model.
Thorson’s recommendations for the dev version:

> The dev branch has a feature where you can just supply `extrapolation_list` to `fit_model` and it overrides the call to `make_extrapolation_info`. So you could make your own function locally by modifying `Prepare_NWA_Extrapolation_Data_Fn` and pass that through with no other changes needed.
>
> I recommend also plotting the predictive CV .. using dev branch this is done by `plot(fit, plot_set=3, plot_value = sd)`, or using earlier versions required a different function for `plot_value` to get the CV (I improved the interface sometime recently but don’t remember exactly when).
This documents the new extrapolation list using a locally modified `Prepare_NWA_Extrapolation_Data_Fn`, which was used here in the non-bias-corrected trial.
Definitions for strata used in the script `bluefishdiet/VASTunivariate_bfp_allsurvs_lencovSST_inoffsplit.R`:
# use only MAB, GB, GOM, SS EPUs
# could also leave out SS?
# These EPU definitions match what we use in SOE
bfinshore <- c(3020, 3050, 3080, 3110, 3140, 3170, 3200, 3230,
3260, 3290, 3320, 3350, 3380, 3410, 3440, 3450, 3460)
bfoffshore <- c(1010, 1730, 1690, 1650, 1050, 1060, 1090, 1100, 1250, 1200, 1190, 1610)
MAB <- c(1010:1080, 1100:1120, 1600:1750, 3010:3450, 3470, 3500, 3510)
GB <- c(1090, 1130:1210, 1230, 1250, 3460, 3480, 3490, 3520:3550)
GOM <- c(1220, 1240, 1260:1290, 1360:1400, 3560:3830)
SS <- c(1300:1352, 3840:3990)
MABGBinshore <- c(3010:3450, 3460, 3470, 3480, 3490, 3500, 3510, 3520:3550)
MABGBoffshore <- c(1010:1080, 1090, 1100:1120,1130:1210, 1230, 1250, 1600:1750)
# don't need
# MABoffshore <- c(1010:1080, 1100:1120, 1600:1750)
# GBoffshore <- c(1090, 1130:1210, 1230, 1250)
Read in the spatial information saved previously:
coast3nmbuff <- readRDS(here("spatialdat/neus_coast3nmbuff.rds"))
coast3nmbuffst <- coast3nmbuff %>%
dplyr::mutate(strat2 = 1) %>% #state waters = 1
dplyr::right_join(FishStatsUtils::northwest_atlantic_grid) %>%
dplyr::mutate(strat2 = replace_na(strat2, 2)) %>% #replace NA with 2 for fed waters
dplyr::mutate(strat2 = case_when(!stratum_number %in% c(MAB, GB) ~ 0, #ignore outside MABGB
TRUE ~ strat2)) %>%
dplyr::mutate(stratum_number2 = as.numeric(paste0(stratum_number, strat2))) %>%
dplyr::select(-strat2)
# new lookups
# MAB EPU
MAB2 <- coast3nmbuffst %>%
dplyr::filter(stratum_number %in% MAB) %>%
dplyr::select(stratum_number2) %>%
dplyr::distinct()
# MAB state waters
MAB2state <- MAB2 %>%
dplyr::filter(stratum_number2 %% 10 == 1)
# MAB federal waters
MAB2fed <- MAB2 %>%
dplyr::filter(stratum_number2 %% 10 == 2)
# Georges Bank EPU
GB2 <- coast3nmbuffst %>%
dplyr::filter(stratum_number %in% GB) %>%
dplyr::select(stratum_number2) %>%
dplyr::distinct()
# GB state waters
GB2state <- GB2 %>%
dplyr::filter(stratum_number2 %% 10 == 1)
#GB federal waters
GB2fed <- GB2 %>%
dplyr::filter(stratum_number2 %% 10 == 2)
# whole bluefish domain MABGB
MABGB2 <- dplyr::bind_rows(MAB2, GB2)
# MABGB state waters
MABGBstate <- dplyr::bind_rows(MAB2state, GB2state)
# MABGB federal waters
MABGBfed <- dplyr::bind_rows(MAB2fed, GB2fed)
# gulf of maine EPU (for SOE)
GOM2 <- coast3nmbuffst %>%
dplyr::filter(stratum_number %in% GOM) %>%
dplyr::select(stratum_number2) %>%
dplyr::distinct()
# scotian shelf EPU (for SOE)
SS2 <- coast3nmbuffst %>%
dplyr::filter(stratum_number %in% SS) %>%
dplyr::select(stratum_number2) %>%
dplyr::distinct()
# previous bluefish strata
bfinshore2 <- coast3nmbuffst %>%
dplyr::filter(stratum_number %in% bfinshore) %>%
dplyr::select(stratum_number2) %>%
dplyr::distinct()
# additional new bluefish strata 2022
bfoffshore2 <- coast3nmbuffst %>%
dplyr::filter(stratum_number %in% bfoffshore) %>%
dplyr::select(stratum_number2) %>%
dplyr::distinct()
# all bluefish strata
bfall2 <- dplyr::bind_rows(bfinshore2, bfoffshore2)
# albatross inshore strata
albinshore2 <- coast3nmbuffst %>%
dplyr::filter(stratum_number %in% setdiff(MABGBinshore, bfinshore)) %>%
dplyr::select(stratum_number2) %>%
dplyr::distinct()
# offshore of all bluefish survey strata
MABGBothoffshore2 <- coast3nmbuffst %>%
dplyr::filter(stratum_number %in% setdiff(MABGBoffshore, bfoffshore)) %>%
dplyr::select(stratum_number2) %>%
dplyr::distinct()
# needed to cover the whole northwest atlantic grid
allother2 <- coast3nmbuffst %>%
dplyr::filter(!stratum_number %in% c(MAB, GB, GOM, SS)) %>%
dplyr::select(stratum_number2) %>%
dplyr::distinct()
# all epus
allEPU2 <- coast3nmbuffst %>%
dplyr::filter(stratum_number %in% c(MAB, GB, GOM, SS)) %>%
dplyr::select(stratum_number2) %>%
dplyr::distinct()
# save the modified grid
saveRDS(coast3nmbuffst, file = here("spatialdat/coast3nmbuffst.rds"))
Modify the function from `FishStatsUtils::Prepare_NWA_Extrapolation_Data_Fn` to make a new grid with the updated strata:
Prepare_NWA_Extrapolation_Data_Fn_skg <- function (strata.limits = NULL,
epu_to_use = c("All", "Georges_Bank", "Mid_Atlantic_Bight", "Scotian_Shelf", "Gulf_of_Maine", "Other")[1],
projargs = NA, zone = NA, flip_around_dateline = FALSE, ...)
{
if (is.null(strata.limits)) {
strata.limits = list(All_areas = 1:1e+05)
}
message("Using strata ", strata.limits)
if (any(tolower(epu_to_use) %in% "all")) {
epu_to_use <- c("Georges_Bank", "Mid_Atlantic_Bight",
"Scotian_Shelf", "Gulf_of_Maine", "Other")
}
utils::data(northwest_atlantic_grid, package = "FishStatsUtils")
Data_Extrap <- coast3nmbuffst
Tmp = cbind(BEST_DEPTH_M = 0, BEST_LAT_DD = Data_Extrap[,
"Lat"], BEST_LON_DD = Data_Extrap[, "Lon"])
if (length(strata.limits) == 1 && strata.limits[1] == "EPU") {
Data_Extrap <- Data_Extrap[Data_Extrap$EPU %in% epu_to_use,
]
Data_Extrap$EPU <- droplevels(Data_Extrap$EPU)
a_el = matrix(NA, nrow = nrow(Data_Extrap), ncol = length(epu_to_use),
dimnames = list(NULL, epu_to_use))
Area_km2_x = Data_Extrap[, "Area_in_survey_km2"]
for (l in 1:ncol(a_el)) {
a_el[, l] = ifelse(Data_Extrap[, "EPU"] %in% epu_to_use[[l]],
Area_km2_x, 0)
}
}
else {
a_el = as.data.frame(matrix(NA, nrow = nrow(Data_Extrap),
ncol = length(strata.limits), dimnames = list(NULL,
names(strata.limits))))
Area_km2_x = Data_Extrap[, "Area_in_survey_km2"]
for (l in 1:ncol(a_el)) {
a_el[, l] = ifelse(Data_Extrap[, "stratum_number2"] %in%
strata.limits[[l]], Area_km2_x, 0)
}
}
tmpUTM = project_coordinates(X = Data_Extrap[, "Lon"], Y = Data_Extrap[,
"Lat"], projargs = projargs, zone = zone, flip_around_dateline = flip_around_dateline)
Data_Extrap = cbind(Data_Extrap, Include = 1)
Data_Extrap[, c("E_km", "N_km")] = tmpUTM[, c("X", "Y")]
Return = list(a_el = a_el, Data_Extrap = Data_Extrap, zone = attr(tmpUTM,
"zone"), projargs = attr(tmpUTM, "projargs"), flip_around_dateline = flip_around_dateline,
Area_km2_x = Area_km2_x)
return(Return)
}
Now define the new `strata.limits`. Ensure that the full spatial domain includes the full extrapolation grid, since this allows us to cut down to any subset of strata; all models converged as noted here.
strata.limits <- as.list(c("AllEPU" = allEPU2,
"MABGB" = MABGB2,
"MABGBstate" = MABGBstate,
"MABGBfed" = MABGBfed,
"MAB" = MAB2,
"GB" = GB2,
"GOM" = GOM2,
"bfall" = bfall2,
"bfin" = bfinshore2,
"bfoff" = bfoffshore2,
"MABGBalbinshore" = albinshore2,
"MABGBothoffshore" = MABGBothoffshore2,
"allother" = allother2))
Make the new extrapolation list:
Extrapolation_List <- Prepare_NWA_Extrapolation_Data_Fn_skg( strata.limits=strata.limits)
saveRDS(Extrapolation_List, file = here("spatialdat/CustomExtrapolationList.rds"))
This list is passed to `fit_model` instead of running the built-in function:
fit <- fit_model(
settings = settings,
extrapolation_list = New_Extrapolation_List,
Lat_i = bluepyagg_stn_fall$Lat,
Lon_i = bluepyagg_stn_fall$Lon,
t_i = bluepyagg_stn_fall$Year,
b_i = as_units(bluepyagg_stn_fall[,'Catch_g'], 'g'),
a_i = rep(1, nrow(bluepyagg_stn_fall)),
v_i = bluepyagg_stn_fall$Vessel,
Q_ik = as.matrix(bluepyagg_stn_fall[,c("npiscsp",
"meanpisclen",
"sstfill"
)]),
#Use_REML = TRUE,
working_dir = paste0(working_dir, "/"))
Bias correction is now turned on, using a different and much faster algorithm than in the current VAST release. Below we compare how it scales results relative to the CRAN version’s bias correction.
Here is the new VAST script:
# VAST attempt 2 univariate model as a script
# modified from https://github.com/James-Thorson-NOAA/VAST/wiki/Index-standardization
# Load packages
library(here)
library(dplyr)
library(VAST)
#Read in data, separate spring and fall, and rename columns for VAST:
# this dataset created in SSTmethods.Rmd
bluepyagg_stn <- readRDS(here::here("fhdat/bluepyagg_stn_all_OISST.rds"))
# make SST column that uses surftemp unless missing or 0
# there are 3 surftemp 0 values in the dataset, all with oisst > 15
bluepyagg_stn <- bluepyagg_stn %>%
dplyr::mutate(sstfill = ifelse((is.na(surftemp)|surftemp==0), oisst, surftemp))
#bluepyagg_stn <- bluepyagg_pred_stn
# filter to assessment years at Tony's suggestion
# code Vessel as AL=0, HB=1, NEAMAP=2
bluepyagg_stn_fall <- bluepyagg_stn %>%
#ungroup() %>%
filter(season_ng == "FALL",
year > 1984) %>%
mutate(AreaSwept_km2 = 1, #Elizabeth's code
#declon = -declon already done before neamap merge
Vessel = as.numeric(as.factor(vessel))-1
) %>%
dplyr::select(Catch_g = meanbluepywt, #use bluepywt for individuals input in example
Year = year,
Vessel,
AreaSwept_km2,
Lat = declat,
Lon = declon,
meanpisclen,
npiscsp,
#bottemp, #this leaves out many stations for NEFSC
#surftemp, #this leaves out many stations for NEFSC
oisst,
sstfill
) %>%
na.omit() %>%
as.data.frame()
bluepyagg_stn_spring <- bluepyagg_stn %>%
filter(season_ng == "SPRING",
year > 1984) %>%
mutate(AreaSwept_km2 =1, #Elizabeth's code
#declon = -declon already done before neamap merge
Vessel = as.numeric(as.factor(vessel))-1
) %>%
dplyr::select(Catch_g = meanbluepywt,
Year = year,
Vessel,
AreaSwept_km2,
Lat = declat,
Lon = declon,
meanpisclen,
npiscsp,
#bottemp, #this leaves out many stations for NEFSC
#surftemp, #this leaves out many stations for NEFSC
oisst,
sstfill
) %>%
na.omit() %>%
as.data.frame()
# Make settings (turning off bias.correct to save time for example)
# NEFSC strata limits https://github.com/James-Thorson-NOAA/VAST/issues/302
# use only MAB, GB, GOM, SS EPUs
# leave out south of Cape Hatteras at Elizabeth's suggestion
# could also leave out SS?
# CHECK if these EPUs match what we use in SOE
bfinshore <- c(3020, 3050, 3080, 3110, 3140, 3170, 3200, 3230,
3260, 3290, 3320, 3350, 3380, 3410, 3440, 3450, 3460)
bfoffshore <- c(1010, 1730, 1690, 1650, 1050, 1060, 1090, 1100, 1250, 1200, 1190, 1610)
MAB <- c(1010:1080, 1100:1120, 1600:1750, 3010:3450, 3470, 3500, 3510)
GB <- c(1090, 1130:1210, 1230, 1250, 3460, 3480, 3490, 3520:3550)
GOM <- c(1220, 1240, 1260:1290, 1360:1400, 3560:3830)
SS <- c(1300:1352, 3840:3990)
MABGBinshore <- c(3010:3450, 3460, 3470, 3480, 3490, 3500, 3510, 3520:3550)
MABGBoffshore <- c(1010:1080, 1090, 1100:1120,1130:1210, 1230, 1250, 1600:1750)
coast3nmbuffst <- readRDS(here::here("spatialdat/coast3nmbuffst.rds"))
MAB2 <- coast3nmbuffst %>%
dplyr::filter(stratum_number %in% MAB) %>%
dplyr::select(stratum_number2) %>%
dplyr::distinct()
# MAB state waters
MAB2state <- MAB2 %>%
dplyr::filter(stratum_number2 %% 10 == 1)
# MAB federal waters
MAB2fed <- MAB2 %>%
dplyr::filter(stratum_number2 %% 10 == 2)
# Georges Bank EPU
GB2 <- coast3nmbuffst %>%
dplyr::filter(stratum_number %in% GB) %>%
dplyr::select(stratum_number2) %>%
dplyr::distinct()
# GB state waters
GB2state <- GB2 %>%
dplyr::filter(stratum_number2 %% 10 == 1)
#GB federal waters
GB2fed <- GB2 %>%
dplyr::filter(stratum_number2 %% 10 == 2)
# whole bluefish domain MABGB
MABGB2 <- dplyr::bind_rows(MAB2, GB2)
# MABGB state waters
MABGBstate <- dplyr::bind_rows(MAB2state, GB2state)
# MABGB federal waters
MABGBfed <- dplyr::bind_rows(MAB2fed, GB2fed)
# gulf of maine EPU (for SOE)
GOM2 <- coast3nmbuffst %>%
dplyr::filter(stratum_number %in% GOM) %>%
dplyr::select(stratum_number2) %>%
dplyr::distinct()
# scotian shelf EPU (for SOE)
SS2 <- coast3nmbuffst %>%
dplyr::filter(stratum_number %in% SS) %>%
dplyr::select(stratum_number2) %>%
dplyr::distinct()
# previous bluefish strata
bfinshore2 <- coast3nmbuffst %>%
dplyr::filter(stratum_number %in% bfinshore) %>%
dplyr::select(stratum_number2) %>%
dplyr::distinct()
# additional new bluefish strata 2022
bfoffshore2 <- coast3nmbuffst %>%
dplyr::filter(stratum_number %in% bfoffshore) %>%
dplyr::select(stratum_number2) %>%
dplyr::distinct()
# all bluefish strata
bfall2 <- dplyr::bind_rows(bfinshore2, bfoffshore2)
# albatross inshore strata
albinshore2 <- coast3nmbuffst %>%
dplyr::filter(stratum_number %in% setdiff(MABGBinshore, bfinshore)) %>%
dplyr::select(stratum_number2) %>%
dplyr::distinct()
# offshore of all bluefish survey strata
MABGBothoffshore2 <- coast3nmbuffst %>%
dplyr::filter(stratum_number %in% setdiff(MABGBoffshore, bfoffshore)) %>%
dplyr::select(stratum_number2) %>%
dplyr::distinct()
# needed to cover the whole northwest atlantic grid
allother2 <- coast3nmbuffst %>%
dplyr::filter(!stratum_number %in% c(MAB, GB, GOM, SS)) %>%
dplyr::select(stratum_number2) %>%
dplyr::distinct()
# all epus
allEPU2 <- coast3nmbuffst %>%
dplyr::filter(stratum_number %in% c(MAB, GB, GOM, SS)) %>%
dplyr::select(stratum_number2) %>%
dplyr::distinct()
# configs
FieldConfig <- c(
"Omega1" = 0, # number of spatial variation factors (0, 1, AR1)
"Epsilon1" = 0, # number of spatio-temporal factors
"Omega2" = 0,
"Epsilon2" = 0
)
# Model selection options, FieldConfig default (all IID)
# Season_knots + suffix below
# _base No vessel overdispersion or length/number covariates (ensure same dataset)
# _len Predator mean length covariate
# _no Number of predator species covariate
# _lenno Predator mean length and number of predator species covariates
# _eta10 Overdispersion (vessel effect) in first linear predictor (prey encounter)
# _eta11 Overdispersion (vessel effect) in both linear predictors (prey wt)
RhoConfig <- c(
"Beta1" = 0, # temporal structure on years (intercepts)
"Beta2" = 0,
"Epsilon1" = 0, # temporal structure on spatio-temporal variation
"Epsilon2" = 0
)
# 0 off (fixed effects)
# 1 independent
# 2 random walk
# 3 constant among years (fixed effect)
# 4 AR1
OverdispersionConfig <- c("eta1"=0, "eta2"=0)
# eta1 = vessel effects on prey encounter rate
# eta2 = vessel effects on prey weight
strata.limits <- as.list(c("AllEPU" = allEPU2,
"MABGB" = MABGB2,
"MABGBstate" = MABGBstate,
"MABGBfed" = MABGBfed,
"MAB" = MAB2,
"GB" = GB2,
"GOM" = GOM2,
"bfall" = bfall2,
"bfin" = bfinshore2,
"bfoff" = bfoffshore2,
"MABGBalbinshore" = albinshore2,
"MABGBothoffshore" = MABGBothoffshore2,
"allother" = allother2))
settings = make_settings( n_x = 500,
Region = "northwest_atlantic",
Version = "VAST_v14_0_1", #needed to prevent error from newer dev version number
#strata.limits = list('All_areas' = 1:1e5), full area
strata.limits = strata.limits,
purpose = "index2",
bias.correct = TRUE,
#use_anisotropy = FALSE,
#fine_scale = FALSE,
#FieldConfig = FieldConfig,
#RhoConfig = RhoConfig,
OverdispersionConfig = OverdispersionConfig
)
New_Extrapolation_List <- readRDS(here::here("spatialdat/CustomExtrapolationList.rds"))
# select dataset and set directory for output
#########################################################
# Run model fall
season <- c("fall_500_lennosst_ALLsplit_biascorrect")
working_dir <- here::here(sprintf("pyindex/allagg_%s/", season))
if(!dir.exists(working_dir)) {
dir.create(working_dir)
}
# subset for faster testing
#bluepyagg_stn_fall <- bluepyagg_stn_fall %>% filter(Year<1990)
fit <- fit_model(
settings = settings,
extrapolation_list = New_Extrapolation_List,
Lat_i = bluepyagg_stn_fall$Lat,
Lon_i = bluepyagg_stn_fall$Lon,
t_i = bluepyagg_stn_fall$Year,
b_i = as_units(bluepyagg_stn_fall[,'Catch_g'], 'g'),
a_i = rep(1, nrow(bluepyagg_stn_fall)),
v_i = bluepyagg_stn_fall$Vessel,
Q_ik = as.matrix(bluepyagg_stn_fall[,c("npiscsp",
"meanpisclen",
"sstfill"
)]),
#Use_REML = TRUE,
working_dir = paste0(working_dir, "/"))
saveRDS(fit, file = paste0(working_dir, "/fit.rds"))
# Plot results
plot( fit,
working_dir = paste0(working_dir, "/"))
######################################################
# Run model spring
season <- c("spring_500_lennosst_ALLsplit_biascorrect")
working_dir <- here::here(sprintf("pyindex/allagg_%s/", season))
if(!dir.exists(working_dir)) {
dir.create(working_dir)
}
# subset for faster testing
#bluepyagg_stn_spring <- bluepyagg_stn_spring %>% filter(Year<1990)
fit <- fit_model( settings = settings,
extrapolation_list = New_Extrapolation_List,
Lat_i = bluepyagg_stn_spring[,'Lat'],
Lon_i = bluepyagg_stn_spring[,'Lon'],
t_i = bluepyagg_stn_spring[,'Year'],
b_i = as_units(bluepyagg_stn_spring[,'Catch_g'], 'g'),
a_i = rep(1, nrow(bluepyagg_stn_spring)),
v_i = bluepyagg_stn_spring$Vessel,
Q_ik = as.matrix(bluepyagg_stn_spring[,c("npiscsp",
"meanpisclen",
"sstfill"
)]),
# Use_REML = TRUE,
working_dir = paste0(working_dir, "/"))
saveRDS(fit, file = paste0(working_dir, "/fit.rds"))
# Plot results
plot( fit,
working_dir = paste0(working_dir, "/"))
Make a lookup table:
# strata.limits <- as.list(c("AllEPU" = allEPU2,
# "MABGB" = MABGB2,
# "MABGBstate" = MABGBstate,
# "MABGBfed" = MABGBfed,
# "MAB" = MAB2,
# "GB" = GB2,
# "GOM" = GOM2,
# "bfall" = bfall2,
# "bfin" = bfinshore2,
# "bfoff" = bfoffshore2,
# "MABGBalbinshore" = albinshore2,
# "MABGBothoffshore" = MABGBothoffshore2,
# "allother" = allother2))
stratlook <- data.frame(Stratum = c("Stratum_1",
"Stratum_2",
"Stratum_3",
"Stratum_4",
"Stratum_5",
"Stratum_6",
"Stratum_7",
"Stratum_8",
"Stratum_9",
"Stratum_10",
"Stratum_11",
"Stratum_12",
"Stratum_13"),
Region = c("AllEPU",
"MABGB",
"MABGBstate",
"MABGBfed",
"MAB",
"GB",
"GOM",
"bfall",
"bfin",
"bfoff",
"MABGBalbinshore",
"MABGBothoffshore",
"allother"))
Plot individual time series with standard errors:
splitoutput <- read.csv("pyindex/allagg_fall_500_lennosst_ALLsplit_biascorrect/Index.csv")
splitoutput <- splitoutput %>%
left_join(stratlook)
ggplot(splitoutput, aes(x=Time, y=Estimate, colour=Region)) +
geom_errorbar(aes(ymin=Estimate-Std..Error.for.Estimate, ymax=Estimate+Std..Error.for.Estimate))+
geom_point()+
geom_line()+
facet_wrap(~Region, scales = "free_y") +
guides(colour = guide_legend(ncol=2)) +
#theme(legend.position = c(1, 0),
# legend.justification = c(1, 0))
theme(legend.position="none")
or just the indices from inshore (alb), inshore bluefish, offshore bluefish, and further out, plus the state and federal waters split.
in2off <- splitoutput %>%
dplyr::select(Time, Region, Estimate, Std..Error.for.Estimate) %>%
tidyr::pivot_longer(c(Estimate, Std..Error.for.Estimate), names_to = "Var") %>%
dplyr::group_by(Var) %>%
tidyr::pivot_wider(names_from = Region, values_from = value) %>%
dplyr::mutate(AlbInshore = MABGBalbinshore,
BlueInshore = bfin,
BlueOffshore = bfoff,
#OthOffshore = MABGB - (bfoff + bfin + MABGBalbinshore),
OthOffshore = MABGBothoffshore,
StateWaters = MABGBstate,
FedWaters = MABGBfed,
SumMABGB = AlbInshore + BlueInshore + BlueOffshore + OthOffshore) %>%
dplyr::select(Time, AlbInshore, BlueInshore, BlueOffshore, OthOffshore, SumMABGB, StateWaters, FedWaters, MABGB) %>%
tidyr::pivot_longer(!c(Time, Var), names_to = "Region", values_to = "value") %>%
tidyr::pivot_wider(names_from = "Var", values_from = "value")
ggplot(in2off, aes(x=Time, y=Estimate, colour = Region)) +
geom_errorbar(aes(ymin=Estimate-Std..Error.for.Estimate, ymax=Estimate+Std..Error.for.Estimate))+
geom_point()+
geom_line()+
#facet_wrap(~Region) + #+ , scales = "free_y"
#theme(legend.position = c(1, 0),
# legend.justification = c(1, 0))
ggtitle("Fall Prey Index, Mid-Atlantic and Georges Bank")
or as proportions (here proportion of MABGB index).
MABGBprop <- in2off %>%
#dplyr::filter(Region != "AllEPU") %>%
dplyr::select(Time, Region, Estimate) %>%
tidyr::pivot_wider(names_from = Region, values_from = Estimate) %>%
dplyr::mutate(AlbInshoreprop = AlbInshore/MABGB,
BlueInshoreprop = BlueInshore/MABGB,
BlueOffshoreprop = BlueOffshore/MABGB,
OthOffshoreprop = OthOffshore/MABGB,
StateWatersprop = StateWaters/MABGB,
FedWatersprop = FedWaters/MABGB) %>%
tidyr::pivot_longer(!Time, names_to = "Region", values_to = "Estimate") %>%
dplyr::filter(Region %in% c("AlbInshoreprop", "BlueInshoreprop", "BlueOffshoreprop",
"OthOffshoreprop", "StateWatersprop", "FedWatersprop"))
ggplot(MABGBprop, aes(x=Time, y=Estimate, colour = Region)) +
geom_point()+
geom_line()+
#facet_wrap(~Region) + #+ , scales = "free_y"
#theme(legend.position = c(1, 0),
# legend.justification = c(1, 0))
ggtitle("Fall Prey Index as proportion of Mid-Atlantic and Georges Bank")
Fall density maps with covariates
diagplots <- c("Data_and_knots",
"Data_by_year",
"quantile_residuals",
"quantile_residuals_on_map",
"Aniso",
"center_of_gravity")
for(p in diagplots){
  cat(" \n####", as.character(p)," \n")
  cat(paste0("")) # image include markdown for each diagnostic plot elided in source
  cat(" \n")
}
Plot individual time series
splitoutput <- read.csv("pyindex/allagg_spring_500_lennosst_ALLsplit_biascorrect/Index.csv")
splitoutput <- splitoutput %>%
left_join(stratlook)
ggplot(splitoutput, aes(x=Time, y=Estimate, colour=Region)) +
geom_errorbar(aes(ymin=Estimate-Std..Error.for.Estimate, ymax=Estimate+Std..Error.for.Estimate))+
geom_point()+
geom_line()+
facet_wrap(~Region, scales = "free_y") +
guides(colour = guide_legend(ncol=2)) +
#theme(legend.position = c(1, 0),
# legend.justification = c(1, 0))
theme(legend.position="none")
or just the indices from inshore (alb), inshore bluefish, offshore bluefish, and further out, plus the state and federal waters split.
in2off <- splitoutput %>%
dplyr::select(Time, Region, Estimate, Std..Error.for.Estimate) %>%
tidyr::pivot_longer(c(Estimate, Std..Error.for.Estimate), names_to = "Var") %>%
dplyr::group_by(Var) %>%
tidyr::pivot_wider(names_from = Region, values_from = value) %>%
dplyr::mutate(AlbInshore = MABGBalbinshore,
BlueInshore = bfin,
BlueOffshore = bfoff,
#OthOffshore = MABGB - (bfoff + bfin + MABGBalbinshore),
OthOffshore = MABGBothoffshore,
StateWaters = MABGBstate,
FedWaters = MABGBfed,
SumMABGB = AlbInshore + BlueInshore + BlueOffshore + OthOffshore) %>%
dplyr::select(Time, AlbInshore, BlueInshore, BlueOffshore, OthOffshore, SumMABGB, StateWaters, FedWaters, MABGB) %>%
tidyr::pivot_longer(!c(Time, Var), names_to = "Region", values_to = "value") %>%
tidyr::pivot_wider(names_from = "Var", values_from = "value")
ggplot(in2off, aes(x=Time, y=Estimate, colour = Region)) +
geom_errorbar(aes(ymin=Estimate-Std..Error.for.Estimate, ymax=Estimate+Std..Error.for.Estimate))+
geom_point()+
geom_line()+
#facet_wrap(~Region) + #+ , scales = "free_y"
#theme(legend.position = c(1, 0),
# legend.justification = c(1, 0))
ggtitle("Spring Prey Index, Mid-Atlantic and Georges Bank")
or as proportions (here proportion of MABGB index).
MABGBprop <- in2off %>%
#dplyr::filter(Region != "AllEPU") %>%
dplyr::select(Time, Region, Estimate) %>%
tidyr::pivot_wider(names_from = Region, values_from = Estimate) %>%
dplyr::mutate(AlbInshoreprop = AlbInshore/MABGB,
BlueInshoreprop = BlueInshore/MABGB,
BlueOffshoreprop = BlueOffshore/MABGB,
OthOffshoreprop = OthOffshore/MABGB,
StateWatersprop = StateWaters/MABGB,
FedWatersprop = FedWaters/MABGB) %>%
tidyr::pivot_longer(!Time, names_to = "Region", values_to = "Estimate") %>%
dplyr::filter(Region %in% c("AlbInshoreprop", "BlueInshoreprop", "BlueOffshoreprop",
"OthOffshoreprop", "StateWatersprop", "FedWatersprop"))
ggplot(MABGBprop, aes(x=Time, y=Estimate, colour = Region)) +
geom_point()+
geom_line()+
#facet_wrap(~Region) + #+ , scales = "free_y"
#theme(legend.position = c(1, 0),
# legend.justification = c(1, 0))
ggtitle("Spring Prey Index as proportion of Mid-Atlantic and Georges Bank")
Spring density maps with covariates
diagplots <- c("Data_and_knots",
"Data_by_year",
"quantile_residuals",
"quantile_residuals_on_map",
"Aniso",
"center_of_gravity")
for(p in diagplots){
  cat(" \n####", as.character(p)," \n")
  cat(paste0("")) # image include markdown for each diagnostic plot elided in source
  cat(" \n")
}
All results are in the respective `pyindex/allagg_fall_500_lennosst_ALLsplit_biascorrect` and `pyindex/allagg_spring_500_lennosst_ALLsplit_biascorrect` folders.
The full results are on google drive rather than github to save space.
moddir2 <- c("pyindex/allagg_fall_500_lennosst_ALLsplit",
"pyindex/allagg_fall_500_lennosst_ALLsplit_biascorrect",
"pyindex/allagg_spring_500_lennosst_ALLsplit",
"pyindex/allagg_spring_500_lennosst_ALLsplit_biascorrect")
# function to apply extracting info
getmodinfo.index <- function(d.name){
# read settings
modpath <- stringr::str_split(d.name, "/", simplify = TRUE)
modname <- modpath[length(modpath)]
settings <- read.table(file.path(d.name, "settings.txt"), comment.char = "",
fill = TRUE, header = FALSE)
n_x <- as.numeric(as.character(settings[(which(settings[,1]=="$n_x")+1),2]))
grid_size_km <- as.numeric(as.character(settings[(which(settings[,1]=="$grid_size_km")+1),2]))
max_cells <- as.numeric(as.character(settings[(which(settings[,1]=="$max_cells")+1),2]))
use_anisotropy <- as.character(settings[(which(settings[,1]=="$use_anisotropy")+1),2])
fine_scale <- as.character(settings[(which(settings[,1]=="$fine_scale")+1),2])
bias.correct <- as.character(settings[(which(settings[,1]=="$bias.correct")+1),2])
#FieldConfig
if(settings[(which(settings[,1]=="$FieldConfig")+1),1]=="Component_1"){
omega1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+2),2])
omega2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),1])
epsilon1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),2])
epsilon2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+5),1])
beta1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+6),2])
beta2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+7),1])
}
if(settings[(which(settings[,1]=="$FieldConfig")+1),1]=="Omega1"){
omega1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),1])
omega2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),1])
epsilon1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),2])
epsilon2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),2])
beta1 <- "IID"
beta2 <- "IID"
}
#RhoConfig
rho_beta1 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+3),1]))
rho_beta2 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+3),2]))
rho_epsilon1 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+4),1]))
rho_epsilon2 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+4),2]))
# read parameter estimates; the loaded object is called parameter_estimates
load(file.path(d.name, "parameter_estimates.RData"))
AIC <- parameter_estimates$AIC[1]
converged <- parameter_estimates$Convergence_check[1]
fixedcoeff <- unname(parameter_estimates$number_of_coefficients[2])
randomcoeff <- unname(parameter_estimates$number_of_coefficients[3])
index <- read.csv(file.path(d.name, "Index.csv"))
# return model attributes as a dataframe
out <- data.frame(modname = modname,
n_x = n_x,
grid_size_km = grid_size_km,
max_cells = max_cells,
use_anisotropy = use_anisotropy,
fine_scale = fine_scale,
bias.correct = bias.correct,
omega1 = omega1,
omega2 = omega2,
epsilon1 = epsilon1,
epsilon2 = epsilon2,
beta1 = beta1,
beta2 = beta2,
rho_epsilon1 = rho_epsilon1,
rho_epsilon2 = rho_epsilon2,
rho_beta1 = rho_beta1,
rho_beta2 = rho_beta2,
AIC = AIC,
converged = converged,
fixedcoeff = fixedcoeff,
randomcoeff = randomcoeff,
index = index
)
return(out)
}
modcompare2 <- purrr::map_dfr(moddir2, getmodinfo.index)
Which models converged?
fallconverged2 <- modcompare2 %>%
dplyr::filter(grepl("fall", modname)) %>%
dplyr::select(modname, converged) %>%
dplyr::distinct()
knitr::kable(fallconverged2)
| modname | converged |
|---|---|
| allagg_fall_500_lennosst_ALLsplit | There is no evidence that the model is not converged |
| allagg_fall_500_lennosst_ALLsplit_biascorrect | There is no evidence that the model is not converged |
Plot index time series with standard errors by model:
falloutput2 <- modcompare2 %>%
dplyr::filter(grepl("fall", modname)) %>%
dplyr::left_join(stratlook, by = c("index.Stratum" = "Stratum")) %>%
#dplyr::mutate(Region = case_when(grepl("MABGB", modname) ~ "MABGB",
# TRUE ~ Region)) %>%
dplyr::filter(Region %in% c("AllEPU", "MABGB",
"bfin", "bfoff",
"MABGBalbinshore",
"MABGBstate", "MABGBfed"))
ggplot(falloutput2, aes(x=index.Time, y=index.Estimate, colour=modname)) +
geom_errorbar(aes(ymin=index.Estimate-index.Std..Error.for.Estimate, ymax=index.Estimate+index.Std..Error.for.Estimate))+
geom_point()+
geom_line()+
facet_wrap(~Region, scales = "free_y") +
guides(colour = guide_legend(ncol=2)) +
#theme(legend.position = c(1, 0),
# legend.justification = c(1, 0))
theme(legend.position="bottom")
Which models converged?
springconverged2 <- modcompare2 %>%
dplyr::filter(grepl("spring", modname)) %>%
dplyr::select(modname, converged) %>%
dplyr::distinct()
knitr::kable(springconverged2)
| modname | converged |
|---|---|
| allagg_spring_500_lennosst_ALLsplit | There is no evidence that the model is not converged |
| allagg_spring_500_lennosst_ALLsplit_biascorrect | There is no evidence that the model is not converged |
Plot index time series with standard errors by model:
springoutput2 <- modcompare2 %>%
dplyr::filter(grepl("spring", modname)) %>%
dplyr::left_join(stratlook, by = c("index.Stratum" = "Stratum")) %>%
#dplyr::mutate(Region = case_when(grepl("MABGB", modname) ~ "MABGB",
# TRUE ~ Region)) %>%
dplyr::filter(Region %in% c("AllEPU", "MABGB",
"bfin", "bfoff",
"MABGBalbinshore",
"MABGBstate", "MABGBfed"))
ggplot(springoutput2, aes(x=index.Time, y=index.Estimate, colour=modname)) +
geom_errorbar(aes(ymin=index.Estimate-index.Std..Error.for.Estimate, ymax=index.Estimate+index.Std..Error.for.Estimate))+
geom_point()+
geom_line()+
facet_wrap(~Region, scales = "free_y") +
guides(colour = guide_legend(ncol=2)) +
#theme(legend.position = c(1, 0),
# legend.justification = c(1, 0))
theme(legend.position="bottom")
As noted previously with the VAST release code and only survey strata in the mix, bias correction tends to inflate the index a bit. If the index is used as a covariate, this is unlikely to matter.
Another nifty dev trick from Jim:

> Regarding simulating data, the dev branch has a new function `reload_model` where you can now save a fit, close the terminal, reload in a new terminal, and run `reload_model` to relink the DLL. This then allows `simulate_data` and other functions requiring a call to the DLL to work in a new session.
# read spring fit back in and save plot object from plot function
season <- c("spring_500_lennosst_ALLsplit_biascorrect")
working_dir <- here::here(sprintf("pyindex/allagg_%s/", season))
fit <- readRDS(paste0(working_dir, "fit.rds"))
plotvarsspring <- plot( fit,
working_dir = paste0(working_dir, "/"))
# generate the residuals from fit object rather than rerunning VAST?
# yes we can, but we have to reload_model like Jim says above
reload_model(fit)
dharmaRes = summary( fit, what="residuals", working_dir=working_dir, type=1)
saveRDS(dharmaRes, file = here::here("pyindex/dharmaResSpringBCdev.rds"))
# this code from https://github.com/James-Thorson-NOAA/FishStatsUtils/blob/main/R/fit_model.R#L508-L612
x <- fit
n_samples <- 250
# the block below is copied from inside the package's summary() method, so
# define the arguments it expects before running it as a script
what <- "residuals"
type <- 1
random_seed <- NULL
# Residuals
if( tolower(what) == "residuals" ){
# extract objects
Obj = x$tmb_list$Obj
# Change n_g
# Must change back explicitly because TMB appears to pass env as a pointer, so changes in copy affect original x outside of function!
n_g_orig = Obj$env$data$n_g
revert_settings = function(n_g){Obj$env$data$n_g = n_g}
on.exit( revert_settings(n_g_orig) )
Obj$env$data$n_g = 0
if( type %in% c(1,4) ){
b_iz = matrix(NA, nrow=length(x$data_list$b_i), ncol=n_samples)
message( "Sampling from the distribution of data conditional on estimated fixed and random effects" )
for( zI in 1:n_samples ){
if( zI%%max(1,floor(n_samples/10)) == 0 ){
message( " Finished sample ", zI, " of ",n_samples )
}
b_iz[,zI] = simulate_data( fit=list(tmb_list=list(Obj=Obj)), type=type, random_seed=list(random_seed+zI,NULL)[[1+is.null(random_seed)]] )$b_i
}
#if( any(is.na(x$data_list$b_i)) ){
# stop("dharmaRes not designed to work when any observations have b_i=NA")
#}
# Substitute any observation where b_i = NA with all zeros, which will then have a uniform PIT
which_na = which(is.na(x$data_list$b_i))
if( length(which_na) > 0 ){
x$data_list$b_i[which_na] = 0
b_iz[which_na,] = 0
warning("When calculating DHARMa residuals, replacing instances where b_i=NA with a uniform PIT residual")
}
if( any(is.na(b_iz)) ){
stop("Check simulated residuals for NA values")
}
# Run DHARMa
# Adding jitters because DHARMa version 0.3.2.0 sometimes still throws an error method="traditional" and integer=FALSE without jitters
dharmaRes = DHARMa::createDHARMa(simulatedResponse=strip_units(b_iz) + 1e-10*array(rnorm(prod(dim(b_iz))),dim=dim(b_iz)),
observedResponse=strip_units(x$data_list$b_i) + 1e-10*rnorm(length(x$data_list$b_i)),
fittedPredictedResponse=strip_units(x$Report$D_i),
integer=FALSE)
#dharmaRes = DHARMa::createDHARMa(simulatedResponse=strip_units(b_iz),
# observedResponse=strip_units(x$data_list$b_i),
# fittedPredictedResponse=strip_units(x$Report$D_i),
# method="PIT")
# Save to report error
if( FALSE ){
all = list( simulatedResponse=strip_units(b_iz), observedResponse=strip_units(x$data_list$b_i), fittedPredictedResponse=strip_units(x$Report$D_i) )
#save(all, file=paste0(root_dir,"all.RData") )
dharmaRes = DHARMa::createDHARMa(simulatedResponse=all$simulatedResponse + rep(1,nrow(all$simulatedResponse))%o%c(0.001*rnorm(1),rep(0,ncol(all$simulatedResponse)-1)),
observedResponse=all$observedResponse,
fittedPredictedResponse=all$fittedPredictedResponse,
method="PIT")
}
# Calculate probability-integral-transform (PIT) residuals
message( "Substituting probability-integral-transform (PIT) residuals for DHARMa-calculated residuals" )
prop_lessthan_i = apply( b_iz<outer(x$data_list$b_i,rep(1,n_samples)),
MARGIN=1,
FUN=mean )
prop_lessthanorequalto_i = apply( b_iz<=outer(x$data_list$b_i,rep(1,n_samples)),
MARGIN=1,
FUN=mean )
PIT_i = runif(min=prop_lessthan_i, max=prop_lessthanorequalto_i, n=length(prop_lessthan_i) )
# cbind( "Difference"=dharmaRes$scaledResiduals - PIT_i, "PIT"=PIT_i, "Original"=dharmaRes$scaledResiduals, "b_i"=x$data_list$b_i )
dharmaRes$scaledResiduals = PIT_i
}else if( type==0 ){
# Check for issues
if( !all(x$data_list$ObsModel_ez[1,] %in% c(1,2)) ){
stop("oneStepAhead residuals only code for gamma and lognormal distributions")
}
# Run OSA
message( "Running oneStepPredict_deltaModel for each observation, to then load them into DHARMa object for plotting" )
osa = TMBhelper::oneStepPredict_deltaModel( obj = x$tmb_list$Obj,
observation.name = "b_i",
method = "cdf",
data.term.indicator = "keep",
deltaSupport = 0,
trace = TRUE )
# Build DHARMa object on fake inputs and load OSA into DHARMa object
dharmaRes = DHARMa::createDHARMa(simulatedResponse=matrix(rnorm(x$data_list$n_i*10,mean=x$data_list$b_i),ncol=10),
observedResponse=x$data_list$b_i,
fittedPredictedResponse=x$Report$D_i,
integer=FALSE)
dharmaRes$scaledResiduals = pnorm(osa$residual)
}else{
stop("`type` only makes sense for 0 (oneStepAhead), 1 (conditional, a.k.a. measurement error) or 4 (unconditional) simulations")
}
# do plot
if( is.null(working_dir) ){
plot_dharma(dharmaRes, ...)
}else if(!is.na(working_dir) ){
png(file=paste0(working_dir,"quantile_residuals.png"), width=8, height=4, res=200, units='in')
plot_dharma(dharmaRes, ...)
dev.off()
}
# Return stuff
ans = dharmaRes
message( "Invisibly returning output from `DHARMa::createDHARMa`, e.g., to apply `plot.DHARMa` to this output")
}
# residual map plot code commented out of dev version
# https://github.com/James-Thorson-NOAA/FishStatsUtils/blob/dev/R/plot_results.R#L264-L286
# Plotting quantile residuals
#message("\n### Making quantile residuals using conditional simulation and package DHARMa")
# dharmaRes was already generated above; reuse the version saved by plot()
dharmaRes <- plotvarsspring$dharmaRes
year_labels <-plotvarsspring$plot_maps_args$year_labels
years_to_plot <- plotvarsspring$plot_maps_args$years_to_plot
n_cells_residuals <- NULL #plotvarsspring$plot_maps_args$n_cells #?
projargs <- plotvarsspring$plot_maps_args$projargs
# Mapping quantile residuals
#message("\n### Plotting quantile residuals ")
dharma_raster = plot_quantile_residuals( dharmaRes = dharmaRes,
fit = fit,
working_dir = working_dir,
year_labels = year_labels,
years_to_plot = years_to_plot,
n_cells_residuals = n_cells_residuals,
projargs = projargs#,
#...
)
# Semivariance for quantile residuals
# Disabled due to problems with raster plots in V >= 4.2.1
if( fit$data_list$n_t > 1 ){
  message("\n### Plotting semivariance for normal-transformed quantile residuals ")
residual_semivariance = plot_residual_semivariance( fit = fit,
dharma_raster = dharma_raster,
dharmaRes = dharmaRes,
working_dir = working_dir )
}else{
message("\n### Skipping plot of semivariance for normal-transformed quantile residuals")
residual_semivariance = NULL
}