Thoughts from the WG in January

Not just add NEAMAP, but also let the model estimate any q adjustments for the Albatross/Bigelow years

While we are there, consider mean (and variance?) of predator length as a catchability covariate

Is there output that can translate into inshore/offshore availability for the aggregate univariate index?

Center of gravity plot, partition index into quadrants? Need to realign the axes to NE and SE instead of N and E and track the SE axis (lower = inshore, higher = offshore); see https://github.com/James-Thorson-NOAA/VAST/wiki/Define-new-axes-for-range-edge-and-centroid

And a coastline for NEUS already developed here https://github.com/afredston/range-edge-niches/blob/master/scripts/get_axes_of_measurement.R#L33

Use the fit$ model output components for density, define portions of the extrapolation grid as inshore and offshore, and find density in each over time

Options for defining inshore vs offshore (discussion with Mike, Katie, and Tony as summarized by Tony); see the sketch after this list:

1. An inshore band representing the old Albatross inshore strata (the current NEAMAP strata).
2. An inshore band representing the inshore strata sampled by the Bigelow.
3. A 3-mile boundary (representing availability to the recreational fishery, MRIP).
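A rough sketch of how options 1 or 2 could be flagged on the VAST extrapolation grid; the stratum numbers here are placeholders, not the actual Albatross or Bigelow inshore strata, and option 3 would instead need a distance-from-shore calculation:

library(dplyr)
library(VAST) # provides northwest_atlantic_grid

inshore_strata <- c(3020, 3050) # placeholder stratum numbers for the inshore band

nwa_grid_flagged <- northwest_atlantic_grid %>%
  mutate(inshore = stratum_number %in% inshore_strata)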

Timeline:

Add NEAMAP

Ensure the NEAMAP data columns align with the current dataset. Also, add a vessel column and fix declon to be negative in the NEFSC data.

# current dataset, fix declon, add vessel
library(here)
library(dplyr)
library(readr) # for read_csv()

nefsc_bluepyagg_stn <- readRDS(here("fhdat/bluepyagg_stn.rds")) %>%
  mutate(declon = -declon,
         vessel = case_when(year<2009 ~ "AL",
                            year>=2009 ~ "HB", 
                            TRUE ~ as.character(NA)))


# NEAMAP add vessel and rename
neamap_bluepreyagg_stn <- read_csv(here("fhdat/NEAMAP_Mean stomach weights_Bluefish Prey.csv")) %>%
  mutate(vessel = "NEAMAP") %>%
  rename(id = station,
         sumbluepywt = sumbluepreywt,
         nbluepysp = nbfpreyspp,
         #npreysp = ,
         npiscsp = npiscspp,
         nstomtot = nstomtot, 
         meanbluepywt = meanbluepreywt,
         meanpisclen = meanpisclen.simple, 
         #varpisclen = ,
         season_ng = season,
         declat  = lat,
         declon = lon,
         bottemp = bWT,
         #surftemp = , 
         setdepth = depthm) 
  
  
# combine  
bluepyagg_stn <-  nefsc_bluepyagg_stn %>%
  bind_rows(neamap_bluepreyagg_stn) 

saveRDS(bluepyagg_stn, here("fhdat/bluepyagg_stn_all.rds"))

We get 28,596 observations, but one NEFSC station has an NA year so no vessel is assigned; check this. (Two stations from cruise 201202 are missing lat and lon, so drop them.)
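
A quick check (sketch) for the records flagged above; the cruise6 column name is an assumption about the NEFSC station identifiers and is not shown in the merge code:

# station with the NA year (and therefore no vessel assigned)
bluepyagg_stn %>% filter(is.na(year))

# stations from cruise 201202 missing coordinates, then drop rows without coordinates
bluepyagg_stn %>% filter(cruise6 == 201202, is.na(declat) | is.na(declon))
bluepyagg_stn <- bluepyagg_stn %>% filter(!is.na(declat), !is.na(declon))

# confirm vessel coding
table(bluepyagg_stn$vessel, useNA = "ifany")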

This script uses the full dataset and compares different vessel effect parameterizations, treating vessel differences as overdispersion in the first and/or second linear predictor.

# VAST attempt 2 univariate model as a script
# modified from https://github.com/James-Thorson-NOAA/VAST/wiki/Index-standardization

# Load packages
library(here)
library(dplyr)
library(VAST)

#Read in data, separate spring and fall, and rename columns for VAST:

bluepyagg_stn <- readRDS(here("fhdat/bluepyagg_stn_all.rds"))

#bluepyagg_stn <- bluepyagg_pred_stn

# filter to assessment years at Tony's suggestion

# code Vessel as AL=0, HB=1, NEAMAP=2

bluepyagg_stn_fall <- bluepyagg_stn %>%
  #ungroup() %>%
  filter(season_ng == "FALL",
         year > 1984) %>%
  mutate(AreaSwept_km2 = 1, #Elizabeth's code
         #declon = -declon already done before neamap merge
         Vessel = as.numeric(as.factor(vessel))-1
         ) %>% 
  select(Catch_g = meanbluepywt, #use bluepywt for individuals input in example
         Year = year,
         Vessel,
         AreaSwept_km2,
         Lat = declat,
         Lon = declon) %>%
  na.omit() %>%
  as.data.frame()

bluepyagg_stn_spring <- bluepyagg_stn %>%
  filter(season_ng == "SPRING",
         year > 1984) %>%
  mutate(AreaSwept_km2 =1, #Elizabeth's code
         #declon = -declon already done before neamap merge
         Vessel = as.numeric(as.factor(vessel))-1
         ) %>% 
  select(Catch_g = meanbluepywt,
         Year = year,
         Vessel,
         AreaSwept_km2,
         Lat = declat,
         Lon = declon)%>%
  na.omit() %>%
  as.data.frame()


# Make settings (turning off bias.correct to save time for example)
# NEFSC strata limits https://github.com/James-Thorson-NOAA/VAST/issues/302

# use only MAB, GB, GOM, SS EPUs 
# leave out south of Cape Hatteras at Elizabeth's suggestion
# could also leave out SS?
# CHECK if these EPUs match what we use in SOE

MABGBGOM <- northwest_atlantic_grid %>% 
  filter(EPU %in% c("Mid_Atlantic_Bight", "Georges_Bank", "Gulf_of_Maine")) %>% 
  select(MAB_GB_GOM = stratum_number) %>% 
  distinct()

MABGBGOMSS <- northwest_atlantic_grid %>% 
  filter(EPU %in% c("Mid_Atlantic_Bight", "Georges_Bank", "Gulf_of_Maine",
                    "Scotian_Shelf")) %>% 
  select(MAB_GB_GOM_SS = stratum_number) %>% 
  distinct()

MABGB <- northwest_atlantic_grid %>% 
  filter(EPU %in% c("Mid_Atlantic_Bight", "Georges_Bank")) %>% 
  select(MAB_GB = stratum_number) %>% 
  distinct()

GB <- northwest_atlantic_grid %>% 
  filter(EPU %in% c("Georges_Bank")) %>% 
  select(GB = stratum_number) %>% 
  distinct()


MAB <- northwest_atlantic_grid %>% 
  filter(EPU %in% c("Mid_Atlantic_Bight")) %>% 
  select(MAB = stratum_number) %>% 
  distinct()

# configs
FieldConfig <- c(
  "Omega1"   = 0,   # number of spatial variation factors (0, 1, AR1)
  "Epsilon1" = 0,   # number of spatio-temporal factors
  "Omega2"   = 0, 
  "Epsilon2" = 0
) 

# Model selection options, 
# Season_knots + suffix below
#                           FieldConfig default (all IID)
# _noaniso                  FieldConfig default (all IID) and use_anisotropy = FALSE
# _noomeps2                 FieldConfig 0 for Omega2, Epsilon2
# _noomeps2_noaniso         FieldConfig 0 for Omega2, Epsilon2 and use_anisotropy = FALSE
# _noomeps2_noeps1          FieldConfig 0 for Omega2, Epsilon2, Epsilon1
# _noomeps2_noeps1_noaniso  FieldConfig 0 for Omega2, Epsilon2, Epsilon1 and use_anisotropy = FALSE
# _noomeps12                FieldConfig all 0
# _noomeps12_noaniso        FieldConfig all 0 and use_anisotropy = FALSE

RhoConfig <- c(
  "Beta1" = 0,      # temporal structure on years (intercepts) 
  "Beta2" = 0, 
  "Epsilon1" = 0,   # temporal structure on spatio-temporal variation
  "Epsilon2" = 0
) 
# 0 off (fixed effects)
# 1 independent
# 2 random walk
# 3 constant among years (fixed effect)
# 4 AR1

OverdispersionConfig    <- c("eta1"=0, "eta2"=0)
# eta1 = vessel effects on prey encounter rate
# eta2 = vessel effects on prey weight
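# to actually estimate vessel overdispersion in the comparison runs (an assumption
# about how those alternative runs were configured), it would be something like:
# OverdispersionConfig <- c("eta1"=1, "eta2"=1) # one vessel overdispersion factor per linear predictor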

strata.limits <- as.list(MABGBGOMSS)

settings = make_settings( n_x = 500, 
                          Region = "northwest_atlantic",
                          #strata.limits = list('All_areas' = 1:1e5), full area
                          strata.limits = strata.limits,
                          purpose = "index2", 
                          bias.correct = FALSE,
                          #use_anisotropy = FALSE,
                          #fine_scale = FALSE,
                          #FieldConfig = FieldConfig,
                          #RhoConfig = RhoConfig,
                          OverdispersionConfig = OverdispersionConfig
                          )

 #Aniso=FALSE, #correct ln_H_input at bound
 #FieldConfig["Epsilon1"]=0, #correct L_epsilon1_z approaching 0  
 #FieldConfig["Epsilon2"]=0 #correct L_epsilon2_z approaching 0 
 #increase knots to address bounds for logkappa?
 # https://github.com/James-Thorson-NOAA/VAST/issues/300
 # or try finescale=FALSE
 # then Omegas hit bounds, had to turn them off too

# select dataset and set directory for output

season <- c("fall_500_wneamap")

working_dir <- here::here(sprintf("pyindex/allagg_%s/", season))

if(!dir.exists(working_dir)) {
  dir.create(working_dir)
}

# Run model fall
# fit = fit_model( settings = settings, 
#                  Lat_i = bluepyagg_stn_fall[,'Lat'], 
#                  Lon_i = bluepyagg_stn_fall[,'Lon'], 
#                  t_i = bluepyagg_stn_fall[,'Year'], 
#                  b_i = as_units(bluepyagg_stn_fall[,'Catch_g'], 'g'), 
#                  a_i = as_units(bluepyagg_stn_fall[,'AreaSwept_km2'], 'km^2'),
#                  working_dir = paste0(working_dir, "/"))

fit <- fit_model(
  settings = settings, 
  Lat_i = bluepyagg_stn_fall$Lat, 
  Lon_i = bluepyagg_stn_fall$Lon, 
  t_i = bluepyagg_stn_fall$Year, 
  b_i = as_units(bluepyagg_stn_fall[,'Catch_g'], 'g'),
  a_i = rep(1, nrow(bluepyagg_stn_fall)),
  v_i = bluepyagg_stn_fall$Vessel,
  #Use_REML = TRUE,
  working_dir = paste0(working_dir, "/"))

# Plot results
plot( fit,
      working_dir = paste0(working_dir, "/"))

# Run model spring

season <- c("spring_500_wneamap")

working_dir <- here::here(sprintf("pyindex/allagg_%s/", season))

if(!dir.exists(working_dir)) {
  dir.create(working_dir)
}                         
                          

fit = fit_model( settings = settings, 
                 Lat_i = bluepyagg_stn_spring[,'Lat'], 
                 Lon_i = bluepyagg_stn_spring[,'Lon'], 
                 t_i = bluepyagg_stn_spring[,'Year'], 
                 b_i = as_units(bluepyagg_stn_spring[,'Catch_g'], 'g'), 
                 a_i = as_units(bluepyagg_stn_spring[,'AreaSwept_km2'], 'km^2'),
                 v_i = bluepyagg_stn_spring$Vessel,
                # Use_REML = TRUE,
                 working_dir = paste0(working_dir, "/"))

# Plot results
plot( fit,
      working_dir = paste0(working_dir, "/")) 

Outputs are in this Google folder.

The next option is to use vessel as a catchability covariate instead.

Alternatively, adding a predator length covariate may model the vessel differences in predator catch that affect stomach contents more directly than a vessel catchability covariate. This was the approach taken by Ng et al. (2021), who found that predator length covariates were strongly supported as catchability covariates (larger predators are more likely to have more prey in their stomachs).

This script uses predator mean length as a catchability covariate, with options to also add the number of distinct predator types and temperature.

The rationale for including the number of predator species is that more species “sampling” the prey field may result in more prey in stomachs.

It turns out that including bottom temperature leaves out a lot of data, so for now I will not use it. If the WG wants to explore it, I’ll need to do runs with only the subset of data that has temperature to compare with the vessel effects, or use a different bottom temperature dataset as Charles Perretti did for summer flounder. If we use it, the rationale for including bottom temperature is that predator metabolism and consumption change with temperature, although this may not be linear depending on the temperature range.
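
A quick tally (sketch) of how much data bottom temperature would drop, using the combined dataset and the bottemp, vessel, and season_ng columns created in the merge above:

bluepyagg_stn %>%
  group_by(vessel, season_ng) %>%
  summarise(n_stations = n(),
            n_missing_bottemp = sum(is.na(bottemp)))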

# VAST attempt 2 univariate model as a script
# modified from https://github.com/James-Thorson-NOAA/VAST/wiki/Index-standardization

# Load packages
library(here)
library(dplyr)
library(VAST)

#Read in data, separate spring and fall, and rename columns for VAST:

bluepyagg_stn <- readRDS(here("fhdat/bluepyagg_stn_all.rds"))

#bluepyagg_stn <- bluepyagg_pred_stn

# filter to assessment years at Tony's suggestion

# code Vessel as AL=0, HB=1, NEAMAP=2

bluepyagg_stn_fall <- bluepyagg_stn %>%
  #ungroup() %>%
  filter(season_ng == "FALL",
         year > 1984) %>%
  mutate(AreaSwept_km2 = 1, #Elizabeth's code
         #declon = -declon already done before neamap merge
         Vessel = as.numeric(as.factor(vessel))-1
         ) %>% 
  dplyr::select(Catch_g = meanbluepywt, #use bluepywt for individuals input in example
         Year = year,
         Vessel,
         AreaSwept_km2,
         Lat = declat,
         Lon = declon,
         meanpisclen,
         npiscsp,
         #bottemp #this leaves out many stations for NEFSC
         ) %>%
  na.omit() %>%
  as.data.frame()

bluepyagg_stn_spring <- bluepyagg_stn %>%
  filter(season_ng == "SPRING",
         year > 1984) %>%
  mutate(AreaSwept_km2 =1, #Elizabeth's code
         #declon = -declon already done before neamap merge
         Vessel = as.numeric(as.factor(vessel))-1
         ) %>% 
  dplyr::select(Catch_g = meanbluepywt,
         Year = year,
         Vessel,
         AreaSwept_km2,
         Lat = declat,
         Lon = declon,
         meanpisclen,
         npiscsp,
         #bottemp #this leaves out many stations for NEFSC
         ) %>%
  na.omit() %>%
  as.data.frame()


# Make settings (turning off bias.correct to save time for example)
# NEFSC strata limits https://github.com/James-Thorson-NOAA/VAST/issues/302

# use only MAB, GB, GOM, SS EPUs 
# leave out south of Cape Hatteras at Elizabeth's suggestion
# could also leave out SS?
# CHECK if these EPUs match what we use in SOE

MABGBGOM <- northwest_atlantic_grid %>% 
  filter(EPU %in% c("Mid_Atlantic_Bight", "Georges_Bank", "Gulf_of_Maine")) %>% 
  select(MAB_GB_GOM = stratum_number) %>% 
  distinct()

MABGBGOMSS <- northwest_atlantic_grid %>% 
  filter(EPU %in% c("Mid_Atlantic_Bight", "Georges_Bank", "Gulf_of_Maine",
                    "Scotian_Shelf")) %>% 
  select(MAB_GB_GOM_SS = stratum_number) %>% 
  distinct()

MABGB <- northwest_atlantic_grid %>% 
  filter(EPU %in% c("Mid_Atlantic_Bight", "Georges_Bank")) %>% 
  select(MAB_GB = stratum_number) %>% 
  distinct()

GB <- northwest_atlantic_grid %>% 
  filter(EPU %in% c("Georges_Bank")) %>% 
  select(GB = stratum_number) %>% 
  distinct()


MAB <- northwest_atlantic_grid %>% 
  filter(EPU %in% c("Mid_Atlantic_Bight")) %>% 
  select(MAB = stratum_number) %>% 
  distinct()

# configs
FieldConfig <- c(
  "Omega1"   = 0,   # number of spatial variation factors (0, 1, AR1)
  "Epsilon1" = 0,   # number of spatio-temporal factors
  "Omega2"   = 0, 
  "Epsilon2" = 0
) 

# Model selection options, 
# Season_knots + suffix below
#                           FieldConfig default (all IID)
# _noaniso                  FieldConfig default (all IID) and use_anisotropy = FALSE
# _noomeps2                 FieldConfig 0 for Omega2, Epsilon2
# _noomeps2_noaniso         FieldConfig 0 for Omega2, Epsilon2 and use_anisotropy = FALSE
# _noomeps2_noeps1          FieldConfig 0 for Omega2, Epsilon2, Epsilon1
# _noomeps2_noeps1_noaniso  FieldConfig 0 for Omega2, Epsilon2, Epsilon1 and use_anisotropy = FALSE
# _noomeps12                FieldConfig all 0
# _noomeps12_noaniso        FieldConfig all 0 and use_anisotropy = FALSE

RhoConfig <- c(
  "Beta1" = 0,      # temporal structure on years (intercepts) 
  "Beta2" = 0, 
  "Epsilon1" = 0,   # temporal structure on spatio-temporal variation
  "Epsilon2" = 0
) 
# 0 off (fixed effects)
# 1 independent
# 2 random walk
# 3 constant among years (fixed effect)
# 4 AR1

OverdispersionConfig    <- c("eta1"=0, "eta2"=0)
# eta1 = vessel effects on prey encounter rate
# eta2 = vessel effects on prey weight

strata.limits <- as.list(MABGBGOMSS)

settings = make_settings( n_x = 500, 
                          Region = "northwest_atlantic",
                          #strata.limits = list('All_areas' = 1:1e5), full area
                          strata.limits = strata.limits,
                          purpose = "index2", 
                          bias.correct = FALSE,
                          #use_anisotropy = FALSE,
                          #fine_scale = FALSE,
                          #FieldConfig = FieldConfig,
                          #RhoConfig = RhoConfig,
                          OverdispersionConfig = OverdispersionConfig
                          )

 #Aniso=FALSE, #correct ln_H_input at bound
 #FieldConfig["Epsilon1"]=0, #correct L_epsilon1_z approaching 0  
 #FieldConfig["Epsilon2"]=0 #correct L_epsilon2_z approaching 0 
 #increase knots to address bounds for logkappa?
 # https://github.com/James-Thorson-NOAA/VAST/issues/300
 # or try finescale=FALSE
 # then Omegas hit bounds, had to turn them off too

# select dataset and set directory for output

season <- c("fall_500_allsurvs_no")

working_dir <- here::here(sprintf("pyindex/allagg_%s/", season))

if(!dir.exists(working_dir)) {
  dir.create(working_dir)
}

# Run model fall
# fit = fit_model( settings = settings, 
#                  Lat_i = bluepyagg_stn_fall[,'Lat'], 
#                  Lon_i = bluepyagg_stn_fall[,'Lon'], 
#                  t_i = bluepyagg_stn_fall[,'Year'], 
#                  b_i = as_units(bluepyagg_stn_fall[,'Catch_g'], 'g'), 
#                  a_i = as_units(bluepyagg_stn_fall[,'AreaSwept_km2'], 'km^2'),
#                  working_dir = paste0(working_dir, "/"))

fit <- fit_model(
  settings = settings, 
  Lat_i = bluepyagg_stn_fall$Lat, 
  Lon_i = bluepyagg_stn_fall$Lon, 
  t_i = bluepyagg_stn_fall$Year, 
  b_i = as_units(bluepyagg_stn_fall[,'Catch_g'], 'g'),
  a_i = rep(1, nrow(bluepyagg_stn_fall)),
  v_i = bluepyagg_stn_fall$Vessel,
  Q_ik = as.matrix(bluepyagg_stn_fall[,c("npiscsp")]),
  #Use_REML = TRUE,
  working_dir = paste0(working_dir, "/"))

# Plot results
plot( fit,
      working_dir = paste0(working_dir, "/"))

# Run model spring

season <- c("spring_500_allsurvs_no")

working_dir <- here::here(sprintf("pyindex/allagg_%s/", season))

if(!dir.exists(working_dir)) {
  dir.create(working_dir)
}                         
                          

fit = fit_model( settings = settings, 
                 Lat_i = bluepyagg_stn_spring[,'Lat'], 
                 Lon_i = bluepyagg_stn_spring[,'Lon'], 
                 t_i = bluepyagg_stn_spring[,'Year'], 
                 b_i = as_units(bluepyagg_stn_spring[,'Catch_g'], 'g'), 
                 a_i = rep(1, nrow(bluepyagg_stn_spring)),
                 v_i = bluepyagg_stn_spring$Vessel,
                 Q_ik = as.matrix(bluepyagg_stn_spring[,c("npiscsp")]),
                # Use_REML = TRUE,
                 working_dir = paste0(working_dir, "/"))

# Plot results
plot( fit,
      working_dir = paste0(working_dir, "/")) 

Initial run with length and number of predators has a lower AIC than runs with overdispersion for vessel effects.

We’ll compare all runs with NEAMAP added and overdispersion vs catchability covariates here (using maximum likelihood to calculate AIC instead of REML because we are not changing the random effects):

# from each output folder in pyindex, 
outdir <- here("pyindex")
moddirs <- list.dirs(outdir) 
moddirs <- moddirs[-1]
# keep folder name
modnames <- list.dirs(outdir, full.names = FALSE)

# function to apply extracting info
getmodinfo <- function(d.name){
  # read settings
  modpath <- stringr::str_split(d.name, "/", simplify = TRUE)
  modname <- modpath[length(modpath)]
  
  settings <- read.table(file.path(d.name, "settings.txt"), comment.char = "",
    fill = TRUE, header = FALSE)
  
  n_x <- as.numeric(as.character(settings[(which(settings[,1]=="$n_x")+1),2]))
  grid_size_km <- as.numeric(as.character(settings[(which(settings[,1]=="$grid_size_km")+1),2]))
  max_cells <- as.numeric(as.character(settings[(which(settings[,1]=="$max_cells")+1),2]))
  use_anisotropy <- as.character(settings[(which(settings[,1]=="$use_anisotropy")+1),2])
  fine_scale <- as.character(settings[(which(settings[,1]=="$fine_scale")+1),2])
  bias.correct <- as.character(settings[(which(settings[,1]=="$bias.correct")+1),2])
  
  #FieldConfig
  if(settings[(which(settings[,1]=="$FieldConfig")+1),1]=="Component_1"){
    omega1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+2),2])
    omega2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),1])
    epsilon1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),2])
    epsilon2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+5),1])
    beta1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+6),2])
    beta2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+7),1])
  }
  
  if(settings[(which(settings[,1]=="$FieldConfig")+1),1]=="Omega1"){
    omega1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),1])
    omega2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),1])
    epsilon1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),2])
    epsilon2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),2])
    beta1 <- "IID"
    beta2 <- "IID"
  }
  
  
  #RhoConfig
  rho_beta1 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+3),1]))
  rho_beta2 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+3),2]))
  rho_epsilon1 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+4),1]))
  rho_epsilon2 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+4),2]))
  
  # read parameter estimates; the loaded object is called parameter_estimates
  load(file.path(d.name, "parameter_estimates.RData"))
  
  AIC <- parameter_estimates$AIC[1]  
  converged <- parameter_estimates$Convergence_check[1]
  fixedcoeff <- unname(parameter_estimates$number_of_coefficients[2])
  randomcoeff <- unname(parameter_estimates$number_of_coefficients[3])
  
  
  # return model attributes as a dataframe
  out <- data.frame(modname = modname,
                    n_x = n_x,
                    grid_size_km = grid_size_km,
                    max_cells = max_cells,
                    use_anisotropy = use_anisotropy,
                    fine_scale =  fine_scale,
                    bias.correct = bias.correct,
                    omega1 = omega1,
                    omega2 = omega2,
                    epsilon1 = epsilon1,
                    epsilon2 = epsilon2,
                    beta1 = beta1,
                    beta2 = beta2,
                    rho_epsilon1 = rho_epsilon1,
                    rho_epsilon2 = rho_epsilon2,
                    rho_beta1 = rho_beta1,
                    rho_beta2 = rho_beta2,
                    AIC = AIC,
                    converged = converged,
                    fixedcoeff = fixedcoeff,
                    randomcoeff = randomcoeff
  )
    
    return(out)

}

# combine into one table for comparison

modselect <- purrr::map_dfr(moddirs, getmodinfo)

Only compare the models which include NEAMAP data:

# only compare AIC for the 500 knot models with neamap included
library(stringr) # for str_detect()

modselect.500.allsurvs <- modselect %>%
  filter(n_x == 500) %>%
  filter(str_detect(modname, "wneamap|allsurvs")) %>%
  mutate(season = case_when(str_detect(modname, "fall") ~ "Fall",
                            str_detect(modname, "spring") ~ "Spring",
                            TRUE ~ as.character(NA))) %>%
  mutate(converged2 = case_when(str_detect(converged, "no evidence") ~ "likely",
                                str_detect(converged, "is likely not") ~ "unlikely",
                                TRUE ~ as.character(NA))) %>%
  group_by(season) %>%
  mutate(deltaAIC = AIC-min(AIC)) %>%
  arrange(AIC) %>%
  dplyr::select(modname, season, deltaAIC, fixedcoeff,
         randomcoeff, use_anisotropy,
         omega1, omega2, epsilon1, epsilon2,
         beta1, beta2, AIC, converged2)

DT::datatable(modselect.500.allsurvs, rownames = FALSE, 
              options= list(pageLength = 25, scrollX = TRUE))

Best fit model(s)? allsurvs is a check that should be identical to wneamap, and it is.

Including predator mean length and number of predator species seems to fit better than including either alone, vessel overdispersion parameters, or no covariates.

Realign center of gravity along NEUS coastline

Following the excellent instructions from Alexa Fredston at Rutgers, we can use the NE coast as an axis for calculating center of gravity, realigning the calculations as alongshore and inshore/offshore.

From https://github.com/James-Thorson-NOAA/VAST/wiki/Define-new-axes-for-range-edge-and-centroid and using the coastline from https://github.com/afredston/range-edge-niches/blob/master/scripts/get_axes_of_measurement.R

Get the coastline and make the supplemental figure from Fredston et al. 2021:

# First get the coastline, verbatim lines 15-43 here
# https://github.com/afredston/range-edge-niches/blob/master/scripts/get_axes_of_measurement.R 

library(sf)
library(raster)    # for extent(), geom(), pointDistance(); note raster masks dplyr::select, later chunks reload dplyr
library(ggplot2)
library(gridExtra) # for grid.arrange()
library(readr)     # for write_rds()

usamap <- rnaturalearth::ne_countries(scale = "small", country = "united states of america", returnclass = "sf")[1] %>% 
  st_cast("MULTILINESTRING") # get basic map of the country 

## Northeast:

# set bounding boxes
neus.xmin=-78
neus.xmax=-66
neus.ymin=35
neus.ymax=45

neus.bbox1 <- st_set_crs(st_as_sf(as(raster::extent(neus.xmin, neus.xmax, neus.ymin, neus.ymax), "SpatialPolygons")), st_crs(usamap))
neus.bbox2 <- st_set_crs(st_as_sf(as(raster::extent(-78, -74, 42, 45), "SpatialPolygons")), st_crs(usamap)) # smaller bounding box to get rid of extra lines on the map 

neusmap <- usamap %>% 
  st_intersection(neus.bbox1) %>%  
  st_difference(neus.bbox2) # gets rid of extra non coastal line 

# if WG wants a different smooth, change below based on fig
neus.smoothgeom <- neusmap %>% 
  smoothr::smooth(method="ksmooth", smoothness=8) %>% # smoother was applied incrementally more until the Chesapeake went away 
  as("Spatial") %>% 
  geom()

neus.geomdists <- pointDistance(neus.smoothgeom[-nrow(neus.smoothgeom), c("x", "y")], neus.smoothgeom[-1, c("x", "y")], lonlat=TRUE)
neus.coastdistdat <- data.frame(neus.smoothgeom[, c('x','y')], seglength=c(0, neus.geomdists))
neus.coastdistdat$lengthfromhere <- rev(cumsum(rev(neus.coastdistdat[,"seglength"])))
# first row should match st_length(smoothmap)

write_rds(neus.coastdistdat, here("spatialdat","neus_coastdistdat.rds"))

# plot it, lines 114-129 https://github.com/afredston/range-edge-niches/blob/master/scripts/get_axes_of_measurement.R

usoutline <- rnaturalearth::ne_states("united states of america", returnclass = "sf") %>% 
  st_sf()

# iterate over NEUS smoothness values that we used 
neus_smoothplot <- function(smoothval){
  out <- ggplot() +
    geom_sf(data=usoutline, color="#999999") +
    geom_sf(data=neusmap %>% 
              smoothr::smooth(method="ksmooth", smoothness=smoothval), color="darkblue", lwd=1.5) + 
    scale_x_continuous(limits=c(neus.xmin, neus.xmax)) +
    scale_y_continuous(limits=c(neus.ymin, neus.ymax)) +
    theme_bw() +
    labs(title=paste0("Smoothness=", smoothval)) +
    theme(axis.text.x=element_text(angle=45)) 
  return(out)
}

neus.smoothmap.list <- lapply(seq(1, 8, 1), neus_smoothplot)

neus.smoothmap <- do.call("grid.arrange", c(neus.smoothmap.list, ncol=3))

Apply to the model. Now, following the steps in https://github.com/James-Thorson-NOAA/VAST/wiki/Define-new-axes-for-range-edge-and-centroid, we first set up the model to get default coordinates but don’t run it, then create the custom axis using the coastline object above, and finally pass the custom axis to the model and run it.

We’ll first test with the previously running model, without the additional complication of NEAMAP.

# VAST attempt 1 univariate model as a script
# modified from https://github.com/James-Thorson-NOAA/VAST/wiki/Index-standardization
# and https://github.com/NOAA-EDAB/neusvast/blob/master/analysis/models/pisc_pesc.R

# may need to downgrade TMB to VAST Matrix package version?
# see https://github.com/James-Thorson-NOAA/VAST/issues/289

# Load packages
library(here)
library(dplyr)
library(VAST)

#Read in data, separate spring and fall, and rename columns for VAST:

bluepyagg_stn <- readRDS(here("fhdat/bluepyagg_stn.rds"))

#bluepyagg_stn <- bluepyagg_pred_stn

# filter to assessment years at Tony's suggestion
# try datasets with subset of predators (start with 1)

bluepyagg_stn_fall <- bluepyagg_stn %>%
  #ungroup() %>%
  filter(season_ng == "FALL",
         year > 1984) %>%
  mutate(Vessel = "NEFSC",
         #AreaSwept_km2 = 0.0384) %>%
         AreaSwept_km2 = 1, #Elizabeth's code
         declon = -declon) %>% 
  select(Catch_g = meanbluepywt, #use bluepywt for individuals input in example
         Year = year,
         Vessel,
         AreaSwept_km2,
         Lat = declat,
         Lon = declon) %>%
  na.omit() %>%
  as.data.frame()

bluepyagg_stn_spring <- bluepyagg_stn %>%
  filter(season_ng == "SPRING",
         year > 1984) %>%
  mutate(Vessel = "NEFSC",
         #AreaSwept_km2 = 0.0384) %>%
         AreaSwept_km2 =1, #Elizabeth's code
         declon = -declon) %>% 
  select(Catch_g = meanbluepywt,
         Year = year,
         Vessel,
         AreaSwept_km2,
         Lat = declat,
         Lon = declon)%>%
  na.omit() %>%
  as.data.frame()


# Make settings (turning off bias.correct to save time for example)
# NEFSC strata limits https://github.com/James-Thorson-NOAA/VAST/issues/302

# use only MAB, GB, GOM, SS EPUs 
# leave out south of Cape Hatteras at Elizabeth's suggestion
# could also leave out SS?
# CHECK if these EPUs match what we use in SOE

MABGBGOM <- northwest_atlantic_grid %>% 
  filter(EPU %in% c("Mid_Atlantic_Bight", "Georges_Bank", "Gulf_of_Maine")) %>% 
  select(MAB_GB_GOM = stratum_number) %>% 
  distinct()

MABGBGOMSS <- northwest_atlantic_grid %>% 
  filter(EPU %in% c("Mid_Atlantic_Bight", "Georges_Bank", "Gulf_of_Maine",
                    "Scotian_Shelf")) %>% 
  select(MAB_GB_GOM_SS = stratum_number) %>% 
  distinct()

MABGB <- northwest_atlantic_grid %>% 
  filter(EPU %in% c("Mid_Atlantic_Bight", "Georges_Bank")) %>% 
  select(MAB_GB = stratum_number) %>% 
  distinct()

GB <- northwest_atlantic_grid %>% 
  filter(EPU %in% c("Georges_Bank")) %>% 
  select(GB = stratum_number) %>% 
  distinct()


MAB <- northwest_atlantic_grid %>% 
  filter(EPU %in% c("Mid_Atlantic_Bight")) %>% 
  select(MAB = stratum_number) %>% 
  distinct()

# configs--use full model for this
# FieldConfig <- c(
#   "Omega1"   = 0,   # number of spatial variation factors (0, 1, AR1)
#   "Epsilon1" = 0,   # number of spatio-temporal factors
#   "Omega2"   = 0, 
#   "Epsilon2" = 0
# ) 
# 
# RhoConfig <- c(
#   "Beta1" = 0,      # temporal structure on years (intercepts) 
#   "Beta2" = 0, 
#   "Epsilon1" = 0,   # temporal structure on spatio-temporal variation
#   "Epsilon2" = 0
# ) 
# 0 off (fixed effects)
# 1 independent
# 2 random walk
# 3 constant among years (fixed effect)
# 4 AR1


strata.limits <- as.list(MABGBGOMSS)

settings = make_settings( n_x = 500, 
                          Region = "northwest_atlantic",
                          #strata.limits = list('All_areas' = 1:1e5), full area
                          strata.limits = strata.limits,
                          purpose = "index2", 
                          bias.correct = FALSE,
                          use_anisotropy = FALSE,
                          #fine_scale = FALSE,
                          #FieldConfig = FieldConfig,
                          #RhoConfig = RhoConfig
                          )


# select dataset and set directory for output

season <- c("fall_500_tiltaxis")

working_dir <- here::here(sprintf("pyindex/allagg_%s/", season))

if(!dir.exists(working_dir)) {
  dir.create(working_dir)
}

# Run model fall
# fit = fit_model( settings = settings, 
#                  Lat_i = bluepyagg_stn_fall[,'Lat'], 
#                  Lon_i = bluepyagg_stn_fall[,'Lon'], 
#                  t_i = bluepyagg_stn_fall[,'Year'], 
#                  b_i = as_units(bluepyagg_stn_fall[,'Catch_g'], 'g'), 
#                  a_i = as_units(bluepyagg_stn_fall[,'AreaSwept_km2'], 'km^2'),
#                  working_dir = paste0(working_dir, "/"))

fit <- fit_model(
  settings = settings, 
  Lat_i = bluepyagg_stn_fall$Lat, 
  Lon_i = bluepyagg_stn_fall$Lon, 
  t_i = bluepyagg_stn_fall$Year, 
  b_i = as_units(bluepyagg_stn_fall[,'Catch_g'], 'g'),
  a_i = rep(1, nrow(bluepyagg_stn_fall)), 
  #Use_REML = TRUE,
  run_model = FALSE,
  working_dir = paste0(working_dir, "/"))

coastdistdat <-  readRDS(here("spatialdat/neus_coastdistdat.rds"))

# extract northings and eastings 
Z_gm = fit$spatial_list$loc_g 

# convert northings/eastings to lat/lon
tmpUTM = cbind('PID'=1,'POS'=1:nrow(Z_gm),'X'=Z_gm[,'E_km'],'Y'=Z_gm[,'N_km'])

# create UTM object with correct attributes 
attr(tmpUTM,"projection") = "UTM"
attr(tmpUTM,"zone") = fit$extrapolation_list$zone - ifelse( fit$extrapolation_list$flip_around_dateline==TRUE, 30, 0 )

# convert to lat/lon for merging with custom axis
latlon_g = PBSmapping::convUL(tmpUTM)                                                         #$
latlon_g = cbind( 'Lat'=latlon_g[,"Y"], 'Lon'=latlon_g[,"X"])
latlon_g <- as.data.frame(latlon_g)

# function to find the custom axis position that minimizes the Euclidean distance from each set of VAST coordinates
get_length <- function(lon, lat, distdf) {
  tmp <- distdf 
  tmp$abs.diff.x2 = abs(distdf$x-lon)^2
  tmp$abs.diff.y2 = abs(distdf$y-lat)^2
  # get Euclidean dist from VAST points to all points along the coastal axis  
  tmp$abs.diff.xy = sqrt(tmp$abs.diff.x2 + tmp$abs.diff.y2) 
  tmp <- tmp[tmp$abs.diff.xy==min(tmp$abs.diff.xy),] # keep only the row with the minimum distance
  return(tmp$lengthfromhere) # save the position of that row
}

# apply this to match each VAST knot with a position along the custom axis
# use lat/lon to get coastal distance
coast_km = NULL
for(k in 1:nrow(latlon_g)){
  out = get_length(lon=latlon_g$Lon[k], lat=latlon_g$Lat[k], distdf = coastdistdat)/1000 # works better in a for loop than in apply where it's hard to get the rowwise nature to work--revisit sometime?
  coast_km <- c(coast_km, out)
}

# bind the axis positions that match each knot back to Z_gm
Z_gm = cbind( Z_gm, "coast_km"=coast_km )
Z_gm_axes = colnames(Z_gm)

# save for later
#saveRDS(Z_gm, Z_gmFile)

# Plot to confirm
plot( x=Z_gm[,1], y=Z_gm[,2], col=viridisLite::viridis(25)[cut(Z_gm[,3],25)] )

# Fit model with new coordinates
fit <- fit_model(
  settings = settings, 
  Lat_i = bluepyagg_stn_fall$Lat, 
  Lon_i = bluepyagg_stn_fall$Lon, 
  t_i = bluepyagg_stn_fall$Year, 
  b_i = as_units(bluepyagg_stn_fall[,'Catch_g'], 'g'),
  a_i = rep(1, nrow(bluepyagg_stn_fall)), 
  #Use_REML = TRUE,
  Z_gm = Z_gm,
  working_dir = paste0(working_dir, "/"))

# Plot results
plot( fit,
      working_dir = paste0(working_dir, "/"))

# Run model spring

season <- c("spring_500_tiltaxis")

working_dir <- here::here(sprintf("pyindex/allagg_%s/", season))

if(!dir.exists(working_dir)) {
  dir.create(working_dir)
}                         
                          

fit <- fit_model(
  settings = settings, 
  Lat_i = bluepyagg_stn_spring$Lat, 
  Lon_i = bluepyagg_stn_spring$Lon, 
  t_i = bluepyagg_stn_spring$Year, 
  b_i = as_units(bluepyagg_stn_spring[,'Catch_g'], 'g'),
  a_i = rep(1, nrow(bluepyagg_stn_spring)), 
  #Use_REML = TRUE,
  run_model = FALSE,
  working_dir = paste0(working_dir, "/"))

coastdistdat <-  readRDS(here("spatialdat/neus_coastdistdat.rds"))

# extract northings and eastings 
Z_gm = fit$spatial_list$loc_g 

# convert northings/eastings to lat/lon
tmpUTM = cbind('PID'=1,'POS'=1:nrow(Z_gm),'X'=Z_gm[,'E_km'],'Y'=Z_gm[,'N_km'])

# create UTM object with correct attributes 
attr(tmpUTM,"projection") = "UTM"
attr(tmpUTM,"zone") = fit$extrapolation_list$zone - ifelse( fit$extrapolation_list$flip_around_dateline==TRUE, 30, 0 )

# convert to lat/lon for merging with custom axis
latlon_g = PBSmapping::convUL(tmpUTM)                                                         #$
latlon_g = cbind( 'Lat'=latlon_g[,"Y"], 'Lon'=latlon_g[,"X"])
latlon_g <- as.data.frame(latlon_g)

# function to find the custom axis position that minimizes the Euclidean distance from each set of VAST coordinates
get_length <- function(lon, lat, distdf) {
  tmp <- distdf 
  tmp$abs.diff.x2 = abs(distdf$x-lon)^2
  tmp$abs.diff.y2 = abs(distdf$y-lat)^2
  # get Euclidean dist from VAST points to all points along the coastal axis  
  tmp$abs.diff.xy = sqrt(tmp$abs.diff.x2 + tmp$abs.diff.y2) 
  tmp <- tmp[tmp$abs.diff.xy==min(tmp$abs.diff.xy),] # keep only the row with the minimum distance
  return(tmp$lengthfromhere) # save the position of that row
}

# apply this to match each VAST knot with a position along the custom axis
# use lat/lon to get coastal distance
coast_km = NULL
for(k in 1:nrow(latlon_g)){
  out = get_length(lon=latlon_g$Lon[k], lat=latlon_g$Lat[k], distdf = coastdistdat)/1000 # works better in a for loop than in apply where it's hard to get the rowwise nature to work--revisit sometime?
  coast_km <- c(coast_km, out)
}

# bind the axis positions that match each knot back to Z_gm
Z_gm = cbind( Z_gm, "coast_km"=coast_km )
Z_gm_axes = colnames(Z_gm)

# save for later
#saveRDS(Z_gm, Z_gmFile)


# Plot to confirm
plot( x=Z_gm[,1], y=Z_gm[,2], col=viridisLite::viridis(25)[cut(Z_gm[,3],25)] )

# Fit model with new coordinates
fit <- fit_model(
  settings = settings, 
  Lat_i = bluepyagg_stn_spring$Lat, 
  Lon_i = bluepyagg_stn_spring$Lon, 
  t_i = bluepyagg_stn_spring$Year, 
  b_i = as_units(bluepyagg_stn_spring[,'Catch_g'], 'g'),
  a_i = rep(1, nrow(bluepyagg_stn_spring)), 
  #Use_REML = TRUE,
  Z_gm = Z_gm,
  working_dir = paste0(working_dir, "/"))


# Plot results
plot( fit,
      working_dir = paste0(working_dir, "/")) 

This is giving us the alongshore center of gravity. We also want the rotated inshore-offshore center of gravity. For that, do I need to add another coordinate?

Alternatively, rotate the axes to be offset 45 degrees from Northings and Eastings? A rough sketch of that option follows.
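
A minimal sketch of the 45-degree option, assuming Z_gm holds the default eastings/northings (E_km, N_km columns) extracted from fit$spatial_list$loc_g as above; which rotated axis is alongshore and which is offshore would need to be confirmed with a quick plot:

# same rotation matrix helper used for the axis lines below
rotation = function(a){
  r = a * pi / 180 # degrees to radians
  matrix(c(cos(r), sin(r), -sin(r), cos(r)), nrow = 2, ncol = 2)
}

Z_rot <- as.matrix(Z_gm[, c("E_km", "N_km")]) %*% rotation(45)
colnames(Z_rot) <- c("alongshore_km", "offshore_km") # axis names to be confirmed by plotting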

Following the example for the Bering Sea linear axis, we add two lines. The along-shelf axis directly follows Kevin Friedland’s axis for measuring species distribution change:

The along-shelf axis begins at 76.53°W 34.60°N and terminates at 65.71°W 43.49°N.

And the offshore axis is perpendicular to the alongshore axis, originating from its center. Code to rotate a geographic feature is from https://geocompr.robinlovelace.net/geometric-operations.html#affine-transformations, part of Geocomputation with R, a great book featuring sf functions and usage.

# set line endpoints 
neus.alongshore.lon <- c(-76.53,-65.71)
neus.alongshore.lat <- c(34.60,43.49)

# draw line
neus.alongshore.line <- data.frame(neus.alongshore.lon, neus.alongshore.lat) %>%
  rename('x'=neus.alongshore.lon,'y'=neus.alongshore.lat) %>%
  st_as_sf(coords=c('x','y')) %>%
  summarise() %>%
  st_cast("LINESTRING") %>% 
  smoothr::densify(n=99) %>% # choose number of line segments: here it's 99 for 100 points
  st_cast("MULTIPOINT") 

neus.alongshore.points <- data.frame(st_coordinates(neus.alongshore.line)) %>%
  rename("x"=X, "y"=Y)

neus.alongshore.dists <- pointDistance(neus.alongshore.points[-nrow(neus.alongshore.points), c("x", "y")], neus.alongshore.points[-1, c("x", "y")], lonlat=TRUE)

neus.alongshore.axisdistdat <- data.frame(neus.alongshore.points[, c('x','y')], seglength=c(0, neus.alongshore.dists))
neus.alongshore.axisdistdat$lengthfromhere <- rev(cumsum(rev(neus.alongshore.axisdistdat[,"seglength"])))

#plot to ensure it's right!
# ggplot() +
#     geom_sf(data=neusmap, color="#999999") +
#     geom_sf(data=neus.alongshore.line %>% st_set_crs(st_crs(neusmap)), color="darkblue", lwd=1.5) + 
#     #scale_x_continuous(limits=c(neus.xmin, neus.xmax)) +
#     #scale_y_continuous(limits=c(neus.ymin, neus.ymax)) +
#     theme_bw() +
#     #labs(title=paste0("Smoothness=", smoothval)) +
#     theme(axis.text.x=element_text(angle=45))

#find perpendicular axis at midpoint of alongshore line
# since we made this line 100 units, center is at ~50
halfway <- neus.alongshore.axisdistdat[50,]

# or we could calculate it properly
mid.lat <- sum(neus.alongshore.lat)/2
mid.lon <- sum(neus.alongshore.lon)/2

# or use a built in function
midpoint.alongshore <- st_centroid(neus.alongshore.line)

# rotate alongshore line 90 degrees at midpoint
# functions see https://geocompr.robinlovelace.net/geometric-operations.html
rotation = function(a){
  r = a * pi / 180 #degrees to radians
  matrix(c(cos(r), sin(r), -sin(r), cos(r)), nrow = 2, ncol = 2)
} 

#nz_rotate = (nz_sfc - nz_centroid_sfc) * rotation(30) + nz_centroid_sfc

neus.alongshore.line_sfc <- st_geometry(neus.alongshore.line)
midpoint.alongshore_sfc <- st_centroid(neus.alongshore.line_sfc)

neus.offshore.line <- (neus.alongshore.line_sfc - midpoint.alongshore_sfc) * rotation(90) + midpoint.alongshore_sfc

ggplot() +
    geom_sf(data=neusmap, color="#999999") +
    geom_sf(data=neus.alongshore.line_sfc %>% st_set_crs(st_crs(neusmap)), color="darkblue", lwd=1.5) +
    geom_sf(data=neus.offshore.line %>% st_set_crs(st_crs(neusmap)), color="darkblue", lwd=1.5) +
    #scale_x_continuous(limits=c(neus.xmin, neus.xmax)) +
    #scale_y_continuous(limits=c(neus.ymin, neus.ymax)) +
    theme_bw() +
    #labs(title=paste0("Smoothness=", smoothval)) +
    theme(axis.text.x=element_text(angle=45))

neus.offshore.points <- data.frame(st_coordinates(neus.offshore.line)) %>%
  rename("x"=X, "y"=Y)

neus.offshore.dists <- pointDistance(neus.offshore.points[-nrow(neus.offshore.points), c("x", "y")], neus.offshore.points[-1, c("x", "y")], lonlat=TRUE)

neus.offshore.axisdistdat <- data.frame(neus.offshore.points[, c('x','y')], seglength=c(0, neus.offshore.dists))
neus.offshore.axisdistdat$lengthfromhere <- rev(cumsum(rev(neus.offshore.axisdistdat[,"seglength"])))

write_rds(neus.alongshore.axisdistdat, here("spatialdat","neus.alongshore.axisdistdat.rds"))
write_rds(neus.offshore.axisdistdat, here("spatialdat","neus.offshore.axisdistdat.rds"))

Affine transformations do not necessarily preserve angles, and this rotation is applied in unprojected lon/lat coordinates (where a degree of longitude is shorter than a degree of latitude at these latitudes), so the result looks a little off from perpendicular when 90 degrees is specified.

We could also project a linear axis offshore from the non-linear coastline axis?

This should project coast, alongshore, and offshore axes:

# VAST attempt 2 univariate model as a script
# modified from https://github.com/James-Thorson-NOAA/VAST/wiki/Index-standardization

# Load packages
library(here)
library(dplyr)
library(VAST)

#Read in data, separate spring and fall, and rename columns for VAST:

bluepyagg_stn <- readRDS(here("fhdat/bluepyagg_stn_all.rds"))

#bluepyagg_stn <- bluepyagg_pred_stn

# filter to assessment years at Tony's suggestion

# code Vessel as AL=0, HB=1, NEAMAP=2

bluepyagg_stn_fall <- bluepyagg_stn %>%
  #ungroup() %>%
  filter(season_ng == "FALL",
         year > 1984) %>%
  mutate(AreaSwept_km2 = 1, #Elizabeth's code
         #declon = -declon already done before neamap merge
         Vessel = as.numeric(as.factor(vessel))-1
         ) %>% 
  dplyr::select(Catch_g = meanbluepywt, #use bluepywt for individuals input in example
         Year = year,
         Vessel,
         AreaSwept_km2,
         Lat = declat,
         Lon = declon,
         meanpisclen,
         npiscsp,
         #bottemp #this leaves out many stations for NEFSC
         ) %>%
  na.omit() %>%
  as.data.frame()

bluepyagg_stn_spring <- bluepyagg_stn %>%
  filter(season_ng == "SPRING",
         year > 1984) %>%
  mutate(AreaSwept_km2 =1, #Elizabeth's code
         #declon = -declon already done before neamap merge
         Vessel = as.numeric(as.factor(vessel))-1
         ) %>% 
  dplyr::select(Catch_g = meanbluepywt,
         Year = year,
         Vessel,
         AreaSwept_km2,
         Lat = declat,
         Lon = declon,
         meanpisclen,
         npiscsp,
         #bottemp #this leaves out many stations for NEFSC
         ) %>%
  na.omit() %>%
  as.data.frame()


# Make settings (turning off bias.correct to save time for example)
# NEFSC strata limits https://github.com/James-Thorson-NOAA/VAST/issues/302

# use only MAB, GB, GOM, SS EPUs 
# leave out south of Cape Hatteras at Elizabeth's suggestion
# could also leave out SS?
# CHECK if these EPUs match what we use in SOE

MABGBGOM <- northwest_atlantic_grid %>% 
  filter(EPU %in% c("Mid_Atlantic_Bight", "Georges_Bank", "Gulf_of_Maine")) %>% 
  select(MAB_GB_GOM = stratum_number) %>% 
  distinct()

MABGBGOMSS <- northwest_atlantic_grid %>% 
  filter(EPU %in% c("Mid_Atlantic_Bight", "Georges_Bank", "Gulf_of_Maine",
                    "Scotian_Shelf")) %>% 
  select(MAB_GB_GOM_SS = stratum_number) %>% 
  distinct()

MABGB <- northwest_atlantic_grid %>% 
  filter(EPU %in% c("Mid_Atlantic_Bight", "Georges_Bank")) %>% 
  select(MAB_GB = stratum_number) %>% 
  distinct()

GB <- northwest_atlantic_grid %>% 
  filter(EPU %in% c("Georges_Bank")) %>% 
  select(GB = stratum_number) %>% 
  distinct()


MAB <- northwest_atlantic_grid %>% 
  filter(EPU %in% c("Mid_Atlantic_Bight")) %>% 
  select(MAB = stratum_number) %>% 
  distinct()

# configs
FieldConfig <- c(
  "Omega1"   = 0,   # number of spatial variation factors (0, 1, AR1)
  "Epsilon1" = 0,   # number of spatio-temporal factors
  "Omega2"   = 0, 
  "Epsilon2" = 0
) 

# Model selection options, 
# Season_knots + suffix below
#                           FieldConfig default (all IID)
# _noaniso                  FieldConfig default (all IID) and use_anisotropy = FALSE
# _noomeps2                 FieldConfig 0 for Omega2, Epsilon2
# _noomeps2_noaniso         FieldConfig 0 for Omega2, Epsilon2 and use_anisotropy = FALSE
# _noomeps2_noeps1          FieldConfig 0 for Omega2, Epsilon2, Epsilon1
# _noomeps2_noeps1_noaniso  FieldConfig 0 for Omega2, Epsilon2, Epsilon1 and use_anisotropy = FALSE
# _noomeps12                FieldConfig all 0
# _noomeps12_noaniso        FieldConfig all 0 and use_anisotropy = FALSE

RhoConfig <- c(
  "Beta1" = 0,      # temporal structure on years (intercepts) 
  "Beta2" = 0, 
  "Epsilon1" = 0,   # temporal structure on spatio-temporal variation
  "Epsilon2" = 0
) 
# 0 off (fixed effects)
# 1 independent
# 2 random walk
# 3 constant among years (fixed effect)
# 4 AR1

OverdispersionConfig    <- c("eta1"=0, "eta2"=0)
# eta1 = vessel effects on prey encounter rate
# eta2 = vessel effects on prey weight

strata.limits <- as.list(MABGBGOMSS)

settings = make_settings( n_x = 500, 
                          Region = "northwest_atlantic",
                          #strata.limits = list('All_areas' = 1:1e5), full area
                          strata.limits = strata.limits,
                          purpose = "index2", 
                          bias.correct = FALSE,
                          #use_anisotropy = FALSE,
                          #fine_scale = FALSE,
                          #FieldConfig = FieldConfig,
                          #RhoConfig = RhoConfig,
                          OverdispersionConfig = OverdispersionConfig
                          )

 #Aniso=FALSE, #correct ln_H_input at bound
 #FieldConfig["Epsilon1"]=0, #correct L_epsilon1_z approaching 0  
 #FieldConfig["Epsilon2"]=0 #correct L_epsilon2_z approaching 0 
 #increase knots to address bounds for logkappa?
 # https://github.com/James-Thorson-NOAA/VAST/issues/300
 # or try finescale=FALSE
 # then Omegas hit bounds, had to turn them off too

# select dataset and set directory for output

season <- c("fall_500_allsurvs_lenno_tilt")

working_dir <- here::here(sprintf("pyindex/allagg_%s/", season))

if(!dir.exists(working_dir)) {
  dir.create(working_dir)
}

# Run model fall
# fit = fit_model( settings = settings, 
#                  Lat_i = bluepyagg_stn_fall[,'Lat'], 
#                  Lon_i = bluepyagg_stn_fall[,'Lon'], 
#                  t_i = bluepyagg_stn_fall[,'Year'], 
#                  b_i = as_units(bluepyagg_stn_fall[,'Catch_g'], 'g'), 
#                  a_i = as_units(bluepyagg_stn_fall[,'AreaSwept_km2'], 'km^2'),
#                  working_dir = paste0(working_dir, "/"))

fit <- fit_model(
  settings = settings, 
  Lat_i = bluepyagg_stn_fall$Lat, 
  Lon_i = bluepyagg_stn_fall$Lon, 
  t_i = bluepyagg_stn_fall$Year, 
  b_i = as_units(bluepyagg_stn_fall[,'Catch_g'], 'g'),
  a_i = rep(1, nrow(bluepyagg_stn_fall)),
  v_i = bluepyagg_stn_fall$Vessel,
  Q_ik = as.matrix(bluepyagg_stn_fall[,c("meanpisclen", "npiscsp")]),
  #Use_REML = TRUE,
  run_model = FALSE,
  working_dir = paste0(working_dir, "/"))

coastdistdat <-  readRDS(here("spatialdat/neus_coastdistdat.rds"))
alongshore <- readRDS(here("spatialdat/neus.alongshore.axisdistdat.rds"))
offshore <- readRDS(here("spatialdat/neus.offshore.axisdistdat.rds"))

# extract northings and eastings 
Z_gm = fit$spatial_list$loc_g 

# convert northings/eastings to lat/lon
tmpUTM = cbind('PID'=1,'POS'=1:nrow(Z_gm),'X'=Z_gm[,'E_km'],'Y'=Z_gm[,'N_km'])

# create UTM object with correct attributes 
attr(tmpUTM,"projection") = "UTM"
attr(tmpUTM,"zone") = fit$extrapolation_list$zone - ifelse( fit$extrapolation_list$flip_around_dateline==TRUE, 30, 0 )

# convert to lat/lon for merging with custom axis
latlon_g = PBSmapping::convUL(tmpUTM)                                                         #$
latlon_g = cbind( 'Lat'=latlon_g[,"Y"], 'Lon'=latlon_g[,"X"])
latlon_g <- as.data.frame(latlon_g)

# function to find the custom axis position that minimizes the Euclidean distance from each set of VAST coordinates
get_length <- function(lon, lat, distdf) {
  tmp <- distdf 
  tmp$abs.diff.x2 = abs(distdf$x-lon)^2
  tmp$abs.diff.y2 = abs(distdf$y-lat)^2
  # get Euclidean dist from VAST points to all points along the coastal axis  
  tmp$abs.diff.xy = sqrt(tmp$abs.diff.x2 + tmp$abs.diff.y2) 
  tmp <- tmp[tmp$abs.diff.xy==min(tmp$abs.diff.xy),] # keep only the row with the minimum distance
  return(tmp$lengthfromhere) # save the position of that row
}

# apply this to match each VAST knot with a position along the custom axis
# use lat/lon to get coastal distance
coast_km = NULL
for(k in 1:nrow(latlon_g)){
  out = get_length(lon=latlon_g$Lon[k], lat=latlon_g$Lat[k], distdf = coastdistdat)/1000 # works better in a for loop than in apply where it's hard to get the rowwise nature to work--revisit sometime?
  coast_km <- c(coast_km, out)
}

along_km = NULL
for(k in 1:nrow(latlon_g)){
  out = get_length(lon=latlon_g$Lon[k], lat=latlon_g$Lat[k], distdf = alongshore)/1000 # works better in a for loop than in apply where it's hard to get the rowwise nature to work--revisit sometime?
  along_km <- c(along_km, out)
}

offshore_km = NULL
for(k in 1:nrow(latlon_g)){
  out = get_length(lon=latlon_g$Lon[k], lat=latlon_g$Lat[k], distdf = offshore)/1000 # works better in a for loop than in apply where it's hard to get the rowwise nature to work--revisit sometime?
  offshore_km <- c(offshore_km, out)
}

# bind the axis positions that match each knot back to Z_gm
Z_gm = cbind( Z_gm, "coast_km"=coast_km, "along_km"=along_km, "offshore_km"=offshore_km )
Z_gm_axes = colnames(Z_gm)

#fit model with new coordinates
fit <- fit_model(
  settings = settings, 
  Lat_i = bluepyagg_stn_fall$Lat, 
  Lon_i = bluepyagg_stn_fall$Lon, 
  t_i = bluepyagg_stn_fall$Year, 
  b_i = as_units(bluepyagg_stn_fall[,'Catch_g'], 'g'),
  a_i = rep(1, nrow(bluepyagg_stn_fall)),
  v_i = bluepyagg_stn_fall$Vessel,
  Q_ik = as.matrix(bluepyagg_stn_fall[,c("meanpisclen","npiscsp")]),
  #Use_REML = TRUE,
  Z_gm = Z_gm,
  working_dir = paste0(working_dir, "/"))

# Plot results
plot( fit,
      working_dir = paste0(working_dir, "/"))

# Save fit
saveRDS(fit, file = paste0(working_dir, "/fit.rds"))
saveRDS(bluepyagg_stn_fall, file = paste0(working_dir, "/bluepyagg_stn_fall.rds"))

# Run model spring

season <- c("spring_500_allsurvs_lenno_tilt")

working_dir <- here::here(sprintf("pyindex/allagg_%s/", season))

if(!dir.exists(working_dir)) {
  dir.create(working_dir)
}                         
                          

fit = fit_model( settings = settings, 
                 Lat_i = bluepyagg_stn_spring[,'Lat'], 
                 Lon_i = bluepyagg_stn_spring[,'Lon'], 
                 t_i = bluepyagg_stn_spring[,'Year'], 
                 b_i = as_units(bluepyagg_stn_spring[,'Catch_g'], 'g'), 
                 a_i = rep(1, nrow(bluepyagg_stn_spring)),
                 v_i = bluepyagg_stn_spring$Vessel,
                 Q_ik = as.matrix(bluepyagg_stn_spring[,c("meanpisclen","npiscsp")]),
                # Use_REML = TRUE,
                 run_model = FALSE,
                 working_dir = paste0(working_dir, "/"))

coastdistdat <-  readRDS(here("spatialdat/neus_coastdistdat.rds"))
alongshore <- readRDS(here("spatialdat/neus.alongshore.axisdistdat.rds"))
offshore <- readRDS(here("spatialdat/neus.offshore.axisdistdat.rds"))

# extract northings and eastings 
Z_gm = fit$spatial_list$loc_g 

# convert northings/eastings to lat/lon
tmpUTM = cbind('PID'=1,'POS'=1:nrow(Z_gm),'X'=Z_gm[,'E_km'],'Y'=Z_gm[,'N_km'])

# create UTM object with correct attributes 
attr(tmpUTM,"projection") = "UTM"
attr(tmpUTM,"zone") = fit$extrapolation_list$zone - ifelse( fit$extrapolation_list$flip_around_dateline==TRUE, 30, 0 )

# convert to lat/lon for merging with custom axis
latlon_g = PBSmapping::convUL(tmpUTM)                                                         #$
latlon_g = cbind( 'Lat'=latlon_g[,"Y"], 'Lon'=latlon_g[,"X"])
latlon_g <- as.data.frame(latlon_g)

# function to find the custom axis position that minimizes the Euclidean distance from each set of VAST coordinates
get_length <- function(lon, lat, distdf) {
  tmp <- distdf 
  tmp$abs.diff.x2 = abs(distdf$x-lon)^2
  tmp$abs.diff.y2 = abs(distdf$y-lat)^2
  # get Euclidean dist from VAST points to all points along the coastal axis  
  tmp$abs.diff.xy = sqrt(tmp$abs.diff.x2 + tmp$abs.diff.y2) 
  tmp <- tmp[tmp$abs.diff.xy==min(tmp$abs.diff.xy),] # keep only the row with the minimum distance
  return(tmp$lengthfromhere) # save the position of that row
}

# apply this to match each VAST knot with a position along the custom axis
# use lat/lon to get coastal distance
coast_km = NULL
for(k in 1:nrow(latlon_g)){
  out = get_length(lon=latlon_g$Lon[k], lat=latlon_g$Lat[k], distdf = coastdistdat)/1000 # works better in a for loop than in apply where it's hard to get the rowwise nature to work--revisit sometime?
  coast_km <- c(coast_km, out)
}

along_km = NULL
for(k in 1:nrow(latlon_g)){
  out = get_length(lon=latlon_g$Lon[k], lat=latlon_g$Lat[k], distdf = alongshore)/1000 # works better in a for loop than in apply where it's hard to get the rowwise nature to work--revisit sometime?
  along_km <- c(along_km, out)
}

offshore_km = NULL
for(k in 1:nrow(latlon_g)){
  out = get_length(lon=latlon_g$Lon[k], lat=latlon_g$Lat[k], distdf = offshore)/1000 # works better in a for loop than in apply where it's hard to get the rowwise nature to work--revisit sometime?
  offshore_km <- c(offshore_km, out)
}

# bind the axis positions that match each knot back to Z_gm
Z_gm = cbind( Z_gm, "coast_km"=coast_km, "along_km"=along_km, "offshore_km"=offshore_km )
Z_gm_axes = colnames(Z_gm)

# fit model with new coordinates
fit = fit_model( settings = settings, 
                 Lat_i = bluepyagg_stn_spring[,'Lat'], 
                 Lon_i = bluepyagg_stn_spring[,'Lon'], 
                 t_i = bluepyagg_stn_spring[,'Year'], 
                 b_i = as_units(bluepyagg_stn_spring[,'Catch_g'], 'g'), 
                 a_i = rep(1, nrow(bluepyagg_stn_spring)),
                 v_i = bluepyagg_stn_spring$Vessel,
                 Q_ik = as.matrix(bluepyagg_stn_spring[,c("meanpisclen","npiscsp")]),
                 # Use_REML = TRUE,
                 Z_gm = Z_gm,
                 working_dir = paste0(working_dir, "/"))

# Plot results
plot( fit,
      working_dir = paste0(working_dir, "/")) 

# Save fit
saveRDS(fit, file = paste0(working_dir, "/fit.rds"))
saveRDS(bluepyagg_stn_spring, file = paste0(working_dir, "/bluepyagg_stn_spring.rds"))

Here is the best model with both surveys, predator mean length and number of predator species as covariates, with the rotated axes.

Fall Index

[Figure: Fall index time series]

Fall Center of Gravity (far right is inshore-offshore)

[Figure: Fall center of gravity]

Spring Index

[Figure: Spring index time series]

Spring Center of Gravity (far right is inshore-offshore)

[Figure: Spring center of gravity]

Define inshore and offshore regions

Three options for different strata for derived quantities: 3 miles from shore, NEAMAP outer boundary, Albatross inshore strata boundary.

In progress…
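
A rough sketch (not yet run) of the partition step described above: given a logical flag for each extrapolation-grid cell from one of the three options, split predicted density into inshore and offshore portions over time. This assumes fit$Report$D_gct holds density for each grid cell g, category c, and year t (the name may differ by VAST version), that its g dimension lines up with the extrapolation grid rows, and that fit$extrapolation_list$Area_km2_x holds the cell areas; those assumptions need to be checked against the saved fit objects.

# inshore_g: logical vector, one entry per extrapolation grid cell (from one of the three options)
D_gt   <- fit$Report$D_gct[, 1, ]             # single category, so grid cells x years
area_g <- fit$extrapolation_list$Area_km2_x   # km^2 per grid cell

index_inshore  <- colSums(D_gt[inshore_g, , drop = FALSE]  * area_g[inshore_g])
index_offshore <- colSums(D_gt[!inshore_g, , drop = FALSE] * area_g[!inshore_g])

# proportion of the index inshore in each year
prop_inshore <- index_inshore / (index_inshore + index_offshore)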

Ng, E. L., Deroba, J. J., Essington, T. E., Grüss, A., Smith, B. E., and Thorson, J. T. 2021. Predator stomach contents can provide accurate indices of prey biomass. ICES Journal of Marine Science, 78: 1146–1159. https://doi.org/10.1093/icesjms/fsab026 (Accessed 1 September 2021).