Updated with data through 2024, Oct 2024

Mark Wuenschel provided the data and code to produce the figures from working papers on spawning phenology of haddock and yellowtail flounder stocks.

Mark’s input data and code are in this repo.

I just ran Mark’s code and will save them as rds files for input into ecodata.

Haddock

Mark’s conclusion from the Haddock assessment working paper (Mark Wuenschel 2021, Spawning phenology of haddock: analysis of reproductive condition determined on NEFSC bottom trawl surveys):

Haddock spawn during the late winter-spring months in the region, with the spring survey sampling fish in the latter portion of the spawning season (peak to end). There is evidence that spawning of both haddock stocks occurred earlier in the year (and ended earlier) in the recent decade (2010s) as compared to earlier in the time series. Recent spring surveys have sampled a greater proportion of post spawning fish.

2023 Input data: haddock_1970_2019_export.csv

2024 Input data: haddock_1963_2024_BTS.csv

Processing code: Haddock_spawning_phenology_2021WP.R

I ran the code from lines 1:161 to get the datasets.

## program to read data files,
# plot proportions pre -spawn-post spawn to evaluate changes in spawning phenology during SBTS
## program to read data files,
# plot proportions pre -spawn-post spawn to evaluate changes in spawning phenology during SBTS
####condense proportions maturity by temp and Day of year
# last modified 2/16/2021 for haddock assessment working paper

rm(list=ls(all=TRUE))  # clear the workspace of all existing variables
#setwd("C:\\Users\\mark.wuenschel\\Documents\\Rfiles\\R\\Spawning_Phenology\\haddock") # set the path for the working directory
#od= 'C:/Users/mark.wuenschel/Documents/Rfiles/R/Spawning_Phenology/haddock/'   #directory for writing output

#library(MASS)


HADdata=as.data.frame(read.csv(file="haddock_1963_2024_BTS.csv", header=T, sep=","))
#Allspp_SPdata$mean[which(Allspp_SPdata$mean==0)] <- NA
data=subset(HADdata[HADdata$SVSPP=='74',]) #select species

#maxFL<-max(as.numeric(data[,7])) #need to change to appropriate column in new dataset

###add in stock by specifying strata
GBSTRAT<- c('1130','1140','1150','1160','1170','1180','1190','1200','1210','1220','1230','1240','1250','1290','1300')
GOMSTRAT<-c('1260','1270','1280','1360','1370','1380','1390','1400')


data$STOCK <-NA

sel <-which(data$STRATUM %in% GBSTRAT)
data$STOCK[sel]<- with (data[sel,], 'GB')


sel <-which(data$STRATUM %in% GOMSTRAT)
data$STOCK[sel]<- with (data[sel,], 'GOM')

###  summary(as.factor(data$STOCK))

###add in day of year from date
data$DOY <-strftime(as.Date(data$BEGIN_EST_TOWDATE, "%d-%b-%y"), format = "%j")
data$DOY<- as.numeric(data$DOY)


### calculate proportions pre-spawning, spawning active, post-spawning in each season/year/stock
###drop males and immatures and unknowns
data=subset(data[data$BOTTEMP!=0,])
data=subset(data[data$SEX=='2',])
data=subset(data[data$MATURITY!='Unknown',])
data=subset(data[data$MATURITY!='Immature',])
data=subset(data[data$EST_YEAR!=1969,])
# data$SPAWNCOND <- NA
# sel <-which(data$MATURITY=="Developing" )
# data$SPAWNCOND[sel]<- with (data[sel,], 'Prespawning')
# sel <-which(data$MATURITY=="Ripe"|data$MATURITY=="Ripe and Running" )
# data$SPAWNCOND[sel]<- with (data[sel,], 'Spawning')
# sel <-which(data$MATURITY=="Spent"|data$MATURITY=="Resting" )
# data$SPAWNCOND[sel]<- with (data[sel,], 'Postspawning')
data$SPAWNCOND <- NA
sel <-which(data$MATURITY=="Developing" )
data$SPAWNCOND[sel]<- with (data[sel,], 'Developing')
sel <-which(data$MATURITY=="Ripe"|data$MATURITY=="Ripe and Running" )
data$SPAWNCOND[sel]<- with (data[sel,], 'Ripe')
sel <-which(data$MATURITY=="Spent"|data$MATURITY=="Spent" )
data$SPAWNCOND[sel]<- with (data[sel,], 'Spent')
sel <-which(data$MATURITY=="Resting" )
data$SPAWNCOND[sel]<- with (data[sel,], 'Resting')


#data2=subset(data[data$STOCK==c("CC","SNE","GB"),])
data2<- data[!is.na(data$STOCK), ]

##concatenate cruise 6 and stock to ease calculations of proportions
data$CRUISE6_STOCK <- paste(data$CRUISE6,"_",data$STOCK)

data$PROPSPAWN <- NA
#data$PROPSPAWN<- rowsum(data$SPAWNCOND, c(data$EST_YEAR,data$SEASON,dta$STOCK), na.rm=T)
###loop to calculateproportions for each year, season, stock


####cleaning up the data a little#####
###may need to remove some small fish that are listed as mature (<20 cm) seems unlikely. 
data$EST_YEAR <- as.factor(data$EST_YEAR)
data$STOCK <- as.factor(data$STOCK)
#library(plyr); 
#library(dplyr)
# results.by.year.season.stock <- ddply(.data=data, .var=C(data$EST_YEAR,data$SEASON, data$STOCK), .fun=function(x) {
#   data.frame(n=nrow(x),
#              num_prespawn = nrow(subset(x, data$SPAWNCOND %in%
#                                           "Prespawning")) 
#              
#              #prop_prespawn= nrow(subset(x, data$SPAWNCOND %in%
#              #                             "Prespawning")) / nrow(x)
#             )
# })


#results.by.year.season.stock <- ddply(data, C(data$EST_YEAR,data$SEASON, data$STOCK), mutate, nprespawn=(sum(count(data$SPAWNCOND=="Prespawning"))))


###aggregate by year and season

####this parts gets the summary date and temp data
data3<- data2 %>%
  group_by (EST_YEAR, SEASON, STOCK, SPAWNCOND) %>%
  summarize(minJDAY= min(DOY),
            meanJDAY= mean(DOY),
            maxJDAY=max(DOY),
            meanLENGTH = mean(LENGTH),
            meanTEMP=mean(BOTTEMP),
            #Prespawning= n (SPAWNCOND=="Prespawning"),
            #Prespawning= length (SPAWNCOND=="Prespawning"),
            totalMF=n()
            )
## `summarise()` has grouped output by 'EST_YEAR', 'SEASON', 'STOCK'. You can
## override using the `.groups` argument.
###this part tallies by spawn condition and adds colums for each, so there is one row per yearxseasonxstock
#library(reshape2)
data3<-dcast(data3,EST_YEAR+SEASON+STOCK~ SPAWNCOND, value.var="totalMF")
  

data4<- data2 %>%
  group_by (EST_YEAR, SEASON, STOCK) %>%
  summarise(minJDAY= min(DOY),
            meanJDAY= mean(DOY),
            maxJDAY=max(DOY),
            meanLENGTH = mean(LENGTH),
            meanTEMP=mean(BOTTEMP)
            #Prespawning= n (SPAWNCOND=="Prespawning"),
            #Prespawning= length (SPAWNCOND=="Prespawning"),
            #totalMF=n()
  )
## `summarise()` has grouped output by 'EST_YEAR', 'SEASON'. You can override
## using the `.groups` argument.
### merging to tack on the bio and env data to the summary of spawning info
data5<- merge(data3, data4, by = c("EST_YEAR", "SEASON", "STOCK"))

### add in columns for proportions
# data5$Postspawning[is.na(data5$Postspawning)]<-0
# data5$Prespawning[is.na(data5$Prespawning)]<-0
# data5$Spawning[is.na(data5$Spawning)]<-0
# 
# data5$PROPPRE<- 100*data5$Prespawning/(data5$Prespawning+data5$Spawning+data5$Postspawning)
# data5$PROPSP<- 100*data5$Spawning/(data5$Prespawning+data5$Spawning+data5$Postspawning)
# data5$PROPPOS<- 100*data5$Postspawning/(data5$Prespawning+data5$Spawning+data5$Postspawning)
# data5$MF<-(data5$Prespawning+data5$Spawning+data5$Postspawning)
# #data$mean[which(data$mean==0)] <- NA
# ##break data into spring and fall separately for plotting and analysis

data5$DURATION <-data5$maxJDAY-data5$minJDAY
### add in columns for proportions
data5$Developing[is.na(data5$Developing)]<-0
data5$Ripe[is.na(data5$Ripe)]<-0
data5$Spent[is.na(data5$Spent)]<-0
data5$Resting[is.na(data5$Resting)]<-0


data5$PROPD<- 100*data5$Developing/(data5$Developing+data5$Ripe+data5$Spent+data5$Resting)
data5$PROPRI<- 100*data5$Ripe/(data5$Developing+data5$Ripe+data5$Spent+data5$Resting)
data5$PROPSP<- 100*data5$Spent/(data5$Developing+data5$Ripe+data5$Spent+data5$Resting)
data5$PROPRE<- 100*data5$Resting/(data5$Developing+data5$Ripe+data5$Spent+data5$Resting)
data5$MF<-(data5$Developing+data5$Ripe+data5$Spent+data5$Resting)
#data$mean[which(data$mean==0)] <- NA
##break data into spring and fall separately for plotting and analysis

data5$DURATION <-data5$maxJDAY-data5$minJDAY

Haddock is already by year, plot:

annualhad <- data5 |> 
  dplyr::filter(SEASON %in% c("SPRING", "FALL")) |>
  dplyr::mutate(Time = as.numeric(EST_YEAR)) |>
  dplyr::select(SEASON, STOCK, Time, 
                Developing = PROPD, 
                Ripe = PROPRI, 
                Spent = PROPSP, 
                Resting = PROPRE) |>
  tidyr::pivot_longer(Developing:Resting, names_to = "Stage", values_to = "Value") 

mat.col=c("orange", "cyan", "purple", "coral3")
names(mat.col) <- as.factor(c("Developing", "Ripe", "Spent", "Resting"))



p <- ggplot2::ggplot(annualhad, 
                     ggplot2::aes(x=Time, y=Value, fill = as.factor(Stage))) + # , colour = Stage
  #ggplot2::geom_point() +
  #ggplot2::geom_line() +
  #ecodata::geom_gls() +
  ggplot2::geom_bar(stat="identity", position="fill") +
  ggplot2::scale_fill_manual(values = mat.col) +
  ggplot2::facet_wrap(SEASON~STOCK) + 
  ecodata::theme_facet() +
  ggplot2::ggtitle("Haddock stocks")

p

Which stages are increasing or decreasing in proportion?

p <- ggplot2::ggplot(annualhad |> dplyr::filter(Stage %in% c("Developing")), 
                     ggplot2::aes(x=Time, y=Value, color = Stage)) + # , colour = Stage
  ggplot2::geom_point() +
  #ggplot2::geom_line() +
  ecodata::geom_gls() +
  #ggplot2::geom_bar(stat="identity", position="fill") +
  #ggplot2::scale_fill_manual(values = mat.col) +
  ggplot2::facet_wrap(SEASON~STOCK) + 
  ecodata::theme_facet() +
  ggplot2::ggtitle("Haddock stocks")

p

p <- ggplot2::ggplot(annualhad |> dplyr::filter(Stage %in% c("Resting")), 
                     ggplot2::aes(x=Time, y=Value, color = Stage)) + # , colour = Stage
  ggplot2::geom_point() +
  #ggplot2::geom_line() +
  ecodata::geom_gls() +
  #ggplot2::geom_bar(stat="identity", position="fill") +
  #ggplot2::scale_fill_manual(values = mat.col) +
  ggplot2::facet_wrap(SEASON~STOCK) + 
  ecodata::theme_facet() +
  ggplot2::ggtitle("Haddock stocks")

p

p <- ggplot2::ggplot(annualhad |> dplyr::filter(Stage %in% c("Ripe")), 
                     ggplot2::aes(x=Time, y=Value, color = Stage)) + # , colour = Stage
  ggplot2::geom_point() +
  #ggplot2::geom_line() +
  ecodata::geom_gls() +
  #ggplot2::geom_bar(stat="identity", position="fill") +
  #ggplot2::scale_fill_manual(values = mat.col) +
  ggplot2::facet_wrap(SEASON~STOCK) + 
  ecodata::theme_facet() +
  ggplot2::ggtitle("Haddock stocks")

p

p <- ggplot2::ggplot(annualhad |> dplyr::filter(Stage %in% c("Spent")), 
                     ggplot2::aes(x=Time, y=Value, color = Stage)) + # , colour = Stage
  ggplot2::geom_point() +
  #ggplot2::geom_line() +
  ecodata::geom_gls() +
  #ggplot2::geom_bar(stat="identity", position="fill") +
  #ggplot2::scale_fill_manual(values = mat.col) +
  ggplot2::facet_wrap(SEASON~STOCK) + 
  ecodata::theme_facet() +
  ggplot2::ggtitle("Haddock stocks")

p
## Error in `coef<-.corARMA`(`*tmp*`, value = value[parMap[, i]]) : 
##   Coefficient matrix not invertible
## Warning: The following aesthetics were dropped during statistical transformation: x and
## y.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_gls()`).

Plot temperatures, bottom temp from survey stations with maturity samples increasing in GB Spring, GOM Fall

annualhadtemp <- data5 |> 
  dplyr::filter(SEASON %in% c("SPRING", "FALL")) |>
  dplyr::mutate(Time = as.numeric(EST_YEAR)) |>
  dplyr::select(SEASON, STOCK, Time, meanTEMP, MF) # adds number of mature females

p <- ggplot2::ggplot(annualhadtemp, 
                     ggplot2::aes(x=Time, y=meanTEMP)) + # , colour = Stage
  ggplot2::geom_point() +
  ggplot2::geom_line() +
  ecodata::geom_gls() +
  ggplot2::facet_wrap(SEASON~STOCK) + 
  ecodata::theme_facet() +
  ggplot2::ggtitle("Haddock stocks")

p

Mean surveyed day of year, not changing over time

annualhadday <- data5 |> 
  dplyr::filter(SEASON %in% c("SPRING", "FALL")) |>
  dplyr::mutate(Time = as.numeric(EST_YEAR)) |>
  dplyr::select(SEASON, STOCK, Time, meanJDAY)

p <- ggplot2::ggplot(annualhadday, 
                     ggplot2::aes(x=Time, y=meanJDAY, color = SEASON)) + # , colour = Stage
  ggplot2::geom_point() +
  ggplot2::geom_line() +
  ecodata::geom_gls() +
  ggplot2::facet_wrap(~STOCK) + 
  ecodata::theme_facet() +
  ggplot2::ggtitle("Haddock stocks")

p

haddock <- left_join(annualhad, annualhadtemp) |>
  left_join(annualhadday)
## Joining with `by = join_by(SEASON, STOCK, Time)`
## Joining with `by = join_by(SEASON, STOCK, Time)`
haddock$Species <- "Haddock"

Yellowtail

Mark’s conclusions from the YT assessment working paper (Spawning phenology of Yellowtail Flounder: analysis of reproductive condition determined on NEFSC bottom trawl surveys)

Yellowtail Flounder spawn during the spring months in the region, with the spring survey sampling fish during the spawning season. There is evidence that spawning condition is related to the bottom temperature, week of year, and decade sampled for each of the three stocks and survey timing and environmental conditions (bottom temperature) have been variable over time. The analysis of macroscopic data presented here provides some evidence for a change in the spawning season of CC GOM and SNE stocks. Early in the time series, sampling captured the beginning/early portion of spawning season, while later in the time series sampling has captured more of the peak in spawning.

2023 Input data: YT_63_23_BTS.csv

2024 Input data: YT_1963_2024_BTS.csv

Processing code: YT_Prop_Mat_by_temp_week_timeblocks_CC_WP.R

I ran the code from lines 1:219 to get the datasets.

## program to read data files,
# plot proportions pre -spawn-post spawn to evaluate changes in spawning phenology during SBTS
####condense proportions maturity by temp and Day of year
# last modified 10/10/2018

#rm(list=ls(all=TRUE))  # clear the workspace of all existing variables
#setwd("C:\\R\\Spawning_Phenology") # set the path for the working directory
#od= 'C:/R/Spawning_Phenology/output/'   #directory for writing output

#library(MASS)

YTdata=as.data.frame(read.csv(file="YT_1963_2024_BTS.csv", header=T, sep=","))
#Allspp_SPdata$mean[which(Allspp_SPdata$mean==0)] <- NA
data=subset(YTdata[YTdata$SVSPP=='105',]) #select species

#maxFL<-max(as.numeric(data[,7])) #need to change to appropriate column in new dataset

###add in stock by specifying strata
GBSTRAT<- c('1130','1140','1150','1160','1170','1180','1190','1200','1210')
SNESTRAT_SP<- c('1010','1020','1050','1060','1090','1100','1690','1730','1740')
SNESTRAT_FA<- c('1010','1020','1050','1060','1090','1100')
CCSTRAT_SP<- c('1250','1260','1270','1390','1400','3560','3570','3590','3600','3610','3620','3640','3650','3660')
CCSTRAT_FA<- c('1250','1260',       '1390','1400','3560','3570','3590','3600','3610','3620','3640','3650','3660')


data$STOCK <-NA

sel <-which(data$STRATUM %in% GBSTRAT)
data$STOCK[sel]<- with (data[sel,], 'GB')

sel <-which(data$STRATUM %in% SNESTRAT_SP)
data$STOCK[sel]<- with (data[sel,], 'SNE')


sel <-which(data$STRATUM %in% CCSTRAT_SP)
data$STOCK[sel]<- with (data[sel,], 'CC')

###  summary(as.factor(data$STOCK))

###add in day of year from date
data$DOY <-strftime(as.Date(data$BEGIN_EST_TOWDATE, "%d-%b-%y"), format = "%j")
data$DOY<- as.numeric(data$DOY)


### calculate proportions pre-spawning, spawning active, post-spawning in each season/year/stock
###drop males and immatures and unknowns
data=subset(data[data$EST_YEAR!='1977',])
data=subset(data[data$SEX=='2',])
data=subset(data[data$MATURITY!='Unknown',])
data=subset(data[data$MATURITY!='Immature',])
data=subset(data[data$EST_YEAR!=1969,])



data$SPAWNCOND <- NA
sel <-which(data$MATURITY=="Developing" )
data$SPAWNCOND[sel]<- with (data[sel,], 'Prespawning')
sel <-which(data$MATURITY=="Ripe"|data$MATURITY=="Ripe and Running" )
data$SPAWNCOND[sel]<- with (data[sel,], 'Spawning')
sel <-which(data$MATURITY=="Spent"|data$MATURITY=="Resting" )
data$SPAWNCOND[sel]<- with (data[sel,], 'Postspawning')

sel <-which(data$MATURITY=="Ripe"|data$MATURITY=="Ripe and Running" )
data$MATURITY[sel]<- with (data[sel,], 'Ripe')

data$EST_YEAR <- as.factor(data$EST_YEAR)
data$STOCK <- as.factor(data$STOCK)
data$PROPSPAWN <- NA
data$TEMPBIN <-round(data$BOTTEMP, digits=0)
data$TEMPBIN <-as.factor(data$TEMPBIN)
data$WOY <- data$DOY/7
data$WOY <-round(data$WOY, digits=0)
data$WOY <-as.factor(data$WOY)
data<-(data[!is.na(data$TEMPBIN),])

##concatenate cruise 6 and stock to ease calculations of proportions
#data$CRUISE6_STOCK <- paste(data$CRUISE6,"_",data$STOCK)

data$YEARBLOCK <- NA
YB70s<- c('1970','1971','1972','1973','1974','1975','1976','1977','1978','1979')
YB80s<- c('1980','1981','1982','1983','1984','1985','1986','1987','1988','1989')
YB90s<- c('1990','1991','1992','1993','1994','1995','1996','1997','1998','1999')
YB00s<- c('2000','2001','2002','2003','2004','2005','2006','2007','2008','2009')
YB10s<- c('2010','2011','2012','2013','2014','2015','2016','2017','2018','2019')
YB20s<- c('2020','2021','2022','2023','2024','2025','2026','2027','2028','2029')
sel <-which(as.character(data$EST_YEAR) %in% YB70s )
data$YEARBLOCK[sel]<- with (data[sel,], 'YB70s')
sel <-which(as.character(data$EST_YEAR) %in% YB80s )
data$YEARBLOCK[sel]<- with (data[sel,], 'YB80s')
sel <-which(data$EST_YEAR %in% YB90s )
data$YEARBLOCK[sel]<- with (data[sel,], 'YB90s')
sel <-which(data$EST_YEAR %in% YB00s )
data$YEARBLOCK[sel]<- with (data[sel,], 'YB00s')
sel <-which(data$EST_YEAR %in% YB10s )
data$YEARBLOCK[sel]<- with (data[sel,], 'YB10s')
sel <-which(data$EST_YEAR %in% YB20s )
data$YEARBLOCK[sel]<- with (data[sel,], 'YB20s')

#data2=subset(data[data$STOCK==c("CC","SNE","GB"),])
data2<- data[!is.na(data$STOCK), ]

#data$PROPSPAWN<- rowsum(data$SPAWNCOND, c(data$EST_YEAR,data$SEASON,dta$STOCK), na.rm=T)
###loop to calculateproportions for each year, season, stock


####cleaning up the data a little#####
###may need to remove some small fish that are listed as mature (<20 cm) seems unlikely. 
#library(plyr); 
#library(dplyr)
# results.by.year.season.stock <- ddply(.data=data, .var=C(data$EST_YEAR,data$SEASON, data$STOCK), .fun=function(x) {
#   data.frame(n=nrow(x),
#              num_prespawn = nrow(subset(x, data$SPAWNCOND %in%
#                                           "Prespawning")) 
#              
#              #prop_prespawn= nrow(subset(x, data$SPAWNCOND %in%
#              #                             "Prespawning")) / nrow(x)
#             )
# })


#results.by.year.season.stock <- ddply(data, C(data$EST_YEAR,data$SEASON, data$STOCK), mutate, nprespawn=(sum(count(data$SPAWNCOND=="Prespawning"))))


###aggregate by year and season

####this parts gets the summary date and temp data
data3<- data2 %>%
  group_by (SEASON, STOCK, TEMPBIN, YEARBLOCK, MATURITY) %>%
  summarize(minJDAY= min(DOY),
            meanJDAY= mean(DOY),
            maxJDAY=max(DOY),
            meanLENGTH = mean(LENGTH),
            meanTEMP=mean(BOTTEMP, na.rm=T),
            #Prespawning= n (SPAWNCOND=="Prespawning"),
            #Prespawning= length (SPAWNCOND=="Prespawning"),
            totalMF=n()
  )
## `summarise()` has grouped output by 'SEASON', 'STOCK', 'TEMPBIN', 'YEARBLOCK'.
## You can override using the `.groups` argument.
###this part tallies by spawn condition and adds colums for each, so there is one row per yearxseasonxstock
#library(reshape2)
data3<-dcast(data3,SEASON+STOCK+TEMPBIN+YEARBLOCK~ MATURITY, value.var="totalMF")


data4<- data2 %>%
  group_by ( SEASON, STOCK, TEMPBIN,YEARBLOCK) %>%
  summarise(minJDAY= min(DOY),
            meanJDAY= mean(DOY),
            maxJDAY=max(DOY),
            meanLENGTH = mean(LENGTH),
            meanTEMP=mean(BOTTEMP, na.rm=T)
            #Prespawning= n (SPAWNCOND=="Prespawning"),
            #Prespawning= length (SPAWNCOND=="Prespawning"),
            #totalMF=n()
  )
## `summarise()` has grouped output by 'SEASON', 'STOCK', 'TEMPBIN'. You can
## override using the `.groups` argument.
### merging to tack on the bio and env data to the summary of spawning info
data5<- merge(data3, data4, by = c( "SEASON", "STOCK","TEMPBIN", "YEARBLOCK"))
### add in columns for proportions
data5$Developing[is.na(data5$Developing)]<-0
data5$Ripe[is.na(data5$Ripe)]<-0
data5$Spent[is.na(data5$Spent)]<-0
data5$Resting[is.na(data5$Resting)]<-0

data5$PROPD<- 100*data5$Developing/(data5$Developing+data5$Ripe+data5$Spent+data5$Resting)
data5$PROPR<- 100*data5$Ripe/(data5$Developing+data5$Ripe+data5$Spent+data5$Resting)
data5$PROPS<- 100*data5$Spent/(data5$Developing+data5$Ripe+data5$Spent+data5$Resting)
data5$PROPT<- 100*data5$Resting/(data5$Developing+data5$Ripe+data5$Spent+data5$Resting)
data5$MF<-(data5$Developing+data5$Ripe+data5$Spent+data5$Resting )
#data$mean[which(data$mean==0)] <- NA


#####doing similar merge to get WOY summary
####this parts gets the summary date and temp data
WOYdata3<- data2 %>%
  group_by (SEASON, STOCK, WOY, MATURITY, YEARBLOCK) %>%
  summarize(minJDAY= min(DOY),
            meanJDAY= mean(DOY),
            maxJDAY=max(DOY),
            meanLENGTH = mean(LENGTH),
            meanTEMP=mean(BOTTEMP, na.rm=T),
            #Prespawning= n (SPAWNCOND=="Prespawning"),
            #Prespawning= length (SPAWNCOND=="Prespawning"),
            totalMF=n()
  )
## `summarise()` has grouped output by 'SEASON', 'STOCK', 'WOY', 'MATURITY'. You
## can override using the `.groups` argument.
###this part tallies by spawn condition and adds colums for each, so there is one row per yearxseasonxstock
#library(reshape2)
WOYdata3<-dcast(WOYdata3,SEASON+STOCK+WOY+YEARBLOCK~ MATURITY, value.var="totalMF")


WOYdata4<- data2 %>%
  group_by ( SEASON, STOCK, WOY,YEARBLOCK) %>%
  summarise(minJDAY= min(DOY),
            meanJDAY= mean(DOY),
            maxJDAY=max(DOY),
            meanLENGTH = mean(LENGTH),
            meanTEMP=mean(BOTTEMP, na.rm=T)
            #Prespawning= n (SPAWNCOND=="Prespawning"),
            #Prespawning= length (SPAWNCOND=="Prespawning"),
            #totalMF=n()
  )
## `summarise()` has grouped output by 'SEASON', 'STOCK', 'WOY'. You can override
## using the `.groups` argument.
### merging to tack on the bio and env data to the summary of spawning info
WOYdata5<- merge(WOYdata3, WOYdata4, by = c( "SEASON", "STOCK","WOY","YEARBLOCK"))


### add in columns for proportions
WOYdata5$Developing[is.na(WOYdata5$Developing)]<-0
WOYdata5$Ripe[is.na(WOYdata5$Ripe)]<-0
WOYdata5$Spent[is.na(WOYdata5$Spent)]<-0
WOYdata5$Resting[is.na(WOYdata5$Resting)]<-0

WOYdata5$PROPD<- 100*WOYdata5$Developing/(WOYdata5$Developing+WOYdata5$Ripe+WOYdata5$Spent+WOYdata5$Resting)
WOYdata5$PROPR<- 100*WOYdata5$Ripe/(WOYdata5$Developing+WOYdata5$Ripe+WOYdata5$Spent+WOYdata5$Resting)
WOYdata5$PROPS<- 100*WOYdata5$Spent/(WOYdata5$Developing+WOYdata5$Ripe+WOYdata5$Spent+WOYdata5$Resting)
WOYdata5$PROPT<- 100*WOYdata5$Resting/(WOYdata5$Developing+WOYdata5$Ripe+WOYdata5$Spent+WOYdata5$Resting)
WOYdata5$MF<-(WOYdata5$Developing+WOYdata5$Ripe+WOYdata5$Spent+WOYdata5$Resting )
#data$mean[which(data$mean==0)] <- NA
##break data into spring and fall separately for plotting and analysis

To get annual percent by spawning stage, I’ll start with data2 and group by year instead of yearblock but otherwise use the same code:

###aggregate by year and season

####this parts gets the summary date and temp data
data3yr<- data2 %>%
  group_by (SEASON, STOCK, EST_YEAR, MATURITY) %>%
  summarize(minJDAY= min(DOY),
            meanJDAY= mean(DOY),
            maxJDAY=max(DOY),
            meanLENGTH = mean(LENGTH),
            meanTEMP=mean(BOTTEMP, na.rm=T),
            #Prespawning= n (SPAWNCOND=="Prespawning"),
            #Prespawning= length (SPAWNCOND=="Prespawning"),
            totalMF=n()
  )
## `summarise()` has grouped output by 'SEASON', 'STOCK', 'EST_YEAR'. You can
## override using the `.groups` argument.
data3yr<-dcast(data3yr,SEASON+STOCK+EST_YEAR~ MATURITY, value.var="totalMF")

data4yr<- data2 %>%
  group_by ( SEASON, STOCK, EST_YEAR) %>%
  summarise(minJDAY= min(DOY),
            meanJDAY= mean(DOY),
            maxJDAY=max(DOY),
            meanLENGTH = mean(LENGTH),
            meanTEMP=mean(BOTTEMP, na.rm=T)
            #Prespawning= n (SPAWNCOND=="Prespawning"),
            #Prespawning= length (SPAWNCOND=="Prespawning"),
            #totalMF=n()
  )
## `summarise()` has grouped output by 'SEASON', 'STOCK'. You can override using
## the `.groups` argument.
data5yr<- merge(data3yr, data4yr, by = c( "SEASON", "STOCK", "EST_YEAR"))

### add in columns for proportions
data5yr$Developing[is.na(data5yr$Developing)]<-0
data5yr$Ripe[is.na(data5yr$Ripe)]<-0
data5yr$Spent[is.na(data5yr$Spent)]<-0
data5yr$Resting[is.na(data5yr$Resting)]<-0

data5yr$PROPD<- 100*data5yr$Developing/(data5yr$Developing+data5yr$Ripe+data5yr$Spent+data5yr$Resting)
data5yr$PROPR<- 100*data5yr$Ripe/(data5yr$Developing+data5yr$Ripe+data5yr$Spent+data5yr$Resting)
data5yr$PROPS<- 100*data5yr$Spent/(data5yr$Developing+data5yr$Ripe+data5yr$Spent+data5yr$Resting)
data5yr$PROPT<- 100*data5yr$Resting/(data5yr$Developing+data5yr$Ripe+data5yr$Spent+data5yr$Resting)
data5yr$MF<-(data5yr$Developing+data5yr$Ripe+data5yr$Spent+data5yr$Resting )

Plot annual percentage by stage

annualyt <- data5yr |> 
  dplyr::filter(SEASON %in% c("SPRING", "FALL")) |>
  dplyr::mutate(Time = as.numeric(as.character(EST_YEAR))) |>
  dplyr::select(SEASON, STOCK, Time, 
                Developing = PROPD, 
                Ripe = PROPR, 
                Spent = PROPS, 
                Resting = PROPT) |>
  tidyr::pivot_longer(Developing:Resting, names_to = "Stage", values_to = "Value") 

mat.col=c("orange", "cyan", "purple", "coral3")
names(mat.col) <- as.factor(c("Developing", "Ripe", "Spent", "Resting"))



p <- ggplot2::ggplot(annualyt, 
                     ggplot2::aes(x=Time, y=Value, fill = as.factor(Stage))) + # , colour = Stage
  #ggplot2::geom_point() +
  #ggplot2::geom_line() +
  #ecodata::geom_gls() +
  ggplot2::geom_bar(stat="identity", position="fill") +
  ggplot2::scale_fill_manual(values = mat.col) +
  ggplot2::facet_wrap(SEASON~STOCK) + 
  ecodata::theme_facet() +
  ggplot2::ggtitle("Yellowtail flounder stocks")

p

Which stages are increasing or decreasing in proportion?

p <- ggplot2::ggplot(annualyt |> dplyr::filter(Stage %in% c("Developing")), 
                     ggplot2::aes(x=Time, y=Value, color = Stage)) + # , colour = Stage
  ggplot2::geom_point() +
  #ggplot2::geom_line() +
  ecodata::geom_gls() +
  #ggplot2::geom_bar(stat="identity", position="fill") +
  #ggplot2::scale_fill_manual(values = mat.col) +
  ggplot2::facet_wrap(SEASON~STOCK) + 
  ecodata::theme_facet() +
  ggplot2::ggtitle("Yellowtail flounder stocks")

p

p <- ggplot2::ggplot(annualyt |> dplyr::filter(Stage %in% c("Resting")), 
                     ggplot2::aes(x=Time, y=Value, color = Stage)) + # , colour = Stage
  ggplot2::geom_point() +
  #ggplot2::geom_line() +
  ecodata::geom_gls() +
  #ggplot2::geom_bar(stat="identity", position="fill") +
  #ggplot2::scale_fill_manual(values = mat.col) +
  ggplot2::facet_wrap(SEASON~STOCK) + 
  ecodata::theme_facet() +
  ggplot2::ggtitle("Yellowtail flounder stocks")

p

p <- ggplot2::ggplot(annualyt |> dplyr::filter(Stage %in% c("Ripe")), 
                     ggplot2::aes(x=Time, y=Value, color = Stage)) + # , colour = Stage
  ggplot2::geom_point() +
  #ggplot2::geom_line() +
  ecodata::geom_gls() +
  #ggplot2::geom_bar(stat="identity", position="fill") +
  #ggplot2::scale_fill_manual(values = mat.col) +
  ggplot2::facet_wrap(SEASON~STOCK) + 
  ecodata::theme_facet() +
  ggplot2::ggtitle("Yellowtail flounder stocks")

p
## Error in `coef<-.corARMA`(`*tmp*`, value = value[parMap[, i]]) : 
##   Coefficient matrix not invertible
## Warning: The following aesthetics were dropped during statistical transformation: x and
## y.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_gls()`).

p <- ggplot2::ggplot(annualyt |> dplyr::filter(Stage %in% c("Spent")), 
                     ggplot2::aes(x=Time, y=Value, color = Stage)) + # , colour = Stage
  ggplot2::geom_point() +
  #ggplot2::geom_line() +
  ecodata::geom_gls() +
  #ggplot2::geom_bar(stat="identity", position="fill") +
  #ggplot2::scale_fill_manual(values = mat.col) +
  ggplot2::facet_wrap(SEASON~STOCK) + 
  ecodata::theme_facet() +
  ggplot2::ggtitle("Yellowtail flounder stocks")

p

Plot time block and temp bin percentage by stage (not evaluated)

decadal <- data5 |> 
  dplyr::filter(SEASON %in% c("SPRING", "FALL")) |>
  dplyr::mutate(Time = YEARBLOCK) |>
  dplyr::select(SEASON, STOCK, Time, PROPD, PROPR, PROPS, PROPT) |>
  tidyr::pivot_longer(PROPD:PROPT, names_to = "Stage", values_to = "Value")

p <- ggplot2::ggplot(decadal, # #|>dplyr::filter(Stage == "PROPT"), 
                     ggplot2::aes(x=Time, y=Value, colour = Stage)) + # , colour = Stage
  ggplot2::geom_point() +
  ggplot2::geom_line() +
  #ecodata::geom_gls() +
  ggplot2::facet_wrap(SEASON~STOCK) + 
  ecodata::theme_facet()

p

Plot temperatures, bottom temp from survey stations with maturity samples increasing in all but SNE Spring

annualyttemp <- data5yr |> 
  dplyr::filter(SEASON %in% c("SPRING", "FALL")) |>
  dplyr::mutate(Time = as.numeric(as.character(EST_YEAR))) |>
  dplyr::select(SEASON, STOCK, Time, meanTEMP, MF) # adds number of mature females) 

p <- ggplot2::ggplot(annualyttemp, 
                     ggplot2::aes(x=Time, y=meanTEMP)) + # , colour = Stage
  ggplot2::geom_point() +
  ggplot2::geom_line() +
  ecodata::geom_gls() +
  ggplot2::facet_wrap(SEASON~STOCK) + 
  ecodata::theme_facet() +
  ggplot2::ggtitle("Yellowtail flounder stocks")

p

Day of year, not changing over time aside from GB spring

annualytday <- data5yr |> 
  dplyr::filter(SEASON %in% c("SPRING", "FALL")) |>
  dplyr::mutate(Time = as.numeric(as.character(EST_YEAR))) |>
  dplyr::select(SEASON, STOCK, Time, meanJDAY)

p <- ggplot2::ggplot(annualytday, 
                     ggplot2::aes(x=Time, y=meanJDAY, color = SEASON)) + # , colour = Stage
  ggplot2::geom_point() +
  ggplot2::geom_line() +
  ecodata::geom_gls() +
  ggplot2::facet_wrap(~STOCK) + 
  ecodata::theme_facet() +
  ggplot2::ggtitle("Yellowtail flounder stocks")

p

yellowtail <- left_join(annualyt, annualyttemp) |>
  left_join(annualytday)
## Joining with `by = join_by(SEASON, STOCK, Time)`
## Joining with `by = join_by(SEASON, STOCK, Time)`
yellowtail$Species <- "Yellowtail"

Combined datastet

spawntime <- dplyr::bind_rows(haddock, yellowtail)

p <- spawntime |>
  dplyr::filter(SEASON == "SPRING") |>
  ggplot2::ggplot(ggplot2::aes(x=Time, y=Value, fill = as.factor(Stage))) + # , colour = Stage
  #ggplot2::geom_point() +
  #ggplot2::geom_line() +
  #ecodata::geom_gls() +
  ggplot2::geom_bar(stat="identity", position="fill") +
  ggplot2::scale_fill_manual(values = mat.col) +
  ggplot2::facet_wrap(Species~STOCK) + 
  ecodata::theme_facet() +
  ggplot2::ggtitle("Spawning stage proportions")

g <- ggplot2::ggplotGrob(p)

gl <- g$layout
idcol <- gl$r == (ncol(g)-2)
g$layout[idcol & gl$b < 5, c("t", "b")] <- gl[idcol & gl$b < 5, c("t", "b")] + 4
grid::grid.newpage()
grid::grid.draw(g)

N mature females

p <- spawntime |>
  dplyr::filter(SEASON == "SPRING") |>
  dplyr::select(-c(Stage, Value)) |>
  dplyr::distinct() |>
  ggplot2::ggplot(ggplot2::aes(x=Time, y=MF)) + # , colour = Stage
  ggplot2::geom_point() +
  ggplot2::geom_line() +
  ecodata::geom_gls() +
  #ggplot2::geom_bar(stat="identity", position="fill") +
  #ggplot2::scale_fill_manual(values = mat.col) +
  ggplot2::facet_wrap(Species~STOCK) + 
  ecodata::theme_facet() +
  ggplot2::ggtitle("Number of mature females sampled")

p

p <- spawntime |>
  dplyr::filter(SEASON == "SPRING",
                Stage %in% c("Resting")) |>
  ggplot2::ggplot(ggplot2::aes(x=Time, y=Value, color = Stage)) + # , colour = Stage
  ggplot2::geom_point() +
  #ggplot2::geom_line() +
  ecodata::geom_gls() +
  #ggplot2::geom_bar(stat="identity", position="fill") +
  #ggplot2::scale_fill_manual(values = mat.col) +
  ggplot2::facet_wrap(Species~STOCK) + 
  ecodata::theme_facet() +
  ggplot2::ggtitle("Spring Spawning Stage")

p

# g <- ggplot2::ggplotGrob(p)
# 
# gl <- g$layout
# idcol <- gl$r == (ncol(g)-2)
# g$layout[idcol & gl$b < 5, c("t", "b")] <- gl[idcol & gl$b < 5, c("t", "b")] + 4
# grid::grid.newpage()
# grid::grid.draw(g)
p <- spawntime |>
  dplyr::filter(SEASON == "SPRING") |> 
  dplyr::select(-c(Stage, Value)) |>
  dplyr::distinct() |>
  ggplot2::ggplot(ggplot2::aes(x=Time, y=meanTEMP)) + # , colour = Stage
  ggplot2::geom_point() +
  ggplot2::geom_line() +
  ecodata::geom_gls() +
  ggplot2::facet_wrap(Species~STOCK) + 
  ecodata::theme_facet() +
  ggplot2::ggtitle("Mean sampled bottom temperature")

p

p <- spawntime |>
  dplyr::filter(SEASON == "SPRING") |> 
  dplyr::select(-c(Stage, Value)) |>
  dplyr::distinct() |>
  ggplot2::ggplot(ggplot2::aes(x=Time, y=meanJDAY)) + # , colour = Stage
  ggplot2::geom_point() +
  ggplot2::geom_line() +
  ecodata::geom_gls() +
  ggplot2::facet_wrap(Species~STOCK) + 
  ecodata::theme_facet() +
  ggplot2::ggtitle("Mean sampled day of year")

p

Dataset for ecodata

This code takes spawntime dataset above and formats for ecodata. Also makes get and plot functions.

Variable names and units:

Variable names follow the convention “SEASON_Species_STOCK_Variable” where Variable and Units are: meanTEMP: mean sampled bottom temperature in degrees C MF: number of mature females sampled meanJDAY: mean julian day of year sampled Developing: percent of mature females at developing (pre-spawning) stage Ripe: percent of mature females at ripe (spawning) stage Spent: percent of mature females at spent (immediately post-spawning) stage Resting: percent of mature females at resting (non-spawning) stage

# Mark suggested adding number of mature females

spawn_timing <- spawntime |>
  # Var is SEASON Species STOCK MatStage or SEASON Species STOCK meanTemp or SEASON Species STOCK meanJDAY
  tidyr::unite("Var", c(SEASON, Species, STOCK), sep = "_") |>
  tidyr::pivot_wider(names_from = "Stage", values_from = "Value") |>
  tidyr::pivot_longer(meanTEMP:Resting, names_to = "Var2", values_to = "Value") |>
  dplyr::mutate(Units = dplyr::case_when(Var2 == "meanTEMP" ~ "bottom temperature degrees C",
                                         Var2 == "meanJDAY" ~ "julian day",
                                         Var2 == "MF" ~ "number of mature females sampled",
                                         TRUE ~ "percent of mature females at stage"),
                EPU = NA) |>
  tidyr::unite("Var", c(Var, Var2)) |>
  dplyr::select(Time, Var, Value, EPU, Units)

saveRDS(spawn_timing, here::here("spawn_timing.rds"))

get function for ecodata

## Spawn timing
raw.dir<- here::here("data-raw/")
sptime <- readRDS(file.path(raw.dir, fal))

get_spawn_timing <- function(save_clean = F){

  spawn_timing<- readRDS(file.path(raw.dir, sptime))

  if (save_clean){
    usethis::use_data(spawn_timing, overwrite = T)
  } else {
    return(spawn_timing)
  }
}
get_spawn_timing(save_clean = T)

plot function for ecodata

#' plot spawn_timing
#'
#' Plots time series of maturity stage and associated data for available stocks
#'
#' @param shadedRegion Numeric vector. Years denoting the shaded region of the plot (most recent 10)
#' @param report Character string. Which SOE report ("MidAtlantic", "NewEngland")
#' @param varName Character string. Which variable to plot: 
#' maturity stage ("Resting", "Ripe", "Spent", "Developing"),
#' number of mature females ("MF"), 
#' mean sampled bottom temperature ("meanTEMP"), 
#' or mean sampled day of year ("meanJDAY")
#'
#' @return ggplot object
#'
#'
#' @export
#'

plot_spawn_timing <- function(shadedRegion = NULL,
                              report="MidAtlantic",
                              varName = "Resting") {
  
  # generate plot setup list (same for all plot functions)
  setup <- ecodata::plot_setup(shadedRegion = shadedRegion,
                               report=report)
  
  # which report? this may be bypassed for some figures
  # dataset has NA EPU field but may be added later, so keep this here
  # NOTE no filtering by EPU happens below, change later!
  
  if (report == "MidAtlantic") {
    filterEPUs <- c("MAB")
  } else {
    filterEPUs <- c("GB", "GOM")
  }
  
  # optional code to wrangle ecodata object prior to plotting
  # e.g., calculate mean, max or other needed values to join below
  fix<- ecodata::spawn_timing |>
    tidyr::separate(Var, into = c("Season", "Species", "Stock", "Var"), sep = "_") 
  
  # code for generating plot object p
  # ensure that setup list objects are called as setup$...
  # e.g. fill = setup$shade.fill, alpha = setup$shade.alpha,
  # xmin = setup$x.shade.min , xmax = setup$x.shade.max
  #
  
  # SPRING data plots by default for spring spawning species
  # May have to change to add fall option later if we get fall spawners
  
  
  filt <- fix |>
    dplyr::filter(Season == "SPRING",
                  Var == varName) |>
    dplyr::mutate(Species = factor(Species, levels = c("Yellowtail", "Haddock")),
                  Stock = factor(Stock, levels = c("GOM", "CC", "GB", "SNE")))
  
  p <- ggplot2::ggplot(filt, ggplot2::aes(x = Time, y = Value)) +
    ggplot2::annotate("rect", fill = setup$shade.fill, alpha = setup$shade.alpha,
                      xmin = setup$x.shade.min , xmax = setup$x.shade.max,
                      ymin = -Inf, ymax = Inf) +
    ggplot2::geom_point() +
    ggplot2::geom_line() +
    ecodata::geom_gls() +
    #ggplot2::geom_bar(stat="identity", position="fill") +
    #ggplot2::scale_fill_manual(values = mat.col) +
    ggplot2::ylab(filt$Units) +
    ggplot2::facet_wrap(Species~Stock,
                        labeller = ggplot2::label_wrap_gen(multi_line=FALSE)) + 
    ecodata::theme_facet() + 
    ecodata::theme_title()    
  
  if(varName %in% c("Resting", "Ripe", "Spent", "Developing")){
    p <- p + ggplot2::ggtitle(paste(stringr::str_to_sentence(filt$Season), varName, "Spawning Stage")) 
  }
  
  if(varName %in% c("MF", "meanTEMP", "meanJDAY")){
    varLong <- dplyr::case_when(varName == "MF" ~ "Mature Females",
                                varName == "meanTEMP" ~ "Mean Sampled Temperature",
                                varName == "meanJDAY" ~ "Mean Sampling Date")
    
    p <-p + ggplot2::ggtitle(paste(stringr::str_to_sentence(filt$Season), varLong))
  }
  
  return(p)
  
}
attr(plot_forage_index,"report") <- c("MidAtlantic","NewEngland")
attr(plot_forage_index, "varName") <- c("Resting", "Ripe", "Spent", "Developing", "MF", "meanTEMP", "meanJDAY")