Comparing model types - missing years

Author

Murray Logan

Published

06/07/2025

1 Purpose

2 Preparations

[1] "/home/runner/work/gcrmn_model_dev/gcrmn_model_dev/docs"

Load the necessary R libraries

        library(tidyverse) # for data manipulation and visualisation
        library(sf) # for spatial data handling and visualisation
        library(glmmTMB) # for frequentist GLMM
        library(emmeans) # for post-hoc analysis
        library(DHARMa) # for model diagnostics
        library(patchwork)
        library(brms) # for Bayesian GLMM
        library(rstan) # for Bayesian modelling
        library(bayesplot) # for Bayesian plotting
        library(tidybayes) # for tidy bayesian analysis
        library(posterior) # for posterior analysis
        library(gbm) # for gradient boosted regression trees
        library(dbarts) # for Bayesian additive regression trees
        library(HDInterval) # for Bayesian credible intervals

Establish the global environment and paths

        assign(x = "data_path", value = "../data/", envir = .GlobalEnv)
        assign(x = "output_path", value = "../output/", envir = .GlobalEnv)
        paths <- list(
          data_path = data_path,
          synthetic_path = paste0(data_path, "synthetic/"),
          output_path = output_path,
          fig_path = paste0(output_path, "figures/")
        )
        lapply(paths, function(x) {
          if (!dir.exists(x)) {
            dir.create(x)
          }
        })

Load any helper functions

        source("model_functions.R")
        ## source("helper_functions.R")

3 Import data

These data represent the synthesized “truth” and are thereby useful to compare to the various modelled outcomes.

      benthos_reefs_sf <- readRDS(file = paste0(
        data_path,
        "synthetic/benthos_reefs_sf.rds"
      ))
benthos_reefs_sf 
Simple feature collection with 267132 features and 8 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 0.1086254 ymin: -20.96124 xmax: 9.508625 ymax: -9.981244
Geodetic CRS:  WGS 84
# A tibble: 267,132 × 9
    Year Reef              geometry   HCC     SC    MA    CYC   DHW OTHER
   <int> <chr>          <POINT [°]> <dbl>  <dbl> <dbl>  <dbl> <dbl> <dbl>
 1     1 Reef1 (9.398625 -19.95124) 0.310 0.0750 0.565 0      0.399 0.512
 2     2 Reef1 (9.398625 -19.95124) 0.270 0.0627 0.592 0.570  0.379 0.745
 3     3 Reef1 (9.398625 -19.95124) 0.279 0.0652 0.586 0      0.417 0.486
 4     4 Reef1 (9.398625 -19.95124) 0.291 0.0691 0.578 0.0180 0.340 0.615
 5     5 Reef1 (9.398625 -19.95124) 0.301 0.0722 0.571 0      0.419 0.429
 6     6 Reef1 (9.398625 -19.95124) 0.316 0.0769 0.561 0      0.318 0.728
 7     7 Reef1 (9.398625 -19.95124) 0.328 0.0811 0.553 0      0.355 0.648
 8     8 Reef1 (9.398625 -19.95124) 0.309 0.0748 0.565 0      0.637 0.691
 9     9 Reef1 (9.398625 -19.95124) 0.261 0.0599 0.599 0      0.961 0.575
10    10 Reef1 (9.398625 -19.95124) 0.246 0.0557 0.609 0      0.650 0.519
# ℹ 267,122 more rows
Show the code
      benthos_reefs_temporal_summary <- benthos_reefs_sf |>
        st_drop_geometry() |>
        group_by(Year) |>
        summarise(Mean = mean(HCC),
          Median = median(HCC),
          SD = sd(HCC),
          Lower = quantile(HCC, 0.025),
          Upper = quantile(HCC, 0.975))
      benthos_reefs_temporal_summary |> 
        saveRDS(
          file = paste0(
            data_path,
            "synthetic/benthos_reefs_temporal_summary.rds"
          )
        )
Show the code
      benthos_reefs_temporal_summary <- 
        readRDS(
          file = paste0(
            data_path,
            "synthetic/benthos_reefs_temporal_summary.rds"
          )
        )
      g <- benthos_reefs_temporal_summary |>
        ggplot() +
        geom_ribbon(aes(x = Year, ymin = Lower, ymax = Upper), alpha = 0.2) +
        geom_line(aes(x = Year, y = Mean, colour = "mean")) +
        geom_line(aes(x = Year, y = Median, colour = "median")) +
        theme_bw()
      ggsave(
        filename = paste0(
          fig_path, "R_all_temporal_summary_plot.png"
        ),
        g,
        width = 8, height = 6, dpi = 72
      )

These data represent a simulated sampling design in which 25 fixed (though randomly selected from among all the defined reefs in the synthetic spatial domain) reefs are monitored over a continuous 12 year period.

      benthos_fixed_locs_obs <- readRDS(file = paste0(
        data_path,
        "synthetic/benthos_fixed_locs_obs.rds"
      ))
      benthos_fixed_locs_obs <- benthos_fixed_locs_obs |>
        left_join(
          benthos_reefs_sf |>
            st_drop_geometry() |>
            dplyr::select(Year, Reef, CYC, DHW, OTHER) |>
            group_by(Year, Reef) |>
            summarise(across(c(CYC, DHW, OTHER), mean)),
          by = c("Year", "Reef")
        )
`summarise()` has grouped output by 'Year'. You can override using the
`.groups` argument.
benthos_fixed_locs_obs 
# A tibble: 3,000 × 13
   Reef    Longitude Latitude Site  Transect  Year   HCC    SC    MA
   <chr>       <dbl>    <dbl> <chr> <chr>    <int> <dbl> <dbl> <dbl>
 1 Reef103      3.66    -19.5 S1    T1           1  18.6  3.99  54.9
 2 Reef103      3.66    -19.5 S1    T1           2  19.0  4.32  50.5
 3 Reef103      3.66    -19.5 S1    T1           3  19.0  4.18  48.7
 4 Reef103      3.66    -19.5 S1    T1           4  21.6  4.81  50.2
 5 Reef103      3.66    -19.5 S1    T1           5  25.4  5.20  49.0
 6 Reef103      3.66    -19.5 S1    T1           6  21.2  5.47  46.3
 7 Reef103      3.66    -19.5 S1    T1           7  22.2  5.26  45.6
 8 Reef103      3.66    -19.5 S1    T1           8  23.2  5.18  46.1
 9 Reef103      3.66    -19.5 S1    T1           9  19.7  4.43  47.2
10 Reef103      3.66    -19.5 S1    T1          10  17.4  4.09  54.4
# ℹ 2,990 more rows
# ℹ 4 more variables: Date <dttm>, CYC <dbl>, DHW <dbl>, OTHER <dbl>
Show the code
      benthos_fixed_locs_obs <- benthos_fixed_locs_obs |>
        mutate(
          fYear = as.factor(Year),
          Reef = as.factor(Reef),
          Site = interaction(Reef, Site),
          Transect = interaction(Site, Transect),
          cover = HCC/100
        )
      print(benthos_fixed_locs_obs)
      ## Raw summary means
      all_sampled_sum <-
        benthos_fixed_locs_obs |>
        group_by(fYear) |>
        summarise(
          mean_mean_response = mean(cover),
          median_median_response = median(cover),
          mean_sd = sd(cover),
          mean_lower_lower = mean_mean_response - 1.96 * mean_sd,
          mean_upper_upper = mean_mean_response + 1.96 * mean_sd,
          median_lower_lower = quantile(cover, 0.025),
          median_upper_upper = quantile(cover, 0.975)
        ) |>
        dplyr::select(-mean_sd) |>
        pivot_longer(
          cols = c(
            mean_mean_response, median_median_response, mean_lower_lower,
            mean_upper_upper, median_lower_lower, median_upper_upper
          ),
          names_to = c("type", "variable", "stat"),
          names_sep = "_",
          values_to = "values"
        ) |>
        mutate(type = paste("all_sampled_", type)) |>
        dplyr::select(-variable) |>
        pivot_wider(
          names_from = stat,
          values_from = values
        ) |>
        mutate(Year = as.numeric(as.character(fYear)))
      print(all_sampled_sum)
Show the code
      g <- all_sampled_sum |>
        ggplot(aes(x = Year, y = response, color = type, fill = type)) +
        geom_line() +
        geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.2, color = NA) +
        scale_y_continuous(
          name = "Hard Coral Cover (%)",
          limits = c(0, 1),
          labels = scales::percent_format(accuracy = 1)
        ) +
        theme_bw()
      ggsave(
        filename = paste0(
          fig_path, "R_full_simple_raw_means_plot.png"
        ),
        g,
        width = 8, height = 6, dpi = 72
      )

These data represent the fixed 25 sites, however some of the reefs have a temporal gap between years 2 and 6.

        benthos_fixed_locs_obs_3 <- readRDS(
          file = paste0(
            data_path,
            "synthetic/benthos_fixed_locs_obs_3.rds"
          )
        )
        benthos_fixed_locs_obs_3 <- benthos_fixed_locs_obs_3 |>
          left_join(
            benthos_reefs_sf |>
              st_drop_geometry() |>
              dplyr::select(Year, Reef, CYC, DHW, OTHER) |>
              group_by(Year, Reef) |>
              summarise(across(c(CYC, DHW, OTHER), mean)),
            by = c("Year", "Reef")
          )
`summarise()` has grouped output by 'Year'. You can override using the
`.groups` argument.
benthos_fixed_locs_obs_3 
# A tibble: 2,850 × 13
   Reef    Longitude Latitude Site  Transect  Year   HCC    SC    MA
   <chr>       <dbl>    <dbl> <chr> <chr>    <int> <dbl> <dbl> <dbl>
 1 Reef103      3.66    -19.5 S1    T1           1  18.6  3.99  54.9
 2 Reef103      3.66    -19.5 S1    T1           2  19.0  4.32  50.5
 3 Reef103      3.66    -19.5 S1    T1           3  19.0  4.18  48.7
 4 Reef103      3.66    -19.5 S1    T1           4  21.6  4.81  50.2
 5 Reef103      3.66    -19.5 S1    T1           5  25.4  5.20  49.0
 6 Reef103      3.66    -19.5 S1    T1           6  21.2  5.47  46.3
 7 Reef103      3.66    -19.5 S1    T1           7  22.2  5.26  45.6
 8 Reef103      3.66    -19.5 S1    T1           8  23.2  5.18  46.1
 9 Reef103      3.66    -19.5 S1    T1           9  19.7  4.43  47.2
10 Reef103      3.66    -19.5 S1    T1          10  17.4  4.09  54.4
# ℹ 2,840 more rows
# ℹ 4 more variables: Date <dttm>, CYC <dbl>, DHW <dbl>, OTHER <dbl>
Show the code
      g <- benthos_fixed_locs_obs_3 |>
        dplyr::select(-MA, -SC) |>
        pivot_wider(
          id_cols = c(Reef, Longitude, Latitude, Site, Transect),
          names_from = Year,
          values_from = HCC
        ) |>
        pivot_longer(
          cols = -c(Reef, Longitude, Latitude, Site, Transect),
          names_to = "Year",
          values_to = "HCC"
        ) |>
        mutate(Year = as.numeric(Year)) |>
        ggplot() +
        geom_line(aes(
          y = HCC, x = Year, colour = Site,
          group = interaction(Site, Transect)
        )) +
        facet_wrap(~Reef) +
        theme_bw()

      ggsave(
        filename = paste0(
          fig_path, "R_sampled_reefs_3_plot.png"
        ),
        g,
        width = 8, height = 6, dpi = 72
      )

These data represent the fixed 25 sites, however all of the reefs have a temporal gap between years 2 and 6.

        benthos_fixed_locs_obs_4 <- readRDS(
          file = paste0(
            data_path,
            "synthetic/benthos_fixed_locs_obs_4.rds"
          )
        )
        benthos_fixed_locs_obs_4 <- benthos_fixed_locs_obs_4 |>
          left_join(
            benthos_reefs_sf |>
              st_drop_geometry() |>
              dplyr::select(Year, Reef, CYC, DHW, OTHER) |>
              group_by(Year, Reef) |>
              summarise(across(c(CYC, DHW, OTHER), mean)),
            by = c("Year", "Reef")
          )
`summarise()` has grouped output by 'Year'. You can override using the
`.groups` argument.
benthos_fixed_locs_obs_4 
# A tibble: 2,250 × 13
   Reef    Longitude Latitude Site  Transect  Year   HCC    SC    MA
   <chr>       <dbl>    <dbl> <chr> <chr>    <int> <dbl> <dbl> <dbl>
 1 Reef103      3.66    -19.5 S1    T1           1  18.6  3.99  54.9
 2 Reef103      3.66    -19.5 S1    T1           2  19.0  4.32  50.5
 3 Reef103      3.66    -19.5 S1    T1           6  21.2  5.47  46.3
 4 Reef103      3.66    -19.5 S1    T1           7  22.2  5.26  45.6
 5 Reef103      3.66    -19.5 S1    T1           8  23.2  5.18  46.1
 6 Reef103      3.66    -19.5 S1    T1           9  19.7  4.43  47.2
 7 Reef103      3.66    -19.5 S1    T1          10  17.4  4.09  54.4
 8 Reef103      3.66    -19.5 S1    T1          11  18.5  4.17  53.2
 9 Reef103      3.66    -19.5 S1    T1          12  17.1  4.13  49.1
10 Reef103      3.66    -19.5 S1    T2           1  20.6  3.88  48.2
# ℹ 2,240 more rows
# ℹ 4 more variables: Date <dttm>, CYC <dbl>, DHW <dbl>, OTHER <dbl>
Show the code
      yrs <- benthos_fixed_locs_obs_4 |>
        pull(Year) |>
        full_seq(period = 1)
      g <- benthos_fixed_locs_obs_4 |>
        complete(Year = yrs, nesting(Reef, Longitude, Latitude, Site, Transect)) |>
        ggplot() +
        geom_line(aes(
          y = HCC, x = Year, colour = Site,
          group = interaction(Site, Transect)
        )) +
        facet_wrap(~Reef) +
        theme_bw()

      ggsave(
        filename = paste0(
          fig_path, "R_sampled_reefs_4_plot.png"
        ),
        g,
        width = 8, height = 6, dpi = 72
      )

4 Data preparations

Perform the following data preparation steps:

  • specifically declare any categorical variables (as factors in R, categories in python)
  • ensure that all levels of hierarchical (varying) effects have unique IDs
  • express cover as a proportion out of 1 as is necessary for modelling against a beta distribution
  • create a prediction grid containing all years in the monitoring range
      benthos_fixed_locs_obs_0 <- benthos_fixed_locs_obs |>
        mutate(
          fYear = as.factor(Year),
          Reef = as.factor(Reef),
          Site = interaction(Reef, Site),
          Transect = interaction(Site, Transect),
          cover = HCC/100
        )
      newdata_0 <-
        benthos_fixed_locs_obs_0 |>
        tidyr::expand(fYear)
      benthos_fixed_locs_obs_3 <- benthos_fixed_locs_obs_3 |>
        mutate(
          fYear = as.factor(Year),
          Reef = as.factor(Reef),
          Site = interaction(Reef, Site),
          Transect = interaction(Site, Transect),
          cover = HCC/100
        )
      newdata_3 <-
        benthos_fixed_locs_obs_3 |>
        tidyr::expand(fYear)
      benthos_fixed_locs_obs_4 <- benthos_fixed_locs_obs_4 |>
        mutate(
          fYear = as.factor(Year),
          Reef = as.factor(Reef),
          Site = interaction(Reef, Site),
          Transect = interaction(Site, Transect),
          cover = HCC/100
        )
      newdata_4 <-
        benthos_fixed_locs_obs_4 |>
        tidyr::expand(fYear)