INTRO

This is analysis of a survivorship study set up and run by Maddy Bernstein (notebook entry). The goal of the study was to assess the survivorship of 9 different families of M.gigas oysters when exposed to heat stress at 36^oC.

MATERIALS & METHODS

Juvenile M.gigas oysters were distributed across nine 12-well plates and submerged in 4mL of Instant Ocean (~15^oC at beginning of experiment). Plates were incubated at 36^oC and periodically assessed for mortalities over the course of 52hrs.

The post below was knitted from 00.00-mgig-heat-survivorship.Rmd (GitHub).

RESULTS

Overall mortality was high: 81 of 99 oysters (82%) died during the experiment, with a median survival time of 48 hours across all families.

Survival differed significantly among families (log-rank test: χ² = 21.1, df = 8, p = 0.007). Families 1 and 7 showed the highest mortality (100%; 11/11 deaths each). Family 10 had the lowest median survival time (29 hrs), indicating the fastest rate of mortality. Families 8 and 9 were the most resilient, with the fewest deaths (7/11 each) and median survival times of 50 and 48 hours, respectively.

Despite the significant global log-rank result, pairwise comparisons between all family pairs showed no significant differences after Benjamini-Hochberg correction (all adjusted p > 0.05), suggesting the overall significance is driven by the cumulative pattern across families rather than any single strong pairwise contrast.

1 BACKGROUND

This is a Kaplan-Meier survival analysis of 36^oC heat stress of Magallana gigas oysters, conducted on 20260609 (GitHub repo) by Maddy Bernstein. It compares 9 different families bred/selected by the USDA.

Oysters were distributed across nine 12-well plates and submerged in 4mL of Instant Ocean (~15^oC at beginning of experiment). Plates were incubated at 36^oC and periodically assessed for mortalities over the course of 52hrs.

1.1 SETUP

1.1.1 Load packages

library(readr)
library(dplyr)
library(survival)
library(survminer)
library(ggplot2)

knitr::opts_chunk$set(
  echo = TRUE,         # Display code chunks
  eval = TRUE,        # Evaluate code chunks
  warning = FALSE,     # Hide warnings
  message = FALSE,     # Hide messages
  comment = "",         # Prevents appending '##' to beginning of lines in code output
  results = 'hold',    # Holds output so it's all printed together after code chunk
  fig.path = "00.00-mgig-heat-survivorship-20260609/"
)

1.1.2 Read in and view raw data

survivorship_raw <- read_csv(
    "../data/20260609-36C-USDA-families/survivorship.csv",
    show_col_types = FALSE
)

# Preview newly created object
cat("\n=== survivorship_raw: str() ===\n")
str(survivorship_raw)

=== survivorship_raw: str() ===
spc_tbl_ [756 × 11] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ plate_ID         : chr [1:756] "plate-A" "plate-A" "plate-A" "plate-A" ...
 $ plate_well       : chr [1:756] "A01" "A02" "A03" "A04" ...
 $ individual_id    : num [1:756] 1 2 3 4 5 6 7 8 9 10 ...
 $ familly_id.group : chr [1:756] "7" "9" "5" "6" ...
 $ is_blank         : logi [1:756] FALSE FALSE FALSE FALSE FALSE FALSE ...
 $ timepoint_count  : num [1:756] 0 0 0 0 0 0 0 0 0 0 ...
 $ timepoint_hrs    : num [1:756] 0 0 0 0 0 0 0 0 0 0 ...
 $ alive.measurement: logi [1:756] TRUE TRUE TRUE TRUE TRUE TRUE ...
 $ date             : num [1:756] 6092026 6092026 6092026 6092026 6092026 ...
 $ time             : 'hms' num [1:756] 12:35:00 12:35:00 12:35:00 12:35:00 ...
  ..- attr(*, "units")= chr "secs"
 $ notes            : logi [1:756] NA NA NA NA NA NA ...
 - attr(*, "spec")=
  .. cols(
  ..   plate_ID = col_character(),
  ..   plate_well = col_character(),
  ..   individual_id = col_double(),
  ..   familly_id.group = col_character(),
  ..   is_blank = col_logical(),
  ..   timepoint_count = col_double(),
  ..   timepoint_hrs = col_double(),
  ..   alive.measurement = col_logical(),
  ..   date = col_double(),
  ..   time = col_time(format = ""),
  ..   notes = col_logical()
  .. )
 - attr(*, "problems")=<pointer: 0x63ffb62b0a10>

1.2 ANALYSIS

1.2.1 Clean and standardize columns used for survival analysis

survivorship_clean <- survivorship_raw %>%
    mutate(
        individual_id = as.character(individual_id),
        family_group = as.character(`familly_id.group`),
        timepoint_hrs = as.numeric(timepoint_hrs),
        alive_chr = toupper(trimws(as.character(`alive.measurement`))),
        alive = case_when(
            alive_chr %in% c("TRUE", "T", "1", "YES", "Y") ~ TRUE,
            alive_chr %in% c("FALSE", "F", "0", "NO", "N") ~ FALSE,
            TRUE ~ NA
        )
    ) %>%
    select(
        individual_id,
        family_group,
        timepoint_count,
        timepoint_hrs,
        alive,
        date,
        time,
        everything()
    )

# Preview newly created object
cat("\n=== survivorship_clean: str() ===\n")
str(survivorship_clean)
cat("\n=== survivorship_clean: summary(alive) ===\n")
summary(survivorship_clean$alive)

=== survivorship_clean: str() ===
tibble [756 × 14] (S3: tbl_df/tbl/data.frame)
 $ individual_id    : chr [1:756] "1" "2" "3" "4" ...
 $ family_group     : chr [1:756] "7" "9" "5" "6" ...
 $ timepoint_count  : num [1:756] 0 0 0 0 0 0 0 0 0 0 ...
 $ timepoint_hrs    : num [1:756] 0 0 0 0 0 0 0 0 0 0 ...
 $ alive            : logi [1:756] TRUE TRUE TRUE TRUE TRUE TRUE ...
 $ date             : num [1:756] 6092026 6092026 6092026 6092026 6092026 ...
 $ time             : 'hms' num [1:756] 12:35:00 12:35:00 12:35:00 12:35:00 ...
  ..- attr(*, "units")= chr "secs"
 $ plate_ID         : chr [1:756] "plate-A" "plate-A" "plate-A" "plate-A" ...
 $ plate_well       : chr [1:756] "A01" "A02" "A03" "A04" ...
 $ familly_id.group : chr [1:756] "7" "9" "5" "6" ...
 $ is_blank         : logi [1:756] FALSE FALSE FALSE FALSE FALSE FALSE ...
 $ alive.measurement: logi [1:756] TRUE TRUE TRUE TRUE TRUE TRUE ...
 $ notes            : logi [1:756] NA NA NA NA NA NA ...
 $ alive_chr        : chr [1:756] "TRUE" "TRUE" "TRUE" "TRUE" ...

=== survivorship_clean: summary(alive) ===
   Mode   FALSE    TRUE     NAs 
logical     265     428      63

1.2.2 Reduce repeated observations to one survival row per individual

individual_survival <- survivorship_clean %>%
    filter(!is.na(individual_id), !is.na(timepoint_hrs), !is.na(alive)) %>%
    arrange(individual_id, timepoint_hrs) %>%
    group_by(individual_id) %>%
    summarise(
        family_group = first(family_group[!is.na(family_group)]),
        event = if_else(any(!alive), 1L, 0L),
        time_to_event = {
            if (any(!alive)) {
                min(timepoint_hrs[!alive])
            } else {
                max(timepoint_hrs)
            }
        },
        n_observations = n(),
        .groups = "drop"
    )

# Preview newly created object
cat("\n=== individual_survival: str() ===\n")
str(individual_survival)
cat("\n=== individual_survival: summary(time_to_event) ===\n")
summary(individual_survival$time_to_event)
cat("\n=== individual_survival: table(event) ===\n")
table(individual_survival$event)

=== individual_survival: str() ===
tibble [99 × 5] (S3: tbl_df/tbl/data.frame)
 $ individual_id : chr [1:99] "1" "10" "11" "12" ...
 $ family_group  : chr [1:99] "7" "3" "7" "7" ...
 $ event         : int [1:99] 1 1 1 1 1 1 1 1 0 0 ...
 $ time_to_event : num [1:99] 48 50 48 48 48 48 48 26 52 52 ...
 $ n_observations: int [1:99] 7 7 7 7 7 7 7 7 7 7 ...

=== individual_survival: summary(time_to_event) ===
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  24.00   48.00   48.00   44.72   50.00   52.00 

=== individual_survival: table(event) ===

 0  1 
18 81

1.2.3 Create Surv object and fit Kaplan-Meier models

surv_object <- with(individual_survival, Surv(time = time_to_event, event = event))

km_fit_overall <- survfit(surv_object ~ 1, data = individual_survival)
km_fit_by_family <- survfit(surv_object ~ family_group, data = individual_survival)

# Preview newly created objects
cat("\n=== km_fit_overall ===\n")
print(km_fit_overall)
cat("\n=== km_fit_by_family ===\n")
print(km_fit_by_family)
cat("\n=== km_fit_overall: str() ===\n")
str(km_fit_overall)
cat("\n=== km_fit_by_family: str() ===\n")
str(km_fit_by_family)

=== km_fit_overall ===
Call: survfit(formula = surv_object ~ 1, data = individual_survival)

      n events median 0.95LCL 0.95UCL
[1,] 99     81     48      48      48

=== km_fit_by_family ===
Call: survfit(formula = surv_object ~ family_group, data = individual_survival)

                 n events median 0.95LCL 0.95UCL
family_group=1  11     11     48      48      NA
family_group=10 11     10     29      26      NA
family_group=3  11     10     48      48      NA
family_group=5  11      8     48      48      NA
family_group=6  11      9     48      48      NA
family_group=7  11     11     48      NA      NA
family_group=8  11      7     50      48      NA
family_group=9  11      7     48      48      NA
family_group=9b 11      8     48      48      NA

=== km_fit_overall: str() ===
List of 17
 $ n        : int 99
 $ time     : num [1:6] 24 26 29 48 50 52
 $ n.risk   : num [1:6] 99 92 86 79 34 20
 $ n.event  : num [1:6] 7 6 7 45 14 2
 $ n.censor : num [1:6] 0 0 0 0 0 18
 $ surv     : num [1:6] 0.929 0.869 0.798 0.343 0.202 ...
 $ std.err  : num [1:6] 0.0277 0.0391 0.0506 0.139 0.1997 ...
 $ cumhaz   : num [1:6] 0.0707 0.1359 0.2173 0.7869 1.1987 ...
 $ std.chaz : num [1:6] 0.0267 0.0377 0.0487 0.0979 0.1473 ...
 $ type     : chr "right"
 $ logse    : logi TRUE
 $ conf.int : num 0.95
 $ conf.type: chr "log"
 $ lower    : num [1:6] 0.88 0.805 0.723 0.262 0.137 ...
 $ upper    : num [1:6] 0.981 0.938 0.881 0.451 0.299 ...
 $ t0       : num 0
 $ call     : language survfit(formula = surv_object ~ 1, data = individual_survival)
 - attr(*, "class")= chr "survfit"

=== km_fit_by_family: str() ===
List of 18
 $ n        : int [1:9] 11 11 11 11 11 11 11 11 11
 $ time     : num [1:36] 24 29 48 50 52 24 26 29 48 52 ...
 $ n.risk   : num [1:36] 11 9 8 5 1 11 10 6 4 1 ...
 $ n.event  : num [1:36] 2 1 3 4 1 1 4 2 3 0 ...
 $ n.censor : num [1:36] 0 0 0 0 0 0 0 0 0 1 ...
 $ surv     : num [1:36] 0.8182 0.7273 0.4545 0.0909 0 ...
 $ std.err  : num [1:36] 0.142 0.185 0.33 0.953 Inf ...
 $ cumhaz   : num [1:36] 0.182 0.293 0.668 1.468 2.468 ...
 $ std.chaz : num [1:36] 0.129 0.17 0.275 0.486 1.112 ...
 $ strata   : Named int [1:9] 5 5 5 4 5 2 4 2 4
  ..- attr(*, "names")= chr [1:9] "family_group=1" "family_group=10" "family_group=3" "family_group=5" ...
 $ type     : chr "right"
 $ logse    : logi TRUE
 $ conf.int : num 0.95
 $ conf.type: chr "log"
 $ lower    : num [1:36] 0.619 0.506 0.238 0.014 NA ...
 $ upper    : num [1:36] 1 1 0.868 0.589 NA ...
 $ t0       : num 0
 $ call     : language survfit(formula = surv_object ~ family_group, data = individual_survival)
 - attr(*, "class")= chr "survfit"

1.2.4 Perform log-rank test to compare survival between families

logrank_test <- survdiff(surv_object ~ family_group, data = individual_survival)

# Display test results
cat("\n=== Log-rank test: survdiff() output ===\n")
print(logrank_test)

# Extract and report key statistics
cat("\n=== Log-Rank Test Summary ===\n")
cat("Chi-square statistic:", logrank_test$chisq, "\n")
cat("Degrees of freedom:", length(logrank_test$n) - 1, "\n")
cat("p-value:", 1 - pchisq(logrank_test$chisq, df = length(logrank_test$n) - 1), "\n")
cat("\nInterpretation:\n")
p_val <- 1 - pchisq(logrank_test$chisq, df = length(logrank_test$n) - 1)
if (p_val < 0.05) {
  cat("p < 0.05: Family groups show SIGNIFICANTLY DIFFERENT survival (reject null hypothesis)\n")
} else {
  cat("p >= 0.05: No significant difference in survival detected between family groups\n")
}

=== Log-rank test: survdiff() output ===
Call:
survdiff(formula = surv_object ~ family_group, data = individual_survival)

                 N Observed Expected (O-E)^2/E (O-E)^2/V
family_group=1  11       11     8.81   0.54267    1.0179
family_group=10 11       10     4.71   5.94644    9.6692
family_group=3  11       10     8.55   0.24667    0.4561
family_group=5  11        8    10.45   0.57256    1.0981
family_group=6  11        9     8.72   0.00871    0.0164
family_group=7  11       11     7.22   1.97389    3.8135
family_group=8  11        7    10.88   1.38128    2.6734
family_group=9  11        7    11.22   1.58425    3.0942
family_group=9b 11        8    10.45   0.57256    1.0981

 Chisq= 21.1  on 8 degrees of freedom, p= 0.007 

=== Log-Rank Test Summary ===
Chi-square statistic: 21.122 
Degrees of freedom: 8 
p-value: 0.006830309 

Interpretation:
p < 0.05: Family groups show SIGNIFICANTLY DIFFERENT survival (reject null hypothesis)

1.2.5 Pairwise log-rank tests with BH correction

pairwise_results <- pairwise_survdiff(
    Surv(time_to_event, event) ~ family_group,
    data = individual_survival,
    p.adjust.method = "BH"
)

cat("\n=== Pairwise log-rank test (BH-adjusted p-values) ===\n")
print(pairwise_results)

# Extract and display only significant pairs
p_mat <- pairwise_results$p.value
sig_pairs <- which(p_mat <= 0.05, arr.ind = TRUE)

cat("\n=== Significant pairwise comparisons (BH-adjusted p <= 0.05) ===\n")
if (nrow(sig_pairs) == 0) {
    cat("No significant pairwise differences found.\n")
} else {
    sig_df <- data.frame(
        group1      = rownames(p_mat)[sig_pairs[, 1]],
        group2      = colnames(p_mat)[sig_pairs[, 2]],
        p_adjusted  = p_mat[sig_pairs]
    )
    print(sig_df)
}

=== Pairwise log-rank test (BH-adjusted p-values) ===

    Pairwise comparisons using Log-Rank test 

data:  individual_survival and family_group 

   1     10    3     5     6     7     8     9    
10 0.383 -     -     -     -     -     -     -    
3  0.940 0.337 -     -     -     -     -     -    
5  0.369 0.099 0.383 -     -     -     -     -    
6  0.758 0.315 0.856 0.574 -     -     -     -    
7  0.337 0.372 0.383 0.112 0.383 -     -     -    
8  0.264 0.099 0.337 0.762 0.383 0.099 -     -    
9  0.167 0.099 0.264 0.694 0.369 0.099 0.940 -    
9b 0.369 0.099 0.383 1.000 0.574 0.112 0.762 0.694

P value adjustment method: BH 

=== Significant pairwise comparisons (BH-adjusted p <= 0.05) ===
No significant pairwise differences found.

1.2.6 Convert Kaplan-Meier fit to plotting data frames

km_overall_df <- bind_rows(
    data.frame(
        time = 0,
        surv = 1,
        n_risk = km_fit_overall$n,
        n_event = 0,
        n_censor = 0
    ),
    summary(km_fit_overall) %>%
        with(
            data.frame(
                time = time,
                surv = surv,
                n_risk = n.risk,
                n_event = n.event,
                n_censor = n.censor
            )
        )
) %>%
    arrange(time)

km_family_df <- bind_rows(
    data.frame(
        time = 0,
        surv = 1,
        n_risk = as.numeric(km_fit_by_family$n),
        n_event = 0,
        n_censor = 0,
        strata = names(km_fit_by_family$strata)
    ),
    summary(km_fit_by_family) %>%
        with(
            data.frame(
                time = time,
                surv = surv,
                n_risk = n.risk,
                n_event = n.event,
                n_censor = n.censor,
                strata = strata
            )
        )
) %>%
    mutate(family_group = sub("^family_group=", "", strata)) %>%
    arrange(family_group, time)

# Preview newly created objects
cat("\n=== km_overall_df: head() ===\n")
print(head(km_overall_df))
cat("\n=== km_overall_df: str() ===\n")
str(km_overall_df)

cat("\n=== km_family_df: head() ===\n")
print(head(km_family_df))
cat("\n=== km_family_df: str() ===\n")
str(km_family_df)

=== km_overall_df: head() ===
  time      surv n_risk n_event n_censor
1    0 1.0000000     99       0        0
2   24 0.9292929     99       7        0
3   26 0.8686869     92       6        0
4   29 0.7979798     86       7        0
5   48 0.3434343     79      45        0
6   50 0.2020202     34      14        0

=== km_overall_df: str() ===
'data.frame':   7 obs. of  5 variables:
 $ time    : num  0 24 26 29 48 50 52
 $ surv    : num  1 0.929 0.869 0.798 0.343 ...
 $ n_risk  : num  99 99 92 86 79 34 20
 $ n_event : num  0 7 6 7 45 14 2
 $ n_censor: num  0 0 0 0 0 0 18

=== km_family_df: head() ===
  time       surv n_risk n_event n_censor         strata family_group
1    0 1.00000000     11       0        0 family_group=1            1
2   24 0.81818182     11       2        0 family_group=1            1
3   29 0.72727273      9       1        0 family_group=1            1
4   48 0.45454545      8       3        0 family_group=1            1
5   50 0.09090909      5       4        0 family_group=1            1
6   52 0.00000000      1       1        0 family_group=1            1

=== km_family_df: str() ===
'data.frame':   39 obs. of  7 variables:
 $ time        : num  0 24 29 48 50 52 0 24 26 29 ...
 $ surv        : num  1 0.8182 0.7273 0.4545 0.0909 ...
 $ n_risk      : num  11 11 9 8 5 1 11 11 10 6 ...
 $ n_event     : num  0 2 1 3 4 1 0 1 4 2 ...
 $ n_censor    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ strata      : chr  "family_group=1" "family_group=1" "family_group=1" "family_group=1" ...
 $ family_group: chr  "1" "1" "1" "1" ...

1.3 PLOTS

1.3.1 Plot Kaplan-Meier curves

1.3.1.1 Families

family_levels <- local({
    lvls <- unique(km_family_df$family_group)
    lvls[order(readr::parse_number(lvls), lvls)]
})

ggplot(
    km_family_df %>% mutate(family_group = factor(family_group, levels = family_levels)),
    aes(x = time, y = surv, color = family_group)
) +
    geom_step(linewidth = 1.0) +
    labs(
        title = "Kaplan-Meier Survival Curves by Family Group",
        x = "Time (hours)",
        y = "Survival probability",
        color = "Family group"
    ) +
    ylim(0, 1) +
    theme_minimal(base_size = 12)