Model comparison via likelihood ratio tests (LRTs)

Setup and load required libraries

# Set root directory
here::i_am("scripts/DE_model_comparison.Rmd")

# Load libraries 
suppressPackageStartupMessages({
  library(DESeq2)
  library(tidyverse)
  library(dplyr)
  library(UpSetR)
  library(here)
  library(ggplot2)
  library(variancePartition)
  library(BiocParallel)
  library(limma)
})

Data Import and Preprocessing

# Load transcript abundance matrix 
abundance_matrix <- read.table(here::here("data", "DE", "abundance_matrix.txt"), header = TRUE, row.names = 1, check.names = FALSE)
head(abundance_matrix)
##                         39_Melops1040 38_Melops1037 37_Melops1036 36_Melops1035
## TRINITY_GG_716_c838_g1              0             2             0             0
## TRINITY_GG_1114_c422_g3             0             0             0             0
## TRINITY_GG_1463_c526_g6             0             7             0             3
## TRINITY_GG_1586_c745_g1             0             0             0             0
## TRINITY_GG_560_c228_g1              1             2             1             0
## TRINITY_GG_606_c418_g2              1             1             1             1
##                         35_Melops1033 34_Melops1025 33_Melops1024 32_Melops1023
## TRINITY_GG_716_c838_g1              0             0             0             0
## TRINITY_GG_1114_c422_g3             0             0             0             0
## TRINITY_GG_1463_c526_g6             0             0             1             0
## TRINITY_GG_1586_c745_g1             0             0             0             0
## TRINITY_GG_560_c228_g1              0             0             0             0
## TRINITY_GG_606_c418_g2              3             6             5             0
##                         31_Melops1022 30_Melops1021 29_Melops1211 28_Melops1210
## TRINITY_GG_716_c838_g1              0             0             0             0
## TRINITY_GG_1114_c422_g3             0             0             0             0
## TRINITY_GG_1463_c526_g6             2             1             2             0
## TRINITY_GG_1586_c745_g1             0             0             0             3
## TRINITY_GG_560_c228_g1              0             0             4             0
## TRINITY_GG_606_c418_g2              2             2             0             1
##                         27_Melops1209 26_Melops1206 25_Melops1205 24_Melops1082
## TRINITY_GG_716_c838_g1              0             0             0             1
## TRINITY_GG_1114_c422_g3             0             0             0             0
## TRINITY_GG_1463_c526_g6             0             0             0             0
## TRINITY_GG_1586_c745_g1             0             0             0             0
## TRINITY_GG_560_c228_g1              0             0             0             0
## TRINITY_GG_606_c418_g2              0             3             5             0
##                         23_Melops1065 22_Melops1061 21_Melops1058 20_Melops1046
## TRINITY_GG_716_c838_g1              0             0             0             1
## TRINITY_GG_1114_c422_g3             0             0             0             0
## TRINITY_GG_1463_c526_g6             0             0             0             0
## TRINITY_GG_1586_c745_g1             0             0             0             0
## TRINITY_GG_560_c228_g1              0             0             0             0
## TRINITY_GG_606_c418_g2              4             2             1             1
##                         19_Melops1131 18_Melops1075 17_Melops1072 16_Melops1017
## TRINITY_GG_716_c838_g1              0             0             0             0
## TRINITY_GG_1114_c422_g3             0             0             0             0
## TRINITY_GG_1463_c526_g6             0             0             0             0
## TRINITY_GG_1586_c745_g1             0             0             0             0
## TRINITY_GG_560_c228_g1              1             1             0             1
## TRINITY_GG_606_c418_g2              2             1             0             0
##                         15_Melops1014 14_Melops1202 13_Melops1200 12_Melops1197
## TRINITY_GG_716_c838_g1              0             0             0             0
## TRINITY_GG_1114_c422_g3             0             0             0             1
## TRINITY_GG_1463_c526_g6             0             0             0             2
## TRINITY_GG_1586_c745_g1             0             0             0             0
## TRINITY_GG_560_c228_g1              2             0             0             2
## TRINITY_GG_606_c418_g2              2             3             4             0
##                         11_Melops1194 10_Melops1192 09_Melops1158 08_Melops1126
## TRINITY_GG_716_c838_g1              0             0             0             1
## TRINITY_GG_1114_c422_g3             0             0             0             0
## TRINITY_GG_1463_c526_g6             1             0             2             1
## TRINITY_GG_1586_c745_g1             0             0             3             0
## TRINITY_GG_560_c228_g1              0             1             4             0
## TRINITY_GG_606_c418_g2              3             5             5             2
##                         07_Melops1015 06_Melops1010 05_Melops1259 04_Melops1239
## TRINITY_GG_716_c838_g1              0             0             0             0
## TRINITY_GG_1114_c422_g3             0             0             0             0
## TRINITY_GG_1463_c526_g6             1             0             0             0
## TRINITY_GG_1586_c745_g1             0             0             0             0
## TRINITY_GG_560_c228_g1              0             0             0             0
## TRINITY_GG_606_c418_g2              0             4             0             4
##                         03_Melops1218 02_Melops1195 01_Melops1188
## TRINITY_GG_716_c838_g1              0             0             0
## TRINITY_GG_1114_c422_g3             0             0             0
## TRINITY_GG_1463_c526_g6             0             0             0
## TRINITY_GG_1586_c745_g1             0             0             0
## TRINITY_GG_560_c228_g1              0             0             0
## TRINITY_GG_606_c418_g2              3             0             2
# Load sample metadata
sample_info <- read.table(here::here("data", "DE", "samples.txt"), header = FALSE, col.names = c("condition", "sample"))

# Parse temperature and origin from condition
sample_info <- sample_info %>%
  mutate(
    origin = substr(condition, 1, 1),
    temperature = substr(condition, 2, nchar(condition))
  ) %>%
  mutate(
    origin = recode(origin, "W" = "West", "H" = "Hybrid", "S" = "South"),
    temperature = factor(temperature, levels = c("12", "15", "18")),
    origin = factor(origin)
  )
sample_info
##    condition        sample origin temperature
## 1        W18 39_Melops1040   West          18
## 2        W18 38_Melops1037   West          18
## 3        W18 37_Melops1036   West          18
## 4        W18 36_Melops1035   West          18
## 5        W18 35_Melops1033   West          18
## 6        W15 34_Melops1025   West          15
## 7        W15 33_Melops1024   West          15
## 8        W15 32_Melops1023   West          15
## 9        W15 31_Melops1022   West          15
## 10       W15 30_Melops1021   West          15
## 11       W12 29_Melops1211   West          12
## 12       W12 28_Melops1210   West          12
## 13       W12 27_Melops1209   West          12
## 14       W12 26_Melops1206   West          12
## 15       W12 25_Melops1205   West          12
## 16       H18 24_Melops1082 Hybrid          18
## 17       H18 23_Melops1065 Hybrid          18
## 18       H18 22_Melops1061 Hybrid          18
## 19       H18 21_Melops1058 Hybrid          18
## 20       H18 20_Melops1046 Hybrid          18
## 21       H15 19_Melops1131 Hybrid          15
## 22       H15 18_Melops1075 Hybrid          15
## 23       H15 17_Melops1072 Hybrid          15
## 24       H15 16_Melops1017 Hybrid          15
## 25       H15 15_Melops1014 Hybrid          15
## 26       H12 14_Melops1202 Hybrid          12
## 27       H12 13_Melops1200 Hybrid          12
## 28       H12 12_Melops1197 Hybrid          12
## 29       H12 11_Melops1194 Hybrid          12
## 30       H12 10_Melops1192 Hybrid          12
## 31       S18 09_Melops1158  South          18
## 32       S18 08_Melops1126  South          18
## 33       S15 07_Melops1015  South          15
## 34       S15 06_Melops1010  South          15
## 35       S12 05_Melops1259  South          12
## 36       S12 04_Melops1239  South          12
## 37       S12 03_Melops1218  South          12
## 38       S12 02_Melops1195  South          12
## 39       S12 01_Melops1188  South          12

Prepare abundance matrix

# Match column names 
colnames(abundance_matrix) <- sample_info$sample

# Ensure non-negative integer count
abundance_matrix <- round(pmax(abundance_matrix, 0))  

# Keep transcripts with total count ≥ 10 across all samples
keep_transcripts <- rowSums(abundance_matrix) >= 10
abundance_matrix <- abundance_matrix[keep_transcripts, ]

Create DESeq2 datasets for each model

# Define condition model
dds_condition <- DESeqDataSetFromMatrix(countData = abundance_matrix, colData = sample_info, design = ~ condition)

# Define additive model
dds_additive <- DESeqDataSetFromMatrix(countData = abundance_matrix, colData = sample_info, design = ~ temperature + origin)

# Define interaction model
dds_interaction <- DESeqDataSetFromMatrix(countData = abundance_matrix, colData = sample_info, design = ~ temperature * origin)

Run DESeq2 for full model

dds_interaction <- DESeq(dds_interaction)

Compare interaction vs additive model

dds_lrt_inter_vs_add <- DESeq(dds_interaction, test = "LRT", reduced = ~ temperature + origin)
res_inter_vs_add <- results(dds_lrt_inter_vs_add, alpha = 0.05)
summary(res_inter_vs_add)
## 
## out of 168455 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 162, 0.096%
## LFC < 0 (down)     : 579, 0.34%
## outliers [1]       : 116, 0.069%
## low counts [2]     : 140332, 83%
## (mean count < 18)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

Extract transcripts with significant interaction effects for Tier 2. These are candidates for origin-specific temperature responses.

# Filter transcripts with adjusted p-value < 0.05 from the LRT
interaction_transcripts <- rownames(res_inter_vs_add)[which(res_inter_vs_add$padj < 0.05)]

# Save to file for use in additive model DE script (Tier 2 overlap)
write.csv(
  data.frame(transcript_id = interaction_transcripts),
  here::here("results", "DE", "interaction_effects", "significant_interaction_transcripts.csv"),
  row.names = FALSE
)

# Print number of transcripts
cat("Number of transcripts with significant interaction effects:", length(interaction_transcripts), "\n")
## Number of transcripts with significant interaction effects: 741

Compare condition vs null model

dds_condition <- DESeq(dds_condition)
dds_lrt_cond_vs_null <- DESeq(dds_condition, test = "LRT", reduced = ~ 1)
res_cond_vs_null <- results(dds_lrt_cond_vs_null, alpha = 0.05)
summary(res_cond_vs_null)
## 
## out of 168455 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 9449, 5.6%
## LFC < 0 (down)     : 8567, 5.1%
## outliers [1]       : 116, 0.069%
## low counts [2]     : 22862, 14%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

Compare additive vs null model

dds_additive <- DESeq(dds_additive)
dds_lrt_add_vs_null <- DESeq(dds_additive, test = "LRT", reduced = ~ 1)
res_add_vs_null <- results(dds_lrt_add_vs_null, alpha = 0.05)
summary(res_add_vs_null)
## 
## out of 168455 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 10969, 6.5%
## LFC < 0 (down)     : 11903, 7.1%
## outliers [1]       : 322, 0.19%
## low counts [2]     : 88071, 52%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

Compare interaction vs null model

dds_lrt_inter_vs_null <- DESeq(dds_interaction, test = "LRT", reduced = ~ 1)
res_inter_vs_null <- results(dds_lrt_inter_vs_null, alpha = 0.05)
summary(res_inter_vs_null)
## 
## out of 168455 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 8829, 5.2%
## LFC < 0 (down)     : 9187, 5.5%
## outliers [1]       : 116, 0.069%
## low counts [2]     : 22862, 14%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

Visualize results

# Create list of significant transcripts from each comparison
listInput <- list(
  Inter_vs_Add = rownames(res_inter_vs_add)[which(res_inter_vs_add$padj < 0.05)],
  Cond_vs_Null = rownames(res_cond_vs_null)[which(res_cond_vs_null$padj < 0.05)],
  Add_vs_Null = rownames(res_add_vs_null)[which(res_add_vs_null$padj < 0.05)],
  Inter_vs_Null = rownames(res_inter_vs_null)[which(res_inter_vs_null$padj < 0.05)]
)

# UpSet plot
upset(fromList(listInput), order.by = "freq", main.bar.color = "steelblue")

# Intersection table
all_transcripts <- unique(unlist(listInput))
presence_df <- tibble(
  transcript = all_transcripts,
  Inter_vs_Add = all_transcripts %in% listInput$Inter_vs_Add,
  Cond_vs_Null = all_transcripts %in% listInput$Cond_vs_Null,
  Add_vs_Null = all_transcripts %in% listInput$Add_vs_Null,
  Inter_vs_Null = all_transcripts %in% listInput$Inter_vs_Null
)

summary_table <- presence_df %>%
  group_by(Inter_vs_Add, Cond_vs_Null, Add_vs_Null, Inter_vs_Null) %>%
  summarise(TranscriptCount = n(), .groups = "drop") %>%
  arrange(desc(TranscriptCount))

print(summary_table)
## # A tibble: 6 × 5
##   Inter_vs_Add Cond_vs_Null Add_vs_Null Inter_vs_Null TranscriptCount
##   <lgl>        <lgl>        <lgl>       <lgl>                   <int>
## 1 FALSE        TRUE         TRUE        TRUE                    16317
## 2 FALSE        FALSE        TRUE        FALSE                    5883
## 3 FALSE        TRUE         FALSE       TRUE                      962
## 4 TRUE         TRUE         TRUE        TRUE                      672
## 5 TRUE         TRUE         FALSE       TRUE                       65
## 6 TRUE         FALSE        FALSE       FALSE                       4
# Totals for each model
totals_table <- tibble(
  Model = c("Interaction vs Additive", "Condition vs Null", "Additive vs Null", "Interaction vs Null"),
  SignificantTranscripts = c(
    length(listInput$Inter_vs_Add),
    length(listInput$Cond_vs_Null),
    length(listInput$Add_vs_Null),
    length(listInput$Inter_vs_Null)
  )
)

print(totals_table)
## # A tibble: 4 × 2
##   Model                   SignificantTranscripts
##   <chr>                                    <int>
## 1 Interaction vs Additive                    741
## 2 Condition vs Null                        18016
## 3 Additive vs Null                         22872
## 4 Interaction vs Null                      18016

Variance partition

Setup and shared data

# Register parallel backend 
register(SnowParam(4))  

# Use variance-stabilized counts 
vsd <- vst(dds_interaction, blind = FALSE)
expr_matrix <- assay(vsd)
metadata <- as.data.frame(colData(vsd))

# Filter out low-variance transcripts
expr_filtered <- expr_matrix[rowVars(expr_matrix) > 0.1, ]

# Ensure factors
metadata$origin <- factor(metadata$origin)
metadata$temperature <- factor(metadata$temperature)

# Set the rownames of sample_info to match the expression matrix for variance partition on the condition model
rownames(sample_info) <- sample_info$sample

Check number of filtered transcripts

# Number of transcripts before filtering
n_before <- nrow(expr_matrix)

# Number of transcripts after filtering
n_after <- nrow(expr_filtered)

# Number of transcripts removed
n_removed <- n_before - n_after

cat("Transcripts before filtering:", n_before, "\n")
## Transcripts before filtering: 168455
cat("Transcripts after filtering:", n_after, "\n")
## Transcripts after filtering: 22177
cat("Transcripts removed:", n_removed, "\n")
## Transcripts removed: 146278

Variance partition and plotting

# Run variance partitioning
form_condition <- ~ (1|condition)
form_additive <- ~ (1|origin) + (1|temperature)
form_interaction <- ~ (1|origin) + (1|temperature) + (1|origin:temperature)


varPart_condition <- fitExtractVarPartModel(expr_filtered, form_condition, sample_info)
varPart_additive <- fitExtractVarPartModel(expr_filtered, form_additive, metadata)
varPart_interaction <- fitExtractVarPartModel(expr_filtered, form_interaction, metadata)


# Plot 1: Condition model
plotVarPart(varPart_condition) +
  theme_minimal(base_size = 14) +
  ggtitle("Variance Partitioning - Condition Model")

# Plot 2: Additive model
plotVarPart(varPart_additive) +
  theme_minimal(base_size = 14) +
  ggtitle("Variance Partitioning - Additive Model")

# Plot 3: Interaction model
plotVarPart(varPart_interaction) +
  theme_minimal(base_size = 14) +
  ggtitle("Variance Partitioning - Interaction Model")

# Convert to data frames and add model labels
vp_add <- as.data.frame(varPart_additive) %>% mutate(Model = "Additive")
vp_int <- as.data.frame(varPart_interaction) %>% mutate(Model = "Interaction")
vp_cond <- as.data.frame(varPart_condition) %>% mutate(Model = "Condition")

# Combine and reshape
vp_combined <- bind_rows(vp_add, vp_int, vp_cond) %>%
  pivot_longer(cols = -Model, names_to = "Factor", values_to = "Variance") %>%
  filter(is.finite(Variance))

# Plot 4a: Boxplot comparison
ggplot(vp_combined, aes(x = Factor, y = Variance, fill = Model)) +
  geom_boxplot(outlier.size = 0.5) +
  theme_minimal(base_size = 14) +
  labs(title = "Variance Explained by Factor: All Models")

# Plot 4b: Violin plot comparison 
ggplot(vp_combined, aes(x = Factor, y = Variance, fill = Model)) +
  geom_violin(trim = FALSE, scale = "width") +
  theme_minimal(base_size = 14) +
  labs(title = "Variance Distribution by Factor: All Models")

MDS Plot: Sample Clustering

# Compute Euclidean distance matrix
dist_matrix <- dist(t(assay(vsd)))  # transpose so samples are rows

# Perform classical MDS
mds_coords <- cmdscale(dist_matrix, k = 2)
mds_df <- as.data.frame(mds_coords)
colnames(mds_df) <- c("MDS1", "MDS2")
mds_df$Sample <- colnames(vsd)
mds_df <- cbind(mds_df, as.data.frame(colData(vsd)))

# Plot MDS with color by Temperature and shape by Origin
ggplot(mds_df, aes(x = MDS1, y = MDS2, color = factor(temperature), shape = factor(origin))) +
  geom_point(size = 3) +
  labs(
    title = "MDS Plot of Trinity-derived Counts",
    color = "Temperature",
    shape = "Origin"
  ) +
  theme_minimal(base_size = 14)

# Save to file
ggsave(
  filename = here::here("results", "sample_clustering", "MDS", "MDS_plot.png"),
  width = 8, height = 6, dpi = 300
)

Summary

This script compares condition, additive, and interaction models using variance partitioning and LRTs to evaluate differential expression patterns in Symphodus melops. The condition model explained the highest proportion of variance, reflecting its flexibility in treating each origin-temperature combination as a distinct group. However, this structure does not share information across conditions and is prone to overfitting, especially with limited replication. In contrast, the additive model explained substantial variance, particularly for temperature, and identified the largest number of significantly differentially expressed transcripts. This is supported by the MDS plot, which shows that samples primarily cluster by temperature, with less separation by origin — reinforcing the additive model’s ability to capture the dominant biological signal. LRT comparing the interaction model to the additive model revealed that only 741 transcripts (~0.4%) showed significantly improved fit when including the interaction term. These transcripts were considered candidates for origin-dependent temperature responses and were extracted for downstream analysis. Based on these results, the additive model was selected for global differential expression analysis.

Session Info

sessionInfo()
## R version 4.5.1 (2025-06-13)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.3 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
##  [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
##  [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
## [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   
## 
## time zone: Europe/Oslo
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] variancePartition_1.38.1    BiocParallel_1.42.2        
##  [3] limma_3.64.3                here_1.0.2                 
##  [5] UpSetR_1.4.0                lubridate_1.9.4            
##  [7] forcats_1.0.1               stringr_1.5.2              
##  [9] dplyr_1.1.4                 purrr_1.1.0                
## [11] readr_2.1.5                 tidyr_1.3.1                
## [13] tibble_3.3.0                ggplot2_4.0.0              
## [15] tidyverse_2.0.0             DESeq2_1.48.2              
## [17] SummarizedExperiment_1.38.1 Biobase_2.68.0             
## [19] MatrixGenerics_1.20.0       matrixStats_1.5.0          
## [21] GenomicRanges_1.60.0        GenomeInfoDb_1.44.3        
## [23] IRanges_2.42.0              S4Vectors_0.46.0           
## [25] BiocGenerics_0.54.0         generics_0.1.4             
## 
## loaded via a namespace (and not attached):
##  [1] Rdpack_2.6.4            bitops_1.0-9            gridExtra_2.3          
##  [4] rlang_1.1.6             magrittr_2.0.4          compiler_4.5.1         
##  [7] systemfonts_1.3.1       reshape2_1.4.4          vctrs_0.6.5            
## [10] pkgconfig_2.0.3         crayon_1.5.3            fastmap_1.2.0          
## [13] backports_1.5.0         XVector_0.48.0          labeling_0.4.3         
## [16] utf8_1.2.6              caTools_1.18.3          rmarkdown_2.30         
## [19] tzdb_0.5.0              UCSC.utils_1.4.0        nloptr_2.2.1           
## [22] ragg_1.5.0              xfun_0.53               cachem_1.1.0           
## [25] jsonlite_2.0.0          EnvStats_3.1.0          remaCor_0.0.20         
## [28] DelayedArray_0.34.1     broom_1.0.10            parallel_4.5.1         
## [31] R6_2.6.1                bslib_0.9.0             stringi_1.8.7          
## [34] RColorBrewer_1.1-3      boot_1.3-28             jquerylib_0.1.4        
## [37] numDeriv_2016.8-1.1     Rcpp_1.1.0              iterators_1.0.14       
## [40] knitr_1.50              Matrix_1.4-0            splines_4.5.1          
## [43] timechange_0.3.0        tidyselect_1.2.1        abind_1.4-8            
## [46] yaml_2.3.10             gplots_3.2.0            codetools_0.2-18       
## [49] lattice_0.20-45         lmerTest_3.1-3          plyr_1.8.9             
## [52] withr_3.0.2             S7_0.2.0                evaluate_1.0.5         
## [55] pillar_1.11.1           KernSmooth_2.23-20      reformulas_0.4.1       
## [58] rprojroot_2.1.1         hms_1.1.3               scales_1.4.0           
## [61] aod_1.3.3               minqa_1.2.8             gtools_3.9.5           
## [64] RhpcBLASctl_0.23-42     glue_1.8.0              tools_4.5.1            
## [67] fANCOVA_0.6-1           lme4_1.1-37             locfit_1.5-9.12        
## [70] mvtnorm_1.3-3           grid_4.5.1              rbibutils_2.3          
## [73] nlme_3.1-155            GenomeInfoDbData_1.2.14 cli_3.6.5              
## [76] textshaping_1.0.3       S4Arrays_1.8.1          corpcor_1.6.10         
## [79] gtable_0.3.6            sass_0.4.10             digest_0.6.37          
## [82] SparseArray_1.8.1       pbkrtest_0.5.5          farver_2.1.2           
## [85] htmltools_0.5.8.1       lifecycle_1.0.4         httr_1.4.7             
## [88] statmod_1.5.0           MASS_7.3-55