Data Import and Preprocessing
# Load abundance matrix and sample information
abundance_matrix <- read.table(here::here("data", "DE", "abundance_matrix.txt"), header = TRUE, row.names = 1)
head(abundance_matrix)
## X39_Melops1040 X38_Melops1037 X37_Melops1036
## TRINITY_GG_716_c838_g1 0 2 0
## TRINITY_GG_1114_c422_g3 0 0 0
## TRINITY_GG_1463_c526_g6 0 7 0
## TRINITY_GG_1586_c745_g1 0 0 0
## TRINITY_GG_560_c228_g1 1 2 1
## TRINITY_GG_606_c418_g2 1 1 1
## X36_Melops1035 X35_Melops1033 X34_Melops1025
## TRINITY_GG_716_c838_g1 0 0 0
## TRINITY_GG_1114_c422_g3 0 0 0
## TRINITY_GG_1463_c526_g6 3 0 0
## TRINITY_GG_1586_c745_g1 0 0 0
## TRINITY_GG_560_c228_g1 0 0 0
## TRINITY_GG_606_c418_g2 1 3 6
## X33_Melops1024 X32_Melops1023 X31_Melops1022
## TRINITY_GG_716_c838_g1 0 0 0
## TRINITY_GG_1114_c422_g3 0 0 0
## TRINITY_GG_1463_c526_g6 1 0 2
## TRINITY_GG_1586_c745_g1 0 0 0
## TRINITY_GG_560_c228_g1 0 0 0
## TRINITY_GG_606_c418_g2 5 0 2
## X30_Melops1021 X29_Melops1211 X28_Melops1210
## TRINITY_GG_716_c838_g1 0 0 0
## TRINITY_GG_1114_c422_g3 0 0 0
## TRINITY_GG_1463_c526_g6 1 2 0
## TRINITY_GG_1586_c745_g1 0 0 3
## TRINITY_GG_560_c228_g1 0 4 0
## TRINITY_GG_606_c418_g2 2 0 1
## X27_Melops1209 X26_Melops1206 X25_Melops1205
## TRINITY_GG_716_c838_g1 0 0 0
## TRINITY_GG_1114_c422_g3 0 0 0
## TRINITY_GG_1463_c526_g6 0 0 0
## TRINITY_GG_1586_c745_g1 0 0 0
## TRINITY_GG_560_c228_g1 0 0 0
## TRINITY_GG_606_c418_g2 0 3 5
## X24_Melops1082 X23_Melops1065 X22_Melops1061
## TRINITY_GG_716_c838_g1 1 0 0
## TRINITY_GG_1114_c422_g3 0 0 0
## TRINITY_GG_1463_c526_g6 0 0 0
## TRINITY_GG_1586_c745_g1 0 0 0
## TRINITY_GG_560_c228_g1 0 0 0
## TRINITY_GG_606_c418_g2 0 4 2
## X21_Melops1058 X20_Melops1046 X19_Melops1131
## TRINITY_GG_716_c838_g1 0 1 0
## TRINITY_GG_1114_c422_g3 0 0 0
## TRINITY_GG_1463_c526_g6 0 0 0
## TRINITY_GG_1586_c745_g1 0 0 0
## TRINITY_GG_560_c228_g1 0 0 1
## TRINITY_GG_606_c418_g2 1 1 2
## X18_Melops1075 X17_Melops1072 X16_Melops1017
## TRINITY_GG_716_c838_g1 0 0 0
## TRINITY_GG_1114_c422_g3 0 0 0
## TRINITY_GG_1463_c526_g6 0 0 0
## TRINITY_GG_1586_c745_g1 0 0 0
## TRINITY_GG_560_c228_g1 1 0 1
## TRINITY_GG_606_c418_g2 1 0 0
## X15_Melops1014 X14_Melops1202 X13_Melops1200
## TRINITY_GG_716_c838_g1 0 0 0
## TRINITY_GG_1114_c422_g3 0 0 0
## TRINITY_GG_1463_c526_g6 0 0 0
## TRINITY_GG_1586_c745_g1 0 0 0
## TRINITY_GG_560_c228_g1 2 0 0
## TRINITY_GG_606_c418_g2 2 3 4
## X12_Melops1197 X11_Melops1194 X10_Melops1192
## TRINITY_GG_716_c838_g1 0 0 0
## TRINITY_GG_1114_c422_g3 1 0 0
## TRINITY_GG_1463_c526_g6 2 1 0
## TRINITY_GG_1586_c745_g1 0 0 0
## TRINITY_GG_560_c228_g1 2 0 1
## TRINITY_GG_606_c418_g2 0 3 5
## X09_Melops1158 X08_Melops1126 X07_Melops1015
## TRINITY_GG_716_c838_g1 0 1 0
## TRINITY_GG_1114_c422_g3 0 0 0
## TRINITY_GG_1463_c526_g6 2 1 1
## TRINITY_GG_1586_c745_g1 3 0 0
## TRINITY_GG_560_c228_g1 4 0 0
## TRINITY_GG_606_c418_g2 5 2 0
## X06_Melops1010 X05_Melops1259 X04_Melops1239
## TRINITY_GG_716_c838_g1 0 0 0
## TRINITY_GG_1114_c422_g3 0 0 0
## TRINITY_GG_1463_c526_g6 0 0 0
## TRINITY_GG_1586_c745_g1 0 0 0
## TRINITY_GG_560_c228_g1 0 0 0
## TRINITY_GG_606_c418_g2 4 0 4
## X03_Melops1218 X02_Melops1195 X01_Melops1188
## TRINITY_GG_716_c838_g1 0 0 0
## TRINITY_GG_1114_c422_g3 0 0 0
## TRINITY_GG_1463_c526_g6 0 0 0
## TRINITY_GG_1586_c745_g1 0 0 0
## TRINITY_GG_560_c228_g1 0 0 0
## TRINITY_GG_606_c418_g2 3 0 2
sample_info <- read.table(
here::here("data", "DE", "samples.txt"),
header = FALSE,
col.names = c("condition", "sample")
)
sample_info
## condition sample
## 1 W18 39_Melops1040
## 2 W18 38_Melops1037
## 3 W18 37_Melops1036
## 4 W18 36_Melops1035
## 5 W18 35_Melops1033
## 6 W15 34_Melops1025
## 7 W15 33_Melops1024
## 8 W15 32_Melops1023
## 9 W15 31_Melops1022
## 10 W15 30_Melops1021
## 11 W12 29_Melops1211
## 12 W12 28_Melops1210
## 13 W12 27_Melops1209
## 14 W12 26_Melops1206
## 15 W12 25_Melops1205
## 16 H18 24_Melops1082
## 17 H18 23_Melops1065
## 18 H18 22_Melops1061
## 19 H18 21_Melops1058
## 20 H18 20_Melops1046
## 21 H15 19_Melops1131
## 22 H15 18_Melops1075
## 23 H15 17_Melops1072
## 24 H15 16_Melops1017
## 25 H15 15_Melops1014
## 26 H12 14_Melops1202
## 27 H12 13_Melops1200
## 28 H12 12_Melops1197
## 29 H12 11_Melops1194
## 30 H12 10_Melops1192
## 31 S18 09_Melops1158
## 32 S18 08_Melops1126
## 33 S15 07_Melops1015
## 34 S15 06_Melops1010
## 35 S12 05_Melops1259
## 36 S12 04_Melops1239
## 37 S12 03_Melops1218
## 38 S12 02_Melops1195
## 39 S12 01_Melops1188
Prepare dataset and run DESeq2
# Parse temperature and origin from condition string. Assumes condition is formatted like "W12", "S15", etc.
sample_info <- sample_info %>%
mutate(
origin = substr(condition, 1, 1),
temperature = substr(condition, 2, nchar(condition))
) %>%
mutate(
origin = recode(origin, "W" = "West", "H" = "Hybrid", "S" = "South"),
temperature = factor(temperature, levels = c("12", "15", "18")),
origin = factor(origin)
)
# Ensure column names of count matrix match sample names
colnames(abundance_matrix) <- sample_info$sample
# Round counts and ensure non-negative integers (required by DESeq2)
abundance_matrix <- round(pmax(abundance_matrix, 0))
# Create DESeq2 dataset with additive model (temperature + origin)
dds <- DESeqDataSetFromMatrix(
countData = abundance_matrix,
colData = sample_info,
design = ~ temperature + origin
)
# Filter out lowly expressed transcripts (keep those with total count ≥ 10 across all sampl
dds <- dds[rowSums(counts(dds)) >= 10, ]
# Run DESeq2 pipeline (estimate size factors, dispersions, and fit model)
dds <- DESeq(dds)
# Extract coefficient names for downstream contrasts
coef_names <- resultsNames(dds)
# Plot dispersion estimates across transcripts to assess fit to the negative binomial model used by DESeq2
plotDispEsts(dds)

Main function to run pairwise differential expression analysis for a given factor (e.g., temperature or origin).
# Automatically selects shrinkage method based on whether the comparison involves the reference level. Saves full and significant results, generates MA plots, and returns a summary table
run_contrast_analysis <- function(contrast_table, factor_name, output_dir, alpha = 0.05, lfcThreshold = 0.0, coef_names) {
results_list <- list()
counts_summary <- data.frame(
Comparison = character(),
Shrinkage = character(),
Upregulated = integer(),
Downregulated = integer(),
stringsAsFactors = FALSE
)
# Create output directory if it doesn't exist
dir.create(here::here(output_dir), recursive = TRUE, showWarnings = FALSE)
for (i in 1:nrow(contrast_table)) {
level1 <- contrast_table$condition1[i]
level2 <- contrast_table$condition2[i]
name <- paste(level2, "vs", level1, sep = "_")
# Check if the comparison involves the reference level provided by DESeq2 as a coefficient
# Use 'coef' with apeglm shrinkage for reference-level comparisons; use 'contrast' with normal shrinkage otherwise
direct_coef <- is_direct_coef(factor_name, level1, level2, coef_names)
if (direct_coef) {
coef_name <- paste0(factor_name, "_", level2, "_vs_", levels(sample_info[[factor_name]])[1])
res <- lfcShrink(dds, coef = coef_name, type = "apeglm")
shrinkage <- "apeglm"
} else {
res <- lfcShrink(dds, contrast = c(factor_name, level2, level1), type = "normal")
shrinkage <- "normal"
}
results_list[[name]] <- res
# Extract significant transcripts based on adjusted p-value and log2 fold change threshold
sig <- res[which(res$padj < alpha & abs(res$log2FoldChange) > lfcThreshold), ]
up <- sum(sig$log2FoldChange > 0)
down <- sum(sig$log2FoldChange < 0)
# Save full and significant results to CSV
write.csv(as.data.frame(res), file = here::here(output_dir, paste0(name, "_DE_results.csv")), row.names = TRUE)
write.csv(as.data.frame(sig), file = here::here(output_dir, paste0(name, "_significant_transcripts.csv")), row.names = TRUE)
# Generate and save MA plot with up/downregulated transcript counts
png(filename = here::here(output_dir, paste0(name, "_MA_plot.png")), width = 800, height = 600)
plotMA(res, ylim = c(-10, 10), main = paste("MA Plot:", name, "-", shrinkage), alpha = alpha, colSig = "red")
text(x = max(res$baseMean, na.rm = TRUE) * 0.7, y = 9, labels = paste("↑:", up), col = "blue", cex = 0.8)
text(x = max(res$baseMean, na.rm = TRUE) * 0.7, y = -9, labels = paste("↓:", down), col = "blue", cex = 0.8)
dev.off()
# Also display MA plot inline in the report
plotMA(res, ylim = c(-10, 10), main = paste("MA Plot:", name, "-", shrinkage), alpha = alpha, colSig = "red")
text(x = max(res$baseMean, na.rm = TRUE) * 0.7, y = 9, labels = paste("↑:", up), col = "blue", cex = 0.8)
text(x = max(res$baseMean, na.rm = TRUE) * 0.7, y = -9, labels = paste("↓:", down), col = "blue", cex = 0.8)
# Add summary row to the results table
counts_summary <- rbind(counts_summary, data.frame(
Comparison = name,
Shrinkage = shrinkage,
Upregulated = up,
Downregulated = down
))
}
# Return list of DESeq2 results and summary tab
return(list(results = results_list, summary = counts_summary))
}