# Load necessary libraries
library(DESeq2)
library(tidyverse)

# STEP 1: Read in the count matrix
# Replace "counts.txt" with the output from featureCounts
# This file should have genes as rows and samples as columns
count_data <- read.delim("your_counts_file.txt", comment.char = "#", row.names = 1)

# Remove unnecessary columns if featureCounts adds annotation info (e.g., Chr, Start, End, etc.)
# Adjust column indices if needed
count_data <- count_data[, -c(1:5)]  # Keep only count columns

# STEP 2: Define sample metadata (modify this to your sample names and conditions)
# Each row corresponds to a column in the count matrix
# Replace 'Sample1', 'Sample2', etc. with your actual sample names
col_data <- data.frame(
  row.names = colnames(count_data),
  condition = c("Group1", "Group1", "Group2", "Group2")  # Replace with your actual groupings
)

# Convert condition to a factor
col_data$condition <- factor(col_data$condition)

# STEP 3: Create DESeq2 dataset
dds <- DESeqDataSetFromMatrix(countData = count_data,
                              colData = col_data,
                              design = ~ condition)

# Filter out low-count genes (optional but recommended)
dds <- dds[rowSums(counts(dds)) >= 10, ]

# STEP 4: Run DESeq2
dds <- DESeq(dds)

# STEP 5: Extract results (default is Group2 vs Group1)
res <- results(dds)

# Order results by adjusted p-value
res <- res[order(res$padj), ]

# STEP 6: Save output to file
write.csv(as.data.frame(res), file = "deseq2_results.csv")

# Optional: MA-plot and PCA
plotMA(res, ylim = c(-5, 5))
vsd <- vst(dds, blind = FALSE)
plotPCA(vsd, intgroup = "condition")