# 接上一篇《TCGA数据整理》,本节继续讲解TCGA差异基因分析。
# ---- Setup ----
# Differential expression analysis (tumor vs. normal) of a TCGA count matrix
# with DESeq2. Reads "count_matrix0.csv" from the working directory set below
# and writes the significant genes to "DGE.csv" in the same directory.
# NOTE(review): rm(list = ls()) and setwd() are kept so the script behaves as
# before, but both are discouraged in shared scripts (they clobber the caller's
# session and hard-code a machine-specific path).
rm(list = ls())
setwd("D:/TCGA-生信分析/Data/LUAD/mRNA/")
# BiocManager::install("DESeq2")
library(DESeq2)

# ---- Load data ----
# Rows are genes (first CSV column used as row names), columns are samples.
# check.names = FALSE keeps the barcodes intact (hyphens are not mangled).
count_matrix0 <- read.csv(
  "count_matrix0.csv",
  sep = ",",
  header = TRUE,
  row.names = 1,
  check.names = FALSE
)

# ---- Build the sample grouping ----
# Characters 14-15 of each column name are read as the TCGA sample-type code:
# 01-09 = tumor, 10-19 = normal, 20-29 = control samples.
type_code <- substr(colnames(count_matrix0), 14, 15)
if (!all(grepl("^[0-9]{2}$", type_code))) {
  stop(
    "Column names must carry a two-digit sample-type code at ",
    "positions 14-15; offending columns: ",
    paste(colnames(count_matrix0)[!grepl("^[0-9]{2}$", type_code)],
          collapse = ", "),
    call. = FALSE
  )
}
sample_type <- as.integer(type_code)

is_tumor <- sample_type < 10L
# Fix: the normal range is 10-19 inclusive. The previous `> 10` test dropped
# code 10 and treated control samples (20-29) as normal tissue.
is_normal <- sample_type >= 10L & sample_type < 20L

if (!any(is_tumor) || !any(is_normal)) {
  stop("Need at least one tumor and one normal sample.", call. = FALSE)
}
n_excluded <- sum(!is_tumor & !is_normal)
if (n_excluded > 0) {
  message(n_excluded, " sample(s) with codes outside 01-19 were excluded.")
}

# Index by position rather than by name so duplicated column names cannot
# select the wrong column, and keep the data-frame shape for 1-sample groups.
tumor_idx <- which(is_tumor)
normal_idx <- which(is_normal)
count_matrix1 <- count_matrix0[, c(tumor_idx, normal_idx), drop = FALSE]

group_list <- c(
  rep('tumor', length(tumor_idx)),
  rep('normal', length(normal_idx))
)
# Explicit levels: "normal" is the reference, so log2FoldChange is
# tumor relative to normal regardless of alphabetical ordering.
condition <- factor(group_list, levels = c("normal", "tumor"))
coldata <- data.frame(
  condition = condition,
  row.names = colnames(count_matrix1)
)

# ---- Build the DESeqDataSet ----
dds <- DESeqDataSetFromMatrix(
  countData = count_matrix1,
  colData = coldata,
  design = ~condition
)

# ---- Run the differential expression analysis ----
dds <- DESeq(dds)

# ---- Extract results ----
# The contrast is spelled out so the direction (tumor / normal) is unambiguous.
result <- as.data.frame(
  results(dds, contrast = c("condition", "tumor", "normal"))
)

# ---- Keep significantly differentially expressed genes ----
# padj < 0.05 and |log2FC| > 2; which() drops genes whose padj is NA.
keep <- which(result$padj < 0.05 & abs(result$log2FoldChange) > 2)
DGE <- result[keep, , drop = FALSE]
DGE <- DGE[order(DGE$log2FoldChange), , drop = FALSE]

# ---- Save ----
write.csv(DGE, 'DGE.csv', row.names = TRUE)












网友评论