举例:GSE83521和GSE89143数据合并
1. 下载数据
rm(list = ls())library(GEOquery)library(stringr)gse = "GSE83521"eSet1 <- getGEO("GSE83521", destdir = '.', getGPL = F)eSet2 <- getGEO("GSE89143", destdir = '.', getGPL = F)#(1)提取表达矩阵expexp1 <- exprs(eSet1[[1]])exp1[1:4,1:4]exp2 <- exprs(eSet2[[1]])exp2[1:4,1:4]exp2 = log2(exp2+1)table(rownames(exp1) %in% rownames(exp2))length(intersect(rownames(exp1),rownames(exp2)))exp1 <- exp1[intersect(rownames(exp1),rownames(exp2)),]exp2 <- exp2[intersect(rownames(exp1),rownames(exp2)),]boxplot(exp1)boxplot(exp2)#(2)提取临床信息pd1 <- pData(eSet1[[1]])pd2 <- pData(eSet2[[1]])if(!identical(rownames(pd1),colnames(exp1))) exp1 = exp1[,match(rownames(pd1),colnames(exp1))]if(!identical(rownames(pd2),colnames(exp2))) exp2 = exp2[,match(rownames(pd2),colnames(exp2))]#(3)提取芯片平台编号gpl <- eSet2[[1]]@annotation#(4)合并表达矩阵# exp2的第三个样本有些异常,可以去掉或者用normalizeBetweenArrays标准化,把它拉回正常水平。exp2 = exp2[,-3]exp = cbind(exp1,exp2)boxplot(exp)Group1 = ifelse(str_detect(pd1$title,"Tumour"),"Tumour","Normal")Group2 = ifelse(str_detect(pd2$source_name_ch1,"Paracancerous"),"Normal","Tumour")[-3]Group = c(Group1,Group2)table(Group)Group = factor(Group,levels = c("Normal","Tumour"))save(gse,Group,exp,gpl,file = "exp.Rdata")
两个数据集样本的情况

合并后的数据

1. 针对不同数据集数据的差异,需要处理批次效应
2.1使用limma包里的removeBatchEffect()函数
rm(list = ls())load("exp.Rdata")#处理批次效应library(limma)#?removeBatchEffect()batch <- c(rep("A",12),rep("B",5))exp2 <- removeBatchEffect(exp, batch)par(mfrow=c(1,2)) #展示的图片为一行两列boxplot(as.data.frame(exp),main="Original")boxplot(as.data.frame(exp2),main="Batch corrected")

2.2使用sva包中的combat()函数
rm(list = ls())load("exp.Rdata")#处理批次效应(combat)library(sva)#?ComBatbatch <- c(rep("A",12),rep("B",5))mod = model.matrix(~Group)exp2 = ComBat(dat=exp, batch=batch, mod=mod, par.prior=TRUE, ref.batch="A")par(mfrow=c(1,2))boxplot(as.data.frame(exp),main="Original")boxplot(as.data.frame(exp2),main="Batch corrected")

转载自《生信技能树》,如有侵权,请联系删除。