#Activate edgeR; note that in your Rgui a vignettes button is added under which you find the edgeR pdf manual library(edgeR) ################# ANALYSIS ################# setwd("C:\\Synchr\\Onderwijs\\GenomicsDataAnalyse_VUmc\\2012\\Data") #Loads the data (M. Neerincx): 500 miRs, 16 samples load("datamiRseq.Rdata") #creates group variable; A factor is a special type of vector. Its values represent the levels of a nominal variable. #For a nominal category, one uses labels; for example, gender: male/female. #Nominal measures offer names or labels for certain characteristics. Variables assessed on a nominal scale are called categorical variables. group <- factor(rep(c("primary","metastasis"),8)) group #creates "individual (person)" variable pers <- sort(rep(1:8,2)) pers[15:16] <- c(8,9) pers <- factor(pers) pers #creates design matrix dat <- data.frame(group=group, pers=pers) rownames(dat) <- NULL design.full <- model.matrix(~ group + pers, data = dat) design.full # DGEList = a function to create a DGEList object from a table of counts (rows=features, columns=samples), group indicator for each column, # library size (optional) and a table of annotation (optional) d <- DGEList(counts = datamiRseq, group = group) d ## Estimating common dispersion; takes some time d.CR <- estimateGLMCommonDisp(d, design.full) # Filter out very lowly expressed tags. Those which have fewer than 5 counts in total cannot possibly achieve statisical significance #for differential expression (DE), so we filter out these tags (this is a recommendation by the edgeR developers) whsel <- which(rowSums(d.CR$counts) >= 5) d.CR <- d.CR[whsel,] #edgeR does not do the selection correctly for AveLogCPM slot newsel <- d.CR$AveLogCPM[whsel] d.CR$AveLogCPM <- newsel dim(d.CR) #Estimates tagwise dispersions from common and tag-specific estimates (shrinkage) d.CR <- estimateGLMTagwiseDisp(d.CR, design.full) #Fits the regression-type model resGLMfit.CR <- glmFit(d.CR, design.full, dispersion = d.CR$tagwise.dispersion) resGLMfit.CR #Performs differential expression testing using the dispersion estimates and the fit results. #IMPORTANT: coef. Which column in resGLMfit.CR$coefficients do you want to test? difexp <- glmLRT(resGLMfit.CR,coef=2) #save(difexp,file="difexp.Rdata") #load("difexp.Rdata") #Show the list in a ranked order n<-nrow(difexp) top<-topTags(difexp,n=n) top # Show the genes that are given an FDR corrected p-value less than 0.1. topsig <- top[top$table$FDR < 0.1,] # Show top 40 top40 <- topTags(difexp, n=40) # Show the data for the significant miRs topnames <- rownames(topsig$table) datamiRseq[topnames, ] # plotting fold changes; red dots are differentially signif #png(file="difexp_meta_vs_prim.png", height=600, width=600) plotSmear(d, de.tags = topnames, main = "Fold-change meta vs prim",cex=0.5) abline(h = c(-2, 2), col = "dodgerblue") #dev.off() # export topsig exptable <-topsig$table write.table(exptable,file="resultsmirEdgeR.txt",row.names = T,sep="\t") #save as text, loadable in excel