sample.file.url = "https://genome.med.nyu.edu/results/external/iCellR/data/pbmc3k_filtered_gene_bc_matrices.tar.gz" # download the file download.file(url = sample.file.url, destfile = "pbmc3k_filtered_gene_bc_matrices.tar.gz", method = "auto") # unzip the file. untar("pbmc3k_filtered_gene_bc_matrices.tar.gz") library("iCellR") my.data <- load10x("filtered_gene_bc_matrices/hg19/") sample1 <- my.data[1:900] sample2 <- my.data[901:1800] sample3 <- my.data[1801:2300] sample4 <- my.data[2301:2700] my.data <- data.aggregation(samples = c("sample1","sample2","sample3","sample4"), condition.names = c("WT","KO","Ctrl","KD")) my.obj <- make.obj(my.data) my.obj <- qc.stats(my.obj, s.phase.genes = s.phase, g2m.phase.genes = g2m.phase) png('plot1_QC_stats.png',width = 8, height = 6, units = 'in', res = 300) stats.plot(my.obj, plot.type = "three.in.one", interactive = FALSE, cell.color = "slategray3", cell.size = 1, cell.transparency = 0.5, box.color = "red", box.line.col = "green") dev.off() png('plot2_scatter.mito.umi.png',width = 6, height = 6, units = 'in', res = 300) stats.plot(my.obj, plot.type = "point.mito.umi", interactive = F, out.name = "plot2_scatter.mito.umi") dev.off() png('plot3_scatter.gene.umi.png',width = 6, height = 6, units = 'in', res = 300) stats.plot(my.obj, plot.type = "point.gene.umi", interactive = F, out.name = "plot3_scatter.gene.umi") dev.off() # filter my.obj <- cell.filter(my.obj, min.mito = 0, max.mito = 0.05 , min.genes = 200, max.genes = 6000, min.umis = 0, max.umis = Inf) # normalize my.obj <- norm.data(my.obj, norm.method = "ranked.glsf", top.rank = 500) ################################################################# make model my.obj <- gene.stats(my.obj, which.data = "main.data") #head(my.obj@gene.data[order(my.obj@gene.data$numberOfCells, decreasing = T),]) my.gene.data <- my.obj@gene.data head(my.gene.data) write.table((my.gene.data), file="gene.data.tsv", sep="\t", row.names =F) png('plot4_gene.model.png',width = 6, height = 6, units = 'in', res = 300) make.gene.model(my.obj, my.out.put = "plot", dispersion.limit = 1.5, base.mean.rank = 500, no.mito.model = T, mark.mito = T, interactive = F, no.cell.cycle = T, out.name = "gene.model") dev.off() my.obj <- make.gene.model(my.obj, my.out.put = "data", dispersion.limit = 1.5, base.mean.rank = 500, no.mito.model = T, mark.mito = T, interactive = F, no.cell.cycle = T, out.name = "gene.model") length(my.obj@gene.model) # [1] 683 # run PCA my.obj <- run.pca(my.obj, method = "gene.model", gene.list = my.obj@gene.model,data.type = "main") png('plot5_Opt_Number_Of_PCs.png',width = 6, height = 6, units = 'in', res = 300) opt.pcs.plot(my.obj) dev.off() #### run 2 pass PCA (optional) #### #length(my.obj@gene.model) #my.obj <- find.dim.genes(my.obj, dims = 1:20,top.pos = 20, top.neg = 20) #length(my.obj@gene.model) # second round PC #my.obj <- run.pca(my.obj, method = "gene.model", gene.list = my.obj@gene.model,data.type = "main") ############### dim redux my.obj <- run.pc.tsne(my.obj, dims = 1:10) my.obj <- run.umap(my.obj, dims = 1:10) ########################### IMPORTANT NOTE ######################################## #### if you have more than 5000 cells use a higher k (like 400) for KNetL (see below) #### k of 400 is usually good with biger data but adjust it for intended resolution. #### Here we use a k of 100 or 110 but this might not be ideal for your data. ################################################################################### ################################################################################### ################################################################################### ################################################################################### my.obj <- run.knetl(my.obj, dims = 1:20, k = 110, dim.redux = "umap") library(gridExtra) A= cluster.plot(my.obj,plot.type = "pca",interactive = F) B= cluster.plot(my.obj,plot.type = "umap",interactive = F) C= cluster.plot(my.obj,plot.type = "tsne",interactive = F) D= cluster.plot(my.obj,plot.type = "knetl",interactive = F) png('All.not.Clustered.png', width = 12, height = 10, units = 'in', res = 300) grid.arrange(A,B,C,D) dev.off() ### cluster the cells # conventional method based on PCA #my.obj <- iclust(my.obj, dist.method = "euclidean", k = 50,dims = 1:20, data.type = "pca") # Recommended method based on KNetL map my.obj <- iclust(my.obj, k = 150, data.type = "knetl") # my.obj <- change.clust(my.obj, change.clust = 6, to.clust = 3) # my.obj <- gate.to.clust(my.obj, my.gate = "cellGating.txt", to.clust = 6) library(gridExtra) A= cluster.plot(my.obj,plot.type = "pca",interactive = F,cell.size = 0.5,cell.transparency = 1, anno.clust=T) B= cluster.plot(my.obj,plot.type = "umap",interactive = F,cell.size = 0.5,cell.transparency = 1,anno.clust=T) C= cluster.plot(my.obj,plot.type = "tsne",interactive = F,cell.size = 0.5,cell.transparency = 1,anno.clust=T) D= cluster.plot(my.obj,plot.type = "knetl",interactive = F,cell.size = 0.5,cell.transparency = 1,anno.clust=T) png('Allclusts_not_in_order.png', width = 12, height = 10, units = 'in', res = 300) grid.arrange(A,B,C,D) dev.off() #### re-numbering clusters based on their distances so they are in order (optional) my.obj <- clust.ord(my.obj,top.rank = 500, how.to.order = "distance") #my.obj <- clust.ord(my.obj,top.rank = 500, how.to.order = "random") library(gridExtra) A= cluster.plot(my.obj,plot.type = "pca",interactive = F,cell.size = 0.5,cell.transparency = 1, anno.clust=T) B= cluster.plot(my.obj,plot.type = "umap",interactive = F,cell.size = 0.5,cell.transparency = 1,anno.clust=T) C= cluster.plot(my.obj,plot.type = "tsne",interactive = F,cell.size = 0.5,cell.transparency = 1,anno.clust=T) D= cluster.plot(my.obj,plot.type = "knetl",interactive = F,cell.size = 0.5,cell.transparency = 1,anno.clust=T) png('Allclusts.png', width = 12, height = 10, units = 'in', res = 300) grid.arrange(A,B,C,D) dev.off() # Look at conditions library(gridExtra) A= cluster.plot(my.obj,plot.type = "pca",col.by = "conditions",interactive = F,cell.size = 0.5) B= cluster.plot(my.obj,plot.type = "umap",col.by = "conditions",interactive = F,cell.size = 0.5) C= cluster.plot(my.obj,plot.type = "tsne",col.by = "conditions",interactive = F,cell.size = 0.5) D= cluster.plot(my.obj,plot.type = "knetl",col.by = "conditions",interactive = F,cell.size = 0.5) png('Allconds.png', width = 12, height = 10, units = 'in', res = 300) grid.arrange(A,B,C,D) dev.off() # or png('Allconds_clusts.png', width = 10, height = 10, units = 'in', res = 300) cluster.plot(my.obj, cell.size = 0.5, plot.type = "knetl", cell.color = "black", back.col = "white", col.by = "conditions", cell.transparency = 1, clust.dim = 2, interactive = F,cond.facet = T) dev.off() ########################### ########################### ## Pseudotime Abstract KNetL map (PAK map) png('pseudotime.KNetL.png',width = 6, height = 6, units = 'in', res = 300) pseudotime.knetl(my.obj,interactive = F,cluster.membership = F,conds.to.plot = NULL) dev.off() png('pseudotime.KNetL.membered.png',width = 6, height = 6, units = 'in', res = 300) pseudotime.knetl(my.obj,interactive = F,cluster.membership = T,conds.to.plot = NULL) dev.off() ### intractive plot pseudotime.knetl(my.obj,interactive = T) ########################### cluster avrage expression # for all cunditions my.obj <- clust.avg.exp(my.obj, conds.to.avg = NULL) # for one cundition #my.obj <- clust.avg.exp(my.obj, conds.to.avg = "WT") # for two cundition #my.obj <- clust.avg.exp(my.obj, conds.to.avg = c("WT","KO")) head(my.obj@clust.avg) write.table((my.obj@clust.avg),file="clust.avg.tsv",sep="\t", row.names =F) ##################### cell cycle Analysis G0 <- readLines(system.file('extdata', 'G0.txt', package = 'iCellR')) G1S <- readLines(system.file('extdata', 'G1S.txt', package = 'iCellR')) G2M <- readLines(system.file('extdata', 'G2M.txt', package = 'iCellR')) M <- readLines(system.file('extdata', 'M.txt', package = 'iCellR')) MG1 <- readLines(system.file('extdata', 'MG1.txt', package = 'iCellR')) S <- readLines(system.file('extdata', 'S.txt', package = 'iCellR')) # Tirosh scoring method (recomanded) my.obj <- cell.cycle(my.obj, scoring.List = c("G0","G1S","G2M","M","MG1","S"), scoring.method = "tirosh") # Coverage scoring method (recomanded) #my.obj <- cell.cycle(my.obj, scoring.List = c("G0","G1S","G2M","M","MG1","S"), scoring.method = "coverage") # plot cell cycle library(gridExtra) A= cluster.plot(my.obj,plot.type = "pca",interactive = F,cell.size = 0.5,cell.transparency = 1, anno.clust=T,col.by = "cc") B= cluster.plot(my.obj,plot.type = "umap",interactive = F,cell.size = 0.5,cell.transparency = 1,anno.clust=T, col.by = "cc") C= cluster.plot(my.obj,plot.type = "tsne",interactive = F,cell.size = 0.5,cell.transparency = 1,anno.clust=T, col.by = "cc") D= cluster.plot(my.obj,plot.type = "knetl",interactive = F,cell.size = 0.5,cell.transparency = 1,anno.clust=T, col.by = "cc") png('All_cellcycle.png', width = 12, height = 10, units = 'in', res = 300) grid.arrange(A,B,C,D) dev.off() png('AllConds_cellcycle.png', width = 10, height = 10, units = 'in', res = 300) cluster.plot(my.obj, cell.size = 0.5, plot.type = "knetl", col.by = "cc", cell.color = "black", back.col = "white", cell.transparency = 1, clust.dim = 2, interactive = F,cond.facet = T) dev.off() #### png("cluster_cellcycle_pie.png",width = 6, height = 6, units = 'in', res = 300) clust.stats.plot(my.obj, plot.type = "pie.cc", interactive = F, conds.to.plot = NULL) dev.off() png("cluster_cellcycle_bar.png",width = 6, height = 6, units = 'in', res = 300) clust.stats.plot(my.obj, plot.type = "bar.cc", interactive = F, conds.to.plot = NULL) dev.off() # or per condition # clust.stats.plot(my.obj, plot.type = "pie.cc", interactive = F, conds.to.plot = "WT") ########################### ########################### #### coverage correction (imputation) my.obj <- run.impute(my.obj,dims = 1:10,data.type = "pca", nn = 10) # save object save(my.obj, file = "my.obj.Robj") # you can load object to continue in the future! # load("my.obj.Robj") ###### proportions or frequencies png('Clust_cond_freq_pie.png',width = 10, height = 10, units = 'in', res = 300) clust.cond.info(my.obj, plot.type = "pie", normalize.ncell = TRUE, my.out.put = "plot", normalize.by = "percentage") dev.off() png('Clust_cond_freq_bar.png',width = 10, height = 10, units = 'in', res = 300) clust.cond.info(my.obj, plot.type = "bar", normalize.ncell = TRUE,my.out.put = "plot", normalize.by = "percentage") dev.off() png('Clust_cond_freq_pie.cond.png',width = 10, height = 10, units = 'in', res = 300) clust.cond.info(my.obj, plot.type = "pie.cond", normalize.ncell = T, my.out.put = "plot", normalize.by = "percentage") dev.off() png('Clust_cond_freq_bar.cond.png',width = 10, height = 10, units = 'in', res = 300) clust.cond.info(my.obj, plot.type = "bar.cond", normalize.ncell = T,my.out.put = "plot", normalize.by = "percentage") dev.off() my.obj <- clust.cond.info(my.obj) data <- (my.obj@my.freq) write.table(data, "myFrequencies.tsv", sep = "\t", row.names =F) ###### Cluster QC png('cluster-mito_ratio.png',width = 6, height = 6, units = 'in', res = 300) clust.stats.plot(my.obj, plot.type = "box.mito", interactive = F) dev.off() png('cluster-gene_cov.png',width = 6, height = 6, units = 'in', res = 300) clust.stats.plot(my.obj, plot.type = "box.gene", interactive = F) dev.off() ############################################################# #### gene-gene correlation my.obj <- run.impute(my.obj,dims = 1:10,data.type = "pca", nn = 50) # main data A <- gg.cor(my.obj, interactive = F, gene1 = "GNLY", gene2 = "NKG7", conds = NULL, clusts = NULL, data.type = "main") # imputed data B <- gg.cor(my.obj, interactive = F, gene1 = "GNLY", gene2 = "NKG7", conds = NULL, clusts = NULL, data.type = "imputed") C <- gg.cor(my.obj, interactive = F, gene1 = "GNLY", gene2 = "NKG7", conds = NULL, clusts = c(3,2), data.type = "imputed") # imputed data D <- gg.cor(my.obj, interactive = F, gene1 = "GNLY", gene2 = "NKG7", conds = c("WT"), clusts = NULL, data.type = "imputed") png('gene-gene.correlation.png', width = 12, height = 10, units = 'in', res = 300) grid.arrange(A,B,C,D) dev.off() ############################################################# ############################################################# More plots ##### intractive plots cluster.plot(my.obj, plot.type = "tsne", col.by = "clusters", clust.dim = 3, out.name = "3d_tSNE_clusters") cluster.plot(my.obj, plot.type = "tsne", col.by = "clusters", clust.dim = 2, interactive = T, out.name = "2d_tSNE_clusters") cluster.plot(my.obj, plot.type = "tsne", col.by = "clusters", clust.dim = 2, interactive = T, density = F, cond.shape = T, out.name = "2d_tSNE_clusters_conds") cluster.plot(my.obj, plot.type = "tsne", col.by = "conditions", clust.dim = 2, interactive = T, density = F, out.name = "2d_tSNE_conditions") cluster.plot(my.obj, plot.type = "umap", col.by = "clusters", clust.dim = 2, cond.shape = T, interactive = T, out.name = "2d_UMAP_clusters_conds") cluster.plot(my.obj, plot.type = "knetl", col.by = "clusters", clust.dim = 2, interactive = T, out.name = "2d_KNetL_clusters") ############################################################# ############################################################# ##### find markers marker.genes <- findMarkers(my.obj, fold.change = 2, padjval = 0.1, uniq = F, positive = F) marker.genes1 <- cbind(row = rownames(marker.genes), marker.genes) write.table((marker.genes1),file="marker.genes.tsv", sep="\t", row.names =F) ### plot markers MyGenes <- top.markers(marker.genes, topde = 10, min.base.mean = 0.2, filt.ambig = F) MyGenes <- unique(MyGenes) png('heatmap.png', width = 20, height = 20, units = 'in', res = 300) heatmap.gg.plot(my.obj, gene = MyGenes, interactive = F, cluster.by = "clusters") dev.off() heatmap.gg.plot(my.obj, gene = MyGenes, interactive = T, out.name = "heatmap_gg", cluster.by = "clusters") png('heatmap_imputed.png', width = 20, height = 20, units = 'in', res = 300) heatmap.gg.plot(my.obj, gene = MyGenes, interactive = F, cluster.by = "clusters", data.type = "imputed") dev.off() png('heatmap_imputed_sorted_WT.png', width = 20, height = 20, units = 'in', res = 300) heatmap.gg.plot(my.obj, gene = MyGenes, interactive = F, cluster.by = "clusters", data.type = "imputed", cell.sort = TRUE, conds.to.plot = c("WT")) dev.off() png('heatmap_imputed_sudo.png', width = 20, height = 20, units = 'in', res = 300) heatmap.gg.plot(my.obj, gene = MyGenes, interactive = F, cluster.by = "none", data.type = "imputed", cell.sort = TRUE) dev.off() ### more plots A <- gene.plot(my.obj, gene = "MS4A1", plot.type = "scatterplot", interactive = F, out.name = "scatter_plot") # PCA 2D B <- gene.plot(my.obj, gene = "MS4A1", plot.type = "scatterplot", interactive = F, out.name = "scatter_plot", plot.data.type = "umap") # Box Plot C <- gene.plot(my.obj, gene = "MS4A1", box.to.test = 0, box.pval = "sig.signs", col.by = "clusters", plot.type = "boxplot", interactive = F, out.name = "box_plot") # Bar plot (to visualize fold changes) D <- gene.plot(my.obj, gene = "MS4A1", col.by = "clusters", plot.type = "barplot", interactive = F, out.name = "bar_plot") library(gridExtra) png('gene_plots.png', width = 8, height = 8, units = 'in', res = 300) grid.arrange(A,B,C,D) dev.off() ### more plots A <- gene.plot(my.obj, gene = "MS4A1", plot.type = "scatterplot", interactive = F, data.type = "imputed", out.name = "scatter_plot") # PCA 2D B <- gene.plot(my.obj, gene = "MS4A1", plot.type = "scatterplot", interactive = F, out.name = "scatter_plot", data.type = "imputed", plot.data.type = "umap") # Box Plot C <- gene.plot(my.obj, gene = "MS4A1", box.to.test = 0, box.pval = "sig.signs", col.by = "clusters", plot.type = "boxplot", interactive = F, data.type = "imputed", out.name = "box_plot") # Bar plot (to visualize fold changes) D <- gene.plot(my.obj, gene = "MS4A1", col.by = "clusters", plot.type = "barplot", interactive = F, data.type = "imputed", out.name = "bar_plot") library(gridExtra) png('gene_plots_imputed.png', width = 8, height = 8, units = 'in', res = 300) grid.arrange(A,B,C,D) dev.off() ##### genelist = c("MS4A1","GNLY","FCGR3A","PPBP","CD14","CD3E","CD8A","CD4","GZMH","CCR7","CD68") png('Genes.heatmap.png', width = 10, height = 10, units = 'in', res = 300) heatmap.gg.plot(my.obj, gene = genelist, interactive = F, cluster.by = "clusters") dev.off() rm(list = ls(pattern="PL_")) for(i in genelist){ MyPlot <- gene.plot(my.obj, gene = i, interactive = F, cell.size = 0.1, plot.data.type = "knetl", data.type = "imputed", scaleValue = T, max.scale = 2.0, cell.transparency = 1) NameCol=paste("PL",i,sep="_") eval(call("<-", as.name(NameCol), MyPlot)) } library(cowplot) filenames <- ls(pattern="PL_") B <- cluster.plot(my.obj,plot.type = "knetl",interactive = F,cell.size = 0.1,cell.transparency = 1,anno.clust=T) filenames <- c("B",filenames) png('Genes.KNetL.png',width = 15, height = 12, units = 'in', res = 300) plot_grid(plotlist=mget(filenames)) dev.off() ## A <- gene.plot(my.obj, gene = "MS4A1", plot.type = "scatterplot", interactive = F, cell.transparency = 1, scaleValue = TRUE, min.scale = 0, max.scale = 2.5, back.col = "white", cond.shape = TRUE) B <- gene.plot(my.obj, gene = "MS4A1", plot.type = "scatterplot", interactive = F, cell.transparency = 1, scaleValue = TRUE, min.scale = 0, max.scale = 2.5, back.col = "white", cond.shape = TRUE, conds.to.plot = c("KO","WT")) C <- gene.plot(my.obj, gene = "MS4A1", plot.type = "boxplot", interactive = F, back.col = "white", cond.shape = TRUE, conds.to.plot = c("KO")) D <- gene.plot(my.obj, gene = "MS4A1", plot.type = "barplot", interactive = F, cell.transparency = 1, back.col = "white", cond.shape = TRUE, conds.to.plot = c("KO","WT")) library(gridExtra) png('genes.in_conditions_and_clusters.png', width = 8, height = 8, units = 'in', res = 300) grid.arrange(A,B,C,D) dev.off() ###### Labeling the clusters #CD3E: only in T Cells #FCGR3A (CD16): in CD16+ monocytes and some expression NK cells #GNLY: NK cells #MS4A1: B cells #GZMH: in GZMH+ T8 cells and some expression NK cells #CD8A: in T8 cells #CD4: in T4 and some myeloid cells #CCR7: expressed more in memory cells #CD14: in CD14+ monocytes #CD68: in monocytes my.obj <- change.clust(my.obj, change.clust = 1, to.clust = "001.MG") my.obj <- change.clust(my.obj, change.clust = 2, to.clust = "002.NK") my.obj <- change.clust(my.obj, change.clust = 3, to.clust = "003.CD16+.Mono") my.obj <- change.clust(my.obj, change.clust = 4, to.clust = "004.MF") my.obj <- change.clust(my.obj, change.clust = 5, to.clust = "005.CD14+.Mono") my.obj <- change.clust(my.obj, change.clust = 6, to.clust = "006.Naive.T8") my.obj <- change.clust(my.obj, change.clust = 7, to.clust = "007.GZMH+.T8") my.obj <- change.clust(my.obj, change.clust = 8, to.clust = "008.B") my.obj <- change.clust(my.obj, change.clust = 9, to.clust = "009.Memory.T4") my.obj <- change.clust(my.obj, change.clust = 10, to.clust = "010.Naive.T4") A= cluster.plot(my.obj,plot.type = "pca",interactive = F,cell.size = 0.5,cell.transparency = 1, anno.clust=T) B= cluster.plot(my.obj,plot.type = "umap",interactive = F,cell.size = 0.5,cell.transparency = 1,anno.clust=T) C= cluster.plot(my.obj,plot.type = "tsne",interactive = F,cell.size = 0.5,cell.transparency = 1,anno.clust=T) D= cluster.plot(my.obj,plot.type = "knetl",interactive = F,cell.size = 0.5,cell.transparency = 1,anno.clust=T) png('Allclusts_annotated.png', width = 12, height = 10, units = 'in', res = 300) grid.arrange(A,B,C,D) dev.off()