sample.file.url = "https://genome.med.nyu.edu/results/external/iCellR/data/pbmc3k_filtered_gene_bc_matrices.tar.gz"

# download the file
download.file(url = sample.file.url, 
     destfile = "pbmc3k_filtered_gene_bc_matrices.tar.gz", 
     method = "auto")  

# unzip the file. 
untar("pbmc3k_filtered_gene_bc_matrices.tar.gz")

library("iCellR")
my.data <- load10x("filtered_gene_bc_matrices/hg19/")


sample1 <- my.data[1:900]
sample2 <- my.data[901:1800]
sample3 <- my.data[1801:2300]
sample4 <- my.data[2301:2700]
   
my.data <- data.aggregation(samples = c("sample1","sample2","sample3","sample4"),
	condition.names = c("WT","KO","Ctrl","KD"))


my.obj <- make.obj(my.data)

my.obj <- qc.stats(my.obj,
	s.phase.genes = s.phase, 
	g2m.phase.genes = g2m.phase)


png('plot1_QC_stats.png',width = 8, height = 6, units = 'in', res = 300)
stats.plot(my.obj,
	plot.type = "three.in.one",
	interactive = FALSE,
	cell.color = "slategray3", 
	cell.size = 1, 
	cell.transparency = 0.5,
	box.color = "red",
	box.line.col = "green")
dev.off()


png('plot2_scatter.mito.umi.png',width = 6, height = 6, units = 'in', res = 300)
stats.plot(my.obj, plot.type = "point.mito.umi", 
	interactive = F, out.name = "plot2_scatter.mito.umi")
dev.off()

png('plot3_scatter.gene.umi.png',width = 6, height = 6, units = 'in', res = 300)
stats.plot(my.obj, plot.type = "point.gene.umi", 
	interactive = F, out.name = "plot3_scatter.gene.umi")
dev.off()

# filter 
my.obj <- cell.filter(my.obj,
min.mito = 0,
max.mito = 0.05 ,
min.genes = 200,
max.genes = 6000,
min.umis = 0,
max.umis = Inf)

# normalize
my.obj <- norm.data(my.obj, norm.method = "ranked.glsf", top.rank = 500) 

################################################################# make model
my.obj <- gene.stats(my.obj, which.data = "main.data")
#head(my.obj@gene.data[order(my.obj@gene.data$numberOfCells, decreasing = T),])
my.gene.data <- my.obj@gene.data
head(my.gene.data)
write.table((my.gene.data), file="gene.data.tsv", sep="\t", row.names =F)

png('plot4_gene.model.png',width = 6, height = 6, units = 'in', res = 300)
make.gene.model(my.obj, my.out.put = "plot",
	dispersion.limit = 1.5, 
	base.mean.rank = 500, 
	no.mito.model = T, 
	mark.mito = T, 
	interactive = F,
	no.cell.cycle = T,
	out.name = "gene.model")
dev.off()

my.obj <- make.gene.model(my.obj, my.out.put = "data",
	dispersion.limit = 1.5, 
	base.mean.rank = 500, 
	no.mito.model = T, 
	mark.mito = T, 
	interactive = F,
	no.cell.cycle = T,
	out.name = "gene.model")

length(my.obj@gene.model)
# [1] 683

# run PCA
my.obj <- run.pca(my.obj, method = "gene.model", gene.list = my.obj@gene.model,data.type = "main")

png('plot5_Opt_Number_Of_PCs.png',width = 6, height = 6, units = 'in', res = 300)
opt.pcs.plot(my.obj)
dev.off()

#### run 2 pass PCA (optional)
####
#length(my.obj@gene.model)
#my.obj <- find.dim.genes(my.obj, dims = 1:20,top.pos = 20, top.neg = 20)
#length(my.obj@gene.model)

# second round PC
#my.obj <- run.pca(my.obj, method = "gene.model", gene.list = my.obj@gene.model,data.type = "main")

############### dim redux

my.obj <- run.pc.tsne(my.obj, dims = 1:10)
my.obj <- run.umap(my.obj, dims = 1:10)

########################### IMPORTANT NOTE ########################################
#### if you have more than 5000 cells use a higher k (like 400) for KNetL (see below)
#### k of 400 is usually good with biger data but adjust it for intended resolution.
#### Here we use a k of 100 or 110 but this might not be ideal for your data.
###################################################################################
###################################################################################
###################################################################################
###################################################################################

my.obj <- run.knetl(my.obj, dims = 1:20, zoom = 110, dim.redux = "umap")


 library(gridExtra)
A= cluster.plot(my.obj,plot.type = "pca",interactive = F)
B= cluster.plot(my.obj,plot.type = "umap",interactive = F)
C= cluster.plot(my.obj,plot.type = "tsne",interactive = F)
D= cluster.plot(my.obj,plot.type = "knetl",interactive = F)

png('All.not.Clustered.png', width = 12, height = 10, units = 'in', res = 300)
grid.arrange(A,B,C,D)
dev.off()


### cluster the cells

# conventional method based on PCA
#my.obj <- iclust(my.obj, dist.method = "euclidean", sensitivity = 150,dims = 1:20, data.type = "pca")

# Recommended method based on KNetL map
my.obj <- iclust(my.obj, k = 150, data.type = "knetl")

# my.obj <- change.clust(my.obj, change.clust = 6, to.clust = 3)
# my.obj <- gate.to.clust(my.obj, my.gate = "cellGating.txt", to.clust = 6)

 library(gridExtra)
A= cluster.plot(my.obj,plot.type = "pca",interactive = F,cell.size = 0.5,cell.transparency = 1, anno.clust=T)
B= cluster.plot(my.obj,plot.type = "umap",interactive = F,cell.size = 0.5,cell.transparency = 1,anno.clust=T)
C= cluster.plot(my.obj,plot.type = "tsne",interactive = F,cell.size = 0.5,cell.transparency = 1,anno.clust=T)
D= cluster.plot(my.obj,plot.type = "knetl",interactive = F,cell.size = 0.5,cell.transparency = 1,anno.clust=T)

png('Allclusts_not_in_order.png', width = 12, height = 10, units = 'in', res = 300)
grid.arrange(A,B,C,D)
dev.off()

#### re-numbering clusters based on their distances so they are in order (optional)

my.obj <- clust.ord(my.obj,top.rank = 500, how.to.order = "distance")
#my.obj <- clust.ord(my.obj,top.rank = 500, how.to.order = "random")

 library(gridExtra)
A= cluster.plot(my.obj,plot.type = "pca",interactive = F,cell.size = 0.5,cell.transparency = 1, anno.clust=T)
B= cluster.plot(my.obj,plot.type = "umap",interactive = F,cell.size = 0.5,cell.transparency = 1,anno.clust=T)
C= cluster.plot(my.obj,plot.type = "tsne",interactive = F,cell.size = 0.5,cell.transparency = 1,anno.clust=T)
D= cluster.plot(my.obj,plot.type = "knetl",interactive = F,cell.size = 0.5,cell.transparency = 1,anno.clust=T)

png('Allclusts.png', width = 12, height = 10, units = 'in', res = 300)
grid.arrange(A,B,C,D)
dev.off()

# Look at conditions

 library(gridExtra)
A= cluster.plot(my.obj,plot.type = "pca",col.by = "conditions",interactive = F,cell.size = 0.5)
B= cluster.plot(my.obj,plot.type = "umap",col.by = "conditions",interactive = F,cell.size = 0.5)
C= cluster.plot(my.obj,plot.type = "tsne",col.by = "conditions",interactive = F,cell.size = 0.5)
D= cluster.plot(my.obj,plot.type = "knetl",col.by = "conditions",interactive = F,cell.size = 0.5)

png('Allconds.png', width = 12, height = 10, units = 'in', res = 300)
grid.arrange(A,B,C,D)
dev.off()

# or

png('Allconds_clusts.png', width = 10, height = 10, units = 'in', res = 300)
cluster.plot(my.obj,
              cell.size = 0.5,
              plot.type = "knetl",
              cell.color = "black",
              back.col = "white",
              col.by = "conditions",
              cell.transparency = 1,
              clust.dim = 2,
              interactive = F,cond.facet = T)
dev.off()

###########################
###########################
## Pseudotime Abstract KNetL map (PAK map)

png('pseudotime.KNetL.png',width = 6, height = 6, units = 'in', res = 300)
pseudotime.knetl(my.obj,interactive = F,cluster.membership = F,conds.to.plot = NULL)
dev.off()

png('pseudotime.KNetL.membered.png',width = 6, height = 6, units = 'in', res = 300)
pseudotime.knetl(my.obj,interactive = F,cluster.membership = T,conds.to.plot = NULL)
dev.off()

### intractive plot
pseudotime.knetl(my.obj,interactive = T)

########################### cluster avrage expression

# for all cunditions
my.obj <- clust.avg.exp(my.obj, conds.to.avg = NULL)

# for one cundition
#my.obj <- clust.avg.exp(my.obj, conds.to.avg = "WT")

# for two cundition
#my.obj <- clust.avg.exp(my.obj, conds.to.avg = c("WT","KO"))

head(my.obj@clust.avg)

write.table((my.obj@clust.avg),file="clust.avg.tsv",sep="\t", row.names =F)

##################### cell cycle Analysis

G0 <- readLines(system.file('extdata', 'G0.txt', package = 'iCellR'))
G1S <- readLines(system.file('extdata', 'G1S.txt', package = 'iCellR'))
G2M <- readLines(system.file('extdata', 'G2M.txt', package = 'iCellR'))
M <- readLines(system.file('extdata', 'M.txt', package = 'iCellR'))
MG1 <- readLines(system.file('extdata', 'MG1.txt', package = 'iCellR'))
S <- readLines(system.file('extdata', 'S.txt', package = 'iCellR'))

# Tirosh scoring method (recomanded)
my.obj <- cell.cycle(my.obj, scoring.List = c("G0","G1S","G2M","M","MG1","S"), scoring.method = "tirosh")

# Coverage scoring method (recomanded)
#my.obj <- cell.cycle(my.obj, scoring.List = c("G0","G1S","G2M","M","MG1","S"), scoring.method = "coverage")

# plot cell cycle

 library(gridExtra)
A= cluster.plot(my.obj,plot.type = "pca",interactive = F,cell.size = 0.5,cell.transparency = 1, anno.clust=T,col.by = "cc")
B= cluster.plot(my.obj,plot.type = "umap",interactive = F,cell.size = 0.5,cell.transparency = 1,anno.clust=T, col.by = "cc")
C= cluster.plot(my.obj,plot.type = "tsne",interactive = F,cell.size = 0.5,cell.transparency = 1,anno.clust=T, col.by = "cc")
D= cluster.plot(my.obj,plot.type = "knetl",interactive = F,cell.size = 0.5,cell.transparency = 1,anno.clust=T, col.by = "cc")

png('All_cellcycle.png', width = 12, height = 10, units = 'in', res = 300)
grid.arrange(A,B,C,D)
dev.off()

png('AllConds_cellcycle.png', width = 10, height = 10, units = 'in', res = 300)
cluster.plot(my.obj,
              cell.size = 0.5,
              plot.type = "knetl",
              col.by = "cc",
              cell.color = "black",
              back.col = "white",
              cell.transparency = 1,
              clust.dim = 2,
              interactive = F,cond.facet = T)
dev.off()

####

png("cluster_cellcycle_pie.png",width = 6, height = 6, units = 'in', res = 300)
clust.stats.plot(my.obj, plot.type = "pie.cc", interactive = F, conds.to.plot = NULL)
dev.off()

png("cluster_cellcycle_bar.png",width = 6, height = 6, units = 'in', res = 300)
clust.stats.plot(my.obj, plot.type = "bar.cc", interactive = F, conds.to.plot = NULL)
dev.off()

# or per condition
# clust.stats.plot(my.obj, plot.type = "pie.cc", interactive = F, conds.to.plot = "WT")

###########################
###########################
#### coverage correction (imputation)

my.obj <- run.impute(my.obj,dims = 1:10,data.type = "pca", nn = 10)

# save object
save(my.obj, file = "my.obj.Robj")

# you can load object to continue in the future!

# load("my.obj.Robj")

###### proportions or frequencies
png('Clust_cond_freq_pie.png',width = 10, height = 10, units = 'in', res = 300)
clust.cond.info(my.obj, plot.type = "pie", normalize.ncell = TRUE, my.out.put = "plot", normalize.by = "percentage")
dev.off()
png('Clust_cond_freq_bar.png',width = 10, height = 10, units = 'in', res = 300)
clust.cond.info(my.obj, plot.type = "bar", normalize.ncell = TRUE,my.out.put = "plot", normalize.by = "percentage")
dev.off()

png('Clust_cond_freq_pie.cond.png',width = 10, height = 10, units = 'in', res = 300)
clust.cond.info(my.obj, plot.type = "pie.cond", normalize.ncell = T, my.out.put = "plot", normalize.by = "percentage")
dev.off()
png('Clust_cond_freq_bar.cond.png',width = 10, height = 10, units = 'in', res = 300)
clust.cond.info(my.obj, plot.type = "bar.cond", normalize.ncell = T,my.out.put = "plot", normalize.by = "percentage")
dev.off()

my.obj <- clust.cond.info(my.obj)
data <- (my.obj@my.freq)
write.table(data, "myFrequencies.tsv", sep = "\t", row.names =F)

###### Cluster QC
png('cluster-mito_ratio.png',width = 6, height = 6, units = 'in', res = 300)
clust.stats.plot(my.obj, plot.type = "box.mito", interactive = F)
dev.off()
png('cluster-gene_cov.png',width = 6, height = 6, units = 'in', res = 300)
clust.stats.plot(my.obj, plot.type = "box.gene", interactive = F)
dev.off()

#############################################################
#### gene-gene correlation
my.obj <- run.impute(my.obj,dims = 1:10,data.type = "pca", nn = 50)

# main data
A <- gg.cor(my.obj, 
	interactive = F, 
	gene1 = "GNLY",
	gene2 = "NKG7", 
	conds = NULL,
	clusts = NULL,
	data.type = "main")

# imputed data 
B <- gg.cor(my.obj, 
	interactive = F, 
	gene1 = "GNLY",
	gene2 = "NKG7", 
	conds = NULL,
	clusts = NULL,
	data.type = "imputed")

C <- gg.cor(my.obj, 
	interactive = F, 
	gene1 = "GNLY",
	gene2 = "NKG7", 
	conds = NULL,
	clusts = c(3,2),
	data.type = "imputed")


# imputed data 
D <- gg.cor(my.obj, 
	interactive = F, 
	gene1 = "GNLY",
	gene2 = "NKG7", 
	conds = c("WT"),
	clusts = NULL,
	data.type = "imputed")

png('gene-gene.correlation.png', width = 12, height = 10, units = 'in', res = 300)
grid.arrange(A,B,C,D)
dev.off()

#############################################################
############################################################# More plots

##### intractive plots
cluster.plot(my.obj,
	plot.type = "tsne",
	col.by = "clusters",
	clust.dim = 3,
	out.name = "3d_tSNE_clusters")

cluster.plot(my.obj,
	plot.type = "tsne",
	col.by = "clusters",
	clust.dim = 2,
	interactive = T,
	out.name = "2d_tSNE_clusters")

cluster.plot(my.obj,
	plot.type = "tsne",
	col.by = "clusters",
	clust.dim = 2,
	interactive = T,
	density = F,
	cond.shape = T,
	out.name = "2d_tSNE_clusters_conds")

cluster.plot(my.obj,
	plot.type = "tsne",
	col.by = "conditions",
	clust.dim = 2,
	interactive = T,
	density = F,
	out.name = "2d_tSNE_conditions")

cluster.plot(my.obj,
	plot.type = "umap",
	col.by = "clusters",
	clust.dim = 2,
	cond.shape = T,
	interactive = T,
	out.name = "2d_UMAP_clusters_conds")

cluster.plot(my.obj,
	plot.type = "knetl",
	col.by = "clusters",
	clust.dim = 2,
	interactive = T,
	out.name = "2d_KNetL_clusters")


#############################################################
#############################################################
##### find markers 
marker.genes <- findMarkers(my.obj,
 fold.change = 2,
 padjval = 0.1,
 uniq = F,
 positive = F)

marker.genes1 <- cbind(row = rownames(marker.genes), marker.genes)
write.table((marker.genes1),file="marker.genes.tsv", sep="\t", row.names =F)

### plot markers
MyGenes <- top.markers(marker.genes, topde = 10, min.base.mean = 0.2, filt.ambig = F)
MyGenes <- unique(MyGenes)

png('heatmap.png', width = 20, height = 20, units = 'in', res = 300)
heatmap.gg.plot(my.obj, gene = MyGenes, interactive = F, cluster.by = "clusters")
dev.off()

heatmap.gg.plot(my.obj, gene = MyGenes, interactive = T, out.name = "heatmap_gg", cluster.by = "clusters")

png('heatmap_imputed.png', width = 20, height = 20, units = 'in', res = 300)
heatmap.gg.plot(my.obj, gene = MyGenes, interactive = F, cluster.by = "clusters", data.type = "imputed")
dev.off()

png('heatmap_imputed_sorted_WT.png', width = 20, height = 20, units = 'in', res = 300)
heatmap.gg.plot(my.obj, gene = MyGenes, interactive = F, cluster.by = "clusters", data.type = "imputed", cell.sort = TRUE, conds.to.plot = c("WT"))
dev.off()

png('heatmap_imputed_sudo.png', width = 20, height = 20, units = 'in', res = 300)
heatmap.gg.plot(my.obj, gene = MyGenes, interactive = F, cluster.by = "none", data.type = "imputed", cell.sort = TRUE)
dev.off()

### more plots

A <- gene.plot(my.obj, gene = "MS4A1", 
	plot.type = "scatterplot",
	interactive = F,
	out.name = "scatter_plot")
# PCA 2D	
B <- gene.plot(my.obj, gene = "MS4A1", 
	plot.type = "scatterplot",
	interactive = F,
	out.name = "scatter_plot",
	plot.data.type = "umap")
	
# Box Plot
C <- gene.plot(my.obj, gene = "MS4A1", 
	box.to.test = 0, 
	box.pval = "sig.signs",
	col.by = "clusters",
	plot.type = "boxplot",
	interactive = F,
	out.name = "box_plot")
	
# Bar plot (to visualize fold changes)	
D <- gene.plot(my.obj, gene = "MS4A1", 
	col.by = "clusters",
	plot.type = "barplot",
	interactive = F,
	out.name = "bar_plot")
	
library(gridExtra)
png('gene_plots.png', width = 8, height = 8, units = 'in', res = 300)
grid.arrange(A,B,C,D)	
dev.off()

### more plots

A <- gene.plot(my.obj, gene = "MS4A1", 
	plot.type = "scatterplot",
	interactive = F,
	data.type = "imputed",
	out.name = "scatter_plot")
# PCA 2D	
B <- gene.plot(my.obj, gene = "MS4A1", 
	plot.type = "scatterplot",
	interactive = F,
	out.name = "scatter_plot",
	data.type = "imputed",
	plot.data.type = "umap")
	
# Box Plot
C <- gene.plot(my.obj, gene = "MS4A1", 
	box.to.test = 0, 
	box.pval = "sig.signs",
	col.by = "clusters",
	plot.type = "boxplot",
	interactive = F,
	data.type = "imputed",
	out.name = "box_plot")
	
# Bar plot (to visualize fold changes)	
D <- gene.plot(my.obj, gene = "MS4A1", 
	col.by = "clusters",
	plot.type = "barplot",
	interactive = F,
	data.type = "imputed",
	out.name = "bar_plot")
	
library(gridExtra)
png('gene_plots_imputed.png', width = 8, height = 8, units = 'in', res = 300)
grid.arrange(A,B,C,D)	
dev.off()

#####

genelist = c("MS4A1","GNLY","FCGR3A","PPBP","CD14","CD3E","CD8A","CD4","GZMH","CCR7","CD68")

png('Genes.heatmap.png', width = 10, height = 10, units = 'in', res = 300)
heatmap.gg.plot(my.obj, gene = genelist, interactive = F, cluster.by = "clusters")
dev.off()


rm(list = ls(pattern="PL_"))
for(i in genelist){
    MyPlot <- gene.plot(my.obj, gene = i,
        interactive = F,
        cell.size = 0.1,
        plot.data.type = "knetl",
	data.type = "imputed",
        scaleValue = T,
        max.scale = 2.0,
        cell.transparency = 1)
    NameCol=paste("PL",i,sep="_")
    eval(call("<-", as.name(NameCol), MyPlot))
}

library(cowplot)
filenames <- ls(pattern="PL_")

B <- cluster.plot(my.obj,plot.type = "knetl",interactive = F,cell.size = 0.1,cell.transparency = 1,anno.clust=T)
filenames <- c("B",filenames)

png('Genes.KNetL.png',width = 15, height = 12, units = 'in', res = 300)
plot_grid(plotlist=mget(filenames))
dev.off()

##
A <- gene.plot(my.obj, gene = "MS4A1", 
   plot.type = "scatterplot",
   interactive = F,
   cell.transparency = 1,
   scaleValue = TRUE,
   min.scale = 0,
   max.scale = 2.5,
   back.col = "white",
   cond.shape = TRUE)
B <- gene.plot(my.obj, gene = "MS4A1", 
   plot.type = "scatterplot",
   interactive = F,
   cell.transparency = 1,
   scaleValue = TRUE,
   min.scale = 0,
   max.scale = 2.5,
   back.col = "white",
   cond.shape = TRUE,
   conds.to.plot = c("KO","WT"))

C <- gene.plot(my.obj, gene = "MS4A1", 
   plot.type = "boxplot",
   interactive = F,
   back.col = "white",
   cond.shape = TRUE,
   conds.to.plot = c("KO"))

D <- gene.plot(my.obj, gene = "MS4A1", 
   plot.type = "barplot",
   interactive = F,
   cell.transparency = 1,
   back.col = "white",
   cond.shape = TRUE,
   conds.to.plot = c("KO","WT"))

library(gridExtra)
png('genes.in_conditions_and_clusters.png', width = 8, height = 8, units = 'in', res = 300)
grid.arrange(A,B,C,D)
dev.off()

###### Labeling the clusters 
#CD3E: only in T Cells
#FCGR3A (CD16): in CD16+ monocytes and some expression NK cells
#GNLY: NK cells
#MS4A1: B cells
#GZMH: in GZMH+ T8 cells and some expression NK cells
#CD8A: in T8 cells
#CD4: in T4 and some myeloid cells
#CCR7: expressed more in memory cells 
#CD14: in CD14+ monocytes
#CD68: in monocytes

my.obj <- change.clust(my.obj, change.clust = 1, to.clust = "001.MG")
my.obj <- change.clust(my.obj, change.clust = 2, to.clust = "002.NK")
my.obj <- change.clust(my.obj, change.clust = 3, to.clust = "003.CD16+.Mono")
my.obj <- change.clust(my.obj, change.clust = 4, to.clust = "004.MF")
my.obj <- change.clust(my.obj, change.clust = 5, to.clust = "005.CD14+.Mono")
my.obj <- change.clust(my.obj, change.clust = 6, to.clust = "006.Naive.T8")
my.obj <- change.clust(my.obj, change.clust = 7, to.clust = "007.GZMH+.T8")
my.obj <- change.clust(my.obj, change.clust = 8, to.clust = "008.B")
my.obj <- change.clust(my.obj, change.clust = 9, to.clust = "009.Memory.T4")
my.obj <- change.clust(my.obj, change.clust = 10, to.clust = "010.Naive.T4")

A= cluster.plot(my.obj,plot.type = "pca",interactive = F,cell.size = 0.5,cell.transparency = 1, anno.clust=T)
B= cluster.plot(my.obj,plot.type = "umap",interactive = F,cell.size = 0.5,cell.transparency = 1,anno.clust=T)
C= cluster.plot(my.obj,plot.type = "tsne",interactive = F,cell.size = 0.5,cell.transparency = 1,anno.clust=T)
D= cluster.plot(my.obj,plot.type = "knetl",interactive = F,cell.size = 0.5,cell.transparency = 1,anno.clust=T)

png('Allclusts_annotated.png', width = 12, height = 10, units = 'in', res = 300)
grid.arrange(A,B,C,D)
dev.off()