PMI associated gene list from [https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5511187/#sec-16title]
library(Seurat)
library(sctransform)
library(dplyr)
library(scDblFinder)
library(biomaRt)
library(scater)
library(SingleCellExperiment)
library(SeuratDisk)
library(Signac)
library(GenomicRanges)
library(rtracklayer)
library(EnsDb.Hsapiens.v86)
library(scales)
library(future)
plan("multicore", workers = 5)
options(future.globals.maxSize = 1000000 * 1024^2)
library(harmony)
library(biovizBase)
data.data <- Read10X(data.dir = "~/outs/filtered_feature_bc_matrix/")
rna_counts <- data.data$`Gene Expression`
atac_counts <- data.data$Peaks
data <- CreateSeuratObject(counts = rna_counts,project="all")
step<-"Init"
numCells<-nrow(data@meta.data)
###################################################################################
# Remove PMI genes
PMI<-read.csv("~/PMI_assoc_genes.csv",header=T)
PMI<-PMI[which(PMI$Tissuename=="Cerebral Cortex"),]
threshold<-c()
data<-data[-which(rownames(data) %in% PMI$Gene.Name),]
data[["percent.mt"]] <- PercentageFeatureSet(data, pattern = "^MT-")
a<-signif(quantile(data$nFeature_RNA,probs=0.99)[[1]],digits=1)
threshold<-a
###################################################################################
###################################################################################
# Add atac assay
# and get TSS, nucleosome signal, blacklist fraction
grange.counts <- StringToGRanges(rownames(atac_counts), sep = c(":", "-"))
grange.use <- seqnames(grange.counts) %in% standardChromosomes(grange.counts)
atac_counts <- atac_counts[as.vector(grange.use), ]
annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)
seqlevelsStyle(annotations) <- 'UCSC'
genome(annotations) <- "hg38"
frag.file <- paste0("~/outs/atac_fragments.tsv.gz")
chrom_assay <- CreateChromatinAssay(
counts = atac_counts,
sep = c(":", "-"),
genome = 'hg38',
fragments = frag.file,
min.cells = 10,
annotation = annotations
)
data[["ATAC"]] <- chrom_assay
DefaultAssay(data)<-"ATAC"
data <- NucleosomeSignal(data)
data <- TSSEnrichment(data)
data$blacklist_fraction <- FractionCountsInRegion(object = data, assay = 'ATAC',regions = blacklist_hg38)
###################################################################################
###################################################################################
sam_BEB18034<-read.csv("~/sample2_barcodes_BEB18034.csv")
sam_BEB18034$x<-gsub("2","1",sam_BEB18034$x)
sam_BNT1261<-read.csv("~/sample2_barcodes_BNT1261.csv")
sam_BNT1261$x<-gsub("2","1",sam_BNT1261$x)
sam_BEB19157<-read.csv("~/sample3_barcodes_BEB19157.csv")
sam_BEB19157$x<-gsub("3","2",sam_BEB19157$x)
sam_1230<-read.csv("~/sample3_barcodes_1230.csv")
sam_1230$x<-gsub("3","2",sam_1230$x)
sam_3329<-read.csv("~/sample4_barcodes_3329.csv")
sam_3329$x<-gsub("4","3",sam_3329$x)
sam_HBQS<-read.csv("~/sample4_barcodes_HBQS.csv")
sam_HBQS$x<-gsub("4","3",sam_HBQS$x)
sam_BEB18062<-read.csv("~/sample5_barcodes_BEB18062.csv")
sam_BEB18062$x<-gsub("5","4",sam_BEB18062$x)
sam_NT1271<-read.csv("~/sample5_barcodes_NT1271.csv")
sam_NT1271$x<-gsub("5","4",sam_NT1271$x)
sam_4313<-read.csv("~/sample9_barcodes_4313.csv")
sam_4313$x<-gsub("9","5",sam_4313$x)
sam_4482<-read.csv("~/sample9_barcodes_4482.csv")
sam_4482$x<-gsub("9","5",sam_4482$x)
sam_HCT17HEX<-read.csv("~/sample10_barcodes_HCT17HEX.csv")
sam_HCT17HEX$x<-gsub("10","6",sam_HCT17HEX$x)
sam_HCTZZT<-read.csv("~/sample10_barcodes_HCTZZT.csv")
sam_HCTZZT$x<-gsub("10","6",sam_HCTZZT$x)
sam_4305<-read.csv("~/sample11_barcodes_4305.csv")
sam_4305$x<-gsub("11","7",sam_4305$x)
sam_4443<-read.csv("~/sample11_barcodes_4443.csv")
sam_4443$x<-gsub("11","7",sam_4443$x)
###################################################################################
###################################################################################
data$id_B<-ifelse(colnames(data) %in% sam_BEB18034$x, "BEB18034",
ifelse(colnames(data) %in% sam_BNT1261$x, "NT1261",
ifelse(colnames(data) %in% sam_BEB19157$x,"BEB19157",
ifelse(colnames(data) %in% sam_1230$x, "1230",
ifelse(colnames(data) %in% sam_3329$x, "3329",
ifelse(colnames(data) %in% sam_HBQS$x, "HBQS",
ifelse(colnames(data) %in% sam_BEB18062$x,"BEB18062",
ifelse(colnames(data) %in% sam_NT1271$x, "NT1271",
ifelse(colnames(data) %in% sam_4313$x, "4313",
ifelse(colnames(data) %in% sam_4482$x, "4482",
ifelse(colnames(data) %in% sam_HCT17HEX$x,"HCT17HEX",
ifelse(colnames(data) %in% sam_HCTZZT$x, "HCTZZT",
ifelse(colnames(data) %in% sam_4305$x, "4305",
ifelse(colnames(data) %in% sam_4443$x, "4443", "Other"))))))))))))))
data$id<-ifelse(grepl("-8$|-9$",rownames(data@meta.data)), "4674",
ifelse(grepl("-10$|-11$",rownames(data@meta.data)),"1238",
ifelse(grepl("-12$|-13$",rownames(data@meta.data)),"4627",
ifelse(grepl("-14$|-15$",rownames(data@meta.data)),"1224",
ifelse(grepl("-16$|-17$",rownames(data@meta.data)),"3586",
ifelse(grepl("-18$|-19$",rownames(data@meta.data)),"4481",
ifelse(grepl("-20$|-21$",rownames(data@meta.data)),"5640",
ifelse(grepl("-22$|-23$", rownames(data@meta.data)),"4064", data$id_B))))))))
md<-data@meta.data
md$cell<-rownames(md)
meta<-read.csv("~/Meta.csv")
colnames(meta)[1]<-"id"
tmp<-merge(md, meta, by="id")
rownames(tmp)<-tmp$cell
tmp[,1:15]<-NULL
data<-AddMetaData(data, tmp)
data<-subset(data, id=="Other", invert=T) #removed those called as doublet/unassigned
###################################################################################
###################################################################################
pdf("~/VlnPlot_init.pdf", width=14, height=8)
VlnPlot(data, features = c("nCount_ATAC", "nFeature_RNA","nCount_RNA","percent.mt","blacklist_fraction",'TSS.enrichment','nucleosome_signal'), ncol = 4, pt.size = 0, group.by="id") + NoLegend() &theme(text=element_text(size=8, angle=20), axis.text.x=element_text(size=7))
dev.off()
###################################################################################
###################################################################################
# Filtering, norm, scale, dim reduc
data<-subset(data, subset = nFeature_RNA < 10000 &
nFeature_RNA > 200 &
nucleosome_signal < 2 &
percent.mt < 5 &
TSS.enrichment > 2)
data<-SCTransform(data, vars.to.regress = c("percent.mt"),verbose=F) %>% RunPCA(ndims=30) %>% FindNeighbors(dims = 1:30) %>%
RunUMAP(dims = 1:30, reduction.name="umap.rna", reduction.key = "rnaUMAP_")
DefaultAssay(data)<-"ATAC"
data<-RunTFIDF(data) %>% FindTopFeatures(min.cutoff='q0')%>% RunSVD()%>%
RunUMAP(reduction='lsi',dims=2:50,reduction.name="umap.atac",reduction.key="atacUMAP_")
step<-c(step,"filter")
numCells<-c(numCells, nrow(data@meta.data))
###################################################################################
###################################################################################
pdf("~/Init_Umap.pdf")
p1<-DimPlot(data, reduction="umap.rna")
p2<-DimPlot(data, reduction="umap.atac")
p1+p2
dev.off()
###################################################################################
###################################################################################
# Doublet
DefaultAssay(data)<-"SCT"
sce<-as.SingleCellExperiment(data)
dbl<-computeDoubletDensity(sce)
data$isDbl<-ifelse(dbl<3.5,1,0)
data$dbl<-dbl
###################################################################################
###################################################################################
pdf("~/Doublet.pdf")
p1<-DimPlot( data,group.by="isDbl",reduction="umap.rna")
p2<-FeaturePlot(data, features="dbl",reduction="umap.rna", max.cutoff=10)
p1+p2
dev.off()
###################################################################################
# 1224 1230 1238 3329 3586 4064
# 0 0.04988328 0.01993355 0.02109426 0.03671148 0.03088433 0.02903396
# 1 0.95011672 0.98006645 0.97890574 0.96328852 0.96911567 0.97096604
#
# 4305 4313 4443 4481 4482 4627
# 0 0.04025578 0.01385042 0.02153655 0.06106747 0.03017145 0.05621544
# 1 0.95974422 0.98614958 0.97846345 0.93893253 0.96982855 0.94378456
#
# 4674 5640 BEB18034 BEB18062 BEB19157 HBQS
# 0 0.03709199 0.07242729 0.04557921 0.03784254 0.03742250 0.02955741
# 1 0.96290801 0.92757271 0.95442079 0.96215746 0.96257750 0.97044259
#
# HCT17HEX HCTZZT NT1261 NT1271
# 0 0.05676761 0.04646697 0.04438848 0.03055833
# 1 0.94323239 0.95353303 0.95561152 0.96944167
data<-subset(data, subset= isDbl==1)
###################################################################################
###################################################################################
# Subset to Protein coding genes & recluster
PC_genes<-read.csv("~/PC_genes.csv",header=T)
tmp_assay<-subset(data[["RNA"]],features =PC_genes$x)
Key(tmp_assay)<-"pc_"
data[["PC"]]<-tmp_assay
DefaultAssay(data)<-"PC"
data <- SCTransform(data,assay="PC",new.assay.name="SCT",vars.to.regress = c("percent.mt"),verbose=F) %>% RunPCA(ndims=30) %>%
FindNeighbors(dims = 1:30) %>% RunUMAP(dims = 1:30, reduction.name="umap.rna",reduction.key = "rnaUMAP_")
data<-FindMultiModalNeighbors(data, reduction.list=list("pca","lsi"), dims.list=list(1:30,2:50))
data<-RunUMAP(data,nn.name="weighted.nn", reduction.name="wnn.umap",reduction.key="wnnUMAP_")
data<-FindClusters(data, graph.name="wsnn", algorithm=3)
###################################################################################
pdf("~/rna_atac.umap.RECLUSTER.pdf", width=14, height=8)
p1=DimPlot(data, reduction = "umap.rna") + ggtitle("RNA")+theme(legend.position="none")
p2=DimPlot(data, reduction = "umap.atac") + ggtitle("ATAC")+theme(legend.position="none")
p3=DimPlot(data, reduction="wnn.umap")+ ggtitle("WNN")
p1+p2+p3
dev.off()
###################################################################################
###################################################################################
# Map to Ref
Map<-readRDS("~/Mathys_DLPFC_SCT_normalized.rds")
anchors<-FindTransferAnchors(reference=Map, query=data, dims=1:30, normalization.method="SCT",
query.assay="SCT")
predictions<-TransferData(anchorset=anchors, reference=Map, query=data,
refdata=Map$celltype, dims=1:30)
data$predicted.id<-predictions$predicted.id
data$predicted.id.score<-predictions$predicted.id.score
###################################################################################
pdf("~/predicted.id.pdf",width=12,height=7)
p1<-DimPlot(data, group.by="predicted.id", reduction="wnn.umap")
p2<-FeaturePlot(data, features="predicted.id.score", reduction="wnn.umap",cols=c("coral1","deepskyblue3"))
p1+p2
dev.off()
###################################################################################
data2<-subset(data, subset=predicted.id.score>0.95)
step<-c(step,"predicted.id")
numCells<-c(numCells, nrow(data2@meta.data))
###################################################################################
###################################################################################
#with all
data2<-subset(data, subset=predicted.id.score>0.95)
integrated<-RunHarmony(data2, group.by.vars="id",reduction="pca",assay.use="SCT", reduction.save="harmony.rna", theta=1)
integrated<-RunUMAP(integrated,reduction="harmony.rna",dims=1:40,reduction.name="harmony.rna.umap")
integrated<-FindNeighbors(integrated,dims=1:30, reduction="harmony.rna")
integrated<-FindClusters(integrated, graph.name="SCT_nn", resolution=2.8)
pdf("~/WNN_Harmony_plot_CT.pdf", width=15, height=7)
p1<-DimPlot(integrated, group.by="predicted.id", reduction="harmony.rna.umap")
p2<-DimPlot(integrated, group.by="id", reduction="harmony.rna.umap")
p3<-DimPlot(integrated, reduction="harmony.rna.umap", label=T)
p1+p2+p3
dev.off()
pdf("~/WNN_Harmony_plot_batch.pdf", width=15, height=7)
DimPlot(integrated, group.by="predicted.id", reduction="harmony.rna.umap")
dev.off()
###################################################################################
#filter out cells that are not assigned the same cell type as their neighbors. Additional doublet precaution
Idents(int)<-int$seurat_clusters
tab<-table(int$seurat_clusters, int$predicted.id)
tmp<-colnames(tab)[apply(tab,1,which.max)]
names(tmp)<-levels(int)
int<-RenameIdents(int,tmp)
int$cluster_celltype<-Idents(int)
int$agree<-ifelse(int$cluster_celltype==int$predicted.id, 1,0)
###################################################################################
integrated_sub2<-subset(int, subset=agree==1)
integrated_sub2 <- SCTransform(integrated_sub2,assay="PC",new.assay.name="SCT",vars.to.regress = c("percent.mt"),verbose=F) %>% RunPCA(ndims=30) %>%
FindNeighbors(dims = 1:30) %>% RunUMAP(dims = 1:30, reduction.name="umap.rna",reduction.key = "rnaUMAP_")
integrated_sub2<-RunHarmony(integrated_sub2, group.by.vars="id",reduction="pca",assay.use="SCT", reduction.save="harmony.rna")
integrated_sub2<-RunUMAP(integrated_sub2,reduction="harmony.rna",dims=1:30,reduction.name="harmony.rna.umap")
DefaultAssay(integrated_sub2)<-"ATAC"
integrated_sub2<-RunTFIDF(integrated_sub2) %>% FindTopFeatures(min.cutoff='q0')%>% RunSVD()
integrated_sub2<-ScaleData(integrated_sub2, rownames(integrated_sub2[["ATAC"]]))
integrated_sub2<-RunHarmony(integrated_sub2, group.by.vars="id", reduction="lsi",assay.use="ATAC",reduction.save="harmony.atac", project.dim=F)
integrated_sub2<-RunUMAP(integrated_sub2,reduction="harmony.atac",dims=1:30,reduction.name="harmony.atac.umap")
integrated_sub2<-FindMultiModalNeighbors(integrated_sub2, reduction.list=list("harmony.rna","harmony.atac"), dims.list=list(1:30,2:50))
integrated_sub2<-RunUMAP(integrated_sub2,nn.name="weighted.nn", reduction.name="wnn.umap",reduction.key="wnnUMAP_")
data<-integrated_sub2
###################################################################################
###################################################################################
DefaultAssay(data)<-"ATAC"
CTpeaks<-CallPeaks(data, macs2.path="~/miniconda3/envs/env1/bin/macs2", group.by="predicted.id")
chr<-c()
for (i in 1:22){
chr<-c(chr, paste0("chr",i))
}
chr<-c(chr,"chrX")
chr<-c(chr,"chrY")
CTpeaks2<-CTpeaks[seqnames(CTpeaks) %in% chr]
df <- as.data.frame(CTpeaks)
df<-df[which(df$seqnames %in% chr),]
#bed file for reanalyze [sort -k1,1 -k2,2n CT_peak_set.bed]
write.table(df[,1:3], file="~/CT_peak_set.bed", quote=F, sep="\t", row.names=F, col.names=F)
CTpeaks2<-makeGRangesFromDataFrame(df)
macs2_counts<-FeatureMatrix(fragments=Fragments(data), features=CTpeaks2,cells=colnames(data))
annotation <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)
seqlevelsStyle(annotation) <- "UCSC"
data[["CTpeaks"]]<-CreateChromatinAssay(counts=macs2_counts, fragments=Fragments(data))
#get peak list labeled by celltype
write.csv(df,"~/22_restart/CTpeaks_annotated.csv")
saveRDS(data, "~/final.rds")
# get AD and Ctrl barcodes
df$status<-data$Status
AD<-df[which(df$status=="AD"),1:2]
Ctrl<-df[which(df$status=="Ctrl"),1:2]
write.csv(AD, "/barcodes_AD.csv", row.names=F, quote=F)
write.csv(Ctrl, "~/barcodes_Ctrl.csv", row.names=F, quote=F)
load("~/iCellGABA_CRE_ENCODEstyle_GR.Rvar")
enc=full_annot_encode_cre_def_GR
enc<-enc[!grepl("iCell",enc$accesionLabel),]
u2<-read.csv("~/scMultiomics_AD/CTpeaks_annotated.csv")
u2$index<-seq(1,nrow(u2))
ol_enc=findOverlaps(GRanges(u2),enc)
u2$Astrocytes <-grepl("Astrocytes",u2$peak_called_in)
u2$Microglia <-grepl("Microglia", u2$peak_called_in)
u2$Inhibitory <-grepl("Inhibitory", u2$peak_called_in)
u2$Excitatory <-grepl("Excitatory", u2$peak_called_in)
u2$Inhibitory <-grepl("Inhibitory", u2$peak_called_in)
u2$Oligodendrocytes<-grepl("Oligodendrocytes",u2$peak_called_in)
u2$OPCs <-grepl("OPCs", u2$peak_called_in)
u2$Pericytes <-grepl("Pericytes", u2$peak_called_in)
u2$Endothelial <-grepl("Endothelial", u2$peak_called_in)
c2_enc<-u2[queryHits(ol_enc),]
c2_enc$encLab<-as.character(enc[subjectHits(ol_enc),]$encodeLabel)
c2_enc<-c2_enc[!duplicated(c2_enc$index),]
d2<-merge(c2_enc, u2, by="index", all=T)
d2$encLab<-ifelse(is.na(d2$encLab)==T, "None",d2$encLab)
# grouped together
tmp<-d2
# tmp$numCT<-str_count(tmp$CT.x,",")
# tmp<-tmp[which(tmp$numCT==0),]
a<-prop.table(table(tmp$encLab, tmp$Astrocytes.y),2)
m<-prop.table(table(tmp$encLab, tmp$Microglia.y),2)
e<-prop.table(table(tmp$encLab, tmp$Excitatory.y),2)
i<-prop.table(table(tmp$encLab, tmp$Inhibitory.y),2)
o<-prop.table(table(tmp$encLab, tmp$Oligodendrocytes.y),2)
p<-prop.table(table(tmp$encLab, tmp$OPCs.y),2)
df<-cbind(a[,2],m[,2],e[,2],i[,2],o[,2],p[,2])
colnames(df)<-c("Ast","Mic","Exc","Inh","Oli","OPC")
df<-as.data.frame(df)
df$annot<-rownames(df)
df<-melt(df)
pdf("~/scMultiomics_AD/Peaks_annot_encode2.pdf", width=8, height=3)
ggplot(df, aes(x=value, y=variable, fill=annot))+geom_bar(stat="identity")+xlab("Proportion of Linked peaks")+ylab("Cell type")+theme_classic()+scale_fill_brewer(palette="Dark2",labels=c("CTCF-only","dELS","DNase-H3K4me3","pELS","PLS","other"))+theme(legend.position="right", axis.text=element_text(size=12),axis.title=element_text(size=15), legend.text=element_text(size=12))
dev.off()
H3K27ac from Nott et al. 2019 and Kolzlenkov et al. 2018
peaks<-read.csv("~/scMultiomics_AD/CTpeaks_annotated.csv")
peaks<-GRanges(peaks)
load(file="~/Nott_k27ac.rda")
ch<-import.chain("~/liftover/hg19ToHg38.over.chain")
Nott_k27ac_38<-liftOver(Nott_k27ac, ch)
Nott_k27ac_38<-unlist(Nott_k27ac_38) # lost 1,700
load("~/Dracheva_K27ac.rda")
all.k27<-c(Nott_k27ac_38, K27List$GABA[,1], K27List$GLU[,1])
over<-findOverlaps(peaks, all.k27)
all_olap<-peaks[queryHits(over)] # 189833
mcols(all_olap)$k27_ct<-all.k27[subjectHits(over)]$cell_type
all_olap$sub_ct<-gsub("GLU","Excitatory",all_olap$k27_ct)
all_olap$sub_ct<-gsub("GABA","Inhibitory",all_olap$sub_ct)
ct<-strsplit(all_olap$peak_called_in, ",")
a<-c()
for (i in 1:length(ct)){
a<-c(a, all_olap$sub_ct[i] %in% ct[[i]])
}
neuron<-all_olap[which(all_olap$sub_ct=="Neuron"),]
neuron1<-neuron[grepl("Excitatory", neuron$peak_called_in) | grepl("Inhibitory", neuron$peak_called_in),]
all_agree<-all_olap[a,]
olap_k27<-c(neuron1,all_agree)
olap_k27$peak<-paste(seqnames(olap_k27), "-", start(olap_k27),"-", end(olap_k27), sep="")
a<-as.data.frame(olap_k27) %>% group_by(peak) %>% summarize(k27= paste(sort(unique(k27_ct)),collapse="&"))
peaks$peak<-paste(seqnames(peaks), "-", start(peaks),"-", end(peaks), sep="")
tmp<-merge(peaks, a, by="peak", all.x=T)
tmp$ATAC_num<-str_count(tmp$peak_called_in, ",")+1
tmp$k27_num<-str_count(tmp$k27, "&")+1
k27_over<-findOverlaps(all.k27, all_agree) # 247359 # 398596
peaks_over<-findOverlaps(peaks, all_agree) # 125746 # 386892
write.csv(tmp, "~/scMultiomics_AD/CTpeaks_wK27olap.csv")
If peak was called in a cell type, does it overlap H3K27ac of that cell type? For peaks that were called in multiple cell types, they may overlap H3K27ac of only one cell type. (ie. peak called in microglia and astrocytes but only overlaps microglia H3K27ac)
tmp<-read.csv("~/scMultiomics_AD/CTpeaks_wK27olap.csv")
olap<-GRanges(tmp)
Astrocytes<-olap[grepl("Astrocytes",olap$peak_called_in),]
k27_Astrocytes<-all.k27[which(all.k27$cell_type=="Astrocytes"),]
over<-findOverlaps(Astrocytes, k27_Astrocytes)
k27_Astrocytes$olap<-FALSE
k27_Astrocytes[subjectHits(over)]$olap<-TRUE
Astrocytes$found<-FALSE
Astrocytes[unique(queryHits(over))]$found<-T
Atab<-as.data.frame(table(Astrocytes$found))
Atab$ct<-"Astrocytes"
Atab$cat<-"ATAC"
Atab2<-as.data.frame(table(k27_Astrocytes$olap))
Atab2$ct<-"Astrocytes"
Atab2$cat<-"k27"
Atab<-rbind(Atab, Atab2)
Excitatory<-olap[grepl("Excitatory",olap$peak_called_in),]
k27_Excitatory<-all.k27[which(all.k27$cell_type %in% c("Neuron","GLU")),]
over<-findOverlaps(Excitatory, k27_Excitatory)
Excitatory$found<-F
Excitatory[unique(queryHits(over))]$found<-T
Etab<-as.data.frame(table(Excitatory$found))
Etab$ct<-"Excitatory"
Etab$cat<-"ATAC"
k27_Excitatory$olap<-FALSE
k27_Excitatory[subjectHits(over)]$olap<-TRUE
Etab2<-as.data.frame(table(k27_Excitatory$olap))
Etab2$ct<-"Excitatory"
Etab2$cat<-"k27"
Etab<-rbind(Etab, Etab2)
Inhibitory<-olap[grepl("Inhibitory",olap$peak_called_in),]
k27_Inhibitory<-reduce(all.k27[which(all.k27$cell_type %in% c("Neuron","GABA")),])
over<-findOverlaps(Inhibitory, k27_Inhibitory)
k27_Inhibitory$olap<-FALSE
k27_Inhibitory[subjectHits(over)]$olap<-TRUE
Inhibitory$found<-FALSE
Inhibitory[unique(queryHits(over))]$found<-TRUE
Itab<-as.data.frame(table( Inhibitory$found))
Itab$ct<-"Inhibitory"
Itab$cat<-"ATAC"
Itab2<-as.data.frame(table(k27_Inhibitory$olap))
Itab2$ct<-"Inhibitory"
Itab2$cat<-"k27"
Itab<-rbind(Itab, Itab2)
Oligodendrocytes<-olap[grepl("Oligodendrocytes",olap$peak_called_in),]
k27_Oligodendrocytes<-all.k27[which(all.k27$cell_type=="Oligodendrocytes"),]
over<-findOverlaps(Oligodendrocytes, k27_Oligodendrocytes)
Oligodendrocytes$found<-F
Oligodendrocytes[unique(queryHits(over))]$found<-T
Otab<-as.data.frame(table( Oligodendrocytes$found))
Otab$ct<-"Oligodendrocytes"
Otab$cat<-"ATAC"
k27_Oligodendrocytes$olap<-FALSE
k27_Oligodendrocytes[subjectHits(over)]$olap<-TRUE
Otab2<-as.data.frame(table(k27_Oligodendrocytes$olap))
Otab2$ct<-"Oligodendrocytes"
Otab2$cat<-"k27"
Otab<-rbind(Otab, Otab2)
Microglia<-olap[grepl("Microglia",olap$peak_called_in),]
k27_Microglia<-all.k27[which(all.k27$cell_type=="Microglia"),]
over<-findOverlaps(Microglia, k27_Microglia)
Microglia$found<-F
Microglia[unique(queryHits(over))]$found<-T
Mtab<-as.data.frame(table( Microglia$found))
Mtab$ct<-"Microglia"
Mtab$cat<-"ATAC"
k27_Microglia$olap<-FALSE
k27_Microglia[subjectHits(over)]$olap<-TRUE
Mtab2<-as.data.frame(table(k27_Microglia$olap))
Mtab2$ct<-"Microglia"
Mtab2$cat<-"k27"
Mtab<-rbind(Mtab, Mtab2)
df<-rbind(Atab, Etab, Itab, Otab, Mtab)
df$color<-paste0(df$ct,"-",df$Var1)
df$color<-paste0(df$cat,"-",df$Var1)
df$celltype<-substr(df$ct,start=1,stop=3)
df$color<-ifelse(df$Var1==F, "AA No Overlap",df$color)
pdf("~/scMultiomics_AD/CTpeaks_olapk27_barPlot2_filtered0.02_XY.pdf")
ggplot(df, aes(y=Freq, x=cat, fill=color))+geom_bar(color="grey60",stat="identity")+theme_classic()+ylab("# of Peaks") + xlab("")+scale_x_discrete(labels=c("ATAC", "H3K27ac"))+
theme(axis.text.x=element_text(angle=90, size=15), plot.title=element_text(hjust=0.5), legend.position="top", axis.title.y=element_text(size=17, angle=90), legend.title=element_text(size=0), legend.text=element_text(size=15), axis.text.y=element_text(size=15, angle=90))+scale_y_continuous(position = "right")+
facet_wrap(~celltype , ncol=5, strip.position="bottom") +scale_fill_manual(limits=c("AA No Overlap","ATAC-TRUE","k27-TRUE"),labels=c( "No Overlap","Overlap H3K27ac", "Overlap ATAC")
,values=c(alpha("grey80",0.5),alpha("darkslategray3",0.5),"darkslategray3"))+xlab("")
dev.off()
per<-list()
j=1
for (i in seq(2,nrow(df),by=2)){
per[[j]]<-df[i,2]/(df[i,2]+df[i-1,2])
j=j+1
}
num<-t(t(table(data$id)))
colnames(num)<-"Total"
ct<-t(t(table(data$id, data$predicted.id)))
r1<-aggregate(nCount_RNA~id, data@meta.data, mean)
r2<-aggregate(nFeature_RNA~id, data@meta.data, mean)
t1<-aggregate(percent.mt~id, data@meta.data, mean)
a1<-aggregate(nCount_ATAC~id, data@meta.data, mean)
a2<-aggregate(nFeature_ATAC~id, data@meta.data, mean)
t2<-aggregate(TSS.enrichment~id, data@meta.data, mean)
c<-cbind(r1,r2,t1,a1,a2,t2)
c<-c[,c(1,2,4,6,8,10,12)]
c$Total<-num
c$Astrocytes <-ct[,1]
c$Endothelial <-ct[,2]
c$Excitatory <-ct[,3]
c$Inhibitory <-ct[,4]
c$Microglia <-ct[,5]
c$Oligodendrocytes<-ct[,6]
c$OPCs <-ct[,7]
c$Pericytes <-ct[,8]
meta<-read.csv("~/22_restart/meta_RIN_PMI.csv")
meta$ID[16]<-"HCTZZT"
meta2<-merge(meta, c, by.x="ID",by.y="id")
write.csv(meta2, "~/scMultiomics_AD/Sample_stats.csv", quote=F, row.names=F)
extra<-read.csv("~/meta_RIN_PMI.csv")
extra[16,1]<-"HCTZZT"
meta<-data@meta.data
meta$X<-rownames(meta)
meta<-meta[,-which(names(meta) %in% c("Age","Braak"))]
md.m<-merge(meta, extra, by.x="id",by.y="ID")
rownames(md.m)<-md.m$X
data<-AddMetaData(data, md.m)
DefaultAssay(data)<-"PC"
all.genes<-rownames(data)
data<-NormalizeData(data)
Astrocytes<-subset(data, subset=predicted.id=="Astrocytes")
Inhibitory<-subset(data, subset=predicted.id=="Inhibitory")
Excitatory<-subset(data, subset=predicted.id=="Excitatory")
Oligodendrocytes<-subset(data, subset=predicted.id=="Oligodendrocytes")
OPCs<-subset(data, subset=predicted.id=="OPCs")
Microglia<-subset(data, subset=predicted.id=="Microglia")
subs<-list(Astrocytes,Inhibitory,Excitatory,Oligodendrocytes,OPCs,Microglia)
library(MAST)
i=1
Idents(subs[[i]])<-subs[[i]]$Status
subs[[i]]<-NormalizeData(subs[[i]])
ALL<-FindMarkers(subs[[i]], ident.1="AD",ident.2="Ctrl",min.pct=0.25,latent.vars=c("Age","Sex"),test.use="MAST", assay="PC")
ALL$celltype<-subs[[i]]$predicted.id[1]
ALL$gene<-rownames(ALL)
for (i in 2:length(subs)){
Idents(subs[[i]])<-subs[[i]]$Status
subs[[i]]<-NormalizeData(subs[[i]])%>% ScaleData(features=row.names(data))
markers<-FindMarkers(subs[[i]], ident.1="AD",ident.2="Ctrl",min.pct=0.25, latent.vars=c("Age","Sex"), test.use="MAST",
assay="PC")
markers$celltype<-subs[[i]]$predicted.id[1]
markers$gene<-rownames(markers)
ALL<-rbind(ALL,markers)
}
ALL$cat<-ifelse(ALL$avg_log2FC >0, "up","down")
significant<-ALL[which(abs(ALL$avg_log2FC)>0.25),]
significant<-significant[which(significant$p_val_adj<0.01),]
write.csv(significant, "~/scMultiomics_AD/DEGs/DEGs_MAST_ADCtrl_AgeSex.csv")
# # # # all CT
degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_MAST_ADCtrl_AgeSex.csv")
library(enrichR)
setEnrichrSite("Enrichr")
dbs <- c("GO_Molecular_Function_2021", "GO_Cellular_Component_2021", "GO_Biological_Process_2021", "KEGG_2021_Human","Azimuth_Cell_Types_2021")
ast_down<-enrichr(degs[which(degs$celltype=="Astrocytes" & degs$cat=="down"),]$gene, dbs)
ast_up<-enrichr(degs[which(degs$celltype=="Astrocytes" & degs$cat=="up"),]$gene, dbs)
mic_down<-enrichr(degs[which(degs$celltype=="Microglia" & degs$cat=="down"),]$gene, dbs)
mic_up<-enrichr(degs[which(degs$celltype=="Microglia" & degs$cat=="up"),]$gene, dbs)
ex_down<-enrichr(degs[which(degs$celltype=="Excitatory" & degs$cat=="down"),]$gene, dbs)
ex_up<-enrichr(degs[which(degs$celltype=="Excitatory" & degs$cat=="up"),]$gene, dbs)
in_down<-enrichr(degs[which(degs$celltype=="Inhibitory" & degs$cat=="down"),]$gene, dbs)
in_up<-enrichr(degs[which(degs$celltype=="Inhibitory" & degs$cat=="up"),]$gene, dbs)
oli_down<-enrichr(degs[which(degs$celltype=="Oligodendrocytes" & degs$cat=="down"),]$gene, dbs)
oli_up<-enrichr(degs[which(degs$celltype=="Oligodendrocytes" & degs$cat=="up"),]$gene, dbs)
opc_down<-enrichr(degs[which(degs$celltype=="OPCs" & degs$cat=="down"),]$gene, dbs)
opc_up<-enrichr(degs[which(degs$celltype=="OPCs" & degs$cat=="up"),]$gene, dbs)
# down
a<-ast_down[["GO_Biological_Process_2021"]]
b<-mic_down[["GO_Biological_Process_2021"]]
c<-ex_down[["GO_Biological_Process_2021"]]
d<-in_down[["GO_Biological_Process_2021"]]
e<-oli_down[["GO_Biological_Process_2021"]]
f<-opc_down[["GO_Biological_Process_2021"]]
a$celltype<-"Astrocytes"
b$celltype<-"Microglia"
c$celltype<-"Excitatory"
d$celltype<-"Inhibitory"
e$celltype<-"Oligodendrocytes"
f$celltype<-"OPC"
df<-rbind(a,b,c,d,e,f)
df2<-df[which(df$Adjusted.P.value<0.05),]
df2<-df2[order(df2$Adjusted.P.value),]
top<-df2[c(1:10),1]
df2_BP_down<-df2
df2<-df[which(df$Adjusted.P.value<0.01),]
# write.csv(df2, "~/scMultiomics_AD/enrichr/top_BP_downAD_across_ct_MAST.csv")
write.csv(df2, "~/scMultiomics_AD/enrichr/top_BP_downAD_across_ct_MAST.csv")
t<-table(df2_BP_down$Term)
topDown<-head(df2$Term, n=10)
topDown<-df2 %>% group_by(celltype) %>% top_n(n=2, wt=Combined.Score)
topDown<-topDown$Term
tD<-df[which(df$Term %in% topDown),]
mat<-matrix(nrow=9, ncol=6)
colnames(mat)<-unique(df$celltype)
for (j in 1:9){
for (i in 1:6){
termTmp<-unique(topDown)[j]
ctTmp<-colnames(mat)[i]
sub<-df[which(df$Term==termTmp & df$celltype==ctTmp),]
if (nrow(sub)>0){
mat[j,i]<-sub$Odds.Ratio
}
else{
mat[j,i]<-0
}
}
}
rownames(mat)<-sapply(strsplit(unique(topDown), " (", fixed=T), `[`, 1)
colnames(mat)<-substr(colnames(mat),start=1,stop=3)
sig_mat<-matrix(nrow=9, ncol=6)
colnames(sig_mat)<-unique(df$celltype)
for (j in 1:9){
for (i in 1:6){
termTmp<-unique(topDown)[j]
ctTmp<-colnames(sig_mat)[i]
sub<-df[which(df$Term==termTmp & df$celltype==ctTmp),]
if (nrow(sub)>0){
sig_mat[j,i]<-sub$Adjusted.P.value}
else{
sig_mat[j,i]<-1} }}
mat<-mat[,c(1,3,4,2,5,6)]
sig_mat<-sig_mat[,c(1,3,4,2,5,6)]
ha2<-HeatmapAnnotation(celltype=colnames(mat)
, col= list(celltype=c("Ast"="darkgoldenrod1","Exc"="cornflowerblue","Inh"="seagreen3","Mic"="mediumorchid3","Oli"="coral3","OPC"="firebrick")), show_legend=F,annotation_label="")
ht=Heatmap(mat, cluster_rows=F,cluster_columns=F,col=colorRamp2(c(0,35,100),c("grey95","deepskyblue","dodgerblue4")), top_annotation=ha2, name="Odds.Ratio", show_column_names=T, show_row_names=T, column_title=NULL,row_names_side="left", row_title_gp=gpar(fontsize=14),row_names_max_width = max_text_width(
rownames(mat),
gp = gpar(fontsize = 12)
), cell_fun = function(j, i, x, y, w, h, fill) {
if(sig_mat[i, j] <0.05) {
grid.text("*", x, y, gp=gpar(fontsize=20, col="white"), vjust="center")
} })
pdf("~/scMultiomics_AD/Top_downDEG_GObp.pdf", width=13, height=5)
ht
dev.off()
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# BP UP
a<-ast_up[["GO_Biological_Process_2021"]]
b<-mic_up[["GO_Biological_Process_2021"]]
c<-ex_up[["GO_Biological_Process_2021"]]
d<-in_up[["GO_Biological_Process_2021"]]
e<-oli_up[["GO_Biological_Process_2021"]]
f<-opc_up[["GO_Biological_Process_2021"]]
a$celltype<-"Astrocytes"
b$celltype<-"Microglia"
c$celltype<-"Excitatory"
d$celltype<-"Inhibitory"
e$celltype<-"Oligodendrocytes"
f$celltype<-"OPC"
df<-rbind(a,b,c,d,e,f)
df_all<-df
df2<-df[which(df$Adjusted.P.value<0.05),]
df2<-df2[order(df2$Adjusted.P.value),]
top<-df2[c(1:10),1]
write.csv(df2, "~/scMultiomics_AD/enrichr/top_BP_upAD_across_ct_MAST.csv")
# up
t<-table(df2$Term)
topup<-head(df2$Term, n=10)
topup<-df2 %>% group_by(celltype) %>% top_n(n=5, wt=Combined.Score)
topup<-topup[order(topup$celltype),]
topup<-topup$Term
tD<-df[which(df$Term %in% topup),]
mat<-matrix(nrow=length(unique(topup)), ncol=6)
colnames(mat)<-unique(df$celltype)
for (j in 1:length(unique(topup))){
for (i in 1:6){
termTmp<-unique(topup)[j]
ctTmp<-colnames(mat)[i]
sub<-df[which(df$Term==termTmp & df$celltype==ctTmp),]
if (nrow(sub)>0){
mat[j,i]<- sub$Odds.Ratio
}
else{
mat[j,i]<-0
}
}
}
rownames(mat)<-sapply(strsplit(unique(topup), " (", fixed=T), `[`, 1)
colnames(mat)<-substr(colnames(mat),start=1,stop=3)
sig_mat<-matrix(nrow=length(unique(topup)), ncol=6)
colnames(sig_mat)<-unique(df$celltype)
for (j in 1:length(unique(topup))){
for (i in 1:6){
termTmp<-unique(topup)[j]
ctTmp<-colnames(sig_mat)[i]
sub<-df[which(df$Term==termTmp & df$celltype==ctTmp),]
if (nrow(sub)>0){
sig_mat[j,i]<-sub$Adjusted.P.value}
else{
sig_mat[j,i]<-1} }}
mat<-mat[,c(1,3,4,2,5,6)]
sig_mat<-sig_mat[,c(1,3,4,2,5,6)]
ha2<-HeatmapAnnotation(celltype=colnames(mat)
, col= list(celltype=c("Ast"="darkgoldenrod1","Exc"="cornflowerblue","Inh"="seagreen3","Mic"="mediumorchid3","Oli"="coral3","OPC"="firebrick")), show_legend=F,annotation_label="")
ht=Heatmap(mat, cluster_rows=F,cluster_columns=F,col=colorRamp2(c(0,35,100),c("grey95","red","red4")), top_annotation=ha2, name="Odds.Ratio", show_column_names=T, show_row_names=T, column_title=NULL,row_names_side="left", row_title_gp=gpar(fontsize=14),row_names_max_width = max_text_width(
rownames(mat),
gp = gpar(fontsize = 12)
), cell_fun = function(j, i, x, y, w, h, fill) {
if(sig_mat[i, j] <0.05) {
grid.text("*", x, y, gp=gpar(fontsize=20, col="white"), vjust="center")
} })
pdf("~/scMultiomics_AD/Top_upDEG_GObp.pdf", width=10, height=5)
ht
dev.off()
degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_MAST_ADCtrl_AgeSex.csv")
tab<-table(degs$celltype, degs$cat)
mat<-matrix(nrow=length(unique(degs$gene)),ncol=6)
rownames(mat)<-unique(degs$gene)
colnames(mat)<-unique(degs$celltype)
for (i in 1:nrow(mat)){
for (j in 1:ncol(mat)){
gene_tmp<-degs[which(degs$gene==rownames(mat)[i]),]
gene_tmp<-gene_tmp[which(gene_tmp$celltype==colnames(mat)[j]),]
mat[i,j]<-ifelse(nrow(gene_tmp)>0, gene_tmp$avg_log2FC,0)
}
}
colnames(mat)<-substr(colnames(mat),start=1,stop=3)
# # all labels
noDup<-degs[!duplicated(degs$gene),]
mat<-mat[order(noDup$celltype, noDup$avg_log2FC),]
mat<-mat[,c(1,3,2,6,4,5)]
ha2<-HeatmapAnnotation(celltype=colnames(mat), col= list(celltype=c("Ast"="darkgoldenrod1","Exc"="cornflowerblue","Inh"="seagreen3","Mic"="mediumorchid3","Oli"="coral3","OPC"="firebrick")), show_legend=F)
bar1<-HeatmapAnnotation(up=anno_barplot(tab[,2], gp=gpar(fill="red"), axis_param=list(labels=c("","","","")), ylim=c(0,280)),
down=anno_barplot((tab[,1] * -1), gp=gpar(fill="blue"), axis_param=list(labels=c("","","")), ylim=c(-280,0),
show_annotation_name=F,
show_legend=F))
ha<-c( bar1, ha2)
# #
#
# #
ad_related<-read.csv("~/AD_gwas_genes_012822.csv")
noDup<-noDup[order(noDup$celltype, noDup$avg_log2FC),]
lab<-which(noDup$gene %in% c(ad_related$Gene,"PLCG2","MAPT","ARL17B","SAMD4A","PTPRG","MDGA2","GPR158") & abs(noDup$avg_log2FC)>0.5)
ht=Heatmap(mat, cluster_rows=F,show_row_dend=F,cluster_columns=F,col=colorRamp2(c(-1,0,1),c("blue","white","red")), name="MAST-adj log2FC", show_column_names=T, show_row_names=F, column_title=NULL,row_names_gp = gpar(fontsize = 3), top_annotation=ha)
pdf("~/scMultiomics_AD/AD_DEGs_MAST_heatmap_foldCs.pdf", width=6, height=8)
rowAnnotation(gene=anno_mark(at=lab, labels=noDup[lab,]$gene, side="left", labels_gp=gpar(fontsize=10)))+ht
dev.off()
Add labels for specific genes
genes<-read.csv("~/scMultiomics_AD/testing.csv", col.names=1)
genes$X1<-gsub(" ","", genes$X1)
tmp<-strsplit(genes$X1, ",")
tmp<-unlist(tmp)
write.csv(tmp, "~/scMultiomics_AD/Luciferase_linked_genes.csv")
# back to cluster
luc<-read.csv("~/scMultiomics_AD/Luciferase_linked_genes.csv")
toTest<-toTest[which(toTest$gene != "PVR"),]
all_connect_genes<-subsetByOverlaps(l2, toTest)
show_genes<-unique(c(luc$x, all_connect_genes$gene))
noDup<-noDup[order(noDup$celltype, noDup$avg_log2FC),]
lab<-which(noDup$gene %in% c(ad_related$Gene,"PLCG2","MAPT","ARL17B", show_genes,
"ACSL1", "CD163", "CD44", "CR1", "CEMIP2", "DAB2", "FOXP1", "LYVE1", "MAFB", "MITF", "MYO5A", "NOTCH2", "SLC11A1", "TGFBI"))
pdf("~/scMultiomics_AD/AD_DEGs_MAST_heatmap_foldCs_wTestingLabel_otherColorScale2.pdf", width=6, height=8)
rowAnnotation(gene=anno_mark(at=lab, labels=noDup[lab,]$gene, side="left", labels_gp=gpar(fontsize=10)))+ht
dev.off()
# MATHYS
m1<-read.csv("~/mathys_data/MathysDE_Ast.csv")
m2<-read.csv("~/mathys_data/MathysDE_Ex.csv")
m3<-read.csv("~/mathys_data/MathysDE_In.csv")
m4<-read.csv("~/mathys_data/MathysDE_Microglia.csv")
m5<-read.csv("~/mathys_data/MathysDE_Oli.csv")
m6<-read.csv("~/mathys_data/MathysDE_opc.csv")
m1$celltype<-"Astrocytes"
m2$celltype<-"Excitatory"
m3$celltype<-"Inhibitory"
m4$celltype<-"Microglia"
m5$celltype<-"Oligodendrocytes"
m6$celltype<-"OPCs"
mat_degs<-rbind(m1,m2,m3,m4,m5,m6)
# mat_degs<-mat_degs[which(abs(as.numeric(mat_degs$IndModel.FC))>0.2 & as.numeric(mat_degs$IndModel.adj.pvals)<0.01),]
mat_degs<-mat_degs[which(mat_degs$DEGs.Ind.Model==T | mat_degs$DEGs.Ind.Mix.models==T),]
mat_v_our<-merge(degs, mat_degs, by=c("gene","celltype"), all.x=T)
mat_v_our$group<-ifelse(is.na(mat_v_our$p_val)==T, "Mathys",
ifelse(is.na(mat_v_our$DEGs.Ind.Model)==T, "Ours","Shared"))
mat_v_our<-merge(degs, mat_degs, by=c("gene","celltype"))
mat_v_our$IndModel.FC<-as.numeric(mat_v_our$IndModel.FC)
sum(sign(mat_v_our$avg_log2FC)==sign(mat_v_our$IndModel.FC))
# MORABITO
mor_degs<-read.csv("~/morabito/morabito_degs.csv")
mor_degs<-mor_degs[which(mor_degs$celltype!="PER.END"),]
mor_degs$celltype<-ifelse(mor_degs$celltype=="ASC", "Astrocytes",
ifelse(mor_degs$celltype=="EX", "Excitatory",
ifelse(mor_degs$celltype=="INH", "Inhibitory",
ifelse(mor_degs$celltype=="MG","Microglia",
ifelse(mor_degs$celltype=="ODC", "Oligodendrocytes","OPCs")))))
mor_v_our<-merge(degs, mor_degs, by=c("gene","celltype"), all.x=T)
mor_v_our$group<-ifelse(is.na(mor_v_our$p_val.x)==T, "Morabito",
ifelse(is.na(mor_v_our$p_val.y)==T, "Ours","Shared"))
vv<-merge(mor_degs, mat_degs, by=c("gene","celltype"),all=T)
vv<-merge(vv, degs, by=c("gene","celltype"),all=T)
vv$Mat<-ifelse(is.na(vv$IndModel.adj.pvals)==T, F,T)
vv$Mor<-ifelse(is.na(vv$p_val.x)==T, F,T)
vv$Our<-ifelse(is.na(vv$p_val.y)==T, F, T)
x<-list(Mor=which(vv$Mor==T), Mat=which(vv$Mat==T), Our=which(vv$Our==T))
pdf("~/scMultiomics_AD/Mathys_morabito_our_degs.pdf")
ggVennDiagram(x)+theme(legend.position="none")
dev.off()
x<-vv[,24:26]
nrow(x[which(x$Mor==T & x$Mat==F & x$Our==F),]) # 2562
nrow(x[which(x$Mor==F & x$Mat==T & x$Our==F),]) # 5716
nrow(x[which(x$Mor==F & x$Mat==F & x$Our==T),]) # 666
nrow(x[which(x$Mor==T & x$Mat==T & x$Our==F),]) # 182
nrow(x[which(x$Mor==T & x$Mat==F & x$Our==T),]) # 280
nrow(x[which(x$Mor==F & x$Mat==T & x$Our==T),]) # 84
nrow(x[which(x$Mor==T & x$Mat==T & x$Our==T),]) # 61
dat<-c("Morabito"=2562, "Mathys"=5716,"Ours"=666,"Morabito&Mathys"=182, "Morabito&Ours"=280, "Mathys&Ours"=84, "Morabito&Mathys&Ours"=62)
pdf("~/scMultiomics_AD/Mathys_morabito_Our_degs_venn.pdf", width=4, height=4)
v=venn(dat)
plot(v)
dev.off()
DefaultAssay(Astrocytes)<-"PC"
Astrocytes <- SCTransform(Astrocytes,assay="PC",new.assay.name="SCT",vars.to.regress = c("percent.mt"),verbose=F) %>% RunPCA(ndims=30)
Astrocytes<-RunUMAP(Astrocytes,reduction="sub.harmony.rna",dims=1:30,reduction.name="sub.harmony.rna.umap")
# atac
DefaultAssay(Astrocytes)<-"CTpeaks"
Astrocytes<-RunTFIDF(Astrocytes) %>% FindTopFeatures(min.cutoff='q50')%>% RunSVD()
Astrocytes<-RunUMAP(Astrocytes,reduction="harmony.atac",dims=2:50,reduction.name="sub.harmony.atac.umap")
# wnn
Astrocytes<-FindMultiModalNeighbors(Astrocytes, reduction.list=list("harmony.rna","harmony.atac"), dims.list=list(1:30,2:50))
Astrocytes<-RunUMAP(Astrocytes,nn.name="weighted.nn", reduction.name="wnn.umap",reduction.key="wnnUMAP_")
Astrocytes<-FindClusters(Astrocytes, graph.name="wknn", algorithm=1, resolution=0.3)
Astrocytes$subs<-paste0("Ast_", Astrocytes$seurat_clusters)
pdf("~/scMultiomics_AD/Ast_subclusters.pdf")
DimPlot(Astrocytes, group.by="subs",reduction="wnn.umap", cols=c("Ast_0"="darkgoldenrod1","Ast_1"="darkgoldenrod3", "Ast_2"="gold2",
"Ast_3"="goldenrod","Ast_4"="goldenrod1"), pt.size=2, label=T)
dev.off()
Astrocytes<-NormalizeData(Astrocytes)
DEGs<-FindAllMarkers(Astrocytes,min.pct=0.1,logfc.threshold = 0.1, assay="PC", test.use="MAST",latent.vars=c("Age","Sex"))
DEGs<-DEGs[which(DEGs$p_val_adj<0.01 & abs(DEGs$avg_log2FC)>0.5),]
DEGs$cat<-ifelse(DEGs$avg_log2FC>0, "up","down")
write.csv(DEGs, "~/scMultiomics_AD/DEGs/DEGs_Astrocytes.csv")
Supp Fig 5
degs_in<-read.csv("~/scMultiomics_AD/DEGs/DEGs_Astrocytes.csv")
degs_in<-degs_in[which(degs_in$cat=="up"),]
degs_in<-degs_in[order(degs_in$p_val_adj),]
degs_in<-degs_in[which(abs(degs_in$avg_log2FC)>0.55 & degs_in$p_val_adj<0.01 ),]
dbs <- c("GO_Molecular_Function_2021", "GO_Cellular_Component_2021", "GO_Biological_Process_2021", "KEGG_2021_Human", "Azimuth_Cell_Types_2021")
In<-list()
for (i in 1:5){
In[[i]]<-enrichr(degs_in[which(degs_in$cluster== i-1),]$gene, dbs)
}
a<-In[[1]][["GO_Biological_Process_2021"]]
b<-In[[2]][["GO_Biological_Process_2021"]]
c<-In[[3]][["GO_Biological_Process_2021"]]
d<-In[[4]][["GO_Biological_Process_2021"]]
e<-In[[5]][["GO_Biological_Process_2021"]]
a$i<-0
b$i<-1
c$i<-2
d$i<-3
e$i<-4
df<-rbind(a,b,c,d,e)
df_all<-df
df<-df[which(df$Adjusted.P.value<0.05),]
write.csv(df, "~/scMultiomics_AD/enrichr/Ast_subclusters_GO_BP.csv")
# switch back to BP
top<-df %>% group_by(i) %>% top_n(n=3, wt=Combined.Score)
top<-top[which(top$Adjusted.P.value<0.01),]
# top2.term<-top[which(top$i !=1),]$Term
term<-top$Term
mat<-matrix(nrow=length(unique(term)),ncol=5)
rownames(mat)<-unique(term)
colnames(mat)<-unique(df$i)
for (i in 1:nrow(mat)){
for (j in 1:ncol(mat)){
gene_tmp<-df[which(df$Term==rownames(mat)[i]),]
gene_tmp<-gene_tmp[which(gene_tmp$i==colnames(mat)[j]),]
mat[i,j]<-ifelse(nrow(gene_tmp)>0, gene_tmp$Odds.Ratio,1)
}
}
mat<-mat[which(rownames(mat) %in% top2.term),]
rn<-rownames(mat)
rn<-sapply(strsplit(rn, " (", fixed=T), `[`,1)
rownames(mat)<-rn
sig_mat<-matrix(nrow=length(unique(term)),ncol=5)
rownames(sig_mat)<-unique(term)
colnames(sig_mat)<-unique(df$i)
df_sig<-df[which(df$Adjusted.P.value<0.01),]
for (i in 1:nrow(sig_mat)){
for (j in 1:ncol(sig_mat)){
gene_tmp<-df_sig[which(df_sig$Term==rownames(sig_mat)[i]),]
gene_tmp<-gene_tmp[which(gene_tmp$i==colnames(sig_mat)[j]),]
sig_mat[i,j]<-ifelse(nrow(gene_tmp)>0, 1,0)
}
}
colnames(mat)<-paste0("Ast_", colnames(mat))
ht=Heatmap(mat, cluster_rows=T,show_row_dend=F,cluster_columns=F,col=colorRamp2(c(0,5,60),c("white", "# d3eff5","# 00518a")), name="Odds Ratio", show_column_names=T, show_row_names=T, column_title=NULL, row_names_side = "left",row_names_max_width = max_text_width(
rownames(mat),
gp = gpar(fontsize = 12)
), cell_fun = function(j, i, x, y, w, h, fill) {
if(sig_mat[i, j] >0) {
grid.text("*", x, y, gp=gpar(fontsize=10), vjust="center")
} })
library(stringr)
pdf("~/scMultiomics_AD/Ast_subcluster_GObp.pdf", width=8, height=4)
ht
dev.off()
tab<-prop.table(table(subs[[1]]$subs, subs[[1]]$id),2)
Status<-c("Ctrl","Ctrl","Ctrl","AD","Ctrl","AD","AD","AD","AD","AD","AD","Ctrl","Ctrl","Ctrl","Ctrl")
df<-as.data.frame(t(tab))
df$Status<-rep(Status, nrow(tab))
stat.test <- df %>%
group_by(Var2) %>%
t_test(Freq ~ Status) %>%
adjust_pvalue() %>%
add_significance("p.adj")
stat.test <- stat.test %>% add_xy_position(x = "Var2")
# 0.192
# 0.0649
# 0.48
# 0.358
# 0.534
Supp Fig 5
meta<-Astrocytes@meta.data
tab<-prop.table(table(meta$cluster, meta$id),1)
Status<-c("Ctrl","Ctrl","Ctrl","AD","Ctrl","AD","AD","AD","AD","AD","AD","Ctrl","Ctrl","Ctrl","Ctrl")
df<-as.data.frame(t(tab))
df$Status<-rep(Status, 5)
Ctrl<-brewer.pal(8,"Blues")
AD<-brewer.pal(7, "Reds")
tmp<-df[1:15,]
tmp<-tmp[order(tmp$Status),]
df$Var1<-factor(df$Var1, levels= tmp$Var1)
lev<-levels(df$Var2)
df$Var2<-factor(df$Var2, levels=lev)
tab2<-as.data.frame(table(meta$cluster))
tab2.m<-merge(tab2, cols.df, by.x="Var1", by.y="subs")
tab2.m$Var1<-factor(df$Var1, levels= tmp$Var1)
tab2.m<-tab2.m[order(-tab2.m$Var1),]
lev<-levels(tab2.m$Var1)
tab2.m$Var1<-factor(tab2.m$Var1, levels=lev)
tab2.m<-tab2.m[order(match(tab2.m$Var1, lev)),]
df<-df[order(match(df$Var2, lev2)),]
pdf("~/scMultiomics_AD/Ast_stat_prop_bar_wTotalCells_log10.pdf", width=5,height=6)
p1=ggplot(df, aes(x=Freq*100,y=Var2,fill=Var1))+geom_bar(stat="identity")+theme_classic()+scale_fill_manual(values=c(AD,Ctrl))+xlab("%")+ylab("")+theme(legend.title=element_text(size=0), legend.position="none")+ggtitle("Donor")+scale_y_discrete(limits=rev(levels(df$Var2)))
p2=ggplot(tab2.m, aes(x=Freq, y=Var1, fill=Var1))+geom_bar(stat="identity")+theme_classic()+theme(legend.position="none", axis.text.y=element_text(size=0))+ggtitle("Num. Cells")+ylab("")+scale_fill_manual(values=tab2.m$cols)+xlab(" Count")+scale_y_discrete(limits=rev(levels(tab2.m$Var1)))+geom_vline(xintercept=100)
p1+p2
dev.off()
olig_degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_Astrocytes.csv")
olig_degs<-olig_degs[which(olig_degs$cat=="up"),]
top <- olig_degs %>% group_by(cluster) %>% top_n(n=10, wt=avg_log2FC)
top$clust<-paste("Ast_", top$cluster, sep="")
avg_Exp<-as.data.frame(AverageExpression(Astrocytes, features=top$gene, group.by="cluster", assay="PC"))
colnames(avg_Exp)<-gsub("PC.","", colnames(avg_Exp))
mat_scaled<-t(apply(as.matrix(avg_Exp),1,scale))
colnames(mat_scaled)<-colnames(avg_Exp)
ha<-HeatmapAnnotation(cluster=colnames(mat_scaled), show_legend=F, col=list(cluster=c("Ast_0"="darkgoldenrod1", "Ast_1"="darkgoldenrod3", "Ast_2"="gold2", "Ast_3"="goldenrod", "Ast_4"="goldenrod1")))
ra<-rowAnnotation(cluster=top$clust, show_legend=F, col=list(cluster=c("Ast_0"="darkgoldenrod1", "Ast_1"="darkgoldenrod3","Ast_2"="gold2", "Ast_3"="goldenrod", "Ast_4"="goldenrod1")))
ht=Heatmap(mat_scaled,cluster_columns=T,cluster_rows=F, col=colorRamp2(c(-1.5,0,1.5),viridis(3)), top_annotation=ha, name="Z", left_annotation=ra, show_column_names=T, show_row_names=T, column_title=NULL, row_title_gp=gpar(fontsize=10),row_names_gp=gpar(fontsize=7), row_split=top$clust)
pdf("~/scMultiomics_AD/Ast_top10DEG_heatmap.pdf", width=4, height=6)
ht
dev.off()
This analysis was not shown in publication
sad<-read.csv("~/Sadick/Sadick_Ast_subDEGs.csv")
sad<-sad[which(sad$p_val_adj<0.01 & sad$avg_log2FC>0.35),]
top<-sad %>% group_by(LEN_so_astro_r2_cluster) %>% top_n(n=30, wt=avg_log2FC)
Astrocytes<-AddModuleScore(Astrocytes, features=list(top[which(top$LEN_so_astro_r2_cluster==0),]$gene), name="Clust0_score", search=T)
Astrocytes<-AddModuleScore(Astrocytes, features=list(top[which(top$LEN_so_astro_r2_cluster==1),]$gene), name="Clust1_score", search=T)
Astrocytes<-AddModuleScore(Astrocytes, features=list(top[which(top$LEN_so_astro_r2_cluster==2),]$gene), name="Clust2_score", search=T)
Astrocytes<-AddModuleScore(Astrocytes, features=list(top[which(top$LEN_so_astro_r2_cluster==3),]$gene), name="Clust3_score", search=T)
Astrocytes<-AddModuleScore(Astrocytes, features=list(top[which(top$LEN_so_astro_r2_cluster==4),]$gene), name="Clust4_score", search=T)
Astrocytes<-AddModuleScore(Astrocytes, features=list(top[which(top$LEN_so_astro_r2_cluster==5),]$gene), name="Clust5_score", search=T)
Astrocytes<-AddModuleScore(Astrocytes, features=list(top[which(top$LEN_so_astro_r2_cluster==6),]$gene), name="Clust6_score", search=T)
Astrocytes<-AddModuleScore(Astrocytes, features=list(top[which(top$LEN_so_astro_r2_cluster==7),]$gene), name="Clust7_score", search=T)
Astrocytes<-AddModuleScore(Astrocytes, features=list(top[which(top$LEN_so_astro_r2_cluster==8),]$gene), name="Clust8_score", search=T)
pdf("~/scMultiomics_AD/topick_Astrocytes_subclusters.pdf")
FeaturePlot(Astrocytes, reduction="wnn.umap", features=c("Clust0_score1","Clust1_score1","Clust2_score1","Clust3_score1","Clust4_score1","Clust5_score1","Clust6_score1","Clust7_score1","Clust8_score1"))
dev.off()
DefaultAssay(Inhibitory)<-"PC"
Inhibitory <- SCTransform(Inhibitory,assay="PC",new.assay.name="SCT",vars.to.regress = c("percent.mt"),verbose=F) %>% RunPCA(ndims=30)
# Inhibitory<-RunHarmony(Inhibitory, group.by.vars="id",reduction="pca",assay.use="SCT", reduction.save="sub.harmony.rna", lambda=0.8)
Inhibitory<-RunUMAP(Inhibitory,reduction="harmony.rna",dims=1:30,reduction.name="sub.harmony.rna.umap")
# atac
DefaultAssay(Inhibitory)<-"CTpeaks"
Inhibitory<-RunTFIDF(Inhibitory) %>% FindTopFeatures(min.cutoff='q50')%>% RunSVD()
Inhibitory<-RunUMAP(Inhibitory,reduction="harmony.atac",dims=2:50,reduction.name="sub.harmony.atac.umap")
# wnn
Inhibitory<-FindMultiModalNeighbors(Inhibitory, reduction.list=list("harmony.rna","harmony.atac"), dims.list=list(1:30,2:50))
Inhibitory<-RunUMAP(Inhibitory,nn.name="weighted.nn", reduction.name="wnn.umap",reduction.key="wnnUMAP_")
Inhibitory<-FindClusters(Inhibitory, graph.name="wknn", algorithm=1, resolution=0.2)
Inhibitory$batch<-ifelse(Inhibitory$id %in% c("1224","1238","3586","4064","4627","4674","5640","4481"), "1","2")
Inhibitory$subs<-paste0("Inh_", Inhibitory$seurat_clusters)
pdf("~/scMultiomics_AD/Inh_subclusters.pdf")
DimPlot(Inhibitory, group.by="subs",reduction="wnn.umap", cols=c( "Inh_0"="palegreen", "Inh_1"="springgreen", "Inh_2"="darkolivegreen3", "Inh_3"="seagreen3", "Inh_4"="forestgreen", "Inh_5"="darkseagreen", "Inh_6"="palegreen4", "Inh_7"="darkolivegreen1"), pt.size=2, label=T)
dev.off()
DEGs<-FindAllMarkers(Inhibitory,min.pct=0.1,logfc.threshold = 0.1, assay="PC", test.use="MAST",latent.vars=c("Age","Sex"))
DEGs<-DEGs[which(DEGs$p_val_adj<0.01 & abs(DEGs$avg_log2FC)>0.5),]
DEGs$cat<-ifelse(DEGs$avg_log2FC>0, "up","down")
write.csv(DEGs, "~/scMultiomics_AD/DEGs/DEGs_Inhibitory.csv")
degs_in<-read.csv("~/scMultiomics_AD/DEGs/DEGs_Inhibitory.csv")
degs_in<-degs_in[which(degs_in$cat=="up"),]
degs_in<-degs_in[order(degs_in$p_val_adj),]
dbs<-"Azimuth_Cell_Types_2021"
# dbs<-"Allen_Brain_Atlas_10x_scRNA_2021"
In<-list()
for (i in 1:8){
In[[i]]<-enrichr(degs_in[which(degs_in$cluster== i-1),]$gene, dbs)
}
head(In[[i]][["Azimuth_Cell_Types_2021"]])
pdf("~/scMultiomics_AD/IN_marker_vlnplot_2.pdf", width=15,height=20)
VlnPlot(Inhibitory, features=c("PVALB","SST","VIP","LAMP5", "SNCG"), pt.size=0.5)
dev.off()
cluster.ids<-c("PVALB+",
"SST+",
"VIP+",
"SNCG+",
"LAMP5+ 1",
"Chandelier",
"LAMP5+ 2",
"SST+ KLHL1+" )
tmp<-Inhibitory$seurat_clusters
names(cluster.ids)<-levels(Inhibitory)
Inhibitory<-RenameIdents(Inhibitory,cluster.ids)
Inhibitory$azimuth<-Idents(Inhibitory)
Inhibitory$seurat_clusters<-tmp
Idents(Inhibitory)<-Inhibitory$seurat_clusters
# Inhibitory$azimuth<-factor(Inhibitory$azimuth,levels=levels(Inhibitory)[order(levels(Inhibitory))])
pdf("~/scMultiomics_AD/Azimuth_In_layers2.pdf", width=7, height=5)
DimPlot(Inhibitory, reduction="wnn.umap",group.by="azimuth", cols=c(
"PVALB+"="palegreen3",
"SNCG+"="gold",
"VIP+"="firebrick1",
"LAMP5+ 1"="orange",
"LAMP5+ 2"="darkorange3",
"Chandelier"="mediumorchid",
"SST+"="deepskyblue2",
"SST+ KLHL1+"="skyblue1" )
)+ggtitle("GABAergic Subtypes")
dev.off()
Inhibitory$cluster<-paste("Inh_", Inhibitory$seurat_clusters, ": ", Inhibitory$azimuth,)
pdf("~/scMultiomics_AD/Azimuth_In_layers2.pdf", width=7, height=5)
DimPlot(Inhibitory, reduction="wnn.umap",group.by="cluster", cols=c(
"Inh_0: Pvalb+" =cols.df[17,1],
"Inh_1: VIP+" =cols.df[18,1],
"Inh_2: Sst+ RPL35AP11+"=cols.df[19,1],
"Inh_3: LAMP5+" =cols.df[20,1],
"Inh_4: LAMP5+ AARD+" =cols.df[21,1],
"Inh_5: Chandelier" =cols.df[22,1],
"Inh_6: Sst+ KLHL1+" =cols.df[23,1],
"Inh_7: Sst+ Chodl+" =cols.df[24,1]))+ggtitle("GABAergic Subtypes")
dev.off()
pdf("~/scMultiomics_AD/In_marker_expression_plot.pdf")
VlnPlot(Inhibitory, features=c("PVALB","VIP","LAMP5","SST","SNCG"), assay="PC", pt.size=0.5) & scale_fill_manual(values=c(
"Pvalb+"="palegreen",
"VIP+"="firebrick1",
"LAMP5+"="sienna",
"LAMP5+ AARD+"="coral",
"Chandelier"="mediumorchid",
"Sst+ RPL35AP11+"="deepskyblue2",
"Sst+ KLHL1+"="deepskyblue4",
"Sst+ Chodl+"="skyblue1" ))
dev.off()
pdf("~/scMultiomics_AD/In_marker_featPlot.pdf")
FeaturePlot(Inhibitory, reduction="wnn.umap", features=c("PVALB","VIP","LAMP5","SST"), max.cutoff=1)&scale_color_viridis()
dev.off()
Inhibitory$cluster<-paste("Inh_", Inhibitory$seurat_clusters)
pdf("~/scMultiomics_AD/IN_marker_dotplot.pdf", width=6, height=8)
DotPlot(Inhibitory, features=c("PVALB","VIP","LAMP5","SST"), col.min=0, col.max=1.5, dot.scale=20, group.by="cluster")+scale_color_viridis()
dev.off()
# # IN
markers<-data.frame(SST= c("GRID2","RALYL","COL25A1","SST","GRIK1","NXPH1","SOX6","TRHDE","SLC8A1","ST6GALNAC5"),
VIP= c("SYNPR","SLC24A3","GRM7","KCNT2","GALNTL6","THSD7A","ADARB2","VIP","LRP1B","ERBB4"),
PVALB=c("DPP10","SDK1","ERBB4","ADAMTS17","ZNF804A","BTBD11","SLIT2","MYO16","GRIA4","PVALB"),
LAMP5=c("LAMP5","PTPRT","PRELID2","EYA4","PTCHD4","FGF13","FBXL7","MYO16","GRIA4","RELN"),
Chand=c("ADAMTS5","PTHLH","NOG","COL15A1","PLEKHH2","COL21A1","NPNT","SRGAP1","HTR1F","FAM19A4"), SNCG=c("MAML3", "CNR1", "SNCG", "CNTN5", "ADARB2","CXCL14", "SLC8A1", "FSTL5", "NPAS3", "ASIC2"))
markers$seq<-seq(1, nrow(markers))
markers_melt<-melt(markers, id="seq")
avg_Exp<-as.data.frame(AverageExpression(Inhibitory, features=markers_melt$value,assay="PC", group.by="subs"))
colnames(avg_Exp)<-gsub("PC.","", colnames(avg_Exp))
avg_Exp<-as.matrix(avg_Exp)
markers_melt<-markers_melt[which(markers_melt$value %in% rownames(avg_Exp)),]
markers_melt<-markers_melt[!duplicated(markers_melt$value),]
mat_scaled<-t(apply(avg_Exp,1,scale))
# mat_scaled<-apply(avg_Exp,2,scale)
colnames(mat_scaled)<-colnames(avg_Exp)
ha<-HeatmapAnnotation(celltype=colnames(avg_Exp),col= list(celltype=c( "Inh_0"="palegreen", "Inh_1"="springgreen", "Inh_2"="darkolivegreen3", "Inh_3"="seagreen3", "Inh_4"="forestgreen", "Inh_5"="darkseagreen", "Inh_6"="palegreen4", "Inh_7"="darkolivegreen1")), show_legend=F)
ha2<-HeatmapAnnotation(subtype=c("PVALB+","SST+","VIP+", "SNCG+", "LAMP5+", "Chandelier","LAMP5+", "SST+ KLHL1+") , col=list(subtype=c("PVALB+"="green","VIP+"="orange", "LAMP5+"="dodgerblue", "Chandelier"="purple","SST+"="red","SST+ KLHL1+"="red3", "SNCG+"="yellow")), show_legend=T)
ha2<-c(ha,ha2)
ra<-rowAnnotation(subtype=markers_melt$variable, col=list(subtype=c("SST"="red","VIP"="orange","PVALB"="green","LAMP5"="dodgerblue","Chand"="purple", "SNCG"="yellow")))
ht=Heatmap(mat_scaled,cluster_rows=T,cluster_columns=T,col=colorRamp2(c(-1.5,0,1.5),viridis(3)), top_annotation=ha2, name="Z", left_annotation=ra, show_column_names=T, show_row_names=T, column_title=NULL, row_split=markers_melt$variable,row_title_gp=gpar(fontsize=14),row_names_gp=gpar(fontsize=7))
pdf("~/scMultiomics_AD/In_Subtype_heatmap.pdf", width=5,height=6)
ht
dev.off()
degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_Inhibitory.csv")
degs<-degs[which(degs$p_val_adj<0.01 & abs(degs$avg_log2FC)>0.5),]
degs_up<-degs[which(degs$cat=="up"),]
dbs <- c("GO_Molecular_Function_2021", "GO_Cellular_Component_2021", "GO_Biological_Process_2021", "KEGG_2021_Human", "Azimuth_Cell_Types_2021", "Allen_Brain_Atlas_10x_scRNA_2021")
# 0 1 2 3 4 5 6 7
# 197 181 228 182 296 229 254 178
In<-list()
for (i in 1:8){
In[[i]]<-enrichr(degs_up[which(degs_up$cluster== i-1),]$gene, dbs)
}
a<-In[[1]][["GO_Biological_Process_2021"]]
b<-In[[2]][["GO_Biological_Process_2021"]]
c<-In[[3]][["GO_Biological_Process_2021"]]
d<-In[[4]][["GO_Biological_Process_2021"]]
e<-In[[5]][["GO_Biological_Process_2021"]]
f<-In[[6]][["GO_Biological_Process_2021"]]
g<-In[[7]][["GO_Biological_Process_2021"]]
h<-In[[8]][["GO_Biological_Process_2021"]]
a$i<-0
b$i<-1
c$i<-2
d$i<-3
e$i<-4
f$i<-5
g$i<-6
h$i<-7
df<-rbind(a,b,c,d,e,f,g,h)
df_all<-df
df<-df[which(df$Adjusted.P.value<0.05),]
write.csv(df, "~/scMultiomics_AD/enrichr/Inh_subclusters_GO_BP.csv")
top<-df %>% group_by(i) %>% top_n(n=2, wt=-Adjusted.P.value)
term<-top$Term
term<-sort(unique(term))
mat<-matrix(nrow=length(unique(term)),ncol=8)
rownames(mat)<-unique(term)
colnames(mat)<-unique(df$i)
for (i in 1:nrow(mat)){
for (j in 1:ncol(mat)){
gene_tmp<-df_all[which(df_all$Term==rownames(mat)[i]),]
gene_tmp<-gene_tmp[which(gene_tmp$i==colnames(mat)[j]),]
mat[i,j]<-ifelse(nrow(gene_tmp)>0, gene_tmp$Odds.Ratio,1)
}
}
rn<-rownames(mat)
rn<-sapply(strsplit(rn, " (", fixed=T), `[`,1)
rownames(mat)<-rn
sig_mat<-matrix(nrow=length(unique(term)),ncol=8)
rownames(sig_mat)<-unique(term)
colnames(sig_mat)<-unique(df$i)
df_sig<-df[which(df$Adjusted.P.value<0.01),]
for (i in 1:nrow(sig_mat)){
for (j in 1:ncol(sig_mat)){
gene_tmp<-df_sig[which(df_sig$Term==rownames(sig_mat)[i]),]
gene_tmp<-gene_tmp[which(gene_tmp$i==colnames(sig_mat)[j]),]
sig_mat[i,j]<-ifelse(nrow(gene_tmp)>0, 1,0)
}
}
colnames(mat)<-paste0("Inh_", colnames(mat))
ht=Heatmap(mat, cluster_rows=T,show_row_dend=F,cluster_columns=F,col=colorRamp2(c(0,5,15),c("white", "# d3eff5","# 00518a")), name="Odds Ratio", show_column_names=T, show_row_names=T, column_title=NULL, row_names_side = "left",row_names_max_width = max_text_width(
rownames(mat),
gp = gpar(fontsize = 12)
), cell_fun = function(j, i, x, y, w, h, fill) {
if(sig_mat[i, j] >0) {
grid.text("*", x, y, gp=gpar(fontsize=10), vjust="center")
} })
library(stringr)
pdf("~/scMultiomics_AD/Inh_subcluster_GObp.pdf", width=8, height=4)
ht
dev.off()
tab<-prop.table(table(Inhibitory$azimuth, Inhibitory$id),2)
Status<-c("Ctrl","Ctrl","Ctrl","AD","Ctrl","AD","AD","AD","AD","AD","AD","Ctrl","Ctrl","Ctrl","Ctrl")
df<-as.data.frame(t(tab))
df$Status<-rep(Status, nrow(tab))
tests<-list()
for (i in 1:nrow(tab)){
ct<-rownames(tab)[i]
df.tmp<-df[which(df$Var2==ct),]
tests[[i]]<-t.test(df.tmp[which(df.tmp$Status=="Ctrl"),]$Freq, df.tmp[which(df.tmp$Status=="AD"),]$Freq)$p.value
}
# Chandelier 0.4980
# Lamp5+ 0.0417
# Pvalb+ 0.0415
# Sncg+ 0.5030
# Sst+ Chodl+ 0.3860
# Sst+ KLHL1+ 0.4400
# Sst+ RPL35AP11+ 0.3100
# VIP+ 0.1710
stat.test <- df %>%
group_by(Var2) %>%
t_test(Freq ~ Status) %>%
adjust_pvalue() %>%
add_significance("p.adj")
stat.test <- stat.test %>% add_xy_position(x = "Var2")
pdf("~/scMultiomics_AD/In_CT_stat_prop_tt.pdf", width=10,height=5)
ggboxplot(df, y="Freq",x="Var2", fill="Status", alpha=0.8)+stat_pvalue_manual(stat.test, label="p")+xlab("")+scale_fill_manual(values=c("red","blue"))
dev.off()
AD Ctrl
Inh_0 2043 2104 Inh_1 1571 2096 Inh_2 1836 1785 Inh_3 916 1047 Inh_4 564 598 Inh_5 341 372 Inh_6 305 380 Inh_7 154 219
DefaultAssay(Excitatory)<-"PC"
Excitatory <- SCTransform(Excitatory,assay="PC",new.assay.name="SCT",vars.to.regress = c("percent.mt"),verbose=F) %>% RunPCA(ndims=30)
Excitatory<-RunUMAP(Excitatory,reduction="harmony.rna",dims=1:30,reduction.name="sub.harmony.rna.umap")
# atac
DefaultAssay(Excitatory)<-"CTpeaks"
Excitatory<-RunTFIDF(Excitatory) %>% FindTopFeatures(min.cutoff='q50')%>% RunSVD()
Excitatory<-RunUMAP(Excitatory,reduction="harmony.atac",dims=2:50,reduction.name="sub.harmony.atac.umap")
# wnn
Excitatory<-FindMultiModalNeighbors(Excitatory, reduction.list=list("harmony.rna","harmony.atac"), dims.list=list(1:30,2:50))
Excitatory<-RunUMAP(Excitatory,nn.name="weighted.nn", reduction.name="wnn.umap",reduction.key="wnnUMAP_")
Excitatory<-FindClusters(Excitatory, graph.name="wknn", algorithm=1, resolution=0.)
Excitatory$batch<-ifelse(Excitatory$id %in% c("1224","1238","3586","4064","4627","4674","5640","4481"), "1","2")
Excitatory<-FindClusters(Excitatory, graph.name="wknn", algorithm=1, resolution=0.15)
Excitatory$subs<-paste0("Exc_", Excitatory$seurat_clusters)
Excitatory2<-subset(Excitatory, subset=seurat_clusters==10, invert=T)
pdf("~/scMultiomics_AD/Exc_subclusters.pdf")
DimPlot(Excitatory2, group.by="subs",reduction="wnn.umap", cols=c( "Exc_0"="royalblue1", "Exc_1"="deepskyblue2", "Exc_2"="steelblue1", "Exc_3"="steelblue3", "Exc_4"="cornflowerblue", "Exc_5"="royalblue3", "Exc_6"="cadetblue1", "Exc_7"="cadetblue3",
"Exc_8"="dodgerblue","Exc_9"="dodgerblue3"), pt.size=2, label=T)
dev.off()
DEGs<-FindAllMarkers(Excitatory2,min.pct=0.1,logfc.threshold = 0.1, assay="PC", test.use="MAST",latent.vars=c("Age","Sex"))
DEGs<-DEGs[which(DEGs$p_val_adj<0.01 & abs(DEGs$avg_log2FC)>0.5),]
DEGs$cat<-ifelse(DEGs$avg_log2FC>0, "up","down")
write.csv(DEGs, "~/scMultiomics_AD/DEGs/DEGs_Excitatory.csv")
# # Excitatory subtypes
# same as large subcluster DEG list but this ran first
degs_ex<-read.csv("~/scMultiomics_AD/DEGs/DEGs_Excitatory.csv")
degs_ex<-degs_ex[which(degs_ex$cat=="up"),]
degs_ex<-degs_ex[order(degs_ex$gene,degs_ex$p_val_adj),]
dbs<-"Azimuth_Cell_Types_2021"
ex<-list()
for (i in 1:10){
ex[[i]]<-enrichr(degs_ex[which(degs_ex$cluster== i-1),]$gene, dbs)
}
i=i+1
head(ex[[i]][["Azimuth_Cell_Types_2021"]])
Excitatory<-subset(Excitatory, seurat_clusters==10, invert=T)
cluster.ids<-c("L2-3 Intratelencephalon-Projecting 1",
"L2-3 Intratelencephalon-Projecting 2",
"L5 Intratelencephalon-Projecting 2",
"L5 Intratelencephalon-Projecting 1",
"L6 THEMIS+ LINC00343+",
"L6 Corticothalamic-Projecting",
"L5 Intratelencephalon-Projecting 3",
"L6 Car3+ Intratelencephalon-Projecting",
"L5-6 Near-Projecting",
"L5 Extratelencephalon-Projecting" )
tmp<-Excitatory$seurat_clusters
names(cluster.ids)<-levels(Excitatory)
Excitatory<-RenameIdents(Excitatory,cluster.ids)
Excitatory$azimuth<-Idents(Excitatory)
Excitatory$seurat_clusters<-tmp
# Idents(Excitatory)<-Excitatory$seurat_clusters
Excitatory$azimuth<-factor(Excitatory$azimuth,levels=levels(Excitatory)[order(levels(Excitatory))])
Excitatory$cluster<-paste("Exc_",Excitatory$seurat_clusters,": ",Excitatory$azimuth, sep="")
pdf("~/scMultiomics_AD/Azimuth_Ex_layers.pdf", width=8, height=5)
DimPlot(Excitatory, reduction="wnn.umap", group.by="azimuth", pt.size=0.2, cols=c(
"L2-3 Intratelencephalon-Projecting 1" ="firebrick1",
"L2-3 Intratelencephalon-Projecting 2"="firebrick3",
"L5 Intratelencephalon-Projecting 1"="gold",
"L5 Intratelencephalon-Projecting 2"="goldenrod3",
"L5 Intratelencephalon-Projecting 3"="darkgoldenrod4",
"L5 Extratelencephalon-Projecting"="khaki1",
"L5-6 Near-Projecting"="turquoise",
"L6 THEMIS+ LINC00343+"="deepskyblue",
"L6 Corticothalamic-Projecting"="blue",
"L6 Car3+ Intratelencephalon-Projecting"="cadetblue3"
))+ggtitle("Glutamatergic Subtypes")
dev.off()
pdf("~/scMultiomics_AD/Azimuth_Ex_layers.pdf", width=8, height=5)
DimPlot(subs[[3]], reduction="wnn.umap", group.by="cluster", pt.size=0.2, cols=c(
"Exc_0: L2-3 Intratelencephalon-Projecting 1" =cols.df[7,1],
"Exc_1: L2-3 Intratelencephalon-Projecting 2"=cols.df[8,1],
"Exc_2: L5 Intratelencephalon-Projecting 2"=cols.df[9,1],
"Exc_3: L5 Intratelencephalon-Projecting 1"=cols.df[10,1],
"Exc_4: L6 THEMIS+ LINC00343+"=cols.df[11,1],
"Exc_5: L6 Corticothalamic-Projecting"=cols.df[12,1],
"Exc_6: L5 Intratelencephalon-Projecting 3"=cols.df[13,1],
"Exc_7: L6 Car3+ Intratelencephalon-Projecting"=cols.df[14,1],
"Exc_8: L5-6 Near-Projecting"=cols.df[15,1],
"Exc_9: L5 Extratelencephalon-Projecting"=cols.df[16,1]
))+ggtitle("Glutamatergic Subtypes")
dev.off()
subs[[3]]$label<-paste("Exc_", subs[[3]]$seurat_clusters, sep="")
pdf("~/scMultiomics_AD/Azimuth_Ex_layers_label.pdf", width=5, height=5)
DimPlot(subs[[3]], reduction="wnn.umap", group.by="label", pt.size=0.2, cols=c(
"Exc_0" =cols.df[7,1],
"Exc_1"=cols.df[8,1],
"Exc_2"=cols.df[9,1],
"Exc_3"=cols.df[10,1],
"Exc_4"=cols.df[11,1],
"Exc_5"=cols.df[12,1],
"Exc_6"=cols.df[13,1],
"Exc_7"=cols.df[14,1],
"Exc_8"=cols.df[15,1],
"Exc_9"=cols.df[16,1]
), label=T)+ggtitle("Glutamatergic Subtypes")+theme(legend.position="none")
dev.off()
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
degs_ex<-read.csv("~/scMultiomics_AD/DEGs/DEG_Excitatory_subTypes_AgeSex.csv")
degs_ex$subtype<-ifelse(degs_ex$cluster==0, "L2/3 RORB+ CCDC68+ ",
ifelse(degs_ex$cluster==1, "L2/3 Intratelencephalon-Projecting",
ifelse(degs_ex$cluster==2, "L5 Intratelencephalon-Projecting 2",
ifelse(degs_ex$cluster==3, "L6 THEMIS+ SNTG2+",
ifelse(degs_ex$cluster==4, "L5 Intratelencephalon-Projecting ",
ifelse(degs_ex$cluster==5, "L6 FEZF2+ KLK7+",
ifelse(degs_ex$cluster==6, "L3 RORB+ OTOGL+",
ifelse(degs_ex$cluster==7, "L6 Corticothalamic-Projecting 1",
ifelse(degs_ex$cluster==8, "L5 Intratelencephalon-Projecting 3",
ifelse(degs_ex$cluster==9, "L6 CAR3+ Intratelencephalon-Projecting",
ifelse(degs_ex$cluster==10, "L5/6 Near-Projecting",
ifelse(degs_ex$cluster==11, "NS","L5 FEZF2+ CSN1S1+"))))))))))))
# degs_ex$layer<-lapply(strsplit(degs_ex$subtype, split=" "), `[`, 1)
write.csv(degs_ex, "~/scMultiomics_AD/DEGs/DEG_Ex_subTypes.csv")
markers<-data.frame(L23_IP=c("EPHA6","CA10","LAMA2","RASGRF2","GNAL","KIAA1217","MEIS2","DAB1","CUX2","LRRTM4","PDZD2","ENC1","FAM19A1","CNTN5","LINC01378","PVRL3","CBLN2","x","x"),
L5_EP= c("NRP1","PTCHD1.AS","COL5A2","SLC35F3","COL24A1","HOMER1","VAT1L","NRG1","FAM19A1","CBLN2","x","x","x","x","x","x","x","x","x") ,
L5_IP= c("CADPS2","CHN2","IL1RAPL2","CPNE4","RORB","CNTN5","TOX","FSTL5","POU6F2","FSTL4","x","x","x","x","x","x","x","x","x"),
L56_NP=c("TLE4","CALCRL","LUZP2","TSHZ2","HTR2C","FER1L6-AS2","CDH6","KIAA1217","NPSR1.AS1","NPSR1-AS1","ZNF385D","ITGA8","CPNE4","NXPH2","TLL1 ","CRYM","PDE9A","CBLN2","ASIC2"),
L6_CTP=c("ADAMTSL1","TRPM3","KIAA1217","MEIS2","SEMA5A","SEMA3E","EGFEM1P","SORCS1","HS3ST4","TOX","x","x","x","x","x","x","x","x","x"),
L6_CAR=c("NTNG2","NR4A2","POSTN","KCNMB2","B3GAT2","RNF152","THEMIS","OLFML2B","STK32B","GAS2L3","x","x","x","x","x","x","x","x","x"),
L6_T_L=c("EMA3A","TRPM3","ADAMTS3","LINC00343","ANKRD30B","DCHS2","THEMIS","SEC24D","GULP1","ROBO1","x","x","x","x","x","x","x","x","x"))
markers$seq<-seq(1, nrow(markers))
markers_melt<-melt(markers, id="seq")
markers_melt<-markers_melt[which(markers_melt$value != "x"),]
# ex_degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_Excitatory.csv")
# markers_malt2<-markers_melt[which(markers_melt$value %in% ex_degs$gene),]
a<-markers_melt %>% group_by(value) %>% summarize(subtype=paste(sort(variable), collapse=","))
# t<-table(a$value)
# a<-a[which(a$value %in% names(t[t<2])),]
avg_Exp<-as.data.frame(AverageExpression(Excitatory, features=a$value,assay="RNA", group.by="subs"))
colnames(avg_Exp)<-gsub("RNA.","", colnames(avg_Exp))
avg_Exp<-as.matrix(avg_Exp)
a<-a[which(a$value %in% rownames(avg_Exp)),]
a$layer<-sapply(strsplit(a$subtype, "_"), `[`, 1)
a$num<-str_count(a$subtype, ",")+1
a<-a[order(a$subtype, a$num),]
avg_Exp<-avg_Exp[a$value,]
mat_scaled<-t(apply(avg_Exp,1,scale))
colnames(mat_scaled)<-colnames(avg_Exp)
ha<-HeatmapAnnotation(celltype=colnames(avg_Exp),col= list(celltype=c( "Exc_0"="royalblue1", "Exc_1"="deepskyblue2", "Exc_2"="steelblue1", "Exc_3"="steelblue3", "Exc_4"="cornflowerblue", "Exc_5"="royalblue3", "Exc_6"="cadetblue1", "Exc_7"="cadetblue3",
"Exc_8"="dodgerblue","Exc_9"="dodgerblue3")), show_legend=F)
ha2<-HeatmapAnnotation(subtype=c("L2-3 Intratelencephalon-Projecting 1",
"L2-3 Intratelencephalon-Projecting 2",
"L5 Intratelencephalon-Projecting 2",
"L5 Intratelencephalon-Projecting 1",
"L6 THEMIS+ LINC00343+",
"L6 Corticothalamic-Projecting",
"L5 Intratelencephalon-Projecting 3",
"L6 CAR3+ Intratelencephalon-Projecting",
"L5-6 Near-Projecting",
"L5 Extratelencephalon-Projecting" ) ,
col=list(subtype=c("L2-3 Intratelencephalon-Projecting 1"="green4",
"L2-3 Intratelencephalon-Projecting 2"="green",
"L5 Intratelencephalon-Projecting 2"="orangered",
"L5 Intratelencephalon-Projecting 1"="orange",
"L6 THEMIS+ LINC00343+"="dodgerblue",
"L6 Corticothalamic-Projecting"="royalblue3",
"L5 Intratelencephalon-Projecting 3"="tomato",
"L6 CAR3+ Intratelencephalon-Projecting"="skyblue1",
"L5-6 Near-Projecting"="plum",
"L5 Extratelencephalon-Projecting"="gold" )), show_legend=F)
ha2<-c(ha,ha2)
n <- 15
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
sam<-sample(col_vector, n)
names(sam)<-unique(a$subtype)
ra<-rowAnnotation(subtype=a$subtype, col=list(subtype=sam))
ht=Heatmap(mat_scaled,cluster_rows=F,cluster_columns=F,column_split=c("L23","L23","L5","L5","L6","L6","L5","L6","L56","L5"),col=colorRamp2(c(-1,0,1),viridis(3)), top_annotation=ha, name="Z", show_column_names=T, show_row_names=T, column_title=NULL, row_title_gp=gpar(fontsize=10),row_names_gp=gpar(fontsize=7), row_split=a$layer)
pdf("~/scMultiomics_AD/Neuron_Subtype_heatmap_Ex.pdf", width=4,height=8)
ht
dev.off()
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 3
spec_mark<-table(markers_melt$value)
markers_melt<-markers_melt[which(markers_melt$value %in% names(spec_mark[spec_mark==1])),]
plot_mark<-merge(markers_melt, ex_degs, by.x="value",by.y="gene")
top_mark<-plot_mark %>% group_by(variable) %>% top_n(n=2, wt=avg_log2FC)
top_mark<-top_mark[order(top_mark$variable),]
Excitatory2$cluster<-paste0("Exc_", Excitatory2$seurat_clusters)
pdf("~/scMultiomics_AD/Ex_marker_dotplot.pdf", width=10, height=9)
DotPlot(Excitatory2, features=unique(top_mark$value), col.min=0, col.max=1.5, dot.scale=10, group.by="cluster")+scale_color_viridis(option="plasma", direction=-1)+theme(axis.text.x=element_text(size=15, angle=90), axis.text.y=element_text(size=15),axis.title=element_text(size=0))
dev.off()
tab<-table(markers_melt$value, markers_melt$variable)
mat<-as.matrix(tab)
mat<-mat[rownames(avg_Exp),]
df<-as.data.frame(mat)
pdf("~/scMultiomics_AD/EX_marker_upset_annotation.pdf",width=3, height=10)
ggplot(df, aes(y=Var1, x=Var2, alpha=as.factor(Freq)))+geom_point()+theme_classic()+scale_alpha_manual(values=c(0,1))+ylab("")+xlab("")+
theme(legend.position="none", axis.text.x=element_text(angle=90))+scale_y_discrete(position="right", limits=rev)+scale_x_discrete(labels=c("L23","L5 Extratelencephalon-Projecting","L5 Intratelencephalon-Projecting","L56 Near Projecting","L6 Corticothalamic-Projecting","L6 CAR3+ Intratelencephalon-Projecting","L6 THEMIS+ LINC00343+"))
dev.off()
degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_Excitatory.csv")
degs<-degs[which(degs$p_val_adj<0.01 & abs(degs$avg_log2FC)>0.5),]
degs_up<-degs[which(degs$cat=="up"),]
dbs <- c("GO_Molecular_Function_2021", "GO_Cellular_Component_2021", "GO_Biological_Process_2021", "KEGG_2021_Human", "Azimuth_Cell_Types_2021", "Allen_Brain_Atlas_10x_scRNA_2021")
In<-list()
for (i in 1:10){
In[[i]]<-enrichr(degs_up[which(degs_up$cluster== i-1),]$gene, dbs)
}
a<-In[[1]][["GO_Biological_Process_2021"]]
b<-In[[2]][["GO_Biological_Process_2021"]]
c<-In[[3]][["GO_Biological_Process_2021"]]
d<-In[[4]][["GO_Biological_Process_2021"]]
e<-In[[5]][["GO_Biological_Process_2021"]]
f<-In[[6]][["GO_Biological_Process_2021"]]
g<-In[[7]][["GO_Biological_Process_2021"]]
h<-In[[8]][["GO_Biological_Process_2021"]]
i<-In[[9]][["GO_Biological_Process_2021"]]
j<-In[[10]][["GO_Biological_Process_2021"]]
a$i<-0
b$i<-1
c$i<-2
d$i<-3
e$i<-4
f$i<-5
g$i<-6
h$i<-7
i$i<-8
j$i<-9
df<-rbind(a,b,c,d,e,f,g,h,i,j)
df_all<-df
df<-df[which(df$Adjusted.P.value<0.05),]
write.csv(df, "~/scMultiomics_AD/enrichr/Exc_subclusters_GO_BP.csv")
top<-df %>% group_by(i) %>% top_n(n=2, wt=-Adjusted.P.value)
top<-top[which(top$Adjusted.P.value<0.01),]
term<-top$Term
term<-sort(unique(term))
mat<-matrix(nrow=length(unique(term)),ncol=10)
rownames(mat)<-unique(term)
colnames(mat)<-unique(df$i)
for (i in 1:nrow(mat)){
for (j in 1:ncol(mat)){
gene_tmp<-df_all[which(df_all$Term==rownames(mat)[i]),]
gene_tmp<-gene_tmp[which(gene_tmp$i==colnames(mat)[j]),]
mat[i,j]<-ifelse(nrow(gene_tmp)>0, gene_tmp$Odds.Ratio,1)
}
}
rn<-rownames(mat)
rn<-sapply(strsplit(rn, " (", fixed=T), `[`,1)
rownames(mat)<-rn
sig_mat<-matrix(nrow=length(unique(term)),ncol=10)
rownames(sig_mat)<-unique(term)
colnames(sig_mat)<-unique(df$i)
df_sig<-df[which(df$Adjusted.P.value<0.01),]
for (i in 1:nrow(sig_mat)){
for (j in 1:ncol(sig_mat)){
gene_tmp<-df_sig[which(df_sig$Term==rownames(sig_mat)[i]),]
gene_tmp<-gene_tmp[which(gene_tmp$i==colnames(sig_mat)[j]),]
sig_mat[i,j]<-ifelse(nrow(gene_tmp)>0, 1,0)
}
}
colnames(mat)<-paste0("Exc_",colnames(mat))
ht=Heatmap(mat, cluster_rows=T,show_row_dend=F,cluster_columns=F,col=colorRamp2(c(0,5,20),c("white", "# d3eff5","# 00518a")), name="Odds Ratio", show_column_names=T, show_row_names=T, column_title=NULL, row_names_side = "left",row_names_max_width = max_text_width(
rownames(mat),
gp = gpar(fontsize = 12)
), cell_fun = function(j, i, x, y, w, h, fill) {
if(sig_mat[i, j] >0) {
grid.text("*", x, y, gp=gpar(fontsize=10), vjust="center")
} })
library(stringr)
pdf("~/scMultiomics_AD/Exc_subcluster_GObp.pdf", width=9, height=3.5)
ht
dev.off()
tab<-prop.table(table(Excitatory$azimuth, Excitatory$id),2)
Status<-c("Ctrl","Ctrl","Ctrl","AD","Ctrl","AD","AD","AD","AD","AD","AD","Ctrl","Ctrl","Ctrl","Ctrl")
df<-as.data.frame(t(tab))
df$Status<-rep(Status, nrow(tab))
tests<-list()
for (i in 1:nrow(tab)){
ct<-rownames(tab)[i]
df.tmp<-df[which(df$Var2==ct),]
tests[[i]]<-t.test(df.tmp[which(df.tmp$Status=="Ctrl"),]$Freq, df.tmp[which(df.tmp$Status=="AD"),]$Freq)$p.value
}
# L2-3 Intratelencephalon-Projecting 1 1.000
# L2-3 Intratelencephalon-Projecting 2 1.000
# L5 Intratelencephalon-Projecting 2 1.000
# L5 Intratelencephalon-Projecting 1 1.000
# L6 THEMIS+ LINC00343+ 1.000
# L6 Corticothalamic-Projecting 1.000
# L5 Intratelencephalon-Projecting 3 1.000
# L6 Car3+ Intratelencephalon-Projecting 1.000
# L5-6 Near-Projecting 1.000
# L5 Extratelencephalon-Projecting 0.419
stat.test <- df %>%
group_by(Var2) %>%
t_test(Freq ~ Status) %>%
adjust_pvalue() %>%
add_significance("p.adj")
stat.test <- stat.test %>% add_xy_position(x = "Var2")
pdf("~/scMultiomics_AD/Ex_CT_stat_prop_tt.pdf", width=10,height=5)
ggboxplot(df, y="Freq",x="Var2", fill="Status", alpha=0.8)+stat_pvalue_manual(stat.test, label="p")+xlab("")+scale_fill_manual(values=c("red","blue"))
dev.off()
Oligodendrocytes<-subset(data, subset=predicted.id=="Oligodendrocytes")
DefaultAssay(Oligodendrocytes)<-"PC"
Oligodendrocytes <- SCTransform(Oligodendrocytes,assay="PC",new.assay.name="SCT",vars.to.regress = c("percent.mt"),verbose=F) %>% RunPCA(ndims=30)
Oligodendrocytes<-RunUMAP(Oligodendrocytes,reduction="sub.harmony.rna",dims=1:30,reduction.name="sub.harmony.rna.umap")
# atac
DefaultAssay(Oligodendrocytes)<-"CTpeaks"
Oligodendrocytes<-RunTFIDF(Oligodendrocytes) %>% FindTopFeatures(min.cutoff='q50')%>% RunSVD()
Oligodendrocytes<-RunUMAP(Oligodendrocytes,reduction="harmony.atac",dims=2:50,reduction.name="sub.harmony.atac.umap")
# wnn
Oligodendrocytes<-FindMultiModalNeighbors(Oligodendrocytes, reduction.list=list("harmony.rna","harmony.atac"), dims.list=list(1:30,2:50))
Oligodendrocytes<-RunUMAP(Oligodendrocytes,nn.name="weighted.nn", reduction.name="wnn.umap",reduction.key="wnnUMAP_")
Oligodendrocytes<-FindClusters(Oligodendrocytes, graph.name="wknn", algorithm=1, resolution=0.35)
Oligodendrocytes$batch<-ifelse(Oligodendrocytes$id %in% c("1224","1238","3586","4064","4627","4674","5640","4481"), "1","2")
Oligodendrocytes$subs<-paste0("Oli_", Oligodendrocytes$seurat_clusters)
pdf("~/scMultiomics_AD/Olig_subclusters.pdf")
DimPlot(Oligodendrocytes, group.by="subs",reduction="wnn.umap", cols=c( "Oli_0"="lightsalmon", "Oli_1"="darksalmon",
"Oli_2"="coral", "Oli_3"="brown1"), pt.size=2, label=T)
dev.off()
DEGs<-FindAllMarkers(Oligodendrocytes,min.pct=0.1,logfc.threshold = 0.1, assay="PC", test.use="MAST",latent.vars=c("Age","Sex"))
DEGs<-DEGs[which(DEGs$p_val_adj<0.01 & abs(DEGs$avg_log2FC)>0.5),]
DEGs$cat<-ifelse(DEGs$avg_log2FC>0, "up","down")
write.csv(DEGs, "~/scMultiomics_AD/DEGs/DEGs_Oligodendrocytes.csv")
Supp Fig 5
degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_Oligodendrocytes.csv")
degs<-degs[which(degs$p_val_adj<0.01 & abs(degs$avg_log2FC)>0.25),]
degs_up<-degs[which(degs$cat=="up"),]
dbs <- c("GO_Molecular_Function_2021", "GO_Cellular_Component_2021", "GO_Biological_Process_2021", "KEGG_2021_Human", "Azimuth_Cell_Types_2021", "Allen_Brain_Atlas_10x_scRNA_2021")
In<-list()
for (i in 1:4){
In[[i]]<-enrichr(degs_up[which(degs_up$cluster== i-1),]$gene, dbs)
}
head(In[[i]][["GO_Molecular_Function_2021"]][,1:4])
head(In[[i]][["GO_Biological_Process_2021"]][,1:4])
head(In[[i]][["GO_Cellular_Component_2021"]][,1:4])
head(In[[i]][["KEGG_2021_Human"]][,1:4])
head(In[[i]][["Azimuth_Cell_Types_2021"]][,1:4])
head(In[[i]][["Allen_Brain_Atlas_10x_scRNA_2021"]][,1:4])
a<-In[[1]][["GO_Biological_Process_2021"]]
b<-In[[2]][["GO_Biological_Process_2021"]]
c<-In[[3]][["GO_Biological_Process_2021"]]
d<-In[[4]][["GO_Biological_Process_2021"]]
a$i<-0
b$i<-1
c$i<-2
d$i<-3
df<-rbind(a,b,c,d)
df_all<-df
df<-df[which(df$Adjusted.P.value<0.05),]
write.csv(df, "~/scMultiomics_AD/enrichr/Olig_subclusters_GO_BP.csv")
df2<-df[!grepl("cycle", df$Term),]
top<-df %>% group_by(i) %>% top_n(n=2, wt=Combined.Score)
term<-top$Term
term<-sort(unique(term))
mat<-matrix(nrow=length(unique(term)),ncol=4)
rownames(mat)<-unique(term)
colnames(mat)<-unique(df$i)
for (i in 1:nrow(mat)){
for (j in 1:ncol(mat)){
gene_tmp<-df_all[which(df_all$Term==rownames(mat)[i]),]
gene_tmp<-gene_tmp[which(gene_tmp$i==colnames(mat)[j]),]
mat[i,j]<-ifelse(nrow(gene_tmp)>0, gene_tmp$Odds.Ratio,1)
}
}
rn<-rownames(mat)
rn<-sapply(strsplit(rn, " (", fixed=T), `[`,1)
rownames(mat)<-rn
ht=Heatmap(mat, cluster_rows=T,show_row_dend=F,cluster_columns=F,col=colorRamp2(c(0,60,300),c("white", "# d3eff5","# 00518a")), name="Odds Ratio", show_column_names=T, show_row_names=T, column_title=NULL, row_names_side = "left",row_names_max_width = max_text_width(
rownames(mat),
gp = gpar(fontsize = 12)
))
library(stringr)
pdf("~/scMultiomics_AD/Olig_subcluster_GObp.pdf", width=8, height=6)
ht
dev.off()
df<-read.csv("~/scMultiomics_AD/enrichr/Olig_subclusters_GO_BP.csv")
df<-df[which(df$i==2),]
df<-df[which(df$Adjusted.P.value<0.01),]
df$term<-sapply(strsplit(df$Term, " (", fixed=T), `[`,1)
df<-df[order(df$Odds.Ratio, decreasing=T),]
df$term<-factor(df$term, levels=rev(df$term))
pdf("~/scMultiomics_AD/Olig_GO_bp_cluster2.pdf", width=12, height=6)
ggplot(df, aes(x=Odds.Ratio, y=term, fill=-log10(Adjusted.P.value)))+geom_bar(stat="identity")+theme_classic()+scale_fill_gradient2(low="white", mid="white",high="# 0F7010", limits=c(0,5))+theme(axis.text=element_text(size=12))
dev.off()
tab<-prop.table(table(subs[[4]]$subs, subs[[4]]$id),2)
Status<-c("Ctrl","Ctrl","Ctrl","AD","Ctrl","AD","AD","AD","AD","AD","AD","Ctrl","Ctrl","Ctrl","Ctrl")
df<-as.data.frame(t(tab))
df$Status<-rep(Status, nrow(tab))
stat.test <- df %>%
group_by(Var2) %>%
t_test(Freq ~ Status) %>%
adjust_pvalue() %>%
add_significance("p.adj")
stat.test <- stat.test %>% add_xy_position(x = "Var2")
# 0.882
# 0.295
# 0.684
# 0.31
pdf("~/scMultiomics_AD/Olig_CT_stat_prop_tt.pdf", width=10,height=5)
ggboxplot(df, y="Freq",x="Var2", fill="Status", alpha=0.8)+stat_pvalue_manual(stat.test, label="p")+xlab("")+scale_fill_manual(values=c("red","blue"))
dev.off()
Supp Fig 5
meta<-Oligodendrocytes@meta.data
tab<-prop.table(table(meta$cluster, meta$id),1)
Status<-c("Ctrl","Ctrl","Ctrl","AD","Ctrl","AD","AD","AD","AD","AD","AD","Ctrl","Ctrl","Ctrl","Ctrl")
df<-as.data.frame(t(tab))
df$Status<-rep(Status, 4)
Ctrl<-brewer.pal(8,"Blues")
AD<-brewer.pal(7, "Reds")
tmp<-df[1:15,]
tmp<-tmp[order(tmp$Status),]
df$Var1<-factor(df$Var1, levels= tmp$Var1)
lev<-levels(df$Var2)
df$Var2<-factor(df$Var2, levels=lev)
tab2<-as.data.frame(table(meta$cluster))
tab2.m<-merge(tab2, cols.df, by.x="Var1", by.y="subs")
tab2.m$Var1<-factor(df$Var1, levels= tmp$Var1)
tab2.m<-tab2.m[order(-tab2.m$Var1),]
lev<-levels(tab2.m$Var1)
tab2.m$Var1<-factor(tab2.m$Var1, levels=lev)
tab2.m<-tab2.m[order(match(tab2.m$Var1, lev)),]
df<-df[order(match(df$Var2, lev2)),]
pdf("~/scMultiomics_AD/Olig_stat_prop_bar_wTotalCells_log10.pdf", width=5,height=6)
p1=ggplot(df, aes(x=Freq*100,y=Var2,fill=Var1))+geom_bar(stat="identity")+theme_classic()+scale_fill_manual(values=c(AD,Ctrl))+xlab("%")+ylab("")+theme(legend.title=element_text(size=0), legend.position="none")+ggtitle("Donor")+scale_y_discrete(limits=rev(levels(df$Var2)))
p2=ggplot(tab2.m, aes(x=Freq, y=Var1, fill=Var1))+geom_bar(stat="identity")+theme_classic()+theme(legend.position="none", axis.text.y=element_text(size=0))+ggtitle("Num. Cells")+ylab("")+scale_fill_manual(values=tab2.m$cols)+xlab(" Count")+scale_y_discrete(limits=rev(levels(tab2.m$Var1)))+geom_vline(xintercept=100)
p1+p2
dev.off()
olig_degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_Oligodendrocytes.csv")
olig_degs<-olig_degs[which(olig_degs$cat=="up"),]
top <- olig_degs %>% group_by(cluster) %>% top_n(n=10, wt=avg_log2FC)
top$clust<-paste("Oli_", top$cluster, sep="")
avg_Exp<-as.data.frame(AverageExpression(Oligodendrocytes, features=top$gene, group.by="cluster", assay="PC"))
colnames(avg_Exp)<-gsub("PC.","", colnames(avg_Exp))
mat_scaled<-t(apply(as.matrix(avg_Exp),1,scale))
colnames(mat_scaled)<-colnames(avg_Exp)
ha<-HeatmapAnnotation(cluster=colnames(mat_scaled), show_legend=F, col=list(cluster=c("Oli_0"="lightsalmon", "Oli_1"="darksalmon", "Oli_2"="coral", "Oli_3"="brown1")))
ra<-rowAnnotation(cluster=top$clust, show_legend=F, col=list(cluster=c("Oli_0"="lightsalmon", "Oli_1"="darksalmon","Oli_2"="coral", "Oli_3"="brown1")))
ht=Heatmap(mat_scaled,cluster_columns=T,cluster_rows=F, col=colorRamp2(c(-1.5,0,1.5),viridis(3)), top_annotation=ha, name="Z", left_annotation=ra, show_column_names=T, show_row_names=T, column_title=NULL, row_title_gp=gpar(fontsize=10),row_names_gp=gpar(fontsize=7), row_split=top$clust)
pdf("~/scMultiomics_AD/Olig_top10DEG_heatmap.pdf", width=4, height=6)
ht
dev.off()
ht2=Heatmap(t(mat_scaled),cluster_columns=T,cluster_rows=F, col=colorRamp2(c(-1.5,0,1.5),viridis(3)), left_annotation=ha, name="Z", top_annotation=ra, show_column_names=T, show_row_names=T, column_title=NULL, row_title_gp=gpar(fontsize=10),row_names_gp=gpar(fontsize=7), row_split=top$clust)
pdf("~/scMultiomics_AD/Olig_top10DEG_heatmap_transpose.pdf", width=4, height=6)
ht
dev.off()
DefaultAssay(Microglia)<-"PC"
Microglia <- SCTransform(Microglia,assay="PC",new.assay.name="SCT",vars.to.regress = c("percent.mt"),verbose=F)
DefaultAssay(Microglia)<-"SCT"
Microglia<-RunPCA(Microglia,ndims=40)
# Microglia<-RunHarmony(Microglia, group.by.vars="id",reduction="pca",assay.use="SCT", reduction.save="sub.harmony.rna", lambda=1, theta=2, max.iter.harmony=20) # converged on 14
Microglia<-RunUMAP(Microglia,reduction="harmony.rna",dims=1:30,reduction.name="sub.harmony.rna.umap")
# atac
DefaultAssay(Microglia)<-"CTpeaks"
Microglia<-RunTFIDF(Microglia) %>% FindTopFeatures(min.cutoff='q50')%>% RunSVD()
Microglia<-RunUMAP(Microglia,reduction="harmony.atac",dims=2:50,reduction.name="sub.harmony.atac.umap")
# wnn
Microglia<-FindMultiModalNeighbors(Microglia, reduction.list=list("harmony.rna","harmony.atac"), dims.list=list(1:30,2:50))
Microglia<-RunUMAP(Microglia,nn.name="weighted.nn", reduction.name="wnn.umap",reduction.key="wnnUMAP_")
Microglia<-FindClusters(Microglia, graph.name="wknn", algorithm=1, resolution=0.45)
Microglia$batch<-ifelse(Microglia$id %in% c("1224","1238","3586","4064","4627","4674","5640","4481"), "1","2")
Microglia$subs<-paste0("Mic_", Microglia$seurat_clusters)
pdf("~/scMultiomics_AD/Mic_subclusters.pdf")
DimPlot(Microglia, group.by="subs",reduction="wnn.umap", pt.size=2, label=F)
dev.off()
DEGs<-FindAllMarkers(Microglia,min.pct=0.1,logfc.threshold = 0.1, assay="PC", test.use="MAST",latent.vars=c("Age","Sex"))
DEGs<-DEGs[which(DEGs$p_val_adj<0.01 & abs(DEGs$avg_log2FC)>0.5),]
DEGs$cat<-ifelse(DEGs$avg_log2FC>0, "up","down")
write.csv(DEGs, "~/scMultiomics_AD/DEGs/DEGs_Microglia.csv")
Mic_no4<-subset(Microglia, wknn_res.0.45==4, invert=T)
Mic_no4<-NormalizeData(Mic_no4)
Idents(Mic_no4)<-Mic_no4$wknn_res.0.45
DEGs<-FindAllMarkers(Mic_no4,min.pct=0.1,logfc.threshold = 0.1, assay="PC", test.use="MAST",latent.vars=c("Age","Sex"))
DEGs<-DEGs[which(DEGs$p_val_adj<0.01 & abs(DEGs$avg_log2FC)>0.5),]
DEGs$cat<-ifelse(DEGs$avg_log2FC>0, "up","down")
write.csv(DEGs, "~/scMultiomics_AD/DEGs/DEGs_Microglia_removeCluster.csv")
mic_degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_Microglia.csv")
mic_degs<-mic_degs[order(mic_degs$cat, -mic_degs$cluster,decreasing=T),]
mic_degs2<-mic_degs[which(mic_degs$cat=="up"),]
top_genes<-mic_degs2 %>%group_by(cluster) %>% top_n(n=20, wt=abs(avg_log2FC))
top_genes<-rbind(top_genes, mic_degs2[which(mic_degs2$gene %in% c( "ACSL1", "CD163", "CD44", "CR1", "CEMIP2", "DAB2", "FOXP1", "LYVE1", "MAFB", "MITF", "MYO5A", "NOTCH2", "SLC11A1", "TGFBI")),])
top_genes<-top_genes[!duplicated(top_genes$gene),]
# avgExp<-as.data.frame(AverageExpression(Microglia,group.by="wknn_res.0.45",features=top_genes$gene,assay="RNA",slot="data"))
avgExp<-as.data.frame(AverageExpression(Mic_no4,group.by="wknn_res.0.45",features=top_genes$gene,assay="RNA",slot="data"))
names<-colnames(avgExp)
names<-gsub("RNA.","",names)
colnames(avgExp)<-names
tmp<-top_genes[which(top_genes$gene %in% rownames(avgExp)),]
avgExp<-avgExp[tmp$gene,]
avgExp.scale<-t(apply(avgExp,1, scale))
colnames(avgExp.scale)<-colnames(avgExp)
ha<-HeatmapAnnotation(cluster=paste0("Mic_",colnames(avgExp)), col= list(cluster=c("Mic_0"="# f9786e","Mic_1"="# a3a500","Mic_2"="# 06bf7d","Mic_3"="# 00b0f6")), show_legend=F)
ra<-rowAnnotation(cluster=paste0("Mic_",as.factor(top_genes$cluster)), col=list(cluster=c("Mic_0"="# f9786e","Mic_1"="# a3a500","Mic_2"="# 06bf7d", "Mic_3"="# 00b0f6")),show_legend=T)
colnames(avgExp.scale)<-paste("Mic_",colnames(avgExp.scale), sep="")
ht=Heatmap(avgExp.scale, cluster_rows=F,cluster_columns=F,col=colorRamp2(c(-1,0,1.5),viridis(3)), row_split=tmp$cluster, name="Z", show_column_names=T, show_row_names=T,row_names_gp = gpar(fontsize = 5), row_title_gp=gpar(fontsize=0), show_column_dend = FALSE, show_row_dend = FALSE, left_annotation=ra, top_annotation=ha)
pdf("~/scMultiomics_AD/Microglia_Degs_overlap_heatmap_dropCluster2.pdf", width=5,height=8)
ht
dev.off()
ha<-rowAnnotation(cluster=rownames(t(avgExp.scale)), col= list(cluster=c("Mic_0"="# f9786e","Mic_1"="# a3a500","Mic_2"="# 06bf7d","Mic_3"="# 00b0f6")), show_legend=F)
ra<-HeatmapAnnotation(cluster=paste0("Mic_",as.factor(top_genes$cluster)), col=list(cluster=c("Mic_0"="# f9786e","Mic_1"="# a3a500","Mic_2"="# 06bf7d", "Mic_3"="# 00b0f6")),show_legend=T)
ht2=Heatmap(t(avgExp.scale), cluster_rows=F,cluster_columns=F,col=colorRamp2(c(-1,0,1.5),viridis(3)), row_split=tmp$cluster, name="Z", show_column_names=T, show_row_names=T,row_names_gp = gpar(fontsize = 10),column_names_gp=gpar(fontsize=6), row_title_gp=gpar(fontsize=0), show_column_dend = FALSE, show_row_dend = FALSE, left_annotation=ha, top_annotation=ra)
pdf("~/scMultiomics_AD/Microglia_Degs_overlap_heatmap_dropCluster2.pdf", width=7,height=3)
ht2
dev.off()
mic_degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_Microglia.csv")
mic_degs<-mic_degs[order(mic_degs$cat, -mic_degs$cluster,decreasing=T),]
mic_degs_H<-mic_degs[which(mic_degs$cat=="up"),]
mic_degs_H<-mic_degs_H[which(mic_degs_H$cluster %in% c(0,1,2)),]
avgExp<-as.data.frame(AverageExpression(Microglia,group.by="wknn_res.0.45",features=mic_degs_H$gene,assay="PC",slot="data"))
names<-colnames(avgExp)
names<-gsub("PC.","",names)
colnames(avgExp)<-names
tmp<-mic_degs_H[which(mic_degs_H$gene %in% rownames(avgExp)),]
avgExp<-avgExp[tmp$gene,]
avgExp.scale<-t(apply(avgExp,1, scale))
colnames(avgExp.scale)<-colnames(avgExp)
colnames(avgExp.scale)<-paste("Mic_",colnames(avgExp.scale), sep="")
ha<-HeatmapAnnotation(cluster=paste0("Mic_",colnames(avgExp)), col= list(cluster=c("Mic_0"="# f9786e","Mic_1"="# a3a500","Mic_2"="# 06bf7d","Mic_3"="# 00b0f6","Mic_4"="# e86bf3")), show_legend=F)
ra<-rowAnnotation(cluster=as.factor(mic_degs_H$cluster), col=list(cluster=c("0"="forestgreen","1"="darkolivegreen3","2"="darkseagreen3")),show_legend=T)
ht=Heatmap(avgExp.scale, cluster_rows=T,cluster_columns=F,col=colorRamp2(c(-1,0,1.5),viridis(3)), row_split=tmp$subtype.y, name="Z", show_column_names=T, show_row_names=T,row_names_gp = gpar(fontsize = 6), row_title_gp=gpar(fontsize=0), show_column_dend = FALSE, show_row_dend = FALSE, left_annotation=ra, top_annotation=ha)
pdf("~/scMultiomics_AD/Microglia_Degs_overlap_heatmap_0v1v2.pdf", width=4,height=3)
ht
dev.off()
degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_Microglia.csv")
degs<-degs[which(degs$p_val_adj<0.01 & abs(degs$avg_log2FC)>0.5),]
degs_up<-degs[which(degs$cat=="up"),]
dbs <- c("GO_Molecular_Function_2021", "GO_Cellular_Component_2021", "GO_Biological_Process_2021", "KEGG_2021_Human", "Azimuth_Cell_Types_2021", "Allen_Brain_Atlas_10x_scRNA_2021")
In<-list()
for (i in 4:5){
In[[i]]<-enrichr(degs_up[which(degs_up$cluster== i-1),]$gene, dbs)
}
head(In[[i]][["GO_Molecular_Function_2021"]][,1:4])
head(In[[i]][["GO_Biological_Process_2021"]][,1:4])
head(In[[i]][["GO_Cellular_Component_2021"]][,1:4])
head(In[[i]][["KEGG_2021_Human"]][,1:4])
head(In[[i]][["Azimuth_Cell_Types_2021"]][,1:4])
head(In[[i]][["Allen_Brain_Atlas_10x_scRNA_2021"]][,1:4])
a<-In[[4]][["GO_Biological_Process_2021"]]
b<-In[[5]][["GO_Biological_Process_2021"]]
a$i<-3
b$i<-4
df<-rbind(a,b)
df<-df[which(df$Adjusted.P.value<0.05),]
write.csv(df, "~/scMultiomics_AD/enrichr/Mic_subclusters_GO_BP_cluster3_4.csv")
mic_down<-enrichr(mic_degs[which(mic_degs$cat=="down" & mic_degs$cluster==4),]$gene, dbs)
write.csv(mic_down[["GO_Biological_Process_2021"]], "~/scMultiomics_AD/enrichr/Mic_cluster4down_GO_BP.csv")
mic_degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_Microglia_removeCluster.csv")
mic_degs2<-mic_degs[which(mic_degs$cat=="up"),]
clus3<-enrichr(mic_degs2[which(mic_degs2$cluster==3),]$gene, dbs)
df<-clus3[["GO_Biological_Process_2021"]]
df<-df[which(df$Adjusted.P.value<0.05),]
df$term<-sapply(strsplit(df$Term, " (", fixed=T), `[`,1)
df<-df[order(df$Odds.Ratio, decreasing=T),]
df$term<-factor(df$term, levels=rev(df$term))
pdf("~/scMultiomics_AD/Mic_GO_bp_cluster2.pdf", width=12, height=6)
ggplot(df, aes(x=Odds.Ratio, y=term, fill=-log10(Adjusted.P.value)))+geom_bar(stat="identity")+theme_classic()+scale_fill_gradient2(low="white", mid="# D4F9B0",high="# 0F7010", limits=c(0,5))+theme(axis.text=element_text(size=12))
dev.off()
GO BP 1 receptor-mediated endocytosis (GO:0006898) 2 clathrin-dependent endocytosis (GO:0072583) 3 membrane organization (GO:0061024) 4 connective tissue development (GO:0061448) 5 transmembrane receptor protein tyrosine kinase signaling pathway (GO:0007169) 6 hyaluronan catabolic process (GO:0030214) 7 positive regulation of epithelial cell migration (GO:0010634) 8 positive regulation of protein phosphorylation (GO:0001934) 9 hyaluronan metabolic process (GO:0030212)
KEGG Fc gamma R-mediated phagocytosis
tab<-prop.table(table(Microglia$wknn_res.0.45, Microglia$id),2)
Status<-c("Ctrl","Ctrl","Ctrl","AD","Ctrl","AD","AD","AD","AD","AD","AD","Ctrl","Ctrl","Ctrl","Ctrl")
df<-as.data.frame(t(tab))
df$Status<-rep(Status, nrow(tab))
df$cluster<-paste0("Mic_", df$Var2)
# 0.0358
# 0.00904
# 0.00449
# 0.0133
# 0.706
stat.test <- df %>%
group_by(cluster) %>%
t_test(Freq ~ Status) %>%
adjust_pvalue() %>%
add_significance("p.adj")
stat.test <- stat.test %>% add_xy_position(x = "cluster")
pdf("~/scMultiomics_AD/Mic_CT_stat_prop_tt.pdf", width=6,height=5)
ggboxplot(df, y="Freq",x="cluster", fill="Status", alpha=0.8)+stat_pvalue_manual(stat.test, label="p")+xlab("")+scale_fill_manual(values=c("firebrick","navyblue"))
dev.off()
data<-readRDS("~/scMultiomics_AD/final.rds")
Microglia<-readRDS("~/scMultiomics_AD/Microglia_subcluster.rds")
subs<-readRDS("~/scMultiomics_AD/subclusters.rds")
mic_clusters<-Microglia@meta.data
subs[[5]]<-AddMetaData(subs[[5]],mic_clusters)
subs[[5]]$cluster<-ifelse(is.na(subs[[5]]$seurat_clusters)==T, "NK", subs[[5]]$wknn_res.0.45)
subs[[5]]$seurat_clusters<-subs[[5]]$cluster
fullmeta<-c()
for (i in 1:length(subs)){
subs[[i]]$subs<-paste0(substr(subs[[i]]$predicted.id,start=1,stop=3),"_", subs[[i]]$seurat_clusters,sep="")
meta<-as.data.frame(subs[[i]]@meta.data[,c("subs")])
rownames(meta)<-rownames(subs[[i]]@meta.data)
fullmeta<-rbind(fullmeta,meta)
}
mic_meta<-read.csv("~/scMultiomics_AD/microglia_metadata.csv")
mic_meta$subs<-paste0("Mic_", mic_meta$seurat_clusters)
mic_subs<-as.data.frame(mic_meta$subs)
rownames(mic_subs)<-mic_meta$X.1
a=as.data.frame(subs[[1]]$subs)
b=as.data.frame(subs[[2]]$subs)
c=as.data.frame(subs[[3]]$subs)
d=as.data.frame(subs[[4]]$subs)
e=mic_subs
colnames(a)<-"subs"
colnames(b)<-"subs"
colnames(c)<-"subs"
colnames(d)<-"subs"
colnames(e)<-"subs"
fullmeta<-rbind(a,b,c,d,e)
data<-AddMetaData(data, fullmeta)
data$subs<-ifelse(is.na(data$subs),substr(data$predicted.id,start=1,stop=3),data$subs)
pdf("~/scMultiomics_AD/All_subclusters_plot_wLegend.pdf", width=9, height=10)
DimPlot(data, reduction="wnn.umap",group.by="subs", label=T, pt.size=0.1,cols=c(
"Ast_0"="darkgoldenrod1","Ast_1"="darkgoldenrod3", "Ast_2"="gold2",
"Ast_3"="goldenrod","Ast_4"="goldenrod1",
"End"="gray54",
"Exc_0"="royalblue1", "Exc_1"="deepskyblue2", "Exc_2"="steelblue1", "Exc_3"="steelblue3", "Exc_4"="cornflowerblue", "Exc_5"="royalblue3", "Exc_6"="cadetblue1", "Exc_7"="cadetblue3",
"Exc_8"="dodgerblue","Exc_9"="dodgerblue3", "Exc_10"="white",
"Inh_0"="palegreen", "Inh_1"="springgreen", "Inh_2"="darkolivegreen3", "Inh_3"="seagreen3", "Inh_4"="forestgreen", "Inh_5"="darkseagreen", "Inh_6"="palegreen4", "Inh_7"="darkolivegreen1",
"Mic_0"="darkorchid4",
"Mic_1"="magenta", "Mic_2"="magenta4",
"Mic_3"="orchid", "Mic_4"="purple",
"Oli_0"="lightsalmon", "Oli_1"="darksalmon",
"Oli_2"="coral", "Oli_3"="brown1",
"OPC"="firebrick", "Per"="lightpink"))&ggtitle("WNN")& theme(legend.position="none")
dev.off()
BrainMarkers<-read.csv("~/scREAD/scREAD_BrainMarkers.csv", header=T)
BrainMarkers<-BrainMarkers[which(BrainMarkers$Name!="PDGRFA"),]
DefaultAssay(data)<-"RNA"
data<-NormalizeData(data, assay="RNA") %>% ScaleData(features=BrainMarkers$Name)
avgExp_PC<-as.data.frame(AverageExpression(data, features=BrainMarkers$Name, group.by=c("predicted.id","id"), slot="data", assay="RNA"))
colnames(avgExp_PC)<-gsub("RNA.","",colnames(avgExp_PC))
df<-data.frame(celltype=sapply(strsplit(colnames(avgExp_PC), "_",fixed=T), `[`,1),
id=sapply(strsplit(colnames(avgExp_PC), "_",fixed=T), `[`,2))
df$tmp<-substr(df$celltype,start=1,stop=3)
labels<-BrainMarkers
labels$tmp<-substr(labels$List,start=1,stop=3)
# Normalize
mat_scaled<-t(apply(avgExp_PC,1,scale))
mat_scaled<-t(scale(t(avgExp_PC)))
library(RColorBrewer)
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
col=sample(col_vector, 15)
names(col)<-unique(df$id)
ha<-rowAnnotation(celltype=labels$tmp,col= list(celltype=c("Ast"="darkgoldenrod1","End"="gray54","Exc"="cornflowerblue","Inh"="seagreen3","Mic"="mediumorchid3","Oli"="coral3","OPC"="firebrick","Per"="lightpink")), show_legend=F)
ha2<-HeatmapAnnotation(celltype=df$tmp,ID=df$id, col= list(celltype=c("Ast"="darkgoldenrod1","End"="gray54","Exc"="cornflowerblue","Inh"="seagreen3","Mic"="mediumorchid3","Oli"="coral3","OPC"="firebrick","Per"="lightpink"), ID=col), show_legend=F)
ht=Heatmap(mat_scaled,cluster_rows=F,cluster_columns=F,col=colorRamp2(c(-1,0,2),viridis(3)), row_split=labels$tmp, top_annotation=ha2, name="Z", left_annotation=ha, show_column_names=F, show_row_names=F, column_title=NULL, row_title_gp=gpar(fontsize=14))
lab<-which(rownames(mat_scaled) %in% c("AQP4","NRGN","GAD2","P2RY12","MOBP","VCAN"))
pdf("~/scMultiomics_AD/scREAD_heatmap_byID.pdf", width=5,height=8)
ht+rowAnnotation(gene=anno_mark(at=lab, labels=rownames(mat_scaled[lab,]), side="right", labels_gp=gpar(fontsize=10)))
dev.off()
DefaultAssay(data)<-"ATAC"
annot<-Annotation(data)
DefaultAssay(data)<-"ATAC"
a<-FindRegion(data, "AQP4" ,extend.downstream=100,extend.upstream=1000)
b<-FindRegion(data, "NRGN" ,extend.downstream=100,extend.upstream=1000)
c<-FindRegion(data, "GAD2" ,extend.downstream=100,extend.upstream=1000)
d<-FindRegion(data, "P2RY12",extend.downstream=100,extend.upstream=1000)
e<-FindRegion(data, "MOBP" ,extend.downstream=100,extend.upstream=1000) # MOBP
f<-FindRegion(data, "VCAN" ,extend.downstream=100,extend.upstream=1000) # NEU4
# & theme(strip.text.y.left = element_blank(),strip.background = element_blank(), axis.text.x=element_blank(),plot.title = element_text(hjust = 0.5))
DefaultAssay(data)<-"CTpeaks"
Annotation(data)<-annot[which(annot$gene_name=="AQP4"),]
p1<-CoveragePlot(data,window=400, idents=c("Astrocytes","Excitatory","Inhibitory","Microglia","Oligodendrocytes","OPCs"),group.by="predicted.id",annotation=F,peaks=F, region =a) & scale_fill_manual(values=c("darkgoldenrod1","cornflowerblue","seagreen3","mediumorchid3","coral3","firebrick"))& theme(strip.text.y.left = element_blank(),strip.background = element_blank(),axis.text.x=element_blank(),plot.title = element_text(hjust = 0.5), axis.title.y=element_text(size=10))&ggtitle("AQP4")
anPlot<-AnnotationPlot(data, region=a)&scale_color_manual(values="darkgreen")& theme(strip.text.y.left = element_blank(),strip.background = element_blank(), axis.text.x=element_blank(),plot.title = element_text(hjust = 0.5))
p1<-CombineTracks(list(p1,anPlot),heights=c(5,1))
Annotation(data)<-annot[which(annot$gene_name=="NRGN"),]
p2<-CoveragePlot(data,window=200, idents=c("Astrocytes","Excitatory","Inhibitory","Microglia","Oligodendrocytes","OPCs"),group.by="predicted.id",annotation=F,peaks=F, region =b)& scale_fill_manual(values=c("darkgoldenrod1","cornflowerblue","seagreen3","mediumorchid3","coral3","firebrick"))& theme(strip.text.y.left = element_blank(),strip.background = element_blank(),axis.text.x=element_blank(),plot.title = element_text(hjust = 0.5))&ggtitle("NRGN")
anPlot<-AnnotationPlot(data, region=b)& theme(strip.text.y.left = element_blank(),strip.background = element_blank(), axis.text.x=element_blank(),plot.title = element_text(hjust = 0.5))
p2<-CombineTracks(list(p2,anPlot),heights=c(5,1))
Annotation(data)<-annot[which(annot$gene_name=="GAD2"),]
p3<-CoveragePlot(data,window=400, idents=c("Astrocytes","Excitatory","Inhibitory","Microglia","Oligodendrocytes","OPCs"),group.by="predicted.id",annotation=F,peaks=F, region =c)& scale_fill_manual(values=c("darkgoldenrod1","cornflowerblue","seagreen3","mediumorchid3","coral3","firebrick"))& theme(strip.text.y.left = element_blank(),strip.background = element_blank(),axis.text.x=element_blank(),plot.title = element_text(hjust = 0.5))&ggtitle("GAD2")
anPlot<-AnnotationPlot(data, region=c)& theme(strip.text.y.left = element_blank(),strip.background = element_blank(), axis.text.x=element_blank(),plot.title = element_text(hjust = 0.5))
p3<-CombineTracks(list(p3,anPlot),heights=c(5,1))
Annotation(data)<-annot[which(annot$gene_name=="P2RY12"),]
p4<-CoveragePlot(data,window=400, idents=c("Astrocytes","Excitatory","Inhibitory","Microglia","Oligodendrocytes","OPCs"),group.by="predicted.id",annotation=F,peaks=F, region =d)& scale_fill_manual(values=c("darkgoldenrod1","cornflowerblue","seagreen3","mediumorchid3","coral3","firebrick"))& theme(strip.text.y.left = element_blank(),strip.background = element_blank(),axis.text.x=element_blank(),plot.title = element_text(hjust = 0.5))&ggtitle("P2RY12")
anPlot<-AnnotationPlot(data, region=d)&scale_color_manual(values="darkgreen")& theme(strip.text.y.left = element_blank(),strip.background = element_blank(), axis.text.x=element_blank(),plot.title = element_text(hjust = 0.5))
p4<-CombineTracks(list(p4,anPlot),heights=c(5,1))
Annotation(data)<-annot[which(annot$gene_name=="MOBP"),]
p5<-CoveragePlot(data,window=600, idents=c("Astrocytes","Excitatory","Inhibitory","Microglia","Oligodendrocytes","OPCs"),group.by="predicted.id",annotation=F,peaks=F, region =e, ymax=800)& scale_fill_manual(values=c("darkgoldenrod1","cornflowerblue","seagreen3","mediumorchid3","coral3","firebrick"))& theme(strip.text.y.left = element_blank(),strip.background = element_blank(),axis.text.x=element_blank(),plot.title = element_text(hjust = 0.5))&ggtitle("MOBP")
anPlot<-AnnotationPlot(data, region=e)& theme(strip.text.y.left = element_blank(),strip.background = element_blank(), axis.text.x=element_blank(),plot.title = element_text(hjust = 0.5))
p5<-CombineTracks(list(p5,anPlot),heights=c(5,1))
Annotation(data)<-annot[which(annot$gene_name=="VCAN"),]
p6<-CoveragePlot(data,window=3000, idents=c("Astrocytes","Excitatory","Inhibitory","Microglia","Oligodendrocytes","OPCs"), group.by="predicted.id",annotation=F,peaks=F, region =f)& scale_fill_manual(values=c("darkgoldenrod1","cornflowerblue","seagreen3","mediumorchid3","coral3","firebrick"))& theme(strip.text.y.left = element_blank(),strip.background = element_blank(), axis.text.x=element_blank(),plot.title = element_text(hjust = 0.5))&ggtitle("VCAN")
anPlot<-AnnotationPlot(data, region=f)& theme(strip.text.y.left = element_blank(),strip.background = element_blank(), axis.text.x=element_blank(),plot.title = element_text(hjust = 0.5))
p6<-CombineTracks(list(p6,anPlot),heights=c(5,1))
library(cowplot)
pdf("~/scMultiomics_AD/marker_coverage_plot.pdf", width=12, height=7)
plot_grid(p1,p2,p3,p4,p5,p6, ncol=6)
dev.off()
tab<-prop.table(table(meta$subs, meta$id),1)
Status<-c("Ctrl","Ctrl","Ctrl","AD","Ctrl","Ctrl","AD","AD","AD","AD","AD","AD","AD","AD","Ctrl","Ctrl","Ctrl","Ctrl")
Status<-Status[-c(6,13,14)]
df<-as.data.frame(t(tab))
df$Status<-rep(Status, 36)
Ctrl<-brewer.pal(8,"Blues")
AD<-brewer.pal(7, "Reds")
tmp<-df[1:15,]
tmp<-tmp[order(tmp$Status),]
df<-df[-seq(121,135),]
df$Var1<-factor(df$Var1, levels= tmp$Var1)
pdf("~/scMultiomics_AD/Allsubs_stat_prop_bar.pdf", width=5,height=6)
ggplot(df, aes(x=Freq*100,y=Var2,fill=Var1))+geom_bar(stat="identity")+theme_classic()+scale_fill_manual(values=c(AD,Ctrl))+xlab("%")+ylab("")+theme(legend.title=element_text(size=0))
dev.off()
lev<-levels(df$Var2)
lev2<-lev[c(1,2,3,4,5,7,8,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,6,36)]
df$Var2<-factor(df$Var2, levels=lev2)
tab2<-as.data.frame(table(meta$subs))
tab2.m<-merge(tab2, hex.df, by.x="Var1", by.y="subs")
tab2.m$Var1<-factor(df$Var1, levels= tmp$Var1)
tab2.m<-tab2.m[order(-tab2.m$Var1),]
lev<-levels(tab2.m$Var1)
lev<-lev[c(1,2,3,4,5,7,8,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,6,36)]
tab2.m$Var1<-factor(tab2.m$Var1, levels=lev)
tab2.m<-tab2.m[order(match(tab2.m$Var1, lev)),]
df<-df[order(match(df$Var2, lev2)),]
pdf("~/scMultiomics_AD/Allsubs_stat_prop_bar_wTotalCells_log10.pdf", width=5,height=6)
p1=ggplot(df, aes(x=Freq*100,y=Var2,fill=Var1))+geom_bar(stat="identity")+theme_classic()+scale_fill_manual(values=c(AD,Ctrl))+xlab("%")+ylab("")+theme(legend.title=element_text(size=0), legend.position="left")+ggtitle("Donor")+scale_y_discrete(limits=rev(levels(df$Var2)))
p2=ggplot(tab2.m, aes(x=Freq, y=Var1, fill=Var1))+geom_bar(stat="identity")+theme_classic()+theme(legend.position="none", axis.text.y=element_text(size=0))+ggtitle("Number of Cells")+ylab("")+scale_fill_manual(values=tab2.m$cols)+xlab(" Count")+scale_y_discrete(limits=rev(levels(tab2.m$Var1)))+geom_vline(xintercept=100)+scale_x_continuous(breaks=c(0,6000,12000))
p1+p2
dev.off()
Idents(data)<-data$id
df2<-data@meta.data[,c("id","Status")]
df2<-df2[!duplicated(df2$id),]
df2<-df2[order(df2$Status),]
Idents(data)<-factor(Idents(data), levels=df2$id)
DefaultAssay(data)<-"PC"
rs<-rowSums(data)
avgExp_PC<-as.data.frame(AverageExpression(data, group.by=c("predicted.id","id"), assay="PC", features=names(rs[which(rs>20)])))
colnames(avgExp_PC)<-gsub("PC.","",colnames(avgExp_PC))
colnames(avgExp_PC)[89:103]<-c("OPCs_1224",
"OPCs_1230",
"OPCs_1238",
"OPCs_3329",
"OPCs_3586",
"OPCs_4305",
"OPCs_4313",
"OPCs_4443",
"OPCs_4481",
"OPCs_4482",
"OPCs_4627",
"OPCs_HCT17HEX",
"OPCs_HCTZZT",
"OPCs_NT1261",
"OPCs_NT1271")
df<-data.frame(celltype=sapply(strsplit(colnames(avgExp_PC), "_",fixed=T), `[`,1),
id=sapply(strsplit(colnames(avgExp_PC), "_",fixed=T), `[`,2))
df$tmp<-substr(df$celltype,start=1,stop=3)
rownames(df)<-colnames(avgExp_PC)
avgExp_PC2<-avgExp_PC[,!(df$tmp %in% c("End","Per"))]
df<-df[!(df$tmp %in% c("End","Per")),]
cor.mat<-cor(as.matrix(avgExp_PC2),method="pearson")
df.m<-merge(df,df2, by="id", sort=F)
rownames(df.m)<-paste(df.m$celltype,"_", df.m$id, sep="")
df.m<-df.m[rownames(df),]
library(RColorBrewer)
n=18
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
col<-sample(col_vector,n)
ha<-HeatmapAnnotation(celltype=df.m$tmp, id=df.m$id, Status=df.m$Status,show_legend=F, col= list(celltype=c("Ast"="darkgoldenrod1","End"="gray54","Exc"="cornflowerblue","Inh"="seagreen3","Mic"="mediumorchid3","Oli"="coral3","OPC"="firebrick","Per"="lightpink"),Status=c("AD"="red","Ctrl"="blue"), id=c("1224" ="# CAB2D6","1230" ="# B3B3B3","1238" ="# FED9A6","3329" ="# A6D854","3586" ="# CBD5E8","4305" ="# 7FC97F","4313" ="# B2DF8A","4443" ="# FFED6F","4481" ="# E5D8BD","4482" ="# FFFF99","4627" ="# 377EB8","HCT17HEX"="# FC8D62","HCTZZT" ="# A6CEE3","NT1261" ="# F1E2CC","NT1271" ="# FDBF6F") ))
ra<-rowAnnotation(celltype=df.m$tmp, id=df.m$id,Status=df.m$Status, show_legend=F,col= list(celltype=c("Ast"="darkgoldenrod1","End"="gray54","Exc"="cornflowerblue","Inh"="seagreen3","Mic"="mediumorchid3","Oli"="coral3","OPC"="firebrick","Per"="lightpink"), Status=c("AD"="red","Ctrl"="blue"),id=c("1224" ="# CAB2D6","1230" ="# B3B3B3","1238" ="# FED9A6","3329" ="# A6D854","3586" ="# CBD5E8","4305" ="# 7FC97F","4313" ="# B2DF8A","4443" ="# FFED6F","4481" ="# E5D8BD","4482" ="# FFFF99","4627" ="# 377EB8","HCT17HEX"="# FC8D62","HCTZZT" ="# A6CEE3","NT1261" ="# F1E2CC","NT1271" ="# FDBF6F")))
ht=Heatmap(cor.mat,cluster_rows=T,cluster_columns=T,col=colorRamp2(c(0,0.5,1),plasma(3)), top_annotation=ha, name="r", left_annotation=ra, show_column_names=F, show_row_names=F, column_title=NULL, row_title_gp=gpar(fontsize=14))
pdf("~/scMultiomics_AD/CT_id_corMat_heatmap.pdf")
ht
dev.off()
library(Signac)
library(Seurat)
library(JASPAR2020)
library(TFBSTools)
library(BSgenome.Hsapiens.UCSC.hg38)
library(patchwork)
set.seed(1234)
library(future)
plan("multicore", workers = 10)
options(future.globals.maxSize = 100000 * 1024^2)
pfm2<-readJASPARMatrix("~/software/cellranger-arc-2.0.0/GRCh38/regions/motifs.pfm", matrixClass="PFM")
DefaultAssay(data)<-"ATAC"
annotation<-Annotation(data)
DefaultAssay(data)<-"CTpeaks"
Annotation(data)<-annotation
# add motif information
data <- AddMotifs(
object = data,
genome = BSgenome.Hsapiens.UCSC.hg38,
pfm = pfm2
)
motifs<-Motifs(data)
saveRDS(motifs, "~/scMultiomics_AD/motifs_object.rds")
counts<-readRDS("~/scMultiomics_AD/motif/FilteredPeaks500b_assay.rds")
motifs<-readRDS("~/scMultiomics_AD/motif/FilteredPeaks500b_motifs.rds")
data[["peaks500"]]<-CreateChromatinAssay(counts=counts, fragments=Fragments(data))
DefaultAssay(data)<-"ATAC"
annotation<-Annotation(data)
DefaultAssay(data)<-"peaks500"
Annotation(data)<-annotation
Motifs(data)<-motifs
library(BSgenome.Hsapiens.UCSC.hg38)
# ad<-subset(data, subset=Status=="AD")
# ad<-RunChromVAR(ad, BSgenome.Hsapiens.UCSC.hg38)
# ctrl<-subset(data, subset=Status=="Ctrl")
# ctrl<-RunChromVAR(ctrl)
data<-RunChromVAR(data, BSgenome.Hsapiens.UCSC.hg38)
differential.activity <- FindMarkers(
object = data,
ident.1 = 'AD',
ident.2 = 'Ctrl',
mean.fxn = rowMeans,
fc.name = "avg_diff"
)
Idents(data)<-data$predicted.id
differential.activity.ct <- FindAllMarkers(
object = data,
mean.fxn = rowMeans,
fc.name = "avg_diff"
)
differential.activity.ct<-differential.activity.ct[which(differential.activity.ct$avg_diff>0),]
write.csv(differential.activity.ct,"~/scMultiomics_AD/motif/Differential_activity_celltypes.csv")
filter macs2 peak calls for link callling
avgAcc<-as.data.frame(AverageExpression(data, assay="CTpeaks", group.by="predicted.id", slot="counts"))
c2$peak<-paste0(seqnames(c2),"-",start(c2)-1,"-", end(c2))
avgAcc<-avgAcc[,-c(2,8)] # remove endothelial and pericytes
rsPeak<-rowSums(avgAcc>0.02) # rerun links with these peaks
Peak2<-rsPeak[which(rsPeak>0)]
peak_set<-write.table(rownames(Peak2),"~/scMultiomics_AD/CT_filtered_peaks.bed")
part1<-import("~/AD/outs/analysis/feature_linkage/feature_linkage.bedpe", format="bedpe")
a10<-as.data.frame(part1)
a10<-a10[which(a10$NA.2!="peak-peak"),]
{
a10$seqnames<-a10$first.seqnames
start<-as.integer(ifelse(a10$NA.2=="peak-gene", (a10$first.end+a10$first.start)/2, (a10$second.end+a10$second.start)/2))
end<-ifelse(a10$NA.2=="gene-peak",a10$first.start, a10$second.start)
a10$start<-ifelse(start<end, start,end)
a10$end<-ifelse(start<end,end,start)
a10$peak.seqnames<-paste0("chr",ifelse(a10$NA.2=="peak-gene",a10[,1], a10[,6]),sep="")
a10$peak.start<-ifelse(a10$NA.2=="peak-gene",a10[,2], a10[,7])
a10$peak.end<-ifelse(a10$NA.2=="peak-gene",a10[,3], a10[,8])
a<-matrix(unlist(strsplit(a10$name,"><")),ncol=2,byrow=T)
gene<-ifelse(a10$NA.2=="peak-gene",noquote(gsub(">","",a[,2])),noquote(gsub("<","",a[,1])))
peak<-ifelse(a10$NA.2=="gene-peak",noquote(gsub(">","",a[,2])),noquote(gsub("<","",a[,1])))
a10$gene<-gene
a10$peak<-peak
a10$gene.strand<-"*"
a10$peak.strand<-"*"
a10$gene.seqnames<-paste0("chr",ifelse(a10$NA.2=="gene-peak",a10[,1], a10[,6]),sep="")
a10$gene.start<-ifelse(a10$NA.2=="gene-peak",a10[,2], a10[,7])
a10$gene.end<-ifelse(a10$NA.2=="gene-peak",a10[,3], a10[,8])
a10$Peak_pos<-paste0(a10$peak.seqnames,"-",a10$peak.start-1,"-", a10$peak.end)
}# # # format a10 ( ad)
a10<-a10[which(abs(a10$score)>0.2),]
# a10<-a10[,c(26,27,28,22,19,20,21,23,11,12,13,14,15,16,17,18,29)]
annot_peaks<-read.csv("~/scMultiomics_AD/CTpeaks_wK27olap.csv")
annot_peaks$peak<-gsub("X",23, annot_peaks$peak)
annot_peaks$peak<-gsub("Y",24, annot_peaks$peak)
a10_ct<-merge(a10, annot_peaks, by.x="Peak_pos", by.y="peak")
a10_ct<-a10_ct[,c(20,21,22,23,24,13,14,17,18,19,37,38,39,40)]
tmp<-a10_ct
colnames(tmp)[c(8,9,10)]<-c("seqnames","start","end")
colnames(a10_ct)[c(8,9,10)]<-c("signac.seqnames","signac.start","signac.end")
a10.gr<-makeGRangesFromDataFrame(tmp, keep.extra.columns = T)
saveRDS(a10.gr, "~/scMultiomics_AD/Links_AD_signac_formatted_filtered.rds")
write.table(a10_ct, "~/scMultiomics_AD/Links_AD_filtered_withXY.bed", sep="\t", row.names=F, quote=F)
a10_ct$link<-paste0(a10_ct$gene,"-", a10_ct$index)
a10_qval<-data.frame(Qval=a10_ct$NA., link=a10_ct$link)
part1<-import("~/Ctrl/outs/analysis/feature_linkage/feature_linkage.bedpe", format="bedpe")
c10<-as.data.frame(part1)
c10<-c10[which(c10$NA.2!="peak-peak"),]
{
c10$seqnames<-c10$first.seqnames
start<-as.integer(ifelse(c10$NA.2=="peak-gene", (c10$first.end+c10$first.start)/2, (c10$second.end+c10$second.start)/2))
end<-ifelse(c10$NA.2=="gene-peak",c10$first.start, c10$second.start)
c10$start<-ifelse(start<end, start,end)
c10$end<-ifelse(start<end,end,start)
c10$peak.seqnames<-paste0("chr",ifelse(c10$NA.2=="peak-gene",c10[,1], c10[,6]),sep="")
c10$peak.start<-ifelse(c10$NA.2=="peak-gene",c10[,2], c10[,7])
c10$peak.end<-ifelse(c10$NA.2=="peak-gene",c10[,3], c10[,8])
a<-matrix(unlist(strsplit(c10$name,"><")),ncol=2,byrow=T)
gene<-ifelse(c10$NA.2=="peak-gene",noquote(gsub(">","",a[,2])),noquote(gsub("<","",a[,1])))
peak<-ifelse(c10$NA.2=="gene-peak",noquote(gsub(">","",a[,2])),noquote(gsub("<","",a[,1])))
c10$gene<-gene
c10$peak<-peak
c10$gene.strand<-"*"
c10$peak.strand<-"*"
c10$gene.seqnames<-paste0("chr",ifelse(c10$NA.2=="gene-peak",c10[,1], c10[,6]),sep="")
c10$gene.start<-ifelse(c10$NA.2=="gene-peak",c10[,2], c10[,7])
c10$gene.end<-ifelse(c10$NA.2=="gene-peak",c10[,3], c10[,8])
c10$Peak_pos<-paste0(c10$peak.seqnames,"-",c10$peak.start-1,"-", c10$peak.end)
}# # # format c10 ( ad)
c10<-c10[which(abs(c10$score)>0.2),]
c10_ct<-merge(c10, annot_peaks, by.x="Peak_pos", by.y="peak")
c10_ct<-c10_ct[,c(20,21,22,23,24,13,14,17,18,19,37,38,39,40)]
tmp<-c10_ct
colnames(tmp)[c(8,9,10)]<-c("seqnames","start","end")
colnames(c10_ct)[c(8,9,10)]<-c("signac.seqnames","signac.start","signac.end")
c10.gr<-makeGRangesFromDataFrame(tmp, keep.extra.columns = T)
saveRDS(c10.gr, "~/scMultiomics_AD/Links_AD_signac_formatted_filtered.rds")
write.table(c10_ct, "~/scMultiomics_AD/Links_AD_filtered_withXY.bed", sep="\t", row.names=F, quote=F)
c10_ct$link<-paste0(c10_ct$gene,"-", c10_ct$index)
c10_qval<-data.frame(Qval=c10_ct$NA., link=c10_ct$link)
c2.2<-merge(o2,a10_qval, by="link", all.x=T)
c2.2<-merge(c2.2, c10_qval, by="link", all.x=T)
c2.2<-c2.2[,c(1,2,3,4,8,10,11,34,35,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33)]
c2.2$Qval<-ifelse(c2.2$group=="AD", c2.2$Qval.x,
ifelse(c2.2$group=="Ctrl", c2.2$Qval.y, (c2.2$Qval.x+c2.2$Qval.y)/2))
all<-merge(a10_ct, c10_ct, by=c("gene","peak.seqnames","peak.start","peak.end"), all=T)
all$group<-ifelse(is.na(all$score.x)==T, "Ctrl",
ifelse(is.na(all$score.y)==T, "AD","common"))
all$CT<-ifelse(is.na(all$score.x)==T, all$peak_called_in.y, all$peak_called_in.x)
t<-table(all$CT, all$group)
t<-t[order(-t[,2]),]
write.table(t, "~/scMultiomics_AD/Entire_ct_link_tablefiltered.txt", sep="\t")
all$Astrocytes <-grepl("Astrocytes",all$CT)
all$Microglia <-grepl("Microglia", all$CT)
all$Inhibitory <-grepl("Inhibitory", all$CT)
all$Excitatory <-grepl("Excitatory", all$CT)
all$Inhibitory <-grepl("Inhibitory", all$CT)
all$Oligodendrocytes<-grepl("Oligodendrocytes",all$CT)
all$OPCs <-grepl("OPCs", all$CT)
all$Pericytes <-grepl("Pericytes", all$CT)
all$Endothelial <-grepl("Endothelial", all$CT)
tab<-rbind(table(all$Astrocytes , all$group ),
table(all$Microglia , all$group ),
table(all$Inhibitory , all$group ),
table(all$Excitatory , all$group ),
table(all$Inhibitory , all$group ),
table(all$Oligodendrocytes, all$group),
table(all$OPCs , all$group ),
table(all$Pericytes ,all$group ) ,
table(all$Endothelial, all$group) )
tab2<-tab[which(rownames(tab)==TRUE),]
rownames(tab2)<-c("Astrocytes","Microglia","Inhibitory","Excitatory","Inhibitory","Oligodendrocytes","OPCs","Pericytes","Endothelial")
write.table(tab2, "~/scMultiomics_AD/Melt_ct_link_table_filtered_wXY.txt", sep="\t", quote=F)
c2.df<-as.data.frame(c2)
c2.df$link<-paste0(c2.df$seqnames,"-", c2.df$start,"-", c2.df$end,"-",c2.df$gene,"-", c2.df$group)
a10_ct$link<-paste0(a10_ct$peak.seqnames,"-", a10_ct$peak.start,"-", a10_ct$peak.end,"-",a10_ct$gene)
c10_ct$link<-paste0(c10_ct$peak.seqnames,"-", c10_ct$peak.start,"-", c10_ct$peak.end,"-",c10_ct$gene)
t1<-a10_ct[,c(8,9,10,15)]
t2<-c10_ct[,c(8,9,10,15)]
mer<-merge(t1,t2,by="link", all=T)
mer$group<-ifelse(is.na(mer$signac.start.x)==T, "Ctrl",
ifelse(is.na(mer$signac.start.y)==T,"AD","common"))
mer$link<-paste0(mer$link,"-",mer$group)
mer$signac.seqnames<-ifelse(mer$group=="AD", mer$signac.seqnames.x,mer$signac.seqnames.y)
mer$signac.start<-ifelse(mer$group=="AD", mer$signac.start.x,mer$signac.start.y)
mer$signac.end<-ifelse(mer$group=="AD", mer$signac.end.x,mer$signac.end.y)
mer<-mer[,c(1,9,10,11)]
c2.df<-c2.df[,-c(19,20,21)]
t<-merge(c2.df,mer, by="link",all.x=T)
c2<-GRanges(t)
saveRDS(c2, "~/scMultiomics_AD/AD_Ctrl_links_UMI200_annot.rds")
Only genes with at least 200 UMIs were retained in the links
sumR<-rowSums(data[["RNA"]]@counts)
sumR<-sumR[sumR>200]
all_2<-all[which(all$gene %in% names(sumR)),]
all_2$signac.seqnames<-ifelse(all_2$group=="AD", all_2$signac.seqnames.x, all_2$signac.seqnames.y)
all_2$signac.start<-ifelse(all_2$group=="AD", all_2$signac.start.x, all_2$signac.start.y)
all_2$signac.end<-ifelse(all_2$group=="AD", all_2$signac.end.x, all_2$signac.end.y)
all_2$k27<-ifelse(all_2$group=="AD",all_2$k27.x, all_2$k27.y)
s2<-all_2[,c(2,3,4,1,6,16,25,26,27,28,29,30,31,32,33,34,35,36,37,38)]
colnames(s2)[c(1,2,3)]<-c("seqnames","start","end")
saveRDS(s2, "~/scMultiomics_AD/AD_Ctrl_links_UMI200.rds")
library(ChIPseeker)
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
s2$seqnames<-gsub("chr23","chrX",s2$seqnames)
s2$seqnames<-gsub("chr24","chrY",s2$seqnames)
all.gr<-GRanges(s2)
links_200.annp<-annotatePeak(all.gr, TxDb=txdb,tssRegion=c(-1000,100) )
links_200.anno<-links_200.annp@anno
saveRDS(links_200.anno, "~/scMultiomics_AD/XY_Filtered_AD_Ctrl_links_UMI200_annot.rds")
All peaks were overlapped with h3k27ac earlier before links were called
c2<-readRDS("~/scMultiomics_AD/AD_Ctrl_links_UMI200_annot.rds")
overlap<-findOverlaps(olap, c2) # olap is from peak k27 overlap in FIG1
c2[subjectHits(overlap)]$k27<-olap[queryHits(overlap)]$k27
ahh<-subsetByOverlaps(ccdc, GRanges("chr3:42721851-42721910"))
moitfs1000<-readRDS("~/scMultiomics_AD/motif/motifs1kb_object.rds")
subsetByOverlaps(ahh, moitfs1000@positions$TBR1_MA0802.1)
library(TFBSTools)
library(motifmatchr)
library(BSgenome.Hsapiens.UCSC.hg38)
pfm2<-readJASPARMatrix("~/JASPAR_reformat_10X.pfm", matrixClass="PFM")
test<-c("TBR1")
pfm3<-pfm2[grepl(test, names(pfm2))]
motif_pos<-matchMotifs(pfm3,resize(ahh, width=2000, fix="center"), genome = BSgenome.Hsapiens.UCSC.hg38, out="positions", p.cutoff=0.0005)
ad_rna<-read.csv("~/scMultiomics_AD/GRN/AD_MetaCell_Expresion.csv", row.names=1)
ad_atac<-read.csv("~/scMultiomics_AD/GRN/AD_MetaCell_Accessibility.csv", row.names=1)
ctrl_rna<-read.csv("~/scMultiomics_AD/GRN/Ctrl_MetaCell_Expression.csv", row.names=1)
ctrl_atac<-read.csv("~/scMultiomics_AD/GRN/Ctrl_MetaCell_Accessibility.csv", row.names=1)
G_TF.x<-cor.test(as.numeric(ad_rna["TBR1",]), as.numeric(ad_rna["HHATL",]))
G_TF.y<-cor.test(as.numeric(ctrl_rna["TBR1",]), as.numeric(ctrl_rna["HHATL",]))
P_TF.x<-cor.test(as.numeric(ad_atac["chr3-42721353-42722322",]), as.numeric(ad_rna["TBR1",]))
P_TF.y<-cor.test(as.numeric(ctrl_atac["chr3-42721353-42722322",]), as.numeric(ctrl_rna["TBR1",]))
avgAcc<-as.data.frame(AverageExpression(data, features="chr3-42721353-42722322", assay="CTpeaks", group.by="Status"))
avgAcc<-melt(avgAcc)
gene<-"HHATL"
index<-"187627"
ex.gr<-signac.gr
ex.gr<-ex.gr[which(ex.gr$index ==index)]
ex.gr$group2<-ifelse(ex.gr$link=="HHATL-242389", 0, 2)
ex.gr<-ex.gr[which((ex.gr$link %in% b$link) | ex.gr$group2 ==0),]
region<-GRanges(paste0(seqnames(ex.gr)[1],":",min(start(ex.gr)),"-",max(end(ex.gr))))
start(region)<-start(region)-5000
end(region)<-end(region)+5000
min.cutoff<-min(ex.gr$score)*2
min.cutoff<-ifelse(min.cutoff<0, min.cutoff, 0)
max.cutoff<-max(ex.gr$score)*2
DefaultAssay(data)<-"CTpeaks"
Idents(data)<-data$Status
cov_plot<-CoveragePlot(object=data, region=region, assay="CTpeaks",annotation=F,window=300,peaks=F, links=F)&
scale_fill_manual(values=c("blue", "red"))
Links(data)<-ex.gr #full
link_plotA <- LinkPlot.height(data, region=region, min.cutoff=min.cutoff, max.cutoff=max.cutoff)+ylab("Score")
ChIP_track1<-BigwigTrack(region,"~/TBR1_AD-B6_Output/signal/pooled-rep/basename_prefix.pooled_x_basename_prefix.pooled.pval.signal.bigwig",smooth = 100)+ylab("AD")+theme(legend.position="none")+scale_fill_manual(values="red")+ylim(0,20)
ChIP_track2<-BigwigTrack(region,"~/TBR1_AD-CTL_Output/signal/pooled-rep/basename_prefix.pooled_x_basename_prefix.pooled.pval.signal.bigwig",smooth = 100)+ylab("Ctrl")+theme(legend.position="none")+ylim(0,20)+scale_fill_manual(values="blue")
motif_pos<-matchMotifs(pfm3,resize(c2[which(c2$index %in% ex.gr$index),], width=2000, fix="center"), genome = BSgenome.Hsapiens.UCSC.hg38, out="positions", p.cutoff=0.0005)
motif_track<-Bed_PeakPlot(resize(motif_pos, width=500, fix="center"),region)+ylab("TBR1")
DefaultAssay(data)<-"ATAC"
gene_plot<-AnnotationPlot(data, region=region)
snps<-GRanges(c("chr3:42691682","chr3:42747019","chr3:42741460","chr3:42735109","chr3:42749631","chr3:42728196","chr3:42734484"))
p<-CombineTracks(plotlist=list(cov_plot,ChIP_track2,ChIP_track1,link_plotA, motif_track, gene_plot), heights=c(6,3,3, 2,1,2))
pdf(paste0("~/scMultiomics_AD/TBR1_trio_",index ,"_linkPlot.pdf"), width=6, height=7)
p
dev.off()
TBR1-HHATL link (HHATL-242389) - CSAW diff for TBR1. Only present in Ctrl (also more acc in control) - in CCDC13 intron. Link also to CCDC13 & only AD (score=0.25) - link is common (score=0.47) and present in all cell types besides excitatory - motif is 200bp upstream of peak - gene-TF significant in both & peak-TF significant in both but negative (does not classify as trio) - is also control-specific trio for ZNF189 (242389-ZNF189_MA1725.1-HHAT) - rs3916334 eQTL for HHATL, CCDC13-AS1
redo<-c2[which(c2$CT %in% c("Excitatory","Inhibitory","Microglia","Astrocytes","Oligodendrocytes","OPCs")),]
redo$classify<-paste(seqnames(redo),"-",start(redo),"-",redo$group,sep="")
redo2<-redo[!duplicated(redo$classify),]
redo<-as.data.frame(redo2)
table(redo$k27)
# redo$K27<-ifelse(redo$k27=="None","No_overlap","overlapped")
redo_format<-redo2[,c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21)]
redo_format$Astrocytes.k27<-grepl("Astrocytes",redo_format$k27)
redo_format$Excitatory.k27<-grepl("Neuron",redo_format$k27) | grepl("GLU", redo_format$k27)
redo_format$Inhbitory.k27<-grepl("Neuron",redo_format$k27) | grepl("GABA",redo_format$k27)
redo_format$Microglia.k27<-grepl("Microglia",redo_format$k27)
redo_format$Oligodendrocytes.k27<-grepl("Oligodendrocytes",redo_format$k27)
write.csv(redo_format,"~/scMultiomics_AD/celltype_specific_links_anno_hg38.csv")
tmp<-c2
tmp$classify<-paste(seqnames(tmp),"-",start(tmp),"-",tmp$group,sep="")
tmp$K27<-ifelse(tmp$k27=="None","No_overlap","overlapped")
tmp<-as.data.frame(tmp)
tmp<-tmp[!duplicated(tmp$classify),]
redo_format<-tmp[,c(1,2,3,4,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,23,21)]
redo_format$Astrocytes.k27<-grepl("Astrocytes",redo_format$k27)
redo_format$Excitatory.k27<-grepl("Neuron",redo_format$k27) | grepl("GLU", redo_format$k27)
redo_format$Inhbitory.k27<-grepl("Neuron",redo_format$k27) | grepl("GABA",redo_format$k27)
redo_format$Microglia.k27<-grepl("Microglia",redo_format$k27)
redo_format$Oligodendrocytes.k27<-grepl("Oligodendrocytes",redo_format$k27)
write.csv(redo_format,"~/scMultiomics_AD/all_links_anno_hg38.csv")
write.table(as.data.frame(unique(c2[which(c2$Astrocytes==T ),] ))[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Astrocytes_all.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Astrocytes==T & c2$group=="AD"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Astrocytes_AD.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Astrocytes==T & c2$group=="common"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Astrocytes_common.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Astrocytes==T & c2$group=="Ctrl"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Astrocytes_Ctrl.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Excitatory==T ),] ))[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Excitatory_all.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Excitatory==T & c2$group=="AD"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Excitatory_AD.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Excitatory==T & c2$group=="common"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Excitatory_common.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Excitatory==T & c2$group=="Ctrl"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Excitatory_Ctrl.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Inhibitory==T ),] ))[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Inhibitory_all.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Inhibitory==T & c2$group=="AD"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Inhibitory_AD.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Inhibitory==T & c2$group=="common"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Inhibitory_common.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Inhibitory==T & c2$group=="Ctrl"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Inhibitory_Ctrl.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Microglia==T ),] ))[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Microglia_all.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Microglia==T & c2$group=="AD"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Microglia_AD.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Microglia==T & c2$group=="common"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Microglia_common.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Microglia==T & c2$group=="Ctrl"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Microglia_Ctrl.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Oligodendrocytes==T ),] ))[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Oligodendrocytes_all.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Oligodendrocytes==T & c2$group=="AD"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Oligodendrocytes_AD.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Oligodendrocytes==T & c2$group=="common"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Oligodendrocytes_common.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Oligodendrocytes==T & c2$group=="Ctrl"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Oligodendrocytes_Ctrl.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$OPCs==T ),] ))[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/OPCs_all.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$OPCs==T & c2$group=="AD"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/OPCs_AD.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$OPCs==T & c2$group=="common"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/OPCs_common.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$OPCs==T & c2$group=="Ctrl"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/OPCs_Ctrl.bed",quote=F,row.names=F, col.names=F, sep="\t" )
# get distance to linked-gene TSS so that you can remove links greater than 100kb away from TSS
gtf<-import("~/STAR/genes.gtf")
gtf<-gtf[which(gtf$type=="gene"),]
gtf$TSS<-ifelse(strand(gtf)=="+", start(gtf), end(gtf))
TSS.df<-data.frame(gene=gtf$gene_name, TSS=gtf$TSS)
tmp<-merge(links, TSS.df, by="gene")
tmp$p1<-abs(tmp$start-tmp$TSS)
tmp$p2<-abs(tmp$end-tmp$TSS)
tmp$distLinkedGene<-ifelse(tmp$p1<tmp$p2, tmp$p1, tmp$p2)
pfm<-readJASPARMatrix("motifs.pfm", matrixClass="PFM")
DefaultAssay(data)<-"CTpeaks"
peaks<-granges(data)
peaks<-resize(peaks, width=500, fix="center") #resize peaks to 500bp before calling motifs
counts<-FeatureMatrix(fragments=Fragments(data), features=peaks,cells=colnames(data))
data[["peaks500"]]<-CreateChromatinAssay(counts=macs2_counts, fragments=Fragments(data))
DefaultAssay(data)<-"ATAC"
annotation<-Annotation(data)
DefaultAssay(data)<-"peaks500"
Annotation(data)<-annotation
# add motif information
data <- AddMotifs(
object = data,
genome = BSgenome.Hsapiens.UCSC.hg38,
pfm = pfm
)
motifs<-Motifs(data)
tmp<-motifs@data
colnames(tmp)<-sapply(strsplit(colnames(tmp), " ",fixed=T), `[`,1)
nz<- simplify2array(apply(tmp, 1, function(x) paste(colnames(dummy)[x != 0], collapse = ",") ))
olap<-findOverlaps(nz, GRanges(annot_peaks), minoverlap=200) # annot_peaks is indexed peaks
nz$index<-annot_peaks[subjectHits(olap),]$index
nz.2<-nz[which(nz$num<=100),] #remove links with peaks that had more than 100 motif calls
#only keep links within 100kb and peaks with motif calls
testing<-tmp[which( (tmp$index %in% nz.2$index) & tmp$distLinkedGene<100000),]
# format for motif-gene-peak links before correlation calculation
mot<-strsplit(nz.2$nz,",")
mot2<-unlist(mot)
c<-c()
for (i in 1:length(nz.2)){
c<-c(c, rep(nz.2[i,]$index, nz.2[i,]$num)) # repeat index for number of motifs in peak
}
df<-data.frame(motif=mot2, index=c)
testing<-merge(testing, df, by="index")
testing$TF<-sapply(strsplit(testing$motif,"_"), `[`,1) # remove motif names
testing$TF<-sapply(strsplit(testing$TF,":"), `[`,1) # if 2 tfs just take first
testing$TF<-toupper(testing$TF) # make all upper case
testing<-testing[which(testing$TF %in% rownames(data[["RNA"]])),]
testing$peak<-paste0(testing$seqnames,"-",testing$start,"-",testing$end)# add peak name in same format as seurat object
m=motifs@positions #positions contains -log10(q-value) for each motif call
for (i in 1:length(m)){
m[[i]]$name<-names(m[i])
}
# format motif as TF name
u<-unlist(m)
u$Peak_motif<-paste0(seqnames(u),"-", start(u),"-", end(u),"-",u$name)
u<-u[!duplicated(u$Peak_motif),]
u$TF=sapply(strsplit(u$name,"_"),`[`,1)
u$TF<-sapply(strsplit(u$TF,":"), `[`,1) # if 2 tfs just take first
u$TF<-toupper(u$TF)
# overlap with indexed peaks. Will use peak index to determine which motif scores go with which peak
olap<-findOverlaps(u, GRanges(annot_peaks), type="within")
u<-u[queryHits(olap)]
u$index<-annot_peaks[subjectHits(olap),]$index
saveRDS(u, "~/scMultiomics_AD/motif/Motif_Score_indexPeaks.rds")
# max accessibility of linked-peak by cluster
data<-AddMetaData(data, fullmeta)
data$subs<-ifelse(is.na(data$subs),substr(data$predicted.id,start=1,stop=3),data$subs)
Idents(data)<-data$subs
avgAcc<-as.data.frame(AverageExpression(data, group.by="subs", assay="CTpeaks", features=testing$peak))
max<-as.data.frame(apply(avgAcc, 1, function(x) max(x)))
max$peak<-rownames(max)
colnames(max)[1]<-"MaxAcc"
# ADD
testing2<-merge(testing, max, by="peak")
u<-as.data.frame(u)
u2u<-u[,c(6,7,9,10,1,2,3)]
colnames(u2u)<-c("Motif_score","motif","TF","index","motif_seqnames","motif_start","motif_end")
testing2<-merge(testing2, u2u, by=c("motif","TF","index"))
testing2$GRN<-paste0(testing2$index,"-",testing2$motif,"-", testing2$gene)
testing3<-testing2[which(testing2$Motif_score>12 & abs(testing2$score)>0.3 & testing2$Qval>20 & testing2$k27_num>0),]
saveRDS(testing3[which(testing3$group!="Ctrl"),], "~/scMultiomics_AD/GRN/a10.rds")
saveRDS(testing3[which(testing3$group!="AD"),], "~/scMultiomics_AD/GRN/c10.rds")
ad<-subset(data, subset=Status=="AD")
ad<-FindMultiModalNeighbors(ad, reduction.list=list("harmony.rna","harmony.atac"), dims.list=list(1:30,2:50), k.nn=50, return.intermediate=T)
ad<-FindClusters(ad, graph.name="wknn", resolution=100, group.singletons=T)
ad<-NormalizeData(ad)
avgExp<-as.data.frame(AverageExpression(ad, group.by="seurat_clusters", assay="RNA"))
avgAcc<-as.data.frame(AverageExpression(ad, group.by="seurat_clusters",assay="CTpeaks"))
write.csv(avgExp, "~/scMultiomics_AD/GRN/AD_MetaCell_Expresion.csv")
write.csv(avgAcc, "~/scMultiomics_AD/GRN/AD_MetaCell_Accessibility.csv")
ctrl<-subset(data, subset=Status=="Ctrl")
ctrl<-FindMultiModalNeighbors(ctrl, reduction.list=list("harmony.rna","harmony.atac"), dims.list=list(1:30,2:50), k.nn=50, return.intermediate=T)
ctrl<-FindClusters(ctrl, graph.name="wknn", resolution=100, group.singletons=T)
ctrl<-NormalizeData(ctrl)
avgExp<-as.data.frame(AverageExpression(ctrl, group.by="seurat_clusters", assay="RNA"))
avgAcc<-as.data.frame(AverageExpression(ctrl, group.by="seurat_clusters",assay="CTpeaks"))
write.csv(avgExp, "~/scMultiomics_ctrl/GRN/Ctrl_MetaCell_Expression.csv")
write.csv(avgAcc, "~/scMultiomics_ctrl/GRN/Ctrl_MetaCell_Accessibility.csv")
Jobs were split into batches of 10,000 trios
# !/usr/bin/env Rscript
library(Seurat)
library(Signac)
library(GenomicRanges)
library(WGCNA)
args = commandArgs(trailingOnly=TRUE)
n=args[1]
n=as.numeric(i)
group=args[2]
data<-readRDS("~/scMultiomics_AD/final.rds")
ctrl<-subset(data, subset=Status=="AD")
c10<-readRDS("~/scMultiomics_AD/GRN/a10.rds")
DefaultAssay(ctrl)<-"RNA"
ctrl<-NormalizeData(ctrl)
DefaultAssay(ctrl)<-"CTpeaks"
ctrl<-RunTFIDF(ctrl)
part<-seq(1, nrow(c10), by=10000)
i=part[n]
j=i+9999
cor<-c()
pval<-c()
cor2<-c()
pval2<-c()
for (i in i:j){
geneT<-c10$gene[i]
tfT<-c10$TF[i]
test<-cor.test(as.numeric(data[["RNA"]][geneT,]), as.numeric(data[["RNA"]][tfT,]), method="pearson")
cor<-c(cor,test$estimate)
pval<-c(pval,test$p.value)
peakT<-c10$peak[i]
test2<-cor.test(as.numeric(data[["CTpeaks"]][peakT,]), as.numeric(data[["RNA"]][tfT,]), method="pearson")
cor2<-c(cor2,test2$estimate)
pval2<-c(pval2,test2$p.value)
}
df<-data.frame(`G-TF_cor`=cor, `G-TF_pval`=pval, `P-TF_cor`=cor2, `P-TF_pval`=pval2)
df$grn<-c10[i:j,]$GRN
write.csv(df, paste0("~/scMultiomics_AD/GRN/",group,"_gene_tf_peak_score_",n,".csv"))
# Trio correlations were split into batches of 10,000 trios.
# AD_list is list of AD trio batch outputs
# Ctrl_list is list of Ctrl trio batch outputs
files=as.list(readLines("~/scMultiomics_AD/GRN/AD_list.csv"))
AD_list=lapply(files,function(x){
FT=read.csv(x)})
files=as.list(readLines("~/scMultiomics_AD/GRN/Ctrl_list.csv"))
Ctrl_list=lapply(files,function(x){
FT=read.csv(x)})
AD<-c(1,1,1,1,1,1) #dummy variable to combine data frames
for (i in 1:length(AD_list)){
AD<-rbind(AD,AD_list[[i]])
}
AD<-AD[-1,] # remove dummy
Ctrl<-c(1,1,1,1,1,1)
for (i in 1:length(Ctrl_list)){
Ctrl<-rbind(Ctrl,Ctrl_list[[i]])
}
Ctrl<-Ctrl[-1,]
# add scores back to links
all<-readRDS("~/scMultiomics_AD/GRN/ALL_wMotifScore_MaxAcc.rds" ) #links with motif score and accessibility
mer<-merge(all, AD,by.x="GRN",by.y="grn", all.x=T)
mer<-merge(mer, Ctrl, by.x="GRN", by.y="grn",all.x=T)
mer<-mer[mer$GRN %in% c(AD2$grn, Ctrl2$grn),]
mer2<-mer
# scale motif scoring so that it's normalized for each motif instead of across all motifs. Because repetitive motifs biased towards lower scoring
maxs<-aggregate(Motif_score~motif, mer2, max)
colnames(maxs)[2]<-"Maxs"
mins<-aggregate(Motif_score~motif, mer2, min)
colnames(mins)[2]<-"Mins"
mer2<-merge(mer2, maxs, by="motif")
mer2<-merge(mer2, mins, by="motif")
mer2$Z_Motif_score<- (mer2$Motif_score-mer2$Mins)/(mer2$Maxs-mer2$Mins)
# normalize accessibility
rs<-rowSums(data[["CTpeaks"]])
rs.df<-as.data.frame(rs)
rs.df$peak<-rownames(rs.df)
rs.df$norm<-minmax.normalisation(log2(rs.df$rs)) # normalize on log2 so that outliers/promoters don't left skew distribution
mer2<-merge(mer2,rs.df, by="peak")
a<-mer2[!duplicated(mer2$GRN),]
# Is Gene-TF cor significant?
a$AD_sig<-a$G.TF_pval.x<0.001
a$Ctrl_sig<-a$G.TF_pval.y<0.001
# Is P-TF cor significant?
a$AD_sigP<-a$P.TF_pval.x<0.001
a$Ctrl_sigP<-a$P.TF_pval.y<0.001
# restrict so that only get ones where P-TF and G-TF agree by status
b<-a[which((a$AD_sig==T & a$AD_sigP==T & a$P.TF_cor.x>0.2) | (a$Ctrl_sig==T & a$Ctrl_sigP==T & a$P.TF_cor.y>0.2)),]
# ones that are significant in both
c<-b[which(b$AD_sig==T & b$Ctrl_sig==T),]
c$sign<-sign(c$G.TF_cor.x)==sign(c$G.TF_cor.y)
c2<-c[which(c$sign==F),] # 34 are different directions
# Only significant in AD
d<-b[which(b$AD_sig==T & b$Ctrl_sig==F),]
d2<-d[which(d$Ctrl_sigP==F),] # Peak-TF is also not significant in Ctrl
# Only significant in Ctrl
e<-b[which(b$AD_sig==F & b$Ctrl_sig==T),]
e2<-e[which(e$AD_sigP==F),] # Peak-TF is also not significant in AD
b$category<-ifelse(b$GRN %in% c2$GRN, "DiffDir",
ifelse(b$GRN %in% d2$GRN, "AD-only",
ifelse(b$GRN %in% e2$GRN, "Ctrl-only","Sig")))
trios<-as.data.frame(b)
write.table(trios, "~/scMultiomics_AD/Significant_trios.txt", sep="\t", row.names=F, quote=F)
load("~/iCellGABA_CRE_ENCODEstyle_GR.Rvar")
enc=full_annot_encode_cre_def_GR
enc<-enc[!grepl("iCell",enc$accesionLabel),]
u2<-unique(b)
ol_enc=findOverlaps(GRanges(u2),enc)
c2_enc<-u2[queryHits(ol_enc),]
c2_enc$encLab<-enc[subjectHits(ol_enc),]$encodeLabel
c2_enc<-c2_enc[!duplicated(c2_enc$link),]
d2<-merge(as.data.frame(c2_enc), as.data.frame(u2), by="link", all=T)
d2$encLab<-ifelse(is.na(d2$encLab)==T, "None",d2$encLab)
#grouped together
tmp<-d2
#tmp$numCT<-str_count(tmp$CT.x,",")
#tmp<-tmp[which(tmp$numCT==0),]
a<-prop.table(table(tmp$encLab, tmp$Astrocytes.y),2)
m<-prop.table(table(tmp$encLab, tmp$Microglia.y),2)
e<-prop.table(table(tmp$encLab, tmp$Excitatory.y),2)
i<-prop.table(table(tmp$encLab, tmp$Inhibitory.y),2)
o<-prop.table(table(tmp$encLab, tmp$Oligodendrocytes.y),2)
p<-prop.table(table(tmp$encLab, tmp$OPCs.y),2)
df<-cbind(a[,2],m[,2],e[,2],i[,2],o[,2],p[,2])
colnames(df)<-c("Ast","Mic","Exc","Inh","Oli","OPC")
df<-as.data.frame(df)
df$annot<-rownames(df)
df<-melt(df)
pdf("~/scMultiomics_AD/Trio_peak_annot_encode.pdf", width=8, height=3)
ggplot(df, aes(x=value, y=variable, fill=annot))+geom_bar(stat="identity")+xlab("Proportion of Linked peaks")+ylab("Cell type")+theme_classic()+scale_fill_manual(values=c("dodgerblue","gold","lightpink","orange","red","forestgreen"),labels=c("CTCF-only","dELS","DNase-H3K4me3","pELS","PLS","other"))+theme(legend.position="right", axis.text=element_text(size=12),axis.title=element_text(size=15), legend.text=element_text(size=12))
dev.off()
load("~/iCellGABA_CRE_ENCODEstyle_GR.Rvar")
enc=full_annot_encode_cre_def_GR
enc<-enc[!grepl("iCell",enc$accesionLabel),]
u2<-unique(b)
all<-import("~/scMultiomics_AD/CT_filtered_peaks.srt.bed")
over<-findOverlaps(all, u2)
all$Linked<-F
all[queryHits(over)]$Linked<-T
all$index<-seq(1, length(all))
ol_enc=findOverlaps(GRanges(all),enc)
b_enc<-all[queryHits(ol_enc),]
b_enc$encLab<-enc[subjectHits(ol_enc),]$encodeLabel
b_enc<-b_enc[!duplicated(b_enc$index),]
all$encLab<-"None"
d2<-c(b_enc, all[!(all$index %in% b_enc$index)])
t<-table(d2$encLab, d2$Linked)
## CTCF
chi<-rbind(t[1,],colSums(t[-1,]))
chisq.test(chi)
pval<-phyper(q =( chi[1,2]-1)/1000000, m = chi[1,1]/1000000,n = chi[2,1]/1000000, k = (chi[1,2]+chi[2,2])/1000000, lower.tail=F)
FE<-(chi[1,2]*chi[2,1])/(chi[1,1]*chi[2,2])
## dELS
chi<-rbind(t[2,],colSums(t[-2,]))
chisq.test(chi)
pval<-c(pval,phyper(q =( chi[1,2]-1)/1000000, m = chi[1,1]/1000000,n = chi[2,1]/1000000, k = (chi[1,2]+chi[2,2])/1000000, lower.tail=F))
FE<-c(FE,(chi[1,2]*chi[2,1])/(chi[1,1]*chi[2,2]))
## DNAase
chi<-rbind(t[3,],colSums(t[-3,]))
chisq.test(chi)
pval<-c(pval,phyper(q =( chi[1,2]-1)/1000000, m = chi[1,1]/1000000,n = chi[2,1]/1000000, k = (chi[1,2]+chi[2,2])/1000000, lower.tail=F))
FE<-c(FE,(chi[1,2]*chi[2,1])/(chi[1,1]*chi[2,2]))
## pELS
chi<-rbind(t[4,],colSums(t[-4,]))
chisq.test(chi)
pval<-c(pval,phyper(q =( chi[1,2]-1)/1000000, m = chi[1,1]/1000000,n = chi[2,1]/1000000, k = (chi[1,2]+chi[2,2])/1000000, lower.tail=F))
FE<-c(FE,(chi[1,2]*chi[2,1])/(chi[1,1]*chi[2,2]))
## PLS
chi<-rbind(t[5,],colSums(t[-5,]))
chisq.test(chi)
pval<-c(pval,phyper(q =( chi[1,2]-1)/1000000, m = chi[1,1]/1000000,n = chi[2,1]/1000000, k = (chi[1,2]+chi[2,2])/1000000, lower.tail=F))
FE<-c(FE,(chi[1,2]*chi[2,1])/(chi[1,1]*chi[2,2]))
names(FE)<-c("CTCF","dELS","DNase-H3K4me3","pELS","PLS")
names(pval)<-c("CTCF","dELS","DNase-H3K4me3","pELS","PLS")
pval2<-c(2.2e-16,2.2e-16,2.692e-10,5.903e-07,1.227e-13)
names(pval2)<-c("CTCF","dELS","DNase-H3K4me3","pELS","PLS")
Heatmap of correlation values of AD and control-specific trios identified in microglia (left) and excitatory/inhibitory neurons (right) for TFs involved in at least 3 trios.
c2$class<-"DiffDir"
d2$class<-"ADonly"
e2$class<-"Ctrlonly"
# changed<-c(c2,d2,e2)
changed<-c(c2,d2,e2)
t<-table(changed$TF)
changed$Name<-paste0(changed$TF,"-", changed$gene)
changed<-changed[which(changed$TF %in% c("MEF2C",names(t[t>10]))),]
changed$CTspec<-ifelse(changed$CT %in% c("Inhibitory","Excitatory,Inhibitory","Inhbitory,Excitatory"), "Neuron",
ifelse(changed$CT %in% c("Microglia","Excitatory","Oligodendrocytes","OPCs","Astrocytes"), changed$CT, "shared"))
# add cluster
clusters<-read.table("~/JASPAR/clusters_motif_names.tab")
clust<-strsplit(clusters$V2,",")
len<-lapply(clust, length)
clust<-unlist(clust)
len<-unlist(len)
names<-c()
for (i in 1:length(len)){
names<-c(names, rep(clusters$V1[i], len[i]))
}
clusters<-data.frame(cluster=names, motif2=clust)
clusters<-clusters[!duplicated(clusters$motif2),]
changed$motif2<-sapply(strsplit(changed$motif, "_"),`[`,1)
changed<-merge(changed, clusters, by="motif2")
changed<-changed[which(abs(changed$score.y)>0.3 | abs(changed$score.x)>0.3),]
changed$GRN<-paste0(changed$index,"-",changed$TF,"-", changed$gene)
changed<-changed[!duplicated(changed$GRN),]
# plot
mat<-as.matrix(cbind(changed$P.TF_cor.x,changed$G.TF_cor.x,changed$P.TF_cor.y, changed$G.TF_cor.y))
colnames(mat)<-rep(c("Peak-TF","Gene-TF"), 2)
rownames(mat)<-changed$TF
ha2<-HeatmapAnnotation(Score=rep(c("Peak-TF","Gene-TF"), 2), col= list(Score=c("Peak-TF"="mediumseagreen","Gene-TF"="orange")), show_legend=F)
ha<-HeatmapAnnotation(Status=rep(c("AD","Ctrl"), each=2), col=list(Status=c("AD"="red","Ctrl"="blue")), show_legend=FALSE)
ha<-c(ha, ha2)
ra<-rowAnnotation(Group=changed$CTspec, TF=changed$TF, Class=changed$class, Cluster=changed$cluster, col=list(Group=c("shared"="grey50","Astrocytes"="darkgoldenrod1","Neuron"="seagreen", "Microglia"="mediumorchid3","Oligodendrocytes"="coral3","Excitatory"="cornflowerblue"), Class=c("ADonly"="red4", "Ctrlonly"="dodgerblue","DiffDir"="gold")))
ht1=Heatmap(mat, cluster_rows=T,show_row_dend=F,cluster_columns=F,col=colorRamp2(c(-0.2,0.1,0.4), plasma(3)), name="r", show_column_names=T, show_row_names=F, row_names_gp = gpar(fontsize = 2), top_annotation=ha, right_annotation=ra, use_raster=T,raster_quality=5,row_split=changed$CTspec, row_title=NULL)
lab<-which(changed$gene %in% "SPP1")
pdf("~/scMultiomics_AD/Triangle_correlations.pdf", width=10, height=10)
# rowAnnotation(gene=anno_mark(at=lab, labels=changed[lab,]$Name, side="left", labels_gp=gpar(fontsize=10)))+
ht1
dev.off()
CT="Excitatory"
ra<-rowAnnotation(TF=changed[which(changed$CTspec==CT),]$TF,Class=changed[which(changed$CTspec==CT),]$class, show_legend=T, col=list( Class=c("ADonly"="red4", "Ctrlonly"="dodgerblue","DiffDir"="gold")))
ht1=Heatmap(mat[which(changed$CTspec==CT),], cluster_rows=T,show_row_dend=F,cluster_columns=F,col=colorRamp2(c(-0.2,0.1,0.4), plasma(3)), name="r", show_column_names=T, show_row_names=F, row_names_gp = gpar(fontsize = 2), top_annotation=ha, right_annotation=ra,row_title=NULL)
pdf(paste0("~/scMultiomics_AD/Triangle_correlations_",CT,".pdf"), width=10, height=10)
ht1
dev.off()
changed<-b[which(b$category %in% c("AD-only","Ctrl-only")),]
changed$CTspec<-ifelse(changed$CT %in% c("Inhibitory","Excitatory,Inhibitory","Inhbitory,Excitatory","Excitatory"), "Neuron","shared")
changed<-changed[which(changed$CTspec=="Neuron"),]
t<-table(changed$TF)
changed<-changed[which(changed$TF %in% names(t[t>2])),]
mat<-as.matrix(cbind(changed$P.TF_cor.x,changed$G.TF_cor.x,changed$P.TF_cor.y, changed$G.TF_cor.y))
colnames(mat)<-rep(c("Peak-TF","Gene-TF"), 2)
rownames(mat)<-changed$TF
#ha2<-HeatmapAnnotation(Score=rep(c("Peak-TF","Gene-TF"), 2), col= list(Score=c("Peak-TF"="mediumseagreen","Gene-TF"="orange")), show_legend=F)
ha<-HeatmapAnnotation(Status=rep(c("AD","Ctrl"), each=2), col=list(Status=c("AD"="red","Ctrl"="blue")), show_legend=FALSE)
#ha<-c(ha, ha2)
cols<-c(brewer.pal(name = "Set3", n = 8),brewer.pal(name = "Set1", n = 6), brewer.pal(name = "Pastel1", n = 6))
cols=setNames(cols, unique(changed$TF))
cols[2]<-"red"
cols[11]<-"blue"
cols[9]<-"cyan"
ra<-rowAnnotation(TF=changed$TF, show_legend=T, col=list(TF=cols))
ht1=Heatmap(mat, cluster_rows=T,show_row_dend=F,cluster_columns=F,col=colorRamp2(c(-0.1,0.1,0.4), plasma(3)), name="r", show_column_names=T, show_row_names=F, row_names_gp = gpar(fontsize = 2), top_annotation=ha, right_annotation=ra, use_raster=T,raster_quality=5,row_split=changed$category, row_title=NULL)
pdf("~/scMultiomics_AD/Triangle_correlations_Neuron_ExIn.pdf", width=5, height=10)
ht1
dev.off()
avgExp<-as.data.frame(AverageExpression(data, features=unique(b[which(b$TF=="MEF2C"),]$gene), assay="RNA", group.by="predicted.id"))
avgExp$gene<-rownames(avgExp)
avgExp<-avgExp[,c(1,3,4,5,6,7,9)]
colnames(avgExp)<-c("Ast","Exc","Inh","Mic","Olig","OPCs", "gene")
b$CTspec<-ifelse(b$CT %in% c("Excitatory,Inhibitory","Inhibitory,Excitatory"), "Neuron",
ifelse(b$CT %in% c("Microglia","Excitatory","Inhibitory","Oligodendrocytes","OPCs","Astrocytes"), b$CT, "shared"))
mef<-b[which(b$TF=="MEF2C"),]
mef<-mef[order(-abs(mef$score)),]
mef<-mef[!duplicated(mef$gene),]
mef<-mef[,c("gene","CTspec")]
avgExp.g<-merge(avgExp, mef, by="gene")
rownames(avgExp.g)<-avgExp.g$gene
CT<-avgExp.g$CTspec
avgExp.g<-avgExp.g[,c(2,3,4,5,6,7)]
CT<-ifelse(CT %in% c("shared","Astrocytes"), "other",CT)
ra<-rowAnnotation(Group=CT, col=list(Group=c("Microglia"="mediumorchid3","Neuron"="yellow", "other"="grey54", "Excitatory"="cornflowerblue","Inhibitory"="seagreen3")))
mat.scale<-t(scale(t(avgExp.g), center=T))
ht=Heatmap(mat.scale,cluster_columns=F,cluster_rows=T, col=colorRamp2(c(-1.8,0,2),viridis(3)), name="Expression Z", show_column_names=T, show_row_names=F, column_title=NULL, row_names_gp=gpar(fontsize=7),right_annotation=ra, use_raster=T,raster_quality=5)
pdf("~/scMultiomics_AD/MEF2C_trios_heatmap.pdf", width=6, height=7)
ht
dev.off()
df<-data.frame(mef2c=data@assays$RNA@data["MEF2C",], ct=data$predicted.id)
df<-df[which(df$ct %in% c("Astrocytes","Excitatory","Inhibitory","Microglia","Oligodendrocytes","OPCs")),]
pdf("~/scMultiomics_AD/MEF2C_boxplot.pdf", width=6, height=4)
ggplot(df, aes(x=ct, y=mef2c, fill=ct))+geom_boxplot()+theme_classic()+ylab("MEF2C Normalize Expression")+scale_fill_manual(values=c("darkgoldenrod1", "cornflowerblue","seagreen3","mediumorchid3","coral3","firebrick"))+xlab("")+theme(legend.position="none")
dev.off()
####################
#GO
g1<-unique(mef[which(mef$Microglia==T),]$gene)
g2<-unique(mef[which(mef$Excitatory==T),]$gene)
dbs <- c("GO_Molecular_Function_2021", "GO_Cellular_Component_2021", "GO_Biological_Process_2021", "KEGG_2021_Human","Azimuth_Cell_Types_2021")
go1<-enrichr(g1,dbs)
go2<-enrichr(g2,dbs)
go1.1<-go1[["GO_Biological_Process_2021"]]
go2.1<-go2[["GO_Biological_Process_2021"]]
go1.1<-go1.1[which(go1.1$Adjusted.P.value<0.01),]
go2.1<-go2.1[which(go2.1$Adjusted.P.value<0.01),]
c
go1.1$CT<-"Microglia"
go2.1$CT<-"Neuron"
go<-rbind(go1.1,go2.1)
go$Name<-sapply(strsplit(go$Term, "(G", fixed=T), `[`,1)
go<-go[order(go$CT, go$Odds.Ratio),]
go$Name<-factor(go$Name, levels=go$Name)
go.top<-go %>% group_by(CT) %>% top_n(n=5, wt=Odds.Ratio)
pdf("~/scMultiomics_AD/MEF2C_target_GOenrichment_byCT.pdf")
ggplot(go, aes(x=Odds.Ratio, y=Name, fill=CT))+geom_bar(stat="identity")+theme_classic()+scale_fill_manual(values=c("mediumorchid3","yellow2"))
dev.off()
region<-GRanges("chr4:90030247-90031491")
start(region)<-start(region)-1000
end(region)<-end(region)+1000
ChIP_track1<-BigwigTrack(region,"/~/MEF2C_AD-B5_Output/signal/pooled-rep/basename_prefix.pooled_x_basename_prefix.pooled.pval.signal.bigwig",smooth = 300)+ylab("AD")+theme(legend.position="none")+scale_fill_manual(values="red")+ylim(0,10)
ChIP_track2<-BigwigTrack(region,"~/MEF2C_AD-CTL_Output/signal/pooled-rep/basename_prefix.pooled_x_basename_prefix.pooled.pval.signal.bigwig",smooth = 300)+ylab("Ctrl")+theme(legend.position="none")+ylim(0,10)+scale_fill_manual(values="blue")
ChIP_track3<-BigwigTrack(region,"~/MEF2C_OL-NEUN_PT2_test/signal/pooled-rep/basename_prefix.pooled_x_basename_prefix.pooled.pval.signal.bigwig",smooth = 300)+ylab("NEUN")+theme(legend.position="none")+ylim(0,10)+scale_fill_manual(values="grey54")
p<-CombineTracks(plotlist=list(ChIP_track1,ChIP_track2,ChIP_track3), heights=c(2,2,2))
pdf(paste0("~/scMultiomics_AD/MEF2C_ADCtrl_NEUN.pdf"), width=6, height=7)
p
dev.off()
sig<-GRanges(c("chr1:9852501-9852660","chr1:233567051-233567110","chr10:123583851-123583910","chr12:49565251-49565360","chr14:70098351-70098460","chr14:88824601-88824710","chr14:88824601-88824710","chr19:49165501-49165510"))
subsetByOverlaps(b, sig[7])$GRN
2
mef_neurn=import("~/MEF2C_OL-NEUN_PT2_test/peak/idr_reproducibility/idr.optimal_peak.regionPeak.gz",format="bed",extraCols=extraCols_narrowPeak)
mef=import("~/MEF2C_AD-CTL_Output/peak/idr_reproducibility/idr.optimal_peak.regionPeak.gz",format="bed",extraCols=extraCols_narrowPeak)
mef2<-import("~/MEF2C_AD-CTL_Output//peak/idr_reproducibility/idr.optimal_peak.regionPeak.gz",format="bed",extraCols=extraCols_narrowPeak)
mef2c<-reduce(c(mef_neurn,mef))
b_mef<-subsetByOverlaps(b[which(b$TF=="MEF2C"),], mef2c)
gene<-"TRPM4"
ex.gr<-signac.gr
ex.gr<-ex.gr[which(ex.gr$gene ==gene)]
ex.gr<-ex.gr[which(ex.gr$group2 %in% c(0,2))]
ex.gr$group2<-ifelse(ex.gr$index==168217, 0, ex.gr$group2)
region<-GRanges(paste0(seqnames(ex.gr)[1],":",min(start(ex.gr)),"-",max(end(ex.gr))))
start(region)<-start(region)-5000
end(region)<-end(region)+5000
min.cutoff<-min(ex.gr$score)*2
min.cutoff<-ifelse(min.cutoff<0, min.cutoff, 0)
max.cutoff<-max(ex.gr$score)*2
DefaultAssay(data)<-"CTpeaks"
Idents(data)<-data$Status
cov_plot<-CoveragePlot(object=data, region=region, assay="CTpeaks",annotation=F,window=300,peaks=F, links=F)&
scale_fill_manual(values=c("blue", "red"))
Links(data)<-ex.gr #full
link_plotA <- LinkPlot.height(data, region=region, min.cutoff=min.cutoff, max.cutoff=max.cutoff)+ylab("Score")
ChIP_track1<-BigwigTrack(region,"~/MEF2C_AD-B5_Output/signal/pooled-rep/basename_prefix.pooled_x_basename_prefix.pooled.pval.signal.bigwig",smooth = 100)+ylab("AD")+theme(legend.position="none")+scale_fill_manual(values="red")+ylim(0,10)
ChIP_track2<-BigwigTrack(region,"~/MEF2C_AD-CTL_Output/signal/pooled-rep/basename_prefix.pooled_x_basename_prefix.pooled.pval.signal.bigwig",smooth = 100)+ylab("Ctrl")+theme(legend.position="none")+ylim(0,10)+scale_fill_manual(values="blue")
motif_track<-Bed_PeakPlot(resize(motifs@positions$MEF2C_MA0497.1, width=500, fix="center"),region)+ylab("MEF2C")
motif2<-Bed_PeakPlot(resize(motifs@positions$ZEB1_MA0103.3, width=500, fix="center"),region)+ylab("ZEB1")
DefaultAssay(data)<-"ATAC"
gene_plot<-AnnotationPlot(data, region=region)
p<-CombineTracks(plotlist=list(cov_plot,ChIP_track2,ChIP_track1,link_plotA, motif_track, gene_plot), heights=c(6,3,3, 2,1,2))
pdf(paste0("~/scMultiomics_AD/MEF2C_trio_",gene ,"_linkPlot.pdf"), width=6, height=7)
p
dev.off()
extraCols_narrowPeak <- c(signalValue = "numeric", pValue = "numeric",qValue = "numeric", peak = "integer")
ZEB1_DLPFC=import(con="~/ZEB1_DLPFC-NEUN_Output/peak/idr_reproducibility/idr.optimal_peak.regionPeak.gz",format="bed",extraCols=extraCols_narrowPeak)
ZEB1_DLPFC$Tissue="DLPFC"
ZEB1_trio<-subsetByOverlaps(changed[which(changed$TF=="ZEB1"),],ZEB1_DLPFC)
ZEB1_trio<-changed[which(changed$TF=="ZEB1"),]
meh<-unique(b[which(b$TF=="ZEB1" & (b$Excitatory==T | b$Inhibitory==T)),])
meh$olapK27<-F
olap<-findOverlaps(meh, ZEB1_DLPFC)
meh[unique(queryHits(olap))]$olapK27<-T
table( meh$olapK27, meh$category)
###Fig4G,H
links<-readRDS("~/scMultiomics_AD/AD_Ctrl_links_filt0.02_wPerm_wQval.rds")
signac_form<-as.data.frame(links)[,-c(2,3,4)]
colnames(signac_form)[c(19,20,21)]<-c("seqnames","start","end")
signac_form$seqnames<-paste0("chr",signac_form$seqnames)
signac_form$score<-ifelse(signac_form$group=="common", (signac_form$score.x+ signac_form$score.y)/2,
ifelse(signac_form$group=="Ctrl",signac_form$score.y, signac_form$score.x))
signac.gr<-GRanges(signac_form)
signac.gr$group2<-ifelse(signac.gr$link %in% ZEB1_trio$link,0,
ifelse(signac.gr$link %in% b$link, 2, 1))
neuron<-subset(data, predicted.id %in% c("Excitatory","Inhibitory"))
Idents(neuron)<-neuron$Status
ex.gr<-signac.gr[which(signac.gr$CT %in% c("Excitatory","Inhibitory","Excitatory,Inhibitory","Inhibitory,Excitatory")),]
ex.gr<-signac.gr
ex.gr<-ex.gr[which(ex.gr$gene =="GABRA5")]
ex.gr<-ex.gr[which(ex.gr$group2 %in% c(0,2))]
ex.gr<-ex.gr[-1,]
region<-GRanges(paste0(seqnames(ex.gr)[1],":",min(start(ex.gr)),"-",max(end(ex.gr))))
start(region)<-start(region)-5000
end(region)<-end(region)+5000
min.cutoff<-min(ex.gr$score)*2
min.cutoff<-ifelse(min.cutoff<0, min.cutoff, 0)
max.cutoff<-max(ex.gr$score)*2
DefaultAssay(neuron)<-"CTpeaks"
cov_plot<-CoveragePlot(object=neuron, region=region, assay="CTpeaks",annotation=F,window=300,peaks=F, links=F)&
scale_fill_manual(values=c("blue", "red"))
Links(data)<-ex.gr #full
link_plotA <- LinkPlot.height(data, region=region, min.cutoff=min.cutoff, max.cutoff=max.cutoff)+ylab("Score")
load(file="~/Nott_k27ac.rda")
ch<-import.chain("~/liftover/hg19ToHg38.over.chain")
Nott_k27ac_38<-liftOver(Nott_k27ac, ch)
Nott_k27ac_38<-unlist(Nott_k27ac_38) #lost 1,700
Nott_neuron<-Nott_k27ac_38[which(Nott_k27ac_38$cell_type =="Neuron"),]
k27_track<-Bed_PeakPlot(Nott_neuron, region)+ylab("Neuron H3K27ac")
ChIP_track<-BigwigTrack(region,"~/ZEB1_DLPFC-NEUN_Output/signal/pooled-rep/basename_prefix.pooled_x_basename_prefix.pooled.pval.signal.bigwig",smooth = 300)+ylab("ZEB1 ChIP")+theme(legend.position="none")
motif_track<-Bed_PeakPlot(resize(motifs@positions$ZEB1_MA0103.3, width=500, fix="center"),region)+ylab("ZEB1 motif")
DefaultAssay(data)<-"ATAC"
gene_plot<-AnnotationPlot(data, region=region)
p<-CombineTracks(plotlist=list(cov_plot,ChIP_track,link_plotA, k27_track, motif_track, gene_plot), heights=c(6,3,2,1,1,2))
pdf(paste0("~/scMultiomics_AD/ZEB1_trio_GABRA5_linkPlot_zoomed.pdf"), width=6, height=7)
p
dev.off()
pdf(paste0("~/scMultiomics_AD/ZEB1_trio_GABRA5_linkPlot_justchip.pdf"), width=6, height=3)
ChIP_track
dev.off()
###########
# BY cell type
donkey<-subset(data, predicted.id %in% c("Excitatory","Inhibitory","Microglia"))
DefaultAssay(donkey)<-"CTpeaks"
Idents(donkey)<-donkey$predicted.id
cov_plot<-CoveragePlot(object=donkey, region=region, assay="CTpeaks",annotation=F,window=300,peaks=F, links=F)&scale_fill_manual(values=c("cornflowerblue","mediumorchid3", "seagreen3"))
p<-CombineTracks(plotlist=list(cov_plot,ChIP_track,link_plotA, k27_track, motif_track, gene_plot), heights=c(6,3,2,1,1,2))
pdf(paste0("~/scMultiomics_AD/ZEB1_trio_GABRA5_linkPlot_byCT.pdf"), width=6, height=7)
p
dev.off()
df<-data.frame(ZEB1=data@assays$RNA@data["ZEB1",], GABRA5=data@assays$RNA@data["GABRA5",], Status=data$Status, ct=data$predicted.id, subs=data$subs)
df2<-df[which(df$ct %in% c("Excitatory", "Inhibitory")),]
stat.test <- df2 %>%
group_by(subs) %>%
t_test(ZEB1 ~ Status) %>%
adjust_pvalue() %>%
add_significance("p.adj")
stat.test <- stat.test %>% add_xy_position(x = "subs")
stat.test2 <- df2 %>%
group_by(subs) %>%
t_test(GABRA5 ~ Status) %>%
adjust_pvalue() %>%
add_significance("p.adj")
stat.test2 <- stat.test2 %>% add_xy_position(x = "subs")
pdf("~/scMultiomics_AD/ZEB1_GABRA5_neuronSubtypes.pdf", width=8, height=5)
ggplot(df2, aes(x=subs,y=ZEB1,fill=Status))+geom_boxplot()+theme_classic()+stat_pvalue_manual(stat.test, label="p")
ggplot(df2, aes(x=subs,y=GABRA5,fill=Status))+geom_boxplot()+theme_classic()
dev.off()
trio_val<-subsetByOverlaps(b, gRNA[which(gRNA$Significant==T),])
noquote(t(t(trio_val$GRN)))
3 trios that also overlap significant luciferase region
“269545-MEF2C_MA0497.1-CCSER1”
“138088-RARB_MA1552.1-JPH3”
“213871-SOX10_MA0442.2-ADAMTS1”
c2.df<-as.data.frame(c2)
c2.df$score<-ifelse(c2.df$group=="AD", c2.df$score.x,
ifelse(c2.df$group=="Ctrl", c2.df$score.y, (c2.df$score.x+ c2.df$score.y)/2))
c2.df<-c2.df[order(-c2.df$score),]
c2.df<-c2.df[!duplicated(c2.df$gene),]
c2.df$peak<-paste0(c2.df$seqnames,"-",c2.df$start-1,"-", c2.df$end)
c2.df<-c2.df[!duplicated(c2.df$peak),]
c2.df$dir<-ifelse(c2.df$score<0, "down","up")
c2.df<-c2.df[which(c2.df$dir=="up"),]
acc<-as.data.frame(AverageExpression(data, group.by=c("predicted.id","Status"),assay="CTpeaks",features=c2.df$peak))
gex<-as.data.frame(AverageExpression(data, group.by=c("predicted.id","Status"), assay="RNA", features=c2.df$gene))
acc<-acc[,-c(3,4,15,16)]
gex<-gex[,-c(3,4,15,16)]
acc.scale<-t(apply(acc,1, scale))
gex.scale<-t(apply(gex,1,scale))
colnames(acc.scale)<-colnames(acc)
colnames(gex.scale)<-colnames(gex)
acc.scale<-acc.scale[complete.cases(gex.scale),]
gex.scale<-gex.scale[complete.cases(gex.scale),]
c2.df<-c2.df[which(c2.df$gene %in% rownames(gex.scale)),]
ha2<-HeatmapAnnotation(celltype=rep(c("Ast","Exc","Inh","Mic","Oli","OPC"), each=2), col= list(celltype=c("Ast"="darkgoldenrod1","Exc"="cornflowerblue","Inh"="seagreen3","Mic"="mediumorchid3","Oli"="coral3","OPC"="firebrick")), show_legend=F)
ha<-HeatmapAnnotation(Status=rep(c("AD","Ctrl"), 6), col=list(Status=c("AD"="red","Ctrl"="blue")), show_legend=FALSE)
ha<-c( ha, ha2)
ra<-rowAnnotation(Group=c2.df$group, `Score`=c2.df$score, col=list(Group=c("common"="grey50","AD"="red","Ctrl"="blue"), Dir=c("up"="yellow","down"="purple"), `Score`=colorRamp2(c(0,0.25,0.75,1),c("white",rev(mako(3))))))
ht1=Heatmap(acc.scale, cluster_rows=T,show_row_dend=F,cluster_columns=F,col=colorRamp2(c(quantile(acc.scale, probs=0.05),0,2), c("## 290230","## CC4678FF","## F0F921FF" )), name="Accessibility Z", show_column_names=F, show_row_names=F, column_title=NULL, top_annotation=ha, row_dend_reorder=T, use_raster=T,raster_quality=5,row_split=c2.df$group)
ht2=Heatmap(gex.scale, cluster_rows=F,show_row_dend=F,cluster_columns=F,col=colorRamp2(c(quantile(gex.scale, probs=0.05, na.rm=T),0,quantile(gex.scale, probs=0.95, na.rm=T)),viridis(3)), name="Expression Z", show_column_names=F, show_row_names=F, column_title=NULL,row_names_gp = gpar(fontsize = 3), top_annotation=ha, right_annotation=ra, row_title_gp=gpar(fontsize=0), use_raster=T, raster_quality=5)
pdf("~/scMultiomics_AD/Acc_GEX_link_filtered.pdf")
ht1+ht2
dev.off()
a10.gr<-c2.2[which(c2.2$Freq>=80 & c2.2$group !="Ctrl"),]
c10.gr<-c2.2[which(c2.2$Freq>=80 & c2.2$group !="AD"),]
t1<-as.data.frame(table(a10.gr$gene))
t2<-as.data.frame(table(c10.gr$gene))
t<-merge(t1,t2,by="Var1",all=T)
colnames(t)<-c("Var1","AD","Ctrl")
t<-melt(t)
p1<-as.data.frame(table(a10.gr$start))
p2<-as.data.frame(table(c10.gr$start))
p<-merge(p1,p2,by="Var1",all=T)
colnames(p)<-c("Var1","AD","Ctrl")
p<-melt(p)
pdf("~/scMultiomics_AD/AD_linksPerGene_filt.pdf", height=5,width=7)
a=ggplot(t, aes(x=log(value), color=variable,fill=variable))+geom_density(aes(y = ..count..), alpha=0.2, adjust=2)+xlab("log(Links per Gene)")+theme_classic()+
scale_fill_manual(values=c("red","blue"))+scale_color_manual(values=c("red","blue"))+
theme(axis.text=element_text(size=8), legend.position="none")+ylab("counts")+
geom_vline(xintercept=log(4), linetype="dashed" )+geom_vline(xintercept=log(13))+geom_vline(xintercept=log(28), linetype="dashed")
b=ggplot(p, aes(x=log(value), color=variable,fill=variable))+geom_density( aes(y = ..count..),alpha=0.2, adjust=4)+xlab("log(Links per Peak)")+theme_classic()+
scale_fill_manual(values=c("red","blue"))+scale_color_manual(values=c("red","blue"))+
theme(axis.text=element_text(size=8), legend.position="none") +ylab("counts")+
geom_vline(xintercept=log(1), linetype="dashed" )+geom_vline(xintercept=log(2))+geom_vline(xintercept=log(3), linetype="dashed")
a+b
dev.off()
t$value2<-ifelse(t$value>39,40,t$value)
p$value2<-ifelse(p$value>3, 4,p$value)
pdf("~/scMultiomics_AD/AD_linksPerGene_Perm80.pdf", height=5,width=7)
a=ggplot(t, aes(x=value2, color=variable,fill=variable))+geom_histogram(position="dodge", alpha=0.2, binwidth=2)+xlab("Links per Gene")+theme_classic()+
scale_fill_manual(values=c("red","blue"))+scale_color_manual(values=c("red","blue"))+
theme(axis.text=element_text(size=8), legend.position="none")+ylab("counts")+
scale_x_continuous(breaks=c(1,20,40),labels=c("1","20",">40"))
b=ggplot(p, aes(x=value2, color=variable,fill=variable))+geom_bar(position="dodge", alpha=0.2)+xlab("Links per Peak")+theme_classic()+
scale_fill_manual(values=c("red","blue"))+scale_color_manual(values=c("red","blue"))+
theme(axis.text=element_text(size=8), legend.position="none") +ylab("counts")+scale_x_continuous(breaks=c(1,2,3,4),labels=c("1","2","3",">3"))
a+b
dev.off()
new_links<-links_200.anno
a10_ct<-new_links[which(new_links$group !="Ctrl"),]
groups<-strsplit(a10_ct$CT, split=",")
t<-lapply(groups, function(i) i[[1]][!(i[[1]] %in% c("Endothelial","Pericytes"))])
t<-lapply(groups, function(i) i[order(i)])
t<-lapply(t, paste, collapse="&")
t<-as.character(t)
a10_ct$groups<-t
c10_ct<-new_links[which(new_links$group !="AD"),]
groups<-strsplit(c10_ct$CT, split=",")
t<-lapply(groups, function(i) i[[1]][!(i[[1]] %in% c("Endothelial","Pericytes"))])
t<-lapply(groups, function(i) i[order(i)])
t<-lapply(t, paste, collapse="&")
t<-as.character(t)
c10_ct$groups<-t
tab<-table(a10_ct$groups)
tab<-head(tab[order(-tab)], n=15)
tab2<-table(c10_ct$groups)
tab2<-head(tab2[order(-tab2)], n=15)
tab<-tab[names(tab) %in% names(tab2)]
tab2<-tab2[names(tab2) %in% names(tab)]
tab2<-tab2[names(tab)] ## do same order
u<-upset(fromExpression(tab), nintersects=12, nsets=6, sets.bar.color=c(
"cornflowerblue","mediumorchid3","seagreen3","coral3","darkgoldenrod1","firebrick"), main.bar.color="red3", text.scale=1.3 , mainbar.y.label="AD Links", mainbar.y.max=60000, show.numbers=F, keep.order=T, order.by=c("freq","degree"))
pdf("~/scMultiomics_AD/AD_FL_upset_filt_XY.pdf", width=8, height=5)
u
dev.off()
u2<-upset(fromExpression(tab2), nintersects=12, nsets=6, sets.bar.color=c(
"cornflowerblue","seagreen3","coral3","darkgoldenrod1","mediumorchid3","firebrick"), main.bar.color="blue", text.scale=1.3, mainbar.y.label="Ctrl Links", mainbar.y.max=60000, show.numbers=F, keep.order=T, order.by=c("freq","degree"))
pdf("~/scMultiomics_AD/Ctrl_FL_upset_filt_XY.pdf", width=8, height=5)
u2
dev.off()
load("~/iCellGABA_CRE_ENCODEstyle_GR.Rvar")
enc=full_annot_encode_cre_def_GR
enc<-enc[!grepl("iCell",enc$accesionLabel),]
u2<-unique(c2.2)
ol_enc=findOverlaps(GRanges(u2),enc)
c2_enc<-u2[queryHits(ol_enc),]
c2_enc$encLab<-enc[subjectHits(ol_enc),]$encodeLabel
c2_enc<-c2_enc[!duplicated(c2_enc$link),]
d2<-merge(c2_enc, u2, by="link", all=T)
d2$encLab<-ifelse(is.na(d2$encLab)==T, "None",d2$encLab)
## grouped together
tmp<-d2
## tmp$numCT<-str_count(tmp$CT.x,",")
## tmp<-tmp[which(tmp$numCT==0),]
a<-prop.table(table(tmp$encLab, tmp$Astrocytes.y),2)
m<-prop.table(table(tmp$encLab, tmp$Microglia.y),2)
e<-prop.table(table(tmp$encLab, tmp$Excitatory.y),2)
i<-prop.table(table(tmp$encLab, tmp$Inhibitory.y),2)
o<-prop.table(table(tmp$encLab, tmp$Oligodendrocytes.y),2)
p<-prop.table(table(tmp$encLab, tmp$OPCs.y),2)
df<-cbind(a[,2],m[,2],e[,2],i[,2],o[,2],p[,2])
colnames(df)<-c("Ast","Mic","Exc","Inh","Oli","OPC")
df<-as.data.frame(df)
df$annot<-rownames(df)
df<-melt(df)
pdf("~/scMultiomics_AD/Link_annot_byCT_filtered_encode2.pdf", width=8, height=3)
ggplot(df, aes(x=value, y=variable, fill=annot))+geom_bar(stat="identity")+xlab("Proportion of Linked peaks")+ylab("Cell type")+theme_classic()+scale_fill_manual(values=c("dodgerblue","gold","lightpink","orange","red","forestgreen"),labels=c("CTCF-only","dELS","DNase-H3K4me3","pELS","PLS","other"))+theme(legend.position="right", axis.text=element_text(size=12),axis.title=element_text(size=15), legend.text=element_text(size=12))
dev.off()
## up and down split
d2$score<-ifelse(d2$group=="AD",d2$score.x, d2$score.y)
tmp<-d2[which(d2$score>0),]
tmp$numCT<-str_count(tmp$CT,",")
tmp<-tmp[which(tmp$numCT==0),]
a<-prop.table(table(tmp$ann, tmp$Astrocytes),2)
m<-prop.table(table(tmp$ann, tmp$Microglia),2)
e<-prop.table(table(tmp$ann, tmp$Excitatory),2)
i<-prop.table(table(tmp$ann, tmp$Inhibitory),2)
o<-prop.table(table(tmp$ann, tmp$Oligodendrocytes),2)
p<-prop.table(table(tmp$ann, tmp$OPCs),2)
df<-cbind(a[,2],m[,2],e[,2],i[,2],o[,2],p[,2])
colnames(df)<-c("Ast","Mic","Exc","Inh","Oli","OPC")
df<-as.data.frame(df)
df$annot<-rownames(df)
df<-melt(df)
tmp<-d2[which(d2$score<0),]
tmp$numCT<-str_count(tmp$CT,",")
tmp<-tmp[which(tmp$numCT==0),]
a<-prop.table(table(tmp$ann, tmp$Astrocytes),2)
m<-prop.table(table(tmp$ann, tmp$Microglia),2)
e<-prop.table(table(tmp$ann, tmp$Excitatory),2)
i<-prop.table(table(tmp$ann, tmp$Inhibitory),2)
o<-prop.table(table(tmp$ann, tmp$Oligodendrocytes),2)
p<-prop.table(table(tmp$ann, tmp$OPCs),2)
df2<-cbind(a[,2],m[,2],e[,2],i[,2],o[,2],p[,2])
colnames(df2)<-c("Ast","Mic","Exc","Inh","Oli","OPC")
df2<-as.data.frame(df2)
df2$annot<-rownames(df2)
df2<-melt(df2)
df$dir<-"pos"
df2$dir<-"neg"
df<-rbind(df, df2)
pdf("~/scMultiomics_AD/Link_annot_byCT_filtered_encode.pdf", width=5, height=8)
ggplot(df, aes(y=value, x=dir, fill=annot))+geom_bar(stat="identity")+xlab("Proportion of Linked peaks")+ylab("Cell type")+theme_classic()+scale_fill_brewer(palette="Dark2")+theme(legend.position="right", axis.text=element_text(size=12),axis.title=element_text(size=15), legend.text=element_text(size=12))+facet_wrap(~variable)
dev.off()
load("~/iCellGABA_CRE_ENCODEstyle_GR.Rvar")
enc=full_annot_encode_cre_def_GR
enc<-enc[!grepl("iCell",enc$accesionLabel),]
c2<-readRDS("~/scMultiomics_AD/AD_Ctrl_links_filt0.02_wPerm_wQval.rds")
c2.2<-GRanges(c2)
u2<-unique(c2.2)
all<-import("~/scMultiomics_AD/CT_filtered_peaks.srt.bed")
over<-findOverlaps(all, u2)
all$Linked<-F
all[queryHits(over)]$Linked<-T
all$index<-seq(1, length(all))
ol_enc=findOverlaps(GRanges(all),enc)
c2_enc<-all[queryHits(ol_enc),]
c2_enc$encLab<-enc[subjectHits(ol_enc),]$encodeLabel
c2_enc<-c2_enc[!duplicated(c2_enc$index),]
all$encLab<-"None"
d2<-c(c2_enc, all[!(all$index %in% c2_enc$index)])
t<-table(d2$encLab, d2$Linked)
## CTCF
chi<-rbind(t[1,],colSums(t[-1,]))
chisq.test(chi)
pval<-phyper(q =( chi[1,2]-1)/1000000, m = chi[1,1]/1000000,n = chi[2,1]/1000000, k = (chi[1,2]+chi[2,2])/1000000, lower.tail=F)
FE<-(chi[1,2]*chi[2,1])/(chi[1,1]*chi[2,2])
## dELS
chi<-rbind(t[2,],colSums(t[-2,]))
chisq.test(chi)
pval<-c(pval,phyper(q =( chi[1,2]-1)/1000000, m = chi[1,1]/1000000,n = chi[2,1]/1000000, k = (chi[1,2]+chi[2,2])/1000000, lower.tail=F))
FE<-c(FE,(chi[1,2]*chi[2,1])/(chi[1,1]*chi[2,2]))
## DNAase
chi<-rbind(t[3,],colSums(t[-3,]))
chisq.test(chi)
pval<-c(pval,phyper(q =( chi[1,2]-1)/1000000, m = chi[1,1]/1000000,n = chi[2,1]/1000000, k = (chi[1,2]+chi[2,2])/1000000, lower.tail=F))
FE<-c(FE,(chi[1,2]*chi[2,1])/(chi[1,1]*chi[2,2]))
## pELS
chi<-rbind(t[4,],colSums(t[-4,]))
chisq.test(chi)
pval<-c(pval,phyper(q =( chi[1,2]-1)/1000000, m = chi[1,1]/1000000,n = chi[2,1]/1000000, k = (chi[1,2]+chi[2,2])/1000000, lower.tail=F))
FE<-c(FE,(chi[1,2]*chi[2,1])/(chi[1,1]*chi[2,2]))
## PLS
chi<-rbind(t[5,],colSums(t[-5,]))
chisq.test(chi)
pval<-c(pval,phyper(q =( chi[1,2]-1)/1000000, m = chi[1,1]/1000000,n = chi[2,1]/1000000, k = (chi[1,2]+chi[2,2])/1000000, lower.tail=F))
FE<-c(FE,(chi[1,2]*chi[2,1])/(chi[1,1]*chi[2,2]))
names(FE)<-c("CTCF","dELS","DNase-H3K4me3","pELS","PLS")
names(pval)<-c("CTCF","dELS","DNase-H3K4me3","pELS","PLS")
CTCF 2.2<e-16 DELS=3.058e-09
Total ATAC UMIs,1199259332 Mean ATAC UMIs per cell,11385 Mean Peaks per cell,5499 Total ATAC Peaks,150561 Total GEX UMIs,1984992816 Mean GEX UMIs per cell,18845 Mean Genes per cell,3276 Total Genes Expressed,27264
c2$ATAC_num<-str_count(c2$CT, ",")+1
c2$k27_num<-str_count(c2$k27, "&")+1
Astrocytes<-unique(c2[grepl("Astrocytes",c2$CT),])
Astrocytes$found<-grepl("Astrocytes",Astrocytes$k27)
Astrocytes$specific<-ifelse(Astrocytes$ATAC_num==1, TRUE, F)
Atab<-as.data.frame(table(Astrocytes$specific, Astrocytes$found))
colnames(Atab)<-c("Specific","K27","Freq")
Atab$ct<-"Ast"
Excitatory<-unique(c2[grepl("Excitatory",c2$CT),])
Excitatory$found<-grepl("GLU",Excitatory$k27) | grepl("Neuron", Excitatory$k27)
Excitatory$specific<-ifelse(Excitatory$ATAC_num==1, TRUE, F)
Etab<-as.data.frame(table(Excitatory$specific, Excitatory$found))
colnames(Etab)<-c("Specific","K27","Freq")
Etab$ct<-"Exc"
Inhibitory<-unique(c2[grepl("Inhibitory",c2$CT),])
Inhibitory$found<-grepl("GABA",Inhibitory$k27) | grepl("Neuron", Inhibitory$k27)
Inhibitory$specific<-ifelse(Inhibitory$ATAC_num==1, TRUE, F)
Itab<-as.data.frame(table(Inhibitory$specific, Inhibitory$found))
colnames(Itab)<-c("Specific","K27","Freq")
Itab$ct<-"Inh"
Oligodendrocytes<-unique(c2[grepl("Oligodendrocytes",c2$CT),])
Oligodendrocytes$found<-grepl("Oligodendrocytes",Oligodendrocytes$k27)
Oligodendrocytes$specific<-ifelse(Oligodendrocytes$ATAC_num==1, TRUE, F)
Otab<-as.data.frame(table(Oligodendrocytes$specific, Oligodendrocytes$found))
colnames(Otab)<-c("Specific","K27","Freq")
Otab$ct<-"Oli"
Microglia<-unique(c2[grepl("Microglia",c2$CT),])
Microglia$found<-grepl("Microglia",Microglia$k27)
Microglia$specific<-ifelse(Microglia$ATAC_num==1, TRUE, F)
Mtab<-as.data.frame(table(Microglia$specific, Microglia$found))
colnames(Mtab)<-c("Specific","K27","Freq")
Mtab$ct<-"Mic"
df<-rbind(Atab, Etab, Itab, Otab, Mtab)
df$color<-paste0(df$Specific,"-",df$K27)
df$color<-ifelse(df$color=="FALSE-FALSE","TRUE-FALSE", df$color)
pdf("~/scMultiomics_AD/Linked_peaks_olapk27_barPlot_filtered_XY.pdf")
ggplot(df, aes(y=Freq/1000, x=Specific, fill=K27))+geom_bar(stat="identity", color="grey40")+ theme_classic()+ylab("## Linked peaks (x1,000)") + xlab("")+scale_x_discrete(labels=c("Shared", "Specific"))+
theme(axis.text.x=element_text(angle=90, size=15), plot.title=element_text(hjust=0.5), legend.position="top", axis.title.y=element_text(size=17), legend.title=element_text(size=0), legend.text=element_text(size=15), axis.text.y=element_text(size=15))+facet_wrap(~ct, ncol=5, strip.position="bottom") +scale_fill_manual(labels=c("No Overlap","Overlap H3K27ac"),values=c("grey80","darkseagreen2"))
dev.off()
df$ctSpec<-ifelse(df$Specific==T, "Specific","Shared")
pdf("~/scMultiomics_AD/Linked_peaks_olapk27_barPlot_filtered_split.pdf")
ggplot(df, aes(y=Freq/1000, x=ct, fill=K27))+geom_bar(stat="identity", color="grey40")+ theme_classic()+ylab("## Linked peaks (x1,000)") + xlab("")+
theme(axis.text.x=element_text(angle=90, size=15), plot.title=element_text(hjust=0.5), legend.position="top", axis.title.y=element_text(size=17), legend.title=element_text(size=0), legend.text=element_text(size=15), axis.text.y=element_text(size=15))+scale_fill_manual(labels=c("No Overlap","Overlap H3K27ac"),values=c("grey80","darkseagreen2"))+facet_wrap(~ctSpec, scales="free")
dev.off()
df2<-df[which(df$Specific==T),]
rr<-c()
for(i in seq(1,9, by=2)){
r<-df2[i+1,3]/(df2[i,3]+df2[i+1,3])
rr<-c(rr,r)
}
## mean specific 0.7870
## mean non-specific 0.6746
Atab<-as.data.frame(table(Astrocytes$found))
colnames(Atab)<-c("K27","Freq")
Atab$ct<-"Ast"
Etab<-as.data.frame(table( Excitatory$found))
colnames(Etab)<-c("K27","Freq")
Etab$ct<-"Exc"
Itab<-as.data.frame(table(Inhibitory$found))
colnames(Itab)<-c("K27","Freq")
Itab$ct<-"Inh"
Otab<-as.data.frame(table( Oligodendrocytes$found))
colnames(Otab)<-c("K27","Freq")
Otab$ct<-"Oli"
Mtab<-as.data.frame(table( Microglia$found))
colnames(Mtab)<-c("K27","Freq")
Mtab$ct<-"Mic"
df2<-rbind(Atab, Etab, Itab, Otab, Mtab)
rr<-c()
for(i in seq(1,9, by=2)){
r<-df2[i+1,2]/(df2[i,2]+df2[i+1,2])
rr<-c(rr,r)
}
signac_form<-as.data.frame(c2)[,-c(1,2,3)]
colnames(signac_form)[c(19,20,21)]<-c("seqnames","start","end")
signac_form$seqnames<-paste0("chr",signac_form$seqnames)
signac_form$score<-ifelse(signac_form$group=="common", (signac_form$score.x+ signac_form$score.y)/2,
ifelse(signac_form$group=="Ctrl",signac_form$score.y, signac_form$score.x))
signac.gr<-GRanges(signac_form)
signac.gr$group2<-ifelse(signac.gr$group=="AD",0,
ifelse(signac.gr$group=="common",1,2))
gene="KANSL1"
tmp.gr<-signac.gr[which(signac.gr$gene==gene),]
ex.gr<-tmp.gr[which(tmp.gr$Excitatory==T | tmp.gr$Inhibitory==T),]
min.cutoff<-min(tmp.gr$score)*2
min.cutoff<-ifelse(min.cutoff<0, min.cutoff, 0)
max.cutoff<-max(tmp.gr$score)*2
neuron<-subset(data, predicted.id %in% c("Excitatory","Inhibitory"))
Idents(neuron)<-neuron$Status
region<-GRanges(paste0(seqnames(tmp.gr)[1],":",min(start(tmp.gr))-1000,"-",max(end(tmp.gr)+1000)))
neuron$bin<-paste0(neuron$predicted.id, "-",neuron$Status)
Idents(neuron)<-neuron$bin
Idents(neuron)<-factor(Idents(neuron), levels=c("Excitatory-AD","Excitatory-Ctrl","Inhibitory-AD","Inhibitory-Ctrl"))
DefaultAssay(neuron)<-"CTpeaks"
cov_plot<-CoveragePlot(object=neuron, region=region, assay="CTpeaks",annotation=F, idents=c("Excitatory-AD","Excitatory-Ctrl"),window=5000,peaks=F, links=F, region.highlight=
resize(GRanges("chr17:46224763-46225789"), width=8000,fix="center"))&
scale_fill_manual(values=c("dodgerblue4", "cornflowerblue"))
Links(data)<-ex.gr ## full
link_plotA <- LinkPlot.height(data, region=region, min.cutoff=min.cutoff, max.cutoff=max.cutoff)+ylab("Score")
bgp<-Bed_GWASPlot(GRanges("chr17:46224763-46225789"), region)+ylab("")
#dap_plot<-Bed_PeakPlot(GRanges(c2_DAP[which(c2_DAP$isDAP==T & (c2_DAP$celltype=="Excitatory" | c2_DAP$celltype=="Inhibitory") ),]), region)
## I went step by step thorugh function in plot dependencies
enc.annot<-peak.plot+ scale_fill_manual(values=c("dodgerblue","gold","lightpink","orange","red",),labels=c("CTCF-only","dELS","DNase-H3K4me3","pELS","PLS"))
## expr_plot <- ExpressionPlot(object = data,features = gene,assay = "RNA", idents=c("Astrocytes","Excitatory","Inhibitory","Oligodendrocytes","Microglia"))&scale_fill_manual(limits=c("Astrocytes","Excitatory","Inhibitory","Oligodendrocytes","Microglia"),values=c("darkgoldenrod1","cornflowerblue","seagreen3","coral3","mediumorchid3"))
DefaultAssay(data)<-"ATAC"
gene_plot<-AnnotationPlot(data, region=region)
p<-CombineTracks(plotlist=list(cov_plot,link_plotA, bgp,gene_plot), heights=c(6,2,1,2))
pdf(paste0("~/scMultiomics_AD/",gene,"_linkPlot.pdf"), width=8, height=8)
p
dev.off()
kansl1<-data.frame(kansl1=as.vector(data[["RNA"]]["KANSL1",]), celltype=data$predicted.id, Status=data$Status)
pdf("~/scMultiomics_AD/KANSL1_boxplots.pdf", width=8,height=2)
ggplot(kansl1[!(kansl1$celltype %in% c("Endothelial","Pericytes")),], aes(x=Status,y=kansl1, fill=celltype))+geom_boxplot()+theme_classic()+facet_wrap(~celltype, ncol=6)+ylab("KANSL1 expr.")+scale_fill_manual(values=c("darkgoldenrod1","cornflowerblue","seagreen3","mediumorchid3","coral3","firebrick"))+theme(legend.position="none")
dev.off()
tab<-prop.table(table(data$predicted.id, data$id),2)
Status<-c("Ctrl","Ctrl","Ctrl","AD","Ctrl","AD","AD","AD","AD","AD","AD","Ctrl","Ctrl","Ctrl","Ctrl")
df<-as.data.frame(t(tab))
df$Status<-rep(Status, nrow(tab))
stat.test <- df %>%
group_by(Var2) %>%
t_test(Freq ~ Status) %>%
adjust_pvalue() %>%
add_significance("p.adj")
stat.test <- stat.test %>% add_xy_position(x = "Var2")
pdf("~/scMultiomics_AD/ALL_CT_BOXPLOT_prop_tt.pdf", width=10,height=5)
ggboxplot(df, y="Freq",x="Var2", fill="Status", alpha=0.8)+stat_pvalue_manual(stat.test, label="p")+xlab("")+scale_fill_manual(values=c("red","blue"))
dev.off()
md<-data@meta.data
cells=rownames(md[which(md$subs !="Exc_10"),])
pdf("~/scMultiomics_AD/All_subclusters_WNN.pdf", width=15, height=5)
p1<-DimPlot(data, cells=cells,reduction="harmony.rna.umap",group.by="subs", pt.size=0.1,cols=c(
"Ast_0"="darkgoldenrod1","Ast_1"="darkgoldenrod3", "Ast_2"="gold2",
"Ast_3"="goldenrod","Ast_4"="goldenrod1",
"End"="gray54",
"Exc_0"="royalblue1", "Exc_1"="deepskyblue2", "Exc_2"="steelblue1", "Exc_3"="steelblue3", "Exc_4"="cornflowerblue", "Exc_5"="royalblue3", "Exc_6"="cadetblue1", "Exc_7"="cadetblue3",
"Exc_8"="dodgerblue","Exc_9"="dodgerblue3",
"Inh_0"="palegreen", "Inh_1"="springgreen", "Inh_2"="darkolivegreen3", "Inh_3"="seagreen3", "Inh_4"="forestgreen", "Inh_5"="darkseagreen", "Inh_6"="palegreen4", "Inh_7"="darkolivegreen1",
"Mic_0"="purple", "Mic_1"="darkorchid4",
"Mic_2"="magenta", "Mic_3"="magenta4",
"Mic_4"="orchid",
"Oli_0"="lightsalmon", "Oli_1"="darksalmon",
"Oli_2"="coral", "Oli_3"="brown1",
"OPC"="firebrick", "Per"="lightpink"))&ggtitle("RNA")& theme(legend.position="none", plot.title = element_text(size=30))
p2<-DimPlot(data, cells=cells,reduction="harmony.atac.umap",group.by="subs", pt.size=0.1,cols=c(
"Ast_0"="darkgoldenrod1","Ast_1"="darkgoldenrod3", "Ast_2"="gold2",
"Ast_3"="goldenrod","Ast_4"="goldenrod1",
"End"="gray54",
"Exc_0"="royalblue1", "Exc_1"="deepskyblue2", "Exc_2"="steelblue1", "Exc_3"="steelblue3", "Exc_4"="cornflowerblue", "Exc_5"="royalblue3", "Exc_6"="cadetblue1", "Exc_7"="cadetblue3",
"Exc_8"="dodgerblue","Exc_9"="dodgerblue3",
"Inh_0"="palegreen", "Inh_1"="springgreen", "Inh_2"="darkolivegreen3", "Inh_3"="seagreen3", "Inh_4"="forestgreen", "Inh_5"="darkseagreen", "Inh_6"="palegreen4", "Inh_7"="darkolivegreen1",
"Mic_0"="purple", "Mic_1"="darkorchid4",
"Mic_2"="magenta", "Mic_3"="magenta4",
"Mic_4"="orchid",
"Oli_0"="lightsalmon", "Oli_1"="darksalmon",
"Oli_2"="coral", "Oli_3"="brown1",
"OPC"="firebrick", "Per"="lightpink"))&ggtitle("ATAC")& theme(legend.position="none",plot.title = element_text(size=30))
p3<-DimPlot(data, cells=cells,reduction="wnn.umap",group.by="subs", pt.size=0.1,cols=c(
"Ast_0"="darkgoldenrod1","Ast_1"="darkgoldenrod3", "Ast_2"="gold2",
"Ast_3"="goldenrod","Ast_4"="goldenrod1",
"End"="gray54",
"Exc_0"="royalblue1", "Exc_1"="deepskyblue2", "Exc_2"="steelblue1", "Exc_3"="steelblue3", "Exc_4"="cornflowerblue", "Exc_5"="royalblue3", "Exc_6"="cadetblue1", "Exc_7"="cadetblue3",
"Exc_8"="dodgerblue","Exc_9"="dodgerblue3",
"Inh_0"="palegreen", "Inh_1"="springgreen", "Inh_2"="darkolivegreen3", "Inh_3"="seagreen3", "Inh_4"="forestgreen", "Inh_5"="darkseagreen", "Inh_6"="palegreen4", "Inh_7"="darkolivegreen1",
"Mic_0"="purple", "Mic_1"="darkorchid4",
"Mic_2"="magenta", "Mic_3"="magenta4",
"Mic_4"="orchid",
"Oli_0"="lightsalmon", "Oli_1"="darksalmon",
"Oli_2"="coral", "Oli_3"="brown1",
"OPC"="firebrick", "Per"="lightpink"))&ggtitle("WNN")& theme(legend.position="none", plot.title = element_text(size=30))
p4<-FeaturePlot(data,cells=cells, features="ATAC.weight", reduction="wnn.umap")+ggtitle("RNA-ATAC weight")+scale_color_viridis()+ theme(plot.title = element_text(size=30))
plot_grid(p1,p2,p3, ncol=3)
dev.off()
pdf("~/scMultiomics_AD/All_subclusters_WNN_part2.pdf", width=10, height=10)
p4
p1+theme(legend.position="bottom")
dev.off()
p1<-DimPlot(data, cells=cells,reduction="wnn.umap",group.by="subs", label=F, pt.size=0.1,cols=c(
"Ast_0"="darkgoldenrod1","Ast_1"="darkgoldenrod3", "Ast_2"="gold2",
"Ast_3"="goldenrod","Ast_4"="goldenrod1",
"End"="gray54",
"Exc_0"="royalblue1", "Exc_1"="deepskyblue2", "Exc_2"="steelblue1", "Exc_3"="steelblue3", "Exc_4"="cornflowerblue", "Exc_5"="royalblue3", "Exc_6"="cadetblue1", "Exc_7"="cadetblue3",
"Exc_8"="dodgerblue","Exc_9"="dodgerblue3",
"Inh_0"="palegreen", "Inh_1"="springgreen", "Inh_2"="darkolivegreen3", "Inh_3"="seagreen3", "Inh_4"="forestgreen", "Inh_5"="darkseagreen", "Inh_6"="palegreen4", "Inh_7"="darkolivegreen1",
"Mic_1"="darkorchid4",
"Mic_2"="magenta", "Mic_3"="magenta4",
"Mic_4"="orchid", "Mic_0"="purple",
"Oli_0"="lightsalmon", "Oli_1"="darksalmon",
"Oli_2"="coral", "Oli_3"="brown1",
"OPC"="firebrick", "Per"="lightpink"))&ggtitle("Cell type")& theme(legend.position="none")
p2<-DimPlot(data, cells=cells,reduction="wnn.umap", group.by="Status",pt.size=0.1, cols=c("AD"="firebrick", "Ctrl"="dodgerblue"))&ggtitle("Disease State")& theme(legend.position="bottom")
p3<-DimPlot(data, cells=cells,reduction="wnn.umap", group.by="Sex", pt.size=0.1, cols=c("F"="khaki", "M"="seagreen"))&ggtitle("Sex")& theme(legend.position="bottom")
p4<-FeaturePlot(data, cells=cells,reduction="wnn.umap", features="Age", pt.size=0.1)&ggtitle("Age")& theme(legend.position="bottom")&scale_color_viridis()
p5<-DimPlot(data, cells=cells,reduction="wnn.umap", group.by="Braak", pt.size=0.1, cols=c("0"="dodgerblue","4"="tomato", "5"="red", "6"="firebrick4"))&ggtitle("Braak")&theme(legend.position="bottom")
pdf("~/scMultiomics_AD/WNN_diseasestat.pdf", width=6, height=6)
p2
dev.off()
## extra<-read.csv("~/scMultiomics_AD/meta_RIN_PMI.csv")
## extra[16,1]<-"HCTZZT"
## meta<-read.csv("~/scMultiomics_AD/MetaData_wNK.csv")
## rownames(meta)<-meta$X
## md.m<-merge(meta, extra, by.x="id",by.y="ID")
## rownames(md.m)<-md.m$X
## data<-AddMetaData(data, md.m)
##
p1<-FeaturePlot(data,cells=cells, reduction="wnn.umap", features="PMI..h.", pt.size=0.1)&ggtitle("PMI")&scale_color_viridis(option="mako"
)& theme(legend.position="bottom")
p2<-FeaturePlot(data, cells=cells,reduction="wnn.umap", features="RIN", pt.size=0.1)&ggtitle("RIN")&scale_color_viridis(option="rocket"
)& theme(legend.position="bottom")
pdf("~/scMultiomics_AD/WNN_moreStats.pdf", width=6, height=6)
p1
p2
dev.off()
GWAS_snps<-import("~//FUMA_hg38_LD.bed", format="bed")
snp_tmp<-read.csv("~/AD_Wightman/GWAS_AD_Wightman_SNPs_including_linkage_disequilibrium.csv")
seqnames<-paste("chr",sapply(strsplit(snp_tmp$uniqID,":"), `[`, 1),sep="")
start<-sapply(strsplit(snp_tmp$uniqID,":"), `[`, 2)
start<-as.numeric(start)
end<-start+1
df<-data.frame(seqnames=seqnames, start=start, end=end)
snp2<-GRanges(df)
snp2$rs<-snp_tmp$rsID
ch<-import.chain("~/hg19ToHg38.over.chain")
snp38<-liftOver(snp2, ch)
snp38<-unlist(snp38) ## lost 7
snp1_2<-c(GWAS_snps,snp38)
over<-findOverlaps(unique(c2),unique(snp38))
c3<-unique(c2)
c3$olap<-0
c3[queryHits(over),]$olap<-1
## c3<-c3[which(c3$olap==1),]
tmp<-unique(c3)
## tmp<-tmp[which(tmp$numCT==0),]
a<-prop.table(table( tmp$olap,tmp$Astrocytes),2)
m<-prop.table(table( tmp$olap,tmp$Microglia),2)
e<-prop.table(table( tmp$olap,tmp$Excitatory),2)
i<-prop.table(table( tmp$olap,tmp$Inhibitory),2)
o<-prop.table(table( tmp$olap,tmp$Oligodendrocytes),2)
p<-prop.table(table( tmp$olap,tmp$OPCs),2)
df<-data.frame(ct=c("Ast","Mic","Exc","Inh","Oli","OPC"),count=c(a[2,2],m[2,2],e[2,2],i[2,2],o[2,2],p[2,2]))
a<-table( tmp$olap,tmp$Astrocytes)
m<-table( tmp$olap,tmp$Microglia)
e<-table( tmp$olap,tmp$Excitatory)
i<-table( tmp$olap,tmp$Inhibitory)
o<-table( tmp$olap,tmp$Oligodendrocytes)
p<-table( tmp$olap,tmp$OPCs)
df2<-cbind(a[,2],m[,2],e[,2],i[,2],o[,2],p[,2])
colnames(df2)<-c("Ast","Mic","Exc","Inh","Oli","OPC")
df2<-as.data.frame(df2)
df2<-t(df2)
frac<-paste0(df2[,2],"/",(df2[,1]+df2[,2]))
df$frac<-frac
pdf("~/scMultiomics_AD/Peak_olap_ADsnp.pdf")
ggplot(df, aes(y=ct, x=count*100, fill=ct))+geom_bar(stat="identity")+theme_classic()+ylab("")+xlab("% of Linked-Peaks Overlap AD SNPs")+scale_fill_manual(values=c("darkgoldenrod1","cornflowerblue","seagreen3","mediumorchid3","coral3","firebrick"))+theme(legend.position="none")+geom_text(aes(label=frac), nudge_x=0.05)+xlim(0,0.5)
dev.off()
gtf<-import("~/STAR/genes.gtf")
gtf<-gtf[which(gtf$type=="gene"),]
gtf$TSS<-ifelse(strand(gtf)=="+", start(gtf), end(gtf))
TSS.df<-data.frame(gene=gtf$gene_name, TSS=gtf$TSS)
tmp<-merge(c2, TSS.df, by="gene")
tmp$p1<-abs(tmp$start-tmp$TSS)
tmp$p2<-abs(tmp$end-tmp$TSS)
tmp$distLinkedGene<-ifelse(tmp$p1<tmp$p2, tmp$p1, tmp$p2)
pdf("~/scMultiomics_AD/Distance_LinkedTSS_byScore.pdf",width=4,height=4)
ggplot(tmp[which(tmp$score>0.2 & tmp$distLinkedGene<=500000),], aes(x=distLinkedGene, y=abs(score)))+geom_bin_2d(bins=100)+theme_classic()+xlab("Distance to Linked-Gene TSS")+ylab("Abs. Score")+scale_fill_continuous(type = "viridis")
dev.off()
pdf("~/scMultiomics_AD/Distance_LinkedTSS_byScore_boxplot.pdf",width=4,height=4)
ggplot(tmp[which(tmp$score>0.2 & tmp$distLinkedGene<=500000),], aes(y=distLinkedGene, x=bin))+geom_boxplot()+theme_classic()+ylab("Distance to Linked-Gene TSS")+xlab("Abs. Score")
dev.off()
## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
## size
gtf<-import("~/STAR/genes.gtf")
gtf<-gtf[which(gtf$type=="gene"),]
gtf$size<-width(gtf)
geneSize<-data.frame(gene=gtf$gene_name, size=gtf$size)
t<-as.data.frame(table(c2$gene))
mer<-merge(t, geneSize, by.x="Var1",by.y="gene")
mer$bin40<-ifelse(mer$Freq>=40, ">=40",
ifelse(mer$Freq<=2, "<=2","other"))
t.test(mer[which(mer$bin40==">=40"),]$size, mer[which(mer$bin40=="<=2"),]$size)
t.test(mer[which(mer$bin40==">=40"),]$size, mer[which(mer$bin40=="other"),]$size)
t.test(mer[which(mer$bin40=="other"),]$size, mer[which(mer$bin40=="<=2"),]$size)
## expression
sumR2<-as.data.frame(sumR)
sumR2$Var1<-rownames(sumR2)
mer2<-merge(t, sumR2, by="Var1")
mer2$bin40<-ifelse(mer2$Freq>=40, ">=40",
ifelse(mer2$Freq<=2, "<=2","other"))
t.test(mer2[which(mer2$bin40==">=40"),]$sumR, mer2[which(mer2$bin40=="<=2"),]$sumR)
t.test(mer2[which(mer2$bin40==">=40"),]$sumR, mer2[which(mer2$bin40=="other"),]$sumR)
t.test(mer2[which(mer2$bin40=="other"),]$sumR, mer2[which(mer2$bin40=="<=2"),]$sumR)
aggregate(sumR~bin40, mer2, quantile)
## avg score
score<-aggregate(abs(score)~gene, c2.2, mean)
mer3<-merge(t, score, by.x="Var1",by.y="gene")
mer3$bin40<-ifelse(mer3$Freq>=40, ">=40",
ifelse(mer3$Freq<=2, "<=2","other"))
t.test(mer3[which(mer3$bin40==">=40"),]$`abs(score)`, mer3[which(mer3$bin40=="<=2"),]$`abs(score)`)
t.test(mer3[which(mer3$bin40==">=40"),]$`abs(score)`, mer3[which(mer3$bin40=="other"),]$`abs(score)`)
t.test(mer3[which(mer3$bin40=="other"),]$`abs(score)`, mer3[which(mer3$bin40=="<=2"),]$`abs(score)`)
## ## ## DON'T INCLUDE OTHER
pdf("~/scMultiomics_AD/Score_Expression_GeneSize_by_LinkPerGene.pdf", width=10, height=6)
p1<-ggplot(mer[which(mer$bin40!="other"),], aes(x=bin40, y=log10(size), color=bin40))+geom_boxplot()+theme_classic()+ylab("log10(Gene Length)")+ stat_compare_means(method = "anova")+theme(legend.position="none")
p2<-ggplot(mer2[which(mer2$bin40!="other" & mer2$sumR<12622522),], aes(x=bin40, y=log10(sumR), color=bin40))+geom_boxplot()+theme_classic()+ylab("log10(Total UMIs)")+ stat_compare_means(method = "t.test")+theme(legend.position="none")
p3<-ggplot(mer3[which(mer3$bin40!="other"),], aes(x=bin40, y=`abs(score)`, color=bin40))+geom_boxplot()+theme_classic()+ylab("Avg. Abs. Score")+ stat_compare_means(method = "t.test")+theme(legend.position="none")
p1+p2+p3
dev.off()
mer$bin40<-factor(mer$bin40, levels=c("<=2", "other",">=40"))
mer2$bin40<-factor(mer2$bin40, levels=c("<=2", "other",">=40"))
mer3$bin40<-factor(mer3$bin40, levels=c("<=2", "other",">=40"))
mycomp<-list(c("<=2", ">=40"), c("other",">=40"), c("<=2","other"))
pdf("~/scMultiomics_AD/Score_Expression_GeneSize_by_LinkPerGene.pdf", width=10, height=6)
p1<-ggplot(mer, aes(x=bin40, y=log10(size), color=bin40))+geom_boxplot()+stat_compare_means(comparisons=mycomp,method="t.test", label="p.signif")+theme_classic()+theme(legend.position="none")
p2<-ggplot(mer2[which(mer2$sumR<12622522),], aes(x=bin40, y=log10(sumR), color=bin40))+geom_boxplot()+theme_classic()+ylab("log10(Total UMIs)")+ stat_compare_means(comparisons=mycomp,method = "t.test",label="p.signif")+theme(legend.position="none")
p3<-ggplot(mer3, aes(x=bin40, y=`abs(score)`, color=bin40))+geom_boxplot()+theme_classic()+ylab("Avg. Abs. Score")+ stat_compare_means(comparisons=mycomp,method = "t.test",label="p.signif")+theme(legend.position="none")
p1+p2+p3
dev.off()
a10.gr<-c2.2[which( c2.2$group !="Ctrl"),]
c10.gr<-c2.2[which( c2.2$group !="AD"),]
t1<-as.data.frame(table(a10.gr$gene))
t2<-as.data.frame(table(c10.gr$gene))
jD<-merge(t1,t2,by="Var1")
colnames(jD)[2:3]<-c("AD","Ctrl")
degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_MAST_ADCtrl_AgeSex.csv")
degs_up<-degs[which(degs$cat=="up"),]
degs_down<-degs[which(degs$cat=="down"),]
jD$deg<-ifelse(jD$Var1 %in% degs_up$gene, "up",
ifelse(jD$Var1 %in% degs_down$gene,"down","NS"))
ad_related<-read.csv("~/AD_gwas_genes_012822.csv")
jD$label<-ifelse(jD$Var1 %in% c(ad_related$Gene, "MAPT"), jD$deg, "NS")## only keep degs that are know ad related
jD$size<-as.factor(ifelse(jD$label=="NS", 0, 1))
jD$alpha=as.factor(ifelse(jD$deg=="NS", 0,
ifelse(jD$label=="NS", 0.5,1)))
lmod<-lm(Ctrl~0+AD,jD)
jD$residuals<-lmod$residuals
jD$alpha<-as.factor(ifelse(jD$residuals>quantile(jD$residuals, probs=0.99) | jD$residuals<quantile(jD$residuals,probs=0.01),1,0 ))
jD$stresiduals<-rstandard(lmod)
jD2<-jD[which((jD$stresiduals>3| jD$residuals< -3)& jD$deg !="NS"),]
pdf("~/scMultiomics_AD/Peaks_per_gene_wDEGs_80Perm_StRes3.pdf", width=4, height=4)
ggplot(jD, aes(x=AD, y=Ctrl, color=deg))+geom_point(aes(color=deg, alpha=alpha),size=0.5)+ scale_color_manual(values=c("blue","grey50","red"))+xlab("## of connected Peaks (AD)")+ylab("## of connected Peaks (Ctrl)")+theme_classic()+theme(legend.position="none", axis.text=element_text(size=12), axis.title=element_text(size=14))+geom_text_repel(data=jD[which(jD$alpha==1&jD$deg!="NS"),], aes(x=AD,y=Ctrl,label=Var1), box.padding=1,point.padding=0,label.padding=1,segment.colour="black", size=3, min.segment.length=0, max.overlaps=60)+geom_abline(slope=1.092114,intercept=0)+geom_abline(slope=1.092114, intercept=24.33, linetype="dashed")+geom_abline(slope=1.092114, intercept=-19.9523, linetype="dashed")+scale_alpha_discrete(range=c(0.1,1))
dev.off()
+geom_text_repel(data=jD[which(jD$label!="NS"),], aes(x=AD,y=Ctrl,label=Var1), box.padding=1,point.padding=1,label.padding=1,segment.colour="black", size=4, min.segment.length=0, max.overlaps=20)+scale_size_discrete(range=c(0.15,1.6))+scale_alpha_discrete(range=c(0.2,1))
jD$residuals<-lmod$residuals
jD$fitted.values<-lmod$fitted.values
jD<-jD[order(jD$residuals),]
jD$Rank<-seq(1,nrow(jD))
pdf("~/scMultiomics_AD/Peaks_per_gene_Residuals.pdf")
ggplot(jD, aes(x=Rank, y=residuals, color=deg))+geom_jitter(width=6)+theme_classic()+scale_color_manual(values=c("blue","grey60","red"))+theme(legend.position="none")+facet_wrap(~deg)+geom_text_repel(data=jD[which(jD$residuals>30 | jD$residuals< -30),], aes(x=Rank, y=residuals, label=Var1),box.padding=1,point.padding=0.3,segment.colour="black", size=2, min.segment.length=0, max.overlaps=80)
dev.off()
p<-ggplot(jD, aes(x=AD, y=Ctrl, color=deg))+geom_point(aes(color=deg),size=0.5)+ scale_color_manual(values=c("blue","grey50","red"))+xlab("## of connected Peaks (AD)")+ylab("## of connected Peaks (Ctrl)")+theme_classic()+theme(legend.position="none", axis.text=element_text(size=12), axis.title=element_text(size=14))
saveWidget(as_widget(ggplotly(p)), file="plotly_Links_per_gene.html", selfcontained=T)
gtf<-import("~/STAR/genes.gtf")
gtf<-gtf[which(gtf$type=="gene"),]
c2<-GRanges(c2)
over<-findOverlaps(c2, gtf)
c2$geneAnnot<-NA
c3<-c2[queryHits(over)]
c3$geneAnnot<-gtf[subjectHits(over)]$gene_name
c3$gene_body<-c3$gene==c3$geneAnnot
c3<-c3[order(c3$gene_body, decreasing==T),]
c3<-c3[!duplicated(c3$link),]
c4<-c(c2[-queryHits(over)], c3)
c4$gene_body<-ifelse(is.na(c4$gene_body)==T, F, c4$gene_body)
c4$gb_p<-ifelse(c4$gene_body==T | (c4$annotation=="Promoter"& c4$distanceToTSS<1000 ),T,F)
prop.table(table(c4$pos, c4$gb_p),1)
size<-data.frame(size=width(gtf), gene=gtf$gene_name)
c4<-merge(as.data.frame(c4), size, by="gene")
t<-as.data.frame(table(c4$gene, c4$gb_p))
colnames(t)[1:2]<-c("gene","gb_p")
t<-merge(t,size, by="gene")
t$long<-t$size>167585.1 #85th percentile
t$bin40<-ifelse(t$Freq>=40, ">=40",
ifelse(t$Freq<=2, "<=2","other"))
t2<-t[which(t$gb_p==F),]
aggregate(Freq~long, t2, quantile)
t.test(t2[which(t2$bin40==">=40"),]$size, t2[which(t2$bin40=="<=2"),]$size)
t.test(t2[which(t2$long==T),]$Freq, t2[which(t2$long==F),]$Freq)
t2<-as.data.frame(table(c4$gene))
colnames(t2)<-c("gene","total")
t2<-merge(t2,size, by="gene")
t2$long<-t2$size>167585.1
t2$bin40<-ifelse(t2$total>=40, ">=40",
ifelse(t2$total<=2, "<=2","other"))
t.test(t2[which(t2$bin40==">=40"),]$size, t2[which(t2$bin40=="<=2"),]$size)
t.test(t2[which(t2$long==T),]$total, t2[which(t2$long==F),]$total)
including gene body links, long genes have a median of 22 links per gene.
degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_MAST_ADCtrl_AgeSex.csv")
c4_degs<-merge(as.data.frame(c4), degs, by="gene")
bin<-c()
for ( i in 1:nrow(c4_degs)){
j<-grepl(c4_degs$celltype[i], c4_degs$CT[i])
bin<-c(bin,j)
}
c4_degs$agree<-bin
t<-table(c4_degs$gene, c4_degs$agree)
table(t[,2])
#only condition specific links
c2_noCommon<-c2_degs[which(c2_degs$group != "common"),]
c2_noCommon$up<-c2_noCommon$avg_log2FC>0
#only agree on CT
c2_sub<-c2_noCommon[which(c2_noCommon$agree==T),]
c2_sub$score_dir<-c2_sub$score>0
noDup<-c2_sub[!duplicated(c2_sub$bin),]
chisq.test(table(c2_sub[which(c2_sub$score_dir==F),]$group, c2_sub[which(c2_sub$score_dir==F),]$up))
chisq.test(table(c2_sub[which(c2_sub$score_dir==T),]$group, c2_sub[which(c2_sub$score_dir==T),]$up))
#how many only have promoter links
length(c2_sub[which(c2_sub$annot=="Promoter"),]$gene)
length(unique(c2_sub[which(c2_sub$annot=="Promoter"),]$gene))
geneP<-unique(c2_sub[which(c2_sub$annot=="Promoter"),]$gene)
#non-promoter DEG links overlap snps
c2_noProm<-c2_sub[which(c2_sub$annot !="Promoter"),]
subs<-SplitObject(data,split.by="predicted.id")
counts<-list()
meta<-list()
for (i in 1:6){
subs[[i]]<-FindMultiModalNeighbors(subs[[i]], reduction.list=list("harmony.rna","harmony.atac"),dims.list=list(1:30,2:50))
subs[[i]]<-FindClusters(subs[[i]], resolution=0.8, graph.name="wknn")
c<-c()
for (j in unique(subs[[i]]$seurat_clusters)){
c<-cbind(c, rowSums(subs[[i]]@assays$CTpeaks@counts[,which(subs[[i]]$seurat_clusters==j & subs[[i]]$Status=="AD")]) )
c<-cbind(c, rowSums(subs[[i]]@assays$CTpeaks@counts[,which(subs[[i]]$seurat_clusters==j & subs[[i]]$Status=="Ctrl")]) )
}
df<-data.frame(Status=rep(c("AD","Ctrl"), length(unique(subs[[i]]$seurat_clusters))), Cluster=rep(unique(subs[[i]]$seurat_clusters), each=2))
rownames(df)<-paste0(df$Status,"-", df$Cluster)
colnames(c)<-paste0(df$Status,"-", df$Cluster)
meta[[i]]<-df
counts[[i]]<-c
}
all<-c()
for (i in 1:6){
dds<-DESeqDataSetFromMatrix(countData=counts[[i]],
colData=meta[[i]], design=~Status)
dds<-DESeq(dds)
res2 <- results(dds, contrast=c("Status","AD","Ctrl"))
res2$celltype<-subs[[i]]$predicted.id[1]
res2$gene<-rownames(res2)
all<-c(all,res2)
}
all<-lapply(all, as.data.frame)
ALL<-rbind(all[[1]],all[[2]],all[[3]],all[[4]],all[[5]],all[[6]])
ALL_sig<-ALL[which(ALL$padj<0.05 & abs(ALL$log2FoldChange)>0.5),]
c2$peak<-paste0(seqnames(c2),"-", start(c2),"-", end(c2))
c2_DAP<-merge(c2, ALL_sig, by.x="peak",by.y="gene", all.x=T)
bin<-c()
for (i in 1:nrow(c2_DAP)){
bin<-c(bin, grepl(c2_DAP$celltype[i], c2_DAP$CT[i]))
}
bin<-ifelse(is.na(bin)==T,F,T)
c2_DAP$isDAP<-bin
actualDAPs<-unique(c2_DAP[which(c2_DAP$isDAP==T),]$peak)
ALL_DAPs<-ALL_sig[which(ALL_sig$gene %in% actualDAPs),]
#NOTE: this will only include DAPs that agree on CT
write.csv(ALL_DAPs, "~/scMultiomics_AD/DAPs_metacell_ADCtrl_DESeq2_CTmatch.csv")
degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_MAST_ADCtrl_AgeSex.csv")
c2_dd<-merge(c2_DAP, degs, by="gene", all.x=T)
c2_dd$dd<-c2_dd$celltype.y==c2_dd$celltype.x
length(unique(c2_dd[which(c2_dd$dd==T),]$peak)) #1215 /8588
length(unique(c2_dd[which(c2_dd$dd==T),]$gene)) #447 /911
dd<-c2_dd[which(c2_dd$dd==T),]
447/911 DEGs are linked to a DAP in the same cell type. And vice versa 1215/8588 DAPs 976/1310 DAP-DEG links agree on log2FC direction
gene="PSEN1"
CT="Oligodendrocytes"
tmp.gr<-signac.gr[which(signac.gr$gene==gene),]
tmp.gr<-tmp.gr[which(tmp.gr$Oligodendrocytes==T ),]
min.cutoff<-min(tmp.gr$score)*2
min.cutoff<-ifelse(min.cutoff<0, min.cutoff, 0)
max.cutoff<-max(tmp.gr$score)*2
region<-GRanges(paste0(seqnames(tmp.gr)[1],":",min(start(tmp.gr))-4000,"-",max(end(tmp.gr)+4000)))
Idents(subs$Oligodendrocytes)<-subs$Oligodendrocytes$Status
DefaultAssay(subs$Oligodendrocytes)<-"CTpeaks"
cov_plot<-CoveragePlot(object=subs$Oligodendrocytes, region=region, assay="CTpeaks",annotation=F, idents=c("AD","Ctrl"),window=800,peaks=F, links=F)&
scale_fill_manual(values=c("red", "blue"))
#
Links(data)<-tmp.gr ## full
link_plotA <- LinkPlot.height(tmp.gr, region=region, min.cutoff=min.cutoff, max.cutoff=max.cutoff)+ylab("Score")
#
dap_plot<-Bed_PeakPlot(GRanges(c2_DAP[which(c2_DAP$isDAP==T & (c2_DAP$celltype=="Oligodendrocytes") ),]), region)
#
DefaultAssay(data)<-"ATAC"
gene_plot<-AnnotationPlot(data, region=region)
p<-CombineTracks(plotlist=list(cov_plot,link_plotA,dap_plot,gene_plot), heights=c(4,2, 1,2))
pdf(paste0("~/scMultiomics_AD/",gene,"_linkPlot_w_DAP_",CT,".pdf"), width=8, height=5)
p
dev.off()
DefaultAssay(data)<-"RNA"
data<-NormalizeData(data)
rs<-rowSums(data@assays$RNA@data)
q<-quantile(rs, probs=0.6)
avgExp<-as.data.frame(AverageExpression(data, assay="RNA", group.by="predicted.id", features=names(rs[which(rs>q)])))
avgExp<-as.matrix(avgExp)
num<-as.data.frame(apply(avgExp, 1, function(i) sum(i>0.5)))
count<-as.data.frame(table(c2$gene))
num$Var1<-rownames(num)
colnames(num)[1]<-"numCT"
mer<-merge(num, count, by="Var1")
mer$spec<-ifelse(mer$numCT<2, T, F)
t.test(mer[which(mer$spec==T),]$Freq, mer[which(mer$spec==F),]$Freq)
c2.mer<-merge(c2, num, by.x="gene",by.y="Var1")
prop.table(table(c2.mer$numCT, c2.mer$ATAC_num), 2)
####################################
degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_celltype_markers_MAST_noFilter.csv")
degs<-degs[which(degs$p_val_adj<0.05 & degs$avg_log2FC>0.2 & degs$cluster !="Endothelial" & degs$cluster!="Pericytes"),]
c2.mer<-merge(c2, degs, by="gene", all=T)
sum(c2.mer$cluster == c2.mer$CT, na.rm=T)
c2.sub<-c2.mer[which(c2.mer$ATAC_num==1 & is.na(c2.mer$cluster)==F & c2.mer$CT !="Pericytes" & c2.mer$CT !="Endothelial"),]
table(c2.sub[which(c2.sub$CT != c2.sub$cluster),]$cluster)
table(c2.sub[which(c2.sub$CT != c2.sub$cluster),]$CT)
c2.sub$mis<-c2.sub$CT == c2.sub$cluster
count$CTdeg<-ifelse(count$Var1 %in% degs$gene, T,F)
aggregate(Freq~CTdeg, count, mean)
bin<-c()
for (i in 1:nrow(c2.mer)){
b<-grepl(c2.mer$cluster[i], c2.mer$CT[i])
bin<-c(bin,b)
}
rs<-as.data.frame(rowSums(data@assays$RNA@counts))
rs$gene<-rownames(rs)
colnames(rs)[1]<-"RNA"
rs$DEG<-rs$gene %in% degs$gene
rs$link<-rs$gene %in% c2$gene
rs2<-rs[which(rs$RNA> quantile(rs$RNA, probs=0.6)),]
fisher.test(table(rs2$DEG, rs2$link))
dbs <- c("GO_Molecular_Function_2021", "GO_Cellular_Component_2021", "GO_Biological_Process_2021", "KEGG_2021_Human","Azimuth_Cell_Types_2021")
go1<-enrichr(unique(c2[which(c2$group=="common" & c2$CT=="Excitatory"),]$gene), dbs)
go2<-enrichr(unique(c2[which(c2$group=="common" & c2$CT=="Inhibitory"),]$gene), dbs)
go3<-enrichr(unique(c2[which(c2$group=="common" & c2$CT=="Astrocytes"),]$gene), dbs)
go4<-enrichr(unique(c2[which(c2$group=="common" & c2$CT=="Microglia"),]$gene), dbs)
go5<-enrichr(unique(c2[which(c2$group=="common" & c2$CT=="Oligodendrocytes"),]$gene), dbs)
go6<-enrichr(unique(c2[which(c2$group=="common" & c2$CT=="OPCs"),]$gene), dbs)
go1<-go1[["GO_Biological_Process_2021"]]
go2<-go2[["GO_Biological_Process_2021"]]
go3<-go3[["GO_Biological_Process_2021"]]
go4<-go4[["GO_Biological_Process_2021"]]
go5<-go5[["GO_Biological_Process_2021"]]
go6<-go6[["GO_Biological_Process_2021"]]
Terms<-unique(c(go1[which(go1$Adjusted.P.value<0.01),][1:3,]$Term, go2[which(go2$Adjusted.P.value<0.01),][1:3,]$Term, go3[which(go3$Adjusted.P.value<0.01),][1,]$Term, go4[which(go4$Adjusted.P.value<0.01),][1:3,]$Term, go5[which(go5$Adjusted.P.value<0.01),][1:3,]$Term, go6[which(go6$Adjusted.P.value<0.01),][1:3,]$Term))
#######################################
go1<-go1[order(go1$Term),]
go2<-go2[order(go2$Term),]
go3<-go3[order(go3$Term),]
go4<-go4[order(go4$Term),]
go5<-go5[order(go5$Term),]
go6<-go6[order(go6$Term),]
go<-cbind(go1[which(go1$Term %in% Terms),]$Adjusted.P.value, go2[which(go2$Term %in% Terms),]$Adjusted.P.value, go3[which(go3$Term %in% Terms),]$Adjusted.P.value, go4[which(go4$Term %in% Terms),]$Adjusted.P.value, go5[which(go5$Term %in% Terms),]$Adjusted.P.value, go6[which(go6$Term %in% Terms),]$Adjusted.P.value)
rownames(go)<-unique(go1[which(go1$Term %in% Terms),]$Term)
go<-as.matrix(go)
go<- -log10(go)
colnames(go)<-c("Exc","Inh","Ast","Mic","Oli","OPC")
ha<-HeatmapAnnotation(celltype=colnames(go), col= list(celltype=c("Ast"="darkgoldenrod1","Exc"="cornflowerblue","Inh"="seagreen3","Mic"="mediumorchid3","Oli"="coral3","OPC"="firebrick")), show_legend=F)
ht2=Heatmap(go, cluster_rows=T,show_row_dend=F,cluster_columns=T,col=colorRamp2(c(0,3,6),brewer.pal(n = 3, name = "YlOrRd")), name="-log10(Adj P)", show_column_names=T, show_row_names=T, column_title=NULL,row_names_gp = gpar(fontsize = 10), top_annotation=ha, row_names_side = "left", row_names_max_width=max_text_width(
rownames(go),
gp = gpar(fontsize = 12)
))
pdf("~/scMultiomics_AD/CellType-specific_linked_genes_GO.pdf", width=8, height=5)
ht2
dev.off()
part1<-import("~/Microglia/outs/analysis/feature_linkage/feature_linkage.bedpe", format="bedpe")
a10<-as.data.frame(part1)
a10<-a10[which(a10$NA.2!="peak-peak"),]
{
a10$seqnames<-a10$first.seqnames
start<-as.integer(ifelse(a10$NA.2=="peak-gene", (a10$first.end+a10$first.start)/2, (a10$second.end+a10$second.start)/2))
end<-ifelse(a10$NA.2=="gene-peak",a10$first.start, a10$second.start)
a10$start<-ifelse(start<end, start,end)
a10$end<-ifelse(start<end,end,start)
a10$peak.seqnames<-paste0("chr",ifelse(a10$NA.2=="peak-gene",a10[,1], a10[,6]),sep="")
a10$peak.start<-ifelse(a10$NA.2=="peak-gene",a10[,2], a10[,7])
a10$peak.end<-ifelse(a10$NA.2=="peak-gene",a10[,3], a10[,8])
a<-matrix(unlist(strsplit(a10$name,"><")),ncol=2,byrow=T)
gene<-ifelse(a10$NA.2=="peak-gene",noquote(gsub(">","",a[,2])),noquote(gsub("<","",a[,1])))
peak<-ifelse(a10$NA.2=="gene-peak",noquote(gsub(">","",a[,2])),noquote(gsub("<","",a[,1])))
a10$gene<-gene
a10$peak<-peak
a10$gene.strand<-"*"
a10$peak.strand<-"*"
a10$gene.seqnames<-paste0("chr",ifelse(a10$NA.2=="gene-peak",a10[,1], a10[,6]),sep="")
a10$gene.start<-ifelse(a10$NA.2=="gene-peak",a10[,2], a10[,7])
a10$gene.end<-ifelse(a10$NA.2=="gene-peak",a10[,3], a10[,8])
a10$Peak_pos<-paste0(a10$peak.seqnames,"-",a10$peak.start-1,"-", a10$peak.end)
}# # # format a10 ( ad)
a10<-a10[which(abs(a10$score)>0.2),]
# a10<-a10[,c(26,27,28,22,19,20,21,23,11,12,13,14,15,16,17,18,29)]
annot_peaks<-read.csv("~/scMultiomics_AD/CTpeaks_wK27olap.csv")
annot_peaks$peak<-gsub("X",23, annot_peaks$peak)
annot_peaks$peak<-gsub("Y",24, annot_peaks$peak)
a10_ct<-merge(a10, annot_peaks, by.x="Peak_pos", by.y="peak")
a10_ct<-a10_ct[,c(20,21,22,23,24,13,14,17,18,19,37,38,39,40)]
tmp<-a10_ct
colnames(tmp)[c(8,9,10)]<-c("seqnames","start","end")
colnames(a10_ct)[c(8,9,10)]<-c("signac.seqnames","signac.start","signac.end")
a10.gr<-makeGRangesFromDataFrame(a10_ct, keep.extra.columns = T, seqnames.field="peak.seqnames",start.field="peak.start",end.field="peak.end")
mic.gr<-a10.gr[grepl("Mic", a10.gr$peak_called_in),]
over<-findOverlaps(mic.gr, c2)
c2_o<-c2[subjectHits(over)]
c2_o$o_gene<-mic.gr[queryHits(over)]$gene
c2_o$o_score<-mic.gr[queryHits(over)]$score
c2_o<-c2_o[which(c2_o$o_gene==c2_o$gene),]
mic.gr$peak<-paste0(seqnames(mic.gr),"-",start(mic.gr)-1,"-",end(mic.gr))
mic.gr$link<-paste0(mic.gr$peak,"-", mic.gr$gene)
c2_o$link<-paste0(c2_o$peak,"-", c2_o$gene)
mic.gr$c2<- ifelse((mic.gr$link %in% c2_o$link), T,F)
mic.gr$deg<-mic.gr$gene %in% degs[which(degs$celltype=="Microglia"),]$gene
mic.gr$dap<-mic.gr$peak %in% daps[which(daps$celltype=="Microglia"),]$gene
73 microglia links 35 found in the full link set AD common Ctrl 4 27 4 Links that overlapped full link set had higher scores 7 are to microglia AD/ctrl deg 5 are to microglia ad/ctrl dap 2 links are both deg and dap -one is common and one ad-spec in full data
Nott: rs6733839
signac_form<-as.data.frame(c2)[,-c(1,2,3)]
colnames(signac_form)[c(19,20,21)]<-c("seqnames","start","end")
signac_form$seqnames<-paste0("chr",signac_form$seqnames)
signac_form$score<-ifelse(signac_form$group=="common", (signac_form$score.x+ signac_form$score.y)/2,
ifelse(signac_form$group=="Ctrl",signac_form$score.y, signac_form$score.x))
signac.gr<-GRanges(signac_form)
signac.gr$group2<-ifelse(signac.gr$group=="AD",0,
ifelse(signac.gr$group=="common",1,2))
snps<-read.table("~/mac3_mac1_EPACTS.txt", header=F)
colnames(snps)[1:3]<-c("chr","start","end")
snps$chr<-paste0("chr",snps$chr)
snps.gr<-GRanges(snps)
snps.gr$P<-snps.gr$V5
gene="BIN1"
tmp.gr<-signac.gr[which(signac.gr$gene==gene),]
tmp.gr<-tmp.gr[which(tmp.gr$Microglia==T ),]
min.cutoff<-min(tmp.gr$score)*2
min.cutoff<-ifelse(min.cutoff<0, min.cutoff, 0)
max.cutoff<-max(tmp.gr$score)*2
region<-GRanges("chr2:127077002-127145234")
Idents(subs$Microglia)<-subs$Microglia$Status
DefaultAssay(subs$Microglia)<-"CTpeaks"
cov_plot<-CoveragePlot(object=subs$Microglia, region=region, assay="CTpeaks",annotation=F, idents=c("AD","Ctrl"),window=500,peaks=F, links=F)&
scale_fill_manual(values=c("red", "blue"))
#
Links(data)<-tmp.gr ## full
link_plotA <- LinkPlot.height(tmp.gr, region=region, min.cutoff=min.cutoff, max.cutoff=max.cutoff)+ylab("Score")
#
snp<-"rs6733839"
snps.gr$highlight<-ifelse(snps.gr$V4==snp, T, F)
bgp<-Bed_GWASPlot(snps.gr[which(snps.gr$highlight==T),], region)+theme(legend.position="none")
#
DefaultAssay(data)<-"ATAC"
gene_plot<-AnnotationPlot(data, region=region)
p<-CombineTracks(plotlist=list(cov_plot,link_plotA, bgp,gene_plot), heights=c(4,2, 1,2))
pdf(paste0("~/scMultiomics_AD/",gene,"_linkPlot_w_snp.pdf"), width=8, height=8)
p
dev.off()
### create sample lists
args = commandArgs(trailingOnly=TRUE)
i=args[1]
print(i)
set.seed(i)
df<-read.csv("~/scMultiomics_AD/LinkPerm/ALL_cells.csv")
id<-unique(df$Cluster)
ctrl<-c("HCT17HEX","NT1261", "1230", "NT1271", "1238", "3586", "HCTZZT", "1224")
sub<-sample(id, 7)
if (sum(sub %in% ctrl)==8 | sum(!(sub %in% ctrl))==7 ) {
sub<-sample(id,7)
}
df_test<-df[which(df$id %in% sub),]
df_test2<-df[!(df$id %in% sub),]
write.csv(df_test, paste0("~/scMultiomics_AD/LinkPerm/AD.barcodes.",i,".csv"),quote=F, row.names=F )
write.csv(df_test, paste0("~/scMultiomics_AD/LinkPerm/Ctrl.barcodes.",i,".csv"),quote=F, row.names=F )
##Call Links with cellranger arc
##bring perms in and reformat
files=as.list(readLines("~/scMultiomics_AD/LinkPerm/random7/Linkages.txt"))
FT_list=lapply(files,function(x){
FT=import(con=x,format="bedpe")
})
format<-function(x){
a10<-as.data.frame(x)
a10<-a10[which(a10$NA.2!="peak-peak"),]
a10$seqnames<-paste0("chr",ifelse(a10$NA.2=="peak-gene",a10[,1], a10[,6]),sep="")
a10$start<-ifelse(a10$NA.2=="peak-gene",a10[,2], a10[,7])
a10$end<-ifelse(a10$NA.2=="peak-gene",a10[,3], a10[,8])
a<-matrix(unlist(strsplit(a10$name,"><")),ncol=2,byrow=T)
gene<-ifelse(a10$NA.2=="peak-gene",noquote(gsub(">","",a[,2])),noquote(gsub("<","",a[,1])))
peak<-ifelse(a10$NA.2=="gene-peak",noquote(gsub(">","",a[,2])),noquote(gsub("<","",a[,1])))
a10$gene<-gene
a10$peak<-peak
a10$Peak_pos<-paste0(a10$peak.seqnames,"-",a10$peak.start-1,"-", a10$peak.end)
a10<-a10[which(abs(a10$score)>0.2),]
a10.gr<-GRanges(a10_ct)
a10.gr$link<-paste0(a10.gr$gene,"-",a10.gr$index)
return(a10.gr)
}
#format
FL_format<-lapply(FT_list, format)
## distribution of group-specific
ll<-data.frame(i=1, j=1, AD=0,common=0,Ctrl=0)
for (i in 1:50){
for (j in 1:50){
if (i !=j){
AD<-as.data.frame(FL_format[[i]])
Ctrl<-as.data.frame(FL_format[[j]])
comp<-merge(AD,Ctrl, by=c("seqnames","start","end","gene"), all=T)
comp$group<-ifelse(is.na(comp$score.x)==T, "Ctrl",
ifelse(is.na(comp$score.y)==T,"AD","common"))
t<-table(comp$group)
ll.tmp<-data.frame(i=i, j=j, AD=t[1],common=t[2],Ctrl=t[3])
ll<-rbind(ll, ll.tmp)
} }}
ll<-ll[-1,]
ll$propSpec<-(ll[,3]+ll[,5])/(ll[,3]+ll[,4]+ll[,5])
pdf("~/scMultiomics_AD/LinkPerm_propSpec_allCombinations_221118.pdf", width=5,height=4)
ggplot(ll, aes(x=propSpec))+geom_histogram(bins=50)+theme_classic()+geom_vline(xintercept=0.359041, color="red", )+xlab("Proportion group-specific")
dev.off()
## number that are really AD-spec
ALL_0.05<-do.call("c", FL_format)
table(c2[!(c2$link %in% ALL_0.05$link),]$group)
data$bin<-paste0(data$id, "-",data$predicted.id)
bin<-unique(data$bin)
keep<-bin[!grepl("Pericytes", bin)]
keep<-keep[!grepl("Endothelial", keep)]
rna<-c()
atac<-c()
for(i in 1:length(keep)){
rna<-cbind(rna, rowSums(data@assays$RNA@counts[,which(data$bin ==keep[i])]))
atac<-cbind(atac, rowSums(data@assays$CTpeaks@counts[,which(data$bin ==keep[i])]))
}
colnames(rna)<-keep
colnames(atac)<-keep
bulk <- CreateSeuratObject(counts = rna,project="all")
chrom_assay <- CreateChromatinAssay(
counts = atac,
sep = c(":", "-"),
genome = 'hg38',
annotation = Annotation(data)
)
bulk[["ATAC"]] <- chrom_assay
DefaultAssay(bulk)<-"RNA"
bulk<-NormalizeData(bulk)
bulk$pseudobulk<-colnames(bulk)
bulk$id<-sapply(strsplit(bulk$pseudobulk,"-"),`[`,1)
bulk$Status<-ifelse(bulk$id %in% md[which(md$Status=="Ctrl"),]$id, "Ctrl","Ad")
AD<-subset(bulk, Status=="Ad")
Ctrl<-subset(bulk, Status=="Ctrl")
DefaultAssay(AD)<-"ATAC"
DefaultAssay(Ctrl)<-"ATAC"
AD<-RegionStats(AD, genome=BSgenome.Hsapiens.UCSC.hg38)
Ctrl<-RegionStats(Ctrl, genome=BSgenome.Hsapiens.UCSC.hg38)
AD<-LinkPeaks(AD, peak.assay="ATAC", expression.assay="RNA")
Ctrl<-LinkPeaks(Ctrl, peak.assay="ATAC", expression.assay="RNA")
Ctrl.Links<-Links(Ctrl)
AD.Links<-Links(AD)
Cl2<-as.data.frame(Ctrl.Links)
Al2<-as.data.frame(AD.Links)
Cl2<-Cl2[which(abs(Cl2$score)>0.2),]
Al2<-Al2[which(abs(Al2$score)>0.2),]
Cl2$qvalue<-p.adjust(Cl2$pvalue)
Al2$qvalue<-p.adjust(Al2$pvalue)
Cl2<-Cl2[which(Cl2$qvalue<0.05),]
Al2<-Al2[which(Al2$qvalue<0.05),]
Cl2$link<-paste0(Cl2$peak, "-", Cl2$gene)
Al2$link<-paste0(Al2$peak, "-", Al2$gene)
me<-merge(Al2,Cl2, by=c("seqnames","start","end","gene","link"), all=T)
me$group<-ifelse(is.na(me$score.x)==T, "Ctrl", ifelse(is.na(me$score.y)==T, "AD","common"))
c2$link<-paste0(seqnames(c2),"-",start(c2),"-",end(c2),"-",c2$gene)
df<-data.frame(link=c2$link, group=c2$group)
me2<-merge(me, df, by="link", all=T)
me2$group.x<-ifelse(is.na(me2$group.x), "NA",me2$group.x)
me2$group.y<-ifelse(is.na(me2$group.y),"NA", me2$group.y)
t<- table(me2$group.x, me2$group.y)
sum(me$link %in% c2$link)/nrow(me)
(t[1,1]+t[2,2]+t[3,3])/sum(t[-4,-4])
df<-data.frame(PDE10A=AD@assays$RNA@data["PDE10A",], peak=AD@assays$ATAC@data["chr6-165694243-165694700",], ct=AD$ct,id=AD$id)
pdf("~/scMultiomics_AD/Cor_psuedobulk_links_PDE10A.pdf")
ggplot(df, aes(x=PDE10A,y=peak, color=id, shape=ct))+geom_point()
dev.off()
10X
AD common Ctrl NA
AD 4066 15585 1513 36953
pBulk common 1525 15698 1071 10501 Ctrl 403 4482 1746 20136 NA 34837 169281 69698 0https://www-science-org.ezproxy3.lhl.uab.edu/doi/full/10.1126/science.aay0793
neu<-read.table("~/NOTT_hg38_neu_interactome.bed")
colnames(neu)<-neu[1,]
neu<-neu[-1,]
neu.1<-makeGRangesFromDataFrame(neu, keep.extra.columns=T)
peak2<-neu[,-c(1,2,3)]
neu.2<-makeGRangesFromDataFrame(peak2, keep.extra.columns=T, seqnames.field="chr2",start.field="start2",end.field="end2")
tmp<-ifelse(c2$signac.start>start(c2) & c2$signac.start<end(c2),"peak-gene","gene-peak")
start<-ifelse(tmp=="peak-gene", c2$signac.end-100, c2$signac.start-100)
end<-ifelse(tmp=="peak-gene", c2$signac.end+1000, c2$signac.start+1000)
genePos<-GRanges(data.frame(seqnames=paste0("chr",c2$signac.seqnames), start=start, end=end))
genePos$gene<-c2$gene
## olap placseq peak1
links<-Pairs(c2, genePos)
plac<-Pairs(neu.1,neu.2)
## pairs doesn't put them in order so second is always gene. Do peak-gene first
olap1<-findOverlaps(first(links),first(plac))
links.2<-links[queryHits(olap1)]
plac.2<-plac[subjectHits(olap1)]
olap2<-findOverlaps(second(links.2),second(plac.2))
links.f<-links.2[queryHits(olap2)]
links.f<-unique(links.f)
mcols(links.f)$index<-first(links.f)$index
mcols(links.f)$gene<-second(links.f)$gene
## gene-peak
Bolap1<-findOverlaps(second(links),first(plac)) ## do genes overlap placseq peak1
Blinks.2<-links[queryHits(Bolap1)]
Bplac.2<-plac[subjectHits(Bolap1)]
Bolap2<-findOverlaps(first(Blinks.2),second(Bplac.2)) ## do peaks olap plac peak2
Blinks.f<-Blinks.2[queryHits(Bolap2)]
Blinks.f<-unique(Blinks.f)
mcols(Blinks.f)$index<-first(Blinks.f)$index
mcols(Blinks.f)$gene<-second(Blinks.f)$gene
##
all<-c(links.f, Blinks.f)
link_keep<-paste0(mcols(all)$gene,"-",mcols(all)$index)
c2$PLACseq_neun<-ifelse(c2$link %in% link_keep, T,F)
mic.1<-GRanges(mic)
peak2<-mic[,-c(1,2,3)]
mic.2<-makeGRangesFromDataFrame(peak2, keep.extra.columns=T, seqnames.field="chr2",start.field="start2",end.field="end2")
plac<-Pairs(mic.1,mic.2)
## pairs doesn't put them in order so second is always gene. Do peak-gene first
olap1<-findOverlaps(first(links),first(plac))
links.2<-links[queryHits(olap1)]
plac.2<-plac[subjectHits(olap1)]
olap2<-findOverlaps(second(links.2),second(plac.2))
links.f<-links.2[queryHits(olap2)]
links.f<-unique(links.f)
mcols(links.f)$index<-first(links.f)$index
mcols(links.f)$gene<-second(links.f)$gene
## gene-peak
Bolap1<-findOverlaps(second(links),first(plac)) ## do genes overlap placseq peak1
Blinks.2<-links[queryHits(Bolap1)]
Bplac.2<-plac[subjectHits(Bolap1)]
Bolap2<-findOverlaps(first(Blinks.2),second(Bplac.2)) ## do peaks olap plac peak2
Blinks.f<-Blinks.2[queryHits(Bolap2)]
Blinks.f<-unique(Blinks.f)
mcols(Blinks.f)$index<-first(Blinks.f)$index
mcols(Blinks.f)$gene<-second(Blinks.f)$gene
##
all<-c(links.f, Blinks.f)
link_keep<-paste0(mcols(all)$gene,"-",mcols(all)$index)
c2$PLACseq_mic<-ifelse(c2$link %in% link_keep, T,F)
olig<-read.table("~/NOTT_hg38_olig_interactome.bed")
colnames(olig)<-olig[1,]
olig<-olig[-1,]
olig.1<-makeGRangesFromDataFrame(olig, keep.extra.columns=T)
peak2<-olig[,-c(16,17,18)]
olig.2<-makeGRangesFromDataFrame(peak2, keep.extra.columns=T, seqnames.field="chr2",start.field="start2",end.field="end2")
plac<-Pairs(olig.1,olig.2)
## pairs doesn't put them in order so second is always gene. Do peak-gene first
olap1<-findOverlaps(first(links),first(plac))
links.2<-links[queryHits(olap1)]
plac.2<-plac[subjectHits(olap1)]
olap2<-findOverlaps(second(links.2),second(plac.2))
links.f<-links.2[queryHits(olap2)]
links.f<-unique(links.f)
mcols(links.f)$index<-first(links.f)$index
mcols(links.f)$gene<-second(links.f)$gene
## gene-peak
Bolap1<-findOverlaps(second(links),first(plac)) ## do genes overlap placseq peak1
Blinks.2<-links[queryHits(Bolap1)]
Bplac.2<-plac[subjectHits(Bolap1)]
Bolap2<-findOverlaps(first(Blinks.2),second(Bplac.2)) ## do peaks olap plac peak2
Blinks.f<-Blinks.2[queryHits(Bolap2)]
Blinks.f<-unique(Blinks.f)
mcols(Blinks.f)$index<-first(Blinks.f)$index
mcols(Blinks.f)$gene<-second(Blinks.f)$gene
##
all<-c(links.f, Blinks.f)
link_keep<-paste0(mcols(all)$gene,"-",mcols(all)$index)
c2$PLACseq_olig<-ifelse(c2$link %in% link_keep, T,F)
c2$PLACseq<-ifelse(c2$PLACseq_neun==T | c2$PLACseq_mic==T | c2$PLACseq_olig==T,T,F)
mpra<-read.csv("~/lit_search_validation/Full_Cooper_MPRA_sumStats.csv")
mpra$start<-mpra$pos
mpra$end<-mpra$start+1
mpra<-GRanges(mpra)
ch<-import.chain("~/liftover/hg19ToHg38.over.chain")
mpra<-liftOver(mpra, ch)
mpra<-unlist(mpra)
olap<-findOverlaps(unique(c2), mpra)
c2.2<-c2[queryHits(olap)]
c2.2$mpra_q<-mpra[subjectHits(olap)]$q
c2.2$mpra_sig<-ifelse(c2.2$mpra_q<0.01,1,0) ## their cutoff
mpra$sig<-ifelse(mpra$q<0.01,1,0)
mpra$link<-F
mpra[subjectHits(olap)]$link<-T
t<-table(mpra$sig,mpra$link)
p<-prop.table(t,1)
p[2,2]/p[1,2] ## fold enrichment for active mpra 1.48
chisq.test(t) ## pval 0.022
a=37
olap<-findOverlaps(all[!(all$peak %in% c2$peak)],mpra)
mpra$link<-F
mpra[subjectHits(olap)]$link<-T
t<-table(mpra$sig,mpra$link)
chisq.test(t) ## p-value < 2.2e-16
## 94 peak have overlap
b=31
mat<-matrix(data=c(length(all[!(all$peak %in% c2$peak),])-b, length(unique(c2))-a,b, a), nrow=2)
fisher.test(mat)
95 percent confidence interval: 1.459384 4.029969 sample estimates: odds ratio 2.417484
crispr<-read.csv("~/lit_search_validation/Cooper_CRISPR_validated.csv")
mp<-as.data.frame(mpra)[,c(1,2,3,7)]
crispr.pos<-merge(crispr,mp,by.y="rsID",by.x="snp") ## get liftover positions from mpra grange
## add crispr-ex
crispr.pos$method<-"CRISPRi"
tmp<-crispr.pos[which(crispr.pos$snp %in% c("rs36026988","rs13025717","rs9271171","rs10224310")),]
tmp<-tmp[which(tmp$cell_type=="Microglia"),]
tmp<-tmp[!duplicated(tmp$snp_target),]
tmp$sig<-T
tmp$method<-"CRIPSR-ex"
crispr.pos<-rbind(crispr.pos,tmp)
cm<-GRanges(crispr.pos[which(crispr.pos$cell_type=="Microglia"),])
cn<-GRanges(crispr.pos[which(crispr.pos$cell_type=="Neuron"),])
olap<-findOverlaps(c2,cm)
c2.mic<-c2[queryHits(olap)]
c2.mic$crispr.gene<-cm[subjectHits(olap)]$gene
c2.mic$crispr.log2FC<-cm[subjectHits(olap)]$log2FC
c2.mic$crispr.sig<-cm[subjectHits(olap)]$sig
c2.mic$crispr.method<-cm[subjectHits(olap)]$method
c2.mic<-c2.mic[which(c2.mic$gene==c2.mic$crispr.gene),]
c2.mic<-c2.mic[which(c2.mic$Microglia==T),]
cn$index<-seq(1,length(cn))
## Doesn't have CLU/ other snp selected ones
toTest<-readRDS("~/22_restart/Links_toTest.rds")
olap<-findOverlaps(c2,cn)
c2.neu<-c2[queryHits(olap)]
c2.neu$crispr.gene<-cn[subjectHits(olap)]$gene
c2.neu$crispr.log2FC<-cn[subjectHits(olap)]$log2FC
c2.neu$crispr.sig<-cn[subjectHits(olap)]$sig
c2.neu<-c2.neu[which(c2.neu$gene==c2.neu$crispr.gene),]
c2.neu<-c2.neu[which(c2.neu$Inhibitory==T | c2.neu$Excitatory==T),]
oops<-subsetByOverlaps(c2.neu,toTest)
oops2<-subsetByOverlaps(toTest,c2.neu)
library(SNPlocs.Hsapiens.dbSNP151.GRCh38)
snps<-SNPlocs.Hsapiens.dbSNP151.GRCh38
loc<-snpsById(snps,crispr$snp, ifnotfound="drop")
loc<-merge(as.data.frame(loc),crispr, by.x="RefSNP_id", by.y="snp")
loc$seqnames<-paste0("chr",loc$seqnames)
loc$start<-loc$pos
loc$end<-loc$start+1
loc<-GRanges(loc)
over<-findOverlaps(c2, loc)
loc<-loc[subjectHits(over)]
loc$linked_gene<-c2[queryHits(over)]$gene
s<-loc[which(loc$gene==loc$linked_gene),]
s[!duplicated(s$snp_target),]
overlaps regions on our test list chr17 45893937-45897413 MAPT T chr17 46224752-46225787 MAPT F chr17 46224752-46225787 KANSL1 T
no CR1 no SPI1 all FNBP4 MS4A4E first linked to STX3 2nd MS4A4E there no RIN3 KNOP1 linkes to CCP110 and GPRC5B GPRC5B snp slightly outside peak no PLEKHM1 all MAPT all BIN1 APOC1 snp linked to CLASRP all KANSL1 c4A onelinked to GPSM3" “HLA-DMA” “HLA-DMB” “HLA-DOA” “HLA-DPA1” “HLA-DPB1”HLA-DQA1" “HLA-DQB1” “HLA-DRA” “HLA-DRB1” “HLA-DRB5” no epha1 all CLU
all<-read.csv("~/scMultiomics_AD/CTpeaks_annotated.csv")
all<-GRanges(all)
all$peak<-paste0(start(all),"-",end(all))
NPC<-read.csv("~/lit_search_validation/Weiss_NPC_MPRA.csv")
NPC<-NPC[,4:13]
colnames(NPC)[c(6,7)]<-c("start","end")
NPC<-NPC[is.na(NPC$start)==F,]
NPC<-GRanges(NPC)
olap<-findOverlaps(unique(c2),NPC)
NPC$sig<-NPC$Active...NPC
NPC$link<-F
NPC[subjectHits(olap)]$link<-T
t<-table(NPC$sig,NPC$link)
p<-prop.table(t,1)
p[3,2]/p[1,2] ## fold enrichment for active mpra 2.6
chisq.test(t[-2,]) ## p-value < 2.2e-16
## 95 links overlap active
a=95
olap<-findOverlaps(all[!(all$peak %in% c2$peak)],NPC)
NPC$sig<-NPC$Active...NPC
NPC$link<-F
NPC[subjectHits(olap)]$link<-T
t<-table(NPC$sig,NPC$link)
chisq.test(t[-2,]) ## p-value < 2.2e-16
## 94 peak have overlap
b=94
mat<-matrix(data=c(length(all[!(all$peak %in% c2$peak),])-b, length(unique(c2))-a,b, a), nrow=2)
fisher.test(mat)
95 percent confidence interval: 1.424516 2.547133 sample estimates: odds ratio 1.905552
ESC<-read.csv("~/lit_search_validation/Weiss_ESC_MPRA.csv")
ESC<-ESC[,4:13]
colnames(ESC)[c(6,7)]<-c("start","end")
ESC<-ESC[is.na(ESC$start)==F,]
ESC<-GRanges(ESC)
olap<-findOverlaps(unique(c2),ESC)
ESC$sig<-ESC$Active...ESC
ESC$link<-F
ESC[subjectHits(olap)]$link<-T
t<-table(ESC$sig,ESC$link)
p<-prop.table(t,1)
p[3,2]/p[1,2] ## fold enrichment for active mpra 1.77
chisq.test(t[-2,]) ## p-value < 5.36e-10
## 95 links overlap active
a=127
olap<-findOverlaps(all[!(all$peak %in% c2$peak)],ESC)
ESC$sig<-ESC$Active...ESC
ESC$link<-F
ESC[subjectHits(olap)]$link<-T
t<-table(ESC$sig,ESC$link)
chisq.test(t[-2,]) ## p-value < 2.2e-16
## 94 peak have overlap
b=101
mat<-matrix(data=c(length(all[!(all$peak %in% c2$peak),])-b, length(unique(c2))-a,b, a), nrow=2)
fisher.test(mat)
95 percent confidence interval: 1.946489 3.342428 sample estimates: odds ratio 2.547977
uebbing<-read.csv("~/lit_search_validation/Uebbing_NPC_MPRA.csv")
uebbing<-uebbing[,1:20]
uebbing<-GRanges(uebbing)
ch<-import.chain("~/liftover/hg19ToHg38.over.chain")
uebbing38<-liftOver(uebbing, ch)
uebbing<-unlist(uebbing38) ## lost 7
olap<-findOverlaps(unique(c2),uebbing)
uebbing$sig<-uebbing$Active
uebbing$link<-F
uebbing[subjectHits(olap)]$link<-T
t<-table(uebbing$sig,uebbing$link)
p<-prop.table(t,1)
p[2,2]/p[1,2] ## fold enrichment for active mpra 1.6
chisq.test(t) ## p-value < 2.2e-16
## 95 links overlap active
a=668
olap<-findOverlaps(all[!(all$peak %in% c2$peak)],uebbing)
uebbing$link<-F
uebbing[subjectHits(olap)]$link<-T
t<-table(uebbing$sig,uebbing$link)
chisq.test(t) ## p-value < 2.2e-16
## 94 peak have overlap
b=777
mat<-matrix(data=c(length(all[!(all$peak %in% c2$peak),])-b, length(unique(c2))-a,b, a), nrow=2)
fisher.test(mat)
95 percent confidence interval: 1.570836 1.938006 sample estimates: odds ratio 1.745004
sure<-read.table("~/SuRE_SNP_table_181029.txt", sep="\t")
colnames(sure)<-sure[1,]
sure<-sure[-1,]
colnames(sure)[3]<-"start"
sure$end<-as.numeric(sure$start)+1
sure<-GRanges(sure)
ch<-import.chain("~/liftover/hg19ToHg38.over.chain")
sure38<-liftOver(sure, ch)
sure<-unlist(sure38) ## lost 7
## sure$sig<-sure$k562.wilcox.p.value <0.006192715 | sure$hepg2.wilcox.p.value <0.00173121
sure$sig<-sure$k562.wilcox.p.value <0.01
sure.sig<-sure[which(sure$sig==T),]
olap<-findOverlaps(unique(c2),sure.sig)
length(unique(queryHits(olap))) ## 43253
c2.olap<-c2[queryHits(olap)]
c2.olap$snp<-sure.sig[subjectHits(olap)]$SNP_ID
a=11083
olap<-findOverlaps(all[!(all$peak %in% c2$peak)],sure.sig)
length(unique(queryHits(olap)))
b=12195
mat<-matrix(data=c(length(all[!(all$peak %in% c2$peak),])-b, length(unique(c2))-a,b, a), nrow=2)
fisher.test(mat)
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 3.132566 3.328924
## sample estimates:
## odds ratio
## 3.229191
sure$sig<-sure$hepg2.wilcox.p.value <0.01
sure.sig<-sure[which(sure$sig==T),]
olap<-findOverlaps(unique(c2),sure.sig)
length(unique(queryHits(olap))) ## 43253
c2.olap<-c2[queryHits(olap)]
c2.olap$snp<-sure.sig[subjectHits(olap)]$SNP_ID
a=6713
olap<-findOverlaps(all[!(all$peak %in% c2$peak)],sure.sig)
length(unique(queryHits(olap)))
b=7399
mat<-matrix(data=c(length(all[!(all$peak %in% c2$peak),])-b, length(unique(c2))-a,b, a), nrow=2)
fisher.test(mat)
## 95 percent confidence interval:
## 1.821630 1.949439
## sample estimates:
## odds ratio
## 1.884453
95 percent confidence interval: 5.825352 6.104481 sample estimates: odds ratio 5.963298
df<-data.frame(source=c("Bryois_eQTL_2022","Cooper_HEK_MPRA_2022","Weiss_NPC_MPRA_2021","Weiss_ESC_MPRA_2021","Uebbing_NPC_MPRA_2020","GTEx_Brain_FC","SuRE_HepG2","SuRE_K562"), OR=c(3.40,2.42,1.91,2.54,1.74,3.53, 1.88,3.23), ymin=c(3.14,1.46,1.43,1.95,1.57,3.0,1.82,3.13), ymax=c(3.69,4.03,2.55,3.34,1.94,4.10,1.95,3.33))
pdf("~/scMultiomics_AD/lit_search_validation_OR.pdf", width=6, height=4)
ggplot(df[!(df$source %in% c("Bryois_eQTL_2022","GTEx_Brain_FC")),])+geom_bar(stat="identity",aes(y=source,x=OR), fill="slategray3")+geom_errorbar(aes(y=source,xmin=ymin,xmax=ymax))+theme_classic()+ylab("")+ggtitle("Enrichment in linked-peaks")
dev.off()
peaks<-read.csv("~/scMultiomics_AD/CTpeaks_annotated.csv")
peaks<-GRanges(peaks)
gtf<-import("~/quantseq/STAR/genes.gtf")
gtf<-gtf[which(gtf$type=="start_codon"),]
gtf<-gtf[!duplicated(gtf$gene_name),]
rs<-rowSums(data[["RNA"]])
gtf2<-gtf[which(gtf$gene_name %in% names(rs[which(rs>500)])),]
olap<-findOverlaps(peaks, gtf,maxgap=500000)
allPossible<-peaks[queryHits(olap)]
allPossible$gene<-gtf[subjectHits(olap)]$gene_name
allPossible$link<-paste0(seqnames(allPossible),":",start(allPossible),"-",allPossible$gene)
gtex<-read.table("~/lit_search_validation/Brain_Frontal_Cortex_BA9.v8.egenes.txt", sep="\t")
colnames(gtex)<-gtex[1,]
gtex<-gtex[-1,]
colnames(gtex)[15]<-"start"
gtex$end<-as.numeric(gtex$start)+1
gtex_fc<-GRanges(gtex[which(gtex$qval<0.05),])
c2$link2<-paste0(seqnames(c2),":",start(c2),"-",c2$gene)
olap<-findOverlaps(c2,gtex_fc)
c2_sub<-c2[queryHits(olap)]
c2_sub$test_gene<-gtex_fc[subjectHits(olap)]$gene_name
sum(c2_sub$gene==c2_sub$test_gene)
c2_sub$agree<-c2_sub$gene==c2_sub$test_gene
c2_sub<-c2_sub[order(c2_sub$agree, decreasing=T),]
c2_uni<-c2_sub[!duplicated(c2_sub$index),]
prop.table(table(c2_uni$agree)) ## 32%
a=157
ap=allPossible[!(allPossible$link %in% c2$link2)]
olap<-findOverlaps(ap,gtex_fc)
ap_sub<-ap[queryHits(olap)]
ap_sub$test_gene<-gtex_fc[subjectHits(olap)]$gene_name
sum(ap_sub$gene==ap_sub$test_gene)
b=347
mat<-matrix(data=c(length(ap)-b, length(c2)-a,b, a), nrow=2)
fisher.test(mat)
## 95 percent confidence interval:
## 3.295097 4.850273
## sample estimates:
## odds ratio
## 4.004342
qtl<-read.csv("~//lit_search_validation/eQTL_Bryois.csv")
pos<-read.csv("~//lit_search_validation/eQTL_Bryois_snpPos.csv") ## this is 5% FDR filtered
colnames(pos)[c(5,6)]<-c("seqnames","start")
pos$end<-pos$start+1
pos<-GRanges(pos)
seqlevels(pos)<-paste0("chr",seqlevels(pos))
olap<-findOverlaps(c2, pos)
c2_olap<-c2[queryHits(olap)]
c2_olap$snp<-pos[subjectHits(olap)]$SNP
c2_olap$symbol<-pos[subjectHits(olap)]$symbol
c2_olap$eQTL_celltype<-pos[subjectHits(olap)]$cell_type
agree<-c2_olap[which(c2_olap$gene==c2_olap$symbol),]
## how many multi-linked peaks do I lose in that step
c2_olap$agree<-c2_olap$gene==c2_olap$symbol
c2_olap<-c2_olap[order(-c2_olap$agree),]
t<-table(c2_olap$index, c2_olap$agree)
t<-data.frame(False=t[,1],True=t[,2])
nrow(t[which(t$True>0),])/nrow(t) ## of the peaks that overlap eQTLs. 50% are linked to the eQTL gene
library(SNPlocs.Hsapiens.dbSNP151.GRCh38)
qtl<-read.csv("~/eQTL_Bryois.csv")
qtl<-qtl[,c(1,2,4,10)]
qtl<-qtl[grepl("rs",qtl$SNP),]
snps<-SNPlocs.Hsapiens.dbSNP151.GRCh38
loc<-snpsById(snps,qtl$SNP, ifnotfound="drop")
qtl.loc<-merge(qtl,as.data.frame(loc),by.y="RefSNP_id",by.x="SNP")
qtl.loc$seqnames<-paste0("chr",qtl.loc$seqnames)
colnames(qtl.loc)[6]<-"start"
qtl.loc$end<-qtl.loc$start+1
qtl<-GRanges(qtl.loc)
## ## ##
qtl$sig<-ifelse(qtl$adj_p<0.05,T,F)
qtl.sig<-qtl[which(qtl$sig==T),]
qtl.sig$pair<-paste0(qtl.sig$SNP,"-", qtl.sig$symbol)
qtl.sig<-qtl.sig[!duplicated(qtl.sig$pair)]
olap<-findOverlaps(c2,qtl.sig)
c2_sub<-c2[queryHits(olap)]
c2_sub$test_gene<-qtl.sig[subjectHits(olap)]$symbol
c2_sub$pair<-qtl.sig[subjectHits(olap)]$pair
c2_sub$agree<-c2_sub$gene==c2_sub$test_gene
sum(c2_sub$gene==c2_sub$test_gene)
c2_sub<-c2_sub[order(c2_sub$agree, decreasing=T),]
c2_uni<-c2_sub[!duplicated(c2_sub$index),]
prop.table(table(c2_uni$agree)) ## 57%
a=484
ap=allPossible[!(allPossible$link %in% c2$link2)]
olap<-findOverlaps(ap,qtl.sig)
ap_sub<-ap[queryHits(olap)]
ap_sub$test_gene<-qtl.sig[subjectHits(olap)]$symbol
sum(ap_sub$gene==ap_sub$test_gene)
b=673
mat<-matrix(data=c(length(ap)-b, length(c2)-a,b, a), nrow=2)
fisher.test(mat)
## reverse reverse
olap<-findOverlaps(c2,qtl.sig)
keep<-c2_sub[which(c2_sub$gene==c2_sub$test_gene),]$pair
qtl.sig$link<-qtl.sig$pair %in% keep
qtl.sig$olap<-F
qtl.sig[subjectHits(olap)]$olap<-T
t<-table(qtl.sig$olap, qtl.sig$link)
95 percent confidence interval: 1.724113 2.459824 sample estimates: odds ratio 2.064658
qtl<-read.csv("~/scMultiomics_AD/Link_overlap_Bryois_eQTLs.csv")
guides<-read.csv("~/feature_ref3.csv")
c2<-readRDS("~/scMultiomics_AD/AD_Ctrl_links_filt0.02_wPerm_wQval.rds")
guides<-unique(guides$id)
df<-data.frame(seqnames=sapply(strsplit(guides,"_"),`[`,1), start=sapply(strsplit(guides,"_"),`[`,2))
df$start<-as.numeric(df$start)
df$end<-df$start+1
df<-df[complete.cases(df),]
gr<-GRanges(df)
test <-subsetByOverlaps(GRanges(c2), gr)
olap<-findOverlaps(test, gr)
guide<-paste0(seqnames(gr[subjectHits(olap)]),"_",start(gr[subjectHits(olap)]),"_F")
## get guide that is in peak
atest<-test[queryHits(olap)]
atest$guide<-guide
atest$score<-ifelse(atest$group=="AD", atest$score.x, atest$score.y)
atest<-atest[order(abs(atest$score),decreasing=T),]
rna<-read.table("~/HEK/tpms.tsv", sep="\t", header=T)
rna<-rna[,c(1,2,75)] ## nucleus 3' HEK
k27<-import("~/HEK/H3k27ac_HEK.bb") ## it is hg38
over<-findOverlaps(atest, k27)
atest$HEK_k27olap<-"None"
atest[unique(queryHits(over))]$HEK_k27olap<-"Olap"
## didn't set all.x=T bc only lose pseudos (ie AC0..,AL...)
atest2<-merge(as.data.frame(atest),rna, by.x="gene", by.y="Gene.Name")
atest2<-GRanges(atest2)
table(unique(atest2)$HEK_k27olap)
good<-atest2[which(atest2$HEK_k27olap=="Olap" & atest2$nucleus..long.polyA.RNA..NHEK.cell.line>10),]
doubleGood<-good[which(good$link %in% agree$link),]
hic<-read.xlsx("~/Hu_Geschwind_2021_HiC_NeuN.xlsx")
colnames(hic)<-hic[2,]
hic<-hic[-c(1,2),]
colnames(hic)[2:3]<-c("start","end")
hic<-GRanges(hic)
ch<-import.chain("~/liftover/hg19ToHg38.over.chain")
hic38<-liftOver(hic, ch)
hic38<-unlist(hic38)
c2$link2<-paste0(seqnames(c2),":",start(c2),"-",c2$gene)
neun<-c2[which(c2$Excitatory==T | c2$Inhibitory==T),]
olap<-findOverlaps(neun,hic38)
neun_sub<-neun[queryHits(olap)]
neun_sub$test_gene<-hic38[subjectHits(olap)]$gene
sum(neun_sub$gene==neun_sub$test_gene)
neun_sub$agree<-neun_sub$gene==neun_sub$test_gene
neun_sub<-neun_sub[order(neun_sub$agree, decreasing=T),]
neun_uni<-neun_sub[!duplicated(neun_sub$index),]
prop.table(table(neun_uni$agree)) ## 44%
a=11056
ap=allPossible[!(allPossible$link %in% c2$link2)]
olap<-findOverlaps(ap,hic38)
ap_sub<-ap[queryHits(olap)]
ap_sub$test_gene<-hic38[subjectHits(olap)]$gene
sum(ap_sub$gene==ap_sub$test_gene)
b=45508
mat<-matrix(data=c(length(ap)-b, length(c2)-a,b, a), nrow=2)
fisher.test(mat)
## 95 percent confidence interval:
## 2.345473 2.443410
## sample estimates:
## odds ratio
## 2.3939
df<-data.frame(source=c("Bryois_eQTL_2022","GTEx_Brain_FC","Hu_NEUN_HiC"), OR=c(6.373,4.00,2.39), ymin=c(5.65,3.30,2.35), ymax=c(7.17,4.85,2.44), type=c(1,1,2))
pdf("~/scMultiomics_AD/lit_search_validation_eQTL_HiC.pdf", width=6, height=4)
ggplot(df)+geom_bar(stat="identity",aes(y=source,x=OR, fill=as.factor(type)))+geom_errorbar(aes(y=source,xmin=ymin,xmax=ymax))+theme_classic()+ylab("")+theme(legend.position="none")+ggtitle("Enrichment in Links")
dev.off()
## atest from crispr.rmd
gtex_fc$gene<-gtex_fc$gene_name
gtex_fc$source<-"GTEx"
qtl.sig$gene<-qtl.sig$symbol
qtl.sig$source<-"Bryois"
hic38$source<-"HiC"
ahh<-c(gtex_fc, qtl.sig,hic38)
olap<-findOverlaps(atest, ahh)
atest2<-atest[queryHits(olap)]
atest2$source<-ahh[subjectHits(olap)]$source
atest2$test_gene<-ahh[subjectHits(olap)]$gene
atest2$agree<-atest2$gene==atest2$test_gene
atest2<-atest2[order(atest2$agree, decreasing=T),]
c2$MPRA<-c2$CooperMPRA==T | c2$Weiss_NPC==T | c2$Weiss_ESC==T | c2$Uebbing_MPRA==T | c2$SuRE_HePG2==T | c2$SuRE_K562==T
c2$eQTL<-c2$eQTL_Bryois==T | c2$GTEx_FC==T
df<-as.data.frame(c2)[,c(48,49,50)]
nrow(df[which(df$MPRA == T & df$eQTL == T & df$Geschwin_HiC == T),]) ##1
nrow(df[which(df$MPRA == T & df$eQTL == F & df$Geschwin_HiC == T),]) ##1541
nrow(df[which(df$MPRA == T & df$eQTL == F & df$Geschwin_HiC == F),]) ##58801
nrow(df[which(df$MPRA == T & df$eQTL == T & df$Geschwin_HiC == F),]) ##130
nrow(df[which(df$MPRA == F & df$eQTL == T & df$Geschwin_HiC == T),]) ##9
nrow(df[which(df$MPRA == F & df$eQTL == F & df$Geschwin_HiC == T),]) ##6582
nrow(df[which(df$MPRA == F & df$eQTL == T & df$Geschwin_HiC == F),]) ##477
mor<-read.table("~/Morabito_Sup4.txt")
colnames(mor)<-mor[1,]
mor<-mor[-1,]
mor$seqnames<-sapply(strsplit(mor$cCRE,"-"), `[`,1)
mor$start<-sapply(strsplit(mor$cCRE,"-"), `[`,2)
mor$end<-sapply(strsplit(mor$cCRE,"-"), `[`,3)
mor<-GRanges(mor)
olap<-findOverlaps(c2, mor)
c2.olap<-c2[queryHits(olap)]
c2.olap$morabito_gene<-mor[subjectHits(olap)]$target_gene
c2.olap$morabito_group<-mor[subjectHits(olap)]$group
agree<-c2.olap[which(c2.olap$gene ==c2.olap$morabito_gene),]
prop1<-length(agree[which(agree$CooperMPRA==T | agree$Weiss_NPC==T | agree$Weiss_ESC==T | agree$Uebbing_MPRA==T | agree$SuRE_K562==T | agree$SuRE_HePG2==T | agree$GTEx_FC==T | agree$eQTL_Bryois==T | agree$Geschwin_HiC==T)])
mat<-matrix(data=c(245775,65873,6589,1668),ncol=2)
fisher.test(t(mat))
Cooper MPRA 1 Weiss NPC 9 Weiss ESC 14 Uebbing MPRA 83 SuRE K562 984 SuRE HepG2 567 GTEx FC 6 Byois eQTL 15 HiC 241
## APP
gRNA<-read.xlsx("~/scMultiomics_AD/SingleCellValidation_Luciferase_HEKs_transformed.xlsx")
gRNA<-gRNA[-1,]
gRNA$seqnames<-sapply(strsplit(gRNA$Region,"_"), `[`,1)
gRNA$start<-sapply(strsplit(gRNA$Region,"_"), `[`,2)
gRNA$end<-as.numeric(gRNA$start)+500
gRNA<-GRanges(gRNA)
gRNA.app<-gRNA[grepl("APP", gRNA$Linked.gene),]
app<-subsetByOverlaps(c2[which(c2$gene=="APP"),], gRNA.app)
signac_form<-as.data.frame(app)[,-c(1,2,3)]
colnames(signac_form)[c(19,20,21)]<-c("seqnames","start","end")
signac_form$seqnames<-paste0("chr",signac_form$seqnames)
signac_form$score<-ifelse(signac_form$group=="common", (signac_form$score.x+ signac_form$score.y)/2,
ifelse(signac_form$group=="Ctrl",signac_form$score.y, signac_form$score.x))
signac.gr<-GRanges(signac_form)
signac.gr$group2<-ifelse(signac.gr$group=="AD",0,
ifelse(signac.gr$group=="common",1,2))
ex.gr<-signac.gr
min.cutoff<-min(ex.gr$score)*2
min.cutoff<-ifelse(min.cutoff<0, min.cutoff, 0)
max.cutoff<-max(ex.gr$score)*2
Idents(data)<-data$predicted.id
Idents(data)<-factor(Idents(data), levels=c("Astrocytes","Excitatory","Inhibitory","Microglia","Oligodendrocytes","OPCs"))
region<-GRanges(paste0(seqnames(ex.gr)[1],":",min(start(ex.gr))-5000,"-",max(end(ex.gr)+5000)))
DefaultAssay(data)<-"CTpeaks"
cov_plot<-CoveragePlot(object=data, region=region, assay="CTpeaks",annotation=F, idents=c("Astrocytes","Excitatory","Inhibitory","Microglia","Oligodendrocytes","OPCs"),window=5000,peaks=F, links=F)&
scale_fill_manual(values=c("darkgoldenrod1","cornflowerblue","seagreen3","mediumorchid3","coral3","firebrick"))
Links(data)<-ex.gr ## full
link_plotA <- LinkPlot.height(data, region=region, min.cutoff=min.cutoff, max.cutoff=max.cutoff)+ylab("Score")
expr_plot <- ExpressionPlot(object = data,features = gene,assay = "RNA", idents=c("Astrocytes","Excitatory","Inhibitory","Microglia", "Oligodendrocytes", "OPCs"))&scale_fill_manual(limits=c("Astrocytes","Excitatory","Inhibitory","Microglia","Oligodendrocytes","OPCs"),values=c("darkgoldenrod1","cornflowerblue","seagreen3","mediumorchid3","coral3","firebrick"))
DefaultAssay(data)<-"ATAC"
gene_plot<-AnnotationPlot(data, region=region)
p<-CombineTracks(plotlist=list(cov_plot,link_plotA, gene_plot), heights=c(6,2,2))
pdf(paste0("~/scMultiomics_AD/APP_linkPlot_olap_Luciferase.pdf"), width=8, height=8)
p
dev.off()
t1<-as.data.frame(table(a10.gr$gene))
t2<-as.data.frame(table(c10.gr$gene))
t<-merge(t1,t2,by="Var1",all=T)
colnames(t)<-c("Var1","AD","Ctrl")
t<-melt(t)
p1<-as.data.frame(table(start(a10.gr)))
p2<-as.data.frame(table(start(c10.gr)))
p<-merge(p1,p2,by="Var1",all=T)
colnames(p)<-c("Var1","AD","Ctrl")
p<-melt(p)
pdf("~/scMultiomics_AD/AD_linksPerGene_patch.pdf", height=5,width=7)
a=ggplot(t, aes(x=value, color=variable,fill=variable))+geom_density(aes(y = ..count..), alpha=0.2, adjust=2)+xlab("Links per Gene")+theme_classic()+
scale_fill_manual(values=c("red","blue"))+scale_color_manual(values=c("red","blue"))+
theme(axis.text=element_text(size=8), legend.position="none")+ylab("counts")
b=ggplot(p, aes(x=value, color=variable,fill=variable))+geom_density( aes(y = ..count..),alpha=0.2, adjust=4)+xlab("Links per Peak")+theme_classic()+
scale_fill_manual(values=c("red","blue"))+scale_color_manual(values=c("red","blue"))+
theme(axis.text=element_text(size=8), legend.position="none") +ylab("counts")
b+a
dev.off()
var<-rowVars(data[["RNA"]]@data)
var<-as.data.frame(var)
var$gene<-rownames(data[["RNA"]])
biny<-merge(links,var, by="gene", all.y=T)
biny$score<-ifelse(biny$group=="AD", biny$score.x,biny$score.y)
biny$bin<-ifelse(is.na(biny$CT)==T, "0-0.2",
ifelse(abs(biny$score)<=0.4, "0.2-0.4",
ifelse(abs(biny$score)<=0.6,"0.4-0.6",">0.6")))
## sumR is rowsums >200
biny2<-biny[which(biny$gene %in% names(sumR)),] ## remove genes with no expression
biny2<-biny2[order(abs(biny2$score), decreasing=T),] ## order on score
biny2<-biny2[!duplicated(biny2$gene),]
biny2$bin<-factor(biny2$bin, levels=c("0-0.2","0.2-0.4","0.4-0.6",">0.6"), ordered=T)
pdf("~/22_restart/Gene_var_by_bin.pdf")
ggplot(biny2, aes(x=bin, y=log(var)))+geom_boxplot()+theme_classic()+ylim(-8,3)
dev.off()
Microglia<-subset(data,subset=predicted.id=="Microglia")
var<-rowVars(Microglia[["RNA"]]@data)
var<-as.data.frame(var)
var$gene<-rownames(data[["RNA"]])
links_t<-links[which(links$microglia==T),]
biny<-merge(links_t,var, by="gene", all.y=T)
biny$score<-ifelse(biny$group=="AD", biny$score.x,biny$score.y)
biny$bin<-ifelse(is.na(biny$CT)==T, "0-0.2",
ifelse(abs(biny$score)<=0.4, "0.2-0.4",
ifelse(abs(biny$score)<=0.6,"0.4-0.6",">0.6")))
## sumR is rowsums >200
biny2<-biny[which(biny$gene %in% names(sumR)),] ## remove genes with no expression
biny2<-biny2[order(abs(biny2$score), decreasing=T),] ## order on score
biny2<-biny2[!duplicated(biny2$gene),]
biny2$bin<-factor(biny2$bin, levels=c("0-0.2","0.2-0.4","0.4-0.6",">0.6"), ordered=T)
pdf("~/22_restart/Gene_var_by_bin_mic.pdf")
ggplot(biny2, aes(x=bin, y=log(var)))+geom_boxplot()+theme_classic()+ylim(-8,10)
dev.off()
AD_var<-aggregate(abs(score.x)~gene, as.data.frame(c2), var)
AD_mean<-aggregate(abs(score.x)~gene, as.data.frame(c2), mean)
colnames(AD_mean)[2]<-"AD_mean"
Ctrl_var<-aggregate(abs(score.y)~gene, as.data.frame(c2),var)
Ctrl_mean<-aggregate(abs(score.y)~gene, as.data.frame(c2), mean)
colnames(Ctrl_mean)[2]<-"Ctrl_mean"
both<-merge(AD_var, merge(AD_mean, merge(Ctrl_var,Ctrl_mean, by="gene", all=T), by="gene", all=T),by="gene", all=T)
both[is.na(both)]<-0
colnames(both)[c(2,4)]<-c("AD_var","Ctrl_var")
both<-both[order(both$AD_var),]
pdf("~/22_restart/Links_score_var.pdf")
ggplot(both, aes(x=AD_var,y=Ctrl_var))+geom_point()
dev.off()
ad<-merge(AD_var, AD_mean, by="gene")
ad$group<-"AD"
ctrl<-merge(Ctrl_var, Ctrl_mean, by="gene")
ctrl$group<-"Ctrl"
colnames(ad)[2:3]<-c("var","mean")
colnames(ctrl)[2:3]<-c("var","mean")
both2<-rbind(ad,ctrl)
both2<-both2[is.na(both2$var)==F,]
both2$var<-as.numeric(both2$var)
both2<-both2[order(-both2$mean, -both2$var),]
pdf("~/scMultiomics_AD/Links_score_var_byMean.pdf")
ggplot(both2, aes(x=mean, y=var))+geom_point()+xlab("Mean |score|")+ylab("Var |score|")+theme_classic()+geom_smooth()+geom_text_repel(data=subset(both2, var>0.06 | mean>0.52), aes(x=mean,y=var, label=gene), color="firebrick")+theme(axis.text=element_text(size=10),axis.title=element_text(size=15))
dev.off()