Preprocessing

PMI associated gene list from [https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5511187/#sec-16title]

library(Seurat)
library(sctransform)
library(dplyr)
library(scDblFinder)
library(biomaRt) 
library(scater)
library(SingleCellExperiment)
library(SeuratDisk)
library(Signac)
library(GenomicRanges)
library(rtracklayer)
library(EnsDb.Hsapiens.v86)
library(scales)
library(future) 
plan("multicore", workers = 5)
options(future.globals.maxSize = 1000000 * 1024^2)
library(harmony)
library(biovizBase)



data.data <- Read10X(data.dir = "~/outs/filtered_feature_bc_matrix/")
rna_counts <- data.data$`Gene Expression`
atac_counts <- data.data$Peaks

data <- CreateSeuratObject(counts = rna_counts,project="all")
step<-"Init"
numCells<-nrow(data@meta.data)
###################################################################################
#   Remove PMI genes
PMI<-read.csv("~/PMI_assoc_genes.csv",header=T)
PMI<-PMI[which(PMI$Tissuename=="Cerebral Cortex"),]
threshold<-c()

data<-data[-which(rownames(data) %in% PMI$Gene.Name),]
data[["percent.mt"]] <- PercentageFeatureSet(data, pattern = "^MT-")
a<-signif(quantile(data$nFeature_RNA,probs=0.99)[[1]],digits=1)
threshold<-a
###################################################################################






###################################################################################
#     Add atac assay
#   and get TSS, nucleosome signal, blacklist fraction
grange.counts <- StringToGRanges(rownames(atac_counts), sep = c(":", "-"))
grange.use <- seqnames(grange.counts) %in% standardChromosomes(grange.counts)
atac_counts <- atac_counts[as.vector(grange.use), ]
annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)
seqlevelsStyle(annotations) <- 'UCSC'
genome(annotations) <- "hg38"

frag.file <- paste0("~/outs/atac_fragments.tsv.gz")
chrom_assay <- CreateChromatinAssay(
  counts = atac_counts,
  sep = c(":", "-"),
  genome = 'hg38',
  fragments = frag.file,
  min.cells = 10,
  annotation = annotations
)
data[["ATAC"]] <- chrom_assay
DefaultAssay(data)<-"ATAC"
data <- NucleosomeSignal(data)
data <- TSSEnrichment(data)
data$blacklist_fraction <- FractionCountsInRegion(object = data, assay = 'ATAC',regions = blacklist_hg38)
###################################################################################


###################################################################################
sam_BEB18034<-read.csv("~/sample2_barcodes_BEB18034.csv") 
sam_BEB18034$x<-gsub("2","1",sam_BEB18034$x)
sam_BNT1261<-read.csv("~/sample2_barcodes_BNT1261.csv")
sam_BNT1261$x<-gsub("2","1",sam_BNT1261$x)
sam_BEB19157<-read.csv("~/sample3_barcodes_BEB19157.csv")
sam_BEB19157$x<-gsub("3","2",sam_BEB19157$x)
sam_1230<-read.csv("~/sample3_barcodes_1230.csv")
sam_1230$x<-gsub("3","2",sam_1230$x)
sam_3329<-read.csv("~/sample4_barcodes_3329.csv")
sam_3329$x<-gsub("4","3",sam_3329$x)
sam_HBQS<-read.csv("~/sample4_barcodes_HBQS.csv")
sam_HBQS$x<-gsub("4","3",sam_HBQS$x)
sam_BEB18062<-read.csv("~/sample5_barcodes_BEB18062.csv")
sam_BEB18062$x<-gsub("5","4",sam_BEB18062$x)
sam_NT1271<-read.csv("~/sample5_barcodes_NT1271.csv")
sam_NT1271$x<-gsub("5","4",sam_NT1271$x)
sam_4313<-read.csv("~/sample9_barcodes_4313.csv")
sam_4313$x<-gsub("9","5",sam_4313$x)
sam_4482<-read.csv("~/sample9_barcodes_4482.csv")
sam_4482$x<-gsub("9","5",sam_4482$x)
sam_HCT17HEX<-read.csv("~/sample10_barcodes_HCT17HEX.csv")
sam_HCT17HEX$x<-gsub("10","6",sam_HCT17HEX$x)
sam_HCTZZT<-read.csv("~/sample10_barcodes_HCTZZT.csv")
sam_HCTZZT$x<-gsub("10","6",sam_HCTZZT$x)
sam_4305<-read.csv("~/sample11_barcodes_4305.csv")
sam_4305$x<-gsub("11","7",sam_4305$x)
sam_4443<-read.csv("~/sample11_barcodes_4443.csv")
sam_4443$x<-gsub("11","7",sam_4443$x)
###################################################################################


###################################################################################
data$id_B<-ifelse(colnames(data) %in% sam_BEB18034$x, "BEB18034",
                ifelse(colnames(data) %in% sam_BNT1261$x, "NT1261",
                       ifelse(colnames(data) %in% sam_BEB19157$x,"BEB19157",           
                              ifelse(colnames(data) %in% sam_1230$x, "1230",
                                     ifelse(colnames(data) %in% sam_3329$x, "3329",
                                            ifelse(colnames(data) %in% sam_HBQS$x,    "HBQS",                 
                                                   ifelse(colnames(data) %in% sam_BEB18062$x,"BEB18062",                     
                                                          ifelse(colnames(data) %in% sam_NT1271$x,  "NT1271",                   
                                                                 ifelse(colnames(data) %in% sam_4313$x,    "4313",                 
                                                                        ifelse(colnames(data) %in% sam_4482$x,    "4482",                 
                                                                               ifelse(colnames(data) %in% sam_HCT17HEX$x,"HCT17HEX",                     
                                                                                      ifelse(colnames(data) %in% sam_HCTZZT$x,  "HCTZZT",                   
                                                                                             ifelse(colnames(data) %in% sam_4305$x, "4305",
                                                                                                    ifelse(colnames(data) %in% sam_4443$x,    "4443", "Other"))))))))))))))       




data$id<-ifelse(grepl("-8$|-9$",rownames(data@meta.data)), "4674",
                ifelse(grepl("-10$|-11$",rownames(data@meta.data)),"1238",
                       ifelse(grepl("-12$|-13$",rownames(data@meta.data)),"4627",
                              ifelse(grepl("-14$|-15$",rownames(data@meta.data)),"1224",
                                     ifelse(grepl("-16$|-17$",rownames(data@meta.data)),"3586",
                                            ifelse(grepl("-18$|-19$",rownames(data@meta.data)),"4481",
                                                   ifelse(grepl("-20$|-21$",rownames(data@meta.data)),"5640",
                                                          ifelse(grepl("-22$|-23$", rownames(data@meta.data)),"4064", data$id_B))))))))

md<-data@meta.data
md$cell<-rownames(md)
meta<-read.csv("~/Meta.csv")
colnames(meta)[1]<-"id"

tmp<-merge(md, meta, by="id")
rownames(tmp)<-tmp$cell
tmp[,1:15]<-NULL
data<-AddMetaData(data, tmp)

data<-subset(data, id=="Other", invert=T) #removed those called as doublet/unassigned
###################################################################################




###################################################################################
pdf("~/VlnPlot_init.pdf", width=14, height=8)
VlnPlot(data, features =  c("nCount_ATAC", "nFeature_RNA","nCount_RNA","percent.mt","blacklist_fraction",'TSS.enrichment','nucleosome_signal'), ncol = 4, pt.size = 0, group.by="id") + NoLegend() &theme(text=element_text(size=8, angle=20), axis.text.x=element_text(size=7))    
dev.off()
###################################################################################

Filter, Norm, Scale, Dim Reduc

###################################################################################
#                     Filtering, norm, scale, dim reduc
data<-subset(data, subset = nFeature_RNA < 10000 &   
               nFeature_RNA > 200 &
               nucleosome_signal < 2 &
               percent.mt < 5 & 
               TSS.enrichment > 2)

data<-SCTransform(data, vars.to.regress = c("percent.mt"),verbose=F) %>% RunPCA(ndims=30) %>% FindNeighbors(dims = 1:30) %>% 
  RunUMAP(dims = 1:30, reduction.name="umap.rna", reduction.key = "rnaUMAP_") 
DefaultAssay(data)<-"ATAC"
data<-RunTFIDF(data) %>% FindTopFeatures(min.cutoff='q0')%>% RunSVD()%>% 
  RunUMAP(reduction='lsi',dims=2:50,reduction.name="umap.atac",reduction.key="atacUMAP_") 
step<-c(step,"filter")
numCells<-c(numCells, nrow(data@meta.data))
###################################################################################
###################################################################################
pdf("~/Init_Umap.pdf")
p1<-DimPlot(data, reduction="umap.rna")
p2<-DimPlot(data, reduction="umap.atac")
p1+p2
dev.off()
###################################################################################

doublet filter

###################################################################################
#                     Doublet
DefaultAssay(data)<-"SCT"
sce<-as.SingleCellExperiment(data)
dbl<-computeDoubletDensity(sce)
data$isDbl<-ifelse(dbl<3.5,1,0)
data$dbl<-dbl
###################################################################################
###################################################################################
pdf("~/Doublet.pdf")
p1<-DimPlot( data,group.by="isDbl",reduction="umap.rna")
p2<-FeaturePlot(data, features="dbl",reduction="umap.rna", max.cutoff=10)
p1+p2
dev.off()
###################################################################################
#      1224       1230       1238       3329       3586       4064
# 0 0.04988328 0.01993355 0.02109426 0.03671148 0.03088433 0.02903396
# 1 0.95011672 0.98006645 0.97890574 0.96328852 0.96911567 0.97096604
# 
#      4305       4313       4443       4481       4482       4627
# 0 0.04025578 0.01385042 0.02153655 0.06106747 0.03017145 0.05621544
# 1 0.95974422 0.98614958 0.97846345 0.93893253 0.96982855 0.94378456
# 
#      4674       5640   BEB18034   BEB18062   BEB19157       HBQS
# 0 0.03709199 0.07242729 0.04557921 0.03784254 0.03742250 0.02955741
# 1 0.96290801 0.92757271 0.95442079 0.96215746 0.96257750 0.97044259
# 
#    HCT17HEX     HCTZZT     NT1261     NT1271
# 0 0.05676761 0.04646697 0.04438848 0.03055833
# 1 0.94323239 0.95353303 0.95561152 0.96944167



data<-subset(data, subset= isDbl==1)
###################################################################################




###################################################################################
#                         Subset to Protein coding genes & recluster
PC_genes<-read.csv("~/PC_genes.csv",header=T)

tmp_assay<-subset(data[["RNA"]],features =PC_genes$x)
Key(tmp_assay)<-"pc_"
data[["PC"]]<-tmp_assay
DefaultAssay(data)<-"PC"
data <- SCTransform(data,assay="PC",new.assay.name="SCT",vars.to.regress = c("percent.mt"),verbose=F) %>% RunPCA(ndims=30) %>% 
  FindNeighbors(dims = 1:30) %>% RunUMAP(dims = 1:30, reduction.name="umap.rna",reduction.key = "rnaUMAP_")
data<-FindMultiModalNeighbors(data, reduction.list=list("pca","lsi"), dims.list=list(1:30,2:50))
data<-RunUMAP(data,nn.name="weighted.nn", reduction.name="wnn.umap",reduction.key="wnnUMAP_")
data<-FindClusters(data, graph.name="wsnn", algorithm=3)

###################################################################################
pdf("~/rna_atac.umap.RECLUSTER.pdf", width=14, height=8)
p1=DimPlot(data, reduction = "umap.rna") + ggtitle("RNA")+theme(legend.position="none")
p2=DimPlot(data, reduction = "umap.atac") + ggtitle("ATAC")+theme(legend.position="none")
p3=DimPlot(data, reduction="wnn.umap")+ ggtitle("WNN")
p1+p2+p3
dev.off()
###################################################################################

Reference Mapping

###################################################################################
#                       Map to Ref
Map<-readRDS("~/Mathys_DLPFC_SCT_normalized.rds")

anchors<-FindTransferAnchors(reference=Map, query=data, dims=1:30, normalization.method="SCT", 
                             query.assay="SCT")
predictions<-TransferData(anchorset=anchors, reference=Map, query=data,
                          refdata=Map$celltype, dims=1:30)
data$predicted.id<-predictions$predicted.id
data$predicted.id.score<-predictions$predicted.id.score
###################################################################################
pdf("~/predicted.id.pdf",width=12,height=7)
p1<-DimPlot(data, group.by="predicted.id", reduction="wnn.umap")
p2<-FeaturePlot(data, features="predicted.id.score", reduction="wnn.umap",cols=c("coral1","deepskyblue3"))
p1+p2
dev.off()
###################################################################################

data2<-subset(data, subset=predicted.id.score>0.95)
step<-c(step,"predicted.id")
numCells<-c(numCells, nrow(data2@meta.data))

Harmony batch correction

###################################################################################
###################################################################################

#with all
data2<-subset(data, subset=predicted.id.score>0.95)

integrated<-RunHarmony(data2, group.by.vars="id",reduction="pca",assay.use="SCT", reduction.save="harmony.rna", theta=1)
integrated<-RunUMAP(integrated,reduction="harmony.rna",dims=1:40,reduction.name="harmony.rna.umap")
integrated<-FindNeighbors(integrated,dims=1:30, reduction="harmony.rna")
integrated<-FindClusters(integrated, graph.name="SCT_nn", resolution=2.8)

pdf("~/WNN_Harmony_plot_CT.pdf", width=15, height=7)
p1<-DimPlot(integrated, group.by="predicted.id", reduction="harmony.rna.umap")
p2<-DimPlot(integrated, group.by="id", reduction="harmony.rna.umap")
p3<-DimPlot(integrated, reduction="harmony.rna.umap", label=T)
p1+p2+p3
dev.off()
pdf("~/WNN_Harmony_plot_batch.pdf", width=15, height=7)
DimPlot(integrated, group.by="predicted.id", reduction="harmony.rna.umap")
dev.off()

###################################################################################

#filter out cells that are not assigned the same cell type as their neighbors. Additional doublet precaution 
Idents(int)<-int$seurat_clusters
tab<-table(int$seurat_clusters, int$predicted.id)
tmp<-colnames(tab)[apply(tab,1,which.max)]
names(tmp)<-levels(int)
int<-RenameIdents(int,tmp)
int$cluster_celltype<-Idents(int)
int$agree<-ifelse(int$cluster_celltype==int$predicted.id, 1,0)

###################################################################################



integrated_sub2<-subset(int, subset=agree==1) 

integrated_sub2 <- SCTransform(integrated_sub2,assay="PC",new.assay.name="SCT",vars.to.regress = c("percent.mt"),verbose=F) %>% RunPCA(ndims=30) %>% 
  FindNeighbors(dims = 1:30) %>% RunUMAP(dims = 1:30, reduction.name="umap.rna",reduction.key = "rnaUMAP_") 

integrated_sub2<-RunHarmony(integrated_sub2, group.by.vars="id",reduction="pca",assay.use="SCT", reduction.save="harmony.rna")
integrated_sub2<-RunUMAP(integrated_sub2,reduction="harmony.rna",dims=1:30,reduction.name="harmony.rna.umap")


DefaultAssay(integrated_sub2)<-"ATAC"
integrated_sub2<-RunTFIDF(integrated_sub2) %>% FindTopFeatures(min.cutoff='q0')%>% RunSVD()
integrated_sub2<-ScaleData(integrated_sub2, rownames(integrated_sub2[["ATAC"]]))
integrated_sub2<-RunHarmony(integrated_sub2, group.by.vars="id", reduction="lsi",assay.use="ATAC",reduction.save="harmony.atac", project.dim=F)
integrated_sub2<-RunUMAP(integrated_sub2,reduction="harmony.atac",dims=1:30,reduction.name="harmony.atac.umap")

integrated_sub2<-FindMultiModalNeighbors(integrated_sub2, reduction.list=list("harmony.rna","harmony.atac"), dims.list=list(1:30,2:50))
integrated_sub2<-RunUMAP(integrated_sub2,nn.name="weighted.nn", reduction.name="wnn.umap",reduction.key="wnnUMAP_")
data<-integrated_sub2

###################################################################################

Peak-calling

###################################################################################
DefaultAssay(data)<-"ATAC"
CTpeaks<-CallPeaks(data, macs2.path="~/miniconda3/envs/env1/bin/macs2", group.by="predicted.id")

chr<-c()
for (i in 1:22){
  chr<-c(chr, paste0("chr",i))
}
chr<-c(chr,"chrX")
chr<-c(chr,"chrY")
CTpeaks2<-CTpeaks[seqnames(CTpeaks) %in% chr]


df <- as.data.frame(CTpeaks)
df<-df[which(df$seqnames %in% chr),]
#bed file for reanalyze [sort -k1,1 -k2,2n CT_peak_set.bed]
write.table(df[,1:3], file="~/CT_peak_set.bed", quote=F, sep="\t", row.names=F, col.names=F)
CTpeaks2<-makeGRangesFromDataFrame(df)
macs2_counts<-FeatureMatrix(fragments=Fragments(data), features=CTpeaks2,cells=colnames(data))
annotation <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)
seqlevelsStyle(annotation) <- "UCSC"
data[["CTpeaks"]]<-CreateChromatinAssay(counts=macs2_counts,  fragments=Fragments(data))
#get peak list labeled by celltype
write.csv(df,"~/22_restart/CTpeaks_annotated.csv")



saveRDS(data, "~/final.rds")

# get AD and Ctrl barcodes
df$status<-data$Status
AD<-df[which(df$status=="AD"),1:2]
Ctrl<-df[which(df$status=="Ctrl"),1:2]
write.csv(AD, "/barcodes_AD.csv", row.names=F, quote=F)
write.csv(Ctrl, "~/barcodes_Ctrl.csv", row.names=F, quote=F)

Peak-annot: encode

load("~/iCellGABA_CRE_ENCODEstyle_GR.Rvar")
enc=full_annot_encode_cre_def_GR
enc<-enc[!grepl("iCell",enc$accesionLabel),]
u2<-read.csv("~/scMultiomics_AD/CTpeaks_annotated.csv")
u2$index<-seq(1,nrow(u2))
ol_enc=findOverlaps(GRanges(u2),enc)


u2$Astrocytes      <-grepl("Astrocytes",u2$peak_called_in)
u2$Microglia       <-grepl("Microglia",       u2$peak_called_in)
u2$Inhibitory      <-grepl("Inhibitory",      u2$peak_called_in)
u2$Excitatory      <-grepl("Excitatory",      u2$peak_called_in)
u2$Inhibitory      <-grepl("Inhibitory",      u2$peak_called_in)
u2$Oligodendrocytes<-grepl("Oligodendrocytes",u2$peak_called_in)
u2$OPCs             <-grepl("OPCs",            u2$peak_called_in)
u2$Pericytes        <-grepl("Pericytes",       u2$peak_called_in)
u2$Endothelial      <-grepl("Endothelial",      u2$peak_called_in)

c2_enc<-u2[queryHits(ol_enc),]
c2_enc$encLab<-as.character(enc[subjectHits(ol_enc),]$encodeLabel)
c2_enc<-c2_enc[!duplicated(c2_enc$index),]
d2<-merge(c2_enc, u2, by="index", all=T)
d2$encLab<-ifelse(is.na(d2$encLab)==T, "None",d2$encLab)


# grouped together
tmp<-d2
# tmp$numCT<-str_count(tmp$CT.x,",")
# tmp<-tmp[which(tmp$numCT==0),]
a<-prop.table(table(tmp$encLab, tmp$Astrocytes.y),2)
m<-prop.table(table(tmp$encLab, tmp$Microglia.y),2)
e<-prop.table(table(tmp$encLab, tmp$Excitatory.y),2)
i<-prop.table(table(tmp$encLab, tmp$Inhibitory.y),2)
o<-prop.table(table(tmp$encLab, tmp$Oligodendrocytes.y),2)
p<-prop.table(table(tmp$encLab, tmp$OPCs.y),2)
df<-cbind(a[,2],m[,2],e[,2],i[,2],o[,2],p[,2])
colnames(df)<-c("Ast","Mic","Exc","Inh","Oli","OPC")
df<-as.data.frame(df)
df$annot<-rownames(df)
df<-melt(df)


pdf("~/scMultiomics_AD/Peaks_annot_encode2.pdf", width=8, height=3)
ggplot(df, aes(x=value, y=variable, fill=annot))+geom_bar(stat="identity")+xlab("Proportion of Linked peaks")+ylab("Cell type")+theme_classic()+scale_fill_brewer(palette="Dark2",labels=c("CTCF-only","dELS","DNase-H3K4me3","pELS","PLS","other"))+theme(legend.position="right", axis.text=element_text(size=12),axis.title=element_text(size=15), legend.text=element_text(size=12))
dev.off()

H3K27ac overlap

H3K27ac from Nott et al. 2019 and Kolzlenkov et al. 2018

peaks<-read.csv("~/scMultiomics_AD/CTpeaks_annotated.csv")
peaks<-GRanges(peaks)



load(file="~/Nott_k27ac.rda")
ch<-import.chain("~/liftover/hg19ToHg38.over.chain")
Nott_k27ac_38<-liftOver(Nott_k27ac, ch)
Nott_k27ac_38<-unlist(Nott_k27ac_38)  # lost 1,700
load("~/Dracheva_K27ac.rda")
all.k27<-c(Nott_k27ac_38, K27List$GABA[,1], K27List$GLU[,1])
over<-findOverlaps(peaks, all.k27)

all_olap<-peaks[queryHits(over)]  # 189833
mcols(all_olap)$k27_ct<-all.k27[subjectHits(over)]$cell_type

all_olap$sub_ct<-gsub("GLU","Excitatory",all_olap$k27_ct)
all_olap$sub_ct<-gsub("GABA","Inhibitory",all_olap$sub_ct)

ct<-strsplit(all_olap$peak_called_in, ",")
a<-c()
for (i in 1:length(ct)){
 
  a<-c(a, all_olap$sub_ct[i] %in% ct[[i]])
  }


neuron<-all_olap[which(all_olap$sub_ct=="Neuron"),]
neuron1<-neuron[grepl("Excitatory", neuron$peak_called_in) | grepl("Inhibitory", neuron$peak_called_in),]

all_agree<-all_olap[a,]
olap_k27<-c(neuron1,all_agree)
olap_k27$peak<-paste(seqnames(olap_k27), "-", start(olap_k27),"-", end(olap_k27), sep="")

a<-as.data.frame(olap_k27) %>% group_by(peak) %>% summarize(k27= paste(sort(unique(k27_ct)),collapse="&"))

peaks$peak<-paste(seqnames(peaks), "-", start(peaks),"-", end(peaks), sep="")

tmp<-merge(peaks, a, by="peak", all.x=T)

tmp$ATAC_num<-str_count(tmp$peak_called_in, ",")+1
tmp$k27_num<-str_count(tmp$k27, "&")+1



k27_over<-findOverlaps(all.k27, all_agree) # 247359  # 398596
peaks_over<-findOverlaps(peaks, all_agree) # 125746   # 386892




write.csv(tmp, "~/scMultiomics_AD/CTpeaks_wK27olap.csv")

Cell type-spec

If peak was called in a cell type, does it overlap H3K27ac of that cell type? For peaks that were called in multiple cell types, they may overlap H3K27ac of only one cell type. (ie. peak called in microglia and astrocytes but only overlaps microglia H3K27ac)

tmp<-read.csv("~/scMultiomics_AD/CTpeaks_wK27olap.csv")
olap<-GRanges(tmp)


Astrocytes<-olap[grepl("Astrocytes",olap$peak_called_in),]
k27_Astrocytes<-all.k27[which(all.k27$cell_type=="Astrocytes"),]
over<-findOverlaps(Astrocytes, k27_Astrocytes)
k27_Astrocytes$olap<-FALSE
k27_Astrocytes[subjectHits(over)]$olap<-TRUE
Astrocytes$found<-FALSE
Astrocytes[unique(queryHits(over))]$found<-T
Atab<-as.data.frame(table(Astrocytes$found))
Atab$ct<-"Astrocytes"
Atab$cat<-"ATAC"
Atab2<-as.data.frame(table(k27_Astrocytes$olap))
Atab2$ct<-"Astrocytes"
Atab2$cat<-"k27"
Atab<-rbind(Atab, Atab2)

Excitatory<-olap[grepl("Excitatory",olap$peak_called_in),]
k27_Excitatory<-all.k27[which(all.k27$cell_type %in% c("Neuron","GLU")),]
over<-findOverlaps(Excitatory, k27_Excitatory)
Excitatory$found<-F
Excitatory[unique(queryHits(over))]$found<-T
Etab<-as.data.frame(table(Excitatory$found))
Etab$ct<-"Excitatory"
Etab$cat<-"ATAC"
k27_Excitatory$olap<-FALSE
k27_Excitatory[subjectHits(over)]$olap<-TRUE
Etab2<-as.data.frame(table(k27_Excitatory$olap))
Etab2$ct<-"Excitatory"
Etab2$cat<-"k27"
Etab<-rbind(Etab, Etab2)


Inhibitory<-olap[grepl("Inhibitory",olap$peak_called_in),]
k27_Inhibitory<-reduce(all.k27[which(all.k27$cell_type %in% c("Neuron","GABA")),])
over<-findOverlaps(Inhibitory, k27_Inhibitory)
k27_Inhibitory$olap<-FALSE
k27_Inhibitory[subjectHits(over)]$olap<-TRUE
Inhibitory$found<-FALSE
Inhibitory[unique(queryHits(over))]$found<-TRUE
Itab<-as.data.frame(table( Inhibitory$found))
Itab$ct<-"Inhibitory"
Itab$cat<-"ATAC"
Itab2<-as.data.frame(table(k27_Inhibitory$olap))
Itab2$ct<-"Inhibitory"
Itab2$cat<-"k27"
Itab<-rbind(Itab, Itab2)


Oligodendrocytes<-olap[grepl("Oligodendrocytes",olap$peak_called_in),]
k27_Oligodendrocytes<-all.k27[which(all.k27$cell_type=="Oligodendrocytes"),]
over<-findOverlaps(Oligodendrocytes, k27_Oligodendrocytes)
Oligodendrocytes$found<-F
Oligodendrocytes[unique(queryHits(over))]$found<-T
Otab<-as.data.frame(table( Oligodendrocytes$found))
Otab$ct<-"Oligodendrocytes"
Otab$cat<-"ATAC"
k27_Oligodendrocytes$olap<-FALSE
k27_Oligodendrocytes[subjectHits(over)]$olap<-TRUE
Otab2<-as.data.frame(table(k27_Oligodendrocytes$olap))
Otab2$ct<-"Oligodendrocytes"
Otab2$cat<-"k27"
Otab<-rbind(Otab, Otab2)


Microglia<-olap[grepl("Microglia",olap$peak_called_in),]
k27_Microglia<-all.k27[which(all.k27$cell_type=="Microglia"),]
over<-findOverlaps(Microglia, k27_Microglia)
Microglia$found<-F
Microglia[unique(queryHits(over))]$found<-T
Mtab<-as.data.frame(table( Microglia$found))
Mtab$ct<-"Microglia"
Mtab$cat<-"ATAC"
k27_Microglia$olap<-FALSE
k27_Microglia[subjectHits(over)]$olap<-TRUE
Mtab2<-as.data.frame(table(k27_Microglia$olap))
Mtab2$ct<-"Microglia"
Mtab2$cat<-"k27"
Mtab<-rbind(Mtab, Mtab2)


df<-rbind(Atab, Etab, Itab, Otab, Mtab)




df$color<-paste0(df$ct,"-",df$Var1)
df$color<-paste0(df$cat,"-",df$Var1)
df$celltype<-substr(df$ct,start=1,stop=3)
df$color<-ifelse(df$Var1==F, "AA No Overlap",df$color)

pdf("~/scMultiomics_AD/CTpeaks_olapk27_barPlot2_filtered0.02_XY.pdf")
ggplot(df, aes(y=Freq, x=cat,  fill=color))+geom_bar(color="grey60",stat="identity")+theme_classic()+ylab("#  of Peaks") + xlab("")+scale_x_discrete(labels=c("ATAC", "H3K27ac"))+
  theme(axis.text.x=element_text(angle=90, size=15), plot.title=element_text(hjust=0.5), legend.position="top", axis.title.y=element_text(size=17, angle=90), legend.title=element_text(size=0), legend.text=element_text(size=15), axis.text.y=element_text(size=15, angle=90))+scale_y_continuous(position = "right")+
  facet_wrap(~celltype ,  ncol=5, strip.position="bottom") +scale_fill_manual(limits=c("AA No Overlap","ATAC-TRUE","k27-TRUE"),labels=c( "No Overlap","Overlap H3K27ac",  "Overlap ATAC")
,values=c(alpha("grey80",0.5),alpha("darkslategray3",0.5),"darkslategray3"))+xlab("")
dev.off()




per<-list()
j=1
for (i in seq(2,nrow(df),by=2)){
  per[[j]]<-df[i,2]/(df[i,2]+df[i-1,2])
  j=j+1
}

S2: STATS

num<-t(t(table(data$id)))
colnames(num)<-"Total"
ct<-t(t(table(data$id, data$predicted.id)))
r1<-aggregate(nCount_RNA~id, data@meta.data, mean)
r2<-aggregate(nFeature_RNA~id, data@meta.data, mean)
t1<-aggregate(percent.mt~id, data@meta.data, mean)
a1<-aggregate(nCount_ATAC~id, data@meta.data, mean)
a2<-aggregate(nFeature_ATAC~id, data@meta.data, mean)
t2<-aggregate(TSS.enrichment~id, data@meta.data, mean)

c<-cbind(r1,r2,t1,a1,a2,t2)
c<-c[,c(1,2,4,6,8,10,12)]

c$Total<-num
c$Astrocytes      <-ct[,1]
c$Endothelial     <-ct[,2]
c$Excitatory      <-ct[,3]
c$Inhibitory      <-ct[,4]
c$Microglia       <-ct[,5]
c$Oligodendrocytes<-ct[,6]
c$OPCs            <-ct[,7]
c$Pericytes       <-ct[,8]


meta<-read.csv("~/22_restart/meta_RIN_PMI.csv")
meta$ID[16]<-"HCTZZT"
meta2<-merge(meta, c, by.x="ID",by.y="id")

write.csv(meta2, "~/scMultiomics_AD/Sample_stats.csv", quote=F, row.names=F)

DEGs

extra<-read.csv("~/meta_RIN_PMI.csv")
extra[16,1]<-"HCTZZT"
meta<-data@meta.data
meta$X<-rownames(meta)
meta<-meta[,-which(names(meta) %in% c("Age","Braak"))]
md.m<-merge(meta, extra, by.x="id",by.y="ID")
rownames(md.m)<-md.m$X

data<-AddMetaData(data, md.m)

Fig2:AD/Ctrl

DefaultAssay(data)<-"PC"
all.genes<-rownames(data)
data<-NormalizeData(data) 

Astrocytes<-subset(data, subset=predicted.id=="Astrocytes")
Inhibitory<-subset(data, subset=predicted.id=="Inhibitory")
Excitatory<-subset(data, subset=predicted.id=="Excitatory")
Oligodendrocytes<-subset(data, subset=predicted.id=="Oligodendrocytes")
OPCs<-subset(data, subset=predicted.id=="OPCs")
Microglia<-subset(data, subset=predicted.id=="Microglia")

subs<-list(Astrocytes,Inhibitory,Excitatory,Oligodendrocytes,OPCs,Microglia)

library(MAST)

i=1
Idents(subs[[i]])<-subs[[i]]$Status
subs[[i]]<-NormalizeData(subs[[i]])
ALL<-FindMarkers(subs[[i]], ident.1="AD",ident.2="Ctrl",min.pct=0.25,latent.vars=c("Age","Sex"),test.use="MAST", assay="PC") 
ALL$celltype<-subs[[i]]$predicted.id[1]
ALL$gene<-rownames(ALL)
for (i in 2:length(subs)){
    Idents(subs[[i]])<-subs[[i]]$Status
    subs[[i]]<-NormalizeData(subs[[i]])%>% ScaleData(features=row.names(data))
    markers<-FindMarkers(subs[[i]], ident.1="AD",ident.2="Ctrl",min.pct=0.25, latent.vars=c("Age","Sex"), test.use="MAST",
                         assay="PC") 
    markers$celltype<-subs[[i]]$predicted.id[1]
    markers$gene<-rownames(markers)
    ALL<-rbind(ALL,markers)
}


ALL$cat<-ifelse(ALL$avg_log2FC >0, "up","down")

significant<-ALL[which(abs(ALL$avg_log2FC)>0.25),]
significant<-significant[which(significant$p_val_adj<0.01),]

write.csv(significant, "~/scMultiomics_AD/DEGs/DEGs_MAST_ADCtrl_AgeSex.csv")

Gene set enrichment

# # # #  all CT

degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_MAST_ADCtrl_AgeSex.csv")
library(enrichR)
setEnrichrSite("Enrichr")

dbs <- c("GO_Molecular_Function_2021", "GO_Cellular_Component_2021", "GO_Biological_Process_2021", "KEGG_2021_Human","Azimuth_Cell_Types_2021")


ast_down<-enrichr(degs[which(degs$celltype=="Astrocytes" & degs$cat=="down"),]$gene, dbs)
ast_up<-enrichr(degs[which(degs$celltype=="Astrocytes" & degs$cat=="up"),]$gene, dbs)


mic_down<-enrichr(degs[which(degs$celltype=="Microglia" & degs$cat=="down"),]$gene, dbs)
mic_up<-enrichr(degs[which(degs$celltype=="Microglia" & degs$cat=="up"),]$gene, dbs)


ex_down<-enrichr(degs[which(degs$celltype=="Excitatory" & degs$cat=="down"),]$gene, dbs)
ex_up<-enrichr(degs[which(degs$celltype=="Excitatory" & degs$cat=="up"),]$gene, dbs)


in_down<-enrichr(degs[which(degs$celltype=="Inhibitory" & degs$cat=="down"),]$gene, dbs)
in_up<-enrichr(degs[which(degs$celltype=="Inhibitory" & degs$cat=="up"),]$gene, dbs)


oli_down<-enrichr(degs[which(degs$celltype=="Oligodendrocytes" & degs$cat=="down"),]$gene, dbs)
oli_up<-enrichr(degs[which(degs$celltype=="Oligodendrocytes" & degs$cat=="up"),]$gene, dbs)


opc_down<-enrichr(degs[which(degs$celltype=="OPCs" & degs$cat=="down"),]$gene, dbs)
opc_up<-enrichr(degs[which(degs$celltype=="OPCs" & degs$cat=="up"),]$gene, dbs)

Fig2E top) DOWN

# down


a<-ast_down[["GO_Biological_Process_2021"]]
b<-mic_down[["GO_Biological_Process_2021"]]
c<-ex_down[["GO_Biological_Process_2021"]]
d<-in_down[["GO_Biological_Process_2021"]]
e<-oli_down[["GO_Biological_Process_2021"]]
f<-opc_down[["GO_Biological_Process_2021"]]

a$celltype<-"Astrocytes"
b$celltype<-"Microglia"
c$celltype<-"Excitatory"
d$celltype<-"Inhibitory"
e$celltype<-"Oligodendrocytes"
f$celltype<-"OPC"

df<-rbind(a,b,c,d,e,f)
df2<-df[which(df$Adjusted.P.value<0.05),]
df2<-df2[order(df2$Adjusted.P.value),]
top<-df2[c(1:10),1]
df2_BP_down<-df2

df2<-df[which(df$Adjusted.P.value<0.01),]

# write.csv(df2, "~/scMultiomics_AD/enrichr/top_BP_downAD_across_ct_MAST.csv")
write.csv(df2, "~/scMultiomics_AD/enrichr/top_BP_downAD_across_ct_MAST.csv")

 t<-table(df2_BP_down$Term)
topDown<-head(df2$Term, n=10)
topDown<-df2 %>% group_by(celltype) %>% top_n(n=2, wt=Combined.Score)
topDown<-topDown$Term
tD<-df[which(df$Term %in% topDown),]

mat<-matrix(nrow=9, ncol=6)
colnames(mat)<-unique(df$celltype)
for (j in 1:9){
  for (i in 1:6){
    termTmp<-unique(topDown)[j]
    ctTmp<-colnames(mat)[i]
    sub<-df[which(df$Term==termTmp & df$celltype==ctTmp),]
    if (nrow(sub)>0){
      mat[j,i]<-sub$Odds.Ratio
    }
    else{
      mat[j,i]<-0
    }
  }  
}

rownames(mat)<-sapply(strsplit(unique(topDown), " (", fixed=T), `[`, 1)
colnames(mat)<-substr(colnames(mat),start=1,stop=3)

sig_mat<-matrix(nrow=9, ncol=6)
colnames(sig_mat)<-unique(df$celltype)
for (j in 1:9){
  for (i in 1:6){
    termTmp<-unique(topDown)[j]
    ctTmp<-colnames(sig_mat)[i]
    sub<-df[which(df$Term==termTmp & df$celltype==ctTmp),]
    if (nrow(sub)>0){
      sig_mat[j,i]<-sub$Adjusted.P.value}
    else{
      sig_mat[j,i]<-1}  }}


mat<-mat[,c(1,3,4,2,5,6)]
sig_mat<-sig_mat[,c(1,3,4,2,5,6)]


ha2<-HeatmapAnnotation(celltype=colnames(mat)
, col= list(celltype=c("Ast"="darkgoldenrod1","Exc"="cornflowerblue","Inh"="seagreen3","Mic"="mediumorchid3","Oli"="coral3","OPC"="firebrick")), show_legend=F,annotation_label="")

ht=Heatmap(mat, cluster_rows=F,cluster_columns=F,col=colorRamp2(c(0,35,100),c("grey95","deepskyblue","dodgerblue4")),  top_annotation=ha2, name="Odds.Ratio",  show_column_names=T, show_row_names=T, column_title=NULL,row_names_side="left", row_title_gp=gpar(fontsize=14),row_names_max_width = max_text_width(
        rownames(mat), 
        gp = gpar(fontsize = 12)
    ), cell_fun = function(j, i, x, y, w, h, fill) {
    if(sig_mat[i, j] <0.05) {
        grid.text("*", x, y, gp=gpar(fontsize=20, col="white"), vjust="center")
    } })

pdf("~/scMultiomics_AD/Top_downDEG_GObp.pdf", width=13, height=5)
ht
dev.off()

Fig2E bottom) UP

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
#            BP UP
a<-ast_up[["GO_Biological_Process_2021"]]
b<-mic_up[["GO_Biological_Process_2021"]]
c<-ex_up[["GO_Biological_Process_2021"]]
d<-in_up[["GO_Biological_Process_2021"]]
e<-oli_up[["GO_Biological_Process_2021"]]
f<-opc_up[["GO_Biological_Process_2021"]]

a$celltype<-"Astrocytes"
b$celltype<-"Microglia"
c$celltype<-"Excitatory"
d$celltype<-"Inhibitory"
e$celltype<-"Oligodendrocytes"
f$celltype<-"OPC"

df<-rbind(a,b,c,d,e,f)
df_all<-df
df2<-df[which(df$Adjusted.P.value<0.05),]
df2<-df2[order(df2$Adjusted.P.value),]
top<-df2[c(1:10),1]


write.csv(df2, "~/scMultiomics_AD/enrichr/top_BP_upAD_across_ct_MAST.csv")
# up
 t<-table(df2$Term)
topup<-head(df2$Term, n=10)
topup<-df2 %>% group_by(celltype) %>% top_n(n=5, wt=Combined.Score)
topup<-topup[order(topup$celltype),]
topup<-topup$Term
tD<-df[which(df$Term %in% topup),]

mat<-matrix(nrow=length(unique(topup)), ncol=6)
colnames(mat)<-unique(df$celltype)
for (j in 1:length(unique(topup))){
  for (i in 1:6){
    termTmp<-unique(topup)[j]
    ctTmp<-colnames(mat)[i]
    sub<-df[which(df$Term==termTmp & df$celltype==ctTmp),]
    if (nrow(sub)>0){
      mat[j,i]<- sub$Odds.Ratio
    }
    else{
      mat[j,i]<-0
    }
  }  
}

rownames(mat)<-sapply(strsplit(unique(topup), " (", fixed=T), `[`, 1)
colnames(mat)<-substr(colnames(mat),start=1,stop=3)

sig_mat<-matrix(nrow=length(unique(topup)), ncol=6)
colnames(sig_mat)<-unique(df$celltype)
for (j in 1:length(unique(topup))){
  for (i in 1:6){
    termTmp<-unique(topup)[j]
    ctTmp<-colnames(sig_mat)[i]
    sub<-df[which(df$Term==termTmp & df$celltype==ctTmp),]
    if (nrow(sub)>0){
      sig_mat[j,i]<-sub$Adjusted.P.value}
    else{
      sig_mat[j,i]<-1}  }}

mat<-mat[,c(1,3,4,2,5,6)]
sig_mat<-sig_mat[,c(1,3,4,2,5,6)]

ha2<-HeatmapAnnotation(celltype=colnames(mat)
, col= list(celltype=c("Ast"="darkgoldenrod1","Exc"="cornflowerblue","Inh"="seagreen3","Mic"="mediumorchid3","Oli"="coral3","OPC"="firebrick")), show_legend=F,annotation_label="")

ht=Heatmap(mat, cluster_rows=F,cluster_columns=F,col=colorRamp2(c(0,35,100),c("grey95","red","red4")),  top_annotation=ha2, name="Odds.Ratio",  show_column_names=T, show_row_names=T, column_title=NULL,row_names_side="left", row_title_gp=gpar(fontsize=14),row_names_max_width = max_text_width(
        rownames(mat), 
        gp = gpar(fontsize = 12)
    ), cell_fun = function(j, i, x, y, w, h, fill) {
    if(sig_mat[i, j] <0.05) {
        grid.text("*", x, y, gp=gpar(fontsize=20, col="white"), vjust="center")
    } })

pdf("~/scMultiomics_AD/Top_upDEG_GObp.pdf", width=10, height=5)
ht
dev.off()

Fig2A) Heatmap

degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_MAST_ADCtrl_AgeSex.csv")
tab<-table(degs$celltype, degs$cat)

mat<-matrix(nrow=length(unique(degs$gene)),ncol=6)
rownames(mat)<-unique(degs$gene)
colnames(mat)<-unique(degs$celltype)

for (i in 1:nrow(mat)){
  for (j in 1:ncol(mat)){
    gene_tmp<-degs[which(degs$gene==rownames(mat)[i]),]
    gene_tmp<-gene_tmp[which(gene_tmp$celltype==colnames(mat)[j]),]
    mat[i,j]<-ifelse(nrow(gene_tmp)>0, gene_tmp$avg_log2FC,0)
  }
}

colnames(mat)<-substr(colnames(mat),start=1,stop=3)


# # all labels
noDup<-degs[!duplicated(degs$gene),]
mat<-mat[order(noDup$celltype, noDup$avg_log2FC),]
mat<-mat[,c(1,3,2,6,4,5)]
ha2<-HeatmapAnnotation(celltype=colnames(mat), col= list(celltype=c("Ast"="darkgoldenrod1","Exc"="cornflowerblue","Inh"="seagreen3","Mic"="mediumorchid3","Oli"="coral3","OPC"="firebrick")), show_legend=F)
bar1<-HeatmapAnnotation(up=anno_barplot(tab[,2], gp=gpar(fill="red"), axis_param=list(labels=c("","","","")), ylim=c(0,280)),
                        down=anno_barplot((tab[,1] * -1), gp=gpar(fill="blue"), axis_param=list(labels=c("","","")), ylim=c(-280,0),
                        show_annotation_name=F,
                        show_legend=F))
ha<-c( bar1, ha2)
# # 

# 
# # 
ad_related<-read.csv("~/AD_gwas_genes_012822.csv")
noDup<-noDup[order(noDup$celltype, noDup$avg_log2FC),]
lab<-which(noDup$gene %in% c(ad_related$Gene,"PLCG2","MAPT","ARL17B","SAMD4A","PTPRG","MDGA2","GPR158") & abs(noDup$avg_log2FC)>0.5)
ht=Heatmap(mat, cluster_rows=F,show_row_dend=F,cluster_columns=F,col=colorRamp2(c(-1,0,1),c("blue","white","red")), name="MAST-adj log2FC",  show_column_names=T, show_row_names=F, column_title=NULL,row_names_gp = gpar(fontsize = 3), top_annotation=ha)


pdf("~/scMultiomics_AD/AD_DEGs_MAST_heatmap_foldCs.pdf", width=6, height=8)
rowAnnotation(gene=anno_mark(at=lab, labels=noDup[lab,]$gene, side="left", labels_gp=gpar(fontsize=10)))+ht
dev.off()

Add labels for specific genes

genes<-read.csv("~/scMultiomics_AD/testing.csv", col.names=1)
genes$X1<-gsub(" ","", genes$X1)
tmp<-strsplit(genes$X1, ",")
tmp<-unlist(tmp)
write.csv(tmp, "~/scMultiomics_AD/Luciferase_linked_genes.csv")

# back to cluster
luc<-read.csv("~/scMultiomics_AD/Luciferase_linked_genes.csv")
toTest<-toTest[which(toTest$gene != "PVR"),]
all_connect_genes<-subsetByOverlaps(l2, toTest)
show_genes<-unique(c(luc$x, all_connect_genes$gene))
noDup<-noDup[order(noDup$celltype, noDup$avg_log2FC),]
lab<-which(noDup$gene %in% c(ad_related$Gene,"PLCG2","MAPT","ARL17B", show_genes,
                             "ACSL1", "CD163", "CD44", "CR1", "CEMIP2", "DAB2", "FOXP1", "LYVE1", "MAFB", "MITF", "MYO5A", "NOTCH2", "SLC11A1", "TGFBI"))


pdf("~/scMultiomics_AD/AD_DEGs_MAST_heatmap_foldCs_wTestingLabel_otherColorScale2.pdf", width=6, height=8)
rowAnnotation(gene=anno_mark(at=lab, labels=noDup[lab,]$gene, side="left", labels_gp=gpar(fontsize=10)))+ht
dev.off()

Fig2B) Shared DEGs

degs<-degs[order(degs$celltype),]
degs_t<-degs[which(degs$cat=="up"),]
du<-degs_t[,c(8,7)]
gene_ct<-table(du$gene, du$celltype)

mat<-matrix(ncol=6,nrow=6)
colnames(mat)<-unique(degs$celltype)
rownames(mat)<-unique(degs$celltype)
for (j in 1:6){
  for (i in 1:6){
    if (i>=j){
      mat[j,i]=nrow(gene_ct[which(gene_ct[,i]==1 & gene_ct[,j]==1),])
    }
    else{
      mat[j,i]<-0
    }
  }
}




degs_t<-degs[which(degs$cat=="down"),]
du<-degs_t[,c(8,7)]
gene_ct<-table(du$gene, du$celltype)

mat2<-matrix(ncol=6,nrow=6)
colnames(mat2)<-unique(degs$celltype)
rownames(mat2)<-unique(degs$celltype)
for (j in 1:6){
  for (i in 1:6){
    if (i<=j){
      mat2[j,i]=nrow(gene_ct[which(gene_ct[,i]==1 & gene_ct[,j]==1),])
    }
    else{
      mat2[j,i]<-0
    }
  }
}


colnames(mat)<-substr(colnames(mat),start=1,stop=3)
colnames(mat2)<-colnames(mat)
rownames(mat)<-colnames(mat)
rownames(mat2)<-colnames(mat)

ha<-rowAnnotation(celltype=colnames(mat)
, col= list(celltype=c("Ast"="darkgoldenrod1","Exc"="cornflowerblue","Inh"="seagreen3","Mic"="mediumorchid3","Oli"="coral3","OPC"="firebrick")), show_legend=F,annotation_label="")
ha2<-HeatmapAnnotation(celltype=substr(colnames(mat),start=1,stop=3)
, col= list(celltype=c("Ast"="darkgoldenrod1","Exc"="cornflowerblue","Inh"="seagreen3","Mic"="mediumorchid3","Oli"="coral3","OPC"="firebrick")), show_legend=F,annotation_label="")

ht=Heatmap(mat, cluster_rows=F,cluster_columns=F,col=colorRamp2(c(0,20,25,26),c("white","red","red","grey80")),  top_annotation=ha2, name="Up", left_annotation=ha, show_column_names=T, show_row_names=T, column_title=NULL, row_title_gp=gpar(fontsize=14),cell_fun = function(j, i, x, y, w, h, fill) {
    if(mat[i, j] >0) {
      if(i==j){
        grid.text(mat[i,j], x, y, gp=gpar(fontsize=10, col="red"),  vjust=-1.5)
      }
      else grid.text(mat[i,j], x, y, gp=gpar(fontsize=10))
    }
  })
ht2=Heatmap(mat2, cluster_rows=F,cluster_columns=F,col=colorRamp2(c(0,34,35,36),c("white","dodgerblue","dodgerblue","grey80")),  top_annotation=ha2, name="Down", left_annotation=ha, show_column_names=T, show_row_names=T, column_title=NULL, row_title_gp=gpar(fontsize=14),cell_fun = function(j, i, x, y, w, h, fill) {
    if(mat2[i, j] >0) {
      if(i==j){
        grid.text(mat2[i,j], x, y, gp=gpar(fontsize=10, col="blue"),  vjust=1.5)
      }
      else{
        grid.text(mat2[i,j], x, y, gp=gpar(fontsize=10))
      }
    } })

pdf("~/scMultiomics_AD/Shared_degs_heatmap_up.pdf", width=8,height=4)
ht+ht2
dev.off()

Fig2D) compare

# MATHYS
m1<-read.csv("~/mathys_data/MathysDE_Ast.csv")
m2<-read.csv("~/mathys_data/MathysDE_Ex.csv")
m3<-read.csv("~/mathys_data/MathysDE_In.csv")
m4<-read.csv("~/mathys_data/MathysDE_Microglia.csv")
m5<-read.csv("~/mathys_data/MathysDE_Oli.csv")
m6<-read.csv("~/mathys_data/MathysDE_opc.csv")
m1$celltype<-"Astrocytes"
m2$celltype<-"Excitatory"
m3$celltype<-"Inhibitory"
m4$celltype<-"Microglia"
m5$celltype<-"Oligodendrocytes"
m6$celltype<-"OPCs"
mat_degs<-rbind(m1,m2,m3,m4,m5,m6)
# mat_degs<-mat_degs[which(abs(as.numeric(mat_degs$IndModel.FC))>0.2 & as.numeric(mat_degs$IndModel.adj.pvals)<0.01),]
mat_degs<-mat_degs[which(mat_degs$DEGs.Ind.Model==T | mat_degs$DEGs.Ind.Mix.models==T),]

mat_v_our<-merge(degs, mat_degs, by=c("gene","celltype"), all.x=T)
mat_v_our$group<-ifelse(is.na(mat_v_our$p_val)==T, "Mathys",
                  ifelse(is.na(mat_v_our$DEGs.Ind.Model)==T, "Ours","Shared"))

mat_v_our<-merge(degs, mat_degs, by=c("gene","celltype"))
mat_v_our$IndModel.FC<-as.numeric(mat_v_our$IndModel.FC)
sum(sign(mat_v_our$avg_log2FC)==sign(mat_v_our$IndModel.FC))


# MORABITO
mor_degs<-read.csv("~/morabito/morabito_degs.csv")
mor_degs<-mor_degs[which(mor_degs$celltype!="PER.END"),]
mor_degs$celltype<-ifelse(mor_degs$celltype=="ASC", "Astrocytes",
                          ifelse(mor_degs$celltype=="EX", "Excitatory",
                                 ifelse(mor_degs$celltype=="INH", "Inhibitory",
                                        ifelse(mor_degs$celltype=="MG","Microglia",
                                               ifelse(mor_degs$celltype=="ODC", "Oligodendrocytes","OPCs")))))

mor_v_our<-merge(degs, mor_degs, by=c("gene","celltype"), all.x=T)
mor_v_our$group<-ifelse(is.na(mor_v_our$p_val.x)==T, "Morabito",
                  ifelse(is.na(mor_v_our$p_val.y)==T, "Ours","Shared"))





vv<-merge(mor_degs, mat_degs, by=c("gene","celltype"),all=T)
vv<-merge(vv, degs, by=c("gene","celltype"),all=T)
vv$Mat<-ifelse(is.na(vv$IndModel.adj.pvals)==T, F,T)
vv$Mor<-ifelse(is.na(vv$p_val.x)==T, F,T)
vv$Our<-ifelse(is.na(vv$p_val.y)==T, F, T)

x<-list(Mor=which(vv$Mor==T), Mat=which(vv$Mat==T), Our=which(vv$Our==T))
pdf("~/scMultiomics_AD/Mathys_morabito_our_degs.pdf")
ggVennDiagram(x)+theme(legend.position="none")
dev.off()

x<-vv[,24:26]
nrow(x[which(x$Mor==T & x$Mat==F & x$Our==F),]) # 2562
nrow(x[which(x$Mor==F & x$Mat==T & x$Our==F),]) # 5716
nrow(x[which(x$Mor==F & x$Mat==F & x$Our==T),]) # 666
nrow(x[which(x$Mor==T & x$Mat==T & x$Our==F),]) # 182
nrow(x[which(x$Mor==T & x$Mat==F & x$Our==T),]) # 280
nrow(x[which(x$Mor==F & x$Mat==T & x$Our==T),]) # 84
nrow(x[which(x$Mor==T & x$Mat==T & x$Our==T),]) # 61

dat<-c("Morabito"=2562, "Mathys"=5716,"Ours"=666,"Morabito&Mathys"=182, "Morabito&Ours"=280, "Mathys&Ours"=84, "Morabito&Mathys&Ours"=62)


pdf("~/scMultiomics_AD/Mathys_morabito_Our_degs_venn.pdf", width=4, height=4)
v=venn(dat)
plot(v)
dev.off()

Subcluster

Ast

  DefaultAssay(Astrocytes)<-"PC"
     Astrocytes <- SCTransform(Astrocytes,assay="PC",new.assay.name="SCT",vars.to.regress = c("percent.mt"),verbose=F) %>%      RunPCA(ndims=30)
    Astrocytes<-RunUMAP(Astrocytes,reduction="sub.harmony.rna",dims=1:30,reduction.name="sub.harmony.rna.umap")
   # atac
     DefaultAssay(Astrocytes)<-"CTpeaks"
   Astrocytes<-RunTFIDF(Astrocytes) %>% FindTopFeatures(min.cutoff='q50')%>% RunSVD()
   Astrocytes<-RunUMAP(Astrocytes,reduction="harmony.atac",dims=2:50,reduction.name="sub.harmony.atac.umap")
   # wnn
   Astrocytes<-FindMultiModalNeighbors(Astrocytes, reduction.list=list("harmony.rna","harmony.atac"), dims.list=list(1:30,2:50))
   Astrocytes<-RunUMAP(Astrocytes,nn.name="weighted.nn", reduction.name="wnn.umap",reduction.key="wnnUMAP_")
   Astrocytes<-FindClusters(Astrocytes, graph.name="wknn", algorithm=1, resolution=0.3)


Astrocytes$subs<-paste0("Ast_", Astrocytes$seurat_clusters)

S2D) UMAP

pdf("~/scMultiomics_AD/Ast_subclusters.pdf")
DimPlot(Astrocytes, group.by="subs",reduction="wnn.umap", cols=c("Ast_0"="darkgoldenrod1","Ast_1"="darkgoldenrod3", "Ast_2"="gold2",
      "Ast_3"="goldenrod","Ast_4"="goldenrod1"), pt.size=2, label=T)
dev.off()

DEGs

Astrocytes<-NormalizeData(Astrocytes)
DEGs<-FindAllMarkers(Astrocytes,min.pct=0.1,logfc.threshold = 0.1,  assay="PC", test.use="MAST",latent.vars=c("Age","Sex"))
DEGs<-DEGs[which(DEGs$p_val_adj<0.01 & abs(DEGs$avg_log2FC)>0.5),]
DEGs$cat<-ifelse(DEGs$avg_log2FC>0, "up","down")

write.csv(DEGs, "~/scMultiomics_AD/DEGs/DEGs_Astrocytes.csv")

GO

Supp Fig 5

degs_in<-read.csv("~/scMultiomics_AD/DEGs/DEGs_Astrocytes.csv")
degs_in<-degs_in[which(degs_in$cat=="up"),]
degs_in<-degs_in[order(degs_in$p_val_adj),]
degs_in<-degs_in[which(abs(degs_in$avg_log2FC)>0.55 & degs_in$p_val_adj<0.01 ),]


dbs <- c("GO_Molecular_Function_2021", "GO_Cellular_Component_2021", "GO_Biological_Process_2021", "KEGG_2021_Human", "Azimuth_Cell_Types_2021")

In<-list()
for (i in 1:5){
In[[i]]<-enrichr(degs_in[which(degs_in$cluster== i-1),]$gene, dbs)
}



a<-In[[1]][["GO_Biological_Process_2021"]]
b<-In[[2]][["GO_Biological_Process_2021"]]
c<-In[[3]][["GO_Biological_Process_2021"]]
d<-In[[4]][["GO_Biological_Process_2021"]]
e<-In[[5]][["GO_Biological_Process_2021"]]

a$i<-0
b$i<-1
c$i<-2
d$i<-3
e$i<-4
df<-rbind(a,b,c,d,e)
df_all<-df
df<-df[which(df$Adjusted.P.value<0.05),]
write.csv(df, "~/scMultiomics_AD/enrichr/Ast_subclusters_GO_BP.csv")


# switch back to BP

top<-df %>% group_by(i) %>% top_n(n=3, wt=Combined.Score)
top<-top[which(top$Adjusted.P.value<0.01),]
# top2.term<-top[which(top$i !=1),]$Term
term<-top$Term



mat<-matrix(nrow=length(unique(term)),ncol=5)
rownames(mat)<-unique(term)
colnames(mat)<-unique(df$i)

for (i in 1:nrow(mat)){
  for (j in 1:ncol(mat)){
    gene_tmp<-df[which(df$Term==rownames(mat)[i]),]
    gene_tmp<-gene_tmp[which(gene_tmp$i==colnames(mat)[j]),]
    mat[i,j]<-ifelse(nrow(gene_tmp)>0, gene_tmp$Odds.Ratio,1)
  }
}

mat<-mat[which(rownames(mat) %in% top2.term),]

rn<-rownames(mat)
rn<-sapply(strsplit(rn, " (", fixed=T), `[`,1)
rownames(mat)<-rn


sig_mat<-matrix(nrow=length(unique(term)),ncol=5)
rownames(sig_mat)<-unique(term)
colnames(sig_mat)<-unique(df$i)
df_sig<-df[which(df$Adjusted.P.value<0.01),]
for (i in 1:nrow(sig_mat)){
  for (j in 1:ncol(sig_mat)){
    gene_tmp<-df_sig[which(df_sig$Term==rownames(sig_mat)[i]),]
    gene_tmp<-gene_tmp[which(gene_tmp$i==colnames(sig_mat)[j]),]
    sig_mat[i,j]<-ifelse(nrow(gene_tmp)>0, 1,0)
  }
}


colnames(mat)<-paste0("Ast_", colnames(mat))

ht=Heatmap(mat, cluster_rows=T,show_row_dend=F,cluster_columns=F,col=colorRamp2(c(0,5,60),c("white", "# d3eff5","# 00518a")), name="Odds Ratio",  show_column_names=T, show_row_names=T, column_title=NULL,  row_names_side = "left",row_names_max_width = max_text_width(
        rownames(mat), 
        gp = gpar(fontsize = 12)
    ), cell_fun = function(j, i, x, y, w, h, fill) {
    if(sig_mat[i, j] >0) {
        grid.text("*", x, y, gp=gpar(fontsize=10), vjust="center")
    } })


library(stringr)
pdf("~/scMultiomics_AD/Ast_subcluster_GObp.pdf", width=8, height=4)
   ht 
dev.off()

boxplot

tab<-prop.table(table(subs[[1]]$subs, subs[[1]]$id),2)
Status<-c("Ctrl","Ctrl","Ctrl","AD","Ctrl","AD","AD","AD","AD","AD","AD","Ctrl","Ctrl","Ctrl","Ctrl")
df<-as.data.frame(t(tab))
df$Status<-rep(Status, nrow(tab))


stat.test <- df %>%
  group_by(Var2) %>%
  t_test(Freq ~ Status) %>%
  adjust_pvalue() %>%
  add_significance("p.adj")
stat.test <- stat.test %>% add_xy_position(x = "Var2")



#  0.192 
#  0.0649
#  0.48  
#  0.358 
#  0.534 

Bar plot

Supp Fig 5

meta<-Astrocytes@meta.data
tab<-prop.table(table(meta$cluster, meta$id),1)
Status<-c("Ctrl","Ctrl","Ctrl","AD","Ctrl","AD","AD","AD","AD","AD","AD","Ctrl","Ctrl","Ctrl","Ctrl")
df<-as.data.frame(t(tab))
df$Status<-rep(Status, 5)


Ctrl<-brewer.pal(8,"Blues")
AD<-brewer.pal(7, "Reds")
tmp<-df[1:15,]
tmp<-tmp[order(tmp$Status),]
df$Var1<-factor(df$Var1, levels= tmp$Var1)

lev<-levels(df$Var2)


df$Var2<-factor(df$Var2, levels=lev)


tab2<-as.data.frame(table(meta$cluster))
tab2.m<-merge(tab2, cols.df, by.x="Var1", by.y="subs")
tab2.m$Var1<-factor(df$Var1, levels= tmp$Var1)
tab2.m<-tab2.m[order(-tab2.m$Var1),]
lev<-levels(tab2.m$Var1)

tab2.m$Var1<-factor(tab2.m$Var1, levels=lev)
tab2.m<-tab2.m[order(match(tab2.m$Var1, lev)),]


df<-df[order(match(df$Var2, lev2)),]



pdf("~/scMultiomics_AD/Ast_stat_prop_bar_wTotalCells_log10.pdf", width=5,height=6)
p1=ggplot(df, aes(x=Freq*100,y=Var2,fill=Var1))+geom_bar(stat="identity")+theme_classic()+scale_fill_manual(values=c(AD,Ctrl))+xlab("%")+ylab("")+theme(legend.title=element_text(size=0), legend.position="none")+ggtitle("Donor")+scale_y_discrete(limits=rev(levels(df$Var2)))

p2=ggplot(tab2.m, aes(x=Freq, y=Var1, fill=Var1))+geom_bar(stat="identity")+theme_classic()+theme(legend.position="none", axis.text.y=element_text(size=0))+ggtitle("Num. Cells")+ylab("")+scale_fill_manual(values=tab2.m$cols)+xlab(" Count")+scale_y_discrete(limits=rev(levels(tab2.m$Var1)))+geom_vline(xintercept=100)
p1+p2
dev.off()

S1E) heatmap

olig_degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_Astrocytes.csv")
olig_degs<-olig_degs[which(olig_degs$cat=="up"),]
top <- olig_degs %>% group_by(cluster) %>% top_n(n=10, wt=avg_log2FC)
top$clust<-paste("Ast_", top$cluster, sep="")

avg_Exp<-as.data.frame(AverageExpression(Astrocytes, features=top$gene, group.by="cluster", assay="PC"))

colnames(avg_Exp)<-gsub("PC.","", colnames(avg_Exp))
mat_scaled<-t(apply(as.matrix(avg_Exp),1,scale))
colnames(mat_scaled)<-colnames(avg_Exp)


ha<-HeatmapAnnotation(cluster=colnames(mat_scaled), show_legend=F, col=list(cluster=c("Ast_0"="darkgoldenrod1", "Ast_1"="darkgoldenrod3", "Ast_2"="gold2", "Ast_3"="goldenrod", "Ast_4"="goldenrod1")))
ra<-rowAnnotation(cluster=top$clust, show_legend=F, col=list(cluster=c("Ast_0"="darkgoldenrod1", "Ast_1"="darkgoldenrod3","Ast_2"="gold2", "Ast_3"="goldenrod", "Ast_4"="goldenrod1")))

ht=Heatmap(mat_scaled,cluster_columns=T,cluster_rows=F, col=colorRamp2(c(-1.5,0,1.5),viridis(3)), top_annotation=ha, name="Z", left_annotation=ra, show_column_names=T, show_row_names=T, column_title=NULL, row_title_gp=gpar(fontsize=10),row_names_gp=gpar(fontsize=7), row_split=top$clust)


pdf("~/scMultiomics_AD/Ast_top10DEG_heatmap.pdf", width=4, height=6)
ht
dev.off()

sadick scores

This analysis was not shown in publication

sad<-read.csv("~/Sadick/Sadick_Ast_subDEGs.csv")
sad<-sad[which(sad$p_val_adj<0.01 & sad$avg_log2FC>0.35),]

top<-sad %>% group_by(LEN_so_astro_r2_cluster) %>% top_n(n=30, wt=avg_log2FC)
Astrocytes<-AddModuleScore(Astrocytes, features=list(top[which(top$LEN_so_astro_r2_cluster==0),]$gene), name="Clust0_score", search=T)

Astrocytes<-AddModuleScore(Astrocytes, features=list(top[which(top$LEN_so_astro_r2_cluster==1),]$gene), name="Clust1_score", search=T)

Astrocytes<-AddModuleScore(Astrocytes, features=list(top[which(top$LEN_so_astro_r2_cluster==2),]$gene), name="Clust2_score", search=T)

Astrocytes<-AddModuleScore(Astrocytes, features=list(top[which(top$LEN_so_astro_r2_cluster==3),]$gene), name="Clust3_score", search=T)

Astrocytes<-AddModuleScore(Astrocytes, features=list(top[which(top$LEN_so_astro_r2_cluster==4),]$gene), name="Clust4_score", search=T)

Astrocytes<-AddModuleScore(Astrocytes, features=list(top[which(top$LEN_so_astro_r2_cluster==5),]$gene), name="Clust5_score", search=T)

Astrocytes<-AddModuleScore(Astrocytes, features=list(top[which(top$LEN_so_astro_r2_cluster==6),]$gene), name="Clust6_score", search=T)

Astrocytes<-AddModuleScore(Astrocytes, features=list(top[which(top$LEN_so_astro_r2_cluster==7),]$gene), name="Clust7_score", search=T)

Astrocytes<-AddModuleScore(Astrocytes, features=list(top[which(top$LEN_so_astro_r2_cluster==8),]$gene), name="Clust8_score", search=T)


pdf("~/scMultiomics_AD/topick_Astrocytes_subclusters.pdf")
FeaturePlot(Astrocytes, reduction="wnn.umap", features=c("Clust0_score1","Clust1_score1","Clust2_score1","Clust3_score1","Clust4_score1","Clust5_score1","Clust6_score1","Clust7_score1","Clust8_score1"))
dev.off()

Inh

   DefaultAssay(Inhibitory)<-"PC"
     Inhibitory <- SCTransform(Inhibitory,assay="PC",new.assay.name="SCT",vars.to.regress = c("percent.mt"),verbose=F) %>%      RunPCA(ndims=30)
   # Inhibitory<-RunHarmony(Inhibitory, group.by.vars="id",reduction="pca",assay.use="SCT", reduction.save="sub.harmony.rna", lambda=0.8)
   Inhibitory<-RunUMAP(Inhibitory,reduction="harmony.rna",dims=1:30,reduction.name="sub.harmony.rna.umap")
   # atac
     DefaultAssay(Inhibitory)<-"CTpeaks"
   Inhibitory<-RunTFIDF(Inhibitory) %>% FindTopFeatures(min.cutoff='q50')%>% RunSVD()
    Inhibitory<-RunUMAP(Inhibitory,reduction="harmony.atac",dims=2:50,reduction.name="sub.harmony.atac.umap")
   # wnn
   Inhibitory<-FindMultiModalNeighbors(Inhibitory, reduction.list=list("harmony.rna","harmony.atac"), dims.list=list(1:30,2:50))
   Inhibitory<-RunUMAP(Inhibitory,nn.name="weighted.nn", reduction.name="wnn.umap",reduction.key="wnnUMAP_")
   Inhibitory<-FindClusters(Inhibitory, graph.name="wknn", algorithm=1, resolution=0.2)

   
Inhibitory$batch<-ifelse(Inhibitory$id %in% c("1224","1238","3586","4064","4627","4674","5640","4481"), "1","2")

S2J) UMAP

Inhibitory$subs<-paste0("Inh_", Inhibitory$seurat_clusters)
pdf("~/scMultiomics_AD/Inh_subclusters.pdf")
DimPlot(Inhibitory, group.by="subs",reduction="wnn.umap", cols=c( "Inh_0"="palegreen",  "Inh_1"="springgreen",  "Inh_2"="darkolivegreen3",       "Inh_3"="seagreen3", "Inh_4"="forestgreen",       "Inh_5"="darkseagreen",       "Inh_6"="palegreen4",       "Inh_7"="darkolivegreen1"), pt.size=2, label=T)
dev.off()

DEGs

DEGs<-FindAllMarkers(Inhibitory,min.pct=0.1,logfc.threshold = 0.1,  assay="PC", test.use="MAST",latent.vars=c("Age","Sex"))
DEGs<-DEGs[which(DEGs$p_val_adj<0.01 & abs(DEGs$avg_log2FC)>0.5),]
DEGs$cat<-ifelse(DEGs$avg_log2FC>0, "up","down")

write.csv(DEGs, "~/scMultiomics_AD/DEGs/DEGs_Inhibitory.csv")

subtype

degs_in<-read.csv("~/scMultiomics_AD/DEGs/DEGs_Inhibitory.csv")
degs_in<-degs_in[which(degs_in$cat=="up"),]
degs_in<-degs_in[order(degs_in$p_val_adj),]


dbs<-"Azimuth_Cell_Types_2021"
# dbs<-"Allen_Brain_Atlas_10x_scRNA_2021"
In<-list()
for (i in 1:8){
In[[i]]<-enrichr(degs_in[which(degs_in$cluster== i-1),]$gene, dbs)
}



head(In[[i]][["Azimuth_Cell_Types_2021"]])



pdf("~/scMultiomics_AD/IN_marker_vlnplot_2.pdf", width=15,height=20)
VlnPlot(Inhibitory, features=c("PVALB","SST","VIP","LAMP5", "SNCG"),  pt.size=0.5)
dev.off()


cluster.ids<-c("PVALB+",
               "SST+",
               "VIP+", 
               "SNCG+", 
               "LAMP5+ 1",
               "Chandelier",
               "LAMP5+ 2", 
               "SST+ KLHL1+"         )
tmp<-Inhibitory$seurat_clusters             
names(cluster.ids)<-levels(Inhibitory)
Inhibitory<-RenameIdents(Inhibitory,cluster.ids)
Inhibitory$azimuth<-Idents(Inhibitory)
Inhibitory$seurat_clusters<-tmp


Idents(Inhibitory)<-Inhibitory$seurat_clusters



# Inhibitory$azimuth<-factor(Inhibitory$azimuth,levels=levels(Inhibitory)[order(levels(Inhibitory))])


pdf("~/scMultiomics_AD/Azimuth_In_layers2.pdf", width=7, height=5)
DimPlot(Inhibitory, reduction="wnn.umap",group.by="azimuth", cols=c(
  "PVALB+"="palegreen3",
  "SNCG+"="gold",
"VIP+"="firebrick1",
"LAMP5+ 1"="orange", 
"LAMP5+ 2"="darkorange3",
"Chandelier"="mediumorchid",
"SST+"="deepskyblue2",
"SST+ KLHL1+"="skyblue1"  )
)+ggtitle("GABAergic Subtypes")
dev.off()





Inhibitory$cluster<-paste("Inh_", Inhibitory$seurat_clusters, ": ", Inhibitory$azimuth,)
pdf("~/scMultiomics_AD/Azimuth_In_layers2.pdf", width=7, height=5)
DimPlot(Inhibitory, reduction="wnn.umap",group.by="cluster", cols=c(
"Inh_0: Pvalb+"         =cols.df[17,1],
"Inh_1: VIP+"           =cols.df[18,1],
"Inh_2: Sst+ RPL35AP11+"=cols.df[19,1],
"Inh_3: LAMP5+"         =cols.df[20,1],
"Inh_4: LAMP5+ AARD+"   =cols.df[21,1],
"Inh_5: Chandelier"     =cols.df[22,1],
"Inh_6: Sst+ KLHL1+"    =cols.df[23,1],
"Inh_7: Sst+ Chodl+"     =cols.df[24,1]))+ggtitle("GABAergic Subtypes")
dev.off()











pdf("~/scMultiomics_AD/In_marker_expression_plot.pdf")
VlnPlot(Inhibitory, features=c("PVALB","VIP","LAMP5","SST","SNCG"), assay="PC", pt.size=0.5) & scale_fill_manual(values=c(
  "Pvalb+"="palegreen",
"VIP+"="firebrick1",
"LAMP5+"="sienna", 
"LAMP5+ AARD+"="coral",
"Chandelier"="mediumorchid",
"Sst+ RPL35AP11+"="deepskyblue2",
"Sst+ KLHL1+"="deepskyblue4", 
"Sst+ Chodl+"="skyblue1"  ))
dev.off()

pdf("~/scMultiomics_AD/In_marker_featPlot.pdf")
FeaturePlot(Inhibitory, reduction="wnn.umap", features=c("PVALB","VIP","LAMP5","SST"), max.cutoff=1)&scale_color_viridis()
dev.off()


Inhibitory$cluster<-paste("Inh_", Inhibitory$seurat_clusters)
pdf("~/scMultiomics_AD/IN_marker_dotplot.pdf", width=6, height=8)
DotPlot(Inhibitory, features=c("PVALB","VIP","LAMP5","SST"), col.min=0, col.max=1.5, dot.scale=20, group.by="cluster")+scale_color_viridis()
dev.off()

S2K) heatmap

# # IN

markers<-data.frame(SST= c("GRID2","RALYL","COL25A1","SST","GRIK1","NXPH1","SOX6","TRHDE","SLC8A1","ST6GALNAC5"),
VIP= c("SYNPR","SLC24A3","GRM7","KCNT2","GALNTL6","THSD7A","ADARB2","VIP","LRP1B","ERBB4"),
PVALB=c("DPP10","SDK1","ERBB4","ADAMTS17","ZNF804A","BTBD11","SLIT2","MYO16","GRIA4","PVALB"),
LAMP5=c("LAMP5","PTPRT","PRELID2","EYA4","PTCHD4","FGF13","FBXL7","MYO16","GRIA4","RELN"),
Chand=c("ADAMTS5","PTHLH","NOG","COL15A1","PLEKHH2","COL21A1","NPNT","SRGAP1","HTR1F","FAM19A4"), SNCG=c("MAML3",   "CNR1", "SNCG", "CNTN5",    "ADARB2","CXCL14",  "SLC8A1",   "FSTL5",    "NPAS3",    "ASIC2"))


markers$seq<-seq(1, nrow(markers))
markers_melt<-melt(markers, id="seq")

avg_Exp<-as.data.frame(AverageExpression(Inhibitory, features=markers_melt$value,assay="PC", group.by="subs"))
colnames(avg_Exp)<-gsub("PC.","", colnames(avg_Exp))
avg_Exp<-as.matrix(avg_Exp)

markers_melt<-markers_melt[which(markers_melt$value %in% rownames(avg_Exp)),]
markers_melt<-markers_melt[!duplicated(markers_melt$value),]

mat_scaled<-t(apply(avg_Exp,1,scale))
# mat_scaled<-apply(avg_Exp,2,scale)

colnames(mat_scaled)<-colnames(avg_Exp)

ha<-HeatmapAnnotation(celltype=colnames(avg_Exp),col= list(celltype=c( "Inh_0"="palegreen",  "Inh_1"="springgreen",  "Inh_2"="darkolivegreen3",       "Inh_3"="seagreen3", "Inh_4"="forestgreen",       "Inh_5"="darkseagreen",       "Inh_6"="palegreen4",       "Inh_7"="darkolivegreen1")), show_legend=F)
ha2<-HeatmapAnnotation(subtype=c("PVALB+","SST+","VIP+",  "SNCG+",  "LAMP5+",  "Chandelier","LAMP5+", "SST+ KLHL1+") , col=list(subtype=c("PVALB+"="green","VIP+"="orange",   "LAMP5+"="dodgerblue",  "Chandelier"="purple","SST+"="red","SST+ KLHL1+"="red3", "SNCG+"="yellow")), show_legend=T)
ha2<-c(ha,ha2)
ra<-rowAnnotation(subtype=markers_melt$variable, col=list(subtype=c("SST"="red","VIP"="orange","PVALB"="green","LAMP5"="dodgerblue","Chand"="purple", "SNCG"="yellow")))

ht=Heatmap(mat_scaled,cluster_rows=T,cluster_columns=T,col=colorRamp2(c(-1.5,0,1.5),viridis(3)), top_annotation=ha2, name="Z", left_annotation=ra, show_column_names=T, show_row_names=T, column_title=NULL, row_split=markers_melt$variable,row_title_gp=gpar(fontsize=14),row_names_gp=gpar(fontsize=7))


pdf("~/scMultiomics_AD/In_Subtype_heatmap.pdf", width=5,height=6)
ht
dev.off()

GO

degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_Inhibitory.csv")
degs<-degs[which(degs$p_val_adj<0.01 & abs(degs$avg_log2FC)>0.5),]
degs_up<-degs[which(degs$cat=="up"),]
dbs <- c("GO_Molecular_Function_2021", "GO_Cellular_Component_2021", "GO_Biological_Process_2021", "KEGG_2021_Human", "Azimuth_Cell_Types_2021", "Allen_Brain_Atlas_10x_scRNA_2021")

#    0   1   2   3   4   5   6   7 
#  197 181 228 182 296 229 254 178 


In<-list()
for (i in 1:8){
In[[i]]<-enrichr(degs_up[which(degs_up$cluster== i-1),]$gene, dbs)
}



a<-In[[1]][["GO_Biological_Process_2021"]]
b<-In[[2]][["GO_Biological_Process_2021"]]
c<-In[[3]][["GO_Biological_Process_2021"]]
d<-In[[4]][["GO_Biological_Process_2021"]]
e<-In[[5]][["GO_Biological_Process_2021"]]
f<-In[[6]][["GO_Biological_Process_2021"]]
g<-In[[7]][["GO_Biological_Process_2021"]]
h<-In[[8]][["GO_Biological_Process_2021"]]



a$i<-0
b$i<-1
c$i<-2
d$i<-3
e$i<-4
f$i<-5
g$i<-6
h$i<-7

df<-rbind(a,b,c,d,e,f,g,h)
df_all<-df
df<-df[which(df$Adjusted.P.value<0.05),]
write.csv(df, "~/scMultiomics_AD/enrichr/Inh_subclusters_GO_BP.csv")
top<-df %>% group_by(i) %>% top_n(n=2, wt=-Adjusted.P.value)
term<-top$Term
term<-sort(unique(term))


mat<-matrix(nrow=length(unique(term)),ncol=8)
rownames(mat)<-unique(term)
colnames(mat)<-unique(df$i)

for (i in 1:nrow(mat)){
  for (j in 1:ncol(mat)){
    gene_tmp<-df_all[which(df_all$Term==rownames(mat)[i]),]
    gene_tmp<-gene_tmp[which(gene_tmp$i==colnames(mat)[j]),]
    mat[i,j]<-ifelse(nrow(gene_tmp)>0, gene_tmp$Odds.Ratio,1)
  }
}


rn<-rownames(mat)
rn<-sapply(strsplit(rn, " (", fixed=T), `[`,1)
rownames(mat)<-rn



sig_mat<-matrix(nrow=length(unique(term)),ncol=8)
rownames(sig_mat)<-unique(term)
colnames(sig_mat)<-unique(df$i)
df_sig<-df[which(df$Adjusted.P.value<0.01),]
for (i in 1:nrow(sig_mat)){
  for (j in 1:ncol(sig_mat)){
    gene_tmp<-df_sig[which(df_sig$Term==rownames(sig_mat)[i]),]
    gene_tmp<-gene_tmp[which(gene_tmp$i==colnames(sig_mat)[j]),]
    sig_mat[i,j]<-ifelse(nrow(gene_tmp)>0, 1,0)
  }
}


colnames(mat)<-paste0("Inh_", colnames(mat))

ht=Heatmap(mat, cluster_rows=T,show_row_dend=F,cluster_columns=F,col=colorRamp2(c(0,5,15),c("white", "# d3eff5","# 00518a")), name="Odds Ratio",  show_column_names=T, show_row_names=T, column_title=NULL,  row_names_side = "left",row_names_max_width = max_text_width(
        rownames(mat), 
        gp = gpar(fontsize = 12)
    ), cell_fun = function(j, i, x, y, w, h, fill) {
    if(sig_mat[i, j] >0) {
        grid.text("*", x, y, gp=gpar(fontsize=10), vjust="center")
    } })


library(stringr)
pdf("~/scMultiomics_AD/Inh_subcluster_GObp.pdf", width=8, height=4)
   ht 
dev.off()

S2L) Boxplot

tab<-prop.table(table(Inhibitory$azimuth, Inhibitory$id),2)
Status<-c("Ctrl","Ctrl","Ctrl","AD","Ctrl","AD","AD","AD","AD","AD","AD","Ctrl","Ctrl","Ctrl","Ctrl")
df<-as.data.frame(t(tab))
df$Status<-rep(Status, nrow(tab))


tests<-list()
for (i in 1:nrow(tab)){
  ct<-rownames(tab)[i]
  df.tmp<-df[which(df$Var2==ct),]
  tests[[i]]<-t.test(df.tmp[which(df.tmp$Status=="Ctrl"),]$Freq, df.tmp[which(df.tmp$Status=="AD"),]$Freq)$p.value
}


#  Chandelier      0.4980
#  Lamp5+          0.0417
#  Pvalb+          0.0415
#  Sncg+           0.5030
#  Sst+ Chodl+     0.3860
#  Sst+ KLHL1+     0.4400
#  Sst+ RPL35AP11+ 0.3100
#  VIP+            0.1710
stat.test <- df %>%
  group_by(Var2) %>%
  t_test(Freq ~ Status) %>%
  adjust_pvalue() %>%
  add_significance("p.adj")
stat.test <- stat.test %>% add_xy_position(x = "Var2")

pdf("~/scMultiomics_AD/In_CT_stat_prop_tt.pdf", width=10,height=5)
ggboxplot(df, y="Freq",x="Var2", fill="Status", alpha=0.8)+stat_pvalue_manual(stat.test, label="p")+xlab("")+scale_fill_manual(values=c("red","blue"))
dev.off()
    AD Ctrl

Inh_0 2043 2104 Inh_1 1571 2096 Inh_2 1836 1785 Inh_3 916 1047 Inh_4 564 598 Inh_5 341 372 Inh_6 305 380 Inh_7 154 219

Exc

   DefaultAssay(Excitatory)<-"PC"
     Excitatory <- SCTransform(Excitatory,assay="PC",new.assay.name="SCT",vars.to.regress = c("percent.mt"),verbose=F) %>%      RunPCA(ndims=30)
   Excitatory<-RunUMAP(Excitatory,reduction="harmony.rna",dims=1:30,reduction.name="sub.harmony.rna.umap")
   # atac
     DefaultAssay(Excitatory)<-"CTpeaks"
   Excitatory<-RunTFIDF(Excitatory) %>% FindTopFeatures(min.cutoff='q50')%>% RunSVD()
    Excitatory<-RunUMAP(Excitatory,reduction="harmony.atac",dims=2:50,reduction.name="sub.harmony.atac.umap")
   # wnn
   Excitatory<-FindMultiModalNeighbors(Excitatory, reduction.list=list("harmony.rna","harmony.atac"), dims.list=list(1:30,2:50))
   Excitatory<-RunUMAP(Excitatory,nn.name="weighted.nn", reduction.name="wnn.umap",reduction.key="wnnUMAP_")
   Excitatory<-FindClusters(Excitatory, graph.name="wknn", algorithm=1, resolution=0.)

   
Excitatory$batch<-ifelse(Excitatory$id %in% c("1224","1238","3586","4064","4627","4674","5640","4481"), "1","2")



Excitatory<-FindClusters(Excitatory, graph.name="wknn", algorithm=1, resolution=0.15)


Excitatory$subs<-paste0("Exc_", Excitatory$seurat_clusters)

S2H) UMAP

Excitatory2<-subset(Excitatory, subset=seurat_clusters==10, invert=T)
pdf("~/scMultiomics_AD/Exc_subclusters.pdf")
DimPlot(Excitatory2, group.by="subs",reduction="wnn.umap", cols=c( "Exc_0"="royalblue1",       "Exc_1"="deepskyblue2",     "Exc_2"="steelblue1", "Exc_3"="steelblue3",       "Exc_4"="cornflowerblue",       "Exc_5"="royalblue3",       "Exc_6"="cadetblue1", "Exc_7"="cadetblue3",
      "Exc_8"="dodgerblue","Exc_9"="dodgerblue3"), pt.size=2, label=T)
dev.off()

DEGs

DEGs<-FindAllMarkers(Excitatory2,min.pct=0.1,logfc.threshold = 0.1,  assay="PC", test.use="MAST",latent.vars=c("Age","Sex"))
DEGs<-DEGs[which(DEGs$p_val_adj<0.01 & abs(DEGs$avg_log2FC)>0.5),]
DEGs$cat<-ifelse(DEGs$avg_log2FC>0, "up","down")

write.csv(DEGs, "~/scMultiomics_AD/DEGs/DEGs_Excitatory.csv")

subtype

# # Excitatory subtypes
# same as large subcluster DEG list but this ran first
degs_ex<-read.csv("~/scMultiomics_AD/DEGs/DEGs_Excitatory.csv")


degs_ex<-degs_ex[which(degs_ex$cat=="up"),]
degs_ex<-degs_ex[order(degs_ex$gene,degs_ex$p_val_adj),]


dbs<-"Azimuth_Cell_Types_2021"
ex<-list()
for (i in 1:10){
ex[[i]]<-enrichr(degs_ex[which(degs_ex$cluster== i-1),]$gene, dbs)
}

i=i+1
head(ex[[i]][["Azimuth_Cell_Types_2021"]])


Excitatory<-subset(Excitatory, seurat_clusters==10, invert=T)


cluster.ids<-c("L2-3 Intratelencephalon-Projecting 1",     
               "L2-3 Intratelencephalon-Projecting 2",   
               "L5 Intratelencephalon-Projecting 2",     
               "L5 Intratelencephalon-Projecting 1",    
               "L6 THEMIS+ LINC00343+",   
               "L6 Corticothalamic-Projecting",          
               "L5 Intratelencephalon-Projecting 3",       
               "L6 Car3+ Intratelencephalon-Projecting", 
               "L5-6  Near-Projecting",                  
               "L5 Extratelencephalon-Projecting"          )

tmp<-Excitatory$seurat_clusters             
names(cluster.ids)<-levels(Excitatory)
Excitatory<-RenameIdents(Excitatory,cluster.ids)
Excitatory$azimuth<-Idents(Excitatory)
Excitatory$seurat_clusters<-tmp
# Idents(Excitatory)<-Excitatory$seurat_clusters


Excitatory$azimuth<-factor(Excitatory$azimuth,levels=levels(Excitatory)[order(levels(Excitatory))])
Excitatory$cluster<-paste("Exc_",Excitatory$seurat_clusters,": ",Excitatory$azimuth, sep="")

pdf("~/scMultiomics_AD/Azimuth_Ex_layers.pdf", width=8, height=5)
DimPlot(Excitatory, reduction="wnn.umap", group.by="azimuth", pt.size=0.2, cols=c(
  "L2-3 Intratelencephalon-Projecting 1" ="firebrick1",     
               "L2-3 Intratelencephalon-Projecting 2"="firebrick3",   
               "L5 Intratelencephalon-Projecting 1"="gold",     
               "L5 Intratelencephalon-Projecting 2"="goldenrod3",          
               "L5 Intratelencephalon-Projecting 3"="darkgoldenrod4",                  
               "L5 Extratelencephalon-Projecting"="khaki1", 
               "L5-6  Near-Projecting"="turquoise", 
               "L6 THEMIS+ LINC00343+"="deepskyblue",   
               "L6 Corticothalamic-Projecting"="blue",       
               "L6 Car3+ Intratelencephalon-Projecting"="cadetblue3"
  ))+ggtitle("Glutamatergic Subtypes")
dev.off()



pdf("~/scMultiomics_AD/Azimuth_Ex_layers.pdf", width=8, height=5)
DimPlot(subs[[3]], reduction="wnn.umap", group.by="cluster", pt.size=0.2, cols=c(
               "Exc_0: L2-3 Intratelencephalon-Projecting 1" =cols.df[7,1],     
               "Exc_1: L2-3 Intratelencephalon-Projecting 2"=cols.df[8,1],   
               "Exc_2: L5 Intratelencephalon-Projecting 2"=cols.df[9,1],     
               "Exc_3: L5 Intratelencephalon-Projecting 1"=cols.df[10,1],          
               "Exc_4: L6 THEMIS+ LINC00343+"=cols.df[11,1],                  
               "Exc_5: L6 Corticothalamic-Projecting"=cols.df[12,1], 
               "Exc_6: L5 Intratelencephalon-Projecting 3"=cols.df[13,1], 
               "Exc_7: L6 Car3+ Intratelencephalon-Projecting"=cols.df[14,1],   
               "Exc_8: L5-6  Near-Projecting"=cols.df[15,1],       
               "Exc_9: L5 Extratelencephalon-Projecting"=cols.df[16,1]
  ))+ggtitle("Glutamatergic Subtypes")
dev.off()



subs[[3]]$label<-paste("Exc_", subs[[3]]$seurat_clusters, sep="")
pdf("~/scMultiomics_AD/Azimuth_Ex_layers_label.pdf", width=5, height=5)
DimPlot(subs[[3]], reduction="wnn.umap", group.by="label", pt.size=0.2, cols=c(
               "Exc_0" =cols.df[7,1],     
               "Exc_1"=cols.df[8,1],   
               "Exc_2"=cols.df[9,1],     
               "Exc_3"=cols.df[10,1],          
               "Exc_4"=cols.df[11,1],                  
               "Exc_5"=cols.df[12,1], 
               "Exc_6"=cols.df[13,1], 
               "Exc_7"=cols.df[14,1],   
               "Exc_8"=cols.df[15,1],       
               "Exc_9"=cols.df[16,1]
  ), label=T)+ggtitle("Glutamatergic Subtypes")+theme(legend.position="none")
dev.off()





# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 

degs_ex<-read.csv("~/scMultiomics_AD/DEGs/DEG_Excitatory_subTypes_AgeSex.csv")


degs_ex$subtype<-ifelse(degs_ex$cluster==0, "L2/3 RORB+ CCDC68+ ",
                        ifelse(degs_ex$cluster==1, "L2/3 Intratelencephalon-Projecting",
                               ifelse(degs_ex$cluster==2, "L5 Intratelencephalon-Projecting 2",
                                       ifelse(degs_ex$cluster==3, "L6 THEMIS+ SNTG2+",
                         ifelse(degs_ex$cluster==4,  "L5 Intratelencephalon-Projecting ",
                         ifelse(degs_ex$cluster==5,  "L6 FEZF2+ KLK7+",
                         ifelse(degs_ex$cluster==6,  "L3 RORB+ OTOGL+",
                         ifelse(degs_ex$cluster==7,  "L6 Corticothalamic-Projecting 1",
                         ifelse(degs_ex$cluster==8,  "L5 Intratelencephalon-Projecting 3",
                         ifelse(degs_ex$cluster==9,  "L6 CAR3+ Intratelencephalon-Projecting",
                         ifelse(degs_ex$cluster==10, "L5/6 Near-Projecting",
                         ifelse(degs_ex$cluster==11, "NS","L5 FEZF2+ CSN1S1+"))))))))))))

# degs_ex$layer<-lapply(strsplit(degs_ex$subtype, split=" "), `[`, 1)
write.csv(degs_ex, "~/scMultiomics_AD/DEGs/DEG_Ex_subTypes.csv")

S2I) heatmap

markers<-data.frame(L23_IP=c("EPHA6","CA10","LAMA2","RASGRF2","GNAL","KIAA1217","MEIS2","DAB1","CUX2","LRRTM4","PDZD2","ENC1","FAM19A1","CNTN5","LINC01378","PVRL3","CBLN2","x","x"),
L5_EP= c("NRP1","PTCHD1.AS","COL5A2","SLC35F3","COL24A1","HOMER1","VAT1L","NRG1","FAM19A1","CBLN2","x","x","x","x","x","x","x","x","x") ,
L5_IP= c("CADPS2","CHN2","IL1RAPL2","CPNE4","RORB","CNTN5","TOX","FSTL5","POU6F2","FSTL4","x","x","x","x","x","x","x","x","x"),
L56_NP=c("TLE4","CALCRL","LUZP2","TSHZ2","HTR2C","FER1L6-AS2","CDH6","KIAA1217","NPSR1.AS1","NPSR1-AS1","ZNF385D","ITGA8","CPNE4","NXPH2","TLL1 ","CRYM","PDE9A","CBLN2","ASIC2"),
L6_CTP=c("ADAMTSL1","TRPM3","KIAA1217","MEIS2","SEMA5A","SEMA3E","EGFEM1P","SORCS1","HS3ST4","TOX","x","x","x","x","x","x","x","x","x"),
L6_CAR=c("NTNG2","NR4A2","POSTN","KCNMB2","B3GAT2","RNF152","THEMIS","OLFML2B","STK32B","GAS2L3","x","x","x","x","x","x","x","x","x"),
L6_T_L=c("EMA3A","TRPM3","ADAMTS3","LINC00343","ANKRD30B","DCHS2","THEMIS","SEC24D","GULP1","ROBO1","x","x","x","x","x","x","x","x","x"))


markers$seq<-seq(1, nrow(markers))
markers_melt<-melt(markers, id="seq")
markers_melt<-markers_melt[which(markers_melt$value != "x"),]
# ex_degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_Excitatory.csv")
# markers_malt2<-markers_melt[which(markers_melt$value %in% ex_degs$gene),]


a<-markers_melt %>% group_by(value) %>% summarize(subtype=paste(sort(variable), collapse=","))
# t<-table(a$value)
# a<-a[which(a$value %in% names(t[t<2])),]

avg_Exp<-as.data.frame(AverageExpression(Excitatory, features=a$value,assay="RNA", group.by="subs"))
colnames(avg_Exp)<-gsub("RNA.","", colnames(avg_Exp))
avg_Exp<-as.matrix(avg_Exp)

a<-a[which(a$value %in% rownames(avg_Exp)),]
a$layer<-sapply(strsplit(a$subtype, "_"), `[`, 1)
a$num<-str_count(a$subtype, ",")+1
a<-a[order(a$subtype, a$num),]
avg_Exp<-avg_Exp[a$value,]

mat_scaled<-t(apply(avg_Exp,1,scale))

colnames(mat_scaled)<-colnames(avg_Exp)

ha<-HeatmapAnnotation(celltype=colnames(avg_Exp),col= list(celltype=c( "Exc_0"="royalblue1",       "Exc_1"="deepskyblue2",     "Exc_2"="steelblue1", "Exc_3"="steelblue3",       "Exc_4"="cornflowerblue",       "Exc_5"="royalblue3",       "Exc_6"="cadetblue1", "Exc_7"="cadetblue3",
      "Exc_8"="dodgerblue","Exc_9"="dodgerblue3")), show_legend=F)
ha2<-HeatmapAnnotation(subtype=c("L2-3 Intratelencephalon-Projecting 1",     
               "L2-3 Intratelencephalon-Projecting 2",   
               "L5 Intratelencephalon-Projecting 2",     
               "L5 Intratelencephalon-Projecting 1",    
               "L6 THEMIS+ LINC00343+",   
               "L6 Corticothalamic-Projecting",          
               "L5 Intratelencephalon-Projecting 3",       
               "L6 CAR3+ Intratelencephalon-Projecting", 
               "L5-6  Near-Projecting",                  
               "L5 Extratelencephalon-Projecting"  ) , 
               col=list(subtype=c("L2-3 Intratelencephalon-Projecting 1"="green4",     
               "L2-3 Intratelencephalon-Projecting 2"="green",   
               "L5 Intratelencephalon-Projecting 2"="orangered",     
               "L5 Intratelencephalon-Projecting 1"="orange",    
               "L6 THEMIS+ LINC00343+"="dodgerblue",   
               "L6 Corticothalamic-Projecting"="royalblue3",          
               "L5 Intratelencephalon-Projecting 3"="tomato",       
               "L6 CAR3+ Intratelencephalon-Projecting"="skyblue1", 
               "L5-6  Near-Projecting"="plum",                  
               "L5 Extratelencephalon-Projecting"="gold"  )), show_legend=F)
ha2<-c(ha,ha2)
n <- 15
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
sam<-sample(col_vector, n)
names(sam)<-unique(a$subtype)
ra<-rowAnnotation(subtype=a$subtype, col=list(subtype=sam))

ht=Heatmap(mat_scaled,cluster_rows=F,cluster_columns=F,column_split=c("L23","L23","L5","L5","L6","L6","L5","L6","L56","L5"),col=colorRamp2(c(-1,0,1),viridis(3)), top_annotation=ha, name="Z",  show_column_names=T, show_row_names=T, column_title=NULL, row_title_gp=gpar(fontsize=10),row_names_gp=gpar(fontsize=7), row_split=a$layer)


pdf("~/scMultiomics_AD/Neuron_Subtype_heatmap_Ex.pdf", width=4,height=8)
ht
dev.off()





# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 3
spec_mark<-table(markers_melt$value)
markers_melt<-markers_melt[which(markers_melt$value %in% names(spec_mark[spec_mark==1])),]
plot_mark<-merge(markers_melt, ex_degs, by.x="value",by.y="gene")

top_mark<-plot_mark %>% group_by(variable) %>% top_n(n=2, wt=avg_log2FC)
top_mark<-top_mark[order(top_mark$variable),]

Excitatory2$cluster<-paste0("Exc_", Excitatory2$seurat_clusters)
pdf("~/scMultiomics_AD/Ex_marker_dotplot.pdf", width=10, height=9)
DotPlot(Excitatory2, features=unique(top_mark$value), col.min=0, col.max=1.5, dot.scale=10, group.by="cluster")+scale_color_viridis(option="plasma", direction=-1)+theme(axis.text.x=element_text(size=15, angle=90), axis.text.y=element_text(size=15),axis.title=element_text(size=0))
dev.off()
tab<-table(markers_melt$value, markers_melt$variable)
mat<-as.matrix(tab)
mat<-mat[rownames(avg_Exp),]
df<-as.data.frame(mat)

pdf("~/scMultiomics_AD/EX_marker_upset_annotation.pdf",width=3, height=10)
ggplot(df, aes(y=Var1, x=Var2, alpha=as.factor(Freq)))+geom_point()+theme_classic()+scale_alpha_manual(values=c(0,1))+ylab("")+xlab("")+
  theme(legend.position="none", axis.text.x=element_text(angle=90))+scale_y_discrete(position="right", limits=rev)+scale_x_discrete(labels=c("L23","L5 Extratelencephalon-Projecting","L5 Intratelencephalon-Projecting","L56 Near Projecting","L6 Corticothalamic-Projecting","L6 CAR3+ Intratelencephalon-Projecting","L6 THEMIS+ LINC00343+"))
dev.off()

GO

degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_Excitatory.csv")
degs<-degs[which(degs$p_val_adj<0.01 & abs(degs$avg_log2FC)>0.5),]
degs_up<-degs[which(degs$cat=="up"),]
dbs <- c("GO_Molecular_Function_2021", "GO_Cellular_Component_2021", "GO_Biological_Process_2021", "KEGG_2021_Human", "Azimuth_Cell_Types_2021", "Allen_Brain_Atlas_10x_scRNA_2021")

In<-list()
for (i in 1:10){
In[[i]]<-enrichr(degs_up[which(degs_up$cluster== i-1),]$gene, dbs)
}



a<-In[[1]][["GO_Biological_Process_2021"]]
b<-In[[2]][["GO_Biological_Process_2021"]]
c<-In[[3]][["GO_Biological_Process_2021"]]
d<-In[[4]][["GO_Biological_Process_2021"]]
e<-In[[5]][["GO_Biological_Process_2021"]]
f<-In[[6]][["GO_Biological_Process_2021"]]
g<-In[[7]][["GO_Biological_Process_2021"]]
h<-In[[8]][["GO_Biological_Process_2021"]]
i<-In[[9]][["GO_Biological_Process_2021"]]
j<-In[[10]][["GO_Biological_Process_2021"]]


a$i<-0
b$i<-1
c$i<-2
d$i<-3
e$i<-4
f$i<-5
g$i<-6
h$i<-7
i$i<-8
j$i<-9

df<-rbind(a,b,c,d,e,f,g,h,i,j)
df_all<-df
df<-df[which(df$Adjusted.P.value<0.05),]
write.csv(df, "~/scMultiomics_AD/enrichr/Exc_subclusters_GO_BP.csv")
top<-df %>% group_by(i) %>% top_n(n=2, wt=-Adjusted.P.value)
top<-top[which(top$Adjusted.P.value<0.01),]

term<-top$Term
term<-sort(unique(term))


mat<-matrix(nrow=length(unique(term)),ncol=10)
rownames(mat)<-unique(term)
colnames(mat)<-unique(df$i)

for (i in 1:nrow(mat)){
  for (j in 1:ncol(mat)){
    gene_tmp<-df_all[which(df_all$Term==rownames(mat)[i]),]
    gene_tmp<-gene_tmp[which(gene_tmp$i==colnames(mat)[j]),]
    mat[i,j]<-ifelse(nrow(gene_tmp)>0, gene_tmp$Odds.Ratio,1)
  }
}



rn<-rownames(mat)
rn<-sapply(strsplit(rn, " (", fixed=T), `[`,1)
rownames(mat)<-rn


sig_mat<-matrix(nrow=length(unique(term)),ncol=10)
rownames(sig_mat)<-unique(term)
colnames(sig_mat)<-unique(df$i)
df_sig<-df[which(df$Adjusted.P.value<0.01),]
for (i in 1:nrow(sig_mat)){
  for (j in 1:ncol(sig_mat)){
    gene_tmp<-df_sig[which(df_sig$Term==rownames(sig_mat)[i]),]
    gene_tmp<-gene_tmp[which(gene_tmp$i==colnames(sig_mat)[j]),]
    sig_mat[i,j]<-ifelse(nrow(gene_tmp)>0, 1,0)
  }
}


colnames(mat)<-paste0("Exc_",colnames(mat))

ht=Heatmap(mat, cluster_rows=T,show_row_dend=F,cluster_columns=F,col=colorRamp2(c(0,5,20),c("white", "# d3eff5","# 00518a")), name="Odds Ratio",  show_column_names=T, show_row_names=T, column_title=NULL,  row_names_side = "left",row_names_max_width = max_text_width(
        rownames(mat), 
        gp = gpar(fontsize = 12)
    ), cell_fun = function(j, i, x, y, w, h, fill) {
    if(sig_mat[i, j] >0) {
        grid.text("*", x, y, gp=gpar(fontsize=10), vjust="center")
    } })


library(stringr)
pdf("~/scMultiomics_AD/Exc_subcluster_GObp.pdf", width=9, height=3.5)
   ht 
dev.off()

Boxplot

tab<-prop.table(table(Excitatory$azimuth, Excitatory$id),2)
Status<-c("Ctrl","Ctrl","Ctrl","AD","Ctrl","AD","AD","AD","AD","AD","AD","Ctrl","Ctrl","Ctrl","Ctrl")
df<-as.data.frame(t(tab))
df$Status<-rep(Status, nrow(tab))


tests<-list()
for (i in 1:nrow(tab)){
  ct<-rownames(tab)[i]
  df.tmp<-df[which(df$Var2==ct),]
  tests[[i]]<-t.test(df.tmp[which(df.tmp$Status=="Ctrl"),]$Freq, df.tmp[which(df.tmp$Status=="AD"),]$Freq)$p.value
}


#    L2-3 Intratelencephalon-Projecting 1    1.000
#    L2-3 Intratelencephalon-Projecting 2    1.000
#      L5 Intratelencephalon-Projecting 2    1.000
#      L5 Intratelencephalon-Projecting 1    1.000
#                   L6 THEMIS+ LINC00343+    1.000
#           L6 Corticothalamic-Projecting    1.000
#      L5 Intratelencephalon-Projecting 3    1.000
#  L6 Car3+ Intratelencephalon-Projecting    1.000
#                   L5-6  Near-Projecting    1.000
#        L5 Extratelencephalon-Projecting    0.419

stat.test <- df %>%
  group_by(Var2) %>%
  t_test(Freq ~ Status) %>%
  adjust_pvalue() %>%
  add_significance("p.adj")
stat.test <- stat.test %>% add_xy_position(x = "Var2")

pdf("~/scMultiomics_AD/Ex_CT_stat_prop_tt.pdf", width=10,height=5)
ggboxplot(df, y="Freq",x="Var2", fill="Status", alpha=0.8)+stat_pvalue_manual(stat.test, label="p")+xlab("")+scale_fill_manual(values=c("red","blue"))
dev.off()

Oli

Oligodendrocytes<-subset(data, subset=predicted.id=="Oligodendrocytes")

   DefaultAssay(Oligodendrocytes)<-"PC"
     Oligodendrocytes <- SCTransform(Oligodendrocytes,assay="PC",new.assay.name="SCT",vars.to.regress = c("percent.mt"),verbose=F) %>%      RunPCA(ndims=30)
    Oligodendrocytes<-RunUMAP(Oligodendrocytes,reduction="sub.harmony.rna",dims=1:30,reduction.name="sub.harmony.rna.umap")
   # atac
     DefaultAssay(Oligodendrocytes)<-"CTpeaks"
   Oligodendrocytes<-RunTFIDF(Oligodendrocytes) %>% FindTopFeatures(min.cutoff='q50')%>% RunSVD()
    Oligodendrocytes<-RunUMAP(Oligodendrocytes,reduction="harmony.atac",dims=2:50,reduction.name="sub.harmony.atac.umap")
   # wnn
   Oligodendrocytes<-FindMultiModalNeighbors(Oligodendrocytes, reduction.list=list("harmony.rna","harmony.atac"), dims.list=list(1:30,2:50))
   Oligodendrocytes<-RunUMAP(Oligodendrocytes,nn.name="weighted.nn", reduction.name="wnn.umap",reduction.key="wnnUMAP_")
   Oligodendrocytes<-FindClusters(Oligodendrocytes, graph.name="wknn", algorithm=1, resolution=0.35)

   
Oligodendrocytes$batch<-ifelse(Oligodendrocytes$id %in% c("1224","1238","3586","4064","4627","4674","5640","4481"), "1","2")

S2F) UMAP

Oligodendrocytes$subs<-paste0("Oli_", Oligodendrocytes$seurat_clusters)
pdf("~/scMultiomics_AD/Olig_subclusters.pdf")
DimPlot(Oligodendrocytes, group.by="subs",reduction="wnn.umap", cols=c( "Oli_0"="lightsalmon", "Oli_1"="darksalmon", 
               "Oli_2"="coral", "Oli_3"="brown1"), pt.size=2, label=T)
dev.off()

DEGs

DEGs<-FindAllMarkers(Oligodendrocytes,min.pct=0.1,logfc.threshold = 0.1,  assay="PC", test.use="MAST",latent.vars=c("Age","Sex"))
DEGs<-DEGs[which(DEGs$p_val_adj<0.01 & abs(DEGs$avg_log2FC)>0.5),]
DEGs$cat<-ifelse(DEGs$avg_log2FC>0, "up","down")

write.csv(DEGs, "~/scMultiomics_AD/DEGs/DEGs_Oligodendrocytes.csv")

GO

Supp Fig 5

degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_Oligodendrocytes.csv")
degs<-degs[which(degs$p_val_adj<0.01 & abs(degs$avg_log2FC)>0.25),]
degs_up<-degs[which(degs$cat=="up"),]
dbs <- c("GO_Molecular_Function_2021", "GO_Cellular_Component_2021", "GO_Biological_Process_2021", "KEGG_2021_Human", "Azimuth_Cell_Types_2021", "Allen_Brain_Atlas_10x_scRNA_2021")

In<-list()
for (i in 1:4){
In[[i]]<-enrichr(degs_up[which(degs_up$cluster== i-1),]$gene, dbs)
}


head(In[[i]][["GO_Molecular_Function_2021"]][,1:4])
head(In[[i]][["GO_Biological_Process_2021"]][,1:4])
head(In[[i]][["GO_Cellular_Component_2021"]][,1:4])
head(In[[i]][["KEGG_2021_Human"]][,1:4])
head(In[[i]][["Azimuth_Cell_Types_2021"]][,1:4])
head(In[[i]][["Allen_Brain_Atlas_10x_scRNA_2021"]][,1:4])

a<-In[[1]][["GO_Biological_Process_2021"]]
b<-In[[2]][["GO_Biological_Process_2021"]]
c<-In[[3]][["GO_Biological_Process_2021"]]
d<-In[[4]][["GO_Biological_Process_2021"]]

a$i<-0
b$i<-1
c$i<-2
d$i<-3

df<-rbind(a,b,c,d)
df_all<-df
df<-df[which(df$Adjusted.P.value<0.05),]
write.csv(df, "~/scMultiomics_AD/enrichr/Olig_subclusters_GO_BP.csv")



df2<-df[!grepl("cycle", df$Term),]
top<-df %>% group_by(i) %>% top_n(n=2, wt=Combined.Score)
term<-top$Term
term<-sort(unique(term))


mat<-matrix(nrow=length(unique(term)),ncol=4)
rownames(mat)<-unique(term)
colnames(mat)<-unique(df$i)

for (i in 1:nrow(mat)){
  for (j in 1:ncol(mat)){
    gene_tmp<-df_all[which(df_all$Term==rownames(mat)[i]),]
    gene_tmp<-gene_tmp[which(gene_tmp$i==colnames(mat)[j]),]
    mat[i,j]<-ifelse(nrow(gene_tmp)>0, gene_tmp$Odds.Ratio,1)
  }
}


rn<-rownames(mat)
rn<-sapply(strsplit(rn, " (", fixed=T), `[`,1)
rownames(mat)<-rn


ht=Heatmap(mat, cluster_rows=T,show_row_dend=F,cluster_columns=F,col=colorRamp2(c(0,60,300),c("white", "# d3eff5","# 00518a")), name="Odds Ratio",  show_column_names=T, show_row_names=T, column_title=NULL,  row_names_side = "left",row_names_max_width = max_text_width(
        rownames(mat), 
        gp = gpar(fontsize = 12)
    ))


library(stringr)
pdf("~/scMultiomics_AD/Olig_subcluster_GObp.pdf", width=8, height=6)
   ht 
dev.off()



df<-read.csv("~/scMultiomics_AD/enrichr/Olig_subclusters_GO_BP.csv")
df<-df[which(df$i==2),]
df<-df[which(df$Adjusted.P.value<0.01),]
df$term<-sapply(strsplit(df$Term, " (", fixed=T), `[`,1)
df<-df[order(df$Odds.Ratio, decreasing=T),]
df$term<-factor(df$term, levels=rev(df$term))

pdf("~/scMultiomics_AD/Olig_GO_bp_cluster2.pdf", width=12, height=6)
ggplot(df, aes(x=Odds.Ratio, y=term, fill=-log10(Adjusted.P.value)))+geom_bar(stat="identity")+theme_classic()+scale_fill_gradient2(low="white", mid="white",high="# 0F7010", limits=c(0,5))+theme(axis.text=element_text(size=12))
dev.off()

boxplot

tab<-prop.table(table(subs[[4]]$subs, subs[[4]]$id),2)
Status<-c("Ctrl","Ctrl","Ctrl","AD","Ctrl","AD","AD","AD","AD","AD","AD","Ctrl","Ctrl","Ctrl","Ctrl")
df<-as.data.frame(t(tab))
df$Status<-rep(Status, nrow(tab))


stat.test <- df %>%
  group_by(Var2) %>%
  t_test(Freq ~ Status) %>%
  adjust_pvalue() %>%
  add_significance("p.adj")
stat.test <- stat.test %>% add_xy_position(x = "Var2")


#  0.882
#  0.295
#  0.684
#  0.31 



pdf("~/scMultiomics_AD/Olig_CT_stat_prop_tt.pdf", width=10,height=5)
ggboxplot(df, y="Freq",x="Var2", fill="Status", alpha=0.8)+stat_pvalue_manual(stat.test, label="p")+xlab("")+scale_fill_manual(values=c("red","blue"))
dev.off()

Bar plot

Supp Fig 5

meta<-Oligodendrocytes@meta.data
tab<-prop.table(table(meta$cluster, meta$id),1)
Status<-c("Ctrl","Ctrl","Ctrl","AD","Ctrl","AD","AD","AD","AD","AD","AD","Ctrl","Ctrl","Ctrl","Ctrl")
df<-as.data.frame(t(tab))
df$Status<-rep(Status, 4)


Ctrl<-brewer.pal(8,"Blues")
AD<-brewer.pal(7, "Reds")
tmp<-df[1:15,]
tmp<-tmp[order(tmp$Status),]
df$Var1<-factor(df$Var1, levels= tmp$Var1)

lev<-levels(df$Var2)


df$Var2<-factor(df$Var2, levels=lev)


tab2<-as.data.frame(table(meta$cluster))
tab2.m<-merge(tab2, cols.df, by.x="Var1", by.y="subs")
tab2.m$Var1<-factor(df$Var1, levels= tmp$Var1)
tab2.m<-tab2.m[order(-tab2.m$Var1),]
lev<-levels(tab2.m$Var1)

tab2.m$Var1<-factor(tab2.m$Var1, levels=lev)
tab2.m<-tab2.m[order(match(tab2.m$Var1, lev)),]


df<-df[order(match(df$Var2, lev2)),]



pdf("~/scMultiomics_AD/Olig_stat_prop_bar_wTotalCells_log10.pdf", width=5,height=6)
p1=ggplot(df, aes(x=Freq*100,y=Var2,fill=Var1))+geom_bar(stat="identity")+theme_classic()+scale_fill_manual(values=c(AD,Ctrl))+xlab("%")+ylab("")+theme(legend.title=element_text(size=0), legend.position="none")+ggtitle("Donor")+scale_y_discrete(limits=rev(levels(df$Var2)))

p2=ggplot(tab2.m, aes(x=Freq, y=Var1, fill=Var1))+geom_bar(stat="identity")+theme_classic()+theme(legend.position="none", axis.text.y=element_text(size=0))+ggtitle("Num. Cells")+ylab("")+scale_fill_manual(values=tab2.m$cols)+xlab(" Count")+scale_y_discrete(limits=rev(levels(tab2.m$Var1)))+geom_vline(xintercept=100)
p1+p2
dev.off()

S2G) heatmap

olig_degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_Oligodendrocytes.csv")
olig_degs<-olig_degs[which(olig_degs$cat=="up"),]
top <- olig_degs %>% group_by(cluster) %>% top_n(n=10, wt=avg_log2FC)
top$clust<-paste("Oli_", top$cluster, sep="")

avg_Exp<-as.data.frame(AverageExpression(Oligodendrocytes, features=top$gene, group.by="cluster", assay="PC"))

colnames(avg_Exp)<-gsub("PC.","", colnames(avg_Exp))
mat_scaled<-t(apply(as.matrix(avg_Exp),1,scale))
colnames(mat_scaled)<-colnames(avg_Exp)


ha<-HeatmapAnnotation(cluster=colnames(mat_scaled), show_legend=F, col=list(cluster=c("Oli_0"="lightsalmon", "Oli_1"="darksalmon", "Oli_2"="coral", "Oli_3"="brown1")))
ra<-rowAnnotation(cluster=top$clust, show_legend=F, col=list(cluster=c("Oli_0"="lightsalmon", "Oli_1"="darksalmon","Oli_2"="coral", "Oli_3"="brown1")))

ht=Heatmap(mat_scaled,cluster_columns=T,cluster_rows=F, col=colorRamp2(c(-1.5,0,1.5),viridis(3)), top_annotation=ha, name="Z", left_annotation=ra, show_column_names=T, show_row_names=T, column_title=NULL, row_title_gp=gpar(fontsize=10),row_names_gp=gpar(fontsize=7), row_split=top$clust)


pdf("~/scMultiomics_AD/Olig_top10DEG_heatmap.pdf", width=4, height=6)
ht
dev.off()


ht2=Heatmap(t(mat_scaled),cluster_columns=T,cluster_rows=F, col=colorRamp2(c(-1.5,0,1.5),viridis(3)), left_annotation=ha, name="Z", top_annotation=ra, show_column_names=T, show_row_names=T, column_title=NULL, row_title_gp=gpar(fontsize=10),row_names_gp=gpar(fontsize=7), row_split=top$clust)

pdf("~/scMultiomics_AD/Olig_top10DEG_heatmap_transpose.pdf", width=4, height=6)
ht
dev.off()

Mic

   DefaultAssay(Microglia)<-"PC"
     Microglia <- SCTransform(Microglia,assay="PC",new.assay.name="SCT",vars.to.regress = c("percent.mt"),verbose=F)   
     DefaultAssay(Microglia)<-"SCT"
    Microglia<-RunPCA(Microglia,ndims=40)
   # Microglia<-RunHarmony(Microglia, group.by.vars="id",reduction="pca",assay.use="SCT", reduction.save="sub.harmony.rna", lambda=1,  theta=2, max.iter.harmony=20)  # converged on 14
   Microglia<-RunUMAP(Microglia,reduction="harmony.rna",dims=1:30,reduction.name="sub.harmony.rna.umap")
   # atac
     DefaultAssay(Microglia)<-"CTpeaks"
   Microglia<-RunTFIDF(Microglia) %>% FindTopFeatures(min.cutoff='q50')%>% RunSVD()
    Microglia<-RunUMAP(Microglia,reduction="harmony.atac",dims=2:50,reduction.name="sub.harmony.atac.umap")
   # wnn
   Microglia<-FindMultiModalNeighbors(Microglia, reduction.list=list("harmony.rna","harmony.atac"), dims.list=list(1:30,2:50))
   Microglia<-RunUMAP(Microglia,nn.name="weighted.nn", reduction.name="wnn.umap",reduction.key="wnnUMAP_")
   Microglia<-FindClusters(Microglia, graph.name="wknn", algorithm=1, resolution=0.45)

   
Microglia$batch<-ifelse(Microglia$id %in% c("1224","1238","3586","4064","4627","4674","5640","4481"), "1","2")

S2A) UMAP

Microglia$subs<-paste0("Mic_", Microglia$seurat_clusters)
pdf("~/scMultiomics_AD/Mic_subclusters.pdf")
DimPlot(Microglia, group.by="subs",reduction="wnn.umap", pt.size=2, label=F)
dev.off()

DEGs

DEGs<-FindAllMarkers(Microglia,min.pct=0.1,logfc.threshold = 0.1,  assay="PC", test.use="MAST",latent.vars=c("Age","Sex"))
DEGs<-DEGs[which(DEGs$p_val_adj<0.01 & abs(DEGs$avg_log2FC)>0.5),]
DEGs$cat<-ifelse(DEGs$avg_log2FC>0, "up","down")

write.csv(DEGs, "~/scMultiomics_AD/DEGs/DEGs_Microglia.csv")
Mic_no4<-subset(Microglia, wknn_res.0.45==4, invert=T)
Mic_no4<-NormalizeData(Mic_no4)
Idents(Mic_no4)<-Mic_no4$wknn_res.0.45
DEGs<-FindAllMarkers(Mic_no4,min.pct=0.1,logfc.threshold = 0.1,  assay="PC", test.use="MAST",latent.vars=c("Age","Sex"))
DEGs<-DEGs[which(DEGs$p_val_adj<0.01 & abs(DEGs$avg_log2FC)>0.5),]
DEGs$cat<-ifelse(DEGs$avg_log2FC>0, "up","down")

write.csv(DEGs, "~/scMultiomics_AD/DEGs/DEGs_Microglia_removeCluster.csv")

S2B) heatmap

mic_degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_Microglia.csv")

mic_degs<-mic_degs[order(mic_degs$cat,  -mic_degs$cluster,decreasing=T),]
mic_degs2<-mic_degs[which(mic_degs$cat=="up"),]


top_genes<-mic_degs2 %>%group_by(cluster) %>% top_n(n=20, wt=abs(avg_log2FC))
top_genes<-rbind(top_genes, mic_degs2[which(mic_degs2$gene %in% c( "ACSL1", "CD163", "CD44", "CR1", "CEMIP2", "DAB2", "FOXP1", "LYVE1", "MAFB", "MITF", "MYO5A", "NOTCH2", "SLC11A1", "TGFBI")),])
top_genes<-top_genes[!duplicated(top_genes$gene),]


# avgExp<-as.data.frame(AverageExpression(Microglia,group.by="wknn_res.0.45",features=top_genes$gene,assay="RNA",slot="data"))
avgExp<-as.data.frame(AverageExpression(Mic_no4,group.by="wknn_res.0.45",features=top_genes$gene,assay="RNA",slot="data"))

names<-colnames(avgExp)
names<-gsub("RNA.","",names)
colnames(avgExp)<-names
tmp<-top_genes[which(top_genes$gene %in% rownames(avgExp)),]
avgExp<-avgExp[tmp$gene,]


avgExp.scale<-t(apply(avgExp,1, scale))
colnames(avgExp.scale)<-colnames(avgExp)
ha<-HeatmapAnnotation(cluster=paste0("Mic_",colnames(avgExp)), col= list(cluster=c("Mic_0"="# f9786e","Mic_1"="# a3a500","Mic_2"="# 06bf7d","Mic_3"="# 00b0f6")), show_legend=F)

ra<-rowAnnotation(cluster=paste0("Mic_",as.factor(top_genes$cluster)), col=list(cluster=c("Mic_0"="# f9786e","Mic_1"="# a3a500","Mic_2"="# 06bf7d", "Mic_3"="# 00b0f6")),show_legend=T)


colnames(avgExp.scale)<-paste("Mic_",colnames(avgExp.scale), sep="")
ht=Heatmap(avgExp.scale, cluster_rows=F,cluster_columns=F,col=colorRamp2(c(-1,0,1.5),viridis(3)), row_split=tmp$cluster,  name="Z",  show_column_names=T, show_row_names=T,row_names_gp = gpar(fontsize = 5), row_title_gp=gpar(fontsize=0), show_column_dend = FALSE, show_row_dend = FALSE, left_annotation=ra, top_annotation=ha)

pdf("~/scMultiomics_AD/Microglia_Degs_overlap_heatmap_dropCluster2.pdf", width=5,height=8)
ht
dev.off()



ha<-rowAnnotation(cluster=rownames(t(avgExp.scale)), col= list(cluster=c("Mic_0"="# f9786e","Mic_1"="# a3a500","Mic_2"="# 06bf7d","Mic_3"="# 00b0f6")), show_legend=F)
ra<-HeatmapAnnotation(cluster=paste0("Mic_",as.factor(top_genes$cluster)), col=list(cluster=c("Mic_0"="# f9786e","Mic_1"="# a3a500","Mic_2"="# 06bf7d", "Mic_3"="# 00b0f6")),show_legend=T)

ht2=Heatmap(t(avgExp.scale), cluster_rows=F,cluster_columns=F,col=colorRamp2(c(-1,0,1.5),viridis(3)), row_split=tmp$cluster,  name="Z",  show_column_names=T, show_row_names=T,row_names_gp = gpar(fontsize = 10),column_names_gp=gpar(fontsize=6), row_title_gp=gpar(fontsize=0), show_column_dend = FALSE, show_row_dend = FALSE, left_annotation=ha, top_annotation=ra)

pdf("~/scMultiomics_AD/Microglia_Degs_overlap_heatmap_dropCluster2.pdf", width=7,height=3)
ht2
dev.off()
mic_degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_Microglia.csv")
mic_degs<-mic_degs[order(mic_degs$cat,  -mic_degs$cluster,decreasing=T),]
mic_degs_H<-mic_degs[which(mic_degs$cat=="up"),]
mic_degs_H<-mic_degs_H[which(mic_degs_H$cluster %in% c(0,1,2)),]


avgExp<-as.data.frame(AverageExpression(Microglia,group.by="wknn_res.0.45",features=mic_degs_H$gene,assay="PC",slot="data"))
names<-colnames(avgExp)
names<-gsub("PC.","",names)
colnames(avgExp)<-names
tmp<-mic_degs_H[which(mic_degs_H$gene %in% rownames(avgExp)),]
avgExp<-avgExp[tmp$gene,]


avgExp.scale<-t(apply(avgExp,1, scale))
colnames(avgExp.scale)<-colnames(avgExp)
colnames(avgExp.scale)<-paste("Mic_",colnames(avgExp.scale), sep="")

ha<-HeatmapAnnotation(cluster=paste0("Mic_",colnames(avgExp)), col= list(cluster=c("Mic_0"="# f9786e","Mic_1"="# a3a500","Mic_2"="# 06bf7d","Mic_3"="# 00b0f6","Mic_4"="# e86bf3")), show_legend=F)
ra<-rowAnnotation(cluster=as.factor(mic_degs_H$cluster), col=list(cluster=c("0"="forestgreen","1"="darkolivegreen3","2"="darkseagreen3")),show_legend=T)


ht=Heatmap(avgExp.scale, cluster_rows=T,cluster_columns=F,col=colorRamp2(c(-1,0,1.5),viridis(3)), row_split=tmp$subtype.y,  name="Z",  show_column_names=T, show_row_names=T,row_names_gp = gpar(fontsize = 6), row_title_gp=gpar(fontsize=0), show_column_dend = FALSE, show_row_dend = FALSE, left_annotation=ra, top_annotation=ha)

pdf("~/scMultiomics_AD/Microglia_Degs_overlap_heatmap_0v1v2.pdf", width=4,height=3)
ht
dev.off()

GO

degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_Microglia.csv")
degs<-degs[which(degs$p_val_adj<0.01 & abs(degs$avg_log2FC)>0.5),]
degs_up<-degs[which(degs$cat=="up"),]
dbs <- c("GO_Molecular_Function_2021", "GO_Cellular_Component_2021", "GO_Biological_Process_2021", "KEGG_2021_Human", "Azimuth_Cell_Types_2021", "Allen_Brain_Atlas_10x_scRNA_2021")

In<-list()
for (i in 4:5){
In[[i]]<-enrichr(degs_up[which(degs_up$cluster== i-1),]$gene, dbs)
}


head(In[[i]][["GO_Molecular_Function_2021"]][,1:4])
head(In[[i]][["GO_Biological_Process_2021"]][,1:4])
head(In[[i]][["GO_Cellular_Component_2021"]][,1:4])
head(In[[i]][["KEGG_2021_Human"]][,1:4])
head(In[[i]][["Azimuth_Cell_Types_2021"]][,1:4])
head(In[[i]][["Allen_Brain_Atlas_10x_scRNA_2021"]][,1:4])

a<-In[[4]][["GO_Biological_Process_2021"]]
b<-In[[5]][["GO_Biological_Process_2021"]]


a$i<-3
b$i<-4

df<-rbind(a,b)
df<-df[which(df$Adjusted.P.value<0.05),]
write.csv(df, "~/scMultiomics_AD/enrichr/Mic_subclusters_GO_BP_cluster3_4.csv")


mic_down<-enrichr(mic_degs[which(mic_degs$cat=="down" & mic_degs$cluster==4),]$gene, dbs)

write.csv(mic_down[["GO_Biological_Process_2021"]], "~/scMultiomics_AD/enrichr/Mic_cluster4down_GO_BP.csv")
mic_degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_Microglia_removeCluster.csv")

mic_degs2<-mic_degs[which(mic_degs$cat=="up"),]

clus3<-enrichr(mic_degs2[which(mic_degs2$cluster==3),]$gene, dbs)



df<-clus3[["GO_Biological_Process_2021"]]

df<-df[which(df$Adjusted.P.value<0.05),]
df$term<-sapply(strsplit(df$Term, " (", fixed=T), `[`,1)
df<-df[order(df$Odds.Ratio, decreasing=T),]
df$term<-factor(df$term, levels=rev(df$term))

pdf("~/scMultiomics_AD/Mic_GO_bp_cluster2.pdf", width=12, height=6)
ggplot(df, aes(x=Odds.Ratio, y=term, fill=-log10(Adjusted.P.value)))+geom_bar(stat="identity")+theme_classic()+scale_fill_gradient2(low="white", mid="# D4F9B0",high="# 0F7010", limits=c(0,5))+theme(axis.text=element_text(size=12))
dev.off()

GO BP 1 receptor-mediated endocytosis (GO:0006898) 2 clathrin-dependent endocytosis (GO:0072583) 3 membrane organization (GO:0061024) 4 connective tissue development (GO:0061448) 5 transmembrane receptor protein tyrosine kinase signaling pathway (GO:0007169) 6 hyaluronan catabolic process (GO:0030214) 7 positive regulation of epithelial cell migration (GO:0010634) 8 positive regulation of protein phosphorylation (GO:0001934) 9 hyaluronan metabolic process (GO:0030212)

KEGG Fc gamma R-mediated phagocytosis

S2C) Boxplot

tab<-prop.table(table(Microglia$wknn_res.0.45, Microglia$id),2)
Status<-c("Ctrl","Ctrl","Ctrl","AD","Ctrl","AD","AD","AD","AD","AD","AD","Ctrl","Ctrl","Ctrl","Ctrl")
df<-as.data.frame(t(tab))
df$Status<-rep(Status, nrow(tab))
df$cluster<-paste0("Mic_", df$Var2)



#  0.0358 
#  0.00904
#  0.00449
#  0.0133 
#  0.706  


stat.test <- df %>%
  group_by(cluster) %>%
  t_test(Freq ~ Status) %>%
  adjust_pvalue() %>%
  add_significance("p.adj")
stat.test <- stat.test %>% add_xy_position(x = "cluster")

pdf("~/scMultiomics_AD/Mic_CT_stat_prop_tt.pdf", width=6,height=5)
ggboxplot(df, y="Freq",x="cluster", fill="Status", alpha=0.8)+stat_pvalue_manual(stat.test, label="p")+xlab("")+scale_fill_manual(values=c("firebrick","navyblue"))
dev.off()

FIG 1

B) All subcluster plot

data<-readRDS("~/scMultiomics_AD/final.rds")
Microglia<-readRDS("~/scMultiomics_AD/Microglia_subcluster.rds")
subs<-readRDS("~/scMultiomics_AD/subclusters.rds")

mic_clusters<-Microglia@meta.data
subs[[5]]<-AddMetaData(subs[[5]],mic_clusters)
subs[[5]]$cluster<-ifelse(is.na(subs[[5]]$seurat_clusters)==T, "NK", subs[[5]]$wknn_res.0.45)
subs[[5]]$seurat_clusters<-subs[[5]]$cluster

fullmeta<-c()
for (i in 1:length(subs)){
  subs[[i]]$subs<-paste0(substr(subs[[i]]$predicted.id,start=1,stop=3),"_", subs[[i]]$seurat_clusters,sep="")
  meta<-as.data.frame(subs[[i]]@meta.data[,c("subs")])
  rownames(meta)<-rownames(subs[[i]]@meta.data)
  fullmeta<-rbind(fullmeta,meta)
}



mic_meta<-read.csv("~/scMultiomics_AD/microglia_metadata.csv")
mic_meta$subs<-paste0("Mic_", mic_meta$seurat_clusters)
mic_subs<-as.data.frame(mic_meta$subs)
rownames(mic_subs)<-mic_meta$X.1

a=as.data.frame(subs[[1]]$subs) 
b=as.data.frame(subs[[2]]$subs) 
c=as.data.frame(subs[[3]]$subs)
d=as.data.frame(subs[[4]]$subs)
e=mic_subs

colnames(a)<-"subs"
colnames(b)<-"subs"
colnames(c)<-"subs"
colnames(d)<-"subs"
colnames(e)<-"subs"

fullmeta<-rbind(a,b,c,d,e)


data<-AddMetaData(data, fullmeta)
data$subs<-ifelse(is.na(data$subs),substr(data$predicted.id,start=1,stop=3),data$subs)

pdf("~/scMultiomics_AD/All_subclusters_plot_wLegend.pdf", width=9, height=10)
DimPlot(data, reduction="wnn.umap",group.by="subs", label=T, pt.size=0.1,cols=c(
      "Ast_0"="darkgoldenrod1","Ast_1"="darkgoldenrod3", "Ast_2"="gold2",
      "Ast_3"="goldenrod","Ast_4"="goldenrod1",
                                                                        
               "End"="gray54",     
      
               "Exc_0"="royalblue1",       "Exc_1"="deepskyblue2",     "Exc_2"="steelblue1", "Exc_3"="steelblue3",       "Exc_4"="cornflowerblue",       "Exc_5"="royalblue3",       "Exc_6"="cadetblue1", "Exc_7"="cadetblue3",
      "Exc_8"="dodgerblue","Exc_9"="dodgerblue3", "Exc_10"="white",
               
               "Inh_0"="palegreen",  "Inh_1"="springgreen",  "Inh_2"="darkolivegreen3",       "Inh_3"="seagreen3", "Inh_4"="forestgreen",       "Inh_5"="darkseagreen",       "Inh_6"="palegreen4",       "Inh_7"="darkolivegreen1",  
               
                      "Mic_0"="darkorchid4", 
               "Mic_1"="magenta",    "Mic_2"="magenta4",
               "Mic_3"="orchid", "Mic_4"="purple",  
               
               "Oli_0"="lightsalmon", "Oli_1"="darksalmon", 
               "Oli_2"="coral", "Oli_3"="brown1",
               
               "OPC"="firebrick",    "Per"="lightpink"))&ggtitle("WNN")& theme(legend.position="none")
dev.off()

D) scREAD heatmap

BrainMarkers<-read.csv("~/scREAD/scREAD_BrainMarkers.csv", header=T)
BrainMarkers<-BrainMarkers[which(BrainMarkers$Name!="PDGRFA"),]

DefaultAssay(data)<-"RNA"
data<-NormalizeData(data, assay="RNA") %>% ScaleData(features=BrainMarkers$Name)

avgExp_PC<-as.data.frame(AverageExpression(data, features=BrainMarkers$Name, group.by=c("predicted.id","id"), slot="data", assay="RNA"))


colnames(avgExp_PC)<-gsub("RNA.","",colnames(avgExp_PC))

df<-data.frame(celltype=sapply(strsplit(colnames(avgExp_PC), "_",fixed=T), `[`,1), 
               id=sapply(strsplit(colnames(avgExp_PC), "_",fixed=T), `[`,2))
df$tmp<-substr(df$celltype,start=1,stop=3)

labels<-BrainMarkers
labels$tmp<-substr(labels$List,start=1,stop=3)

# Normalize
mat_scaled<-t(apply(avgExp_PC,1,scale))
mat_scaled<-t(scale(t(avgExp_PC)))


library(RColorBrewer)
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
col=sample(col_vector, 15)
names(col)<-unique(df$id)
ha<-rowAnnotation(celltype=labels$tmp,col= list(celltype=c("Ast"="darkgoldenrod1","End"="gray54","Exc"="cornflowerblue","Inh"="seagreen3","Mic"="mediumorchid3","Oli"="coral3","OPC"="firebrick","Per"="lightpink")), show_legend=F)
ha2<-HeatmapAnnotation(celltype=df$tmp,ID=df$id, col= list(celltype=c("Ast"="darkgoldenrod1","End"="gray54","Exc"="cornflowerblue","Inh"="seagreen3","Mic"="mediumorchid3","Oli"="coral3","OPC"="firebrick","Per"="lightpink"), ID=col), show_legend=F)
ht=Heatmap(mat_scaled,cluster_rows=F,cluster_columns=F,col=colorRamp2(c(-1,0,2),viridis(3)), row_split=labels$tmp, top_annotation=ha2, name="Z", left_annotation=ha, show_column_names=F, show_row_names=F, column_title=NULL, row_title_gp=gpar(fontsize=14))

lab<-which(rownames(mat_scaled) %in% c("AQP4","NRGN","GAD2","P2RY12","MOBP","VCAN"))

pdf("~/scMultiomics_AD/scREAD_heatmap_byID.pdf", width=5,height=8)
ht+rowAnnotation(gene=anno_mark(at=lab, labels=rownames(mat_scaled[lab,]), side="right", labels_gp=gpar(fontsize=10)))
dev.off()

E) ATAC signal

DefaultAssay(data)<-"ATAC"
annot<-Annotation(data)


DefaultAssay(data)<-"ATAC"
a<-FindRegion(data, "AQP4"  ,extend.downstream=100,extend.upstream=1000)
b<-FindRegion(data, "NRGN"  ,extend.downstream=100,extend.upstream=1000)
c<-FindRegion(data, "GAD2"  ,extend.downstream=100,extend.upstream=1000)
d<-FindRegion(data, "P2RY12",extend.downstream=100,extend.upstream=1000)
e<-FindRegion(data, "MOBP"  ,extend.downstream=100,extend.upstream=1000)  # MOBP
f<-FindRegion(data, "VCAN"  ,extend.downstream=100,extend.upstream=1000) # NEU4

# & theme(strip.text.y.left = element_blank(),strip.background = element_blank(), axis.text.x=element_blank(),plot.title = element_text(hjust = 0.5))

DefaultAssay(data)<-"CTpeaks"
Annotation(data)<-annot[which(annot$gene_name=="AQP4"),]
p1<-CoveragePlot(data,window=400, idents=c("Astrocytes","Excitatory","Inhibitory","Microglia","Oligodendrocytes","OPCs"),group.by="predicted.id",annotation=F,peaks=F, region =a) & scale_fill_manual(values=c("darkgoldenrod1","cornflowerblue","seagreen3","mediumorchid3","coral3","firebrick"))& theme(strip.text.y.left = element_blank(),strip.background = element_blank(),axis.text.x=element_blank(),plot.title = element_text(hjust = 0.5), axis.title.y=element_text(size=10))&ggtitle("AQP4")
anPlot<-AnnotationPlot(data, region=a)&scale_color_manual(values="darkgreen")& theme(strip.text.y.left = element_blank(),strip.background = element_blank(), axis.text.x=element_blank(),plot.title = element_text(hjust = 0.5))
p1<-CombineTracks(list(p1,anPlot),heights=c(5,1))




Annotation(data)<-annot[which(annot$gene_name=="NRGN"),]
p2<-CoveragePlot(data,window=200, idents=c("Astrocytes","Excitatory","Inhibitory","Microglia","Oligodendrocytes","OPCs"),group.by="predicted.id",annotation=F,peaks=F, region =b)& scale_fill_manual(values=c("darkgoldenrod1","cornflowerblue","seagreen3","mediumorchid3","coral3","firebrick"))& theme(strip.text.y.left = element_blank(),strip.background = element_blank(),axis.text.x=element_blank(),plot.title = element_text(hjust = 0.5))&ggtitle("NRGN")
anPlot<-AnnotationPlot(data, region=b)& theme(strip.text.y.left = element_blank(),strip.background = element_blank(), axis.text.x=element_blank(),plot.title = element_text(hjust = 0.5))
p2<-CombineTracks(list(p2,anPlot),heights=c(5,1))

Annotation(data)<-annot[which(annot$gene_name=="GAD2"),]
p3<-CoveragePlot(data,window=400, idents=c("Astrocytes","Excitatory","Inhibitory","Microglia","Oligodendrocytes","OPCs"),group.by="predicted.id",annotation=F,peaks=F, region =c)& scale_fill_manual(values=c("darkgoldenrod1","cornflowerblue","seagreen3","mediumorchid3","coral3","firebrick"))& theme(strip.text.y.left = element_blank(),strip.background = element_blank(),axis.text.x=element_blank(),plot.title = element_text(hjust = 0.5))&ggtitle("GAD2")
anPlot<-AnnotationPlot(data, region=c)& theme(strip.text.y.left = element_blank(),strip.background = element_blank(), axis.text.x=element_blank(),plot.title = element_text(hjust = 0.5))
p3<-CombineTracks(list(p3,anPlot),heights=c(5,1))


Annotation(data)<-annot[which(annot$gene_name=="P2RY12"),]
p4<-CoveragePlot(data,window=400, idents=c("Astrocytes","Excitatory","Inhibitory","Microglia","Oligodendrocytes","OPCs"),group.by="predicted.id",annotation=F,peaks=F, region =d)& scale_fill_manual(values=c("darkgoldenrod1","cornflowerblue","seagreen3","mediumorchid3","coral3","firebrick"))& theme(strip.text.y.left = element_blank(),strip.background = element_blank(),axis.text.x=element_blank(),plot.title = element_text(hjust = 0.5))&ggtitle("P2RY12")
anPlot<-AnnotationPlot(data, region=d)&scale_color_manual(values="darkgreen")& theme(strip.text.y.left = element_blank(),strip.background = element_blank(), axis.text.x=element_blank(),plot.title = element_text(hjust = 0.5))
p4<-CombineTracks(list(p4,anPlot),heights=c(5,1))

Annotation(data)<-annot[which(annot$gene_name=="MOBP"),]
p5<-CoveragePlot(data,window=600, idents=c("Astrocytes","Excitatory","Inhibitory","Microglia","Oligodendrocytes","OPCs"),group.by="predicted.id",annotation=F,peaks=F, region =e, ymax=800)& scale_fill_manual(values=c("darkgoldenrod1","cornflowerblue","seagreen3","mediumorchid3","coral3","firebrick"))& theme(strip.text.y.left = element_blank(),strip.background = element_blank(),axis.text.x=element_blank(),plot.title = element_text(hjust = 0.5))&ggtitle("MOBP")
anPlot<-AnnotationPlot(data, region=e)& theme(strip.text.y.left = element_blank(),strip.background = element_blank(), axis.text.x=element_blank(),plot.title = element_text(hjust = 0.5))
p5<-CombineTracks(list(p5,anPlot),heights=c(5,1))

Annotation(data)<-annot[which(annot$gene_name=="VCAN"),]
p6<-CoveragePlot(data,window=3000, idents=c("Astrocytes","Excitatory","Inhibitory","Microglia","Oligodendrocytes","OPCs"), group.by="predicted.id",annotation=F,peaks=F, region =f)& scale_fill_manual(values=c("darkgoldenrod1","cornflowerblue","seagreen3","mediumorchid3","coral3","firebrick"))& theme(strip.text.y.left = element_blank(),strip.background = element_blank(), axis.text.x=element_blank(),plot.title = element_text(hjust = 0.5))&ggtitle("VCAN")
anPlot<-AnnotationPlot(data, region=f)& theme(strip.text.y.left = element_blank(),strip.background = element_blank(), axis.text.x=element_blank(),plot.title = element_text(hjust = 0.5))
p6<-CombineTracks(list(p6,anPlot),heights=c(5,1))


library(cowplot)
pdf("~/scMultiomics_AD/marker_coverage_plot.pdf", width=12, height=7)
plot_grid(p1,p2,p3,p4,p5,p6, ncol=6)
dev.off()

C) Subcluster prop plot

tab<-prop.table(table(meta$subs, meta$id),1)
Status<-c("Ctrl","Ctrl","Ctrl","AD","Ctrl","Ctrl","AD","AD","AD","AD","AD","AD","AD","AD","Ctrl","Ctrl","Ctrl","Ctrl")
Status<-Status[-c(6,13,14)]
df<-as.data.frame(t(tab))
df$Status<-rep(Status, 36)


Ctrl<-brewer.pal(8,"Blues")
AD<-brewer.pal(7, "Reds")
tmp<-df[1:15,]
tmp<-tmp[order(tmp$Status),]
df<-df[-seq(121,135),]
df$Var1<-factor(df$Var1, levels= tmp$Var1)


pdf("~/scMultiomics_AD/Allsubs_stat_prop_bar.pdf", width=5,height=6)
ggplot(df, aes(x=Freq*100,y=Var2,fill=Var1))+geom_bar(stat="identity")+theme_classic()+scale_fill_manual(values=c(AD,Ctrl))+xlab("%")+ylab("")+theme(legend.title=element_text(size=0))
dev.off()



lev<-levels(df$Var2)
lev2<-lev[c(1,2,3,4,5,7,8,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,6,36)]

df$Var2<-factor(df$Var2, levels=lev2)


tab2<-as.data.frame(table(meta$subs))
tab2.m<-merge(tab2, hex.df, by.x="Var1", by.y="subs")
tab2.m$Var1<-factor(df$Var1, levels= tmp$Var1)
tab2.m<-tab2.m[order(-tab2.m$Var1),]
lev<-levels(tab2.m$Var1)
lev<-lev[c(1,2,3,4,5,7,8,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,6,36)]

tab2.m$Var1<-factor(tab2.m$Var1, levels=lev)
tab2.m<-tab2.m[order(match(tab2.m$Var1, lev)),]


df<-df[order(match(df$Var2, lev2)),]



pdf("~/scMultiomics_AD/Allsubs_stat_prop_bar_wTotalCells_log10.pdf", width=5,height=6)
p1=ggplot(df, aes(x=Freq*100,y=Var2,fill=Var1))+geom_bar(stat="identity")+theme_classic()+scale_fill_manual(values=c(AD,Ctrl))+xlab("%")+ylab("")+theme(legend.title=element_text(size=0), legend.position="left")+ggtitle("Donor")+scale_y_discrete(limits=rev(levels(df$Var2)))

p2=ggplot(tab2.m, aes(x=Freq, y=Var1, fill=Var1))+geom_bar(stat="identity")+theme_classic()+theme(legend.position="none", axis.text.y=element_text(size=0))+ggtitle("Number of Cells")+ylab("")+scale_fill_manual(values=tab2.m$cols)+xlab(" Count")+scale_y_discrete(limits=rev(levels(tab2.m$Var1)))+geom_vline(xintercept=100)+scale_x_continuous(breaks=c(0,6000,12000))
p1+p2
dev.off()

F) CorPlot

Idents(data)<-data$id
df2<-data@meta.data[,c("id","Status")]
df2<-df2[!duplicated(df2$id),]
df2<-df2[order(df2$Status),]

Idents(data)<-factor(Idents(data), levels=df2$id)
DefaultAssay(data)<-"PC"
rs<-rowSums(data)

avgExp_PC<-as.data.frame(AverageExpression(data, group.by=c("predicted.id","id"), assay="PC", features=names(rs[which(rs>20)])))



colnames(avgExp_PC)<-gsub("PC.","",colnames(avgExp_PC))
 colnames(avgExp_PC)[89:103]<-c("OPCs_1224",
                                 "OPCs_1230",
                                 "OPCs_1238",
                                 "OPCs_3329",
                                 "OPCs_3586",
                                 "OPCs_4305",
                                 "OPCs_4313",
                                 "OPCs_4443",
                                 "OPCs_4481",
                                 "OPCs_4482",
                                 "OPCs_4627",
                                 "OPCs_HCT17HEX",
                                 "OPCs_HCTZZT",
                                 "OPCs_NT1261",
                                 "OPCs_NT1271")

df<-data.frame(celltype=sapply(strsplit(colnames(avgExp_PC), "_",fixed=T), `[`,1), 
               id=sapply(strsplit(colnames(avgExp_PC), "_",fixed=T), `[`,2))
df$tmp<-substr(df$celltype,start=1,stop=3)
rownames(df)<-colnames(avgExp_PC)
avgExp_PC2<-avgExp_PC[,!(df$tmp %in% c("End","Per"))]
df<-df[!(df$tmp %in% c("End","Per")),]


cor.mat<-cor(as.matrix(avgExp_PC2),method="pearson")


df.m<-merge(df,df2, by="id", sort=F)
rownames(df.m)<-paste(df.m$celltype,"_", df.m$id, sep="")
df.m<-df.m[rownames(df),]


library(RColorBrewer)
n=18
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
col<-sample(col_vector,n)

ha<-HeatmapAnnotation(celltype=df.m$tmp, id=df.m$id, Status=df.m$Status,show_legend=F, col= list(celltype=c("Ast"="darkgoldenrod1","End"="gray54","Exc"="cornflowerblue","Inh"="seagreen3","Mic"="mediumorchid3","Oli"="coral3","OPC"="firebrick","Per"="lightpink"),Status=c("AD"="red","Ctrl"="blue"), id=c("1224"    ="# CAB2D6","1230"    ="# B3B3B3","1238"    ="# FED9A6","3329"    ="# A6D854","3586"    ="# CBD5E8","4305"    ="# 7FC97F","4313"    ="# B2DF8A","4443"    ="# FFED6F","4481"    ="# E5D8BD","4482"    ="# FFFF99","4627"    ="# 377EB8","HCT17HEX"="# FC8D62","HCTZZT"  ="# A6CEE3","NT1261"  ="# F1E2CC","NT1271"  ="# FDBF6F") ))
ra<-rowAnnotation(celltype=df.m$tmp, id=df.m$id,Status=df.m$Status,  show_legend=F,col= list(celltype=c("Ast"="darkgoldenrod1","End"="gray54","Exc"="cornflowerblue","Inh"="seagreen3","Mic"="mediumorchid3","Oli"="coral3","OPC"="firebrick","Per"="lightpink"), Status=c("AD"="red","Ctrl"="blue"),id=c("1224"    ="# CAB2D6","1230"    ="# B3B3B3","1238"    ="# FED9A6","3329"    ="# A6D854","3586"    ="# CBD5E8","4305"    ="# 7FC97F","4313"    ="# B2DF8A","4443"    ="# FFED6F","4481"    ="# E5D8BD","4482"    ="# FFFF99","4627"    ="# 377EB8","HCT17HEX"="# FC8D62","HCTZZT"  ="# A6CEE3","NT1261"  ="# F1E2CC","NT1271"  ="# FDBF6F")))

ht=Heatmap(cor.mat,cluster_rows=T,cluster_columns=T,col=colorRamp2(c(0,0.5,1),plasma(3)),  top_annotation=ha, name="r", left_annotation=ra, show_column_names=F, show_row_names=F, column_title=NULL, row_title_gp=gpar(fontsize=14))

pdf("~/scMultiomics_AD/CT_id_corMat_heatmap.pdf")
ht
dev.off()

Motifs

library(Signac)
library(Seurat)
library(JASPAR2020)
library(TFBSTools)
library(BSgenome.Hsapiens.UCSC.hg38)
library(patchwork)
set.seed(1234)
library(future) 
plan("multicore", workers = 10)
options(future.globals.maxSize = 100000 * 1024^2)



pfm2<-readJASPARMatrix("~/software/cellranger-arc-2.0.0/GRCh38/regions/motifs.pfm", matrixClass="PFM")


DefaultAssay(data)<-"ATAC"
annotation<-Annotation(data)
DefaultAssay(data)<-"CTpeaks"
Annotation(data)<-annotation
#  add motif information
data <- AddMotifs(
  object = data,
  genome = BSgenome.Hsapiens.UCSC.hg38,
  pfm = pfm2
)

motifs<-Motifs(data)
saveRDS(motifs, "~/scMultiomics_AD/motifs_object.rds")

chromVAR

counts<-readRDS("~/scMultiomics_AD/motif/FilteredPeaks500b_assay.rds")
motifs<-readRDS("~/scMultiomics_AD/motif/FilteredPeaks500b_motifs.rds")


data[["peaks500"]]<-CreateChromatinAssay(counts=counts,  fragments=Fragments(data))
DefaultAssay(data)<-"ATAC"
annotation<-Annotation(data)
DefaultAssay(data)<-"peaks500"
Annotation(data)<-annotation

Motifs(data)<-motifs
library(BSgenome.Hsapiens.UCSC.hg38)
# ad<-subset(data, subset=Status=="AD")
# ad<-RunChromVAR(ad, BSgenome.Hsapiens.UCSC.hg38)

# ctrl<-subset(data, subset=Status=="Ctrl")
# ctrl<-RunChromVAR(ctrl)

data<-RunChromVAR(data, BSgenome.Hsapiens.UCSC.hg38)

differential.activity <- FindMarkers(
  object = data,
  ident.1 = 'AD',
  ident.2 = 'Ctrl',
  mean.fxn = rowMeans,
  fc.name = "avg_diff"
)



Idents(data)<-data$predicted.id
differential.activity.ct <- FindAllMarkers(
  object = data,
  mean.fxn = rowMeans,
  fc.name = "avg_diff"
)
differential.activity.ct<-differential.activity.ct[which(differential.activity.ct$avg_diff>0),]

write.csv(differential.activity.ct,"~/scMultiomics_AD/motif/Differential_activity_celltypes.csv")

Format for LDSC

redo<-c2[which(c2$CT %in% c("Excitatory","Inhibitory","Microglia","Astrocytes","Oligodendrocytes","OPCs")),]
redo$classify<-paste(seqnames(redo),"-",start(redo),"-",redo$group,sep="")
redo2<-redo[!duplicated(redo$classify),]
redo<-as.data.frame(redo2)

table(redo$k27)




# redo$K27<-ifelse(redo$k27=="None","No_overlap","overlapped")
redo_format<-redo2[,c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21)]
redo_format$Astrocytes.k27<-grepl("Astrocytes",redo_format$k27)
redo_format$Excitatory.k27<-grepl("Neuron",redo_format$k27) | grepl("GLU", redo_format$k27)
redo_format$Inhbitory.k27<-grepl("Neuron",redo_format$k27) | grepl("GABA",redo_format$k27)
redo_format$Microglia.k27<-grepl("Microglia",redo_format$k27)
redo_format$Oligodendrocytes.k27<-grepl("Oligodendrocytes",redo_format$k27)
write.csv(redo_format,"~/scMultiomics_AD/celltype_specific_links_anno_hg38.csv")


tmp<-c2
tmp$classify<-paste(seqnames(tmp),"-",start(tmp),"-",tmp$group,sep="")
tmp$K27<-ifelse(tmp$k27=="None","No_overlap","overlapped")
tmp<-as.data.frame(tmp)
tmp<-tmp[!duplicated(tmp$classify),]
redo_format<-tmp[,c(1,2,3,4,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,23,21)]
redo_format$Astrocytes.k27<-grepl("Astrocytes",redo_format$k27)
redo_format$Excitatory.k27<-grepl("Neuron",redo_format$k27) | grepl("GLU", redo_format$k27)
redo_format$Inhbitory.k27<-grepl("Neuron",redo_format$k27) | grepl("GABA",redo_format$k27)
redo_format$Microglia.k27<-grepl("Microglia",redo_format$k27)
redo_format$Oligodendrocytes.k27<-grepl("Oligodendrocytes",redo_format$k27)
write.csv(redo_format,"~/scMultiomics_AD/all_links_anno_hg38.csv")




write.table(as.data.frame(unique(c2[which(c2$Astrocytes==T  ),] ))[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Astrocytes_all.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Astrocytes==T & c2$group=="AD"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Astrocytes_AD.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Astrocytes==T & c2$group=="common"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Astrocytes_common.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Astrocytes==T & c2$group=="Ctrl"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Astrocytes_Ctrl.bed",quote=F,row.names=F, col.names=F, sep="\t" )

write.table(as.data.frame(unique(c2[which(c2$Excitatory==T  ),] ))[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Excitatory_all.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Excitatory==T & c2$group=="AD"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Excitatory_AD.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Excitatory==T & c2$group=="common"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Excitatory_common.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Excitatory==T & c2$group=="Ctrl"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Excitatory_Ctrl.bed",quote=F,row.names=F, col.names=F, sep="\t" )

write.table(as.data.frame(unique(c2[which(c2$Inhibitory==T  ),] ))[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Inhibitory_all.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Inhibitory==T & c2$group=="AD"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Inhibitory_AD.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Inhibitory==T & c2$group=="common"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Inhibitory_common.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Inhibitory==T & c2$group=="Ctrl"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Inhibitory_Ctrl.bed",quote=F,row.names=F, col.names=F, sep="\t" )

write.table(as.data.frame(unique(c2[which(c2$Microglia==T  ),] ))[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Microglia_all.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Microglia==T & c2$group=="AD"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Microglia_AD.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Microglia==T & c2$group=="common"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Microglia_common.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Microglia==T & c2$group=="Ctrl"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Microglia_Ctrl.bed",quote=F,row.names=F, col.names=F, sep="\t" )

write.table(as.data.frame(unique(c2[which(c2$Oligodendrocytes==T  ),] ))[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Oligodendrocytes_all.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Oligodendrocytes==T & c2$group=="AD"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Oligodendrocytes_AD.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Oligodendrocytes==T & c2$group=="common"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Oligodendrocytes_common.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$Oligodendrocytes==T & c2$group=="Ctrl"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/Oligodendrocytes_Ctrl.bed",quote=F,row.names=F, col.names=F, sep="\t" )

write.table(as.data.frame(unique(c2[which(c2$OPCs==T  ),] ))[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/OPCs_all.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$OPCs==T & c2$group=="AD"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/OPCs_AD.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$OPCs==T & c2$group=="common"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/OPCs_common.bed",quote=F,row.names=F, col.names=F, sep="\t" )
write.table(as.data.frame(unique(c2[which(c2$OPCs==T & c2$group=="Ctrl"),]) )[,c(1,2,3)], "~/scMultiomics_AD/LDSC_bed/OPCs_Ctrl.bed",quote=F,row.names=F, col.names=F, sep="\t" )

TRIOS

  1. Call motifs in linked-peaks
  2. Filter links
    • 100kb of TSS
    • exclude links to peaks w/ >100 motifs
    • Motif Score > 12
    • overlaps H3K27ac
  3. Split cells into AD and control
  4. Create meta cells with high resolution clustering
  5. Get average expression and accessibility for each meta cell
  6. Correlate TF expression - gene expression and TF expression - peak accessibility using meta cells
  7. Define a significant trio as
    • significant and positive Peak-TF cor
    • significant Gene-TF cor

processing

Distance to linked gene

# get distance to linked-gene TSS so that you can remove links greater than 100kb away from TSS
gtf<-import("~/STAR/genes.gtf")
gtf<-gtf[which(gtf$type=="gene"),]
gtf$TSS<-ifelse(strand(gtf)=="+", start(gtf), end(gtf))
TSS.df<-data.frame(gene=gtf$gene_name, TSS=gtf$TSS)
tmp<-merge(links, TSS.df, by="gene")
tmp$p1<-abs(tmp$start-tmp$TSS)
tmp$p2<-abs(tmp$end-tmp$TSS)
tmp$distLinkedGene<-ifelse(tmp$p1<tmp$p2, tmp$p1, tmp$p2)

Call Motifs

pfm<-readJASPARMatrix("motifs.pfm", matrixClass="PFM")
DefaultAssay(data)<-"CTpeaks"
peaks<-granges(data)
peaks<-resize(peaks, width=500, fix="center") #resize peaks to 500bp before calling motifs
counts<-FeatureMatrix(fragments=Fragments(data), features=peaks,cells=colnames(data))
data[["peaks500"]]<-CreateChromatinAssay(counts=macs2_counts,  fragments=Fragments(data))

DefaultAssay(data)<-"ATAC"
annotation<-Annotation(data)
DefaultAssay(data)<-"peaks500"
Annotation(data)<-annotation
# add motif information
data <- AddMotifs(
  object = data,
  genome = BSgenome.Hsapiens.UCSC.hg38,
  pfm = pfm
)

motifs<-Motifs(data)

tmp<-motifs@data
colnames(tmp)<-sapply(strsplit(colnames(tmp), " ",fixed=T), `[`,1)
nz<- simplify2array(apply(tmp, 1,  function(x) paste(colnames(dummy)[x != 0], collapse = ",") ))

olap<-findOverlaps(nz, GRanges(annot_peaks), minoverlap=200) # annot_peaks is indexed peaks
nz$index<-annot_peaks[subjectHits(olap),]$index
nz.2<-nz[which(nz$num<=100),] #remove links with peaks that had more than 100 motif calls

#only keep links within 100kb and peaks with motif calls
testing<-tmp[which( (tmp$index %in% nz.2$index)  & tmp$distLinkedGene<100000),]
# format for motif-gene-peak links before correlation calculation
mot<-strsplit(nz.2$nz,",")
mot2<-unlist(mot)
c<-c()
for (i in 1:length(nz.2)){
  c<-c(c, rep(nz.2[i,]$index, nz.2[i,]$num)) # repeat index for number of motifs in peak
}
df<-data.frame(motif=mot2, index=c)
testing<-merge(testing, df, by="index")
testing$TF<-sapply(strsplit(testing$motif,"_"), `[`,1) # remove motif names
testing$TF<-sapply(strsplit(testing$TF,":"), `[`,1) # if 2 tfs just take first
testing$TF<-toupper(testing$TF) # make all upper case
testing<-testing[which(testing$TF %in% rownames(data[["RNA"]])),]
testing$peak<-paste0(testing$seqnames,"-",testing$start,"-",testing$end)# add peak name in same format as seurat object

motif score

  • pull motif score from signac wrapper of motifmatchr
  • Filter trios before correlation:
    • motif score >12
    • Abs(correlation of peak-gene link) > 0.3
    • Qval of peak-gene link > 20
    • linked-peak overlaps H3K27ac
  • save AD and control trios to test separately
m=motifs@positions #positions contains -log10(q-value) for each motif call

for (i in 1:length(m)){
  m[[i]]$name<-names(m[i])
}
# format motif as TF name
u<-unlist(m)
u$Peak_motif<-paste0(seqnames(u),"-", start(u),"-", end(u),"-",u$name)
u<-u[!duplicated(u$Peak_motif),]
u$TF=sapply(strsplit(u$name,"_"),`[`,1)
u$TF<-sapply(strsplit(u$TF,":"), `[`,1) # if 2 tfs just take first
u$TF<-toupper(u$TF)
# overlap with indexed peaks. Will use peak index to determine which motif scores go with which peak
olap<-findOverlaps(u, GRanges(annot_peaks), type="within")
u<-u[queryHits(olap)]
u$index<-annot_peaks[subjectHits(olap),]$index
saveRDS(u, "~/scMultiomics_AD/motif/Motif_Score_indexPeaks.rds")



# max accessibility of linked-peak by cluster
data<-AddMetaData(data, fullmeta)
data$subs<-ifelse(is.na(data$subs),substr(data$predicted.id,start=1,stop=3),data$subs)
Idents(data)<-data$subs
avgAcc<-as.data.frame(AverageExpression(data, group.by="subs", assay="CTpeaks", features=testing$peak))
max<-as.data.frame(apply(avgAcc, 1, function(x) max(x)))
max$peak<-rownames(max)
colnames(max)[1]<-"MaxAcc"


# ADD
testing2<-merge(testing, max, by="peak")
u<-as.data.frame(u)
u2u<-u[,c(6,7,9,10,1,2,3)]
colnames(u2u)<-c("Motif_score","motif","TF","index","motif_seqnames","motif_start","motif_end")
testing2<-merge(testing2, u2u, by=c("motif","TF","index"))
testing2$GRN<-paste0(testing2$index,"-",testing2$motif,"-", testing2$gene)


testing3<-testing2[which(testing2$Motif_score>12 & abs(testing2$score)>0.3 & testing2$Qval>20 & testing2$k27_num>0),]

saveRDS(testing3[which(testing3$group!="Ctrl"),], "~/scMultiomics_AD/GRN/a10.rds")
saveRDS(testing3[which(testing3$group!="AD"),], "~/scMultiomics_AD/GRN/c10.rds")

Meta cell

  • Split cells into AD and Control
  • Cluster cells off of the WKNN (combined RNA+ATAC)
  • Define meta cells by clustering at a high resolution so that you have average of approx 20 cells
  • Take average expression and accessibility for each meta cell
ad<-subset(data, subset=Status=="AD")
ad<-FindMultiModalNeighbors(ad, reduction.list=list("harmony.rna","harmony.atac"), dims.list=list(1:30,2:50), k.nn=50, return.intermediate=T)
ad<-FindClusters(ad, graph.name="wknn", resolution=100, group.singletons=T)

ad<-NormalizeData(ad)
avgExp<-as.data.frame(AverageExpression(ad, group.by="seurat_clusters", assay="RNA"))
avgAcc<-as.data.frame(AverageExpression(ad, group.by="seurat_clusters",assay="CTpeaks"))
write.csv(avgExp, "~/scMultiomics_AD/GRN/AD_MetaCell_Expresion.csv")
write.csv(avgAcc, "~/scMultiomics_AD/GRN/AD_MetaCell_Accessibility.csv")




ctrl<-subset(data, subset=Status=="Ctrl")
ctrl<-FindMultiModalNeighbors(ctrl, reduction.list=list("harmony.rna","harmony.atac"), dims.list=list(1:30,2:50), k.nn=50, return.intermediate=T)
ctrl<-FindClusters(ctrl, graph.name="wknn", resolution=100, group.singletons=T)

ctrl<-NormalizeData(ctrl)
avgExp<-as.data.frame(AverageExpression(ctrl, group.by="seurat_clusters", assay="RNA"))
avgAcc<-as.data.frame(AverageExpression(ctrl, group.by="seurat_clusters",assay="CTpeaks"))
write.csv(avgExp, "~/scMultiomics_ctrl/GRN/Ctrl_MetaCell_Expression.csv")
write.csv(avgAcc, "~/scMultiomics_ctrl/GRN/Ctrl_MetaCell_Accessibility.csv")

parallel

  • for each tf:
    • max_access=maximum accessibility of peak with motif
    • motif_score=list of motif scores for every peak with motif
    • P-TF_cor=correlation score between TF exp and Peak acc
    • G-TF_cor=correlation score between TF exp and gene exp
    • G-TF_pval= p-value for correlation between TF and gene
    • P-TF_pval= p-value for correlation between TF and peak

Jobs were split into batches of 10,000 trios

# !/usr/bin/env Rscript

library(Seurat)
library(Signac)
library(GenomicRanges)
library(WGCNA)



args = commandArgs(trailingOnly=TRUE)
n=args[1]
n=as.numeric(i)
group=args[2]

data<-readRDS("~/scMultiomics_AD/final.rds")

ctrl<-subset(data, subset=Status=="AD")
c10<-readRDS("~/scMultiomics_AD/GRN/a10.rds")




DefaultAssay(ctrl)<-"RNA"
ctrl<-NormalizeData(ctrl)
DefaultAssay(ctrl)<-"CTpeaks"
ctrl<-RunTFIDF(ctrl)

part<-seq(1, nrow(c10), by=10000)
i=part[n]

j=i+9999 

cor<-c()
pval<-c()
cor2<-c()
pval2<-c()
for (i in i:j){
  geneT<-c10$gene[i]
  tfT<-c10$TF[i]
  test<-cor.test(as.numeric(data[["RNA"]][geneT,]), as.numeric(data[["RNA"]][tfT,]), method="pearson")
  cor<-c(cor,test$estimate)
  pval<-c(pval,test$p.value)
  peakT<-c10$peak[i]
  test2<-cor.test(as.numeric(data[["CTpeaks"]][peakT,]), as.numeric(data[["RNA"]][tfT,]), method="pearson")
  cor2<-c(cor2,test2$estimate)
  pval2<-c(pval2,test2$p.value)
}


df<-data.frame(`G-TF_cor`=cor, `G-TF_pval`=pval, `P-TF_cor`=cor2, `P-TF_pval`=pval2)
df$grn<-c10[i:j,]$GRN
write.csv(df, paste0("~/scMultiomics_AD/GRN/",group,"_gene_tf_peak_score_",n,".csv"))

Format

  • Trios were split into batches of 10k trios. These files need to be read in and combined.
# Trio correlations were split into batches of 10,000 trios. 
# AD_list is list of AD trio batch outputs
# Ctrl_list is list of Ctrl trio batch outputs


files=as.list(readLines("~/scMultiomics_AD/GRN/AD_list.csv"))
AD_list=lapply(files,function(x){
  FT=read.csv(x)})

files=as.list(readLines("~/scMultiomics_AD/GRN/Ctrl_list.csv"))
Ctrl_list=lapply(files,function(x){
  FT=read.csv(x)})

AD<-c(1,1,1,1,1,1) #dummy variable to combine data frames
for (i in 1:length(AD_list)){
  AD<-rbind(AD,AD_list[[i]])
}
AD<-AD[-1,] # remove dummy 

Ctrl<-c(1,1,1,1,1,1)
for (i in 1:length(Ctrl_list)){
  Ctrl<-rbind(Ctrl,Ctrl_list[[i]])
}
Ctrl<-Ctrl[-1,]



# add scores back to links
all<-readRDS("~/scMultiomics_AD/GRN/ALL_wMotifScore_MaxAcc.rds" ) #links with motif score and accessibility
mer<-merge(all, AD,by.x="GRN",by.y="grn", all.x=T)
mer<-merge(mer, Ctrl, by.x="GRN", by.y="grn",all.x=T)
mer<-mer[mer$GRN %in% c(AD2$grn, Ctrl2$grn),]
mer2<-mer


# scale motif scoring so that it's normalized for each motif instead of across all motifs. Because repetitive motifs biased towards lower scoring
maxs<-aggregate(Motif_score~motif, mer2, max)
colnames(maxs)[2]<-"Maxs"
mins<-aggregate(Motif_score~motif, mer2, min)
colnames(mins)[2]<-"Mins"
mer2<-merge(mer2, maxs, by="motif")
mer2<-merge(mer2, mins, by="motif")
mer2$Z_Motif_score<- (mer2$Motif_score-mer2$Mins)/(mer2$Maxs-mer2$Mins)

# normalize accessibility
rs<-rowSums(data[["CTpeaks"]])
rs.df<-as.data.frame(rs)
rs.df$peak<-rownames(rs.df)
rs.df$norm<-minmax.normalisation(log2(rs.df$rs)) # normalize on log2 so that outliers/promoters don't left skew distribution

mer2<-merge(mer2,rs.df, by="peak")

Define Trios

  • A is all trios
  • B is all significant trios with positive sig P-TF cor and sig G-TF cor
  • C2 is only AD and control have different gene-tf effect directions
  • D2 is only significant in AD
  • E2 is only significant in control
a<-mer2[!duplicated(mer2$GRN),]

# Is Gene-TF cor significant?
a$AD_sig<-a$G.TF_pval.x<0.001
a$Ctrl_sig<-a$G.TF_pval.y<0.001

# Is P-TF cor significant?
a$AD_sigP<-a$P.TF_pval.x<0.001
a$Ctrl_sigP<-a$P.TF_pval.y<0.001

# restrict so that only get ones where P-TF and G-TF agree by status
b<-a[which((a$AD_sig==T & a$AD_sigP==T & a$P.TF_cor.x>0.2) | (a$Ctrl_sig==T & a$Ctrl_sigP==T & a$P.TF_cor.y>0.2)),]

# ones that are significant in both
c<-b[which(b$AD_sig==T & b$Ctrl_sig==T),] 
c$sign<-sign(c$G.TF_cor.x)==sign(c$G.TF_cor.y)
c2<-c[which(c$sign==F),] # 34 are different directions

# Only significant in AD
d<-b[which(b$AD_sig==T & b$Ctrl_sig==F),]
d2<-d[which(d$Ctrl_sigP==F),] # Peak-TF is also not significant in Ctrl

# Only significant in Ctrl
e<-b[which(b$AD_sig==F & b$Ctrl_sig==T),]
e2<-e[which(e$AD_sigP==F),] # Peak-TF is also not significant in AD


b$category<-ifelse(b$GRN %in% c2$GRN, "DiffDir",
                   ifelse(b$GRN %in% d2$GRN, "AD-only",
                          ifelse(b$GRN %in% e2$GRN, "Ctrl-only","Sig")))
trios<-as.data.frame(b)
write.table(trios, "~/scMultiomics_AD/Significant_trios.txt", sep="\t", row.names=F, quote=F)

Fig4B: annotation

load("~/iCellGABA_CRE_ENCODEstyle_GR.Rvar")
enc=full_annot_encode_cre_def_GR
enc<-enc[!grepl("iCell",enc$accesionLabel),]
u2<-unique(b)
ol_enc=findOverlaps(GRanges(u2),enc)

c2_enc<-u2[queryHits(ol_enc),]
c2_enc$encLab<-enc[subjectHits(ol_enc),]$encodeLabel
c2_enc<-c2_enc[!duplicated(c2_enc$link),]
d2<-merge(as.data.frame(c2_enc), as.data.frame(u2), by="link", all=T)
d2$encLab<-ifelse(is.na(d2$encLab)==T, "None",d2$encLab)



#grouped together
tmp<-d2
#tmp$numCT<-str_count(tmp$CT.x,",")
#tmp<-tmp[which(tmp$numCT==0),]
a<-prop.table(table(tmp$encLab, tmp$Astrocytes.y),2)
m<-prop.table(table(tmp$encLab, tmp$Microglia.y),2)
e<-prop.table(table(tmp$encLab, tmp$Excitatory.y),2)
i<-prop.table(table(tmp$encLab, tmp$Inhibitory.y),2)
o<-prop.table(table(tmp$encLab, tmp$Oligodendrocytes.y),2)
p<-prop.table(table(tmp$encLab, tmp$OPCs.y),2)
df<-cbind(a[,2],m[,2],e[,2],i[,2],o[,2],p[,2])
colnames(df)<-c("Ast","Mic","Exc","Inh","Oli","OPC")
df<-as.data.frame(df)
df$annot<-rownames(df)
df<-melt(df)


pdf("~/scMultiomics_AD/Trio_peak_annot_encode.pdf", width=8, height=3)
ggplot(df, aes(x=value, y=variable, fill=annot))+geom_bar(stat="identity")+xlab("Proportion of Linked peaks")+ylab("Cell type")+theme_classic()+scale_fill_manual(values=c("dodgerblue","gold","lightpink","orange","red","forestgreen"),labels=c("CTCF-only","dELS","DNase-H3K4me3","pELS","PLS","other"))+theme(legend.position="right", axis.text=element_text(size=12),axis.title=element_text(size=15), legend.text=element_text(size=12))
dev.off()
load("~/iCellGABA_CRE_ENCODEstyle_GR.Rvar")
enc=full_annot_encode_cre_def_GR
enc<-enc[!grepl("iCell",enc$accesionLabel),]


u2<-unique(b)
all<-import("~/scMultiomics_AD/CT_filtered_peaks.srt.bed")
over<-findOverlaps(all, u2)
all$Linked<-F
all[queryHits(over)]$Linked<-T
all$index<-seq(1, length(all))

ol_enc=findOverlaps(GRanges(all),enc)
b_enc<-all[queryHits(ol_enc),]
b_enc$encLab<-enc[subjectHits(ol_enc),]$encodeLabel
b_enc<-b_enc[!duplicated(b_enc$index),]
all$encLab<-"None"
d2<-c(b_enc, all[!(all$index %in% b_enc$index)])


t<-table(d2$encLab, d2$Linked)

## CTCF
chi<-rbind(t[1,],colSums(t[-1,]))
chisq.test(chi)
pval<-phyper(q =( chi[1,2]-1)/1000000, m = chi[1,1]/1000000,n = chi[2,1]/1000000, k = (chi[1,2]+chi[2,2])/1000000, lower.tail=F)
FE<-(chi[1,2]*chi[2,1])/(chi[1,1]*chi[2,2])
## dELS
chi<-rbind(t[2,],colSums(t[-2,]))
chisq.test(chi)
pval<-c(pval,phyper(q =( chi[1,2]-1)/1000000, m = chi[1,1]/1000000,n = chi[2,1]/1000000, k = (chi[1,2]+chi[2,2])/1000000, lower.tail=F))
FE<-c(FE,(chi[1,2]*chi[2,1])/(chi[1,1]*chi[2,2]))
## DNAase
chi<-rbind(t[3,],colSums(t[-3,]))
chisq.test(chi)
pval<-c(pval,phyper(q =( chi[1,2]-1)/1000000, m = chi[1,1]/1000000,n = chi[2,1]/1000000, k = (chi[1,2]+chi[2,2])/1000000, lower.tail=F))
FE<-c(FE,(chi[1,2]*chi[2,1])/(chi[1,1]*chi[2,2]))
## pELS
chi<-rbind(t[4,],colSums(t[-4,]))
chisq.test(chi)
pval<-c(pval,phyper(q =( chi[1,2]-1)/1000000, m = chi[1,1]/1000000,n = chi[2,1]/1000000, k = (chi[1,2]+chi[2,2])/1000000, lower.tail=F))
FE<-c(FE,(chi[1,2]*chi[2,1])/(chi[1,1]*chi[2,2]))
## PLS
chi<-rbind(t[5,],colSums(t[-5,]))
chisq.test(chi)
pval<-c(pval,phyper(q =( chi[1,2]-1)/1000000, m = chi[1,1]/1000000,n = chi[2,1]/1000000, k = (chi[1,2]+chi[2,2])/1000000, lower.tail=F))
FE<-c(FE,(chi[1,2]*chi[2,1])/(chi[1,1]*chi[2,2]))

names(FE)<-c("CTCF","dELS","DNase-H3K4me3","pELS","PLS")
names(pval)<-c("CTCF","dELS","DNase-H3K4me3","pELS","PLS")

pval2<-c(2.2e-16,2.2e-16,2.692e-10,5.903e-07,1.227e-13)
names(pval2)<-c("CTCF","dELS","DNase-H3K4me3","pELS","PLS")

Heatmap of correlation values of AD and control-specific trios identified in microglia (left) and excitatory/inhibitory neurons (right) for TFs involved in at least 3 trios.

AD/Ctrl-only

Fig4F: heatmap

c2$class<-"DiffDir"
d2$class<-"ADonly"
e2$class<-"Ctrlonly"

# changed<-c(c2,d2,e2)
changed<-c(c2,d2,e2)
t<-table(changed$TF)
changed$Name<-paste0(changed$TF,"-", changed$gene)
changed<-changed[which(changed$TF %in% c("MEF2C",names(t[t>10]))),]
changed$CTspec<-ifelse(changed$CT %in% c("Inhibitory","Excitatory,Inhibitory","Inhbitory,Excitatory"), "Neuron",
                       ifelse(changed$CT %in% c("Microglia","Excitatory","Oligodendrocytes","OPCs","Astrocytes"), changed$CT, "shared"))

# add cluster
clusters<-read.table("~/JASPAR/clusters_motif_names.tab")
clust<-strsplit(clusters$V2,",")
len<-lapply(clust, length)
clust<-unlist(clust)
len<-unlist(len)
names<-c()
for (i in 1:length(len)){
  names<-c(names, rep(clusters$V1[i], len[i]))
}
clusters<-data.frame(cluster=names, motif2=clust)
clusters<-clusters[!duplicated(clusters$motif2),]
changed$motif2<-sapply(strsplit(changed$motif, "_"),`[`,1)
changed<-merge(changed, clusters, by="motif2")
changed<-changed[which(abs(changed$score.y)>0.3 | abs(changed$score.x)>0.3),]
changed$GRN<-paste0(changed$index,"-",changed$TF,"-", changed$gene)
changed<-changed[!duplicated(changed$GRN),]

# plot
mat<-as.matrix(cbind(changed$P.TF_cor.x,changed$G.TF_cor.x,changed$P.TF_cor.y, changed$G.TF_cor.y))
colnames(mat)<-rep(c("Peak-TF","Gene-TF"), 2)
rownames(mat)<-changed$TF
ha2<-HeatmapAnnotation(Score=rep(c("Peak-TF","Gene-TF"), 2), col= list(Score=c("Peak-TF"="mediumseagreen","Gene-TF"="orange")), show_legend=F)
ha<-HeatmapAnnotation(Status=rep(c("AD","Ctrl"), each=2), col=list(Status=c("AD"="red","Ctrl"="blue")), show_legend=FALSE)
ha<-c(ha, ha2)


ra<-rowAnnotation(Group=changed$CTspec, TF=changed$TF, Class=changed$class, Cluster=changed$cluster, col=list(Group=c("shared"="grey50","Astrocytes"="darkgoldenrod1","Neuron"="seagreen", "Microglia"="mediumorchid3","Oligodendrocytes"="coral3","Excitatory"="cornflowerblue"),  Class=c("ADonly"="red4", "Ctrlonly"="dodgerblue","DiffDir"="gold")))


ht1=Heatmap(mat, cluster_rows=T,show_row_dend=F,cluster_columns=F,col=colorRamp2(c(-0.2,0.1,0.4), plasma(3)), name="r",  show_column_names=T, show_row_names=F, row_names_gp = gpar(fontsize = 2), top_annotation=ha,   right_annotation=ra, use_raster=T,raster_quality=5,row_split=changed$CTspec, row_title=NULL)

lab<-which(changed$gene %in% "SPP1")
pdf("~/scMultiomics_AD/Triangle_correlations.pdf", width=10, height=10)
# rowAnnotation(gene=anno_mark(at=lab, labels=changed[lab,]$Name, side="left", labels_gp=gpar(fontsize=10)))+
ht1
dev.off()










CT="Excitatory"
ra<-rowAnnotation(TF=changed[which(changed$CTspec==CT),]$TF,Class=changed[which(changed$CTspec==CT),]$class, show_legend=T, col=list( Class=c("ADonly"="red4", "Ctrlonly"="dodgerblue","DiffDir"="gold")))
ht1=Heatmap(mat[which(changed$CTspec==CT),], cluster_rows=T,show_row_dend=F,cluster_columns=F,col=colorRamp2(c(-0.2,0.1,0.4), plasma(3)), name="r",  show_column_names=T, show_row_names=F, row_names_gp = gpar(fontsize = 2), top_annotation=ha,   right_annotation=ra,row_title=NULL)


pdf(paste0("~/scMultiomics_AD/Triangle_correlations_",CT,".pdf"), width=10, height=10)
ht1
dev.off()










changed<-b[which(b$category %in% c("AD-only","Ctrl-only")),]
changed$CTspec<-ifelse(changed$CT %in% c("Inhibitory","Excitatory,Inhibitory","Inhbitory,Excitatory","Excitatory"), "Neuron","shared")
changed<-changed[which(changed$CTspec=="Neuron"),]
t<-table(changed$TF)
changed<-changed[which(changed$TF %in% names(t[t>2])),]

mat<-as.matrix(cbind(changed$P.TF_cor.x,changed$G.TF_cor.x,changed$P.TF_cor.y, changed$G.TF_cor.y))
colnames(mat)<-rep(c("Peak-TF","Gene-TF"), 2)
rownames(mat)<-changed$TF
#ha2<-HeatmapAnnotation(Score=rep(c("Peak-TF","Gene-TF"), 2), col= list(Score=c("Peak-TF"="mediumseagreen","Gene-TF"="orange")), show_legend=F)
ha<-HeatmapAnnotation(Status=rep(c("AD","Ctrl"), each=2), col=list(Status=c("AD"="red","Ctrl"="blue")), show_legend=FALSE)
#ha<-c(ha, ha2)


cols<-c(brewer.pal(name = "Set3", n = 8),brewer.pal(name = "Set1", n = 6), brewer.pal(name = "Pastel1", n = 6))
cols=setNames(cols, unique(changed$TF))
cols[2]<-"red"
cols[11]<-"blue"
cols[9]<-"cyan"
ra<-rowAnnotation(TF=changed$TF, show_legend=T, col=list(TF=cols))


ht1=Heatmap(mat, cluster_rows=T,show_row_dend=F,cluster_columns=F,col=colorRamp2(c(-0.1,0.1,0.4), plasma(3)), name="r",  show_column_names=T, show_row_names=F, row_names_gp = gpar(fontsize = 2), top_annotation=ha,   right_annotation=ra, use_raster=T,raster_quality=5,row_split=changed$category, row_title=NULL)

pdf("~/scMultiomics_AD/Triangle_correlations_Neuron_ExIn.pdf", width=5, height=10)
ht1
dev.off()

MEF2C

Fig4C,D

avgExp<-as.data.frame(AverageExpression(data, features=unique(b[which(b$TF=="MEF2C"),]$gene), assay="RNA", group.by="predicted.id"))
avgExp$gene<-rownames(avgExp)
avgExp<-avgExp[,c(1,3,4,5,6,7,9)]
colnames(avgExp)<-c("Ast","Exc","Inh","Mic","Olig","OPCs", "gene")

b$CTspec<-ifelse(b$CT %in% c("Excitatory,Inhibitory","Inhibitory,Excitatory"), "Neuron",
                       ifelse(b$CT %in% c("Microglia","Excitatory","Inhibitory","Oligodendrocytes","OPCs","Astrocytes"), b$CT, "shared"))

mef<-b[which(b$TF=="MEF2C"),]
mef<-mef[order(-abs(mef$score)),]
mef<-mef[!duplicated(mef$gene),]
mef<-mef[,c("gene","CTspec")]

avgExp.g<-merge(avgExp, mef, by="gene")
rownames(avgExp.g)<-avgExp.g$gene
CT<-avgExp.g$CTspec
avgExp.g<-avgExp.g[,c(2,3,4,5,6,7)]



CT<-ifelse(CT %in% c("shared","Astrocytes"), "other",CT)
ra<-rowAnnotation(Group=CT,  col=list(Group=c("Microglia"="mediumorchid3","Neuron"="yellow", "other"="grey54", "Excitatory"="cornflowerblue","Inhibitory"="seagreen3")))
mat.scale<-t(scale(t(avgExp.g), center=T))

ht=Heatmap(mat.scale,cluster_columns=F,cluster_rows=T, col=colorRamp2(c(-1.8,0,2),viridis(3)),  name="Expression Z", show_column_names=T, show_row_names=F, column_title=NULL, row_names_gp=gpar(fontsize=7),right_annotation=ra, use_raster=T,raster_quality=5)

pdf("~/scMultiomics_AD/MEF2C_trios_heatmap.pdf", width=6, height=7)
ht
dev.off()



df<-data.frame(mef2c=data@assays$RNA@data["MEF2C",], ct=data$predicted.id)
df<-df[which(df$ct %in% c("Astrocytes","Excitatory","Inhibitory","Microglia","Oligodendrocytes","OPCs")),]
pdf("~/scMultiomics_AD/MEF2C_boxplot.pdf", width=6, height=4)
ggplot(df, aes(x=ct, y=mef2c, fill=ct))+geom_boxplot()+theme_classic()+ylab("MEF2C Normalize Expression")+scale_fill_manual(values=c("darkgoldenrod1", "cornflowerblue","seagreen3","mediumorchid3","coral3","firebrick"))+xlab("")+theme(legend.position="none")
dev.off()

Fig4E

####################
#GO
g1<-unique(mef[which(mef$Microglia==T),]$gene)
g2<-unique(mef[which(mef$Excitatory==T),]$gene)
dbs <- c("GO_Molecular_Function_2021", "GO_Cellular_Component_2021", "GO_Biological_Process_2021", "KEGG_2021_Human","Azimuth_Cell_Types_2021")
go1<-enrichr(g1,dbs)
go2<-enrichr(g2,dbs)

go1.1<-go1[["GO_Biological_Process_2021"]]
go2.1<-go2[["GO_Biological_Process_2021"]]

go1.1<-go1.1[which(go1.1$Adjusted.P.value<0.01),]
go2.1<-go2.1[which(go2.1$Adjusted.P.value<0.01),]
c
go1.1$CT<-"Microglia"
go2.1$CT<-"Neuron"

go<-rbind(go1.1,go2.1)
go$Name<-sapply(strsplit(go$Term, "(G", fixed=T), `[`,1)
go<-go[order(go$CT, go$Odds.Ratio),]
go$Name<-factor(go$Name, levels=go$Name)
go.top<-go %>% group_by(CT) %>% top_n(n=5, wt=Odds.Ratio)
pdf("~/scMultiomics_AD/MEF2C_target_GOenrichment_byCT.pdf")
ggplot(go, aes(x=Odds.Ratio, y=Name, fill=CT))+geom_bar(stat="identity")+theme_classic()+scale_fill_manual(values=c("mediumorchid3","yellow2"))
dev.off()

olap luciferase

trio_val<-subsetByOverlaps(b, gRNA[which(gRNA$Significant==T),])
noquote(t(t(trio_val$GRN)))

3 trios that also overlap significant luciferase region

“269545-MEF2C_MA0497.1-CCSER1”
“138088-RARB_MA1552.1-JPH3”
“213871-SOX10_MA0442.2-ADAMTS1”

FIG 3

A) acc vs exp

c2.df<-as.data.frame(c2)
c2.df$score<-ifelse(c2.df$group=="AD", c2.df$score.x,
                    ifelse(c2.df$group=="Ctrl", c2.df$score.y, (c2.df$score.x+ c2.df$score.y)/2))
c2.df<-c2.df[order(-c2.df$score),]
c2.df<-c2.df[!duplicated(c2.df$gene),]
c2.df$peak<-paste0(c2.df$seqnames,"-",c2.df$start-1,"-", c2.df$end)
c2.df<-c2.df[!duplicated(c2.df$peak),]
c2.df$dir<-ifelse(c2.df$score<0, "down","up")
c2.df<-c2.df[which(c2.df$dir=="up"),]


acc<-as.data.frame(AverageExpression(data, group.by=c("predicted.id","Status"),assay="CTpeaks",features=c2.df$peak))
gex<-as.data.frame(AverageExpression(data, group.by=c("predicted.id","Status"), assay="RNA", features=c2.df$gene))

acc<-acc[,-c(3,4,15,16)]
gex<-gex[,-c(3,4,15,16)]


acc.scale<-t(apply(acc,1, scale))
gex.scale<-t(apply(gex,1,scale))
colnames(acc.scale)<-colnames(acc)
colnames(gex.scale)<-colnames(gex)

acc.scale<-acc.scale[complete.cases(gex.scale),]
gex.scale<-gex.scale[complete.cases(gex.scale),]
c2.df<-c2.df[which(c2.df$gene %in% rownames(gex.scale)),]

ha2<-HeatmapAnnotation(celltype=rep(c("Ast","Exc","Inh","Mic","Oli","OPC"), each=2), col= list(celltype=c("Ast"="darkgoldenrod1","Exc"="cornflowerblue","Inh"="seagreen3","Mic"="mediumorchid3","Oli"="coral3","OPC"="firebrick")), show_legend=F)
ha<-HeatmapAnnotation(Status=rep(c("AD","Ctrl"), 6), col=list(Status=c("AD"="red","Ctrl"="blue")), show_legend=FALSE)
ha<-c( ha, ha2)

ra<-rowAnnotation(Group=c2.df$group,  `Score`=c2.df$score, col=list(Group=c("common"="grey50","AD"="red","Ctrl"="blue"), Dir=c("up"="yellow","down"="purple"), `Score`=colorRamp2(c(0,0.25,0.75,1),c("white",rev(mako(3))))))

ht1=Heatmap(acc.scale, cluster_rows=T,show_row_dend=F,cluster_columns=F,col=colorRamp2(c(quantile(acc.scale, probs=0.05),0,2), c("## 290230","## CC4678FF","## F0F921FF" )), name="Accessibility Z",  show_column_names=F, show_row_names=F, column_title=NULL, top_annotation=ha,  row_dend_reorder=T, use_raster=T,raster_quality=5,row_split=c2.df$group)

ht2=Heatmap(gex.scale, cluster_rows=F,show_row_dend=F,cluster_columns=F,col=colorRamp2(c(quantile(gex.scale, probs=0.05, na.rm=T),0,quantile(gex.scale, probs=0.95, na.rm=T)),viridis(3)), name="Expression  Z",  show_column_names=F, show_row_names=F, column_title=NULL,row_names_gp = gpar(fontsize = 3), top_annotation=ha, right_annotation=ra,  row_title_gp=gpar(fontsize=0), use_raster=T, raster_quality=5)


pdf("~/scMultiomics_AD/Acc_GEX_link_filtered.pdf")
ht1+ht2
dev.off()

B) gene/peak hist

a10.gr<-c2.2[which(c2.2$Freq>=80 & c2.2$group !="Ctrl"),]
c10.gr<-c2.2[which(c2.2$Freq>=80 & c2.2$group !="AD"),]


t1<-as.data.frame(table(a10.gr$gene))
t2<-as.data.frame(table(c10.gr$gene))
t<-merge(t1,t2,by="Var1",all=T) 
colnames(t)<-c("Var1","AD","Ctrl")
t<-melt(t)


p1<-as.data.frame(table(a10.gr$start))
p2<-as.data.frame(table(c10.gr$start))
p<-merge(p1,p2,by="Var1",all=T) 
colnames(p)<-c("Var1","AD","Ctrl")
p<-melt(p)



pdf("~/scMultiomics_AD/AD_linksPerGene_filt.pdf", height=5,width=7)
a=ggplot(t, aes(x=log(value), color=variable,fill=variable))+geom_density(aes(y = ..count..), alpha=0.2, adjust=2)+xlab("log(Links per Gene)")+theme_classic()+
  scale_fill_manual(values=c("red","blue"))+scale_color_manual(values=c("red","blue"))+
  theme(axis.text=element_text(size=8), legend.position="none")+ylab("counts")+
  geom_vline(xintercept=log(4), linetype="dashed" )+geom_vline(xintercept=log(13))+geom_vline(xintercept=log(28), linetype="dashed")
b=ggplot(p, aes(x=log(value), color=variable,fill=variable))+geom_density( aes(y = ..count..),alpha=0.2, adjust=4)+xlab("log(Links per Peak)")+theme_classic()+
  scale_fill_manual(values=c("red","blue"))+scale_color_manual(values=c("red","blue"))+
  theme(axis.text=element_text(size=8), legend.position="none") +ylab("counts")+
  geom_vline(xintercept=log(1), linetype="dashed" )+geom_vline(xintercept=log(2))+geom_vline(xintercept=log(3), linetype="dashed")
a+b
dev.off()

t$value2<-ifelse(t$value>39,40,t$value)
p$value2<-ifelse(p$value>3, 4,p$value)
pdf("~/scMultiomics_AD/AD_linksPerGene_Perm80.pdf", height=5,width=7)
a=ggplot(t, aes(x=value2, color=variable,fill=variable))+geom_histogram(position="dodge", alpha=0.2, binwidth=2)+xlab("Links per Gene")+theme_classic()+
  scale_fill_manual(values=c("red","blue"))+scale_color_manual(values=c("red","blue"))+
  theme(axis.text=element_text(size=8), legend.position="none")+ylab("counts")+
  scale_x_continuous(breaks=c(1,20,40),labels=c("1","20",">40"))
b=ggplot(p, aes(x=value2, color=variable,fill=variable))+geom_bar(position="dodge", alpha=0.2)+xlab("Links per Peak")+theme_classic()+
  scale_fill_manual(values=c("red","blue"))+scale_color_manual(values=c("red","blue"))+
  theme(axis.text=element_text(size=8), legend.position="none") +ylab("counts")+scale_x_continuous(breaks=c(1,2,3,4),labels=c("1","2","3",">3"))
a+b
dev.off()

C) upset plot

new_links<-links_200.anno
a10_ct<-new_links[which(new_links$group !="Ctrl"),]


groups<-strsplit(a10_ct$CT, split=",")
t<-lapply(groups, function(i) i[[1]][!(i[[1]] %in% c("Endothelial","Pericytes"))])
t<-lapply(groups, function(i) i[order(i)])
t<-lapply(t, paste, collapse="&")
t<-as.character(t)
a10_ct$groups<-t

c10_ct<-new_links[which(new_links$group !="AD"),]
groups<-strsplit(c10_ct$CT, split=",")
t<-lapply(groups, function(i) i[[1]][!(i[[1]] %in% c("Endothelial","Pericytes"))])
t<-lapply(groups, function(i) i[order(i)])
t<-lapply(t, paste, collapse="&")
t<-as.character(t)
c10_ct$groups<-t


tab<-table(a10_ct$groups)
tab<-head(tab[order(-tab)], n=15)
tab2<-table(c10_ct$groups)
tab2<-head(tab2[order(-tab2)], n=15)

tab<-tab[names(tab) %in% names(tab2)]
tab2<-tab2[names(tab2) %in% names(tab)]

tab2<-tab2[names(tab)] ## do same order

u<-upset(fromExpression(tab), nintersects=12, nsets=6,   sets.bar.color=c(
  "cornflowerblue","mediumorchid3","seagreen3","coral3","darkgoldenrod1","firebrick"), main.bar.color="red3", text.scale=1.3  , mainbar.y.label="AD Links", mainbar.y.max=60000, show.numbers=F, keep.order=T, order.by=c("freq","degree"))

pdf("~/scMultiomics_AD/AD_FL_upset_filt_XY.pdf", width=8, height=5)
u
dev.off()


u2<-upset(fromExpression(tab2), nintersects=12, nsets=6,  sets.bar.color=c(
  "cornflowerblue","seagreen3","coral3","darkgoldenrod1","mediumorchid3","firebrick"), main.bar.color="blue", text.scale=1.3,  mainbar.y.label="Ctrl Links", mainbar.y.max=60000, show.numbers=F, keep.order=T, order.by=c("freq","degree"))

pdf("~/scMultiomics_AD/Ctrl_FL_upset_filt_XY.pdf", width=8, height=5)
u2
dev.off()

D) annotation

load("~/iCellGABA_CRE_ENCODEstyle_GR.Rvar")
enc=full_annot_encode_cre_def_GR
enc<-enc[!grepl("iCell",enc$accesionLabel),]
u2<-unique(c2.2)
ol_enc=findOverlaps(GRanges(u2),enc)

c2_enc<-u2[queryHits(ol_enc),]
c2_enc$encLab<-enc[subjectHits(ol_enc),]$encodeLabel
c2_enc<-c2_enc[!duplicated(c2_enc$link),]
d2<-merge(c2_enc, u2, by="link", all=T)
d2$encLab<-ifelse(is.na(d2$encLab)==T, "None",d2$encLab)



## grouped together
tmp<-d2
## tmp$numCT<-str_count(tmp$CT.x,",")
## tmp<-tmp[which(tmp$numCT==0),]
a<-prop.table(table(tmp$encLab, tmp$Astrocytes.y),2)
m<-prop.table(table(tmp$encLab, tmp$Microglia.y),2)
e<-prop.table(table(tmp$encLab, tmp$Excitatory.y),2)
i<-prop.table(table(tmp$encLab, tmp$Inhibitory.y),2)
o<-prop.table(table(tmp$encLab, tmp$Oligodendrocytes.y),2)
p<-prop.table(table(tmp$encLab, tmp$OPCs.y),2)
df<-cbind(a[,2],m[,2],e[,2],i[,2],o[,2],p[,2])
colnames(df)<-c("Ast","Mic","Exc","Inh","Oli","OPC")
df<-as.data.frame(df)
df$annot<-rownames(df)
df<-melt(df)


pdf("~/scMultiomics_AD/Link_annot_byCT_filtered_encode2.pdf", width=8, height=3)
ggplot(df, aes(x=value, y=variable, fill=annot))+geom_bar(stat="identity")+xlab("Proportion of Linked peaks")+ylab("Cell type")+theme_classic()+scale_fill_manual(values=c("dodgerblue","gold","lightpink","orange","red","forestgreen"),labels=c("CTCF-only","dELS","DNase-H3K4me3","pELS","PLS","other"))+theme(legend.position="right", axis.text=element_text(size=12),axis.title=element_text(size=15), legend.text=element_text(size=12))
dev.off()




## up and down split
d2$score<-ifelse(d2$group=="AD",d2$score.x, d2$score.y)
tmp<-d2[which(d2$score>0),]
tmp$numCT<-str_count(tmp$CT,",")
tmp<-tmp[which(tmp$numCT==0),]
a<-prop.table(table(tmp$ann, tmp$Astrocytes),2)
m<-prop.table(table(tmp$ann, tmp$Microglia),2)
e<-prop.table(table(tmp$ann, tmp$Excitatory),2)
i<-prop.table(table(tmp$ann, tmp$Inhibitory),2)
o<-prop.table(table(tmp$ann, tmp$Oligodendrocytes),2)
p<-prop.table(table(tmp$ann, tmp$OPCs),2)
df<-cbind(a[,2],m[,2],e[,2],i[,2],o[,2],p[,2])
colnames(df)<-c("Ast","Mic","Exc","Inh","Oli","OPC")
df<-as.data.frame(df)
df$annot<-rownames(df)
df<-melt(df)

tmp<-d2[which(d2$score<0),]
tmp$numCT<-str_count(tmp$CT,",")
tmp<-tmp[which(tmp$numCT==0),]
a<-prop.table(table(tmp$ann, tmp$Astrocytes),2)
m<-prop.table(table(tmp$ann, tmp$Microglia),2)
e<-prop.table(table(tmp$ann, tmp$Excitatory),2)
i<-prop.table(table(tmp$ann, tmp$Inhibitory),2)
o<-prop.table(table(tmp$ann, tmp$Oligodendrocytes),2)
p<-prop.table(table(tmp$ann, tmp$OPCs),2)
df2<-cbind(a[,2],m[,2],e[,2],i[,2],o[,2],p[,2])
colnames(df2)<-c("Ast","Mic","Exc","Inh","Oli","OPC")
df2<-as.data.frame(df2)
df2$annot<-rownames(df2)
df2<-melt(df2)

df$dir<-"pos"
df2$dir<-"neg"
df<-rbind(df, df2)

pdf("~/scMultiomics_AD/Link_annot_byCT_filtered_encode.pdf", width=5, height=8)
ggplot(df, aes(y=value, x=dir, fill=annot))+geom_bar(stat="identity")+xlab("Proportion of Linked peaks")+ylab("Cell type")+theme_classic()+scale_fill_brewer(palette="Dark2")+theme(legend.position="right", axis.text=element_text(size=12),axis.title=element_text(size=15), legend.text=element_text(size=12))+facet_wrap(~variable)
dev.off()
enrichment
load("~/iCellGABA_CRE_ENCODEstyle_GR.Rvar")
enc=full_annot_encode_cre_def_GR
enc<-enc[!grepl("iCell",enc$accesionLabel),]

c2<-readRDS("~/scMultiomics_AD/AD_Ctrl_links_filt0.02_wPerm_wQval.rds")
c2.2<-GRanges(c2)
u2<-unique(c2.2)
all<-import("~/scMultiomics_AD/CT_filtered_peaks.srt.bed")
over<-findOverlaps(all, u2)
all$Linked<-F
all[queryHits(over)]$Linked<-T
all$index<-seq(1, length(all))

ol_enc=findOverlaps(GRanges(all),enc)
c2_enc<-all[queryHits(ol_enc),]
c2_enc$encLab<-enc[subjectHits(ol_enc),]$encodeLabel
c2_enc<-c2_enc[!duplicated(c2_enc$index),]
all$encLab<-"None"
d2<-c(c2_enc, all[!(all$index %in% c2_enc$index)])


t<-table(d2$encLab, d2$Linked)

## CTCF
chi<-rbind(t[1,],colSums(t[-1,]))
chisq.test(chi)
pval<-phyper(q =( chi[1,2]-1)/1000000, m = chi[1,1]/1000000,n = chi[2,1]/1000000, k = (chi[1,2]+chi[2,2])/1000000, lower.tail=F)
FE<-(chi[1,2]*chi[2,1])/(chi[1,1]*chi[2,2])
## dELS
chi<-rbind(t[2,],colSums(t[-2,]))
chisq.test(chi)
pval<-c(pval,phyper(q =( chi[1,2]-1)/1000000, m = chi[1,1]/1000000,n = chi[2,1]/1000000, k = (chi[1,2]+chi[2,2])/1000000, lower.tail=F))
FE<-c(FE,(chi[1,2]*chi[2,1])/(chi[1,1]*chi[2,2]))
## DNAase
chi<-rbind(t[3,],colSums(t[-3,]))
chisq.test(chi)
pval<-c(pval,phyper(q =( chi[1,2]-1)/1000000, m = chi[1,1]/1000000,n = chi[2,1]/1000000, k = (chi[1,2]+chi[2,2])/1000000, lower.tail=F))
FE<-c(FE,(chi[1,2]*chi[2,1])/(chi[1,1]*chi[2,2]))
## pELS
chi<-rbind(t[4,],colSums(t[-4,]))
chisq.test(chi)
pval<-c(pval,phyper(q =( chi[1,2]-1)/1000000, m = chi[1,1]/1000000,n = chi[2,1]/1000000, k = (chi[1,2]+chi[2,2])/1000000, lower.tail=F))
FE<-c(FE,(chi[1,2]*chi[2,1])/(chi[1,1]*chi[2,2]))
## PLS
chi<-rbind(t[5,],colSums(t[-5,]))
chisq.test(chi)
pval<-c(pval,phyper(q =( chi[1,2]-1)/1000000, m = chi[1,1]/1000000,n = chi[2,1]/1000000, k = (chi[1,2]+chi[2,2])/1000000, lower.tail=F))
FE<-c(FE,(chi[1,2]*chi[2,1])/(chi[1,1]*chi[2,2]))

names(FE)<-c("CTCF","dELS","DNase-H3K4me3","pELS","PLS")
names(pval)<-c("CTCF","dELS","DNase-H3K4me3","pELS","PLS")

CTCF 2.2<e-16 DELS=3.058e-09

Total ATAC UMIs,1199259332 Mean ATAC UMIs per cell,11385 Mean Peaks per cell,5499 Total ATAC Peaks,150561 Total GEX UMIs,1984992816 Mean GEX UMIs per cell,18845 Mean Genes per cell,3276 Total Genes Expressed,27264

E) K27 olap

c2$ATAC_num<-str_count(c2$CT, ",")+1
c2$k27_num<-str_count(c2$k27, "&")+1



Astrocytes<-unique(c2[grepl("Astrocytes",c2$CT),])
Astrocytes$found<-grepl("Astrocytes",Astrocytes$k27)
Astrocytes$specific<-ifelse(Astrocytes$ATAC_num==1, TRUE, F)
Atab<-as.data.frame(table(Astrocytes$specific, Astrocytes$found))
colnames(Atab)<-c("Specific","K27","Freq")
Atab$ct<-"Ast"

Excitatory<-unique(c2[grepl("Excitatory",c2$CT),])
Excitatory$found<-grepl("GLU",Excitatory$k27) | grepl("Neuron", Excitatory$k27)
Excitatory$specific<-ifelse(Excitatory$ATAC_num==1, TRUE, F)
Etab<-as.data.frame(table(Excitatory$specific, Excitatory$found))
colnames(Etab)<-c("Specific","K27","Freq")
Etab$ct<-"Exc"


Inhibitory<-unique(c2[grepl("Inhibitory",c2$CT),])
Inhibitory$found<-grepl("GABA",Inhibitory$k27) | grepl("Neuron", Inhibitory$k27)
Inhibitory$specific<-ifelse(Inhibitory$ATAC_num==1, TRUE, F)
Itab<-as.data.frame(table(Inhibitory$specific, Inhibitory$found))
colnames(Itab)<-c("Specific","K27","Freq")
Itab$ct<-"Inh"


Oligodendrocytes<-unique(c2[grepl("Oligodendrocytes",c2$CT),])
Oligodendrocytes$found<-grepl("Oligodendrocytes",Oligodendrocytes$k27)
Oligodendrocytes$specific<-ifelse(Oligodendrocytes$ATAC_num==1, TRUE, F)
Otab<-as.data.frame(table(Oligodendrocytes$specific, Oligodendrocytes$found))
colnames(Otab)<-c("Specific","K27","Freq")
Otab$ct<-"Oli"


Microglia<-unique(c2[grepl("Microglia",c2$CT),])
Microglia$found<-grepl("Microglia",Microglia$k27)
Microglia$specific<-ifelse(Microglia$ATAC_num==1, TRUE, F)
Mtab<-as.data.frame(table(Microglia$specific, Microglia$found))
colnames(Mtab)<-c("Specific","K27","Freq")
Mtab$ct<-"Mic"


df<-rbind(Atab, Etab, Itab, Otab, Mtab)




df$color<-paste0(df$Specific,"-",df$K27)
df$color<-ifelse(df$color=="FALSE-FALSE","TRUE-FALSE", df$color)

pdf("~/scMultiomics_AD/Linked_peaks_olapk27_barPlot_filtered_XY.pdf")
ggplot(df, aes(y=Freq/1000, x=Specific,  fill=K27))+geom_bar(stat="identity", color="grey40")+ theme_classic()+ylab("##  Linked peaks (x1,000)") + xlab("")+scale_x_discrete(labels=c("Shared", "Specific"))+
  theme(axis.text.x=element_text(angle=90, size=15), plot.title=element_text(hjust=0.5), legend.position="top", axis.title.y=element_text(size=17), legend.title=element_text(size=0), legend.text=element_text(size=15), axis.text.y=element_text(size=15))+facet_wrap(~ct,  ncol=5,  strip.position="bottom") +scale_fill_manual(labels=c("No Overlap","Overlap H3K27ac"),values=c("grey80","darkseagreen2"))
dev.off()



df$ctSpec<-ifelse(df$Specific==T, "Specific","Shared")

pdf("~/scMultiomics_AD/Linked_peaks_olapk27_barPlot_filtered_split.pdf")
ggplot(df, aes(y=Freq/1000, x=ct,  fill=K27))+geom_bar(stat="identity", color="grey40")+ theme_classic()+ylab("##  Linked peaks (x1,000)") + xlab("")+
  theme(axis.text.x=element_text(angle=90, size=15), plot.title=element_text(hjust=0.5), legend.position="top", axis.title.y=element_text(size=17), legend.title=element_text(size=0), legend.text=element_text(size=15), axis.text.y=element_text(size=15))+scale_fill_manual(labels=c("No Overlap","Overlap H3K27ac"),values=c("grey80","darkseagreen2"))+facet_wrap(~ctSpec, scales="free")
dev.off()











df2<-df[which(df$Specific==T),]
rr<-c()
for(i in seq(1,9, by=2)){
  r<-df2[i+1,3]/(df2[i,3]+df2[i+1,3])
  rr<-c(rr,r)
}  

## mean specific 0.7870
## mean non-specific 0.6746



Atab<-as.data.frame(table(Astrocytes$found))
colnames(Atab)<-c("K27","Freq")
Atab$ct<-"Ast"

Etab<-as.data.frame(table( Excitatory$found))
colnames(Etab)<-c("K27","Freq")
Etab$ct<-"Exc"

Itab<-as.data.frame(table(Inhibitory$found))
colnames(Itab)<-c("K27","Freq")
Itab$ct<-"Inh"

Otab<-as.data.frame(table( Oligodendrocytes$found))
colnames(Otab)<-c("K27","Freq")
Otab$ct<-"Oli"

Mtab<-as.data.frame(table( Microglia$found))
colnames(Mtab)<-c("K27","Freq")
Mtab$ct<-"Mic"


df2<-rbind(Atab, Etab, Itab, Otab, Mtab)
rr<-c()
for(i in seq(1,9, by=2)){
  r<-df2[i+1,2]/(df2[i,2]+df2[i+1,2])
  rr<-c(rr,r)
}  

SUPP

S1 C) boxplot

tab<-prop.table(table(data$predicted.id, data$id),2)
Status<-c("Ctrl","Ctrl","Ctrl","AD","Ctrl","AD","AD","AD","AD","AD","AD","Ctrl","Ctrl","Ctrl","Ctrl")
df<-as.data.frame(t(tab))
df$Status<-rep(Status, nrow(tab))


stat.test <- df %>%
  group_by(Var2) %>%
  t_test(Freq ~ Status) %>%
  adjust_pvalue() %>%
  add_significance("p.adj")
stat.test <- stat.test %>% add_xy_position(x = "Var2")

pdf("~/scMultiomics_AD/ALL_CT_BOXPLOT_prop_tt.pdf", width=10,height=5)
ggboxplot(df, y="Freq",x="Var2", fill="Status", alpha=0.8)+stat_pvalue_manual(stat.test, label="p")+xlab("")+scale_fill_manual(values=c("red","blue"))
dev.off()

S1A,B)

md<-data@meta.data
cells=rownames(md[which(md$subs !="Exc_10"),])


pdf("~/scMultiomics_AD/All_subclusters_WNN.pdf", width=15, height=5)
p1<-DimPlot(data, cells=cells,reduction="harmony.rna.umap",group.by="subs",  pt.size=0.1,cols=c(
      "Ast_0"="darkgoldenrod1","Ast_1"="darkgoldenrod3", "Ast_2"="gold2",
      "Ast_3"="goldenrod","Ast_4"="goldenrod1",
               "End"="gray54",     
               "Exc_0"="royalblue1",       "Exc_1"="deepskyblue2",     "Exc_2"="steelblue1", "Exc_3"="steelblue3",       "Exc_4"="cornflowerblue",       "Exc_5"="royalblue3",       "Exc_6"="cadetblue1", "Exc_7"="cadetblue3",
      "Exc_8"="dodgerblue","Exc_9"="dodgerblue3",
               "Inh_0"="palegreen",  "Inh_1"="springgreen",  "Inh_2"="darkolivegreen3",       "Inh_3"="seagreen3", "Inh_4"="forestgreen",       "Inh_5"="darkseagreen",       "Inh_6"="palegreen4",       "Inh_7"="darkolivegreen1",  
                     "Mic_0"="purple", "Mic_1"="darkorchid4", 
               "Mic_2"="magenta",    "Mic_3"="magenta4",
               "Mic_4"="orchid",                        
               "Oli_0"="lightsalmon", "Oli_1"="darksalmon", 
               "Oli_2"="coral", "Oli_3"="brown1",
               
               "OPC"="firebrick",    "Per"="lightpink"))&ggtitle("RNA")& theme(legend.position="none", plot.title = element_text(size=30))
p2<-DimPlot(data, cells=cells,reduction="harmony.atac.umap",group.by="subs",  pt.size=0.1,cols=c(
      "Ast_0"="darkgoldenrod1","Ast_1"="darkgoldenrod3", "Ast_2"="gold2",
      "Ast_3"="goldenrod","Ast_4"="goldenrod1",
                                                                        
               "End"="gray54",     
      
               "Exc_0"="royalblue1",       "Exc_1"="deepskyblue2",     "Exc_2"="steelblue1", "Exc_3"="steelblue3",       "Exc_4"="cornflowerblue",       "Exc_5"="royalblue3",       "Exc_6"="cadetblue1", "Exc_7"="cadetblue3",
      "Exc_8"="dodgerblue","Exc_9"="dodgerblue3",
               
               "Inh_0"="palegreen",  "Inh_1"="springgreen",  "Inh_2"="darkolivegreen3",       "Inh_3"="seagreen3", "Inh_4"="forestgreen",       "Inh_5"="darkseagreen",       "Inh_6"="palegreen4",       "Inh_7"="darkolivegreen1",  
               
                    "Mic_0"="purple",  "Mic_1"="darkorchid4", 
               "Mic_2"="magenta",    "Mic_3"="magenta4",
               "Mic_4"="orchid",   
               
               "Oli_0"="lightsalmon", "Oli_1"="darksalmon", 
               "Oli_2"="coral", "Oli_3"="brown1",
               
               "OPC"="firebrick",    "Per"="lightpink"))&ggtitle("ATAC")& theme(legend.position="none",plot.title = element_text(size=30))

p3<-DimPlot(data, cells=cells,reduction="wnn.umap",group.by="subs", pt.size=0.1,cols=c(
      "Ast_0"="darkgoldenrod1","Ast_1"="darkgoldenrod3", "Ast_2"="gold2",
      "Ast_3"="goldenrod","Ast_4"="goldenrod1",
                                                                        
               "End"="gray54",     
      
               "Exc_0"="royalblue1",       "Exc_1"="deepskyblue2",     "Exc_2"="steelblue1", "Exc_3"="steelblue3",       "Exc_4"="cornflowerblue",       "Exc_5"="royalblue3",       "Exc_6"="cadetblue1", "Exc_7"="cadetblue3",
      "Exc_8"="dodgerblue","Exc_9"="dodgerblue3",
               
               "Inh_0"="palegreen",  "Inh_1"="springgreen",  "Inh_2"="darkolivegreen3",       "Inh_3"="seagreen3", "Inh_4"="forestgreen",       "Inh_5"="darkseagreen",       "Inh_6"="palegreen4",       "Inh_7"="darkolivegreen1",  
               
                    "Mic_0"="purple",  "Mic_1"="darkorchid4", 
               "Mic_2"="magenta",    "Mic_3"="magenta4",
               "Mic_4"="orchid", 
               
               "Oli_0"="lightsalmon", "Oli_1"="darksalmon", 
               "Oli_2"="coral", "Oli_3"="brown1",
               
               "OPC"="firebrick",    "Per"="lightpink"))&ggtitle("WNN")& theme(legend.position="none", plot.title = element_text(size=30))
p4<-FeaturePlot(data,cells=cells, features="ATAC.weight", reduction="wnn.umap")+ggtitle("RNA-ATAC weight")+scale_color_viridis()+ theme(plot.title = element_text(size=30))

plot_grid(p1,p2,p3, ncol=3)
dev.off()
pdf("~/scMultiomics_AD/All_subclusters_WNN_part2.pdf", width=10, height=10)
p4
p1+theme(legend.position="bottom")
dev.off()

S5

p1<-DimPlot(data, cells=cells,reduction="wnn.umap",group.by="subs", label=F, pt.size=0.1,cols=c(
      "Ast_0"="darkgoldenrod1","Ast_1"="darkgoldenrod3", "Ast_2"="gold2",
      "Ast_3"="goldenrod","Ast_4"="goldenrod1",
                                                                        
               "End"="gray54",     
      
               "Exc_0"="royalblue1",       "Exc_1"="deepskyblue2",     "Exc_2"="steelblue1", "Exc_3"="steelblue3",       "Exc_4"="cornflowerblue",       "Exc_5"="royalblue3",       "Exc_6"="cadetblue1", "Exc_7"="cadetblue3",
      "Exc_8"="dodgerblue","Exc_9"="dodgerblue3",
               
               "Inh_0"="palegreen",  "Inh_1"="springgreen",  "Inh_2"="darkolivegreen3",       "Inh_3"="seagreen3", "Inh_4"="forestgreen",       "Inh_5"="darkseagreen",       "Inh_6"="palegreen4",       "Inh_7"="darkolivegreen1",  
               
                      "Mic_1"="darkorchid4", 
               "Mic_2"="magenta",    "Mic_3"="magenta4",
               "Mic_4"="orchid", "Mic_0"="purple", 
               
               "Oli_0"="lightsalmon", "Oli_1"="darksalmon", 
               "Oli_2"="coral", "Oli_3"="brown1",
               
               "OPC"="firebrick",    "Per"="lightpink"))&ggtitle("Cell type")& theme(legend.position="none")
p2<-DimPlot(data, cells=cells,reduction="wnn.umap", group.by="Status",pt.size=0.1, cols=c("AD"="firebrick", "Ctrl"="dodgerblue"))&ggtitle("Disease State")& theme(legend.position="bottom")
p3<-DimPlot(data, cells=cells,reduction="wnn.umap", group.by="Sex", pt.size=0.1, cols=c("F"="khaki", "M"="seagreen"))&ggtitle("Sex")& theme(legend.position="bottom")
p4<-FeaturePlot(data, cells=cells,reduction="wnn.umap", features="Age", pt.size=0.1)&ggtitle("Age")& theme(legend.position="bottom")&scale_color_viridis()
p5<-DimPlot(data, cells=cells,reduction="wnn.umap", group.by="Braak", pt.size=0.1, cols=c("0"="dodgerblue","4"="tomato", "5"="red", "6"="firebrick4"))&ggtitle("Braak")&theme(legend.position="bottom")





pdf("~/scMultiomics_AD/WNN_diseasestat.pdf", width=6, height=6)

p2

dev.off()

##  extra<-read.csv("~/scMultiomics_AD/meta_RIN_PMI.csv")
##  extra[16,1]<-"HCTZZT"
##  meta<-read.csv("~/scMultiomics_AD/MetaData_wNK.csv")
##  rownames(meta)<-meta$X
##  md.m<-merge(meta, extra, by.x="id",by.y="ID")
##  rownames(md.m)<-md.m$X
##  data<-AddMetaData(data, md.m)
##  


p1<-FeaturePlot(data,cells=cells, reduction="wnn.umap", features="PMI..h.", pt.size=0.1)&ggtitle("PMI")&scale_color_viridis(option="mako"
)& theme(legend.position="bottom")

p2<-FeaturePlot(data, cells=cells,reduction="wnn.umap", features="RIN", pt.size=0.1)&ggtitle("RIN")&scale_color_viridis(option="rocket"
)& theme(legend.position="bottom")

pdf("~/scMultiomics_AD/WNN_moreStats.pdf", width=6, height=6)
p1
p2
dev.off()

snp-olap

GWAS_snps<-import("~//FUMA_hg38_LD.bed", format="bed")
snp_tmp<-read.csv("~/AD_Wightman/GWAS_AD_Wightman_SNPs_including_linkage_disequilibrium.csv")
seqnames<-paste("chr",sapply(strsplit(snp_tmp$uniqID,":"), `[`, 1),sep="")
start<-sapply(strsplit(snp_tmp$uniqID,":"), `[`, 2)
start<-as.numeric(start)
end<-start+1
df<-data.frame(seqnames=seqnames, start=start, end=end)
snp2<-GRanges(df)
snp2$rs<-snp_tmp$rsID
ch<-import.chain("~/hg19ToHg38.over.chain")
snp38<-liftOver(snp2, ch)
snp38<-unlist(snp38)  ## lost 7
snp1_2<-c(GWAS_snps,snp38)


over<-findOverlaps(unique(c2),unique(snp38))
c3<-unique(c2)
c3$olap<-0
c3[queryHits(over),]$olap<-1
## c3<-c3[which(c3$olap==1),]

tmp<-unique(c3)
## tmp<-tmp[which(tmp$numCT==0),]
a<-prop.table(table( tmp$olap,tmp$Astrocytes),2)
m<-prop.table(table( tmp$olap,tmp$Microglia),2)
e<-prop.table(table( tmp$olap,tmp$Excitatory),2)
i<-prop.table(table( tmp$olap,tmp$Inhibitory),2)
o<-prop.table(table( tmp$olap,tmp$Oligodendrocytes),2)
p<-prop.table(table( tmp$olap,tmp$OPCs),2)

df<-data.frame(ct=c("Ast","Mic","Exc","Inh","Oli","OPC"),count=c(a[2,2],m[2,2],e[2,2],i[2,2],o[2,2],p[2,2]))

a<-table( tmp$olap,tmp$Astrocytes)
m<-table( tmp$olap,tmp$Microglia)
e<-table( tmp$olap,tmp$Excitatory)
i<-table( tmp$olap,tmp$Inhibitory)
o<-table( tmp$olap,tmp$Oligodendrocytes)
p<-table( tmp$olap,tmp$OPCs)


df2<-cbind(a[,2],m[,2],e[,2],i[,2],o[,2],p[,2])
colnames(df2)<-c("Ast","Mic","Exc","Inh","Oli","OPC")
df2<-as.data.frame(df2)
df2<-t(df2)
frac<-paste0(df2[,2],"/",(df2[,1]+df2[,2]))

df$frac<-frac
pdf("~/scMultiomics_AD/Peak_olap_ADsnp.pdf")
ggplot(df, aes(y=ct, x=count*100, fill=ct))+geom_bar(stat="identity")+theme_classic()+ylab("")+xlab("% of Linked-Peaks Overlap AD SNPs")+scale_fill_manual(values=c("darkgoldenrod1","cornflowerblue","seagreen3","mediumorchid3","coral3","firebrick"))+theme(legend.position="none")+geom_text(aes(label=frac), nudge_x=0.05)+xlim(0,0.5)
dev.off()

S3 A,B)

gtf<-import("~/STAR/genes.gtf")
gtf<-gtf[which(gtf$type=="gene"),]
gtf$TSS<-ifelse(strand(gtf)=="+", start(gtf), end(gtf))
TSS.df<-data.frame(gene=gtf$gene_name, TSS=gtf$TSS)
tmp<-merge(c2, TSS.df, by="gene")
tmp$p1<-abs(tmp$start-tmp$TSS)
tmp$p2<-abs(tmp$end-tmp$TSS)
tmp$distLinkedGene<-ifelse(tmp$p1<tmp$p2, tmp$p1, tmp$p2)

pdf("~/scMultiomics_AD/Distance_LinkedTSS_byScore.pdf",width=4,height=4)
ggplot(tmp[which(tmp$score>0.2 & tmp$distLinkedGene<=500000),], aes(x=distLinkedGene, y=abs(score)))+geom_bin_2d(bins=100)+theme_classic()+xlab("Distance to Linked-Gene TSS")+ylab("Abs. Score")+scale_fill_continuous(type = "viridis")
dev.off()



pdf("~/scMultiomics_AD/Distance_LinkedTSS_byScore_boxplot.pdf",width=4,height=4)
ggplot(tmp[which(tmp$score>0.2 & tmp$distLinkedGene<=500000),], aes(y=distLinkedGene, x=bin))+geom_boxplot()+theme_classic()+ylab("Distance to Linked-Gene TSS")+xlab("Abs. Score")
dev.off()

## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## 
## size
gtf<-import("~/STAR/genes.gtf")
gtf<-gtf[which(gtf$type=="gene"),]
gtf$size<-width(gtf)
geneSize<-data.frame(gene=gtf$gene_name, size=gtf$size)
t<-as.data.frame(table(c2$gene))
mer<-merge(t, geneSize, by.x="Var1",by.y="gene")
mer$bin40<-ifelse(mer$Freq>=40, ">=40", 
                  ifelse(mer$Freq<=2, "<=2","other"))
t.test(mer[which(mer$bin40==">=40"),]$size, mer[which(mer$bin40=="<=2"),]$size)
t.test(mer[which(mer$bin40==">=40"),]$size, mer[which(mer$bin40=="other"),]$size)
t.test(mer[which(mer$bin40=="other"),]$size, mer[which(mer$bin40=="<=2"),]$size)
## expression
sumR2<-as.data.frame(sumR)
sumR2$Var1<-rownames(sumR2)
mer2<-merge(t, sumR2, by="Var1")
mer2$bin40<-ifelse(mer2$Freq>=40, ">=40", 
                  ifelse(mer2$Freq<=2, "<=2","other"))
t.test(mer2[which(mer2$bin40==">=40"),]$sumR, mer2[which(mer2$bin40=="<=2"),]$sumR)
t.test(mer2[which(mer2$bin40==">=40"),]$sumR, mer2[which(mer2$bin40=="other"),]$sumR)
t.test(mer2[which(mer2$bin40=="other"),]$sumR, mer2[which(mer2$bin40=="<=2"),]$sumR)

aggregate(sumR~bin40, mer2, quantile)
## avg score
score<-aggregate(abs(score)~gene, c2.2, mean)
mer3<-merge(t, score, by.x="Var1",by.y="gene")
mer3$bin40<-ifelse(mer3$Freq>=40, ">=40", 
                  ifelse(mer3$Freq<=2, "<=2","other"))
t.test(mer3[which(mer3$bin40==">=40"),]$`abs(score)`, mer3[which(mer3$bin40=="<=2"),]$`abs(score)`)
t.test(mer3[which(mer3$bin40==">=40"),]$`abs(score)`, mer3[which(mer3$bin40=="other"),]$`abs(score)`)
t.test(mer3[which(mer3$bin40=="other"),]$`abs(score)`, mer3[which(mer3$bin40=="<=2"),]$`abs(score)`)

## ## ## DON'T INCLUDE OTHER
pdf("~/scMultiomics_AD/Score_Expression_GeneSize_by_LinkPerGene.pdf", width=10, height=6)
p1<-ggplot(mer[which(mer$bin40!="other"),], aes(x=bin40, y=log10(size), color=bin40))+geom_boxplot()+theme_classic()+ylab("log10(Gene Length)")+ stat_compare_means(method = "anova")+theme(legend.position="none")

p2<-ggplot(mer2[which(mer2$bin40!="other" & mer2$sumR<12622522),], aes(x=bin40, y=log10(sumR), color=bin40))+geom_boxplot()+theme_classic()+ylab("log10(Total UMIs)")+ stat_compare_means(method = "t.test")+theme(legend.position="none")

p3<-ggplot(mer3[which(mer3$bin40!="other"),], aes(x=bin40, y=`abs(score)`, color=bin40))+geom_boxplot()+theme_classic()+ylab("Avg. Abs. Score")+ stat_compare_means(method = "t.test")+theme(legend.position="none")
p1+p2+p3
dev.off()


mer$bin40<-factor(mer$bin40, levels=c("<=2", "other",">=40"))
mer2$bin40<-factor(mer2$bin40, levels=c("<=2", "other",">=40"))
mer3$bin40<-factor(mer3$bin40, levels=c("<=2", "other",">=40"))
mycomp<-list(c("<=2", ">=40"), c("other",">=40"), c("<=2","other"))
pdf("~/scMultiomics_AD/Score_Expression_GeneSize_by_LinkPerGene.pdf", width=10, height=6)
p1<-ggplot(mer, aes(x=bin40, y=log10(size), color=bin40))+geom_boxplot()+stat_compare_means(comparisons=mycomp,method="t.test", label="p.signif")+theme_classic()+theme(legend.position="none")

p2<-ggplot(mer2[which(mer2$sumR<12622522),], aes(x=bin40, y=log10(sumR), color=bin40))+geom_boxplot()+theme_classic()+ylab("log10(Total UMIs)")+ stat_compare_means(comparisons=mycomp,method = "t.test",label="p.signif")+theme(legend.position="none")

p3<-ggplot(mer3, aes(x=bin40, y=`abs(score)`, color=bin40))+geom_boxplot()+theme_classic()+ylab("Avg. Abs. Score")+ stat_compare_means(comparisons=mycomp,method = "t.test",label="p.signif")+theme(legend.position="none")
p1+p2+p3
dev.off()

olap DEG

a10.gr<-c2.2[which( c2.2$group !="Ctrl"),]
c10.gr<-c2.2[which( c2.2$group !="AD"),]



t1<-as.data.frame(table(a10.gr$gene))
t2<-as.data.frame(table(c10.gr$gene))

jD<-merge(t1,t2,by="Var1")
colnames(jD)[2:3]<-c("AD","Ctrl")

degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_MAST_ADCtrl_AgeSex.csv")
degs_up<-degs[which(degs$cat=="up"),]
degs_down<-degs[which(degs$cat=="down"),]
jD$deg<-ifelse(jD$Var1 %in% degs_up$gene, "up",
               ifelse(jD$Var1 %in% degs_down$gene,"down","NS"))

ad_related<-read.csv("~/AD_gwas_genes_012822.csv")
jD$label<-ifelse(jD$Var1 %in% c(ad_related$Gene, "MAPT"), jD$deg, "NS")## only keep degs that are know ad related
jD$size<-as.factor(ifelse(jD$label=="NS", 0, 1))
jD$alpha=as.factor(ifelse(jD$deg=="NS", 0,
                          ifelse(jD$label=="NS", 0.5,1)))


lmod<-lm(Ctrl~0+AD,jD)
jD$residuals<-lmod$residuals
jD$alpha<-as.factor(ifelse(jD$residuals>quantile(jD$residuals, probs=0.99) | jD$residuals<quantile(jD$residuals,probs=0.01),1,0 ))

jD$stresiduals<-rstandard(lmod)
jD2<-jD[which((jD$stresiduals>3| jD$residuals< -3)& jD$deg !="NS"),]


pdf("~/scMultiomics_AD/Peaks_per_gene_wDEGs_80Perm_StRes3.pdf", width=4, height=4)
ggplot(jD, aes(x=AD, y=Ctrl, color=deg))+geom_point(aes(color=deg, alpha=alpha),size=0.5)+ scale_color_manual(values=c("blue","grey50","red"))+xlab("##  of connected Peaks (AD)")+ylab("##  of connected Peaks (Ctrl)")+theme_classic()+theme(legend.position="none", axis.text=element_text(size=12), axis.title=element_text(size=14))+geom_text_repel(data=jD[which(jD$alpha==1&jD$deg!="NS"),], aes(x=AD,y=Ctrl,label=Var1), box.padding=1,point.padding=0,label.padding=1,segment.colour="black", size=3, min.segment.length=0, max.overlaps=60)+geom_abline(slope=1.092114,intercept=0)+geom_abline(slope=1.092114, intercept=24.33, linetype="dashed")+geom_abline(slope=1.092114, intercept=-19.9523, linetype="dashed")+scale_alpha_discrete(range=c(0.1,1))
dev.off()

+geom_text_repel(data=jD[which(jD$label!="NS"),], aes(x=AD,y=Ctrl,label=Var1), box.padding=1,point.padding=1,label.padding=1,segment.colour="black", size=4, min.segment.length=0, max.overlaps=20)+scale_size_discrete(range=c(0.15,1.6))+scale_alpha_discrete(range=c(0.2,1))



jD$residuals<-lmod$residuals
jD$fitted.values<-lmod$fitted.values
jD<-jD[order(jD$residuals),]
jD$Rank<-seq(1,nrow(jD))

pdf("~/scMultiomics_AD/Peaks_per_gene_Residuals.pdf")
ggplot(jD, aes(x=Rank, y=residuals, color=deg))+geom_jitter(width=6)+theme_classic()+scale_color_manual(values=c("blue","grey60","red"))+theme(legend.position="none")+facet_wrap(~deg)+geom_text_repel(data=jD[which(jD$residuals>30 | jD$residuals< -30),], aes(x=Rank, y=residuals, label=Var1),box.padding=1,point.padding=0.3,segment.colour="black", size=2, min.segment.length=0, max.overlaps=80)
dev.off()





p<-ggplot(jD, aes(x=AD, y=Ctrl, color=deg))+geom_point(aes(color=deg),size=0.5)+ scale_color_manual(values=c("blue","grey50","red"))+xlab("##  of connected Peaks (AD)")+ylab("##  of connected Peaks (Ctrl)")+theme_classic()+theme(legend.position="none", axis.text=element_text(size=12), axis.title=element_text(size=14))

saveWidget(as_widget(ggplotly(p)), file="plotly_Links_per_gene.html", selfcontained=T)

Revision

Gene body

gtf<-import("~/STAR/genes.gtf")
gtf<-gtf[which(gtf$type=="gene"),]

c2<-GRanges(c2)
over<-findOverlaps(c2, gtf)
c2$geneAnnot<-NA
c3<-c2[queryHits(over)]
c3$geneAnnot<-gtf[subjectHits(over)]$gene_name
c3$gene_body<-c3$gene==c3$geneAnnot
c3<-c3[order(c3$gene_body, decreasing==T),]
c3<-c3[!duplicated(c3$link),]
c4<-c(c2[-queryHits(over)], c3)

c4$gene_body<-ifelse(is.na(c4$gene_body)==T, F, c4$gene_body)
c4$gb_p<-ifelse(c4$gene_body==T | (c4$annotation=="Promoter"& c4$distanceToTSS<1000  ),T,F)
prop.table(table(c4$pos, c4$gb_p),1)

size<-data.frame(size=width(gtf), gene=gtf$gene_name)
c4<-merge(as.data.frame(c4), size, by="gene")






t<-as.data.frame(table(c4$gene, c4$gb_p))
colnames(t)[1:2]<-c("gene","gb_p")
t<-merge(t,size, by="gene")
t$long<-t$size>167585.1 #85th percentile
t$bin40<-ifelse(t$Freq>=40, ">=40", 
                  ifelse(t$Freq<=2, "<=2","other"))
t2<-t[which(t$gb_p==F),]
aggregate(Freq~long, t2, quantile)
t.test(t2[which(t2$bin40==">=40"),]$size, t2[which(t2$bin40=="<=2"),]$size)
t.test(t2[which(t2$long==T),]$Freq, t2[which(t2$long==F),]$Freq)


t2<-as.data.frame(table(c4$gene))
colnames(t2)<-c("gene","total")
t2<-merge(t2,size, by="gene")
t2$long<-t2$size>167585.1
t2$bin40<-ifelse(t2$total>=40, ">=40", 
                  ifelse(t2$total<=2, "<=2","other"))

t.test(t2[which(t2$bin40==">=40"),]$size, t2[which(t2$bin40=="<=2"),]$size)
t.test(t2[which(t2$long==T),]$total, t2[which(t2$long==F),]$total)

including gene body links, long genes have a median of 22 links per gene.

DAPs

subs<-SplitObject(data,split.by="predicted.id")

counts<-list()
meta<-list()
for (i in 1:6){
  subs[[i]]<-FindMultiModalNeighbors(subs[[i]], reduction.list=list("harmony.rna","harmony.atac"),dims.list=list(1:30,2:50))
  subs[[i]]<-FindClusters(subs[[i]], resolution=0.8, graph.name="wknn")
  c<-c()
  for (j in unique(subs[[i]]$seurat_clusters)){
    c<-cbind(c, rowSums(subs[[i]]@assays$CTpeaks@counts[,which(subs[[i]]$seurat_clusters==j & subs[[i]]$Status=="AD")]) )
    c<-cbind(c, rowSums(subs[[i]]@assays$CTpeaks@counts[,which(subs[[i]]$seurat_clusters==j & subs[[i]]$Status=="Ctrl")]) )
  }
  df<-data.frame(Status=rep(c("AD","Ctrl"), length(unique(subs[[i]]$seurat_clusters))), Cluster=rep(unique(subs[[i]]$seurat_clusters), each=2))
  rownames(df)<-paste0(df$Status,"-", df$Cluster)
  colnames(c)<-paste0(df$Status,"-", df$Cluster)
  meta[[i]]<-df
  counts[[i]]<-c
}


all<-c()
for (i in 1:6){
  dds<-DESeqDataSetFromMatrix(countData=counts[[i]],
    colData=meta[[i]], design=~Status)
  dds<-DESeq(dds)
  res2 <- results(dds, contrast=c("Status","AD","Ctrl"))
  res2$celltype<-subs[[i]]$predicted.id[1]
  res2$gene<-rownames(res2)
  all<-c(all,res2)
}

all<-lapply(all, as.data.frame)
ALL<-rbind(all[[1]],all[[2]],all[[3]],all[[4]],all[[5]],all[[6]])

ALL_sig<-ALL[which(ALL$padj<0.05 & abs(ALL$log2FoldChange)>0.5),]

Overlap DAPs

c2$peak<-paste0(seqnames(c2),"-", start(c2),"-", end(c2))
c2_DAP<-merge(c2, ALL_sig, by.x="peak",by.y="gene", all.x=T)
bin<-c()
for (i in 1:nrow(c2_DAP)){
  bin<-c(bin, grepl(c2_DAP$celltype[i], c2_DAP$CT[i]))
}
bin<-ifelse(is.na(bin)==T,F,T)

c2_DAP$isDAP<-bin
actualDAPs<-unique(c2_DAP[which(c2_DAP$isDAP==T),]$peak)
ALL_DAPs<-ALL_sig[which(ALL_sig$gene %in% actualDAPs),]
#NOTE: this will only include DAPs that agree on CT 
write.csv(ALL_DAPs, "~/scMultiomics_AD/DAPs_metacell_ADCtrl_DESeq2_CTmatch.csv")


degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_MAST_ADCtrl_AgeSex.csv")
c2_dd<-merge(c2_DAP, degs, by="gene", all.x=T)
c2_dd$dd<-c2_dd$celltype.y==c2_dd$celltype.x

length(unique(c2_dd[which(c2_dd$dd==T),]$peak)) #1215 /8588
length(unique(c2_dd[which(c2_dd$dd==T),]$gene)) #447 /911

dd<-c2_dd[which(c2_dd$dd==T),]

447/911 DEGs are linked to a DAP in the same cell type. And vice versa 1215/8588 DAPs 976/1310 DAP-DEG links agree on log2FC direction

plot

gene="PSEN1"
CT="Oligodendrocytes"
tmp.gr<-signac.gr[which(signac.gr$gene==gene),]
tmp.gr<-tmp.gr[which(tmp.gr$Oligodendrocytes==T ),]

min.cutoff<-min(tmp.gr$score)*2
min.cutoff<-ifelse(min.cutoff<0, min.cutoff, 0)
max.cutoff<-max(tmp.gr$score)*2

region<-GRanges(paste0(seqnames(tmp.gr)[1],":",min(start(tmp.gr))-4000,"-",max(end(tmp.gr)+4000)))

Idents(subs$Oligodendrocytes)<-subs$Oligodendrocytes$Status
DefaultAssay(subs$Oligodendrocytes)<-"CTpeaks"
cov_plot<-CoveragePlot(object=subs$Oligodendrocytes, region=region,  assay="CTpeaks",annotation=F, idents=c("AD","Ctrl"),window=800,peaks=F, links=F)&
  scale_fill_manual(values=c("red", "blue"))
#
Links(data)<-tmp.gr ## full
link_plotA <- LinkPlot.height(tmp.gr, region=region, min.cutoff=min.cutoff, max.cutoff=max.cutoff)+ylab("Score")
#
dap_plot<-Bed_PeakPlot(GRanges(c2_DAP[which(c2_DAP$isDAP==T & (c2_DAP$celltype=="Oligodendrocytes") ),]), region)
#
DefaultAssay(data)<-"ATAC"
gene_plot<-AnnotationPlot(data, region=region)


p<-CombineTracks(plotlist=list(cov_plot,link_plotA,dap_plot,gene_plot), heights=c(4,2, 1,2))
pdf(paste0("~/scMultiomics_AD/",gene,"_linkPlot_w_DAP_",CT,".pdf"), width=8, height=5)
p
dev.off()

Cell type specific genes and peaks

DefaultAssay(data)<-"RNA"
data<-NormalizeData(data)
rs<-rowSums(data@assays$RNA@data)
q<-quantile(rs, probs=0.6)
avgExp<-as.data.frame(AverageExpression(data, assay="RNA", group.by="predicted.id", features=names(rs[which(rs>q)])))

avgExp<-as.matrix(avgExp)
num<-as.data.frame(apply(avgExp, 1, function(i) sum(i>0.5)))
count<-as.data.frame(table(c2$gene))
num$Var1<-rownames(num)
colnames(num)[1]<-"numCT"

mer<-merge(num, count, by="Var1")
mer$spec<-ifelse(mer$numCT<2, T, F)
t.test(mer[which(mer$spec==T),]$Freq, mer[which(mer$spec==F),]$Freq)

c2.mer<-merge(c2, num, by.x="gene",by.y="Var1")
prop.table(table(c2.mer$numCT, c2.mer$ATAC_num), 2)



####################################
degs<-read.csv("~/scMultiomics_AD/DEGs/DEGs_celltype_markers_MAST_noFilter.csv")
degs<-degs[which(degs$p_val_adj<0.05 & degs$avg_log2FC>0.2 & degs$cluster !="Endothelial" & degs$cluster!="Pericytes"),]
c2.mer<-merge(c2, degs, by="gene", all=T)
sum(c2.mer$cluster == c2.mer$CT, na.rm=T)
c2.sub<-c2.mer[which(c2.mer$ATAC_num==1 & is.na(c2.mer$cluster)==F & c2.mer$CT !="Pericytes" & c2.mer$CT !="Endothelial"),]
table(c2.sub[which(c2.sub$CT != c2.sub$cluster),]$cluster)
table(c2.sub[which(c2.sub$CT != c2.sub$cluster),]$CT)
c2.sub$mis<-c2.sub$CT == c2.sub$cluster

count$CTdeg<-ifelse(count$Var1 %in% degs$gene, T,F)
aggregate(Freq~CTdeg, count, mean)



bin<-c()
for (i in 1:nrow(c2.mer)){
  b<-grepl(c2.mer$cluster[i], c2.mer$CT[i])
  bin<-c(bin,b)
}





rs<-as.data.frame(rowSums(data@assays$RNA@counts))
rs$gene<-rownames(rs)
colnames(rs)[1]<-"RNA"
rs$DEG<-rs$gene %in% degs$gene
rs$link<-rs$gene %in% c2$gene
rs2<-rs[which(rs$RNA> quantile(rs$RNA, probs=0.6)),]
fisher.test(table(rs2$DEG, rs2$link))

GO

dbs <- c("GO_Molecular_Function_2021", "GO_Cellular_Component_2021", "GO_Biological_Process_2021", "KEGG_2021_Human","Azimuth_Cell_Types_2021")

go1<-enrichr(unique(c2[which(c2$group=="common" & c2$CT=="Excitatory"),]$gene), dbs)
go2<-enrichr(unique(c2[which(c2$group=="common" & c2$CT=="Inhibitory"),]$gene), dbs)
go3<-enrichr(unique(c2[which(c2$group=="common" & c2$CT=="Astrocytes"),]$gene), dbs)
go4<-enrichr(unique(c2[which(c2$group=="common" & c2$CT=="Microglia"),]$gene), dbs)
go5<-enrichr(unique(c2[which(c2$group=="common" & c2$CT=="Oligodendrocytes"),]$gene), dbs)
go6<-enrichr(unique(c2[which(c2$group=="common" & c2$CT=="OPCs"),]$gene), dbs)

go1<-go1[["GO_Biological_Process_2021"]]
go2<-go2[["GO_Biological_Process_2021"]]
go3<-go3[["GO_Biological_Process_2021"]]
go4<-go4[["GO_Biological_Process_2021"]]
go5<-go5[["GO_Biological_Process_2021"]]
go6<-go6[["GO_Biological_Process_2021"]]


Terms<-unique(c(go1[which(go1$Adjusted.P.value<0.01),][1:3,]$Term, go2[which(go2$Adjusted.P.value<0.01),][1:3,]$Term, go3[which(go3$Adjusted.P.value<0.01),][1,]$Term, go4[which(go4$Adjusted.P.value<0.01),][1:3,]$Term, go5[which(go5$Adjusted.P.value<0.01),][1:3,]$Term, go6[which(go6$Adjusted.P.value<0.01),][1:3,]$Term))


#######################################
go1<-go1[order(go1$Term),]
go2<-go2[order(go2$Term),]
go3<-go3[order(go3$Term),]
go4<-go4[order(go4$Term),]
go5<-go5[order(go5$Term),]
go6<-go6[order(go6$Term),]

go<-cbind(go1[which(go1$Term %in% Terms),]$Adjusted.P.value, go2[which(go2$Term %in% Terms),]$Adjusted.P.value, go3[which(go3$Term %in% Terms),]$Adjusted.P.value, go4[which(go4$Term %in% Terms),]$Adjusted.P.value, go5[which(go5$Term %in% Terms),]$Adjusted.P.value, go6[which(go6$Term %in% Terms),]$Adjusted.P.value)
rownames(go)<-unique(go1[which(go1$Term %in% Terms),]$Term)
go<-as.matrix(go)
go<- -log10(go)
colnames(go)<-c("Exc","Inh","Ast","Mic","Oli","OPC")


ha<-HeatmapAnnotation(celltype=colnames(go), col= list(celltype=c("Ast"="darkgoldenrod1","Exc"="cornflowerblue","Inh"="seagreen3","Mic"="mediumorchid3","Oli"="coral3","OPC"="firebrick")), show_legend=F)
ht2=Heatmap(go, cluster_rows=T,show_row_dend=F,cluster_columns=T,col=colorRamp2(c(0,3,6),brewer.pal(n = 3, name = "YlOrRd")),  name="-log10(Adj P)",  show_column_names=T, show_row_names=T, column_title=NULL,row_names_gp = gpar(fontsize = 10), top_annotation=ha, row_names_side = "left", row_names_max_width=max_text_width(
        rownames(go), 
        gp = gpar(fontsize = 12)
    ))


pdf("~/scMultiomics_AD/CellType-specific_linked_genes_GO.pdf", width=8, height=5)
ht2
dev.off()

BIN1 snps

Nott: rs6733839

signac_form<-as.data.frame(c2)[,-c(1,2,3)]
colnames(signac_form)[c(19,20,21)]<-c("seqnames","start","end")
signac_form$seqnames<-paste0("chr",signac_form$seqnames)
signac_form$score<-ifelse(signac_form$group=="common", (signac_form$score.x+ signac_form$score.y)/2, 
                          ifelse(signac_form$group=="Ctrl",signac_form$score.y, signac_form$score.x))
signac.gr<-GRanges(signac_form)
signac.gr$group2<-ifelse(signac.gr$group=="AD",0,
                         ifelse(signac.gr$group=="common",1,2))


snps<-read.table("~/mac3_mac1_EPACTS.txt", header=F)
colnames(snps)[1:3]<-c("chr","start","end")
snps$chr<-paste0("chr",snps$chr)
snps.gr<-GRanges(snps)
snps.gr$P<-snps.gr$V5




gene="BIN1"
tmp.gr<-signac.gr[which(signac.gr$gene==gene),]
tmp.gr<-tmp.gr[which(tmp.gr$Microglia==T ),]

min.cutoff<-min(tmp.gr$score)*2
min.cutoff<-ifelse(min.cutoff<0, min.cutoff, 0)
max.cutoff<-max(tmp.gr$score)*2

region<-GRanges("chr2:127077002-127145234")

Idents(subs$Microglia)<-subs$Microglia$Status
DefaultAssay(subs$Microglia)<-"CTpeaks"
cov_plot<-CoveragePlot(object=subs$Microglia, region=region,  assay="CTpeaks",annotation=F, idents=c("AD","Ctrl"),window=500,peaks=F, links=F)&
  scale_fill_manual(values=c("red", "blue"))
#
Links(data)<-tmp.gr ## full
link_plotA <- LinkPlot.height(tmp.gr, region=region, min.cutoff=min.cutoff, max.cutoff=max.cutoff)+ylab("Score")
#
snp<-"rs6733839"
snps.gr$highlight<-ifelse(snps.gr$V4==snp, T, F)
bgp<-Bed_GWASPlot(snps.gr[which(snps.gr$highlight==T),], region)+theme(legend.position="none")
#
DefaultAssay(data)<-"ATAC"
gene_plot<-AnnotationPlot(data, region=region)


p<-CombineTracks(plotlist=list(cov_plot,link_plotA, bgp,gene_plot), heights=c(4,2, 1,2))
pdf(paste0("~/scMultiomics_AD/",gene,"_linkPlot_w_snp.pdf"), width=8, height=8)
p
dev.off()

permutations

### create sample lists
args = commandArgs(trailingOnly=TRUE)
i=args[1]

print(i)
set.seed(i)
df<-read.csv("~/scMultiomics_AD/LinkPerm/ALL_cells.csv")
id<-unique(df$Cluster)
ctrl<-c("HCT17HEX","NT1261", "1230", "NT1271", "1238", "3586", "HCTZZT", "1224")

sub<-sample(id, 7)
if (sum(sub %in% ctrl)==8 | sum(!(sub %in% ctrl))==7 ) {
  sub<-sample(id,7)
}
df_test<-df[which(df$id %in% sub),]
df_test2<-df[!(df$id %in% sub),]
write.csv(df_test, paste0("~/scMultiomics_AD/LinkPerm/AD.barcodes.",i,".csv"),quote=F, row.names=F )
write.csv(df_test, paste0("~/scMultiomics_AD/LinkPerm/Ctrl.barcodes.",i,".csv"),quote=F, row.names=F )


##Call Links with cellranger arc

##bring perms in and reformat
files=as.list(readLines("~/scMultiomics_AD/LinkPerm/random7/Linkages.txt"))

FT_list=lapply(files,function(x){
  FT=import(con=x,format="bedpe")
    })

format<-function(x){
a10<-as.data.frame(x)
a10<-a10[which(a10$NA.2!="peak-peak"),]
a10$seqnames<-paste0("chr",ifelse(a10$NA.2=="peak-gene",a10[,1], a10[,6]),sep="")
a10$start<-ifelse(a10$NA.2=="peak-gene",a10[,2], a10[,7])
a10$end<-ifelse(a10$NA.2=="peak-gene",a10[,3], a10[,8])
a<-matrix(unlist(strsplit(a10$name,"><")),ncol=2,byrow=T)
gene<-ifelse(a10$NA.2=="peak-gene",noquote(gsub(">","",a[,2])),noquote(gsub("<","",a[,1])))
peak<-ifelse(a10$NA.2=="gene-peak",noquote(gsub(">","",a[,2])),noquote(gsub("<","",a[,1])))
a10$gene<-gene
a10$peak<-peak
a10$Peak_pos<-paste0(a10$peak.seqnames,"-",a10$peak.start-1,"-", a10$peak.end)
a10<-a10[which(abs(a10$score)>0.2),]
a10.gr<-GRanges(a10_ct)
a10.gr$link<-paste0(a10.gr$gene,"-",a10.gr$index)
return(a10.gr)
}

#format 
FL_format<-lapply(FT_list, format)



## distribution of group-specific
ll<-data.frame(i=1, j=1, AD=0,common=0,Ctrl=0)

for (i in 1:50){
  for (j in 1:50){
    if (i !=j){
      AD<-as.data.frame(FL_format[[i]])
      Ctrl<-as.data.frame(FL_format[[j]])
      comp<-merge(AD,Ctrl, by=c("seqnames","start","end","gene"), all=T)
      comp$group<-ifelse(is.na(comp$score.x)==T, "Ctrl",
                     ifelse(is.na(comp$score.y)==T,"AD","common"))
      t<-table(comp$group)
      ll.tmp<-data.frame(i=i, j=j, AD=t[1],common=t[2],Ctrl=t[3])
      ll<-rbind(ll, ll.tmp)
 } }}

ll<-ll[-1,]
ll$propSpec<-(ll[,3]+ll[,5])/(ll[,3]+ll[,4]+ll[,5])


pdf("~/scMultiomics_AD/LinkPerm_propSpec_allCombinations_221118.pdf", width=5,height=4)
ggplot(ll, aes(x=propSpec))+geom_histogram(bins=50)+theme_classic()+geom_vline(xintercept=0.359041, color="red", )+xlab("Proportion group-specific")
dev.off()



## number that are really AD-spec
ALL_0.05<-do.call("c", FL_format)

 table(c2[!(c2$link %in% ALL_0.05$link),]$group)

LIT-SEARCH Validation

Nott PLACseq

https://www-science-org.ezproxy3.lhl.uab.edu/doi/full/10.1126/science.aay0793

neu

neu<-read.table("~/NOTT_hg38_neu_interactome.bed")

colnames(neu)<-neu[1,]
neu<-neu[-1,]
neu.1<-makeGRangesFromDataFrame(neu, keep.extra.columns=T)
peak2<-neu[,-c(1,2,3)]
neu.2<-makeGRangesFromDataFrame(peak2, keep.extra.columns=T, seqnames.field="chr2",start.field="start2",end.field="end2")
tmp<-ifelse(c2$signac.start>start(c2) & c2$signac.start<end(c2),"peak-gene","gene-peak")
start<-ifelse(tmp=="peak-gene", c2$signac.end-100, c2$signac.start-100)
end<-ifelse(tmp=="peak-gene", c2$signac.end+1000, c2$signac.start+1000)
genePos<-GRanges(data.frame(seqnames=paste0("chr",c2$signac.seqnames), start=start, end=end))
genePos$gene<-c2$gene

## olap placseq peak1
links<-Pairs(c2, genePos)
plac<-Pairs(neu.1,neu.2)
## pairs doesn't put them in order so second is always gene. Do peak-gene first
olap1<-findOverlaps(first(links),first(plac))
links.2<-links[queryHits(olap1)]
plac.2<-plac[subjectHits(olap1)]
olap2<-findOverlaps(second(links.2),second(plac.2))
links.f<-links.2[queryHits(olap2)]
links.f<-unique(links.f)
mcols(links.f)$index<-first(links.f)$index
mcols(links.f)$gene<-second(links.f)$gene

## gene-peak
Bolap1<-findOverlaps(second(links),first(plac)) ## do genes overlap placseq peak1
Blinks.2<-links[queryHits(Bolap1)]
Bplac.2<-plac[subjectHits(Bolap1)]
Bolap2<-findOverlaps(first(Blinks.2),second(Bplac.2)) ## do peaks olap plac peak2
Blinks.f<-Blinks.2[queryHits(Bolap2)]
Blinks.f<-unique(Blinks.f)
mcols(Blinks.f)$index<-first(Blinks.f)$index
mcols(Blinks.f)$gene<-second(Blinks.f)$gene

## 
all<-c(links.f, Blinks.f)
link_keep<-paste0(mcols(all)$gene,"-",mcols(all)$index)

c2$PLACseq_neun<-ifelse(c2$link %in% link_keep, T,F)

microglia

mic.1<-GRanges(mic)
peak2<-mic[,-c(1,2,3)]
mic.2<-makeGRangesFromDataFrame(peak2, keep.extra.columns=T, seqnames.field="chr2",start.field="start2",end.field="end2")



plac<-Pairs(mic.1,mic.2)
## pairs doesn't put them in order so second is always gene. Do peak-gene first
olap1<-findOverlaps(first(links),first(plac))
links.2<-links[queryHits(olap1)]
plac.2<-plac[subjectHits(olap1)]
olap2<-findOverlaps(second(links.2),second(plac.2))
links.f<-links.2[queryHits(olap2)]
links.f<-unique(links.f)
mcols(links.f)$index<-first(links.f)$index
mcols(links.f)$gene<-second(links.f)$gene

## gene-peak
Bolap1<-findOverlaps(second(links),first(plac)) ## do genes overlap placseq peak1
Blinks.2<-links[queryHits(Bolap1)]
Bplac.2<-plac[subjectHits(Bolap1)]
Bolap2<-findOverlaps(first(Blinks.2),second(Bplac.2)) ## do peaks olap plac peak2
Blinks.f<-Blinks.2[queryHits(Bolap2)]
Blinks.f<-unique(Blinks.f)
mcols(Blinks.f)$index<-first(Blinks.f)$index
mcols(Blinks.f)$gene<-second(Blinks.f)$gene

## 
all<-c(links.f, Blinks.f)
link_keep<-paste0(mcols(all)$gene,"-",mcols(all)$index)

c2$PLACseq_mic<-ifelse(c2$link %in% link_keep, T,F)

olig

olig<-read.table("~/NOTT_hg38_olig_interactome.bed")

colnames(olig)<-olig[1,]
olig<-olig[-1,]
olig.1<-makeGRangesFromDataFrame(olig, keep.extra.columns=T)
peak2<-olig[,-c(16,17,18)]
olig.2<-makeGRangesFromDataFrame(peak2, keep.extra.columns=T, seqnames.field="chr2",start.field="start2",end.field="end2")



plac<-Pairs(olig.1,olig.2)
## pairs doesn't put them in order so second is always gene. Do peak-gene first
olap1<-findOverlaps(first(links),first(plac))
links.2<-links[queryHits(olap1)]
plac.2<-plac[subjectHits(olap1)]
olap2<-findOverlaps(second(links.2),second(plac.2))
links.f<-links.2[queryHits(olap2)]
links.f<-unique(links.f)
mcols(links.f)$index<-first(links.f)$index
mcols(links.f)$gene<-second(links.f)$gene

## gene-peak
Bolap1<-findOverlaps(second(links),first(plac)) ## do genes overlap placseq peak1
Blinks.2<-links[queryHits(Bolap1)]
Bplac.2<-plac[subjectHits(Bolap1)]
Bolap2<-findOverlaps(first(Blinks.2),second(Bplac.2)) ## do peaks olap plac peak2
Blinks.f<-Blinks.2[queryHits(Bolap2)]
Blinks.f<-unique(Blinks.f)
mcols(Blinks.f)$index<-first(Blinks.f)$index
mcols(Blinks.f)$gene<-second(Blinks.f)$gene

## 
all<-c(links.f, Blinks.f)
link_keep<-paste0(mcols(all)$gene,"-",mcols(all)$index)

c2$PLACseq_olig<-ifelse(c2$link %in% link_keep, T,F)

combine

c2$PLACseq<-ifelse(c2$PLACseq_neun==T | c2$PLACseq_mic==T | c2$PLACseq_olig==T,T,F)

Kampmann MPRA

mpra<-read.csv("~/lit_search_validation/Full_Cooper_MPRA_sumStats.csv")
mpra$start<-mpra$pos
mpra$end<-mpra$start+1
mpra<-GRanges(mpra)
ch<-import.chain("~/liftover/hg19ToHg38.over.chain")
mpra<-liftOver(mpra, ch)
mpra<-unlist(mpra)  



olap<-findOverlaps(unique(c2), mpra)
c2.2<-c2[queryHits(olap)]
c2.2$mpra_q<-mpra[subjectHits(olap)]$q
c2.2$mpra_sig<-ifelse(c2.2$mpra_q<0.01,1,0) ## their cutoff

mpra$sig<-ifelse(mpra$q<0.01,1,0)
mpra$link<-F
mpra[subjectHits(olap)]$link<-T
t<-table(mpra$sig,mpra$link)
p<-prop.table(t,1)
p[2,2]/p[1,2] ## fold enrichment for active mpra 1.48
chisq.test(t) ## pval 0.022
a=37



olap<-findOverlaps(all[!(all$peak %in% c2$peak)],mpra)
mpra$link<-F
mpra[subjectHits(olap)]$link<-T
t<-table(mpra$sig,mpra$link)
chisq.test(t) ## p-value < 2.2e-16
## 94 peak have overlap
b=31

mat<-matrix(data=c(length(all[!(all$peak %in% c2$peak),])-b, length(unique(c2))-a,b, a), nrow=2)
fisher.test(mat)

95 percent confidence interval: 1.459384 4.029969 sample estimates: odds ratio 2.417484

crispr validated

crispr<-read.csv("~/lit_search_validation/Cooper_CRISPR_validated.csv")
mp<-as.data.frame(mpra)[,c(1,2,3,7)]
crispr.pos<-merge(crispr,mp,by.y="rsID",by.x="snp") ## get liftover positions from mpra grange
## add crispr-ex
crispr.pos$method<-"CRISPRi"
tmp<-crispr.pos[which(crispr.pos$snp %in% c("rs36026988","rs13025717","rs9271171","rs10224310")),]
tmp<-tmp[which(tmp$cell_type=="Microglia"),]
tmp<-tmp[!duplicated(tmp$snp_target),]
tmp$sig<-T
tmp$method<-"CRIPSR-ex"
crispr.pos<-rbind(crispr.pos,tmp)

cm<-GRanges(crispr.pos[which(crispr.pos$cell_type=="Microglia"),])
cn<-GRanges(crispr.pos[which(crispr.pos$cell_type=="Neuron"),])

olap<-findOverlaps(c2,cm)
c2.mic<-c2[queryHits(olap)]
c2.mic$crispr.gene<-cm[subjectHits(olap)]$gene
c2.mic$crispr.log2FC<-cm[subjectHits(olap)]$log2FC
c2.mic$crispr.sig<-cm[subjectHits(olap)]$sig
c2.mic$crispr.method<-cm[subjectHits(olap)]$method
c2.mic<-c2.mic[which(c2.mic$gene==c2.mic$crispr.gene),]
c2.mic<-c2.mic[which(c2.mic$Microglia==T),]

cn$index<-seq(1,length(cn))
## Doesn't have CLU/ other snp selected ones
toTest<-readRDS("~/22_restart/Links_toTest.rds")
olap<-findOverlaps(c2,cn)
c2.neu<-c2[queryHits(olap)]
c2.neu$crispr.gene<-cn[subjectHits(olap)]$gene
c2.neu$crispr.log2FC<-cn[subjectHits(olap)]$log2FC
c2.neu$crispr.sig<-cn[subjectHits(olap)]$sig
c2.neu<-c2.neu[which(c2.neu$gene==c2.neu$crispr.gene),]
c2.neu<-c2.neu[which(c2.neu$Inhibitory==T | c2.neu$Excitatory==T),]


oops<-subsetByOverlaps(c2.neu,toTest)
oops2<-subsetByOverlaps(toTest,c2.neu)


library(SNPlocs.Hsapiens.dbSNP151.GRCh38)
snps<-SNPlocs.Hsapiens.dbSNP151.GRCh38
loc<-snpsById(snps,crispr$snp, ifnotfound="drop")
loc<-merge(as.data.frame(loc),crispr, by.x="RefSNP_id", by.y="snp")
loc$seqnames<-paste0("chr",loc$seqnames)
loc$start<-loc$pos
loc$end<-loc$start+1
loc<-GRanges(loc)

over<-findOverlaps(c2, loc)
loc<-loc[subjectHits(over)]
loc$linked_gene<-c2[queryHits(over)]$gene
s<-loc[which(loc$gene==loc$linked_gene),]
s[!duplicated(s$snp_target),]

overlaps regions on our test list chr17 45893937-45897413 MAPT T chr17 46224752-46225787 MAPT F chr17 46224752-46225787 KANSL1 T

no CR1 no SPI1 all FNBP4 MS4A4E first linked to STX3 2nd MS4A4E there no RIN3 KNOP1 linkes to CCP110 and GPRC5B GPRC5B snp slightly outside peak no PLEKHM1 all MAPT all BIN1 APOC1 snp linked to CLASRP all KANSL1 c4A onelinked to GPSM3" “HLA-DMA” “HLA-DMB” “HLA-DOA” “HLA-DPA1” “HLA-DPB1”HLA-DQA1" “HLA-DQB1” “HLA-DRA” “HLA-DRB1” “HLA-DRB5” no epha1 all CLU

Weiss MPRAs

NPC

all<-read.csv("~/scMultiomics_AD/CTpeaks_annotated.csv")
all<-GRanges(all)
 all$peak<-paste0(start(all),"-",end(all))

 NPC<-read.csv("~/lit_search_validation/Weiss_NPC_MPRA.csv")
NPC<-NPC[,4:13]
colnames(NPC)[c(6,7)]<-c("start","end")
NPC<-NPC[is.na(NPC$start)==F,]
NPC<-GRanges(NPC)

olap<-findOverlaps(unique(c2),NPC)
NPC$sig<-NPC$Active...NPC
NPC$link<-F
NPC[subjectHits(olap)]$link<-T
t<-table(NPC$sig,NPC$link)
p<-prop.table(t,1)
p[3,2]/p[1,2] ## fold enrichment for active mpra 2.6
chisq.test(t[-2,]) ## p-value < 2.2e-16
## 95 links overlap active
a=95

olap<-findOverlaps(all[!(all$peak %in% c2$peak)],NPC)
NPC$sig<-NPC$Active...NPC
NPC$link<-F
NPC[subjectHits(olap)]$link<-T
t<-table(NPC$sig,NPC$link)
chisq.test(t[-2,]) ## p-value < 2.2e-16
## 94 peak have overlap
b=94


mat<-matrix(data=c(length(all[!(all$peak %in% c2$peak),])-b, length(unique(c2))-a,b, a), nrow=2)
fisher.test(mat)

95 percent confidence interval: 1.424516 2.547133 sample estimates: odds ratio 1.905552

ESC

ESC<-read.csv("~/lit_search_validation/Weiss_ESC_MPRA.csv")
ESC<-ESC[,4:13]
colnames(ESC)[c(6,7)]<-c("start","end")
ESC<-ESC[is.na(ESC$start)==F,]
ESC<-GRanges(ESC)

olap<-findOverlaps(unique(c2),ESC)
ESC$sig<-ESC$Active...ESC
ESC$link<-F
ESC[subjectHits(olap)]$link<-T
t<-table(ESC$sig,ESC$link)
p<-prop.table(t,1)
p[3,2]/p[1,2] ## fold enrichment for active mpra 1.77
chisq.test(t[-2,]) ## p-value < 5.36e-10
## 95 links overlap active
a=127

olap<-findOverlaps(all[!(all$peak %in% c2$peak)],ESC)
ESC$sig<-ESC$Active...ESC
ESC$link<-F
ESC[subjectHits(olap)]$link<-T
t<-table(ESC$sig,ESC$link)
chisq.test(t[-2,]) ## p-value < 2.2e-16
## 94 peak have overlap
b=101

mat<-matrix(data=c(length(all[!(all$peak %in% c2$peak),])-b, length(unique(c2))-a,b, a), nrow=2)
fisher.test(mat)

95 percent confidence interval: 1.946489 3.342428 sample estimates: odds ratio 2.547977

Uebbing NPC

uebbing<-read.csv("~/lit_search_validation/Uebbing_NPC_MPRA.csv")
uebbing<-uebbing[,1:20]
uebbing<-GRanges(uebbing)
ch<-import.chain("~/liftover/hg19ToHg38.over.chain")
uebbing38<-liftOver(uebbing, ch)
uebbing<-unlist(uebbing38)  ## lost 7

olap<-findOverlaps(unique(c2),uebbing)
uebbing$sig<-uebbing$Active
uebbing$link<-F
uebbing[subjectHits(olap)]$link<-T
t<-table(uebbing$sig,uebbing$link)
p<-prop.table(t,1)
p[2,2]/p[1,2] ## fold enrichment for active mpra 1.6
chisq.test(t) ## p-value < 2.2e-16
## 95 links overlap active
a=668

olap<-findOverlaps(all[!(all$peak %in% c2$peak)],uebbing)
uebbing$link<-F
uebbing[subjectHits(olap)]$link<-T
t<-table(uebbing$sig,uebbing$link)
chisq.test(t) ## p-value < 2.2e-16
## 94 peak have overlap
b=777

mat<-matrix(data=c(length(all[!(all$peak %in% c2$peak),])-b, length(unique(c2))-a,b, a), nrow=2)
fisher.test(mat)

95 percent confidence interval: 1.570836 1.938006 sample estimates: odds ratio 1.745004

SuRE Hepg2/ K562

sure<-read.table("~/SuRE_SNP_table_181029.txt", sep="\t")
colnames(sure)<-sure[1,]
sure<-sure[-1,]
colnames(sure)[3]<-"start"
sure$end<-as.numeric(sure$start)+1
sure<-GRanges(sure)
ch<-import.chain("~/liftover/hg19ToHg38.over.chain")
sure38<-liftOver(sure, ch)
sure<-unlist(sure38)  ## lost 7
## sure$sig<-sure$k562.wilcox.p.value <0.006192715 | sure$hepg2.wilcox.p.value <0.00173121
sure$sig<-sure$k562.wilcox.p.value <0.01 
sure.sig<-sure[which(sure$sig==T),]

olap<-findOverlaps(unique(c2),sure.sig)
length(unique(queryHits(olap))) ## 43253
c2.olap<-c2[queryHits(olap)]
c2.olap$snp<-sure.sig[subjectHits(olap)]$SNP_ID
a=11083

olap<-findOverlaps(all[!(all$peak %in% c2$peak)],sure.sig)
length(unique(queryHits(olap)))
b=12195

mat<-matrix(data=c(length(all[!(all$peak %in% c2$peak),])-b, length(unique(c2))-a,b, a), nrow=2)
fisher.test(mat)


##  p-value < 2.2e-16
##  alternative hypothesis: true odds ratio is not equal to 1
##  95 percent confidence interval:
##   3.132566 3.328924
##  sample estimates:
##  odds ratio 
##    3.229191 



sure$sig<-sure$hepg2.wilcox.p.value <0.01
sure.sig<-sure[which(sure$sig==T),]

olap<-findOverlaps(unique(c2),sure.sig)
length(unique(queryHits(olap))) ## 43253
c2.olap<-c2[queryHits(olap)]
c2.olap$snp<-sure.sig[subjectHits(olap)]$SNP_ID
a=6713

olap<-findOverlaps(all[!(all$peak %in% c2$peak)],sure.sig)
length(unique(queryHits(olap)))
b=7399

mat<-matrix(data=c(length(all[!(all$peak %in% c2$peak),])-b, length(unique(c2))-a,b, a), nrow=2)
fisher.test(mat)


##  95 percent confidence interval:
##   1.821630 1.949439
##  sample estimates:
##  odds ratio 
##    1.884453 

95 percent confidence interval: 5.825352 6.104481 sample estimates: odds ratio 5.963298

Fig5B) barplot

df<-data.frame(source=c("Bryois_eQTL_2022","Cooper_HEK_MPRA_2022","Weiss_NPC_MPRA_2021","Weiss_ESC_MPRA_2021","Uebbing_NPC_MPRA_2020","GTEx_Brain_FC","SuRE_HepG2","SuRE_K562"), OR=c(3.40,2.42,1.91,2.54,1.74,3.53, 1.88,3.23), ymin=c(3.14,1.46,1.43,1.95,1.57,3.0,1.82,3.13), ymax=c(3.69,4.03,2.55,3.34,1.94,4.10,1.95,3.33))



pdf("~/scMultiomics_AD/lit_search_validation_OR.pdf", width=6, height=4)
ggplot(df[!(df$source %in% c("Bryois_eQTL_2022","GTEx_Brain_FC")),])+geom_bar(stat="identity",aes(y=source,x=OR), fill="slategray3")+geom_errorbar(aes(y=source,xmin=ymin,xmax=ymax))+theme_classic()+ylab("")+ggtitle("Enrichment in linked-peaks")
dev.off()

LINKED

background

peaks<-read.csv("~/scMultiomics_AD/CTpeaks_annotated.csv")
peaks<-GRanges(peaks)
gtf<-import("~/quantseq/STAR/genes.gtf")
gtf<-gtf[which(gtf$type=="start_codon"),]
gtf<-gtf[!duplicated(gtf$gene_name),]
 rs<-rowSums(data[["RNA"]])
gtf2<-gtf[which(gtf$gene_name %in% names(rs[which(rs>500)])),]
 
olap<-findOverlaps(peaks, gtf,maxgap=500000)
allPossible<-peaks[queryHits(olap)]
allPossible$gene<-gtf[subjectHits(olap)]$gene_name
allPossible$link<-paste0(seqnames(allPossible),":",start(allPossible),"-",allPossible$gene)

GTEx

 gtex<-read.table("~/lit_search_validation/Brain_Frontal_Cortex_BA9.v8.egenes.txt", sep="\t")
colnames(gtex)<-gtex[1,]
gtex<-gtex[-1,]
colnames(gtex)[15]<-"start"
gtex$end<-as.numeric(gtex$start)+1

gtex_fc<-GRanges(gtex[which(gtex$qval<0.05),])



c2$link2<-paste0(seqnames(c2),":",start(c2),"-",c2$gene)
olap<-findOverlaps(c2,gtex_fc)
c2_sub<-c2[queryHits(olap)]
c2_sub$test_gene<-gtex_fc[subjectHits(olap)]$gene_name
sum(c2_sub$gene==c2_sub$test_gene)
c2_sub$agree<-c2_sub$gene==c2_sub$test_gene
c2_sub<-c2_sub[order(c2_sub$agree, decreasing=T),]
c2_uni<-c2_sub[!duplicated(c2_sub$index),]
 prop.table(table(c2_uni$agree)) ## 32%
a=157

ap=allPossible[!(allPossible$link %in% c2$link2)]
olap<-findOverlaps(ap,gtex_fc)
ap_sub<-ap[queryHits(olap)]
ap_sub$test_gene<-gtex_fc[subjectHits(olap)]$gene_name
sum(ap_sub$gene==ap_sub$test_gene)
b=347


mat<-matrix(data=c(length(ap)-b, length(c2)-a,b, a), nrow=2)
fisher.test(mat)
## 95 percent confidence interval:
##  3.295097 4.850273
## sample estimates:
## odds ratio 
##   4.004342 

Bryois eQTL

qtl<-read.csv("~//lit_search_validation/eQTL_Bryois.csv")
pos<-read.csv("~//lit_search_validation/eQTL_Bryois_snpPos.csv") ## this is 5% FDR filtered
colnames(pos)[c(5,6)]<-c("seqnames","start")
pos$end<-pos$start+1
pos<-GRanges(pos)
seqlevels(pos)<-paste0("chr",seqlevels(pos))

olap<-findOverlaps(c2, pos)
c2_olap<-c2[queryHits(olap)]
c2_olap$snp<-pos[subjectHits(olap)]$SNP
c2_olap$symbol<-pos[subjectHits(olap)]$symbol
c2_olap$eQTL_celltype<-pos[subjectHits(olap)]$cell_type
agree<-c2_olap[which(c2_olap$gene==c2_olap$symbol),]

## how many multi-linked peaks do I lose in that step
c2_olap$agree<-c2_olap$gene==c2_olap$symbol
c2_olap<-c2_olap[order(-c2_olap$agree),]
t<-table(c2_olap$index, c2_olap$agree)
t<-data.frame(False=t[,1],True=t[,2])
nrow(t[which(t$True>0),])/nrow(t) ## of the peaks that overlap eQTLs. 50% are linked to the eQTL gene






library(SNPlocs.Hsapiens.dbSNP151.GRCh38)
qtl<-read.csv("~/eQTL_Bryois.csv")
qtl<-qtl[,c(1,2,4,10)]
qtl<-qtl[grepl("rs",qtl$SNP),]
snps<-SNPlocs.Hsapiens.dbSNP151.GRCh38
loc<-snpsById(snps,qtl$SNP, ifnotfound="drop")
qtl.loc<-merge(qtl,as.data.frame(loc),by.y="RefSNP_id",by.x="SNP")
qtl.loc$seqnames<-paste0("chr",qtl.loc$seqnames)
colnames(qtl.loc)[6]<-"start"
qtl.loc$end<-qtl.loc$start+1
qtl<-GRanges(qtl.loc)
## ## ## 
qtl$sig<-ifelse(qtl$adj_p<0.05,T,F)
qtl.sig<-qtl[which(qtl$sig==T),]
qtl.sig$pair<-paste0(qtl.sig$SNP,"-", qtl.sig$symbol)
qtl.sig<-qtl.sig[!duplicated(qtl.sig$pair)]
olap<-findOverlaps(c2,qtl.sig)
c2_sub<-c2[queryHits(olap)]
c2_sub$test_gene<-qtl.sig[subjectHits(olap)]$symbol
c2_sub$pair<-qtl.sig[subjectHits(olap)]$pair
c2_sub$agree<-c2_sub$gene==c2_sub$test_gene
sum(c2_sub$gene==c2_sub$test_gene)
c2_sub<-c2_sub[order(c2_sub$agree, decreasing=T),]
c2_uni<-c2_sub[!duplicated(c2_sub$index),]
 prop.table(table(c2_uni$agree)) ## 57%
a=484

ap=allPossible[!(allPossible$link %in% c2$link2)]
olap<-findOverlaps(ap,qtl.sig)
ap_sub<-ap[queryHits(olap)]
ap_sub$test_gene<-qtl.sig[subjectHits(olap)]$symbol
sum(ap_sub$gene==ap_sub$test_gene)
b=673


mat<-matrix(data=c(length(ap)-b, length(c2)-a,b, a), nrow=2)
fisher.test(mat)


## reverse reverse
olap<-findOverlaps(c2,qtl.sig)
keep<-c2_sub[which(c2_sub$gene==c2_sub$test_gene),]$pair
qtl.sig$link<-qtl.sig$pair %in% keep
qtl.sig$olap<-F
qtl.sig[subjectHits(olap)]$olap<-T
t<-table(qtl.sig$olap, qtl.sig$link)

95 percent confidence interval: 1.724113 2.459824 sample estimates: odds ratio 2.064658

qtl<-read.csv("~/scMultiomics_AD/Link_overlap_Bryois_eQTLs.csv")


guides<-read.csv("~/feature_ref3.csv")

c2<-readRDS("~/scMultiomics_AD/AD_Ctrl_links_filt0.02_wPerm_wQval.rds")

guides<-unique(guides$id)
df<-data.frame(seqnames=sapply(strsplit(guides,"_"),`[`,1), start=sapply(strsplit(guides,"_"),`[`,2))
df$start<-as.numeric(df$start)
df$end<-df$start+1
df<-df[complete.cases(df),]
gr<-GRanges(df)
test <-subsetByOverlaps(GRanges(c2), gr)
olap<-findOverlaps(test, gr)
guide<-paste0(seqnames(gr[subjectHits(olap)]),"_",start(gr[subjectHits(olap)]),"_F")
## get guide that is in peak
atest<-test[queryHits(olap)]
atest$guide<-guide
atest$score<-ifelse(atest$group=="AD", atest$score.x, atest$score.y)
atest<-atest[order(abs(atest$score),decreasing=T),]
rna<-read.table("~/HEK/tpms.tsv", sep="\t", header=T)
rna<-rna[,c(1,2,75)] ## nucleus 3' HEK

k27<-import("~/HEK/H3k27ac_HEK.bb") ## it is hg38

over<-findOverlaps(atest, k27)
atest$HEK_k27olap<-"None"
atest[unique(queryHits(over))]$HEK_k27olap<-"Olap"
## didn't set all.x=T bc only lose pseudos (ie AC0..,AL...)
atest2<-merge(as.data.frame(atest),rna, by.x="gene", by.y="Gene.Name")
atest2<-GRanges(atest2)
table(unique(atest2)$HEK_k27olap)

good<-atest2[which(atest2$HEK_k27olap=="Olap" & atest2$nucleus..long.polyA.RNA..NHEK.cell.line>10),]


doubleGood<-good[which(good$link %in% agree$link),]

Geschwin HiC

hic<-read.xlsx("~/Hu_Geschwind_2021_HiC_NeuN.xlsx")

colnames(hic)<-hic[2,]
hic<-hic[-c(1,2),]
colnames(hic)[2:3]<-c("start","end")
hic<-GRanges(hic)
ch<-import.chain("~/liftover/hg19ToHg38.over.chain")
hic38<-liftOver(hic, ch)
hic38<-unlist(hic38) 


c2$link2<-paste0(seqnames(c2),":",start(c2),"-",c2$gene)
neun<-c2[which(c2$Excitatory==T | c2$Inhibitory==T),]
olap<-findOverlaps(neun,hic38)
neun_sub<-neun[queryHits(olap)]
neun_sub$test_gene<-hic38[subjectHits(olap)]$gene
sum(neun_sub$gene==neun_sub$test_gene)
neun_sub$agree<-neun_sub$gene==neun_sub$test_gene
neun_sub<-neun_sub[order(neun_sub$agree, decreasing=T),]
neun_uni<-neun_sub[!duplicated(neun_sub$index),]
 prop.table(table(neun_uni$agree)) ## 44%
a=11056

ap=allPossible[!(allPossible$link %in% c2$link2)]
olap<-findOverlaps(ap,hic38)
ap_sub<-ap[queryHits(olap)]
ap_sub$test_gene<-hic38[subjectHits(olap)]$gene
sum(ap_sub$gene==ap_sub$test_gene)
b=45508


mat<-matrix(data=c(length(ap)-b, length(c2)-a,b, a), nrow=2)
fisher.test(mat)



##  95 percent confidence interval:
##   2.345473 2.443410
##  sample estimates:
##  odds ratio 
##      2.3939 

Fig5B) PLOT

df<-data.frame(source=c("Bryois_eQTL_2022","GTEx_Brain_FC","Hu_NEUN_HiC"), OR=c(6.373,4.00,2.39), ymin=c(5.65,3.30,2.35), ymax=c(7.17,4.85,2.44), type=c(1,1,2))



pdf("~/scMultiomics_AD/lit_search_validation_eQTL_HiC.pdf", width=6, height=4)
ggplot(df)+geom_bar(stat="identity",aes(y=source,x=OR, fill=as.factor(type)))+geom_errorbar(aes(y=source,xmin=ymin,xmax=ymax))+theme_classic()+ylab("")+theme(legend.position="none")+ggtitle("Enrichment in Links")
dev.off()

overlaps

## atest from crispr.rmd
gtex_fc$gene<-gtex_fc$gene_name
gtex_fc$source<-"GTEx"
qtl.sig$gene<-qtl.sig$symbol
qtl.sig$source<-"Bryois"
hic38$source<-"HiC"
ahh<-c(gtex_fc, qtl.sig,hic38)

olap<-findOverlaps(atest, ahh)
atest2<-atest[queryHits(olap)]
atest2$source<-ahh[subjectHits(olap)]$source
atest2$test_gene<-ahh[subjectHits(olap)]$gene

atest2$agree<-atest2$gene==atest2$test_gene
atest2<-atest2[order(atest2$agree, decreasing=T),]
c2$MPRA<-c2$CooperMPRA==T | c2$Weiss_NPC==T | c2$Weiss_ESC==T | c2$Uebbing_MPRA==T | c2$SuRE_HePG2==T | c2$SuRE_K562==T
c2$eQTL<-c2$eQTL_Bryois==T | c2$GTEx_FC==T
df<-as.data.frame(c2)[,c(48,49,50)]

nrow(df[which(df$MPRA == T & df$eQTL == T & df$Geschwin_HiC == T),]) ##1
nrow(df[which(df$MPRA == T & df$eQTL == F & df$Geschwin_HiC == T),]) ##1541
nrow(df[which(df$MPRA == T & df$eQTL == F & df$Geschwin_HiC == F),]) ##58801
nrow(df[which(df$MPRA == T & df$eQTL == T & df$Geschwin_HiC == F),]) ##130
nrow(df[which(df$MPRA == F & df$eQTL == T & df$Geschwin_HiC == T),]) ##9
nrow(df[which(df$MPRA == F & df$eQTL == F & df$Geschwin_HiC == T),]) ##6582
nrow(df[which(df$MPRA == F & df$eQTL == T & df$Geschwin_HiC == F),]) ##477

Overlap Morabito

 mor<-read.table("~/Morabito_Sup4.txt")
colnames(mor)<-mor[1,]
mor<-mor[-1,]

mor$seqnames<-sapply(strsplit(mor$cCRE,"-"), `[`,1)
mor$start<-sapply(strsplit(mor$cCRE,"-"), `[`,2)
mor$end<-sapply(strsplit(mor$cCRE,"-"), `[`,3)
mor<-GRanges(mor)

olap<-findOverlaps(c2, mor)
c2.olap<-c2[queryHits(olap)]
c2.olap$morabito_gene<-mor[subjectHits(olap)]$target_gene
c2.olap$morabito_group<-mor[subjectHits(olap)]$group

agree<-c2.olap[which(c2.olap$gene ==c2.olap$morabito_gene),]

prop1<-length(agree[which(agree$CooperMPRA==T | agree$Weiss_NPC==T | agree$Weiss_ESC==T | agree$Uebbing_MPRA==T | agree$SuRE_K562==T | agree$SuRE_HePG2==T | agree$GTEx_FC==T | agree$eQTL_Bryois==T | agree$Geschwin_HiC==T)])


mat<-matrix(data=c(245775,65873,6589,1668),ncol=2)
fisher.test(t(mat))

Cooper MPRA 1 Weiss NPC 9 Weiss ESC 14 Uebbing MPRA 83 SuRE K562 984 SuRE HepG2 567 GTEx FC 6 Byois eQTL 15 HiC 241