How-to: build co-expression networks
Gene expression profiles that co-vary suggest co-regulation, co-function or co-localization. This pipeline involves the construction of individual gene co-expression networks, the aggregation of these networks, and then an assessment via neighbor-voting. Differences in bulk and single-cell expression data mean care needs to be taken when generating these networks. We provide guidelines for both.
Co-expression - bulk RNA-seq
Data
Read in expression data. This could be counts or normalized data. A minimum of 20 samples per experiment is recommended. And genes with expression in at least 80% of these samples, with at least 1 CPM.
load("counts.Rdata")
calc_cpm <- function(X){
K = colSums(X)
X.cpm = sapply(1:length(K), function(k) 10^6*X[,k]/K[k] )
return(X.cpm)
}
cpm = calc_cpm(counts)
nsamp = dim(counts)[2]
keep = rowSums(cpm >=1) > (0.8 * nsamp)
Building coexpression networks
net = EGAD::build_coexp_network(cpm, genes[keep])
nd = EGAD::node_degree(net)
EGAD::plot_distribution(nd, xlab="Node degrees")
Or
gene.corr = cor( t(cpm), method="s"))
bottom = row(gene.corr) > col(gene.corr)
gene.ranked = rank(gene.corr[bottom] , na.last = "keep", ties.method = "average")
gene.ranked = gene.ranked/max(gene.ranked)
net = gene.corr * 0
net[bottom] = gene.ranked
net = net + t(net)
diag(net) = 1
Aggregating
agg = diag(sum(keep))
for (i in 1:n_experiments) {
id = experiment_ids[i]
f.c = which(samples[,1] == id)
sub = exprs[keep,f.c]
net = build_coexp_network(sub, genes[keep])
med = median(net, na.rm=T)
net[is.na(net)] = med
# Save individual networks
# save(net, file=paste("coexp",i, "Rdata", sep=".") )
if(i==1 ) {
agg = net
} else {
agg = net + agg
}
}
agg.rank = matrix(rank(agg, na.last = "keep", ties.method = "average"), nrow=dim(agg)[1], ncol=dim(agg)[2] )
rownames(agg.rank) = rownames(agg)
colnames(agg.rank) = rownames(agg)
agg.rank = agg.rank/max(agg.rank, na.rm=T)
#save(agg.rank, file="coexp.agg.rank.Rdata")
Assessing
aurocs = run_GBA(agg.rank, annotations)
aurocs[[2]] = ""
EGAD::plot_density_compare(aurocs[[1]][,1], aurocs[[1]][,3])
Co-expression - single-cell RNA-seq
Data
Navigate to: https://support.10xgenomics.com/single-cell-gene-expression/datasets/3.0.0/pbmc_10k_v3 Submit permissions. Download filtered feature matrix folder:
wget http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_filtered_feature_bc_matrix.tar.gz
gunzip pbmc_10k_v3_filtered_feature_bc_matrix.tar.gz
Read and parse data using Seurat
Note, v3 chemistry has higher mitohondrial %. Not sure if this is techincal or that more cells are dying due to the new chemistry.
library(Seurat)
data <- Read10X(data.dir = "filtered_feature_bc_matrix/")
pbmc <- CreateSeuratObject(counts = data, project = "pbmc", min.cells = 3, min.features = 200)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
VlnPlot(pbmc, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
plot1 <- FeatureScatter(pbmc, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(pbmc, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
CombinePlots(plots = list(plot1, plot2))
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & percent.mt < 20)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
DimPlot(pbmc, reduction = "pca")
ElbowPlot(pbmc)
pbmc <- FindNeighbors(pbmc, dims = 1:10)
pbmc <- FindClusters(pbmc, resolution = 0.5)
pbmc <- RunTSNE(pbmc, dims = 1:10)
DimPlot(pbmc, reduction = "tsne")
save(pbmc, file="pbmc.Rdata")
Building coexpression networks
exprs = pbmc@assays$RNA@scale.data
net = build_coexp_network(exprs, genes )
save(net, file="coexp.scaled.Rdata")
exprs = as.matrix(pbmc@assays$RNA@data)
net = build_coexp_network(exprs, genes )
save(net, file="coexp.norm.Rdata")
exprs = calc_cpm( as.matrix(pbmc@assays$RNA@counts))
net = build_coexp_network(exprs, genes )
save(net, file="coexp.cpm.Rdata")
exprs = as.matrix(pbmc@assays$RNA@counts)
net = build_coexp_network(exprs, genes )
save(net, file="coexp.raw.Rdata")
Assessing
aurocs = run_GBA(net, annotations)
aurocs[[2]] = ""
EGAD::plot_density_compare(aurocs[[1]][,1], aurocs[[1]][,3])
Aggregating
agg = diag(sum(keep))
for (i in 1:n_experiments) {
id = experiment_ids[i]
f.c = which(samples[,1] == id)
sub = exprs[keep,f.c]
net = build_coexp_network(sub, genes[keep])
med = median(net, na.rm=T)
net[is.na(net)] = med
# Save individual networks
# save(net, file=paste("coexp",i, "Rdata", sep=".") )
if(i==1 ) {
agg = net
} else {
agg = net + agg
}
}
agg.rank = matrix(rank(agg, na.last = "keep", ties.method = "average"), nrow=dim(agg)[1], ncol=dim(agg)[2] )
rownames(agg.rank) = rownames(agg)
colnames(agg.rank) = rownames(agg)
agg.rank = agg.rank/max(agg.rank, na.rm=T)
#save(agg.rank, file="coexp.agg.rank.Rdata")
Assessing
aurocs = run_GBA(agg.rank, annotations)
aurocs[[2]] = ""
EGAD::plot_density_compare(aurocs[[1]][,1], aurocs[[1]][,3])