-
Notifications
You must be signed in to change notification settings - Fork 9
/
10x_sACC-n5_step02_clust-annot_MNT.R
executable file
·511 lines (406 loc) · 25 KB
/
10x_sACC-n5_step02_clust-annot_MNT.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
### MNT 10x snRNA-seq workflow: step 02
### **Region-specific analyses**
### - (2x) sACC samples from: Br5161 & Br5212
### Initiated MNT 29Jan2020
### MNT 29Apr2021: add expansion samples (n=3, incl'g 2 female)
#####################################################################
library(SingleCellExperiment)
library(EnsDb.Hsapiens.v86)
library(scater)
library(scran)
library(batchelor)
library(DropletUtils)
library(jaffelab)
library(dendextend)
library(dynamicTreeCut)
source("plotExpressionCustom.R")
### Palette taken from `scater`
tableau10medium = c("#729ECE", "#FF9E4A", "#67BF5C", "#ED665D",
"#AD8BC9", "#A8786E", "#ED97CA", "#A2A2A2",
"#CDCC5D", "#6DCCDA")
tableau20 = c("#1F77B4", "#AEC7E8", "#FF7F0E", "#FFBB78", "#2CA02C",
"#98DF8A", "#D62728", "#FF9896", "#9467BD", "#C5B0D5",
"#8C564B", "#C49C94", "#E377C2", "#F7B6D2", "#7F7F7F",
"#C7C7C7", "#BCBD22", "#DBDB8D", "#17BECF", "#9EDAE5")
# ===
# Load 'pilot' samples
load("/dcs04/lieber/marmaypag/Tran_LIBD001/Matt/MNT_thesis/snRNAseq/10x_pilot_FINAL/rdas/revision/all-FACS-n14_preprint_SCEs_processing-QC_MNTMar2021.rda",
verbose=T)
# pilot.data, pilot.data.unfiltered, e.out, ref.sampleInfo
rm(pilot.data.unfiltered, e.out)
# Load 2021 expansion set
load("/dcs04/lieber/marmaypag/Tran_LIBD001/Matt/MNT_thesis/snRNAseq/10x_pilot_FINAL/rdas/revision/all-FACS-n10_2021rev_SCEs_processing-QC_MNTMar2021.rda", verbose=T)
# pilot.data.2, pilot.data.2.unfiltered, e.out.2, ref.sampleInfo.rev
rm(pilot.data.2.unfiltered, e.out.2)
### MNT comment: At this point, each sample (which is a SCE object in the list, 'pilot.data') has been
# QC'd for cell/nucleus calling ('emptyDrops()' test) and mito rate thresholding
# Additionally, there have been a computed 'doubletScore', which will QC with, after
# clustering (e.g. that there are no doublet-driven clusters, etc.)
### Merging shared-region samples ============================================
# Newest iterations for normalization: multiBatchNorm-alize
# Order as will do for `fastMNN()` (this shouldn't matter here)
sce.sacc <- cbind(pilot.data.2[["br5400.sacc"]], pilot.data[["br5212.sacc"]],
pilot.data[["br5161.sacc"]],
pilot.data.2[["br5701.sacc.neun"]], pilot.data.2[["br5276.sacc.neun"]]
)
sce.sacc
#class: SingleCellExperiment
# dim: 33538 15669
# metadata(5): Samples Samples Samples Samples Samples
# assays(1): counts
# rownames(33538): MIR1302-2HG FAM138A ... AC213203.1 FAM231C
# rowData names(6): gene_id gene_version ... gene_biotype Symbol.uniq
# colnames(15669): AAACCCAAGAGTCTTC-1 AAACCCAAGGCCCGTT-1 ...
# TTTGACTGTATCGTGT-1 TTTGGTTAGCAGCACA-1
# colData names(16): Sample Barcode ... protocol sequencer
# reducedDimNames(0):
# altExpNames(0):
# Use `multiBatchNorm()` to compute log-normalized counts, matching the scaling across samples
sce.sacc <- multiBatchNorm(sce.sacc, batch=sce.sacc$sampleID)
# Use the simple `modelGeneVar` - this makes more sense over `combineVar`, since the
# cell composition is already known to be quite different (with NeuN selection)
geneVar.sacc <- modelGeneVar(sce.sacc)
chosen.hvgs.sacc <- geneVar.sacc$bio > 0
sum(chosen.hvgs.sacc)
# [1] 11898
### Dimensionality reduction ================================================================
# Run `fastMNN` (internally uses `multiBatchPCA`), taking top 100 (instead of default 50 PCs)
set.seed(109)
mnn.hold <- fastMNN(sce.sacc, batch=sce.sacc$sampleID,
merge.order=c("br5400.sacc","br5212.sacc","br5161.sacc",
"br5701.sacc.neun","br5276.sacc.neun"),
subset.row=chosen.hvgs.sacc, d=100,
correct.all=TRUE, get.variance=TRUE,
BSPARAM=BiocSingular::IrlbaParam())
# This temp file just used for getting batch-corrected components (drops a variety of entries)
table(colnames(mnn.hold) == colnames(sce.sacc)) # all TRUE
table(mnn.hold$batch == sce.sacc$sampleID) # all TRUE
# Add them to the SCE, as well as the metadata (though the latter might not be so usefl)
reducedDim(sce.sacc, "PCA_corrected") <- reducedDim(mnn.hold, "corrected") # 100 components
metadata(sce.sacc) <- metadata(mnn.hold)
# Save into a new region-specific SCE object/file
save(sce.sacc, chosen.hvgs.sacc, ref.sampleInfo, ref.sampleInfo.rev,
file="rdas/revision/regionSpecific_sACC-n5_cleaned-combined_SCE_MNT2021.rda")
## 'getClusteredPCs()' evaluated in qsub mode (with 'R-batchJob_sACC-n5_optimalPCselxn_MNT2021.R')
# --> saved into same .rda
### Picking up with optimally-defined PC space ===
load("/dcs04/lieber/marmaypag/Tran_LIBD001/Matt/MNT_thesis/snRNAseq/10x_pilot_FINAL/rdas/revision/regionSpecific_sACC-n5_cleaned-combined_SCE_MNT2021.rda",
verbose=TRUE)
# sce.sacc, chosen.hvgs.sacc, pc.choice.sacc, ref.sampleInfo, ref.sampleInfo.rev
# How many PCs is optimal?:
metadata(pc.choice.sacc)$chosen # [1] 97
## Assign this chosen (97 PCs) to 'PCA_opt'
reducedDim(sce.sacc, "PCA_opt") <- reducedDim(sce.sacc, "PCA_corrected")[ ,1:(metadata(pc.choice.sacc)$chosen)]
## t-SNE
set.seed(109)
sce.sacc <- runTSNE(sce.sacc, dimred="PCA_opt")
## UMAP
set.seed(109)
sce.sacc <- runUMAP(sce.sacc, dimred="PCA_opt")
# How do these look?
plotReducedDim(sce.sacc, dimred="TSNE", colour_by="sampleID")
plotReducedDim(sce.sacc, dimred="UMAP", colour_by="sampleID")
### Clustering: Two-step ======================================================
### Step 1: Perform graph-based clustering in this optimal PC space
# - take k=20 NN to build graph
snn.gr <- buildSNNGraph(sce.sacc, k=20, use.dimred="PCA_opt")
clusters.k20 <- igraph::cluster_walktrap(snn.gr)$membership
table(clusters.k20)
## 67 prelim clusters
# Assign as 'prelimCluster'
sce.sacc$prelimCluster <- factor(clusters.k20)
plotReducedDim(sce.sacc, dimred="TSNE", colour_by="prelimCluster")
# Is sample driving this 'high-res' clustering at this level?
table(sce.sacc$prelimCluster, sce.sacc$sampleID) # (not much!)
# rbind the ref.sampleInfo[.rev]
ref.sampleInfo <- rbind(ref.sampleInfo, ref.sampleInfo.rev)
## Save for now
save(sce.sacc, chosen.hvgs.sacc, pc.choice.sacc, ref.sampleInfo,
file="/dcs04/lieber/marmaypag/Tran_LIBD001/Matt/MNT_thesis/snRNAseq/10x_pilot_FINAL/rdas/revision/regionSpecific_sACC-n5_cleaned-combined_SCE_MNT2021.rda")
### Step 2: Hierarchical clustering of pseudo-bulked ("PB'd") counts
# ** That is, to pseudo-bulk (aka 'cluster-bulk') on raw counts, on all [non-zero] genes,
# normalize with `librarySizeFactors()`, log2-transform, then perform HC'ing
# Preliminary cluster index for pseudo-bulking
clusIndexes = splitit(sce.sacc$prelimCluster)
prelimCluster.PBcounts <- sapply(clusIndexes, function(ii){
rowSums(assays(sce.sacc)$counts[ ,ii])
}
)
# Compute LSFs at this level
sizeFactors.PB.all <- librarySizeFactors(prelimCluster.PBcounts)
# Normalize with these LSFs
geneExprs.temp <- t(apply(prelimCluster.PBcounts, 1, function(x) {log2(x/sizeFactors.PB.all + 1)}))
## Perform hierarchical clustering
dist.clusCollapsed <- dist(t(geneExprs.temp))
tree.clusCollapsed <- hclust(dist.clusCollapsed, "ward.D2")
#tree.clusCollapsed$labels
dend <- as.dendrogram(tree.clusCollapsed, hang=0.2)
# Just for observation
par(cex=.6)
myplclust(tree.clusCollapsed, main="5x sACC prelim-kNN-cluster relationships", cex.main=2, cex.lab=1.5, cex=1.4)
clust.treeCut <- cutreeDynamic(tree.clusCollapsed, distM=as.matrix(dist.clusCollapsed),
minClusterSize=2, deepSplit=1, cutHeight=225)
table(clust.treeCut)
unname(clust.treeCut[order.dendrogram(dend)])
# [1] 0 0 16 16 0 0 0 0 0 12 12 0 15 15 4 4 4 4 9 9 9 8 8 8 13
# [26] 13 0 5 5 5 5 0 2 2 2 2 2 0 10 10 10 3 3 3 3 3 14 14 6 6
# [51] 6 6 0 1 1 1 1 1 1 0 11 11 11 7 7 7 7
## Cutting at 225 looks good for the main neuronal branch, but a lot of glial
# prelim clusters are dropped off (0's)
## Cut at 325 shows that prelimClust's 32, 41 should be merged - the rest will leave, due to more
# extreme height differences:
glia.treeCut <- cutreeDynamic(tree.clusCollapsed, distM=as.matrix(dist.clusCollapsed),
minClusterSize=2, deepSplit=1, cutHeight=325)
unname(glia.treeCut[order.dendrogram(dend)])
# [1] 9 9 9 9 0 0 12 12 10 10 10 0 4 4 4 4 4 4 2 2 2 2 2 2 2
# [26] 2 7 7 7 7 7 0 6 6 6 6 6 1 1 1 1 1 1 1 1 1 5 5 5 5
# [51] 5 5 3 3 3 3 3 3 3 8 11 11 11 8 8 8 8
clust.treeCut[order.dendrogram(dend)][c(7:8)] <- max(clust.treeCut)+c(1) # '17'
# Add new labels to rest of the prelimClusters cut off
clust.treeCut[order.dendrogram(dend)][which(
clust.treeCut[order.dendrogram(dend)]==0)] <- max(clust.treeCut) + c(1,2, 3,3, 4:10)
# ( 'Re-write', in case there are missing numbers )
clust.treeCut[order.dendrogram(dend)] <- as.numeric(as.factor(clust.treeCut[order.dendrogram(dend)]))
## Define color pallet
cluster_colors <- unique(c(tableau10medium, tableau20)[clust.treeCut[order.dendrogram(dend)]])
names(cluster_colors) <- unique(clust.treeCut[order.dendrogram(dend)])
labels_colors(dend) <- cluster_colors[as.character(clust.treeCut[order.dendrogram(dend)])]
# Print for future reference
pdf("pdfs/revision/regionSpecific_sACC-n5_HC-prelimCluster-relationships_MNT2021.pdf", width=9)
par(cex=0.8, font=2)
plot(dend, main="5x sACC prelim-kNN-cluster relationships with collapsed assignments")
dev.off()
# Make reference for new cluster assignment
clusterRefTab.sacc <- data.frame(origClust=order.dendrogram(dend),
merged=clust.treeCut[order.dendrogram(dend)])
# Assign as 'collapsedCluster'
sce.sacc$collapsedCluster <- factor(clusterRefTab.sacc$merged[match(sce.sacc$prelimCluster, clusterRefTab.sacc$origClust)])
# Print some visualizations:
pdf("pdfs/revision/regionSpecific_sACC-n5_reducedDims-with-collapsedClusters_MNT2021.pdf")
plotReducedDim(sce.sacc, dimred="PCA_corrected", ncomponents=5, colour_by="collapsedCluster", point_alpha=0.5)
plotTSNE(sce.sacc, colour_by="sampleID", point_alpha=0.5)
plotTSNE(sce.sacc, colour_by="protocol", point_alpha=0.5)
plotTSNE(sce.sacc, colour_by="collapsedCluster", text_by="collapsedCluster", point_alpha=0.5)
plotTSNE(sce.sacc, colour_by="sum", point_alpha=0.5)
plotTSNE(sce.sacc, colour_by="doubletScore", point_alpha=0.5)
# And some more informative UMAPs
plotUMAP(sce.sacc, colour_by="sampleID", point_alpha=0.5)
plotUMAP(sce.sacc, colour_by="collapsedCluster", text_by="collapsedCluster", point_alpha=0.5)
dev.off()
## Print marker genes for annotation
markers.mathys.custom = list(
'neurons' = c('SYT1', 'SNAP25', 'GRIN1'),
'excitatory_neuron' = c('CAMK2A', 'NRGN','SLC17A7', 'SLC17A6', 'SLC17A8'),
'inhibitory_neuron' = c('GAD1', 'GAD2', 'SLC32A1'),
'oligodendrocyte' = c('MBP', 'MOBP', 'PLP1'),
'oligodendrocyte_precursor' = c('PDGFRA', 'VCAN', 'CSPG4'),
'microglia' = c('CD74', 'CSF1R', 'C3'),
'astrocytes' = c('GFAP', 'TNC', 'AQP4', 'SLC1A2'),
'endothelial' = c('CLDN5', 'FLT1', 'VTN', 'PECAM1'),
# T cell markers - these seem to be somewhat common (across regions)
'Tcell' = c('SKAP1', 'ITK', 'CD247')
)
pdf("pdfs/revision/regionSpecific_sACC-n5_marker-logExprs_collapsedClusters_MNT2021.pdf", height=6, width=10)
for(i in 1:length(markers.mathys.custom)){
print(
plotExpression(sce.sacc, exprs_values = "logcounts", features=c(markers.mathys.custom[[i]]), ncol=2,
x="collapsedCluster", colour_by="collapsedCluster", point_alpha=0.3, point_size=.7,
add_legend=F, show_median=T) +
# (since there are more than 20 collapsed clusters)
# stat_summary(fun.y = median, fun.ymin = median, fun.ymax = median,
# geom = "crossbar", width = 0.3,
# colour=rep(tableau20[1:11], length(markers.mathys.custom[[i]])))
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
ggtitle(label=paste0(names(markers.mathys.custom)[i], " markers"))
)
}
dev.off()
## QC - How do the total UMI distribution look?
newClusIndex <- splitit(sce.sacc$collapsedCluster)
sapply(newClusIndex, function(x) {quantile(sce.sacc[,x]$sum)})
# 1 2 3 4 5 6 7 8 9 10 11
# 0% 2909.00 2448.0 3360 2101.00 2390.0 2465.0 2313.00 2754.0 2984.0 2728 1472.00
# 25% 16433.75 22017.5 13669 26971.75 25059.5 20242.0 14973.75 30164.5 29526.5 13140 20121.75
# 50% 21474.50 30746.0 20617 40460.00 39576.0 26094.5 20633.00 43945.0 48494.0 19108 29225.00
# 75% 27697.50 41323.5 27494 56412.75 56040.5 32440.5 27588.75 60824.0 67895.0 25248 38971.00
# 100% 63006.00 121477.0 93363 127499.00 123619.0 69549.0 67423.00 172041.0 196431.0 73742 84097.00
# 12 13 14 15 16 17 18 19 20 21 *22
# 0% 1218 2156.00 2412.00 3238.00 174.0 1276.0 4614.00 102.00 113.00 111.0 103.0
# 25% 4046 30561.25 22631.00 23977.00 5781.0 7510.5 9990.25 1977.25 3198.50 1540.5 215.0
# 50% 5643 43868.50 32647.00 32559.00 7575.0 10068.0 12587.50 3872.50 4378.00 1706.0 321.0
# 75% 7539 58960.75 40748.75 39867.75 10077.5 12682.5 17470.25 6176.25 5458.25 1926.5 681.5
# 100% 25828 113463.00 73499.00 70217.00 23974.0 34023.0 27467.00 14206.00 11896.00 2649.0 14701.0
# 23 24 25 26 27
# 0% 3436.00 2333.00 2740.0 2874.00 12801
# 25% 19197.00 5914.25 21647.0 13851.50 23846
# 50% 47066.50 7686.00 33647.0 17916.50 27375
# 75% 58822.25 9597.50 45194.5 26535.75 30262
# 100% 102363.00 13985.00 69890.0 40784.00 48721
## doublet score?
sapply(newClusIndex, function(x) {round(quantile(sce.sacc$doubletScore[x]),2)})
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
# 0% 0.02 0.00 0.01 0.01 0.00 0.00 0.05 0.00 0.04 0.02 0.04 0.00 0.02 0.02 0.03 0.00 0.00
# 25% 0.11 0.08 0.05 0.08 0.17 0.08 0.12 0.11 0.28 0.08 0.28 0.03 0.18 0.06 0.17 0.01 0.02
# 50% 0.18 0.16 0.08 0.17 0.99 0.13 0.23 0.23 0.51 0.13 0.46 0.13 0.31 0.28 0.29 0.05 0.06
# 75% 0.31 0.30 0.18 0.39 1.71 0.48 0.54 0.41 0.83 0.21 0.70 0.45 0.54 0.98 0.47 0.12 0.13
# 100% 8.45 14.34 9.20 10.08 3.66 3.81 5.88 23.05 5.30 11.64 17.43 7.19 12.44 4.31 5.08 2.00 5.66
# *18 19 20 21 22 23 24 25 26 27
# 0% 2.37 0.00 0.00 0.00 0.01 0.05 0.39 0.02 0.04 0.10
# 25% 3.73 0.01 0.00 0.00 0.05 1.24 0.44 0.18 0.10 0.21
# 50% 5.34 0.01 0.02 0.01 0.12 2.09 0.71 0.39 0.14 0.28
# 75% 6.43 0.04 0.05 0.02 0.36 2.41 1.08 0.74 0.21 1.05
# 100% 20.33 3.21 5.84 0.30 1.68 13.00 1.20 1.59 4.74 2.00
# At 'prelimCluster' level?
clusIndex <- splitit(sce.sacc$prelimCluster)
sapply(clusIndex, function(x) {round(quantile(sce.sacc$doubletScore[x]),2)})
# 62 [moderately, but with median > 5], which is == collapsedCluster 18 (see above); 32, 53 (meds 4.1, 3.9)
table(sce.sacc$collapsedCluster)
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
# 842 912 465 856 575 384 330 1735 311 521 206 4389 428 208 228 747
# 17 18 19 20 21 22 23 24 25 26 27
# 911 28 160 784 195 298 30 20 39 42 25
## Add annotations, looking at marker gene expression
annotationTab.sacc <- data.frame(collapsedCluster=c(1:27))
annotationTab.sacc$cellType <- NA
annotationTab.sacc$cellType[c(4:5,8:9,13,15,23)] <- paste0("Excit_", c("A","B","C","D","E","F","G"))
annotationTab.sacc$cellType[c(1:3,6:7,10:11,14,25:27)] <- paste0("Inhib_",
c("A","B","C","D","E","F","G","H","I","J","K"))
annotationTab.sacc$cellType[c(12,21)] <- paste0("Oligo_", c("A","B"))
annotationTab.sacc$cellType[17] <- c("OPC")
annotationTab.sacc$cellType[18] <- "drop.doublet" # ahhh, these are doublets (see above)
# they express both astro & oligo markers - btw it's prelimCluster 62
annotationTab.sacc$cellType[c(16,19)] <- paste0("Astro_", c("A","B"))
annotationTab.sacc$cellType[20] <- "Micro"
annotationTab.sacc$cellType[22] <- "drop.lowNTx"
annotationTab.sacc$cellType[24] <- "Neu_FAT2.CDH15"
# perhaps this one is axonal debris?? It was prelimCluster 67 (stuck out a ton)
# Then add these to the SCE
sce.sacc$cellType <- annotationTab.sacc$cellType[match(sce.sacc$collapsedCluster,
annotationTab.sacc$collapsedCluster)]
sce.sacc$cellType <- factor(sce.sacc$cellType)
# Set colors to be stable for this region/these cell classes
cell_colors.sacc <- cluster_colors[order(as.integer(names(cluster_colors)))]
names(cell_colors.sacc) <- annotationTab.sacc$cellType
cell_colors.sacc
# Inhib_A Inhib_B Inhib_C Excit_A Excit_B Inhib_D
# "#729ECE" "#FF9E4A" "#67BF5C" "#ED665D" "#AD8BC9" "#A8786E"
# Inhib_E Excit_C Excit_D Inhib_F Inhib_G Oligo_A
# "#ED97CA" "#A2A2A2" "#CDCC5D" "#6DCCDA" "#1F77B4" "#AEC7E8"
# Excit_E Inhib_H Excit_F Astro_A OPC drop.doublet
# "#FF7F0E" "#FFBB78" "#2CA02C" "#98DF8A" "#D62728" "#FF9896"
# Astro_B Micro Oligo_B drop.lowNTx Excit_G Neu_FAT2.CDH15
# "#9467BD" "#C5B0D5" "#8C564B" "#C49C94" "#E377C2" "#F7B6D2"
# Inhib_I Inhib_J Inhib_K
# "#7F7F7F" "#C7C7C7" "#BCBD22"
## Save
save(sce.sacc, chosen.hvgs.sacc, pc.choice.sacc, clusterRefTab.sacc, ref.sampleInfo, annotationTab.sacc, cell_colors.sacc,
file="/dcs04/lieber/marmaypag/Tran_LIBD001/Matt/MNT_thesis/snRNAseq/10x_pilot_FINAL/rdas/revision/regionSpecific_sACC-n5_cleaned-combined_SCE_MNT2021.rda")
## Re-print marker expression with cell type labels ===
# load("/dcs04/lieber/marmaypag/Tran_LIBD001/Matt/MNT_thesis/snRNAseq/10x_pilot_FINAL/rdas/revision/regionSpecific_sACC-n5_cleaned-combined_SCE_MNT2021.rda",
# verbose=T)
pdf("pdfs/revision/regionSpecific_sACC-n5_marker-logExprs_collapsedClusters_MNT2021.pdf", height=4.5, width=10)
for(i in 1:length(markers.mathys.custom)){
print(
plotExpressionCustom(sce = sce.sacc,
features = markers.mathys.custom[[i]],
features_name = names(markers.mathys.custom)[[i]],
anno_name = "cellType") +
scale_color_manual(values = cell_colors.sacc)
)
}
dev.off()
# Optionally, drop doublet cluster & 'drop.lowNTx' and re-print
sce.sacc <- sce.sacc[ ,-grep("drop.",sce.sacc$cellType)]
sce.sacc$cellType <- droplevels(sce.sacc$cellType)
# Final comment - the Micro cluster--its prelim clusters were checked for endothelial
# markers, but doesn't seem they were merged together. Perhaps this dataset
# just didn't capture any (unlike for the AMY)
## Re-print reducedDims with these annotations ===
pdf("pdfs/revision/regionSpecific_sACC-n5_reducedDims-with-collapsedClusters_MNT2021.pdf", width=8)
plotReducedDim(sce.sacc, dimred="PCA_corrected", ncomponents=5, colour_by="cellType", point_alpha=0.5) +
scale_color_manual(values = cell_colors.sacc) + labs(colour="Cell type")
plotTSNE(sce.sacc, colour_by="sampleID", point_alpha=0.5, point_size=2)
plotTSNE(sce.sacc, colour_by="protocol", point_alpha=0.5, point_size=2)
plotTSNE(sce.sacc, colour_by="prelimCluster", text_by="prelimCluster",
text_size=3, point_alpha=0.5, point_size=2)
plotTSNE(sce.sacc, colour_by="cellType", text_by="cellType",
text_size=3, point_alpha=0.5, point_size=2) +
scale_color_manual(values = cell_colors.sacc,
labels=paste0(levels(sce.sacc$cellType)," (",table(sce.sacc$cellType),")")) +
labs(colour="Cell type")
plotTSNE(sce.sacc, colour_by="sum", point_alpha=0.5, point_size=2)
plotTSNE(sce.sacc, colour_by="doubletScore", point_alpha=0.5, point_size=2)
# And some more informative UMAPs
plotUMAP(sce.sacc, colour_by="prelimCluster", text_by="prelimCluster",
text_size=3, point_alpha=0.5, point_size=2)
plotUMAP(sce.sacc, colour_by="cellType", text_by="cellType",
text_size=3, point_alpha=0.5, point_size=2) +
scale_color_manual(values = cell_colors.sacc,
labels=paste0(levels(sce.sacc$cellType)," (",table(sce.sacc$cellType),")")) +
labs(colour="Cell type")
dev.off()
## -> proceed to 'step03_markerDetxn-analyses[...].R'
### Session info for 06Jun2021 ==========================================================
sessionInfo()
# R version 4.0.4 RC (2021-02-08 r79975)
# Platform: x86_64-pc-linux-gnu (64-bit)
# Running under: CentOS Linux 7 (Core)
#
# Matrix products: default
# BLAS: /jhpce/shared/jhpce/core/conda/miniconda3-4.6.14/envs/svnR-4.0.x/R/4.0.x/lib64/R/lib/libRblas.so
# LAPACK: /jhpce/shared/jhpce/core/conda/miniconda3-4.6.14/envs/svnR-4.0.x/R/4.0.x/lib64/R/lib/libRlapack.so
#
# locale:
# [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8
# [4] LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
# [7] LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C
# [10] LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#
# attached base packages:
# [1] parallel stats4 stats graphics grDevices datasets utils methods
# [9] base
#
# other attached packages:
# [1] dynamicTreeCut_1.63-1 dendextend_1.14.0 jaffelab_0.99.30
# [4] rafalib_1.0.0 DropletUtils_1.10.3 batchelor_1.6.2
# [7] scran_1.18.5 scater_1.18.6 ggplot2_3.3.3
# [10] EnsDb.Hsapiens.v86_2.99.0 ensembldb_2.14.1 AnnotationFilter_1.14.0
# [13] GenomicFeatures_1.42.3 AnnotationDbi_1.52.0 SingleCellExperiment_1.12.0
# [16] SummarizedExperiment_1.20.0 Biobase_2.50.0 GenomicRanges_1.42.0
# [19] GenomeInfoDb_1.26.7 IRanges_2.24.1 S4Vectors_0.28.1
# [22] BiocGenerics_0.36.1 MatrixGenerics_1.2.1 matrixStats_0.58.0
#
# loaded via a namespace (and not attached):
# [1] googledrive_1.0.1 ggbeeswarm_0.6.0 colorspace_2.0-0
# [4] ellipsis_0.3.2 scuttle_1.0.4 bluster_1.0.0
# [7] XVector_0.30.0 BiocNeighbors_1.8.2 rstudioapi_0.13
# [10] bit64_4.0.5 fansi_0.4.2 xml2_1.3.2
# [13] splines_4.0.4 R.methodsS3_1.8.1 sparseMatrixStats_1.2.1
# [16] cachem_1.0.4 Rsamtools_2.6.0 ResidualMatrix_1.0.0
# [19] dbplyr_2.1.1 R.oo_1.24.0 HDF5Array_1.18.1
# [22] compiler_4.0.4 httr_1.4.2 dqrng_0.2.1
# [25] assertthat_0.2.1 Matrix_1.3-2 fastmap_1.1.0
# [28] lazyeval_0.2.2 limma_3.46.0 BiocSingular_1.6.0
# [31] prettyunits_1.1.1 tools_4.0.4 rsvd_1.0.3
# [34] igraph_1.2.6 gtable_0.3.0 glue_1.4.2
# [37] GenomeInfoDbData_1.2.4 dplyr_1.0.5 rappdirs_0.3.3
# [40] Rcpp_1.0.6 vctrs_0.3.6 Biostrings_2.58.0
# [43] rhdf5filters_1.2.0 rtracklayer_1.50.0 DelayedMatrixStats_1.12.3
# [46] stringr_1.4.0 beachmat_2.6.4 lifecycle_1.0.0
# [49] irlba_2.3.3 statmod_1.4.35 XML_3.99-0.6
# [52] edgeR_3.32.1 zlibbioc_1.36.0 scales_1.1.1
# [55] hms_1.0.0 ProtGenerics_1.22.0 rhdf5_2.34.0
# [58] RColorBrewer_1.1-2 curl_4.3 memoise_2.0.0
# [61] gridExtra_2.3 segmented_1.3-3 biomaRt_2.46.3
# [64] stringi_1.5.3 RSQLite_2.2.7 BiocParallel_1.24.1
# [67] rlang_0.4.10 pkgconfig_2.0.3 bitops_1.0-7
# [70] lattice_0.20-41 purrr_0.3.4 Rhdf5lib_1.12.1
# [73] GenomicAlignments_1.26.0 bit_4.0.4 tidyselect_1.1.1
# [76] magrittr_2.0.1 R6_2.5.0 generics_0.1.0
# [79] DelayedArray_0.16.3 DBI_1.1.1 pillar_1.6.0
# [82] withr_2.4.2 RCurl_1.98-1.3 tibble_3.1.1
# [85] crayon_1.4.1 utf8_1.2.1 BiocFileCache_1.14.0
# [88] viridis_0.6.0 progress_1.2.2 locfit_1.5-9.4
# [91] grid_4.0.4 blob_1.2.1 R.utils_2.10.1
# [94] openssl_1.4.3 munsell_0.5.0 beeswarm_0.3.1
# [97] viridisLite_0.4.0 vipor_0.4.5 askpass_1.1