-
Notifications
You must be signed in to change notification settings - Fork 0
/
02_Comparisons.R
2083 lines (1682 loc) · 105 KB
/
02_Comparisons.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
## 1.2 Comparison of DEG
load(here("processed-data/04_DEA/Gene_analysis/top_genes_blood_smoking_naive.Rdata"))
load(here("processed-data/04_DEA/Gene_analysis/top_genes_blood_smoking_fitted.Rdata"))
load(here("processed-data/04_DEA/Gene_analysis/top_genes_blood_smoking_interaction.Rdata"))
load(here("processed-data/04_DEA/Gene_analysis/top_genes_adults_nicotine_naive.Rdata"))
load(here("processed-data/04_DEA/Gene_analysis/top_genes_adults_nicotine_fitted.Rdata"))
load(here("processed-data/04_DEA/Gene_analysis/top_genes_adults_nicotine_interaction.Rdata"))
load(here("processed-data/04_DEA/Gene_analysis/top_genes_adults_smoking_naive.Rdata"))
load(here("processed-data/04_DEA/Gene_analysis/top_genes_adults_smoking_fitted.Rdata"))
load(here("processed-data/04_DEA/Gene_analysis/top_genes_adults_smoking_interaction.Rdata"))
load(here("processed-data/04_DEA/Gene_analysis/de_genes_pups_nicotine_naive.Rdata"))
load(here("processed-data/04_DEA/Gene_analysis/top_genes_pups_nicotine_naive.Rdata"))
load(here("processed-data/04_DEA/Gene_analysis/de_genes_pups_nicotine_fitted.Rdata"))
load(here("processed-data/04_DEA/Gene_analysis/top_genes_pups_nicotine_fitted.Rdata"))
load(here("processed-data/04_DEA/Gene_analysis/top_genes_pups_nicotine_interaction.Rdata"))
load(here("processed-data/04_DEA/Gene_analysis/de_genes_pups_smoking_naive.Rdata"))
load(here("processed-data/04_DEA/Gene_analysis/top_genes_pups_smoking_naive.Rdata"))
load(here("processed-data/04_DEA/Gene_analysis/de_genes_pups_smoking_fitted.Rdata"))
load(here("processed-data/04_DEA/Gene_analysis/top_genes_pups_smoking_fitted.Rdata"))
load(here("processed-data/04_DEA/Gene_analysis/top_genes_pups_smoking_interaction.Rdata"))
load(here("processed-data/04_DEA/Gene_analysis/top_genes_results.Rdata"))
## Pup brain Tx data
load(here("processed-data/04_DEA/Tx_analysis/top_tx_nic.Rdata"))
load(here("processed-data/04_DEA/Tx_analysis/top_tx_smo.Rdata"))
## Data from prenatal and adult human postmortem prefrontal cortices exposed to cigarette smoke
## (from Semick, S.A. et al. (2018) in Mol Psychiatry, DOI: https://doi.org/10.1038/s41380-018-0223-1
## Check code here: https://github.com/LieberInstitute/Smoking_DLPFC_Devel/blob/master/README.md)
load(here("raw-data/Genes_DE_sva.rda"))
## Data of nearest and associated genes to genome-wide significant SNPs related to Tobacco Use Disorder (TUD)
## (from Toikumo, S. et al. (2023), in medRxiv, DOI: https://doi.org/10.1101/2023.03.27.23287713)
TUD_multi_UKBB <- read_excel("raw-data/TUD-multi+UKBB_NearestGenes.xlsx")
TUD_EUR_UKBB <- read_excel("raw-data/TUD-EUR+UKBB_NearestGenes.xlsx")
TUD_AA <- read_excel("raw-data/TUD-AA_NearestGenes.xlsx")
TUD_EUR_MAGMA <- read_excel("raw-data/TUD-EUR_MAGMA_associatedGenes.xlsx")
TUD_EUR_H_MAGMA<- read_excel("raw-data/TUD-EUR_H-MAGMA_associatedGenes.xlsx")
TUD_EUR_S_MultiXcan <- read_excel("raw-data/TUD-EUR_S-MultiXcan_associatedGenes.xlsx")
TUD_EUR_S_PrediXcan <- read_excel("raw-data/TUD-EUR_S-PrediXcan_associatedGenes.xlsx")
### 1.2.1 T-stats plots
## Function to add DE info of genes in both groups
add_DE_info <-function(top_genes1, top_genes2, name_1, name_2) {
DE<-vector()
for (i in 1:dim(top_genes1)[1]) {
## DE genes in both groups
if (top_genes1$adj.P.Val[i]<0.05 && top_genes2$adj.P.Val[i]<0.05) {
DE<-append(DE, "sig Both")
}
## DE genes in only one of the groups
else if (top_genes1$adj.P.Val[i]<0.05 && !top_genes2$adj.P.Val[i]<0.05) {
DE<-append(DE, paste("sig", name_1))
}
else if (top_genes2$adj.P.Val[i]<0.05 && !top_genes1$adj.P.Val[i]<0.05) {
DE<-append(DE, paste("sig", name_2))
}
## No DE genes in neither group
else {
DE<-append(DE, "None")
}
}
return(DE)
}
## Compare t-stats of genes from different groups of samples
t_stat_plot <- function(top_genes1, top_genes2, name_1, name_2, model_name){
## Correlation coeff
rho <- cor(top_genes1$t, top_genes2$t, method = "spearman")
rho_anno = paste0("rho = ", format(round(rho, 2), nsmall = 2))
## Colors and transparency
cols <- c("deeppink3", "thistle3","navajowhite2", "darkgrey")
names(cols)<-c("sig Both", paste0("sig ", name_1), paste0("sig ", name_2), "None")
alphas <- c( 1, 1, 1,0.5)
names(alphas)<-c("sig Both", paste0("sig ", name_1), paste0("sig ", name_2), "None")
## Merge data
t_stats<-data.frame(t1=top_genes1$t, t2=top_genes2$t)
## Add DE info for both groups
t_stats$DEG<-add_DE_info(top_genes1, top_genes2, name_1, name_2)
t_stats$DEG <- factor(t_stats$DEG, levels=names(cols))
plot <- ggplot(t_stats, aes(x = t1, y = t2, color=DEG, alpha=DEG)) +
geom_point(size = 1.5) +
scale_color_manual(values = cols, labels=names(cols), drop = FALSE) +
scale_alpha_manual(values = alphas, labels=names(alphas), drop=FALSE) +
labs(x = paste("t-stats", name_1),
y = paste("t-stats", name_2),
title = model_name,
subtitle = rho_anno,
color = "Differential expression",
parse = T) +
guides(alpha = 'none', color = guide_legend(override.aes = list(size=2))) +
theme_bw() +
theme(plot.margin = unit(c(1,1,1,1), "cm"),
axis.title = element_text(size = 12),
axis.text = element_text(size = 10),
legend.text = element_text(size=11),
legend.title = element_text(size=12))
return(plot)
}
## Function to create multiple t-stats plots
tstats_plots<-function(top_genes_pairs, name_1, name_2, models){
plots<-list()
for (i in 1:length(top_genes_pairs)){
p<-t_stat_plot(top_genes_pairs[[i]][[1]], top_genes_pairs[[i]][[2]], name_1, name_2, models[i])
plots[[i]]<-p
}
plot_grid(plots[[1]], plots[[2]], plots[[3]], ncol=2, align = 'hv')
ggsave(filename=paste("plots/04_DEA/02_Comparisons/Gene_analysis/t_stats_",gsub(" ", "_", name_1), "_VS_",
gsub(" ", "_", name_2), ".pdf", sep=""),
height = 19, width = 27, units = "cm")
}
## Analyses
################################################################################
## 1. Compare tissues - Analysis of blood vs brain biomarkers
################################################################################
#################### 1.1 Compare t-stats in the 3 models #######################
####### Smoking adult blood vs Smoking adult brain #######
top_genes_pairs<-list(list(top_genes_adults_smoking_naive, top_genes_blood_smoking_naive),
list(top_genes_adults_smoking_fitted, top_genes_blood_smoking_fitted),
list(top_genes_adults_smoking_interaction, top_genes_blood_smoking_interaction))
models<-c("Naive model", "Fitted model", "Interaction model")
tstats_plots(top_genes_pairs, "Smoking adult brain", "Smoking adult blood", models)
####### Smoking adult blood vs Nicotine adult brain #######
top_genes_pairs<-list(list(top_genes_adults_nicotine_naive, top_genes_blood_smoking_naive),
list(top_genes_adults_nicotine_fitted, top_genes_blood_smoking_fitted),
list(top_genes_adults_nicotine_interaction, top_genes_blood_smoking_interaction))
models<-c("Naive model", "Fitted model", "Interaction model")
tstats_plots(top_genes_pairs, "Nicotine adult brain", "Smoking adult blood", models)
####### Smoking adult blood vs Smoking pup brain #######
top_genes_pairs<-list(list(top_genes_pups_smoking_naive, top_genes_blood_smoking_naive),
list(top_genes_pups_smoking_fitted, top_genes_blood_smoking_fitted),
list(top_genes_pups_smoking_interaction, top_genes_blood_smoking_interaction))
models<-c("Naive model", "Fitted model", "Interaction model")
tstats_plots(top_genes_pairs, "Smoking pup brain", "Smoking adult blood", models)
####### Smoking adult blood vs Nicotine pup brain #######
top_genes_pairs<-list(list(top_genes_pups_nicotine_naive, top_genes_blood_smoking_naive),
list(top_genes_pups_nicotine_fitted, top_genes_blood_smoking_fitted),
list(top_genes_pups_nicotine_interaction, top_genes_blood_smoking_interaction))
models<-c("Naive model", "Fitted model", "Interaction model")
tstats_plots(top_genes_pairs, "Nicotine pup brain", "Smoking adult blood", models)
####################### 1.2 Search mouse brain genes/txs that replicate in mouse blood #######################
## (With p<0.05 in blood, FDR<0.05 in pup brain/p<0.05 in adult brain, and the same logFC sign in both tissues)
## Use mouse genes and txs from fitted models only
t_stat_plot_brain_blood_replication <- function(age_mouse, expt_mouse, feature){
## Define blood dataset
top_genes_blood <- top_genes_blood_smoking_fitted
## Width of plot
if (age_mouse == "pups"){
width=22
}
else{
width=20.5
}
# ----------------------------------------------------------------------------
## Compare blood genes vs brain genes
if (feature=="genes"){
## Define brain dataset
top_genes_brain <-eval(parse_expr(paste("top_genes", age_mouse, expt_mouse, "fitted", sep="_")))
## Merge brain and blood data
## (Brain and blood datasets contain the same genes in the same order)
t_stats<-data.frame(ensemblID=rownames(top_genes_brain), gene_symbol=top_genes_brain$Symbol, t_blood=top_genes_blood$t, t_brain=top_genes_brain$t,
FDR_blood=top_genes_blood$adj.P.Val, FDR_brain=top_genes_brain$adj.P.Val, P.Val_blood=top_genes_blood$P.Value, P.Val_brain=top_genes_brain$P.Value,
logFC_blood=top_genes_blood$logFC, logFC_brain=top_genes_brain$logFC)
## Colors and alphas for plot
if (age_mouse=="pups"){
cols <- c("deeppink3", "thistle3", "darkgrey")
names(cols)<-c("Replicating genes (p<0.05 in blood, FDR<0.05 in brain)", "Signif in brain (FDR<0.05)", "ns genes")
alphas <- c(1, 1, 0.5)
names(alphas)<-c("Replicating genes (p<0.05 in blood, FDR<0.05 in brain)", "Signif in brain (FDR<0.05)", "ns genes")
}
else {
cols <- c("deeppink3", "darkgrey")
names(cols)<-c("Replicating genes (p<0.05 in blood and brain)", "ns genes")
alphas <- c(1, 0.3)
names(alphas)<-c("Replicating genes (p<0.05 in blood and brain)", "ns genes")
}
## Add DE and replication info for each gene
DE <-vector()
for (i in 1:dim(t_stats)[1]) {
## Pup brain genes
if (age_mouse=="pups"){
## Pup brain genes (FDR<0.05) that replicate in blood (p<0.05)
if ((t_stats$P.Val_blood[i]<0.05 && t_stats$FDR_brain[i]<0.05) &&
(sign(t_stats$logFC_blood[i])==sign(t_stats$logFC_brain[i]))) {
DE<-append(DE, "Replicating genes (p<0.05 in blood, FDR<0.05 in brain)")
}
## DEG in brain
## (Note that all replicating genes of pup brain are also DEG (FDR<0.05))
else if (t_stats$FDR_brain[i]<0.05){
DE<-append(DE, "Signif in brain (FDR<0.05)")
}
## Non-significant genes
else {
DE<-append(DE, "ns genes")
}
}
## Adult brain genes
else {
## Adult brain genes (p<0.05) that replicate in blood (p<0.05)
if ((t_stats$P.Val_blood[i]<0.05 && t_stats$P.Val_brain[i]<0.05) &&
(sign(t_stats$logFC_blood[i])==sign(t_stats$logFC_brain[i]))) {
DE<-append(DE, "Replicating genes (p<0.05 in blood and brain)")
}
else {
DE<-append(DE, "ns genes")
}
}
}
t_stats$DE<- DE
t_stats$DE <- factor(t_stats$DE, levels=names(cols))
## Correlation coeff between t-stats of blood vs brain genes
rho <- cor(t_stats$t_blood, t_stats$t_brain, method = "spearman")
rho_anno = paste0("rho = ", format(round(rho, 2), nsmall = 2))
## Add labels of interesting genes
## 3 most significant replicating genes in blood
rep_genes <- t_stats[which(t_stats$DE==names(alphas)[1]),]
top_rep_genes <- rep_genes[order(rep_genes$FDR_blood),"gene_symbol"][1:3]
label <- vector()
for (i in 1:dim(t_stats)[1]){
## Labels of the top 3 replicating genes
if (t_stats$gene_symbol[i] %in% top_rep_genes){
label <- append(label, t_stats$gene_symbol[i])
}
else{
label <- append(label, NA)
}
}
t_stats$label <- label
## Plot
plot <- ggplot(t_stats, aes(x = t_brain, y = t_blood, color=DE, alpha=DE, label=label)) +
geom_point(size = 1.5) +
labs(x = paste("t-stats in", capitalize(expt_mouse), substr(age_mouse, 1, nchar(age_mouse)-1), "brain"),
y = "t-stats in Smoking adult blood",
title = paste(capitalize(expt_mouse), "mouse brain vs Smoking mouse blood", sep=" "),
color = 'DE/Replication',
subtitle = rho_anno,
parse = T) +
scale_color_manual(values = cols, labels=names(cols), drop = FALSE) +
scale_alpha_manual(values = alphas, labels=names(alphas), drop=FALSE) +
guides(alpha = 'none', color = guide_legend(override.aes = list(size=2))) +
geom_label_repel(aes(fontface = 'bold'), fill='white', color='black',
size=3.3,
max.overlaps = 15,
min.segment.length = unit(0, "cm"),
point.padding = unit(0.1, "cm"),
box.padding = 0.2,
label.padding = 0.2,
label.size = 0.2,
nudge_y=0.9,
show.legend=FALSE) +
theme_bw() +
theme(plot.margin = unit(c(1,1,1,1), "cm"),
axis.title = element_text(size = 12),
axis.text = element_text(size = 10),
legend.text = element_text(size=11),
legend.title = element_text(size=12))
plot
ggsave(filename=paste("plots/04_DEA/02_Comparisons/Gene_analysis/t_stats_replication_", capitalize(expt_mouse), "_", substr(age_mouse, 1, nchar(age_mouse)-1),
"_Brain_vs_Smoking_adult_Blood.pdf", sep=""), height = 12, width = width, units = "cm")
## Quantify the number of brain genes that replicate in blood
## Total unique pup brain DEG (FDR<0.05)
total_pups_DEG=length(unique(t_stats[which(t_stats$FDR_brain<0.05), "ensemblID"]))
## Total unique adult brain genes with p<0.05
total_adults_P_val_genes=length(unique(t_stats[which(t_stats$P.Val_brain<0.05), "ensemblID"]))
## Unique replicating genes
rep_genes=length(unique(t_stats[which(t_stats$DE==names(alphas)[1]),"ensemblID"]))
if (age_mouse=="pups"){
## Percentage
percentage=signif(rep_genes / total_pups_DEG *100, 3)
print(paste(rep_genes, "out of", total_pups_DEG, "DEG in", expt_mouse, "pup brain (FDR<0.05) replicate in smoking adult blood (with p<0.05 and same logFC direction) -",
paste(percentage, "%", sep="")))
}
else {
## Percentage
percentage=signif(rep_genes / total_adults_P_val_genes *100, 3)
print(paste(rep_genes, "out of", total_adults_P_val_genes, "genes in", expt_mouse, "adult brain (p<0.05) replicate in smoking adult blood (also p<0.05 and same logFC direction) -",
paste(percentage, "%", sep="")))
}
}
# ----------------------------------------------------------------------------
## Compare blood genes vs brain txs
else {
top_tx_brain <-eval(parse_expr(paste("top_tx", substr(expt_mouse, 1, 3), sep="_")))
## Common genes in gene and tx datasets
tx_genes<-unique(top_tx_brain$ensembl_id)
common_tx_genes<-tx_genes[which(tx_genes %in% top_genes_blood$gencodeID)]
## Txs info
t_stats<-top_tx_brain[which(top_tx_brain$ensembl_id %in% common_tx_genes), c("transcript_id", "P.Value", "adj.P.Val", "Symbol",
"t", "ensembl_id", "logFC")]
colnames(t_stats)[c(2,3,5,7)] <- paste(colnames(t_stats[c(2,3,5,7)]), "tx", sep="_")
## Add t-stats and FDRs of the transcripts' genes
t_tx_genes<-vector()
P_val_tx_genes<-vector()
logFC_tx_genes<-vector()
for (i in 1:dim(t_stats)[1]){
t<-top_genes_blood[which(top_genes_blood$gencodeID==t_stats$ensembl_id[i]), "t"]
P_val<-top_genes_blood[which(top_genes_blood$gencodeID==t_stats$ensembl_id[i]), "P.Value"]
logFC<-top_genes_blood[which(top_genes_blood$gencodeID==t_stats$ensembl_id[i]), "logFC"]
t_tx_genes<-append(t_tx_genes, t)
P_val_tx_genes<-append(P_val_tx_genes, P_val)
logFC_tx_genes<-append(logFC_tx_genes, logFC)
}
t_stats$t_gene<-t_tx_genes
t_stats$P.Val_gene<-P_val_tx_genes
t_stats$logFC_gene<-logFC_tx_genes
## Colors and alphas for plot
cols <- c("deeppink3", "thistle3", "darkgrey")
names(cols)<-c("Replicating txs (blood gene with p<0.05, brain tx with FDR<0.05)", "Signif txs in brain (FDR<0.05)", "ns txs")
alphas <- c(1, 1, 0.3)
names(alphas)<-c("Replicating txs (blood gene with p<0.05, brain tx with FDR<0.05)", "Signif txs in brain (FDR<0.05)", "ns txs")
## Add DE and replication info for each tx
DE <-vector()
for (i in 1:dim(t_stats)[1]) {
## Pup brain txs (FDR<0.05) that replicate in mouse blood genes (p<0.05)
if ((t_stats$P.Val_gene[i]<0.05 && t_stats$adj.P.Val_tx[i]<0.05) &&
(sign(t_stats$logFC_tx[i])==sign(t_stats$logFC_gene[i]))) {
DE<-append(DE, "Replicating txs (blood gene with p<0.05, brain tx with FDR<0.05)")
}
## DE txs in brain
## (Note that all replicating txs of pup brain are also significant (FDR<0.05))
else if (t_stats$adj.P.Val_tx[i]<0.05){
DE<-append(DE, "Signif txs in brain (FDR<0.05)")
}
## Non-significant txs
else {
DE<-append(DE, "ns txs")
}
}
t_stats$DE<- DE
t_stats$DE <- factor(t_stats$DE, levels=names(cols))
## Correlation coeff between t-stats of blood genes vs brain txs
rho <- cor(t_stats$t_gene, t_stats$t_tx, method = "spearman")
rho_anno = paste0("rho = ", format(round(rho, 2), nsmall = 2))
## Add labels of interesting txs and their genes
## 3 most significant replicating txs
rep_txs <- t_stats[which(t_stats$DE==names(alphas)[1]),]
top_rep_txs <- rep_txs[order(rep_txs$adj.P.Val_tx), "transcript_id"][1:3]
label <- vector()
for (i in 1:dim(t_stats)[1]){
## Labels of the top 3 replicating txs
if (t_stats$transcript_id[i] %in% top_rep_txs) {
label <- append(label, paste(t_stats$Symbol[i], t_stats$transcript_id[i], sep="-"))
}
else{
label <- append(label, NA)
}
}
t_stats$label <- label
## Plot
plot <- ggplot(t_stats, aes(x = t_tx, y = t_gene, color=DE, alpha=DE, label=label)) +
geom_point(size = 1.5) +
labs(x = paste("t-stats of txs from", capitalize(expt_mouse), "pup brain"),
y = "t-stats of genes from Smoking adult blood",
title = paste(capitalize(expt_mouse), "mouse brain vs Smoking mouse blood", sep=" "),
subtitle = rho_anno,
color = 'DE/Replication',
parse = T) +
scale_color_manual(values = cols, labels=names(cols), drop = FALSE) +
scale_alpha_manual(values = alphas, labels=names(alphas), drop=FALSE) +
guides(alpha = 'none', color = guide_legend(override.aes = list(size=2))) +
geom_label_repel(aes(fontface = 'bold'), fill='white', color='black',
size=2,
max.overlaps = 3,
min.segment.length = unit(0, "cm"),
point.padding = unit(0.1, "cm"),
box.padding = 0.2,
label.padding = 0.2,
label.size = 0.2,
nudge_y=0.9,
show.legend=FALSE) +
theme_bw() +
theme(plot.margin = unit(c(1,1,1,1), "cm"),
axis.title = element_text(size = 12),
axis.text = element_text(size = 10),
legend.text = element_text(size=11),
legend.title = element_text(size=12))
plot
ggsave(filename=paste("plots/04_DEA/02_Comparisons/Gene_analysis/t_stats_replication_", capitalize(substr(expt_mouse, 1, 3)),
"PupBrain_Tx_vs_SmoAdultBlood_Genes.pdf", sep=""), height = 12, width = width, units = "cm")
## Quantify the number of brain txs that replicate in blood
## Total unique brain DE txs
total_pups_DEtxs=length(top_tx_brain[which(top_tx_brain$adj.P.Val<0.05), "transcript_id"])
## Unique replicating txs
rep_txs=length(unique(t_stats[which(t_stats$DE==names(alphas)[1]),"transcript_id"]))
## Percentage
percentage=signif(rep_txs / total_pups_DEtxs *100, 3)
print(paste(rep_txs, "out of", total_pups_DEtxs, "DE txs in", expt_mouse,
"pup brain (FDR<0.05) replicate in smoking adult blood genes (with p<0.05 and same logFC direction) -", paste(percentage, "%", sep="")))
}
return(t_stats)
}
################### 1.2.1 Brain genes vs blood genes ###################
####### Smoking adult blood vs Smoking adult brain #######
Blood_vs_SmoAdultBrain_data <- t_stat_plot_brain_blood_replication(age_mouse = "adults", expt_mouse = "smoking", feature = "genes")
## "37 out of 772 genes in smoking adult brain (p<0.05) replicate in smoking adult blood (also p<0.05 and same logFC direction) - 4.79%"
save(Blood_vs_SmoAdultBrain_data, file="processed-data/04_DEA/Gene_analysis/Blood_vs_SmoAdultBrain_data.Rdata")
## Add replication info to top_table data
top_genes_adults_smoking_fitted$replication_in_blood <- Blood_vs_SmoAdultBrain_data$DE
save(top_genes_adults_smoking_fitted, file="processed-data/04_DEA/Gene_analysis/top_genes_adults_smoking_fitted.Rdata")
####### Smoking adult blood vs Nicotine adult brain #######
Blood_vs_NicAdultBrain_data <- t_stat_plot_brain_blood_replication(age_mouse = "adults", expt_mouse = "nicotine", feature = "genes")
## "33 out of 679 genes in nicotine adult brain (p<0.05) replicate in smoking adult blood (also p<0.05 and same logFC direction) - 4.86%"
save(Blood_vs_NicAdultBrain_data, file="processed-data/04_DEA/Gene_analysis/Blood_vs_NicAdultBrain_data.Rdata")
top_genes_adults_nicotine_fitted$replication_in_blood <- Blood_vs_NicAdultBrain_data$DE
save(top_genes_adults_nicotine_fitted, file="processed-data/04_DEA/Gene_analysis/top_genes_adults_nicotine_fitted.Rdata")
####### Smoking adult blood vs Smoking pup brain #######
Blood_vs_SmoPupBrain_data <- t_stat_plot_brain_blood_replication(age_mouse = "pups", expt_mouse = "smoking", feature = "genes")
## "126 out of 4165 DEG in smoking pup brain (FDR<0.05) replicate in smoking adult blood (with p<0.05 and same logFC direction) - 3.03%"
save(Blood_vs_SmoPupBrain_data, file="processed-data/04_DEA/Gene_analysis/Blood_vs_SmoPupBrain_data.Rdata")
top_genes_pups_smoking_fitted$replication_in_blood <- Blood_vs_SmoPupBrain_data$DE
save(top_genes_pups_smoking_fitted, file="processed-data/04_DEA/Gene_analysis/top_genes_pups_smoking_fitted.Rdata")
####### Smoking adult blood vs Nicotine pup brain #######
Blood_vs_NicPupBrain_data <- t_stat_plot_brain_blood_replication(age_mouse = "pups", expt_mouse = "nicotine", feature = "genes")
## "15 out of 1010 DEG in nicotine pup brain (FDR<0.05) replicate in smoking adult blood (with p<0.05 and same logFC direction) - 1.49%"
save(Blood_vs_NicPupBrain_data, file="processed-data/04_DEA/Gene_analysis/Blood_vs_NicPupBrain_data.Rdata")
top_genes_pups_nicotine_fitted$replication_in_blood <- Blood_vs_NicPupBrain_data$DE
save(top_genes_pups_nicotine_fitted, file="processed-data/04_DEA/Gene_analysis/top_genes_pups_nicotine_fitted.Rdata")
################### 1.2.2 Brain txs vs blood genes ###################
####### Smoking adult blood (genes) vs Smoking pup brain Txs #######
Blood_vs_SmoPupBrainTx_data <- t_stat_plot_brain_blood_replication(age_mouse = 'pups', expt_mouse = "smoking", feature = "txs")
## "112 out of 4059 DE txs in smoking pup brain (FDR<0.05) replicate in smoking adult blood genes (with p<0.05 and same logFC direction) - 2.76%"
save(Blood_vs_SmoPupBrainTx_data, file="processed-data/04_DEA/Gene_analysis/Blood_vs_SmoPupBrainTx_data.Rdata")
####### Smoking adult blood (genes) vs Nicotine pup brain Txs #######
Blood_vs_NicPupBrainTx_data <- t_stat_plot_brain_blood_replication(age_mouse = 'pups', expt_mouse = "nicotine", feature = "txs")
## "9 out of 232 DE txs in nicotine pup brain (FDR<0.05) replicate in smoking adult blood genes (with p<0.05 and same logFC direction) - 3.88%"
save(Blood_vs_NicPupBrainTx_data, file="processed-data/04_DEA/Gene_analysis/Blood_vs_NicPupBrainTx_data.Rdata")
## (See code below to search for mouse blood genes that replicate in human brain and human brain genes that replicate in mouse blood)
######################################################
# 2. Compare experiments - Smoking vs Nicotine genes
######################################################
####### Smoking adults vs nicotine adults #######
top_genes_pairs<-list(list(top_genes_adults_smoking_naive, top_genes_adults_nicotine_naive),
list(top_genes_adults_smoking_fitted, top_genes_adults_nicotine_fitted),
list(top_genes_adults_smoking_interaction, top_genes_adults_nicotine_interaction))
models<-c("Naive model", "Fitted model", "Interaction model")
tstats_plots(top_genes_pairs, "Smoking adults", "Nicotine adults", models)
####### Smoking pups vs nicotine pups #######
top_genes_pairs<-list(list(top_genes_pups_smoking_naive, top_genes_pups_nicotine_naive),
list(top_genes_pups_smoking_fitted, top_genes_pups_nicotine_fitted),
list(top_genes_pups_smoking_interaction, top_genes_pups_nicotine_interaction))
models<-c("Naive model", "Fitted model", "Interaction model")
tstats_plots(top_genes_pairs, "Smoking pups", "Nicotine pups", models)
########################################
# 3. Compare ages - Pup vs adult genes
########################################
####### Smoking pups vs smoking adults #######
top_genes_pairs<-list(list(top_genes_pups_smoking_naive, top_genes_adults_smoking_naive),
list(top_genes_pups_smoking_fitted, top_genes_adults_smoking_fitted),
list(top_genes_pups_smoking_interaction, top_genes_adults_smoking_interaction))
models<-c("Naive model", "Fitted model", "Interaction model")
tstats_plots(top_genes_pairs, "Smoking pups", "Smoking adults", models)
####### Nicotine pups vs nicotine adults #######
top_genes_pairs<-list(list(top_genes_pups_nicotine_naive, top_genes_adults_nicotine_naive),
list(top_genes_pups_nicotine_fitted, top_genes_adults_nicotine_fitted),
list(top_genes_pups_nicotine_interaction, top_genes_adults_nicotine_interaction))
models<-c("Naive model", "Fitted model", "Interaction model")
tstats_plots(top_genes_pairs, "Nicotine pups", "Nicotine adults", models)
####################################################
# 4. Compare models - Naive vs fitted model genes
####################################################
####### Naive vs fitted models for nicotine pups #######
t<-t_stat_plot(top_genes_pups_nicotine_naive, top_genes_pups_nicotine_fitted,
"Naive model", "Fitted model", "Nicotine pups")
ggsave("plots/04_DEA/02_Comparisons/Gene_analysis/t_stats_Naive_VS_Fitted_Nicotine.pdf", t,
height = 10, width = 12, units = "cm")
####### Naive vs fitted models for smoking pups #######
t<-t_stat_plot(top_genes_pups_smoking_naive, top_genes_pups_smoking_fitted,
"Naive model", "Fitted model", "Smoking pups")
ggsave("plots/04_DEA/02_Comparisons/Gene_analysis/t_stats_Naive_VS_Fitted_Smoking.pdf", t,
height = 10, width = 12, units = "cm")
################################################################################
## 5. Compare human brain vs mouse brain/blood genes
################################################################################
## Samples from prenatal and adult human brain were exposed to smoking
## Compare mouse genes from fitted models only
################### 5.1 Mouse genes that replicate in human ####################
## Genes in prenatal and adult human brain are the same
setdiff(rownames(fetalGene), rownames(adultGene))
# character(0)
## " " to NA
fetalGene[fetalGene == ""] <- NA
adultGene[adultGene == ""] <- NA
## Ensembl IDs of human genes
fetalGene$ensemblID <- rownames(fetalGene)
adultGene$ensemblID <- rownames(adultGene)
## Find the orthologous genes for human in mouse
human_mouse_ids<-biomart(genes = fetalGene$ensemblID,
mart = "ENSEMBL_MART_ENSEMBL",
dataset = "hsapiens_gene_ensembl",
attributes = c("mmusculus_homolog_ensembl_gene", "mmusculus_homolog_associated_gene_name"),
filters = "ensembl_gene_id")
## Common genes in human orthologs and mouse datasets
## (All mouse datasets contain the same genes in the same order)
common_genes <- human_mouse_ids[which(human_mouse_ids$mmusculus_homolog_ensembl_gene %in% top_genes_pups_nicotine_fitted$ensemblID),]
common_genes$human_ensembl_gene_id <- common_genes$ensembl_gene_id
common_genes$ensembl_gene_id <- NULL
## Create plots to check if mouse genes replicate (FDR<5% for pups and p-value<5% for adults) in human brain (with p-value<5% and same logFC sign)
t_stat_plot_mouse_in_human <- function(age_mouse, expt_mouse, tissue_mouse, age_human){
## Define mouse dataset
if (tissue_mouse=="blood"){
top_genes <-eval(parse_expr(paste("top_genes", tissue_mouse, expt_mouse, "fitted", sep="_")))
signif_measure_mouse="p-value"
}
else {
top_genes <-eval(parse_expr(paste("top_genes", age_mouse, expt_mouse, "fitted", sep="_")))
if (age_mouse=="pups"){
signif_measure_mouse="FDR"
}
else {
signif_measure_mouse="p-value"
}
}
## Define human dataset
if (age_human=="prenatal"){
humanGene <-fetalGene
}
else {
humanGene <-adultGene
}
## Extract mouse and human data of those common genes
human_mouse_data <- data.frame(matrix(nrow = nrow(common_genes), ncol = 12))
colnames(human_mouse_data) <- c("mmusculus_homolog_ensembl_gene", "mmusculus_homolog_associated_gene_name", "human_ensembl_gene_id",
"t_mouse", "adj.P.Val_mouse", "P.Value_mouse", "logFC_mouse", "gene_symbol_human", "t_human", "adj.P.Val_human",
"P.Value_human", "logFC_human")
for (i in 1:nrow(common_genes)){
## Find and extract info of mouse gene in mouse dataset
mouse_data <-top_genes[which(top_genes$ensemblID==common_genes[i,1]), c("t", "adj.P.Val", "P.Value", "logFC")]
## Find and extract info of human gene in human dataset
human_data <-humanGene[which(humanGene$ensemblID==common_genes[i, 3]), c("Symbol", "t", "adj.P.Val", "P.Value", "logFC")]
human_mouse_data[i,] <- cbind(common_genes[i,], mouse_data, human_data)
}
## Width of plot
if (age_mouse == "pups"){
width=23
}
else{
width=21.5
}
## Colors and alphas for plot
if (age_mouse=="pups"){
cols <- c('blue', "deeppink3", "thistle3", "yellow3", "darkgrey")
names(cols) <- c("Signif in both", "Replicating genes (p<0.05 in human, FDR<0.05 in mouse)", "Signif in mouse (FDR<0.05)", "Signif in human (FDR<0.1)", "ns genes")
alphas <- c(1, 1, 1, 1, 0.5)
names(alphas) <- names(cols)
}
else {
cols <- c("blue", "deeppink3", "yellow3", "darkgrey")
names(cols) <- c("Signif in both", "Replicating genes (p<0.05 in human and mouse)", "Signif in human (FDR<0.1)", "ns genes")
alphas <- c(1, 1, 1, 0.5)
names(alphas) <- names(cols)
}
## Add DE and replication info of each gene
DE<-vector()
for (i in 1:dim(human_mouse_data)[1]) {
## Pup mouse genes with FDR<5% and human genes with p-value<5%
if (age_mouse=="pups"){
## DEG in human (FDR<0.1) and mouse (FDR<0.05)
if(human_mouse_data$adj.P.Val_human[i]<0.1 && human_mouse_data$adj.P.Val_mouse[i]<0.05) {
DE<-append(DE, "Signif in both")
}
## DEG in human
else if (human_mouse_data$adj.P.Val_human[i]<0.1){
DE<-append(DE, "Signif in human (FDR<0.1)")
}
## Replicating mouse genes
else if ((human_mouse_data$adj.P.Val_mouse[i]<0.05 && human_mouse_data$P.Value_human[i]<0.05) &&
(sign(human_mouse_data$logFC_mouse[i])==sign(human_mouse_data$logFC_human[i]))) {
DE<-append(DE, "Replicating genes (p<0.05 in human, FDR<0.05 in mouse)")
}
## DEG in mouse
else if (human_mouse_data$adj.P.Val_mouse[i]<0.05){
DE<-append(DE, "Signif in mouse (FDR<0.05)")
}
## Non-significant genes
else {
DE<-append(DE, "ns genes")
}
}
## Adult mouse genes with p-value<5% and human genes with p-value<5%
else {
## DEG in human (FDR<0.1) and mouse (FDR<0.05) <- there weren't DEG in adult mice
if(human_mouse_data$adj.P.Val_human[i]<0.1 && human_mouse_data$adj.P.Val_mouse[i]<0.05) {
DE<-append(DE, "Signif in both")
}
else if (human_mouse_data$adj.P.Val_human[i]<0.1){
DE<-append(DE, "Signif in human (FDR<0.1)")
}
else if ((human_mouse_data$P.Value_mouse[i]<0.05 && human_mouse_data$P.Value_human[i]<0.05) &&
(sign(human_mouse_data$logFC_mouse[i])==sign(human_mouse_data$logFC_human[i]))) {
DE<-append(DE, "Replicating genes (p<0.05 in human and mouse)")
}
else {
DE<-append(DE, "ns genes")
}
}
}
human_mouse_data$DE<- DE
human_mouse_data$DE <- factor(human_mouse_data$DE, levels=names(cols))
## Correlation coeff between t-stats of genes in human and mouse
rho <- cor(human_mouse_data$t_human, human_mouse_data$t_mouse, method = "spearman")
rho_anno = paste0("rho = ", format(round(rho, 2), nsmall = 2))
## Add labels of interesting genes
## 3 most significant replicating genes in human
rep_genes <- human_mouse_data[which(human_mouse_data$DE==names(alphas)[2]),]
top_rep_genes <- rep_genes[order(rep_genes$adj.P.Val_human),"gene_symbol_human"][1:3]
label <- vector()
for (i in 1:dim(human_mouse_data)[1]){
## DEG in both human and mouse
if (human_mouse_data$DE[i]=="Signif in both"){
## Label of the form: [gene name in mouse]-[gene name in human]
label <- append(label, paste(human_mouse_data$mmusculus_homolog_associated_gene_name[i], "-", human_mouse_data$gene_symbol_human[i], sep=""))
}
## Labels of the top 3 replicating genes
else if (human_mouse_data$gene_symbol_human[i] %in% top_rep_genes){
label <- append(label, paste(human_mouse_data$mmusculus_homolog_associated_gene_name[i], "-", human_mouse_data$gene_symbol_human[i], sep=""))
}
else{
label <- append(label, NA)
}
}
human_mouse_data$label <- label
## Plot
plot <- ggplot(human_mouse_data, aes(x = t_mouse, y = t_human, color=DE, alpha=DE, label=label)) +
geom_point(size = 1.5) +
geom_label_repel(aes(fontface = 'bold'), fill='white', color='black',
size=3.3,
max.overlaps = 15,
min.segment.length = unit(0, "cm"),
point.padding = unit(0.1, "cm"),
box.padding = 0.2,
label.padding = 0.2,
label.size = 0.2,
nudge_y=0.9,
show.legend=FALSE) +
scale_color_manual(values = cols, labels=names(cols), drop = FALSE) +
scale_alpha_manual(values = alphas, labels=names(alphas), drop=FALSE) +
labs(x = paste("t-stats in", substr(age_mouse, 1, nchar(age_mouse)-1), "mouse", tissue_mouse),
y = paste("t-stats in", age_human, "human brain"),
title = paste(capitalize(expt_mouse),"mouse vs Smoking human", sep=" "),
subtitle = rho_anno,
color='DE/Replication',
parse = T) +
guides(alpha = 'none', color = guide_legend(override.aes = list(size=2))) +
theme_bw() +
theme(plot.margin = unit(c(1,1,1,1), "cm"),
axis.title = element_text(size = 12),
axis.text = element_text(size = 10),
legend.text = element_text(size=11),
legend.title = element_text(size=12))
plot
ggsave(filename=paste("plots/04_DEA/02_Comparisons/Gene_analysis/t_stats_", age_human, "_Human_vs_Mouse_", age_mouse, "_", substr(expt_mouse,1,3), "_",
tissue_mouse, ".pdf", sep=""), height = 12, width = width, units = "cm")
## Quantify mouse genes that replicate in human
## Total unique DEG in pup brain (FDR<0.05)
total_pups_DEG=length(unique(top_genes[which(top_genes$adj.P.Val<0.05),"ensemblID"]))
## Total unique adult brain genes with p<0.05
total_adults_P_val_genes=length(unique(top_genes[which(top_genes$P.Val<0.05),"ensemblID"]))
## Unique replicating genes
rep_genes=length(unique(human_mouse_data[which(human_mouse_data$DE==names(alphas)[2]),"mmusculus_homolog_ensembl_gene"]))
if (age_mouse=="pups"){
## Percentage
percentage=signif(rep_genes / total_pups_DEG *100, 3)
print(paste(rep_genes, "out of", total_pups_DEG, "DEG in", expt_mouse, "mouse pup", tissue_mouse, "(FDR<0.05) replicate in smoking human", age_human,
"brain (with p<0.05 and same logFC direction) -", paste(percentage, "%", sep="")))
}
else {
## Percentage
percentage=signif(rep_genes / total_adults_P_val_genes *100, 3)
print(paste(rep_genes, "out of", total_adults_P_val_genes, "genes in", expt_mouse, "adult mouse", tissue_mouse, "(p<0.05) replicate in smoking human",
age_human, "brain (also p<0.05 and same logFC direction) -", paste(percentage, "%", sep="")))
}
return(human_mouse_data)
}
################################################################
## Nicotine mouse pup brain vs Smoking human prenatal brain
################################################################
prenatalHuman_pupNicMouse_data <- t_stat_plot_mouse_in_human(age_mouse = "pups", tissue_mouse = "brain", expt_mouse = "nicotine", age_human = "prenatal")
## "78 out of 1010 DEG in nicotine mouse pup brain (FDR<0.05) replicate in smoking human prenatal brain (with p<0.05 and same logFC direction) - 7.72%"
save(prenatalHuman_pupNicMouse_data, file="processed-data/04_DEA/Gene_analysis/prenatalHuman_pupNicMouse_data.Rdata")
################################################################
## Smoking mouse pup brain vs Smoking human prenatal brain
################################################################
prenatalHuman_pupSmoMouse_data <- t_stat_plot_mouse_in_human(age_mouse = "pups", tissue_mouse = "brain", expt_mouse = "smoking", age_human = "prenatal")
## "267 out of 4165 DEG in smoking mouse pup brain (FDR<0.05) replicate in smoking human prenatal brain (with p<0.05 and same logFC direction) - 6.41%"
save(prenatalHuman_pupSmoMouse_data, file="processed-data/04_DEA/Gene_analysis/prenatalHuman_pupSmoMouse_data.Rdata")
################################################################
## Nicotine adult mouse brain vs Smoking human prenatal brain
################################################################
prenatalHuman_adultNicMouse_data <- t_stat_plot_mouse_in_human(age_mouse = "adults", tissue_mouse = "brain", expt_mouse = "nicotine", age_human = "prenatal")
## "29 out of 679 genes in nicotine adult mouse brain (p<0.05) replicate in smoking human prenatal brain (also p<0.05 and same logFC direction) - 4.27%"
save(prenatalHuman_adultNicMouse_data, file="processed-data/04_DEA/Gene_analysis/prenatalHuman_adultNicMouse_data.Rdata")
################################################################
## Smoking adult mouse brain vs Smoking human prenatal brain
################################################################
prenatalHuman_adultSmoMouse_data <- t_stat_plot_mouse_in_human(age_mouse = "adults", tissue_mouse = "brain", expt_mouse = "smoking", age_human = "prenatal")
## "40 out of 772 genes in smoking adult mouse brain (p<0.05) replicate in smoking human prenatal brain (also p<0.05 and same logFC direction) - 5.18%"
save(prenatalHuman_adultSmoMouse_data, file="processed-data/04_DEA/Gene_analysis/prenatalHuman_adultSmoMouse_data.Rdata")
################################################################
## Smoking adult mouse blood vs Smoking human prenatal brain
################################################################
prenatalHuman_bloodMouse_data <- t_stat_plot_mouse_in_human(age_mouse = "adults", tissue_mouse = "blood", expt_mouse = "smoking", age_human = "prenatal")
## "100 out of 1499 genes in smoking adult mouse blood (p<0.05) replicate in smoking human prenatal brain (also p<0.05 and same logFC direction) - 6.67%"
save(prenatalHuman_bloodMouse_data, file="processed-data/04_DEA/Gene_analysis/prenatalHuman_bloodMouse_data.Rdata")
################################################################
## Nicotine mouse pup brain vs Smoking human adult brain
################################################################
adultHuman_pupNicMouse_data <- t_stat_plot_mouse_in_human(age_mouse = "pups", tissue_mouse = "brain", expt_mouse = "nicotine", age_human = "adult")
## "18 out of 1010 DEG in nicotine mouse pup brain (FDR<0.05) replicate in smoking human adult brain (with p<0.05 and same logFC direction) - 1.78%"
save(adultHuman_pupNicMouse_data, file="processed-data/04_DEA/Gene_analysis/adultHuman_pupNicMouse_data.Rdata")
################################################################
## Smoking mouse pup brain vs Smoking human adult brain
################################################################
adultHuman_pupSmoMouse_data<- t_stat_plot_mouse_in_human(age_mouse = "pups", tissue_mouse = "brain", expt_mouse = "smoking", age_human = "adult")
## "73 out of 4165 DEG in smoking mouse pup brain (FDR<0.05) replicate in smoking human adult brain (with p<0.05 and same logFC direction) - 1.75%"
save(adultHuman_pupSmoMouse_data, file="processed-data/04_DEA/Gene_analysis/adultHuman_pupSmoMouse_data.Rdata")
################################################################
## Nicotine adult mouse brain vs Smoking human adult brain
################################################################
adultHuman_adultNicMouse_data <- t_stat_plot_mouse_in_human(age_mouse = "adults", tissue_mouse = "brain", expt_mouse = "nicotine", age_human = "adult")
## "13 out of 679 genes in nicotine adult mouse brain (p<0.05) replicate in smoking human adult brain (also p<0.05 and same logFC direction) - 1.91%"
save(adultHuman_adultNicMouse_data, file="processed-data/04_DEA/Gene_analysis/adultHuman_adultNicMouse_data.Rdata")
################################################################
## Smoking adult mouse brain vs Smoking human adult brain
################################################################
adultHuman_adultSmoMouse_data <- t_stat_plot_mouse_in_human(age_mouse = "adults", tissue_mouse = "brain", expt_mouse = "smoking", age_human = "adult")
## "9 out of 772 genes in smoking adult mouse brain (p<0.05) replicate in smoking human adult brain (also p<0.05 and same logFC direction) - 1.17%"
save(adultHuman_adultSmoMouse_data, file="processed-data/04_DEA/Gene_analysis/adultHuman_adultSmoMouse_data.Rdata")
################################################################
## Smoking adult mouse blood vs Smoking human adult brain
################################################################
adultHuman_bloodMouse_data <- t_stat_plot_mouse_in_human(age_mouse = "adults", tissue_mouse = "blood", expt_mouse = "smoking", age_human = "adult")
## "16 out of 1499 genes in smoking adult mouse blood (p<0.05) replicate in smoking human adult brain (also p<0.05 and same logFC direction) - 1.07%"
save(adultHuman_bloodMouse_data, file="processed-data/04_DEA/Gene_analysis/adultHuman_bloodMouse_data.Rdata")
## Condensate results in human and mice
human_and_mice_DGE_results <- cbind(prenatalHuman_pupNicMouse_data[,c("mmusculus_homolog_ensembl_gene", "mmusculus_homolog_associated_gene_name",
"human_ensembl_gene_id", "gene_symbol_human",
"logFC_human", "t_human", "P.Value_human", "adj.P.Val_human")],
adultHuman_bloodMouse_data[, c("logFC_human", "t_human", "P.Value_human", "adj.P.Val_human")],
prenatalHuman_bloodMouse_data[,c("logFC_mouse", "t_mouse", "P.Value_mouse", "adj.P.Val_mouse")],
prenatalHuman_adultNicMouse_data[,c("logFC_mouse", "t_mouse", "P.Value_mouse", "adj.P.Val_mouse")],
prenatalHuman_adultSmoMouse_data[,c("logFC_mouse", "t_mouse", "P.Value_mouse", "adj.P.Val_mouse")],
prenatalHuman_pupNicMouse_data[,c("logFC_mouse", "t_mouse", "P.Value_mouse", "adj.P.Val_mouse")],
prenatalHuman_pupSmoMouse_data[,c("logFC_mouse", "t_mouse", "P.Value_mouse", "adj.P.Val_mouse")])
colnames(human_and_mice_DGE_results)[5:32] <- c(paste0(c("logFC", "t", "P.Value", 'adj.P.Val'), '_human_prenatal_brain_smoking'),
paste0(c("logFC", "t", "P.Value", 'adj.P.Val'), '_human_adult_brain_smoking'),
paste0(c("logFC", "t", "P.Value", 'adj.P.Val'), '_mouse_blood_adult_smoking'),
paste0(c("logFC", "t", "P.Value", 'adj.P.Val'), '_mouse_brain_adult_nicotine'),
paste0(c("logFC", "t", "P.Value", 'adj.P.Val'), '_mouse_brain_adult_smoking'),
paste0(c("logFC", "t", "P.Value", 'adj.P.Val'), '_mouse_brain_pup_nicotine'),
paste0(c("logFC", "t", "P.Value", 'adj.P.Val'), '_mouse_brain_pup_smoking'))
save(human_and_mice_DGE_results, file="processed-data/04_DEA/Gene_analysis/human_and_mice_DGE_results.Rdata")
write.table(human_and_mice_DGE_results, file="processed-data/04_DEA/Gene_analysis/human_and_mice_DGE_results.csv", row.names = FALSE, col.names = TRUE, sep = '\t')
################### 5.2 Human genes that replicate in mouse ####################
## Obtain human brain genes that replicate (FDR<10%) in mouse blood or brain (with p-value<5% and same logFC sign)
replication_human_in_mouse<- function(age_mouse, expt_mouse, tissue_mouse, age_human){
## Define mouse dataset
if (tissue_mouse=="blood"){
top_genes <-eval(parse_expr(paste("top_genes", tissue_mouse, expt_mouse, "fitted", sep="_")))
signif_measure_mouse="p-value"
}
else {
top_genes <-eval(parse_expr(paste("top_genes", age_mouse, expt_mouse, "fitted", sep="_")))
}
## Define human dataset
if (age_human=="prenatal"){
humanGene <-fetalGene
}
else {
humanGene <-adultGene
}
## Extract human DEG (with FDR<0.1)
de_genes_human <- humanGene[which(humanGene$adj.P.Val<0.1),]
colnames(de_genes_human) <- paste(colnames(de_genes_human), "human", sep="_")
## Add the IDs of the orthologous genes in mouse
mouse_ensemblIDs <- vector()
mouse_gene_names <- vector()
for (i in 1:dim(de_genes_human)[1]){
mouse_ensemblID<- common_genes[which(common_genes$human_ensembl_gene_id==de_genes_human$ensemblID_human[i]),"mmusculus_homolog_ensembl_gene"]
mouse_gene_name<- common_genes[which(common_genes$human_ensembl_gene_id==de_genes_human$ensemblID_human[i]),"mmusculus_homolog_associated_gene_name"]
mouse_ensemblIDs <- append(mouse_ensemblIDs, mouse_ensemblID)
mouse_gene_names <- append(mouse_gene_names, mouse_gene_name)
}
de_genes_human <- cbind(de_genes_human, "ensemblID_mouse"=mouse_ensemblIDs, "gene_name_mouse"=mouse_gene_names)
## Mouse data of human DEG orthologs
mouse_data <- data.frame(matrix(ncol = 4, nrow = nrow(de_genes_human)))
colnames(mouse_data) <- c("t_mouse", "adj.P.Val_mouse", "P.Value_mouse", "logFC_mouse")
for (i in 1:nrow(de_genes_human)){
## Extract info of mouse genes
mouse_data[i,] <-top_genes[which(top_genes$ensemblID==de_genes_human[i,"ensemblID_mouse"]), c("t", "adj.P.Val", "P.Value", "logFC")]
}
human_mouse_data <- cbind(de_genes_human, mouse_data)
## Add replication info of each gene
replication<-vector()
for (i in 1:dim(human_mouse_data)[1]) {
## Human genes with FDR<10% and mouse genes with p-value<5%
if (human_mouse_data$P.Value_mouse[i]<0.05 && (sign(human_mouse_data$logFC_mouse[i])==sign(human_mouse_data$logFC_human[i]))) {
replication<-append(replication, "Replicating gene (FDR<0.1 in human, p<0.05 in mouse)")
}
## Non-replicating genes
else {
replication<-append(replication, "Non-replicating gene")
}