From 15a93e149695f7937c23dc92c005f92e0bee142f Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Tue, 7 May 2024 00:53:28 +0000 Subject: [PATCH] differences for PR #8 --- episode2.md | 10 +++++----- episode3.md | 20 ++++++++++---------- episode4.md | 32 ++++++++++++++++---------------- episode5.md | 46 +++++++++++++++++++++++++++------------------- episode6.md | 2 +- md5sum.txt | 2 +- 6 files changed, 60 insertions(+), 52 deletions(-) diff --git a/episode2.md b/episode2.md index 95e3b77..752b5ef 100644 --- a/episode2.md +++ b/episode2.md @@ -121,7 +121,7 @@ raw.counts.ibd <- read.table(file="data/E-MTAB-11349.counts.matrix.csv", writeLines(sprintf("%i %s", c(dim(raw.counts.ibd)[1], dim(raw.counts.ibd)[2]), c("rows corresponding to transcript IDs", "columns corresponding to samples"))) ``` -```{.output} +```output 22751 rows corresponding to transcript IDs 592 columns corresponding to samples ``` @@ -140,7 +140,7 @@ View a small subset of the data, (e.g. first ten rows and 8 columns) to see how raw.counts.ibd[1:10,1:8] ``` -```{.output} +```output read Sample 1 Sample 2 Sample 3 Sample 4 Sample 5 Sample 6 1 1 * 13961 16595 20722 17696 25703 20848 2 2 ERCC-00002 0 0 0 0 0 0 @@ -171,7 +171,7 @@ samp.info.ibd <- read.table(file="data/E-MTAB-11349.sdrf.txt", sep="\t", header= sprintf("There are %i rows, corresponding to the samples", dim(samp.info.ibd)[1]) ``` -```{.output} +```output [1] "There are 590 rows, corresponding to the samples" ``` @@ -179,7 +179,7 @@ sprintf("There are %i rows, corresponding to the samples", dim(samp.info.ibd)[1] sprintf("There are %i columns, corresponding to the available variables for each sample", dim(samp.info.ibd)[2]) ``` -```{.output} +```output [1] "There are 32 columns, corresponding to the available variables for each sample" ``` @@ -190,7 +190,7 @@ If we view the column names, we can see that the file does indeed contain a set colnames(samp.info.ibd) ``` -```{.output} +```output [1] "Source Name" [2] "Characteristics[organism]" [3] "Characteristics[age]" diff --git a/episode3.md b/episode3.md index 8e03fe7..48ee8dd 100644 --- a/episode3.md +++ b/episode3.md @@ -116,23 +116,23 @@ If there is more than one SOFT file for a GEO Series, `getGEO()` will return a l gse212041 <- GEOquery::getGEO("GSE212041") ``` -```{.output} +```output Setting options('download.file.method.GEOquery'='auto') ``` -```{.output} +```output Setting options('GEOquery.inmemory.gpl'=FALSE) ``` -```{.output} +```output Found 2 file(s) ``` -```{.output} +```output GSE212041-GPL18573_series_matrix.txt.gz ``` -```{.output} +```output GSE212041-GPL24676_series_matrix.txt.gz ``` @@ -141,7 +141,7 @@ GSE212041-GPL24676_series_matrix.txt.gz sprintf("Number of files downloaded: %i", length(gse212041)) ``` -```{.output} +```output [1] "Number of files downloaded: 2" ``` @@ -158,7 +158,7 @@ Write the code to check that the number of samples in each file gives us the tot writeLines(sprintf("file %i: %i samples", 1:2, c(dim(gse212041[[1]])[2], dim(gse212041[[2]])[2]))) ``` -```{.output} +```output file 1: 16 samples file 2: 765 samples ``` @@ -181,7 +181,7 @@ samp.info.cov19 <- Biobase::pData(gse212041[[2]]) colnames(samp.info.cov19) ``` -```{.output} +```output [1] "title" "geo_accession" [3] "status" "submission_date" [5] "last_update_date" "type" @@ -219,7 +219,7 @@ If we use the `exprs()` function to extract the counts data from the expression Biobase::exprs(gse212041[[2]])[,1:10] ``` -```{.output} +```output GSM6507615 GSM6507616 GSM6507617 GSM6507618 GSM6507619 GSM6507620 GSM6507621 GSM6507622 GSM6507623 GSM6507624 ``` @@ -232,7 +232,7 @@ We can verify this by looking at the dimensions of the object in the exprs slot. dim(Biobase::exprs(gse212041[[2]])) ``` -```{.output} +```output [1] 0 765 ``` diff --git a/episode4.md b/episode4.md index 69f6c54..618c54d 100644 --- a/episode4.md +++ b/episode4.md @@ -86,7 +86,7 @@ We'll now apply these steps sequentially to the sample information for the IBD d dplyr::glimpse(samp.info.ibd) ``` -```{.output} +```output Rows: 590 Columns: 32 $ `Source Name` "Sample 1", "Sample 2", "Sam… @@ -153,7 +153,7 @@ for(i in seq_along(1:ncol(samp.info.ibd))){ sprintf("The unique IDs that match the counts matrix are in column: %s", colnames(samp.info.ibd)[which(lst.colnames)]) ``` -```{.output} +```output [1] "The unique IDs that match the counts matrix are in column: Source Name" ``` @@ -196,7 +196,7 @@ samp.info.ibd.sel <- dplyr::rename(samp.info.ibd.sel, dplyr::glimpse(samp.info.ibd.sel) ``` -```{.output} +```output Rows: 590 Columns: 4 $ sampleID "Sample 1", "Sample 2", "Sample 3", "Sample 4", "Sample 5", … @@ -225,7 +225,7 @@ samp.info.ibd.sel$condition[agrep("Crohns", samp.info.ibd.sel$condition)] <- "cr unique(c(samp.info.ibd.sel$sex, samp.info.ibd.sel$condition)) ``` -```{.output} +```output [1] "male" "female" "crohns_disease" [4] "normal" "ulcerative colitis" ``` @@ -275,7 +275,7 @@ samp.info.ibd.sel[c('sex', 'condition', 'class')] <- lapply(samp.info.ibd.sel[c( table(samp.info.ibd.sel$class) ``` -```{.output} +```output -1 1 267 323 @@ -290,7 +290,7 @@ The two classes are approximately equally represented, so let's check everything dplyr::glimpse(samp.info.ibd.sel) ``` -```{.output} +```output Rows: 590 Columns: 5 $ sampleID "Sample_1", "Sample_2", "Sample_3", "Sample_4", "Sample_5", … @@ -334,7 +334,7 @@ Item | Check For... | Rationale raw.counts.ibd[1:10,1:8] ``` -```{.output} +```output read Sample 1 Sample 2 Sample 3 Sample 4 Sample 5 Sample 6 1 1 * 13961 16595 20722 17696 25703 20848 2 2 ERCC-00002 0 0 0 0 0 0 @@ -371,7 +371,7 @@ counts.mat.ibd <- counts.mat.ibd %>% tibble::column_to_rownames('read') counts.mat.ibd[1:10,1:6] ``` -```{.output} +```output Sample 1 Sample 2 Sample 3 Sample 4 Sample 5 Sample 6 ERCC-00002 0 0 0 0 0 0 ERCC-00003 0 0 0 0 0 0 @@ -398,7 +398,7 @@ ERCC-00019 0 0 0 0 0 0 which(duplicated(rownames(counts.mat.ibd))) ``` -```{.output} +```output integer(0) ``` @@ -406,7 +406,7 @@ integer(0) which(duplicated(colnames(counts.mat.ibd))) ``` -```{.output} +```output integer(0) ``` @@ -419,7 +419,7 @@ integer(0) if(!identical(colnames(counts.mat.ibd), samp.info.ibd.sel$sampleID)){stop()} ``` -```{.error} +```error Error in eval(expr, envir, enclos): ``` @@ -453,7 +453,7 @@ allMissValues <- function(x){all(is.na(x) | x == "")} allMissValues(counts.mat.ibd) ``` -```{.output} +```output [1] FALSE ``` @@ -466,7 +466,7 @@ Take a final look at the cleaned up matrix. counts.mat.ibd[1:10,1:6] ``` -```{.output} +```output Sample_1 Sample_2 Sample_3 Sample_4 Sample_5 Sample_6 ERCC-00002 0 0 0 0 0 0 ERCC-00003 0 0 0 0 0 0 @@ -484,7 +484,7 @@ ERCC-00019 0 0 0 0 0 0 sprintf("There are %i rows, corresponding to the transcript IDs", dim(counts.mat.ibd)[1]) ``` -```{.output} +```output [1] "There are 22750 rows, corresponding to the transcript IDs" ``` @@ -492,7 +492,7 @@ sprintf("There are %i rows, corresponding to the transcript IDs", dim(counts.mat sprintf("There are %i columns, corresponding to the samples", dim(counts.mat.ibd)[2]) ``` -```{.output} +```output [1] "There are 590 columns, corresponding to the samples" ``` @@ -630,7 +630,7 @@ samp.info.tb %>% dplyr::glimpse() # view output ``` -```{.output} +```output Rows: 360 Columns: 3 $ sampleID "PR123_S19", "PR096_S13", "PR146_S14", "PR158_S12", "PR095… diff --git a/episode5.md b/episode5.md index 3f332dd..a0a475e 100644 --- a/episode5.md +++ b/episode5.md @@ -79,12 +79,14 @@ data.frame(max_count = apply(counts.mat.ibd, 1, max, na.rm=TRUE)) %>% ggplot2::scale_x_log10(n.breaks = 6, labels = scales::comma) ``` -```{.warning} -Warning: Transformation introduced infinite values in continuous x-axis +```warning +Warning in ggplot2::scale_x_log10(n.breaks = 6, labels = scales::comma): log-10 +transformation introduced infinite values. ``` -```{.warning} -Warning: Removed 10 rows containing non-finite values (`stat_bin()`). +```warning +Warning: Removed 10 rows containing non-finite outside the scale range +(`stat_bin()`). ``` @@ -122,7 +124,7 @@ dds.ibd <- DESeq2::DESeqDataSetFromMatrix( design = ~ condition) ``` -```{.warning} +```warning Warning: replacing previous import 'S4Arrays::makeNindexFromArrayViewport' by 'DelayedArray::makeNindexFromArrayViewport' when loading 'SummarizedExperiment' ``` @@ -184,6 +186,12 @@ ggplot2::ggplot(data=data.frame(t = t.seq, jacc = ms.jac)) + ggplot2::ylab("Multiset Jaccard Index") ``` +```warning +Warning in ggplot2::geom_point(ggplot2::aes(x = which.max(ms.jac), y = max(ms.jac)), : All aesthetics have length 1, but the data has 25 rows. +ℹ Please consider using `annotate()` or provide this layer with data containing + a single row. +``` + ::::::::::::::::::::::::::::::::::::: challenge @@ -201,7 +209,7 @@ The threshold value is given by the following code, which should return a value (t.hold <- which.max(ms.jac)) ``` -```{.output} +```output [1] 11 ``` @@ -221,7 +229,7 @@ counts.mat.ibd.filtered <- counts.mat.ibd[which(apply(counts.ibd.norm, 1, functi sprintf("Genes filtered: %s; Genes remaining: %s", nrow(counts.mat.ibd)-nrow(counts.mat.ibd.filtered), nrow(counts.mat.ibd.filtered)) ``` -```{.output} +```output [1] "Genes filtered: 3712; Genes remaining: 19038" ``` @@ -239,7 +247,7 @@ Run the following code to view the top 10 values of read counts in the raw count tail(sort(as.matrix(counts.mat.ibd)),10) ``` -```{.output} +```output [1] 2037946 2038514 2043983 2133125 2238093 2269033 2341479 2683585 3188911 [10] 3191428 ``` @@ -248,7 +256,7 @@ tail(sort(as.matrix(counts.mat.ibd)),10) sprintf("The mean read count value: %f", mean(as.matrix(counts.mat.ibd))) ``` -```{.output} +```output [1] "The mean read count value: 506.731355" ``` @@ -273,41 +281,41 @@ Run `DESeq2` differential expression analysis, which automatically calculates th deseq.ibd <- DESeq2::DESeq(dds.ibd.filt) ``` -```{.output} +```output estimating size factors ``` -```{.output} +```output estimating dispersions ``` -```{.output} +```output gene-wise dispersion estimates ``` -```{.output} +```output mean-dispersion relationship ``` -```{.output} +```output final dispersion estimates ``` -```{.output} +```output fitting model and testing ``` -```{.output} +```output -- replacing outliers and refitting for 1559 genes -- DESeq argument 'minReplicatesForReplace' = 7 -- original counts are preserved in counts(dds) ``` -```{.output} +```output estimating dispersions ``` -```{.output} +```output fitting model and testing ``` @@ -335,7 +343,7 @@ counts.mat.ibd.ol.filtered <- counts.mat.ibd.filtered[which(apply(cooks.mat, 1, sprintf("Genes filtered: %s; Genes remaining: %s", nrow(counts.mat.ibd.filtered)-nrow(counts.mat.ibd.ol.filtered), nrow(counts.mat.ibd.ol.filtered)) ``` -```{.output} +```output [1] "Genes filtered: 1776; Genes remaining: 17262" ``` diff --git a/episode6.md b/episode6.md index 4c2911f..0754830 100644 --- a/episode6.md +++ b/episode6.md @@ -107,7 +107,7 @@ dds.ibd.filt.ol <- DESeq2::DESeqDataSetFromMatrix( design = ~ condition) ``` -```{.warning} +```warning Warning: replacing previous import 'S4Arrays::makeNindexFromArrayViewport' by 'DelayedArray::makeNindexFromArrayViewport' when loading 'SummarizedExperiment' ``` diff --git a/md5sum.txt b/md5sum.txt index caba57c..88da84c 100644 --- a/md5sum.txt +++ b/md5sum.txt @@ -14,4 +14,4 @@ "learners/reference.md" "1c7cc4e229304d9806a13f69ca1b8ba4" "site/built/reference.md" "2024-05-07" "learners/setup.md" "d0c4fbb2853d84c779714577c6673c4b" "site/built/setup.md" "2024-05-07" "profiles/learner-profiles.md" "60b93493cf1da06dfd63255d73854461" "site/built/learner-profiles.md" "2024-05-07" -"renv/profiles/lesson-requirements/renv.lock" "2a8891b7e68f3c2c27f200711e598f32" "site/built/renv.lock" "2024-05-07" +"renv/profiles/lesson-requirements/renv.lock" "1dd04c6d681fdaff551f9dc546e4c96a" "site/built/renv.lock" "2024-05-07"