diff --git a/analyses/01_process_counts.Rmd b/analyses/01_process_counts.Rmd
index be674b7..6b338d5 100644
--- a/analyses/01_process_counts.Rmd
+++ b/analyses/01_process_counts.Rmd
@@ -5,7 +5,7 @@ jupyter:
       extension: .Rmd
       format_name: rmarkdown
       format_version: '1.2'
-      jupytext_version: 1.10.0
+      jupytext_version: 1.11.2
   kernelspec:
     display_name: Python [conda env:.conda-vanderburg_scanpy]
     language: python
diff --git a/analyses/02_filter_data.Rmd b/analyses/02_filter_data.Rmd
index 3838eed..0502481 100644
--- a/analyses/02_filter_data.Rmd
+++ b/analyses/02_filter_data.Rmd
@@ -5,7 +5,7 @@ jupyter:
       extension: .Rmd
       format_name: rmarkdown
       format_version: '1.2'
-      jupytext_version: 1.10.0
+      jupytext_version: 1.11.2
   kernelspec:
     display_name: Python [conda env:.conda-vanderburg_scanpy]
     language: python
diff --git a/analyses/03_normalize.Rmd b/analyses/03_normalize.Rmd
index 78eb622..7fb6a14 100644
--- a/analyses/03_normalize.Rmd
+++ b/analyses/03_normalize.Rmd
@@ -5,7 +5,7 @@ jupyter:
       extension: .Rmd
       format_name: rmarkdown
       format_version: '1.2'
-      jupytext_version: 1.10.0
+      jupytext_version: 1.11.2
   kernelspec:
     display_name: Python [conda env:.conda-vanderburg_scanpy]
     language: python
@@ -25,8 +25,10 @@ In this notebook, we will
 
 ```{python tags=c("parameters")}
 input_file = "../results/02_filter_data/adata.h5ad"
+adata_unfiltered_file = "../results/01_process_data/adata.h5ad"
 tables_dir = "../tables"
 output_file = "tmp/adata.h5ad"
+output_file_stats = "tmp/quality_stats.csv"
 doublet_file = f"{tables_dir}/is_doublet.npy"
 ```
 
@@ -63,6 +65,10 @@ pca_file = os.path.join(tables_dir, "adata_pca.pkl.gz")
 adata = sc.read_h5ad(input_file)
 ```
 
+```{python}
+adata_unfiltered = sc.read_h5ad(adata_unfiltered_file)
+```
+
 ### Load doublets precomputed by solo
 We don't run `solo` as part of the pipeline, as the results
 are not reproducible on different systems. Instead, 
@@ -129,6 +135,118 @@ adata = adata[~adata.obs["is_doublet"], :].copy()
 print_dim(adata)
 ```
 
+## Summary statistics
+
+Generate a summary table with
+ * total number of sequenced reads per sample
+ * total number of uniquely mapped reads per sample
+ * total number of called cells (quality control and filtering for e.g., possible cell doublets, potential apoptotic cells)
+ * median number and range of uniquely maped reads per called cell 
+ * median number and range of detected genes per called cell
+ * median number and range of detected genes per called cell
+ 
+The first two metrics are based on the raw files (FASTQ and BAM), i.e. before UMI deduplication and read in from precomputed tables. The other columns are based on the counts produced by cellranger and computed on the anndata object. 
+ 
+
+```{python}
+sequenced_reads = pd.read_csv(
+    f"{tables_dir}/summary_fastq_counts.txt", names=["samples", "total_sequenced_reads"]
+)
+sequenced_reads["samples"] = [f"H{x}" for x in sequenced_reads["samples"]]
+sequenced_reads.set_index("samples", inplace=True)
+```
+
+```{python}
+uniquely_mapped_reads = pd.read_csv(
+    f"{tables_dir}/summary_uniquely_mapped_reads.txt",
+    names=["samples", "total_uniquely_mapped_reads"],
+)
+uniquely_mapped_reads["samples"] = [f"H{x}" for x in uniquely_mapped_reads["samples"]]
+uniquely_mapped_reads.set_index("samples", inplace=True)
+```
+
+```{python}
+called_cells = (
+    adata.obs.groupby("samples")
+    .size()
+    .reset_index(name="total_called_cells")
+    .set_index("samples")
+)
+```
+
+### Get fraction of ribosomal genes
+need to revert to unfiltered anndata object to get stats on ribosomal genes as we removed them earlier. 
+The statistics need to be computed on "called cells" (after doublet filtering), so we can't compute the stats in the earlier notebook either. 
+
+```{python}
+ribo_genes = pd.read_csv(
+    os.path.join(tables_dir, "ribosomal_genes.tsv"), sep="\t", comment="#"
+)["Approved symbol"].values
+```
+
+```{python}
+adata.shape
+```
+
+```{python}
+# only keep 'called cells'
+adata_unfiltered = adata_unfiltered[adata.obs_names, :].copy()
+adata_unfiltered.shape
+```
+
+```{python}
+adata_unfiltered.var["is_ribo"] = adata_unfiltered.var_names.isin(ribo_genes)
+```
+
+```{python}
+adata.obs["ribo_frac"] = np.sum(
+    adata_unfiltered[:, adata_unfiltered.var["is_ribo"]].X, axis=1
+) / np.sum(adata_unfiltered.X, axis=1)
+```
+
+### Compute stats by aggregating `obs`
+
+```{python}
+gene_stats = adata.obs.groupby("samples").agg(
+    median_genes=pd.NamedAgg(column="n_genes", aggfunc="median"),
+    min_genes=pd.NamedAgg(column="n_genes", aggfunc="min"),
+    max_genes=pd.NamedAgg(column="n_genes", aggfunc="max"),
+    median_uniquely_mapped_reads=pd.NamedAgg(column="n_counts", aggfunc="median"),
+    min_uniquely_mapped_reads=pd.NamedAgg(column="n_counts", aggfunc="min"),
+    max_uniquely_mapped_reads=pd.NamedAgg(column="n_counts", aggfunc="max"),
+    median_ribosomal_read_fraction=pd.NamedAgg(column="ribo_frac", aggfunc="median"),
+    min_ribosomal_read_fraction=pd.NamedAgg(column="ribo_frac", aggfunc="min"),
+    max_ribosomal_read_fraction=pd.NamedAgg(column="ribo_frac", aggfunc="max"),
+)
+```
+
+```{python}
+quality_table = sequenced_reads.join(uniquely_mapped_reads).join(called_cells)
+quality_table["median_uniquely_mapped_reads_per_cell"] = [
+    f'{gene_stats.loc[s, "median_uniquely_mapped_reads"]:.0f} '
+    f'({gene_stats.loc[s, "min_uniquely_mapped_reads"]:.0f} - '
+    f'{gene_stats.loc[s, "max_uniquely_mapped_reads"]:.0f})'
+    for s in quality_table.index
+]
+quality_table["median_rrna_rate_per_cell"] = [
+    f'{gene_stats.loc[s, "median_ribosomal_read_fraction"]:.2f} '
+    f'({gene_stats.loc[s, "min_ribosomal_read_fraction"]:.2f} - '
+    f'{gene_stats.loc[s, "max_ribosomal_read_fraction"]:.2f})'
+    for s in quality_table.index
+]
+quality_table["median_detected_genes_per_cell"] = [
+    f'{gene_stats.loc[s, "median_genes"]:.0f} '
+    f'({gene_stats.loc[s, "min_genes"]:.0f} - '
+    f'{gene_stats.loc[s, "max_genes"]:.0f})'
+    for s in quality_table.index
+]
+quality_table
+```
+
+```{python}
+quality_table.to_csv(output_file_stats)
+```
+
 ## Compute highly variable genes
 
 ```{python}
diff --git a/analyses/04_annotate_cell_types.Rmd b/analyses/04_annotate_cell_types.Rmd
index 1d71b75..8ddfbcb 100644
--- a/analyses/04_annotate_cell_types.Rmd
+++ b/analyses/04_annotate_cell_types.Rmd
@@ -5,7 +5,7 @@ jupyter:
       extension: .Rmd
       format_name: rmarkdown
       format_version: '1.2'
-      jupytext_version: 1.10.0
+      jupytext_version: 1.11.2
   kernelspec:
     display_name: Python [conda env:.conda-vanderburg_scanpy]
     language: python
diff --git a/analyses/05_prepare_adata_nk_t.Rmd b/analyses/05_prepare_adata_nk_t.Rmd
index 4a634b1..c1a3b0c 100644
--- a/analyses/05_prepare_adata_nk_t.Rmd
+++ b/analyses/05_prepare_adata_nk_t.Rmd
@@ -7,7 +7,7 @@ jupyter:
       extension: .Rmd
       format_name: rmarkdown
       format_version: '1.2'
-      jupytext_version: 1.10.0
+      jupytext_version: 1.11.2
   kernelspec:
     display_name: Python [conda env:.conda-vanderburg_scanpy]
     language: python
diff --git a/main.nf b/main.nf
index a2d0b8f..0b94cb3 100755
--- a/main.nf
+++ b/main.nf
@@ -19,7 +19,7 @@ process p01_process_data {
         file 'notebook.Rmd' from Channel.fromPath("analyses/${id}.Rmd")
 
     output:
-        file "adata.h5ad" into process_data_adata
+        file "adata.h5ad" into process_data_adata, process_data_adata_2
         file "${id}.html" into process_data_html
 
     """
@@ -97,18 +97,20 @@ process p03_normalize {
         file "is_doublet.npy" from Channel.fromPath("tables/is_doublet.npy")
         file 'lib/*' from Channel.fromPath("lib/{jupytertools,scio,scpp}.py").collect()
         file 'tables/*' from Channel.fromPath(
-            "tables/{biomart.tsv,cell_cycle_regev.tsv,adata_pca.pkl.gz}"
+            "tables/{biomart.tsv,cell_cycle_regev.tsv,adata_pca.pkl.gz,summary*.txt,ribosomal_genes.tsv}"
         ).collect()
         file 'notebook.Rmd' from Channel.fromPath("analyses/${id}.Rmd")
         file 'input_adata.h5ad' from filter_data_adata_2
+        file 'adata_unfiltered.h5ad' from  process_data_adata_2
 
     output:
         file "adata.h5ad" into correct_data_adata
         file "${id}.html" into correct_data_html
+        file "quality_stats.csv" into correct_data_quality_stats
 
     """
     execute_notebook.sh ${id} ${task.cpus} notebook.Rmd \\
-       "-r input_file input_adata.h5ad -r output_file adata.h5ad -r tables_dir tables -r doublet_file is_doublet.npy"
+       "-r input_file input_adata.h5ad -r output_file adata.h5ad -r tables_dir tables -r doublet_file is_doublet.npy -r adata_unfiltered_file adata_unfiltered.h5ad -r output_file_stats quality_stats.csv"
     """
 }
 
@@ -243,20 +245,22 @@ process deploy {
             process_data_html,
             filter_data_html,
             correct_data_html,
+            correct_data_quality_stats,
             annotate_cell_types_html,
             prepare_adata_t_nk_html,
             nkg2a_html,
             nkg2a_figures,
             nkg2a_de_analysis,
-            nkg2a_de_analysis_zip
+            nkg2a_de_analysis_zip,
         ).collect()
 
     output:
         file "*.html"
         file "*.zip"
+        file "*.csv"
 
     """
-    cp input/*.{html,zip} .
+    cp input/*.{html,zip,csv} .
     """
 }