From 0589d31b2b486fbd251d4ae064888391ee6b77bf Mon Sep 17 00:00:00 2001
From: Barbara Hoellbacher <barbara.hoellbacher@roche.com>
Date: Wed, 6 Nov 2024 15:02:04 +0100
Subject: [PATCH 1/4] Fix: check if mat is csc_matrix and return dense
 representation in case it is

---
 besca/pp/_normalization.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/besca/pp/_normalization.py b/besca/pp/_normalization.py
index 390505d6..cd7871fc 100644
--- a/besca/pp/_normalization.py
+++ b/besca/pp/_normalization.py
@@ -1,5 +1,6 @@
 import numpy as np
 from scipy.sparse.csr import csr_matrix
+from scipy.sparse._csc import csc_matrix
 from anndata._core.views import SparseCSRView
 
 def closure(mat):
@@ -180,6 +181,9 @@ def normalize_geometric(adata):
     # need to add a catch for newly encountered datatype
     elif type(X) == SparseCSRView:
         X = X.todense()
+    # scipy CSC sparse matrices are converted to a dense representation as well
+    elif type(X) == csc_matrix:
+        X = X.todense()
 
     # ensure that X is an array otherwise this will cause type issue with multiplicative replacement function
     X = np.array(X)

From cd868c87d56d9e583ca9f6e1e19c8b6e7cc59d1a Mon Sep 17 00:00:00 2001
From: Barbara Hoellbacher <barbara.hoellbacher@roche.com>
Date: Wed, 6 Nov 2024 17:32:02 +0100
Subject: [PATCH 2/4] Bugfix: sns.jointplot requires arguments to be passed in
 using keywords

---
 workbooks/standard_workflow_besca2.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workbooks/standard_workflow_besca2.ipynb b/workbooks/standard_workflow_besca2.ipynb
index 64e1f60f..9cf10cd7 100644
--- a/workbooks/standard_workflow_besca2.ipynb
+++ b/workbooks/standard_workflow_besca2.ipynb
@@ -760,7 +760,7 @@
     "        n_prots = len(adata_prot.var_names)\n",
     "        percent_top = (int(round(0.01*n_prots, 0)) if int(round(0.01*n_prots, 0)) >= 1 else 1, int(round(0.1*n_prots, 0)), int(round(0.25*n_prots, 0)))\n",
     "        qc_adata = sc.pp.calculate_qc_metrics(adata_prot, percent_top=percent_top, var_type=\"antibodies\", inplace=False)\n",
-    "        fig = sns.jointplot(\"log1p_total_counts\", \"n_antibodies_by_counts\", qc_adata[0], kind=\"hex\", norm=mpl.colors.LogNorm())\n",
+    "        fig = sns.jointplot(x=\"log1p_total_counts\", y=\"n_antibodies_by_counts\", data=qc_adata[0], kind=\"hex\", norm=mpl.colors.LogNorm())\n",
     "        fig.savefig(os.path.join(results_folder_citeseq, 'citeseq', 'figures', 'CITESEQ_QC_plot.png'))\n",
     "        \n",
     "        #generate overview of n_counts\n",

From 0326e06529494cca4b91b50a8f80a5fca6d7f65f Mon Sep 17 00:00:00 2001
From: Barbara Hoellbacher <barbara.hoellbacher@roche.com>
Date: Thu, 7 Nov 2024 16:49:10 +0100
Subject: [PATCH 3/4] using cache=True within the scanpy read function can
 cause issues when changing the thresholds for filtering

---
 besca/Import/_read.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/besca/Import/_read.py b/besca/Import/_read.py
index d3f94510..2d627b30 100644
--- a/besca/Import/_read.py
+++ b/besca/Import/_read.py
@@ -138,7 +138,7 @@ def read_mtx(
     if gzfiles == "gz":
         print("reading matrix.mtx.gz")
         adata = read(
-            os.path.join(filepath, "matrix.mtx.gz"), cache=True
+            os.path.join(filepath, "matrix.mtx.gz"), cache=False
         ).T  # transpose the data
         print("adding cell barcodes")
         adata.obs_names = pd.read_csv(
@@ -155,7 +155,7 @@ def read_mtx(
     else:
         print("reading matrix.mtx")
         adata = read(
-            os.path.join(filepath, "matrix.mtx"), cache=True
+            os.path.join(filepath, "matrix.mtx"), cache=False
         ).T  # transpose the data
         print("adding cell barcodes")
         adata.obs_names = pd.read_csv(

From 285c075b4e1284eb6044b89faa2b82ec4708d00f Mon Sep 17 00:00:00 2001
From: Barbara Hoellbacher <barbara.hoellbacher@roche.com>
Date: Fri, 8 Nov 2024 13:03:44 +0100
Subject: [PATCH 4/4] Add read_cache parameter to read_mtx to choose between
 reading from cache or from source

---
 besca/Import/_read.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/besca/Import/_read.py b/besca/Import/_read.py
index 2d627b30..c1904eca 100644
--- a/besca/Import/_read.py
+++ b/besca/Import/_read.py
@@ -107,7 +107,7 @@ def assert_adata(adata: AnnData, attempFix=True):
 
 
 def read_mtx(
-    filepath, annotation=True, use_genes="SYMBOL", species="human", citeseq=None
+    filepath, annotation=True, use_genes="SYMBOL", species="human", citeseq=None, read_cache=True
 ):
     """Read matrix.mtx, genes.tsv, barcodes.tsv to AnnData object.
     By specifiying an input folder this function reads the contained matrix.mtx,
@@ -129,6 +129,9 @@ def read_mtx(
     citeseq: 'gex_only' or 'citeseq_only' or False or None | default = None
         string indicating if only gene expression values (gex_only) or only protein
         expression values ('citeseq_only') or everything is read if None is specified
+    read_cache: `bool` (default=True)
+        boolean flag indicating whether scanpy should read the AnnData object from
+        the fast h5ad cache (True) or from the source file (False)
 
     Returns
     -------
@@ -138,7 +141,7 @@ def read_mtx(
     if gzfiles == "gz":
         print("reading matrix.mtx.gz")
         adata = read(
-            os.path.join(filepath, "matrix.mtx.gz"), cache=False
+            os.path.join(filepath, "matrix.mtx.gz"), cache=read_cache
         ).T  # transpose the data
         print("adding cell barcodes")
         adata.obs_names = pd.read_csv(
@@ -155,7 +158,7 @@ def read_mtx(
     else:
         print("reading matrix.mtx")
         adata = read(
-            os.path.join(filepath, "matrix.mtx"), cache=False
+            os.path.join(filepath, "matrix.mtx"), cache=read_cache
         ).T  # transpose the data
         print("adding cell barcodes")
         adata.obs_names = pd.read_csv(