From 0589d31b2b486fbd251d4ae064888391ee6b77bf Mon Sep 17 00:00:00 2001 From: Barbara Hoellbacher Date: Wed, 6 Nov 2024 15:02:04 +0100 Subject: [PATCH 1/4] Fix: check if mat is csc_matrix and return dense representation in case it is --- besca/pp/_normalization.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/besca/pp/_normalization.py b/besca/pp/_normalization.py index 390505d6..cd7871fc 100644 --- a/besca/pp/_normalization.py +++ b/besca/pp/_normalization.py @@ -1,5 +1,6 @@ import numpy as np from scipy.sparse.csr import csr_matrix +from scipy.sparse._csc import csc_matrix from anndata._core.views import SparseCSRView def closure(mat): @@ -180,6 +181,9 @@ def normalize_geometric(adata): # need to add a catch for newly encountered datatype elif type(X) == SparseCSRView: X = X.todense() + # need to add a catch for new sparse matrix datatype + elif type(X) == csc_matrix: + X = X.todense() # ensure that X is an array otherwise this will cause type issue with multiplicative replacement function X = np.array(X) From cd868c87d56d9e583ca9f6e1e19c8b6e7cc59d1a Mon Sep 17 00:00:00 2001 From: Barbara Hoellbacher Date: Wed, 6 Nov 2024 17:32:02 +0100 Subject: [PATCH 2/4] Bugfix: sns.jointplot requires arguments to be passed in using keywords --- workbooks/standard_workflow_besca2.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workbooks/standard_workflow_besca2.ipynb b/workbooks/standard_workflow_besca2.ipynb index 64e1f60f..9cf10cd7 100644 --- a/workbooks/standard_workflow_besca2.ipynb +++ b/workbooks/standard_workflow_besca2.ipynb @@ -760,7 +760,7 @@ " n_prots = len(adata_prot.var_names)\n", " percent_top = (int(round(0.01*n_prots, 0)) if int(round(0.01*n_prots, 0)) >= 1 else 1, int(round(0.1*n_prots, 0)), int(round(0.25*n_prots, 0)))\n", " qc_adata = sc.pp.calculate_qc_metrics(adata_prot, percent_top=percent_top, var_type=\"antibodies\", inplace=False)\n", - " fig = sns.jointplot(\"log1p_total_counts\", \"n_antibodies_by_counts\", qc_adata[0], 
kind=\"hex\", norm=mpl.colors.LogNorm())\n", + " fig = sns.jointplot(x=\"log1p_total_counts\", y=\"n_antibodies_by_counts\", data=qc_adata[0], kind=\"hex\", norm=mpl.colors.LogNorm())\n", " fig.savefig(os.path.join(results_folder_citeseq, 'citeseq', 'figures', 'CITESEQ_QC_plot.png'))\n", " \n", " #generate overview of n_counts\n", From 0326e06529494cca4b91b50a8f80a5fca6d7f65f Mon Sep 17 00:00:00 2001 From: Barbara Hoellbacher Date: Thu, 7 Nov 2024 16:49:10 +0100 Subject: [PATCH 3/4] using cache=True within the scanpy read function can cause issues when changing the thresholds for filtering --- besca/Import/_read.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/besca/Import/_read.py b/besca/Import/_read.py index d3f94510..2d627b30 100644 --- a/besca/Import/_read.py +++ b/besca/Import/_read.py @@ -138,7 +138,7 @@ def read_mtx( if gzfiles == "gz": print("reading matrix.mtx.gz") adata = read( - os.path.join(filepath, "matrix.mtx.gz"), cache=True + os.path.join(filepath, "matrix.mtx.gz"), cache=False ).T # transpose the data print("adding cell barcodes") adata.obs_names = pd.read_csv( @@ -155,7 +155,7 @@ def read_mtx( else: print("reading matrix.mtx") adata = read( - os.path.join(filepath, "matrix.mtx"), cache=True + os.path.join(filepath, "matrix.mtx"), cache=False ).T # transpose the data print("adding cell barcodes") adata.obs_names = pd.read_csv( From 285c075b4e1284eb6044b89faa2b82ec4708d00f Mon Sep 17 00:00:00 2001 From: Barbara Hoellbacher Date: Fri, 8 Nov 2024 13:03:44 +0100 Subject: [PATCH 4/4] Add whether to read from cache or source as additional parameter to read_mtx --- besca/Import/_read.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/besca/Import/_read.py b/besca/Import/_read.py index 2d627b30..c1904eca 100644 --- a/besca/Import/_read.py +++ b/besca/Import/_read.py @@ -107,7 +107,7 @@ def assert_adata(adata: AnnData, attempFix=True): def read_mtx( - filepath, annotation=True, use_genes="SYMBOL", 
species="human", citeseq=None + filepath, annotation=True, use_genes="SYMBOL", species="human", citeseq=None, read_cache=True ): """Read matrix.mtx, genes.tsv, barcodes.tsv to AnnData object. By specifiying an input folder this function reads the contained matrix.mtx, @@ -129,6 +129,9 @@ def read_mtx( citeseq: 'gex_only' or 'citeseq_only' or False or None | default = None string indicating if only gene expression values (gex_only) or only protein expression values ('citeseq_only') or everything is read if None is specified + read_cache: `bool` (default=True) + boolean identifier if scanpy should read the AnnData object from fast h5ad + cache or from source Returns ------- @@ -138,7 +141,7 @@ def read_mtx( if gzfiles == "gz": print("reading matrix.mtx.gz") adata = read( - os.path.join(filepath, "matrix.mtx.gz"), cache=False + os.path.join(filepath, "matrix.mtx.gz"), cache=read_cache ).T # transpose the data print("adding cell barcodes") adata.obs_names = pd.read_csv( @@ -155,7 +158,7 @@ def read_mtx( else: print("reading matrix.mtx") adata = read( - os.path.join(filepath, "matrix.mtx"), cache=False + os.path.join(filepath, "matrix.mtx"), cache=read_cache ).T # transpose the data print("adding cell barcodes") adata.obs_names = pd.read_csv(