diff --git a/besca/st/_wrapper_funcs.py b/besca/st/_wrapper_funcs.py index 67bf15a9..e945732f 100644 --- a/besca/st/_wrapper_funcs.py +++ b/besca/st/_wrapper_funcs.py @@ -308,7 +308,7 @@ def clr_normalize(adata, results_folder): return adata -def highly_variable_genes(adata, batch_key=None, n_shared=2): +def highly_variable_genes(adata, batch_key=None, n_shared=2, only_shared=False): """Calculate highly variable genes and return filtered adata containing only the HVGs. Parameters @@ -320,7 +320,8 @@ def highly_variable_genes(adata, batch_key=None, n_shared=2): n_shared: `int` | default = 2 requirement for selection of HVGs - HVGs shared in nr_samples/n_shared will be included. A higher value will result in a less stringent selection, e.g. with 2 HVGs need to be present - in at least 50% of the samples. + in at least 50% of the samples, in addition to the default scanpy behavior. + only_shared: `bool` | default = False Returns ------- @@ -342,6 +343,9 @@ def highly_variable_genes(adata, batch_key=None, n_shared=2): ) if batch_key is not None: hvglist = adata.var["highly_variable"].copy() + ## Set to False so that default scanpy selection is ignored + if only_shared: + hvglist[hvglist==True]=False hvglist.loc[ adata.var["highly_variable_nbatches"] >= len(set(adata.obs[batch_key])) / n_shared,