Skip to content

Commit

Permalink
skeleton modified
Browse files Browse the repository at this point in the history
  • Loading branch information
janursa committed Oct 20, 2024
1 parent 8f10928 commit 7317995
Show file tree
Hide file tree
Showing 21 changed files with 1,640 additions and 2,059 deletions.
2,915 changes: 1,203 additions & 1,712 deletions runs.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion scripts/sbatch/calculate_scores.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#SBATCH --error=logs/%j.err
#SBATCH --mail-type=END
#SBATCH [email protected]
#SBATCH --mem=64G
#SBATCH --mem=250G
#SBATCH --cpus-per-task=20

python src/metrics/script_all.py
6 changes: 4 additions & 2 deletions scripts/sbatch/robustness_analysis.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
#SBATCH --error=logs/%j.err
#SBATCH --mail-type=END
#SBATCH [email protected]
#SBATCH --mem=64G
#SBATCH --cpus-per-task=20
#SBATCH --mem=250G
#SBATCH --cpus-per-task=20
# SBATCH --partition=gpu
# SBATCH --gres=gpu:1

python src/robustness_analysis/script_all.py
4 changes: 4 additions & 0 deletions src/api/comp_method.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ functionality:
type: boolean
direction: input
default: True
- name: --causal
type: boolean
direction: input
default: True


test_resources:
Expand Down
5 changes: 5 additions & 0 deletions src/control_methods/pearson_corr/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ functionality:
info:
label: pearson_corr
summary: "Baseline based on correlation"
arguments:
- name: --normalize
type: boolean
default: True
direction: input

resources:
- type: python_script
Expand Down
30 changes: 21 additions & 9 deletions src/control_methods/pearson_corr/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,35 @@
'normalize': False,
'donor_specific': False,
'temp_dir': 'output/pearson_corr',
'causal': True}
'causal': True,
'normalize': True}
## VIASH END


import argparse
parser = argparse.ArgumentParser(description="Process multiomics RNA data.")
parser.add_argument('--multiomics_rna', type=str, help='Path to the multiomics RNA file')
parser.add_argument('--multiomics_atac', type=str, help='Path to the multiomics RNA file')
parser.add_argument('--prediction', type=str, help='Path to the prediction file')
parser.add_argument('--resources_dir', type=str, help='Path to the prediction file')
parser.add_argument('--tf_all', type=str, help='Path to the tf_all')
parser.add_argument('--num_workers', type=str, help='Number of cores')
parser.add_argument('--max_n_links', type=str, help='Number of top links to retain')
parser.add_argument('--causal', action='store_true', help='Enable causal mode')
parser.add_argument('--normalize', action='store_true')

args = parser.parse_args()

if args.multiomics_rna:
par['multiomics_rna'] = args.multiomics_rna
if args.causal:
par['causal'] = True
else:
par['causal'] = False

if args.causal:
par['normalize'] = True
else:
par['normalize'] = False

if args.prediction:
par['prediction'] = args.prediction
if args.tf_all:
Expand All @@ -38,9 +51,6 @@
par['num_workers'] = args.num_workers
if args.max_n_links:
par['max_n_links'] = int(args.max_n_links)
if args.resources_dir:
meta['resources_dir'] = args.resources_dir


os.makedirs(par['temp_dir'], exist_ok=True)
import sys
Expand All @@ -59,9 +69,11 @@ def create_corr_net(par):
print(par)
print('Read data')
adata = ad.read_h5ad(par["multiomics_rna"])
# - lognorm
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
if 'normalize' in par:
if par['normalize']:
# - lognorm
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
# - corr
gene_names = adata.var_names.to_numpy()
grn = corr_net(adata.X, gene_names, par)
Expand Down
73 changes: 63 additions & 10 deletions src/exp_analysis/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def cosine_similarity_net(nets_dict, col_name='source', weight_col='weight', fig

return cosine_sim_matrix, fig

def jaccard_similarity_net(nets_dict, col_name='link', figsize=(4, 4), title='jaccard Similarity', ax=None):
def jaccard_similarity_net(nets_dict, col_name='link', figsize=(4, 4), title='jaccard Similarity', ax=None, fmt='.02f'):
from itertools import combinations
import seaborn as sns
import matplotlib.pyplot as plt
Expand Down Expand Up @@ -138,15 +138,15 @@ def jaccard_similarity_net(nets_dict, col_name='link', figsize=(4, 4), title='ja
fig, ax = plt.subplots(1, 1, figsize=figsize)
else:
fig = None
sns.heatmap(jaccard_matrix, annot=True, cmap="coolwarm", xticklabels=nets_names, yticklabels=nets_names, ax=ax)
sns.heatmap(jaccard_matrix, annot=True, cmap="viridis", xticklabels=nets_names, yticklabels=nets_names, ax=ax, fmt=fmt, cbar=None)
ax.grid(False)
ax.set_title(title)
# Rotate x labels for readability
plt.xticks(rotation=45, ha='right')

return jaccard_matrix, fig

def plot_cumulative_density(data, label='', ax=None, title=None, label=None, s=1, x_label='Weight', **kwdgs):
def plot_cumulative_density(data, ax=None, title=None, label=None, s=3, x_label='Weight', **kwdgs):
# Sort the data
sorted_data = np.sort(data)
# Compute the cumulative density values
Expand All @@ -159,7 +159,9 @@ def plot_cumulative_density(data, label='', ax=None, title=None, label=None, s=1
ax.set_xlabel(x_label)
ax.set_ylabel('Cumulative Density')
ax.set_title(title)
ax.grid(True)
ax.grid(True, linewidth=0.2, linestyle='--', color='gray')
for side in ['right', 'top']:
ax.spines[side].set_visible(False)
return fig, ax

class Connectivity:
Expand Down Expand Up @@ -224,6 +226,37 @@ def diff_roles(control: pd.DataFrame, sample: pd.DataFrame, critical_change_q_t:
# df_distance['critical_change_ps'] = df_distance['ps_distance'] > df_distance['ps_distance'].quantile(critical_change_q_t)
# df_distance['critical_change_overall'] = df_distance['overall_distance'] > df_distance['overall_distance'].quantile(critical_change_q_t)
return df_distance
def find_peak_intersection(peaks, peaks_ref):
    '''Return the entries of peaks_ref that overlap at least one peak in peaks.

    Both inputs are iterables of strings formatted "chr:start-end".
    Malformed entries (wrong separator, non-integer bounds) are silently
    skipped. Overlap is inclusive: two intervals on the same chromosome
    overlap when start1 <= end2 and end1 >= start2.

    Returns a list of "chr:start-end" strings (subset of peaks_ref,
    rebuilt from the parsed coordinates, in peaks_ref order).
    '''
    def _parse(peak_array):
        # Parse "chr:start-end" strings into (chr, start, end) tuples,
        # skipping anything that does not split/convert cleanly.
        parsed = []
        for p in peak_array:
            try:
                chr_, range_ = p.split(':')
                start, end = map(int, range_.split('-'))
                parsed.append((chr_, start, end))
            except ValueError:
                continue  # skip malformed peaks
        return parsed

    peaks_parsed = _parse(peaks)
    if not peaks_parsed:
        # Original code indexed an empty 1-D object array here
        # (peaks_struct[:, 0]) and raised IndexError; no reference
        # peaks can intersect an empty set, so return early.
        return []

    # Column vectors for vectorized per-reference-peak overlap tests.
    chr_peaks = np.array([c for c, _, _ in peaks_parsed])
    start_peaks = np.array([s for _, s, _ in peaks_parsed], dtype=int)
    end_peaks = np.array([e for _, _, e in peaks_parsed], dtype=int)

    intersecting_peaks_ref = []
    for chr_ref, start_ref, end_ref in _parse(peaks_ref):
        # Same chromosome AND inclusive interval overlap.
        mask = (chr_peaks == chr_ref) & (start_peaks <= end_ref) & (end_peaks >= start_ref)
        if np.any(mask):
            intersecting_peaks_ref.append(f"{chr_ref}:{start_ref}-{end_ref}")

    return intersecting_peaks_ref
class Exp_analysis:
'''
This class provides functions for explanatory analysis of GRNs
Expand All @@ -232,8 +265,10 @@ def __init__(self, net, peak_gene_net=None):
self.net = net
# self.net.weight = minmax_scale(self.net.weight)
self.net['link'] = self.net['source'].astype(str) + '_' + self.net['target'].astype(str)

self.peak_gene_net = peak_gene_net
if self.peak_gene_net is not None:
if 'peak' in peak_gene_net.columns:
peak_gene_net.rename(columns={'peak': 'source'}, inplace=True)
self.tfs = net.source.unique()
self.targets = net.target.unique()
# check duplicates
Expand All @@ -251,7 +286,7 @@ def __init__(self, net, peak_gene_net=None):
def plot_centrality_barh(df, title='',ax=None, xlabel='Degree', ylabel='Gene', colors=None):
if ax==None:
fig, ax = plt.subplots(figsize=(10, 6))
df['degree'].plot(kind='barh', color='skyblue', ax=ax) # Pass ax to the plot method
df.plot(kind='barh', color='skyblue', ax=ax) # Pass ax to the plot method
ax.set_title(title)
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
Expand Down Expand Up @@ -293,15 +328,33 @@ def subset_quantile(df, col_name='weight', top_q=0.95, top_n=None, ascending=Fal
else:
df = df.sort_values(by=col_name, ascending=ascending, key=abs)[:top_n]
return df


def annotate_peaks(self, annotation_df) -> dict[str, float]:
'''Annotate peaks with associated regions on genome.
'''
if self.peak_gene_net is None:
print('Peak gene net is not given. Peak annoation is skipped.')
return
peaks = self.peak_gene_net.peak.unique()
# print(self.peak_gene_net)
peaks = self.peak_gene_net.source.unique()
peaks = self.format_peak(peaks)
annotation_df = annotation_df[annotation_df.peak.isin(peaks)]

annotation_peaks = annotation_df.peak.unique()

flag = False
for peak in peaks:
if peak not in annotation_peaks:
flag = True


if flag:
print('Not all peaks in the net is among the annotated ones. Finding the overlap')
peaks = find_peak_intersection(peaks, annotation_df.peak.unique())
annotation_df = annotation_df[annotation_df.peak.isin(peaks)]
else:
annotation_df = annotation_df[annotation_df.peak.isin(peaks)]

value_counts = annotation_df.annotation.value_counts()
sum_values = value_counts.sum()
value_ratio = ((value_counts/sum_values)*100).round(1)
Expand Down Expand Up @@ -343,7 +396,7 @@ def calculate_feature(net, feature='active_sum_weight'):
return df


def plot_interactions(interaction_df: pd.DataFrame, min_subset_size=None, min_degree=None, color_map=None) -> plt.Figure:
def plot_interactions(interaction_df: pd.DataFrame, min_subset_size=None, min_degree=None, color_map=None, sort_by='degree') -> plt.Figure:
"""Upset plot of interactions
Args:
Expand All @@ -359,7 +412,7 @@ def plot_interactions(interaction_df: pd.DataFrame, min_subset_size=None, min_de
out_dict = upsetplot.plot(upsetplot.from_indicators(indicators=lambda a: a==True, data=interaction_df), fig=fig,
show_counts=True,
show_percentages = '{:.0%}',
# sort_by='cardinality',
sort_by=sort_by, #'cardinality'
# min_subset_size =".1%", # min interaction to show
min_subset_size = min_subset_size, # min interaction to show
min_degree=min_degree,
Expand Down
Loading

0 comments on commit 7317995

Please sign in to comment.