Skip to content

Commit

Permalink
Merge pull request XiaoLabJHU#4 from XiaoLabJHU/dev
Browse files Browse the repository at this point in the history
Updating scripts to align with PEP8, added docstrings, flake8 linting and reduced loop/functions complexity
  • Loading branch information
shachafl authored Nov 5, 2024
2 parents 9b1eeb8 + 85ac357 commit 628b056
Show file tree
Hide file tree
Showing 22 changed files with 2,939 additions and 6,767 deletions.
10 changes: 5 additions & 5 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,11 @@ celerybeat.pid
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
.env*
.venv*
env*/
venv*/
ENV*/
env.bak/
venv.bak/

Expand Down
3,100 changes: 0 additions & 3,100 deletions CODE/AUPR_calc_for_filling_missing_MI_or_Inference.ipynb

This file was deleted.

848 changes: 307 additions & 541 deletions CODE/Building_MI_matrices.py

Large diffs are not rendered by default.

197 changes: 0 additions & 197 deletions CODE/Computation_time_vs_data_size.ipynb

This file was deleted.

78 changes: 78 additions & 0 deletions CODE/Computation_time_vs_data_size.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
"""
Computation time vs. data size with an expression matrix (genes X conditions)
"""
import math
import os
import time
import numpy as np
import Building_MI_matrices as Building_MI_matrices_mod


def time_array_per_MIestimator_and_MIquantity(mi_est, mi_quantity_to_calc, rand_seed=13):
    """
    Measure how long it takes to build an MI/TC matrix for increasing data sizes.

    A random "gene expression" matrix (50 genes x 1000 conditions) is generated once, and the
    matrix-building routine selected by `mi_est` / `mi_quantity_to_calc` is timed on the first
    100, 250, 500 and 1000 columns of it. The timings are printed and saved with np.savetxt to
    a text file in the current working directory.

    Parameters:
        mi_est (str): Mutual information estimator ("Shannon", "KSG", "KL").
        mi_quantity_to_calc (str): Quantity to calculate ("MI2" or "TC").
        rand_seed (int, optional): Seed passed to np.random.seed (global NumPy RNG state)
            for reproducibility. Default is 13.

    Returns:
        np.ndarray: Array of computation times (seconds), one entry per data size.

    Raises:
        ValueError: If `mi_est` or `mi_quantity_to_calc` is not one of the supported values.
    """
    # Validate up front: previously an unknown estimator failed later with an unbound local,
    # and an unknown quantity silently timed nothing and saved meaningless results.
    supported_estimators = ("Shannon", "KSG", "KL")
    supported_quantities = ("MI2", "TC")
    if mi_est not in supported_estimators:
        raise ValueError(f"Unknown MI estimator {mi_est!r}; expected one of {supported_estimators}")
    if mi_quantity_to_calc not in supported_quantities:
        raise ValueError(
            f"Unknown MI quantity {mi_quantity_to_calc!r}; expected one of {supported_quantities}")

    np.random.seed(rand_seed)

    # constants initialization
    MI_matrix_fname = "MI_matrix.dat"
    n_genes = 50
    # list of vector sizes equivalent to a single gene expression profile with Ntot conditions/perturbations
    Ntot_list = [100, 250, 500, 1000]
    time_array = np.zeros(len(Ntot_list), dtype=float)

    # One value per data size, passed to the matrix builders as `bins_or_neighbors`:
    # floor(sqrt(Ntot)) for "Shannon", and 1 for "KSG"/"KL"
    # (presumably number of bins vs. number of nearest neighbors - confirm against Building_MI_matrices).
    if mi_est == "Shannon":
        bins_or_neighbors_list = [math.floor(Ntot ** (1/2)) for Ntot in Ntot_list]
    else:  # "KSG" or "KL"
        bins_or_neighbors_list = [1] * len(Ntot_list)

    # Generate "gene expression" matrix for 50 genes and up to 1000 conditions/perturbations
    m = np.random.normal(8, 1.5, size=(n_genes, max(Ntot_list)))

    # Build MI matrix and save time to build matrix in time_array
    for n, (Ntot, bins_or_neighbors) in enumerate(zip(Ntot_list, bins_or_neighbors_list)):
        input1_data_array = m[:, :Ntot]

        # perf_counter is the clock intended for measuring short intervals
        start_time = time.perf_counter()

        if mi_quantity_to_calc == "MI2":
            Building_MI_matrices_mod.mi2_matrix_build(
                MI_matrix_fname, input1_data_array, bins_or_neighbors, mi_est)
        elif mi_est == "KSG":
            # TC with the KSG estimator
            Building_MI_matrices_mod.tc_matrix_build(
                MI_matrix_fname, input1_data_array, bins_or_neighbors, mi_est)
        else:
            # TC with the "KL" or "Shannon" estimator
            Building_MI_matrices_mod.tc_matrix_build_from_entropies(
                MI_matrix_fname, input1_data_array, bins_or_neighbors, mi_est)

        time_array[n] = time.perf_counter() - start_time

    print(mi_est, time_array)

    # The file name carries the bins/neighbors value used for the LARGEST data size only
    # (this matches the original naming, which used the value left over from the last iteration).
    last_bins_or_neighbors = bins_or_neighbors_list[-1]
    output_fname = (f"Time_array_{mi_quantity_to_calc}_{mi_est}{last_bins_or_neighbors}"
                    "_50genes_100to1k_perturb_fast.txt")
    np.savetxt(output_fname, time_array)

    return time_array


# Move into the output folder first: the timing function writes its result files
# to the current working directory.
os.chdir(os.path.expanduser('../DATA/MI_comparison_FB_vs_KNN/'))

# Time every (quantity, estimator) combination: all MI2 runs first, then all TC runs,
# each over the estimators Shannon, KL, KSG in that order.
for mi_quantity in ("MI2", "TC"):
    for mi_estimator in ("Shannon", "KL", "KSG"):
        time_array_per_MIestimator_and_MIquantity(mi_estimator, mi_quantity)
Loading

0 comments on commit 628b056

Please sign in to comment.