Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor py #12

Open
wants to merge 64 commits into
base: dev
Choose a base branch
from
Open
Changes from 1 commit
Commits
Show all changes
64 commits
Select commit Hold shift + click to select a range
82e5c3b
first line
ypriverol Sep 19, 2024
deb2df6
first iteration of pandas fdataframe.py
ypriverol Sep 19, 2024
70fec44
first iteration of pandas fdataframe.py
ypriverol Sep 19, 2024
b99aee0
first iteration of pandas fdataframe.py
ypriverol Sep 19, 2024
64fb1aa
update fdataframe class
enriquea Sep 20, 2024
cc471f0
minor refactory
enriquea Sep 20, 2024
30e0659
Merge branch 'refactor-py' of https://github.com/bigbio/fsspark into …
enriquea Sep 20, 2024
471dafa
first iteration of pandas fdataframe.py
ypriverol Sep 20, 2024
66d6118
Merge remote-tracking branch 'origin/refactor-py' into refactor-py
ypriverol Sep 20, 2024
174196a
first iteration of pandas fdataframe.py
ypriverol Sep 20, 2024
fa0d320
first iteration of pandas fdataframe.py
ypriverol Sep 20, 2024
0a8080b
added test univariate corr
enriquea Sep 20, 2024
8558656
refactor univariate methods (corr)
enriquea Sep 20, 2024
d2ca24d
update
enriquea Sep 20, 2024
516b4c6
added methods to select features and update FSDataFrame
enriquea Sep 20, 2024
a787707
move from unitests to pytests
ypriverol Sep 20, 2024
f75093d
move from unitests to pytests
ypriverol Sep 20, 2024
f15b4e8
minor changes to store sparse matrices
ypriverol Sep 21, 2024
ea15b18
fsspark -> fslite
ypriverol Sep 22, 2024
a4de03c
fsspark -> fslite
ypriverol Sep 22, 2024
032a422
better structure for methods in constants.py
ypriverol Sep 22, 2024
c2312c8
better structure for methods in constants.py
ypriverol Sep 22, 2024
10ee2e8
fsspark -> fslite
ypriverol Sep 22, 2024
a69ac12
Minor changes in constants.py
ypriverol Sep 22, 2024
3f56ded
black applied
ypriverol Sep 22, 2024
1fafeb5
clean more code.
ypriverol Sep 22, 2024
f2ce664
clean more code.
ypriverol Sep 22, 2024
6d1f54a
update in dependencies
ypriverol Sep 22, 2024
a0181aa
update in dependencies
ypriverol Sep 22, 2024
4a93621
update in dependencies
ypriverol Sep 22, 2024
5d70dfc
update in dependencies
ypriverol Sep 22, 2024
0eddddd
update in dependencies
ypriverol Sep 22, 2024
94703eb
smaller tests for CI/CD
ypriverol Sep 22, 2024
f67a259
smaller tests for CI/CD
ypriverol Sep 23, 2024
7a08e82
Another refactoring
ypriverol Sep 23, 2024
5e56b21
Another refactoring
ypriverol Sep 23, 2024
9b74ada
Another refactoring
ypriverol Sep 23, 2024
b1c4ad5
refactoring ml methods
ypriverol Sep 23, 2024
c657be9
refactoring ml methods
ypriverol Sep 23, 2024
c46167c
added file for experiments
ypriverol Sep 23, 2024
7b06d1e
minor comments
ypriverol Sep 23, 2024
35f58a2
minor refinements
ypriverol Sep 23, 2024
43dddb7
minor refinements
ypriverol Sep 23, 2024
b6e8eab
added example script to parse single-cell data
enriquea Sep 23, 2024
07a9dc5
implemented univariate selector methods (from sci-learn) and added te…
enriquea Sep 23, 2024
6c29cd8
added implementation for multivariate methods: variance and matrix_co…
enriquea Sep 24, 2024
5cbd7da
added tests for multivariate
enriquea Sep 24, 2024
cc493f6
loom2parquet examples
ypriverol Sep 24, 2024
4250a4e
Update fslite/fs/utils.py
ypriverol Sep 25, 2024
cc4e794
Update fslite/tests/test_ml_methods.py
ypriverol Sep 25, 2024
0ccd98d
Update fslite/tests/generate_big_tests.py
ypriverol Sep 25, 2024
0e24e2c
Update fslite/tests/generate_big_tests.py
ypriverol Sep 25, 2024
82a1a86
delete ML methods
ypriverol Sep 25, 2024
e2f7b9c
delete ML methods
ypriverol Sep 25, 2024
5a91f14
Update examples/loom2parquetmerge.py
ypriverol Sep 25, 2024
718b743
Update fslite/tests/generate_big_tests.py
ypriverol Sep 25, 2024
681a823
Update fslite/fs/methods.py
ypriverol Sep 25, 2024
8608117
Update fslite/fs/ml.py
ypriverol Sep 25, 2024
6ecbaca
Update fslite/fs/fdataframe.py
ypriverol Sep 25, 2024
d5cc974
Update fslite/fs/multivariate.py
ypriverol Sep 25, 2024
7ee27c8
delete ML methods
ypriverol Sep 25, 2024
d1f74d6
delete ML methods
ypriverol Sep 25, 2024
3909487
refactoring parquet SC generation
ypriverol Sep 25, 2024
07cb771
small changes
ypriverol Sep 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
added tests for multivariate
  • Loading branch information
enriquea committed Sep 24, 2024
commit 5cbd7dab721738430f03ba791252f71c851c2229
122 changes: 122 additions & 0 deletions fslite/tests/test_multivariate_methods.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import tempfile
from pathlib import Path

import pandas as pd

from fslite.fs.fdataframe import FSDataFrame
from fslite.fs.multivariate import FSMultivariate
from fslite.utils.datasets import get_tnbc_data_path


# test multivariate_filter method with 'm_corr' method
def test_multivariate_filter_corr_strict_mode():
    """
    Test multivariate feature selection with the 'm_corr' method in strict mode.

    Reads the TSV at ``get_tnbc_data_path()``, wraps it in an FSDataFrame and
    runs FSMultivariate with ``selection_threshold=0.75``. The input is
    expected to still report 500 features and the filtered result 239.
    The filtered data is finally exported to CSV to exercise the
    ``to_pandas()`` round trip.

    :return: None
    """
    # import tsv as pandas DataFrame
    df = pd.read_csv(get_tnbc_data_path(), sep="\t")

    # create FSDataFrame instance
    fs_df = FSDataFrame(df=df, sample_col="Sample", label_col="label")

    # create FSMultivariate instance
    fs_multivariate = FSMultivariate(
        fs_method="m_corr",
        selection_mode="strict",
        selection_threshold=0.75,
    )

    fsdf_filtered = fs_multivariate.select_features(fs_df)

    # the source FSDataFrame must not lose features after selection
    assert fs_df.count_features() == 500
    assert fsdf_filtered.count_features() == 239

    # Export the filtered FSDataFrame as a pandas DataFrame and write it to a
    # throwaway directory, so the test leaves no artifact in the working
    # directory and does not clobber files written by other tests.
    df_filtered = fsdf_filtered.to_pandas()
    with tempfile.TemporaryDirectory() as tmp_dir:
        df_filtered.to_csv(Path(tmp_dir) / "filtered_tnbc_data.csv", index=False)


# test multivariate_filter method with 'm_corr' method in approximate mode
def test_multivariate_filter_corr_approximate_mode():
    """
    Test multivariate feature selection with the 'm_corr' method in approximate mode.

    Reads the TSV at ``get_tnbc_data_path()``, wraps it in an FSDataFrame and
    runs FSMultivariate with ``selection_threshold=0.75``. The input is
    expected to still report 500 features; the filtered result is only
    checked against the range [240, 260] rather than an exact count.
    The filtered data is finally exported to CSV to exercise the
    ``to_pandas()`` round trip.

    :return: None
    """
    # import tsv as pandas DataFrame
    df = pd.read_csv(get_tnbc_data_path(), sep="\t")

    # create FSDataFrame instance
    fs_df = FSDataFrame(df=df, sample_col="Sample", label_col="label")

    # create FSMultivariate instance
    fs_multivariate = FSMultivariate(
        fs_method="m_corr",
        selection_mode="approximate",
        selection_threshold=0.75,
    )

    fsdf_filtered = fs_multivariate.select_features(fs_df)

    # the source FSDataFrame must not lose features after selection
    assert fs_df.count_features() == 500

    # test if number of features selected is within the expected range [240-260]
    assert 240 <= fsdf_filtered.count_features() <= 260

    # Export the filtered FSDataFrame as a pandas DataFrame and write it to a
    # throwaway directory, so the test leaves no artifact in the working
    # directory and does not clobber files written by other tests.
    df_filtered = fsdf_filtered.to_pandas()
    with tempfile.TemporaryDirectory() as tmp_dir:
        df_filtered.to_csv(Path(tmp_dir) / "filtered_tnbc_data.csv", index=False)


# test multivariate_filter method with 'variance' method
def test_multivariate_filter_variance_percentile_mode():
    """
    Test multivariate feature selection with the 'variance' method in percentile mode.

    Reads the TSV at ``get_tnbc_data_path()``, wraps it in an FSDataFrame and
    runs FSMultivariate with ``selection_threshold=0.2``. The input is
    expected to still report 500 features and the filtered result 400.
    The filtered data is finally exported to CSV to exercise the
    ``to_pandas()`` round trip.

    :return: None
    """
    # import tsv as pandas DataFrame
    df = pd.read_csv(get_tnbc_data_path(), sep="\t")

    # create FSDataFrame instance
    fs_df = FSDataFrame(df=df, sample_col="Sample", label_col="label")

    # create FSMultivariate instance
    fs_multivariate = FSMultivariate(
        fs_method="variance",
        selection_mode="percentile",
        selection_threshold=0.2,
    )

    fsdf_filtered = fs_multivariate.select_features(fs_df)

    # the source FSDataFrame must not lose features after selection
    assert fs_df.count_features() == 500
    assert fsdf_filtered.count_features() == 400

    # Export the filtered FSDataFrame as a pandas DataFrame and write it to a
    # throwaway directory, so the test leaves no artifact in the working
    # directory and does not clobber files written by other tests.
    df_filtered = fsdf_filtered.to_pandas()
    with tempfile.TemporaryDirectory() as tmp_dir:
        df_filtered.to_csv(Path(tmp_dir) / "filtered_tnbc_data.csv", index=False)


# test multivariate_filter method with 'variance' method in k_best mode
def test_multivariate_filter_variance_k_best_mode():
    """
    Test multivariate feature selection with the 'variance' method in k_best mode.

    Reads the TSV at ``get_tnbc_data_path()``, wraps it in an FSDataFrame and
    runs FSMultivariate with ``selection_threshold=68100000.0``. The input is
    expected to still report 500 features and the filtered result 87.
    The filtered data is finally exported to CSV to exercise the
    ``to_pandas()`` round trip.

    :return: None
    """
    # import tsv as pandas DataFrame
    df = pd.read_csv(get_tnbc_data_path(), sep="\t")

    # create FSDataFrame instance
    fs_df = FSDataFrame(df=df, sample_col="Sample", label_col="label")

    # create FSMultivariate instance
    fs_multivariate = FSMultivariate(
        fs_method="variance",
        selection_mode="k_best",
        # TODO: check this value (should be normalized variance?)
        selection_threshold=68100000.0,
    )

    fsdf_filtered = fs_multivariate.select_features(fs_df)

    # the source FSDataFrame must not lose features after selection
    assert fs_df.count_features() == 500
    assert fsdf_filtered.count_features() == 87

    # Export the filtered FSDataFrame as a pandas DataFrame and write it to a
    # throwaway directory, so the test leaves no artifact in the working
    # directory and does not clobber files written by other tests.
    df_filtered = fsdf_filtered.to_pandas()
    with tempfile.TemporaryDirectory() as tmp_dir:
        df_filtered.to_csv(Path(tmp_dir) / "filtered_tnbc_data.csv", index=False)