
Commit d1f74d6
delete ML methods
ypriverol committed Sep 25, 2024 (1 parent: 7ee27c8)
Showing 3 changed files with 8 additions and 75 deletions.
10 changes: 8 additions & 2 deletions examples/loom2parquetmerge.py
@@ -28,7 +28,7 @@ def concatenate_parquet_files_incremental(files_paths, output_path, batch_size=1
     :param batch_size: Number of rows to read from each file at a time.
     """
-    with pq.ParquetWriter(output_path, schema=None, compression='gzip') as writer:
+    writer = None
 
     for file_path in files_paths:
         print(f"Processing file: {file_path}")
         parquet_file = pq.ParquetFile(file_path)
@@ -38,10 +38,16 @@ def concatenate_parquet_files_incremental(files_paths, output_path, batch_size=1
             # Convert the batch to a PyArrow Table
             table = pa.Table.from_batches([batch])
 
+            # If the writer is not initialized, create a new Parquet writer
+            if writer is None:
+                writer = pq.ParquetWriter(output_path, table.schema, compression='gzip')
             # Write the batch to the output Parquet file
             writer.write_table(table)
 
-    print(f"Concatenated parquet file written to {output_path}")
+    # Close the writer after all batches are written
+    if writer is not None:
+        writer.close()
+    print(f"Concatenated parquet file written to {output_path}")


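Putting the two hunks together, the merge routine after this commit reads roughly as follows. This is a sketch reconstructed from the diff context, not a verbatim copy of the file: it assumes the module imports `pyarrow as pa` and `pyarrow.parquet as pq` (the aliases used in the changed lines), and the `batch_size` default is truncated in the hunk header, so a placeholder value is used here.

```python
import pyarrow as pa
import pyarrow.parquet as pq


def concatenate_parquet_files_incremental(files_paths, output_path, batch_size=100_000):
    """
    Concatenate Parquet files incrementally to keep memory use bounded.

    :param files_paths: Paths of the input Parquet files (assumed name).
    :param output_path: Path of the concatenated output file.
    :param batch_size: Number of rows to read from each file at a time.
    """
    writer = None

    for file_path in files_paths:
        print(f"Processing file: {file_path}")
        parquet_file = pq.ParquetFile(file_path)

        # Stream each input in fixed-size row batches instead of loading it whole
        for batch in parquet_file.iter_batches(batch_size=batch_size):
            # Convert the batch to a PyArrow Table
            table = pa.Table.from_batches([batch])

            # If the writer is not initialized, create a new Parquet writer,
            # taking the schema from the first batch actually read
            if writer is None:
                writer = pq.ParquetWriter(output_path, table.schema, compression='gzip')

            # Write the batch to the output Parquet file
            writer.write_table(table)

    # Close the writer after all batches are written
    if writer is not None:
        writer.close()
    print(f"Concatenated parquet file written to {output_path}")
```

Deferring writer construction until the first batch arrives is what makes the fix work: `pq.ParquetWriter` needs a concrete schema, which the deleted `schema=None` call could not supply.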
1 change: 0 additions & 1 deletion fslite/tests/generate_big_tests.py
@@ -41,7 +41,6 @@ def generate_large_test_dataset():
             f"feature{i}": rng.random(chunk_end - chunk_start)
             for i in range(1, n_features + 1)
         }
-        )
 
         # Create DataFrame chunk
         chunk_data = {"sample_id": chunk_sample_ids, "label": chunk_labels}
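The deleted parenthesis was a stray token in the chunked dataset generator, most plausibly left over from an earlier wrapper call around the feature dictionary. For orientation, here is a minimal, self-contained sketch of the pattern those lines belong to; everything outside the lines visible in the diff (`n_samples`, `chunk_size`, the label and sample-ID construction, the DataFrame assembly) is a hypothetical reconstruction, not code from the repository.

```python
import numpy as np
import pandas as pd


def generate_large_test_dataset(n_samples=1_000, n_features=50, chunk_size=200):
    """Build a labeled feature table in chunks, mirroring the diff's pattern."""
    rng = np.random.default_rng(42)
    chunks = []

    for chunk_start in range(0, n_samples, chunk_size):
        chunk_end = min(chunk_start + chunk_size, n_samples)
        chunk_sample_ids = [f"sample{j}" for j in range(chunk_start, chunk_end)]
        chunk_labels = rng.integers(0, 2, chunk_end - chunk_start)

        # Per-chunk feature columns; the commit removes a stray ')' that
        # followed this dictionary's closing brace
        features = {
            f"feature{i}": rng.random(chunk_end - chunk_start)
            for i in range(1, n_features + 1)
        }

        # Create DataFrame chunk
        chunk_data = {"sample_id": chunk_sample_ids, "label": chunk_labels}
        chunk_data.update(features)
        chunks.append(pd.DataFrame(chunk_data))

    return pd.concat(chunks, ignore_index=True)
```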
72 changes: 0 additions & 72 deletions fslite/tests/test_fs_pipeline.py

This file was deleted.
