Skip to content

Commit

Permalink
lint, shard name fix
Browse files (browse the repository at this point in the history)
  • Loading branch information
nikil-ravi committed Oct 13, 2024
1 parent 52bff4f commit af78281
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions tests/test_sharded_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def test_sniff_format_for_parquet():
f.flush()

assert _sniff_format_for_dataset(f.name) == ".parquet"


@skip_if_no_soundlibs
def test_resolve_audio_pointer():
Expand All @@ -56,15 +56,17 @@ def test_basic_parquet_datasource_read_row():
table = pa.Table.from_pydict(data)
pq.write_table(table, f.name)

# Instantiate the ParquetDataSource
datasource = ParquetDataSource([f.name])

assert len(datasource.shard_names) == 1, "Expected only one shard"
shard_name = datasource.shard_names[0]

# sanity check: Read data starting from row 1
- row_data = list(datasource.open_shard_at_row(shard_name=f.name.replace(".", "_"), row=1))
+ row_data = list(datasource.open_shard_at_row(shard_name=shard_name, row=1))

# Verify the output
assert len(row_data) == 2 # We expect 2 rows starting from index 1
assert row_data[0]["column1"] == "value2"
assert row_data[0]["column2"] == 20
assert row_data[1]["column1"] == "value3"
- assert row_data[1]["column2"] == 30
+ assert row_data[1]["column2"] == 30

0 comments on commit af78281

Please sign in to comment.