fixup! Treat warning as error in CI/Dev
ndrluis committed Jul 31, 2024
1 parent 2a38d57 commit b146ce6
Showing 4 changed files with 23 additions and 11 deletions.
8 changes: 7 additions & 1 deletion pyproject.toml
@@ -606,7 +606,13 @@ markers = [
]

# Turns a warning into an error
-filterwarnings = ["error"]
+filterwarnings = [
+    "error",
+    "ignore:A plugin raised an exception during an old-style hookwrapper teardown.",
+    "ignore:unclosed <socket.socket",
+    # Remove this in a future release of PySpark.
+    "ignore:distutils Version classes are deprecated. Use packaging.version instead.",
+]

[tool.black]
line-length = 130
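Note (not part of the commit): pytest hands these entries to Python's warnings machinery; the text after "ignore:" is matched against the beginning of a warning's message, and entries listed later take precedence over the blanket "error" entry, so the three ignores carve exceptions out of the warnings-as-errors policy. A rough standalone sketch of the same behaviour using the stdlib warnings module:

```python
import warnings

# Counterpart of the "error" entry: escalate every warning to an exception.
warnings.filterwarnings("error")
# Counterpart of an "ignore:<prefix>" entry: silence warnings whose message
# starts with the given text (expressed here as a regex).
warnings.filterwarnings("ignore", message=r"unclosed <socket\.socket")

warnings.warn("unclosed <socket.socket fd=7>", ResourceWarning)  # matched by the ignore filter, silenced

try:
    warnings.warn("an unexpected warning from a test")  # nothing ignores it, so it becomes an error
except UserWarning:
    print("unmatched warnings now fail loudly")
```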
4 changes: 4 additions & 0 deletions tests/integration/test_deletes.py
@@ -145,6 +145,7 @@ def test_rewrite_partitioned_table_with_null(spark: SparkSession, session_catalo

@pytest.mark.integration
@pytest.mark.parametrize("format_version", [1, 2])
+@pytest.mark.filterwarnings("ignore:Delete operation did not match any records")
def test_partitioned_table_no_match(spark: SparkSession, session_catalog: RestCatalog, format_version: int) -> None:
identifier = "default.table_partitioned_delete"

@@ -175,6 +176,7 @@ def test_partitioned_table_no_match(spark: SparkSession, session_catalog: RestCa


@pytest.mark.integration
+@pytest.mark.filterwarnings("ignore:Merge on read is not yet supported, falling back to copy-on-write")
def test_delete_partitioned_table_positional_deletes(spark: SparkSession, session_catalog: RestCatalog) -> None:
identifier = "default.table_partitioned_delete"

@@ -223,6 +225,7 @@ def test_delete_partitioned_table_positional_deletes(spark: SparkSession, sessio


@pytest.mark.integration
+@pytest.mark.filterwarnings("ignore:Merge on read is not yet supported, falling back to copy-on-write")
def test_overwrite_partitioned_table(spark: SparkSession, session_catalog: RestCatalog) -> None:
identifier = "default.table_partitioned_delete"

@@ -274,6 +277,7 @@ def test_overwrite_partitioned_table(spark: SparkSession, session_catalog: RestC


@pytest.mark.integration
+@pytest.mark.filterwarnings("ignore:Merge on read is not yet supported, falling back to copy-on-write")
def test_partitioned_table_positional_deletes_sequence_number(spark: SparkSession, session_catalog: RestCatalog) -> None:
identifier = "default.table_partitioned_delete_sequence_number"

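Note (not part of the commit): the markers added above scope an ignore to a single test, overriding the project-wide "error" filter only for the named warning; per the pytest docs, filters applied via @pytest.mark.filterwarnings take precedence over the filterwarnings ini option. A minimal sketch, where _noop_delete is a hypothetical stand-in for a delete whose predicate matches no rows:

```python
import warnings

import pytest


def _noop_delete() -> None:
    # Hypothetical stand-in: emits the same warning a no-match delete would.
    warnings.warn("Delete operation did not match any records", UserWarning)


@pytest.mark.filterwarnings("ignore:Delete operation did not match any records")
def test_delete_without_matches_passes() -> None:
    # The marker silences this one warning for this test only; without it,
    # filterwarnings = ["error"] in pyproject.toml would fail the test.
    _noop_delete()
```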
6 changes: 3 additions & 3 deletions tests/integration/test_inspect_table.py
@@ -79,7 +79,7 @@ def test_inspect_snapshots(
identifier = "default.table_metadata_snapshots"
tbl = _create_table(session_catalog, identifier, properties={"format-version": format_version})

-tbl.overwrite(arrow_table_with_null)
+tbl.append(arrow_table_with_null)
# should produce a DELETE entry
tbl.overwrite(arrow_table_with_null)
# Since we don't rewrite, this should produce a new manifest with an ADDED entry
@@ -295,7 +295,7 @@ def test_inspect_refs(
tbl = _create_table(session_catalog, identifier, properties={"format-version": format_version})

# write data to create snapshot
-tbl.overwrite(arrow_table_with_null)
+tbl.append(arrow_table_with_null)

# create a test branch
spark.sql(
@@ -667,7 +667,7 @@ def test_inspect_files(

tbl = _create_table(session_catalog, identifier, properties={"format-version": format_version})

-tbl.overwrite(arrow_table_with_null)
+tbl.append(arrow_table_with_null)

# append more data
tbl.append(arrow_table_with_null)
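Note (not part of the commit): the overwrite-to-append switches in this file and the next appear to exist because a full overwrite on a freshly created, empty table first issues a delete that matches no rows, and with warnings escalated to errors that first write would fail; once data is present, a later overwrite legitimately produces the DELETE entry the tests assert on. A rough sketch of the resulting pattern, assuming this suite's _create_table helper and the session_catalog / arrow_table_with_null fixtures (the test name is hypothetical):

```python
import pyarrow as pa
import pytest
from pyiceberg.catalog import Catalog


@pytest.mark.integration
def test_append_then_overwrite(session_catalog: Catalog, arrow_table_with_null: pa.Table) -> None:
    # `_create_table` is the suite's existing helper for dropping and recreating a test table.
    tbl = _create_table(session_catalog, "default.append_then_overwrite", properties={"format-version": "2"})

    # First write via append: the table is empty, so an overwrite here would
    # issue a delete matching no rows, which filterwarnings = ["error"] turns
    # into a test failure.
    tbl.append(arrow_table_with_null)

    # With data present, overwrite produces a DELETE entry plus a new manifest
    # with an ADDED entry, and leaves exactly one copy of the data behind.
    tbl.overwrite(arrow_table_with_null)

    assert len(tbl.scan().to_arrow()) == len(arrow_table_with_null)
```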
16 changes: 9 additions & 7 deletions tests/integration/test_writes/test_writes.py
@@ -256,7 +256,7 @@ def test_data_files(spark: SparkSession, session_catalog: Catalog, arrow_table_w
identifier = "default.arrow_data_files"
tbl = _create_table(session_catalog, identifier, {"format-version": "1"}, [])

-tbl.overwrite(arrow_table_with_null)
+tbl.append(arrow_table_with_null)
# should produce a DELETE entry
tbl.overwrite(arrow_table_with_null)
# Since we don't rewrite, this should produce a new manifest with an ADDED entry
@@ -288,7 +288,7 @@ def get_current_snapshot_id(identifier: str) -> int:
.snapshot_id
)

-tbl.overwrite(arrow_table_with_null)
+tbl.append(arrow_table_with_null)
assert tbl.current_snapshot().snapshot_id == get_current_snapshot_id(identifier) # type: ignore
tbl.overwrite(arrow_table_with_null)
assert tbl.current_snapshot().snapshot_id == get_current_snapshot_id(identifier) # type: ignore
@@ -330,7 +330,7 @@ def test_python_writes_special_character_column_with_spark_reads(
arrow_table_with_special_character_column = pa.Table.from_pydict(TEST_DATA_WITH_SPECIAL_CHARACTER_COLUMN, schema=pa_schema)
tbl = _create_table(session_catalog, identifier, {"format-version": format_version}, schema=pa_schema)

-tbl.overwrite(arrow_table_with_special_character_column)
+tbl.append(arrow_table_with_special_character_column)
spark_df = spark.sql(f"SELECT * FROM {identifier}").toPandas()
pyiceberg_df = tbl.scan().to_pandas()
assert spark_df.equals(pyiceberg_df)
@@ -354,7 +354,7 @@ def test_python_writes_dictionary_encoded_column_with_spark_reads(

tbl = _create_table(session_catalog, identifier, {"format-version": format_version}, schema=pa_schema)

-tbl.overwrite(arrow_table)
+tbl.append(arrow_table)
spark_df = spark.sql(f"SELECT * FROM {identifier}").toPandas()
pyiceberg_df = tbl.scan().to_pandas()
assert spark_df.equals(pyiceberg_df)
@@ -393,7 +393,7 @@ def test_python_writes_with_small_and_large_types_spark_reads(
arrow_table = pa.Table.from_pydict(TEST_DATA, schema=pa_schema)
tbl = _create_table(session_catalog, identifier, {"format-version": format_version}, schema=pa_schema)

-tbl.overwrite(arrow_table)
+tbl.append(arrow_table)
spark_df = spark.sql(f"SELECT * FROM {identifier}").toPandas()
pyiceberg_df = tbl.scan().to_pandas()
assert spark_df.equals(pyiceberg_df)
@@ -429,7 +429,7 @@ def get_data_files_count(identifier: str) -> int:

# writes 1 data file since the table is smaller than default target file size
assert arrow_table_with_null.nbytes < TableProperties.WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT
-tbl.overwrite(arrow_table_with_null)
+tbl.append(arrow_table_with_null)
assert get_data_files_count(identifier) == 1

# writes 1 data file as long as table is smaller than default target file size
@@ -820,7 +820,7 @@ def test_inspect_snapshots(
identifier = "default.table_metadata_snapshots"
tbl = _create_table(session_catalog, identifier, properties={"format-version": format_version})

-tbl.overwrite(arrow_table_with_null)
+tbl.append(arrow_table_with_null)
# should produce a DELETE entry
tbl.overwrite(arrow_table_with_null)
# Since we don't rewrite, this should produce a new manifest with an ADDED entry
@@ -979,6 +979,7 @@ def test_table_write_subset_of_schema(session_catalog: Catalog, arrow_table_with

@pytest.mark.integration
@pytest.mark.parametrize("format_version", [1, 2])
+@pytest.mark.filterwarnings("ignore:Delete operation did not match any records")
def test_table_write_out_of_order_schema(session_catalog: Catalog, arrow_table_with_null: pa.Table, format_version: int) -> None:
identifier = "default.test_table_write_out_of_order_schema"
# rotate the schema fields by 1
@@ -989,6 +990,7 @@ def test_table_write_out_of_order_schema(session_catalog: Catalog, arrow_table_w
tbl = _create_table(session_catalog, identifier, {"format-version": format_version}, schema=rotated_schema)

tbl.overwrite(arrow_table_with_null)
+
tbl.append(arrow_table_with_null)
# overwrite and then append should produce twice the data
assert len(tbl.scan().to_arrow()) == len(arrow_table_with_null) * 2
