Treat warning as error in CI/Dev #973

Merged · 2 commits · Aug 1, 2024
23 changes: 15 additions & 8 deletions pyproject.toml
@@ -29,15 +29,18 @@ classifiers = [
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11"
"Programming Language :: Python :: 3.11",
]
packages = [
{ include = "pyiceberg" },
{ from = "vendor", include = "fb303" },
{ from = "vendor", include = "hive_metastore" },
{ include = "tests", format = "sdist" },
{ include = "Makefile", format = "sdist" },
{ include = "NOTICE", format = ["sdist", "wheel"] }
{ include = "NOTICE", format = [
"sdist",
"wheel",
] },
]
include = [
{ path = "dev", format = "sdist" },
@@ -62,8 +65,8 @@ pyarrow = { version = ">=9.0.0,<18.0.0", optional = true }
pandas = { version = ">=1.0.0,<3.0.0", optional = true }
duckdb = { version = ">=0.5.0,<2.0.0", optional = true }
ray = [
{ version = "==2.10.0", python = "<3.9", optional = true},
{ version = ">=2.10.0,<3.0.0", python = ">=3.9", optional = true}
{ version = "==2.10.0", python = "<3.9", optional = true },
{ version = ">=2.10.0,<3.0.0", python = ">=3.9", optional = true },
]
python-snappy = { version = ">=0.6.0,<1.0.0", optional = true }
thrift = { version = ">=0.13.0,<1.0.0", optional = true }
@@ -599,13 +602,17 @@ markers = [
"s3: marks a test as requiring access to s3 compliant storage (use with --aws-access-key-id, --aws-secret-access-key, and --endpoint args)",
"adlfs: marks a test as requiring access to adlfs compliant storage (use with --adlfs.account-name, --adlfs.account-key, and --adlfs.endpoint args)",
"integration: marks integration tests against Apache Spark",
"gcs: marks a test as requiring access to gcs compliant storage (use with --gs.token, --gs.project, and --gs.endpoint)"
"gcs: marks a test as requiring access to gcs compliant storage (use with --gs.token, --gs.project, and --gs.endpoint)",
]

# Turns a warning into an error
-#filterwarnings = [
-#    "error"
-#]
+filterwarnings = [
+    "error",
+    "ignore:A plugin raised an exception during an old-style hookwrapper teardown.",
+    "ignore:unclosed <socket.socket",
+    # Remove this in a future release of PySpark.
+    "ignore:distutils Version classes are deprecated. Use packaging.version instead.",
+]

[tool.black]
line-length = 130
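
The new filterwarnings block switches pytest to its strictest policy: the "error" entry escalates any warning raised during a test into a failure, and each "ignore:<prefix>" entry carves out a known, accepted warning (the prefix is matched as a regular expression against the start of the warning message). A minimal sketch of that behavior with a hypothetical test, not taken from the PyIceberg suite:

import warnings

import pytest


@pytest.mark.filterwarnings("error")
def test_unexpected_warning_fails() -> None:
    # Under the "error" filter, warnings.warn raises the warning as an
    # exception, so any warning that is not explicitly ignored fails the test.
    with pytest.raises(UserWarning, match="deprecated"):
        warnings.warn("this API is deprecated", UserWarning)
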
2 changes: 1 addition & 1 deletion tests/catalog/test_sql.py
@@ -355,7 +355,7 @@ def test_write_pyarrow_schema(catalog: SqlCatalog, table_identifier: Identifier)
namespace = Catalog.namespace_from(table_identifier_nocatalog)
catalog.create_namespace(namespace)
table = catalog.create_table(table_identifier, pyarrow_table.schema)
-table.overwrite(pyarrow_table)
+table.append(pyarrow_table)


@pytest.mark.parametrize(
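
The first write here changes from overwrite to append, presumably because overwrite deletes matching rows before writing: on a freshly created, empty table the delete matches nothing, PyIceberg warns "Delete operation did not match any records", and the new filterwarnings policy would turn that warning into a test failure. A runnable sketch of the two call shapes, assuming pyiceberg[sql-sqlite] and pyarrow are installed and /tmp/warehouse is writable:

import pyarrow as pa

from pyiceberg.catalog.sql import SqlCatalog

# In-memory catalog backed by SQLite; the warehouse path only stores data files.
catalog = SqlCatalog("default", uri="sqlite:///:memory:", warehouse="file:///tmp/warehouse")
catalog.create_namespace("default")

data = pa.table({"n": pa.array([1, 2, 3], type=pa.int64())})
table = catalog.create_table("default.demo", schema=data.schema)

# append only adds data files, so no delete-related warning can fire.
table.append(data)

# overwrite deletes matching rows first and then appends; as the first write
# on an empty table it would have matched nothing and warned.
table.overwrite(data)
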
4 changes: 4 additions & 0 deletions tests/integration/test_deletes.py
@@ -145,6 +145,7 @@ def test_rewrite_partitioned_table_with_null(spark: SparkSession, session_catalo

@pytest.mark.integration
@pytest.mark.parametrize("format_version", [1, 2])
@pytest.mark.filterwarnings("ignore:Delete operation did not match any records")
def test_partitioned_table_no_match(spark: SparkSession, session_catalog: RestCatalog, format_version: int) -> None:
identifier = "default.table_partitioned_delete"

@@ -175,6 +176,7 @@ def test_partitioned_table_no_match(spark: SparkSession, session_catalog: RestCa


@pytest.mark.integration
@pytest.mark.filterwarnings("ignore:Merge on read is not yet supported, falling back to copy-on-write")
def test_delete_partitioned_table_positional_deletes(spark: SparkSession, session_catalog: RestCatalog) -> None:
identifier = "default.table_partitioned_delete"

@@ -223,6 +225,7 @@ def test_delete_partitioned_table_positional_deletes(spark: SparkSession, sessio


@pytest.mark.integration
@pytest.mark.filterwarnings("ignore:Merge on read is not yet supported, falling back to copy-on-write")
def test_overwrite_partitioned_table(spark: SparkSession, session_catalog: RestCatalog) -> None:
identifier = "default.table_partitioned_delete"

@@ -274,6 +277,7 @@ def test_overwrite_partitioned_table(spark: SparkSession, session_catalog: RestC


@pytest.mark.integration
@pytest.mark.filterwarnings("ignore:Merge on read is not yet supported, falling back to copy-on-write")
def test_partitioned_table_positional_deletes_sequence_number(spark: SparkSession, session_catalog: RestCatalog) -> None:
identifier = "default.table_partitioned_delete_sequence_number"

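
Tests that intentionally trigger a known warning now opt out per test with @pytest.mark.filterwarnings; filters from a mark are applied on top of the project-wide configuration, so the expected warning is tolerated while every other warning still errors. A minimal sketch with a hypothetical test:

import warnings

import pytest


@pytest.mark.filterwarnings("ignore:Merge on read is not yet supported, falling back to copy-on-write")
def test_expected_fallback_warning_is_tolerated() -> None:
    # Matches the mark's message prefix, so the warning is ignored even though
    # the project-wide policy escalates warnings to errors.
    warnings.warn("Merge on read is not yet supported, falling back to copy-on-write")
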
6 changes: 3 additions & 3 deletions tests/integration/test_inspect_table.py
@@ -79,7 +79,7 @@ def test_inspect_snapshots(
identifier = "default.table_metadata_snapshots"
tbl = _create_table(session_catalog, identifier, properties={"format-version": format_version})

-tbl.overwrite(arrow_table_with_null)
+tbl.append(arrow_table_with_null)
# should produce a DELETE entry
tbl.overwrite(arrow_table_with_null)
# Since we don't rewrite, this should produce a new manifest with an ADDED entry
@@ -295,7 +295,7 @@ def test_inspect_refs(
tbl = _create_table(session_catalog, identifier, properties={"format-version": format_version})

# write data to create snapshot
-tbl.overwrite(arrow_table_with_null)
+tbl.append(arrow_table_with_null)

# create a test branch
spark.sql(
@@ -667,7 +667,7 @@ def test_inspect_files(

tbl = _create_table(session_catalog, identifier, properties={"format-version": format_version})

-tbl.overwrite(arrow_table_with_null)
+tbl.append(arrow_table_with_null)

# append more data
tbl.append(arrow_table_with_null)
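
The comments in these hunks lean on the snapshot semantics of the two write paths: an initial append creates a snapshot with only ADDED manifest entries, while a subsequent overwrite creates a DELETE entry for the replaced files plus a new ADDED entry. A short sketch of how the resulting operations could be observed, continuing the in-memory catalog example above (table and data as defined there) and assuming PyIceberg's Table.snapshots() accessor:

table.append(data)     # snapshot 1: an "append" operation, ADDED entries only
table.overwrite(data)  # snapshot 2: an "overwrite" operation, DELETE plus ADDED entries

# Each snapshot summary records the operation that produced it.
print([snapshot.summary.operation for snapshot in table.snapshots()])
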
16 changes: 9 additions & 7 deletions tests/integration/test_writes/test_writes.py
@@ -256,7 +256,7 @@ def test_data_files(spark: SparkSession, session_catalog: Catalog, arrow_table_w
identifier = "default.arrow_data_files"
tbl = _create_table(session_catalog, identifier, {"format-version": "1"}, [])

-tbl.overwrite(arrow_table_with_null)
+tbl.append(arrow_table_with_null)
# should produce a DELETE entry
tbl.overwrite(arrow_table_with_null)
# Since we don't rewrite, this should produce a new manifest with an ADDED entry
@@ -288,7 +288,7 @@ def get_current_snapshot_id(identifier: str) -> int:
.snapshot_id
)

-tbl.overwrite(arrow_table_with_null)
+tbl.append(arrow_table_with_null)
assert tbl.current_snapshot().snapshot_id == get_current_snapshot_id(identifier) # type: ignore
tbl.overwrite(arrow_table_with_null)
assert tbl.current_snapshot().snapshot_id == get_current_snapshot_id(identifier) # type: ignore
@@ -330,7 +330,7 @@ def test_python_writes_special_character_column_with_spark_reads(
arrow_table_with_special_character_column = pa.Table.from_pydict(TEST_DATA_WITH_SPECIAL_CHARACTER_COLUMN, schema=pa_schema)
tbl = _create_table(session_catalog, identifier, {"format-version": format_version}, schema=pa_schema)

-tbl.overwrite(arrow_table_with_special_character_column)
+tbl.append(arrow_table_with_special_character_column)
spark_df = spark.sql(f"SELECT * FROM {identifier}").toPandas()
pyiceberg_df = tbl.scan().to_pandas()
assert spark_df.equals(pyiceberg_df)
@@ -354,7 +354,7 @@ def test_python_writes_dictionary_encoded_column_with_spark_reads(

tbl = _create_table(session_catalog, identifier, {"format-version": format_version}, schema=pa_schema)

-tbl.overwrite(arrow_table)
+tbl.append(arrow_table)
spark_df = spark.sql(f"SELECT * FROM {identifier}").toPandas()
pyiceberg_df = tbl.scan().to_pandas()
assert spark_df.equals(pyiceberg_df)
@@ -393,7 +393,7 @@ def test_python_writes_with_small_and_large_types_spark_reads(
arrow_table = pa.Table.from_pydict(TEST_DATA, schema=pa_schema)
tbl = _create_table(session_catalog, identifier, {"format-version": format_version}, schema=pa_schema)

-tbl.overwrite(arrow_table)
+tbl.append(arrow_table)
spark_df = spark.sql(f"SELECT * FROM {identifier}").toPandas()
pyiceberg_df = tbl.scan().to_pandas()
assert spark_df.equals(pyiceberg_df)
@@ -429,7 +429,7 @@ def get_data_files_count(identifier: str) -> int:

# writes 1 data file since the table is smaller than default target file size
assert arrow_table_with_null.nbytes < TableProperties.WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT
-tbl.overwrite(arrow_table_with_null)
+tbl.append(arrow_table_with_null)
assert get_data_files_count(identifier) == 1

# writes 1 data file as long as table is smaller than default target file size
@@ -820,7 +820,7 @@ def test_inspect_snapshots(
identifier = "default.table_metadata_snapshots"
tbl = _create_table(session_catalog, identifier, properties={"format-version": format_version})

-tbl.overwrite(arrow_table_with_null)
+tbl.append(arrow_table_with_null)
# should produce a DELETE entry
tbl.overwrite(arrow_table_with_null)
# Since we don't rewrite, this should produce a new manifest with an ADDED entry
@@ -979,6 +979,7 @@ def test_table_write_subset_of_schema(session_catalog: Catalog, arrow_table_with

@pytest.mark.integration
@pytest.mark.parametrize("format_version", [1, 2])
@pytest.mark.filterwarnings("ignore:Delete operation did not match any records")
def test_table_write_out_of_order_schema(session_catalog: Catalog, arrow_table_with_null: pa.Table, format_version: int) -> None:
identifier = "default.test_table_write_out_of_order_schema"
# rotate the schema fields by 1
@@ -989,6 +990,7 @@ def test_table_write_out_of_order_schema(session_catalog: Catalog, arrow_table_w
tbl = _create_table(session_catalog, identifier, {"format-version": format_version}, schema=rotated_schema)

tbl.overwrite(arrow_table_with_null)
+
tbl.append(arrow_table_with_null)
# overwrite and then append should produce twice the data
assert len(tbl.scan().to_arrow()) == len(arrow_table_with_null) * 2
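
The final hunk keeps an intentional overwrite followed by an append and asserts that the row count doubles: overwrite leaves exactly one copy of the data and append adds a second. Continuing the same in-memory catalog sketch from earlier:

table.overwrite(data)  # the table now holds exactly len(data) rows
table.append(data)     # and now twice that
assert len(table.scan().to_arrow()) == 2 * len(data)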