From 781601600f64835b66d1a3757a16846f8d83de69 Mon Sep 17 00:00:00 2001
From: IlyaFaer
Date: Wed, 10 Apr 2024 11:33:09 +0400
Subject: [PATCH 1/3] test(filesystem): add UNC file path test

---
 tests/te.py | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 tests/te.py

diff --git a/tests/te.py b/tests/te.py
new file mode 100644
index 000000000..4219cad4f
--- /dev/null
+++ b/tests/te.py
@@ -0,0 +1,45 @@
+import os
+import posixpath
+from typing import Iterator
+import dlt
+from dlt.sources import TDataItems
+
+from sources.filesystem import (
+    FileItemDict,
+    filesystem,
+    readers,
+    read_csv,
+)
+
+TESTS_BUCKET_URL = r"\\localhost\\c$\\git_reps"
+
+# import fsspec
+
+# with fsspec.open(TESTS_BUCKET_URL + "\\VendorProvince.csv") as f:
+#     print(f.read())
+
+# exit()
+
+
+def test_load_csv() -> None:
+    """Demonstrates how to scan folder with csv files, load them in chunk and merge on date column with the previous load"""
+    pipeline = dlt.pipeline(
+        pipeline_name="standard_filesystem_csv",
+        destination="duckdb",
+        dataset_name="file_data",
+        full_refresh=True,
+    )
+
+    data_file = (
+        filesystem(bucket_url=TESTS_BUCKET_URL, file_glob="VendorProvince.csv")
+        | read_csv()
+    )
+
+    load_info = pipeline.run(data_file)
+
+    print(load_info)
+    print(pipeline.last_trace.last_normalize_info)
+
+
+test_load_csv()
+# //localhost\c$\git_reps/VendorProvince.csv

From 9f5bb567fdc9d6229cba7e562b83dfbc4fcb7280 Mon Sep 17 00:00:00 2001
From: IlyaFaer
Date: Thu, 11 Apr 2024 13:25:23 +0400
Subject: [PATCH 2/3] add a UNC path test

---
 tests/filesystem/test_filesystem.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/tests/filesystem/test_filesystem.py b/tests/filesystem/test_filesystem.py
index 550f4c29d..38cc1c589 100644
--- a/tests/filesystem/test_filesystem.py
+++ b/tests/filesystem/test_filesystem.py
@@ -1,4 +1,5 @@
 import os
+import platform
 from typing import Any, Dict, List
 
 import dlt
@@ -8,6 +9,7 @@
 from sources.filesystem import (
     filesystem,
     readers,
+    read_csv,
     FileItem,
     FileItemDict,
 )
@@ -255,3 +257,24 @@ def test_file_chunking() -> None:
         assert len(pipe_item.item) == 2
         # no need to test more chunks
         break
+
+
+@pytest.mark.skipif(platform.system() != "Windows", reason="Test it only on Windows")
+def test_windows_unc_path() -> None:
+    bucket_url = r"\\localhost\\" + os.path.abspath(
+        "tests/filesystem/samples/csv"
+    ).replace(":", "$")
+
+    pipeline = dlt.pipeline(
+        pipeline_name="unc_path_test", destination="duckdb", full_refresh=True
+    )
+
+    data_file = (
+        filesystem(bucket_url=bucket_url, file_glob="freshman_kgs.csv") | read_csv()
+    )
+
+    load_info = pipeline.run(data_file)
+    assert_load_info(load_info)
+
+    table_counts = load_table_counts(pipeline, "_read_csv")
+    assert table_counts["_read_csv"] == 67

From 07141087adb46f0121c673b16dda311e0d39281f Mon Sep 17 00:00:00 2001
From: IlyaFaer
Date: Thu, 11 Apr 2024 13:45:57 +0400
Subject: [PATCH 3/3] delete test file

---
 tests/te.py | 45 ---------------------------------------------
 1 file changed, 45 deletions(-)
 delete mode 100644 tests/te.py

diff --git a/tests/te.py b/tests/te.py
deleted file mode 100644
index 4219cad4f..000000000
--- a/tests/te.py
+++ /dev/null
@@ -1,45 +0,0 @@
-import os
-import posixpath
-from typing import Iterator
-import dlt
-from dlt.sources import TDataItems
-
-from sources.filesystem import (
-    FileItemDict,
-    filesystem,
-    readers,
-    read_csv,
-)
-
-TESTS_BUCKET_URL = r"\\localhost\\c$\\git_reps"
-
-# import fsspec
-
-# with fsspec.open(TESTS_BUCKET_URL + "\\VendorProvince.csv") as f:
-#     print(f.read())
-
-# exit()
-
-
-def test_load_csv() -> None:
-    """Demonstrates how to scan folder with csv files, load them in chunk and merge on date column with the previous load"""
-    pipeline = dlt.pipeline(
-        pipeline_name="standard_filesystem_csv",
-        destination="duckdb",
-        dataset_name="file_data",
-        full_refresh=True,
-    )
-
-    data_file = (
-        filesystem(bucket_url=TESTS_BUCKET_URL, file_glob="VendorProvince.csv")
-        | read_csv()
-    )
-
-    load_info = pipeline.run(data_file)
-
-    print(load_info)
-    print(pipeline.last_trace.last_normalize_info)
-
-
-test_load_csv()
-# //localhost\c$\git_reps/VendorProvince.csv