diff --git a/.github/workflows/python-integration.yml b/.github/workflows/python-integration.yml
index 62a65f79bf..a3e2d85c4a 100644
--- a/.github/workflows/python-integration.yml
+++ b/.github/workflows/python-integration.yml
@@ -31,7 +31,11 @@ concurrency:
 
 jobs:
   integration-test:
-    runs-on: ubuntu-22.04
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ "ubuntu-22.04", "windows-2022" ]
 
     steps:
     - uses: actions/checkout@v4
diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index aa27796081..1f5fe279bb 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -30,6 +30,7 @@
 import itertools
 import logging
 import os
+import platform
 import re
 import uuid
 import warnings
@@ -56,6 +57,7 @@
     cast,
 )
 from urllib.parse import urlparse
+from urllib.request import url2pathname
 
 import numpy as np
 import pyarrow as pa
@@ -342,6 +344,14 @@ def parse_location(location: str) -> Tuple[str, str, str]:
             return "file", uri.netloc, os.path.abspath(location)
         elif uri.scheme in ("hdfs", "viewfs"):
             return uri.scheme, uri.netloc, uri.path
+        elif len(uri.scheme) == 1 and uri.scheme.isalpha() and platform.system() == "Windows":
+            # urlparse reads the drive letter of "C:\..." as a one-letter scheme; treat it as a local path.
+            return "file", "", os.path.abspath(location)
+        elif uri.scheme == "file" and platform.system() == "Windows":
+            # Turn a file:// URL into a native path; a non-empty host becomes a UNC prefix (\\host).
+            netloc = rf"\\{uri.netloc}" if uri.netloc else uri.netloc
+            path = url2pathname(f"{netloc}{uri.path}")
+            return uri.scheme, uri.netloc, path
         else:
             return uri.scheme, uri.netloc, f"{uri.netloc}{uri.path}"
 
diff --git a/tests/io/test_pyarrow.py b/tests/io/test_pyarrow.py
index e4017e1df5..43f7b5e6f0 100644
--- a/tests/io/test_pyarrow.py
+++ b/tests/io/test_pyarrow.py
@@ -1701,6 +1701,17 @@ def check_results(location: str, expected_schema: str, expected_netloc: str, exp
 
     check_results("/root/foo.txt", "file", "", "/root/foo.txt")
     check_results("/root/tmp/foo.txt", "file", "", "/root/tmp/foo.txt")
+    with patch("pyiceberg.io.pyarrow.platform") as mock_platform:
+        with patch("pyiceberg.io.pyarrow.os") as mock_os:
+            with patch("pyiceberg.io.pyarrow.url2pathname") as mock_url2pathname:
+                windows_paths = [r"\\nfs_server\root\tmp\foo.txt", r"C:\root\tmp\foo.txt"]
+                mock_platform.system.return_value = "Windows"
+                mock_os.path.abspath.side_effect = windows_paths
+                mock_url2pathname.side_effect = windows_paths
+                check_results(r"\\nfs_server\root\tmp\foo.txt", "file", "", r"\\nfs_server\root\tmp\foo.txt")
+                check_results(r"file://nfs_server/root/tmp/foo.txt", "file", "nfs_server", r"\\nfs_server\root\tmp\foo.txt")
+                check_results(r"C:\root\tmp\foo.txt", "file", "", r"C:\root\tmp\foo.txt")
+                check_results(r"file:///C:/root/tmp/foo.txt", "file", "", r"C:\root\tmp\foo.txt")
 
 
 def test_make_compatible_name() -> None: