-
Notifications
You must be signed in to change notification settings - Fork 175
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix(filesystem): UNC paths are not supported #1209
Changes from all commits
8e111c1
45d1d7b
e39b8bc
de82bb5
7c465bc
6fee728
f43a13a
e813a3c
95edfaf
50c6f2b
c045539
57a6b47
2f1346c
0b8e04e
7c1bdfb
e7a893f
fbcffbb
f111714
7c0a918
5232ce6
98b985f
e0ef3a9
d983976
3810f80
44cca36
23aee6d
556a256
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
import os | ||
from typing import TYPE_CHECKING, Any, Literal, Optional, Type, get_args, ClassVar, Dict, Union | ||
from urllib.parse import urlparse | ||
import pathlib | ||
from typing import Any, Literal, Optional, Type, get_args, ClassVar, Dict, Union | ||
from urllib.parse import urlparse, unquote | ||
|
||
from dlt.common.configuration import configspec, resolve_type | ||
from dlt.common.configuration.exceptions import ConfigurationValueError | ||
|
@@ -87,24 +88,22 @@ class FilesystemConfiguration(BaseConfiguration): | |
@property | ||
def protocol(self) -> str: | ||
"""`bucket_url` protocol""" | ||
url = urlparse(self.bucket_url) | ||
# this prevents windows absolute paths to be recognized as schemas | ||
if not url.scheme or (os.path.isabs(self.bucket_url) and "\\" in self.bucket_url): | ||
if self.is_local_path(self.bucket_url): | ||
return "file" | ||
else: | ||
return url.scheme | ||
return urlparse(self.bucket_url).scheme | ||
|
||
def on_resolved(self) -> None: | ||
url = urlparse(self.bucket_url) | ||
if not url.path and not url.netloc: | ||
uri = urlparse(self.bucket_url) | ||
if not uri.path and not uri.netloc: | ||
raise ConfigurationValueError( | ||
"File path or netloc missing. Field bucket_url of FilesystemClientConfiguration" | ||
" must contain valid url with a path or host:password component." | ||
"File path and netloc are missing. Field bucket_url of" | ||
" FilesystemClientConfiguration must contain valid uri with a path or host:password" | ||
" component." | ||
) | ||
# this is just a path in a local file system | ||
if url.path == self.bucket_url: | ||
url = url._replace(scheme="file") | ||
self.bucket_url = url.geturl() | ||
if self.is_local_path(self.bucket_url): | ||
self.bucket_url = self.make_file_uri(self.bucket_url) | ||
|
||
@resolve_type("credentials") | ||
def resolve_credentials_type(self) -> Type[CredentialsConfiguration]: | ||
|
@@ -117,11 +116,57 @@ def fingerprint(self) -> str: | |
|
||
def __str__(self) -> str: | ||
"""Return displayable destination location""" | ||
url = urlparse(self.bucket_url) | ||
uri = urlparse(self.bucket_url) | ||
# do not show passwords | ||
if url.password: | ||
new_netloc = f"{url.username}:****@{url.hostname}" | ||
if url.port: | ||
new_netloc += f":{url.port}" | ||
return url._replace(netloc=new_netloc).geturl() | ||
if uri.password: | ||
new_netloc = f"{uri.username}:****@{uri.hostname}" | ||
if uri.port: | ||
new_netloc += f":{uri.port}" | ||
return uri._replace(netloc=new_netloc).geturl() | ||
return self.bucket_url | ||
|
||
@staticmethod | ||
def is_local_path(uri: str) -> bool: | ||
"""Checks if `uri` is a local path, without a schema""" | ||
uri_parsed = urlparse(uri) | ||
# this prevents windows absolute paths to be recognized as schemas | ||
return not uri_parsed.scheme or os.path.isabs(uri) | ||
|
||
@staticmethod | ||
def make_local_path(file_uri: str) -> str: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this could have been in some dlt.common.destination.utils|filesystem helpers |
||
"""Gets a valid local filesystem path from file:// scheme. | ||
Supports POSIX/Windows/UNC paths | ||
|
||
Returns: | ||
str: local filesystem path | ||
""" | ||
uri = urlparse(file_uri) | ||
if uri.scheme != "file": | ||
raise ValueError(f"Must be file scheme but is {uri.scheme}") | ||
if not uri.path and not uri.netloc: | ||
raise ConfigurationValueError("File path and netloc are missing.") | ||
local_path = unquote(uri.path) | ||
if uri.netloc: | ||
# or UNC file://localhost/path | ||
local_path = "//" + unquote(uri.netloc) + local_path | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is it intended to be |
||
else: | ||
# if we are on windows, strip the POSIX root from path which is always absolute | ||
if os.path.sep != local_path[0]: | ||
# filesystem root | ||
if local_path == "/": | ||
return str(pathlib.Path("/").resolve()) | ||
# this prevents /C:/ or ///share/ where both POSIX and Windows root are present | ||
if os.path.isabs(local_path[1:]): | ||
local_path = local_path[1:] | ||
return str(pathlib.Path(local_path)) | ||
|
||
@staticmethod | ||
def make_file_uri(local_path: str) -> str: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this could have been in some There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you are right that this needs a better place. we have file utils in |
||
"""Creates a normalized file:// uri from a local path | ||
|
||
netloc is never set. UNC paths are represented as file://host/path | ||
""" | ||
p_ = pathlib.Path(local_path) | ||
p_ = p_.expanduser().resolve() | ||
# return "file:///" + p_.as_posix().lstrip("/") | ||
return p_.as_uri() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think
File path and netloc are missing.
will confuse people, to avoid this should we mention that we are usingurlparse
orURI
?