diff --git a/src/azure_dfs_filesystem.cpp b/src/azure_dfs_filesystem.cpp
index 739078c..27966e3 100644
--- a/src/azure_dfs_filesystem.cpp
+++ b/src/azure_dfs_filesystem.cpp
@@ -20,9 +20,11 @@ namespace duckdb {
 const string AzureDfsStorageFileSystem::SCHEME = "abfss";
 const string AzureDfsStorageFileSystem::PATH_PREFIX = "abfss://";
+const string AzureDfsStorageFileSystem::UNSECURE_SCHEME = "abfs";
+const string AzureDfsStorageFileSystem::UNSECURE_PATH_PREFIX = "abfs://";
 
 inline static bool IsDfsScheme(const string &fpath) {
-    return fpath.rfind("abfss://", 0) == 0;
+    return fpath.rfind(AzureDfsStorageFileSystem::PATH_PREFIX, 0) == 0 || fpath.rfind(AzureDfsStorageFileSystem::UNSECURE_PATH_PREFIX, 0) == 0;
 }
 
 static void Walk(const Azure::Storage::Files::DataLake::DataLakeFileSystemClient &fs, const std::string &path,
diff --git a/src/azure_parsed_url.cpp b/src/azure_parsed_url.cpp
index 2eb2be0..ccc013a 100644
--- a/src/azure_parsed_url.cpp
+++ b/src/azure_parsed_url.cpp
@@ -7,14 +7,16 @@ AzureParsedUrl ParseUrl(const std::string &url) {
     constexpr auto invalid_url_format =
         "The URL %s does not match the expected formats: (azure|az)://<container>/[<path>] or the fully qualified one: "
-        "(abfss|azure|az)://<storage account>.<endpoint>/<container>/[<path>] "
-        "or abfss://<container>@<storage account>.<endpoint>/[<path>]";
+        "(abfs[s]|azure|az)://<storage account>.<endpoint>/<container>/[<path>] "
+        "or abfs[s]://<container>@<storage account>.<endpoint>/[<path>]";
     bool is_fully_qualified;
     std::string container, storage_account_name, endpoint, prefix, path;
 
     if (url.rfind("azure://", 0) != 0 && url.rfind("az://", 0) != 0 &&
-        url.rfind(AzureDfsStorageFileSystem::PATH_PREFIX, 0) != 0) {
-        throw IOException("URL needs to start with azure:// or az:// or %s", AzureDfsStorageFileSystem::PATH_PREFIX);
+        url.rfind(AzureDfsStorageFileSystem::PATH_PREFIX, 0) != 0 && url.rfind(AzureDfsStorageFileSystem::UNSECURE_PATH_PREFIX, 0) != 0) {
+        throw IOException("URL needs to start with azure:// or az:// or %s or %s",
+                          AzureDfsStorageFileSystem::PATH_PREFIX,
+                          AzureDfsStorageFileSystem::UNSECURE_PATH_PREFIX);
     }
 
     const auto prefix_end_pos = url.find("//") + 2;
@@ -31,9 +33,12 @@ AzureParsedUrl ParseUrl(const std::string &url) {
     if (dot_pos != std::string::npos && dot_pos < slash_pos) {
         is_fully_qualified = true;
-        if (url.rfind(AzureDfsStorageFileSystem::PATH_PREFIX, 0) == 0 &&
+        if ((
+            url.rfind(AzureDfsStorageFileSystem::PATH_PREFIX, 0) == 0 ||
+            url.rfind(AzureDfsStorageFileSystem::UNSECURE_PATH_PREFIX, 0) == 0
+        ) &&
             at_pos != std::string::npos) {
-            // syntax is abfss://<container>@<storage account>.<endpoint>/[<path>]
+            // syntax is abfs[s]://<container>@<storage account>.<endpoint>/[<path>]
             const auto path_slash_pos = url.find('/', prefix_end_pos + 1);
             if (path_slash_pos == string::npos) {
                 throw IOException(invalid_url_format, url);
@@ -44,7 +49,7 @@ AzureParsedUrl ParseUrl(const std::string &url) {
             endpoint = url.substr(dot_pos + 1, path_slash_pos - dot_pos - 1);
             path = url.substr(path_slash_pos + 1);
         } else {
-            // syntax is (abfss|azure|az)://<storage account>.<endpoint>/<container>/[<path>]
+            // syntax is (abfs[s]|azure|az)://<storage account>.<endpoint>/<container>/[<path>]
             const auto container_slash_pos = url.find('/', dot_pos);
             if (container_slash_pos == string::npos) {
                 throw IOException(invalid_url_format, url);
diff --git a/src/azure_secret.cpp b/src/azure_secret.cpp
index ba24951..e9f08f0 100644
--- a/src/azure_secret.cpp
+++ b/src/azure_secret.cpp
@@ -36,6 +36,7 @@ static unique_ptr<KeyValueSecret> CreateAzureSecretFromConfig(ClientContext &context
         scope.push_back("azure://");
         scope.push_back("az://");
         scope.push_back(AzureDfsStorageFileSystem::PATH_PREFIX);
+        scope.push_back(AzureDfsStorageFileSystem::UNSECURE_PATH_PREFIX);
     }
 
     auto result = make_uniq<KeyValueSecret>(scope, input.type, input.provider, input.name);
@@ -61,6 +62,7 @@ static unique_ptr<KeyValueSecret> CreateAzureSecretFromCredentialChain(ClientContext
         scope.push_back("azure://");
         scope.push_back("az://");
         scope.push_back(AzureDfsStorageFileSystem::PATH_PREFIX);
+        scope.push_back(AzureDfsStorageFileSystem::UNSECURE_PATH_PREFIX);
     }
 
     auto result = make_uniq<KeyValueSecret>(scope, input.type, input.provider, input.name);
@@ -85,6 +87,7 @@ static unique_ptr<KeyValueSecret> CreateAzureSecretFromServicePrincipal(ClientContex
         scope.push_back("azure://");
         scope.push_back("az://");
         scope.push_back(AzureDfsStorageFileSystem::PATH_PREFIX);
+        scope.push_back(AzureDfsStorageFileSystem::UNSECURE_PATH_PREFIX);
     }
 
     auto result = make_uniq<KeyValueSecret>(scope, input.type, input.provider, input.name);
@@ -114,6 +117,7 @@ static unique_ptr<KeyValueSecret> CreateAzureSecretFromAccessToken(ClientContext &co
         scope.push_back("azure://");
         scope.push_back("az://");
         scope.push_back(AzureDfsStorageFileSystem::PATH_PREFIX);
+        scope.push_back(AzureDfsStorageFileSystem::UNSECURE_PATH_PREFIX);
     }
 
     auto result = make_uniq<KeyValueSecret>(scope, input.type, input.provider, input.name);
diff --git a/src/azure_storage_account_client.cpp b/src/azure_storage_account_client.cpp
index a1825f0..e0e2293 100644
--- a/src/azure_storage_account_client.cpp
+++ b/src/azure_storage_account_client.cpp
@@ -591,8 +591,8 @@ ConnectToDfsStorageAccount(optional_ptr<FileOpener> opener, const std::string &p
     if (!azure_parsed_url.is_fully_qualified) {
         throw InvalidInputException(
-            "Cannot identified the storage account from path '%s'. To connect anonymously to a "
-            "storage account easier a fully qualified path has to be provided or secret must be create.",
+            "Cannot identify the storage account from path '%s'. To connect anonymously to a "
+            "storage account, a fully qualified path has to be provided or a secret must be created.",
             path);
     }
diff --git a/src/include/azure_dfs_filesystem.hpp b/src/include/azure_dfs_filesystem.hpp
index 8f35dc7..9105b4e 100644
--- a/src/include/azure_dfs_filesystem.hpp
+++ b/src/include/azure_dfs_filesystem.hpp
@@ -51,6 +51,8 @@ class AzureDfsStorageFileSystem : public AzureStorageFileSystem {
 public:
     static const string SCHEME;
     static const string PATH_PREFIX;
+    static const string UNSECURE_SCHEME;
+    static const string UNSECURE_PATH_PREFIX;
 
 protected:
     // From AzureFilesystem
diff --git a/test/sql/cloud/hierarchical_namespace.test b/test/sql/cloud/hierarchical_namespace.test
index 8a4608e..8e72955 100644
--- a/test/sql/cloud/hierarchical_namespace.test
+++ b/test/sql/cloud/hierarchical_namespace.test
@@ -63,18 +63,36 @@ SELECT count(*) FROM 'abfss://testing-private/partitioned/l_receipmonth=1997/l_shipmode=TRUCK/data_0.csv';
 ----
 1291
 
+# Check with absolute path using unsecure abfs
+query I
+SELECT count(*) FROM 'abfs://testing-private/partitioned/l_receipmonth=1997/l_shipmode=TRUCK/data_0.csv';
+----
+1291
+
 # Check fully qualified name
 query I
 SELECT count(*) FROM 'abfss://${AZURE_STORAGE_ACCOUNT}.dfs.core.windows.net/testing-private/partitioned/l_receipmonth=*/l_shipmode=TRUCK/*.csv';
 ----
 2317
 
+# Check fully qualified name using unsecure abfs
+query I
+SELECT count(*) FROM 'abfs://${AZURE_STORAGE_ACCOUNT}.dfs.core.windows.net/testing-private/partitioned/l_receipmonth=*/l_shipmode=TRUCK/*.csv';
+----
+2317
+
 # Check fully qualified name abfss alternative syntax
 query I
 SELECT count(*) FROM 'abfss://testing-private@${AZURE_STORAGE_ACCOUNT}.dfs.core.windows.net/partitioned/l_receipmonth=*/l_shipmode=TRUCK/*.csv';
 ----
 2317
 
+# Check fully qualified name abfs alternative syntax
+query I
+SELECT count(*) FROM 'abfs://testing-private@${AZURE_STORAGE_ACCOUNT}.dfs.core.windows.net/partitioned/l_receipmonth=*/l_shipmode=TRUCK/*.csv';
+----
+2317
+
 # Enable http info for the explain analyze statement
 statement ok
 SET azure_http_stats = true;
@@ -84,6 +102,11 @@ EXPLAIN ANALYZE SELECT count(*) FROM 'abfss://testing-private/partitioned/l_rece
 ----
 analyzed_plan :.*HTTP Stats.*in\: 322\.0 KiB.*\#HEAD\: 1.*GET\: 4.*PUT\: 0.*\#POST\: 0.*
 
+query II
+EXPLAIN ANALYZE SELECT count(*) FROM 'abfs://testing-private/partitioned/l_receipmonth=*7/l_shipmode=TRUCK/*.csv';
+----
+analyzed_plan :.*HTTP Stats.*in\: 322\.0 KiB.*\#HEAD\: 1.*GET\: 4.*PUT\: 0.*\#POST\: 0.*
+
 query II
 EXPLAIN ANALYZE SELECT count(*) FROM 'azure://testing-private/partitioned/l_receipmonth=*7/l_shipmode=TRUCK/*.csv';
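For context, a minimal usage sketch of what this change enables (not part of the diff; the storage account, container, and path names are placeholders, and it assumes the Azure extension is loaded):

-- Create a secret without an explicit SCOPE; with this change its default
-- scope also covers abfs:// paths, not just azure://, az://, and abfss://.
CREATE SECRET azure_dfs_secret (
    TYPE AZURE,
    PROVIDER CREDENTIAL_CHAIN,
    ACCOUNT_NAME 'mystorageaccount'
);

-- The newly accepted abfs scheme now parses and resolves against that secret,
-- mirroring the existing abfss fully qualified syntax.
SELECT count(*)
FROM 'abfs://my-container@mystorageaccount.dfs.core.windows.net/some/dir/*.parquet';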