From 0b17c95c0a5c77900d57e5589d4d9224dbeb780a Mon Sep 17 00:00:00 2001 From: Alvaro Huarte Date: Mon, 21 Oct 2024 23:59:54 +0200 Subject: [PATCH 1/2] elegant headers & params syntax --- src/http_client_extension.cpp | 42 ++++++++++++++++++++--------------- test/sql/httpclient.test | 39 ++++++++++++++++++++++++++++++-- 2 files changed, 61 insertions(+), 20 deletions(-) diff --git a/src/http_client_extension.cpp b/src/http_client_extension.cpp index 5e6f93c..c71c5e5 100644 --- a/src/http_client_extension.cpp +++ b/src/http_client_extension.cpp @@ -1,6 +1,8 @@ #define DUCKDB_EXTENSION_MAIN #include "http_client_extension.hpp" #include "duckdb.hpp" +#include "duckdb/common/types.hpp" +#include "duckdb/common/vector_operations/generic_executor.hpp" #include "duckdb/function/scalar_function.hpp" #include "duckdb/main/extension_util.hpp" #include "duckdb/common/atomic.hpp" @@ -118,14 +120,18 @@ static void HTTPGetRequestFunction(DataChunk &args, ExpressionState &state, Vect static void HTTPPostRequestFunction(DataChunk &args, ExpressionState &state, Vector &result) { D_ASSERT(args.data.size() == 3); + using STRING_TYPE = PrimitiveType; + using LENTRY_TYPE = PrimitiveType; + auto &url_vector = args.data[0]; auto &headers_vector = args.data[1]; + auto &headers_entry = ListVector::GetEntry(headers_vector); auto &body_vector = args.data[2]; - TernaryExecutor::Execute( + GenericExecutor::ExecuteTernary( url_vector, headers_vector, body_vector, result, args.size(), - [&](string_t url, string_t headers, string_t body) { - std::string url_str = url.GetString(); + [&](STRING_TYPE url, LENTRY_TYPE headers, STRING_TYPE body) { + std::string url_str = url.val.GetString(); // Use helper to setup client and parse URL auto client_and_path = SetupHttpClient(url_str); @@ -134,24 +140,24 @@ static void HTTPPostRequestFunction(DataChunk &args, ExpressionState &state, Vec // Prepare headers duckdb_httplib_openssl::Headers header_map; - std::istringstream header_stream(headers.GetString()); - std::string header; - while (std::getline(header_stream, header)) { - size_t colon_pos = header.find(':'); - if (colon_pos != std::string::npos) { - std::string key = header.substr(0, colon_pos); - std::string value = header.substr(colon_pos + 1); - // Trim leading and trailing whitespace - key.erase(0, key.find_first_not_of(" \t")); - key.erase(key.find_last_not_of(" \t") + 1); - value.erase(0, value.find_first_not_of(" \t")); - value.erase(value.find_last_not_of(" \t") + 1); - header_map.emplace(key, value); + auto header_list = headers.val; + for (idx_t i = header_list.offset; i < header_list.offset + header_list.length; i++) { + const auto &child_value = headers_entry.GetValue(i); + + Vector tmp(child_value); + auto &children = StructVector::GetEntries(tmp); + + if (children.size() == 2) { + auto name = FlatVector::GetData(*children[0]); + auto data = FlatVector::GetData(*children[1]); + std::string key = name->GetString(); + std::string val = data->GetString(); + header_map.emplace(key, val); } } // Make the POST request with headers and body - auto res = client.Post(path.c_str(), header_map, body.GetString(), "application/json"); + auto res = client.Post(path.c_str(), header_map, body.val.GetString(), "application/json"); if (res) { if (res->status == 200) { return StringVector::AddString(result, res->body); @@ -175,7 +181,7 @@ static void LoadInternal(DatabaseInstance &instance) { ScalarFunctionSet http_post("http_post"); http_post.AddFunction(ScalarFunction( - {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::VARCHAR}, + {LogicalType::VARCHAR, LogicalType::MAP(LogicalType::VARCHAR, LogicalType::VARCHAR), LogicalType::JSON()}, LogicalType::VARCHAR, HTTPPostRequestFunction)); ExtensionUtil::RegisterFunction(instance, http_post); } diff --git a/test/sql/httpclient.test b/test/sql/httpclient.test index d542032..69049ab 100644 --- a/test/sql/httpclient.test +++ b/test/sql/httpclient.test @@ -44,8 +44,9 @@ WITH __input AS ( 'https://httpbin.org/delay/0', headers => MAP { 'accept': 'application/json', - }::VARCHAR, - params => MAP {}::VARCHAR + }, + params => MAP { + } ) AS data ), __features AS ( @@ -62,3 +63,37 @@ WITH __input AS ( ; ---- httpbin.org + +# Confirm the POST extension works with headers and params +query I +WITH __input AS ( + SELECT + http_post( + 'https://earth-search.aws.element84.com/v0/search', + headers => MAP { + 'Content-Type': 'application/json', + 'Accept-Encoding': 'gzip', + 'Accept': 'application/geo+json' + }, + params => { + 'collections': ['sentinel-s2-l2a-cogs'], + 'ids': ['S2A_56LPN_20210930_0_L2A'], + 'datetime': '2021-09-30/2021-09-30', + 'limit': 10 + } + ) AS data + ), + __features AS ( + SELECT + unnest( from_json((data::JSON)->'features', '["json"]') ) + AS features + FROM + __input + ) + SELECT + features->>'id' AS id + FROM + __features + ; +---- +S2A_56LPN_20210930_0_L2A From 12d94e61433062bdadea91578404ec51d7c202f7 Mon Sep 17 00:00:00 2001 From: Alvaro Huarte Date: Tue, 22 Oct 2024 00:17:12 +0200 Subject: [PATCH 2/2] Update README.md --- docs/README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/README.md b/docs/README.md index cbb42ef..b6a00b9 100644 --- a/docs/README.md +++ b/docs/README.md @@ -46,8 +46,9 @@ WITH __input AS ( 'https://httpbin.org/delay/0', headers => MAP { 'accept': 'application/json', - }::VARCHAR, - params => MAP {}::VARCHAR + }, + params => MAP { + } ) AS data ), __features AS (