From e506f844e1b463fdabb3a81d43c12c22dd02935f Mon Sep 17 00:00:00 2001 From: lmangani Date: Wed, 1 Jan 2025 12:50:09 +0000 Subject: [PATCH] add clickhouse_scan --- Cargo.lock | 788 +++++++++++++++++++++++++++++++++++-- Cargo.toml | 6 +- src/clickhouse_scan.rs | 292 ++++++++++++++ src/lib.rs | 4 + test/sql/chsql_native.test | 6 + 5 files changed, 1072 insertions(+), 24 deletions(-) create mode 100644 src/clickhouse_scan.rs diff --git a/Cargo.lock b/Cargo.lock index b82f56d..7435f26 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + [[package]] name = "adler2" version = "2.0.0" @@ -240,6 +249,21 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" +[[package]] +name = "backtrace" +version = "0.3.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets", +] + [[package]] name = "base64" version = "0.22.1" @@ -284,7 +308,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.93", "syn_derive", ] @@ -367,14 +391,88 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "chrono-tz" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d59ae0466b83e838b81a54256c39d5d7c20b9d7daa10510a242d9b75abd5936e" +dependencies = [ + "chrono", + "chrono-tz-build", + "phf", +] + +[[package]] +name = "chrono-tz-build" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "433e39f13c9a060046954e0592a8d0a4bcb1040125cbf91cb8ee58964cfb350f" +dependencies = [ + "parse-zoneinfo", + "phf", + "phf_codegen", +] + [[package]] name = "chsql_native" version = "0.1.0" dependencies = [ "byteorder", + "clickhouse-rs", "duckdb", "duckdb-loadable-macros", "libduckdb-sys", + "serde", + "serde_derive", + "serde_json", + "tokio", +] + +[[package]] +name = "clickhouse-rs" +version = "1.1.0-alpha.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "802fe62a5480415bcdbb5217b3ea029d748c9a3ce3b884767cf58888e33e7f65" +dependencies = [ + "byteorder", + "chrono", + "chrono-tz", + "clickhouse-rs-cityhash-sys", + "combine", + "crossbeam", + "either", + "futures-core", + "futures-sink", + "futures-util", + "hostname", + "lazy_static", + "log", + "lz4", + "percent-encoding", + "pin-project", + "thiserror", + "tokio", + "url", + "uuid", +] + +[[package]] +name = "clickhouse-rs-cityhash-sys" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4baf9d4700a28d6cb600e17ed6ae2b43298a5245f1f76b4eab63027ebfd592b9" +dependencies = [ + "cc", +] + +[[package]] +name = "combine" +version = "4.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd" +dependencies = [ + "bytes", + "memchr", ] [[package]] @@ -423,6 +521,62 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-epoch", + "crossbeam-queue", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ba6d68e24814cb8de6bb986db8222d3a027d15872cabc0d18817bc3c0e4471" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + [[package]] name = "crunchy" version = "0.2.2" @@ -450,7 +604,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.79", + "syn 2.0.93", ] [[package]] @@ -461,7 +615,18 @@ checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" dependencies = [ "darling_core", "quote", - "syn 2.0.79", + "syn 2.0.93", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.93", ] [[package]] @@ -493,9 +658,15 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.93", ] +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + [[package]] name = "equivalent" version = "1.0.1" @@ -552,12 +723,65 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "form_urlencoded" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +dependencies = [ + "percent-encoding", +] + [[package]] name = "funty" version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.93", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-core", + "futures-macro", + "futures-sink", + "futures-task", + "pin-project-lite", + "pin-utils", + "slab", +] + [[package]] name = "getrandom" version = "0.2.15" @@ -569,6 +793,12 @@ dependencies = [ "wasi", ] +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + [[package]] name = "half" version = "2.4.1" @@ -625,6 +855,17 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hostname" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c731c3e10504cc8ed35cfe2f1db4c9274c3d35fa486e3b31df46f068ef3e867" +dependencies = [ + "libc", + "match_cfg", + "winapi", +] + [[package]] name = "iana-time-zone" version = "0.1.61" @@ -648,12 +889,151 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.93", +] + [[package]] name = "ident_case" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" +[[package]] +name = "idna" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + [[package]] name = "indexmap" version = "2.6.0" @@ -679,6 +1059,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "lexical-core" version = "1.0.2" @@ -762,7 +1148,7 @@ dependencies = [ "quote", "serde", "serde_json", - "syn 2.0.79", + "syn 2.0.93", "tar", "vcpkg", ] @@ -790,11 +1176,45 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "litemap" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" + [[package]] name = "log" version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +dependencies = [ + "serde", +] + +[[package]] +name = "lz4" +version = "1.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d1febb2b4a79ddd1980eede06a8f7902197960aa0383ffcfdd62fe723036725" +dependencies = [ + "lz4-sys", +] + +[[package]] +name = "lz4-sys" +version = "1.11.1+lz4-1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "match_cfg" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4" [[package]] name = "memchr" @@ -811,6 +1231,17 @@ dependencies = [ "adler2", ] +[[package]] +name = "mio" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.52.0", +] + [[package]] name = "num" version = "0.4.3" @@ -885,12 +1316,106 @@ dependencies = [ "libm", ] +[[package]] +name = "object" +version = "0.36.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +dependencies = [ + "memchr", +] + [[package]] name = "once_cell" version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +[[package]] +name = "parse-zoneinfo" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f2a05b18d44e2957b88f96ba460715e295bc1d7510468a2f3d3b44535d26c24" +dependencies = [ + "regex", +] + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "phf" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_shared" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pin-project" +version = "1.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be57f64e946e500c8ee36ef6331845d40a93055567ec57e8fae13efd33759b95" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c0f5fad0874fc7abcd4d750e76917eaebbecaa2c20bde22e1dbeeba8beb758c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.93", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + [[package]] name = "pkg-config" version = "0.3.31" @@ -913,7 +1438,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479cf940fbbb3426c32c5d5176f62ad57549a0bb84773423ba8be9d089f5faba" dependencies = [ "proc-macro2", - "syn 2.0.79", + "syn 2.0.93", ] [[package]] @@ -950,9 +1475,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.87" +version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3e4daa0dcf6feba26f985457cdf104d4b4256fc5a09547140f3631bb076b19a" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" dependencies = [ "unicode-ident", ] @@ -1114,6 +1639,12 @@ dependencies = [ "serde_json", ] +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + [[package]] name = "rustix" version = "0.38.37" @@ -1147,29 +1678,29 @@ checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" [[package]] name = "serde" -version = "1.0.210" +version = "1.0.217" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" +checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.210" +version = "1.0.217" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" +checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.93", ] [[package]] name = "serde_json" -version = "1.0.128" +version = "1.0.134" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" +checksum = "d00f4175c42ee48b15416f6193a959ba3a0d67fc699a0db9ad12df9f83991c7d" dependencies = [ "itoa", "memchr", @@ -1189,12 +1720,43 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + [[package]] name = "smallvec" version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +[[package]] +name = "socket2" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "static_assertions" version = "1.1.0" @@ -1232,7 +1794,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.79", + "syn 2.0.93", ] [[package]] @@ -1245,7 +1807,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.79", + "syn 2.0.93", ] [[package]] @@ -1261,9 +1823,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.79" +version = "2.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590" +checksum = "9c786062daee0d6db1132800e623df74274a0a87322d8e183338e01b3d98d058" dependencies = [ "proc-macro2", "quote", @@ -1279,7 +1841,18 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.93", +] + +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.93", ] [[package]] @@ -1299,6 +1872,26 @@ dependencies = [ "xattr", ] +[[package]] +name = "thiserror" +version = "1.0.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d11abd9594d9b38965ef50805c5e469ca9cc6f197f883f717e0269a3057b3d5" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae71770322cbd277e69d762a16c444af02aa0575ac0d174f0b9562d3b37f8602" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.93", +] + [[package]] name = "tiny-keccak" version = "2.0.2" @@ -1308,6 +1901,16 @@ dependencies = [ "crunchy", ] +[[package]] +name = "tinystr" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +dependencies = [ + "displaydoc", + "zerovec", +] + [[package]] name = "tinyvec" version = "1.8.0" @@ -1323,6 +1926,21 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" +[[package]] +name = "tokio" +version = "1.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cec9b21b0450273377fc97bd4c33a8acffc8c996c987a7c5b319a0083707551" +dependencies = [ + "backtrace", + "bytes", + "libc", + "mio", + "pin-project-lite", + "socket2", + "windows-sys 0.52.0", +] + [[package]] name = "toml_datetime" version = "0.6.8" @@ -1352,6 +1970,29 @@ version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" +[[package]] +name = "url" +version = "2.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "uuid" version = "1.10.0" @@ -1398,7 +2039,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.93", "wasm-bindgen-shared", ] @@ -1420,7 +2061,7 @@ checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.93", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -1431,6 +2072,28 @@ version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484" +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-core" version = "0.52.0" @@ -1531,6 +2194,18 @@ dependencies = [ "memchr", ] +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + [[package]] name = "wyz" version = "0.5.1" @@ -1551,6 +2226,30 @@ dependencies = [ "rustix", ] +[[package]] +name = "yoke" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.93", + "synstructure", +] + [[package]] name = "zerocopy" version = "0.7.35" @@ -1569,5 +2268,48 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.93", +] + +[[package]] +name = "zerofrom" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.93", + "synstructure", +] + +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.93", ] diff --git a/Cargo.toml b/Cargo.toml index 62f2dad..2bfad2d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,4 +24,8 @@ duckdb = { version = "1.1.1", features = ["vtab-loadable"] } duckdb-loadable-macros = "0.1.3" libduckdb-sys = { version = "1.1.1", features = ["loadable-extension"] } byteorder = "1.4" - +serde_derive = "1.0.217" +serde_json = "1.0.134" +tokio = { version = "1.42.0", features = ["rt", "rt-multi-thread"] } +serde = "1.0.217" +clickhouse-rs = "=1.1.0-alpha.1" diff --git a/src/clickhouse_scan.rs b/src/clickhouse_scan.rs new file mode 100644 index 0000000..17a6177 --- /dev/null +++ b/src/clickhouse_scan.rs @@ -0,0 +1,292 @@ +use std::{error::Error, ffi::CString}; +use duckdb::{ + core::{DataChunkHandle, LogicalTypeHandle, LogicalTypeId, Inserter}, + vtab::{BindInfo, Free, FunctionInfo, InitInfo, VTab}, + Connection, Result, +}; +use clickhouse_rs::{Pool, types::SqlType}; + +#[repr(C)] +struct ClickHouseScanBindData { + url: String, + user: String, + password: String, + query: String, +} + +impl Drop for ClickHouseScanBindData { + fn drop(&mut self) { + // Let Rust handle string cleanup + } +} + +impl Free for ClickHouseScanBindData { + fn free(&mut self) { + // println!("Freeing ClickHouseScanBindData..."); + // Strings will be dropped automatically via Drop trait + } +} + +#[repr(C)] +struct ClickHouseScanInitData { + runtime: tokio::runtime::Runtime, + block_data: Option>>, + column_types: Option>, + column_names: Option>, + current_row: usize, + total_rows: usize, + done: bool, +} + +impl Drop for ClickHouseScanInitData { + fn drop(&mut self) { + // Let Rust handle dropping the vectors safely + self.done = true; + // Clear optional vectors by taking ownership + self.block_data.take(); + self.column_types.take(); + self.column_names.take(); + } +} + +impl Free for ClickHouseScanInitData { + fn free(&mut self) { + // println!("Freeing ClickHouseScanInitData..."); + // Mark as done and prevent further processing + self.done = true; + self.current_row = self.total_rows; + } +} + +fn map_clickhouse_type(sql_type: SqlType) -> LogicalTypeId { + match sql_type { + SqlType::Int8 | SqlType::Int16 | SqlType::Int32 => LogicalTypeId::Integer, + SqlType::Int64 => LogicalTypeId::Bigint, + SqlType::UInt8 | SqlType::UInt16 | SqlType::UInt32 => LogicalTypeId::UInteger, + SqlType::UInt64 => LogicalTypeId::UBigint, + SqlType::Float32 => LogicalTypeId::Float, + SqlType::Float64 => LogicalTypeId::Double, + SqlType::String | SqlType::FixedString(_) => LogicalTypeId::Varchar, + SqlType::Date => LogicalTypeId::Date, + SqlType::DateTime(_) => LogicalTypeId::Timestamp, + SqlType::Bool => LogicalTypeId::Boolean, + _ => LogicalTypeId::Varchar, // Default to varchar for unknown types + } +} + +struct ClickHouseScanVTab; + +impl VTab for ClickHouseScanVTab { + type InitData = ClickHouseScanInitData; + type BindData = ClickHouseScanBindData; + + unsafe fn bind(bind: &BindInfo, data: *mut Self::BindData) -> Result<(), Box> { + // println!("Binding ClickHouseScanVTab..."); + + if data.is_null() { + return Err("Invalid bind data pointer".into()); + } + + let query = bind.get_parameter(0).to_string(); + let url = bind.get_named_parameter("url") + .map(|v| v.to_string()) + .unwrap_or_else(|| std::env::var("CLICKHOUSE_URL").unwrap_or_else(|_| "tcp://localhost:9000".to_string())); + let user = bind.get_named_parameter("user") + .map(|v| v.to_string()) + .unwrap_or_else(|| std::env::var("CLICKHOUSE_USER").unwrap_or_else(|_| "default".to_string())); + let password = bind.get_named_parameter("password") + .map(|v| v.to_string()) + .unwrap_or_else(|| std::env::var("CLICKHOUSE_PASSWORD").unwrap_or_default()); + + println!("Parameters - URL: {}, User: {}, Query: {}", url, user, query); + + unsafe { + (*data) = ClickHouseScanBindData { + url, + user, + password, + query, + }; + } + + bind.add_result_column("version", LogicalTypeHandle::from(LogicalTypeId::Varchar)); + + Ok(()) + } + + unsafe fn init(info: &InitInfo, data: *mut Self::InitData) -> Result<(), Box> { + // println!("Initializing ClickHouseScanVTab..."); + + if data.is_null() { + return Err("Invalid init data pointer".into()); + } + + let bind_data = info.get_bind_data::(); + if bind_data.is_null() { + return Err("Invalid bind data".into()); + } + + let runtime = tokio::runtime::Runtime::new()?; + + let result = runtime.block_on(async { + // println!("Creating connection pool"); + let pool = Pool::new((*bind_data).url.clone()); + // println!("Getting client handle"); + let mut client = pool.get_handle().await?; + // println!("Executing query: {}", (*bind_data).query); + let block = client.query(&(*bind_data).query).fetch_all().await?; + + let columns = block.columns(); + let mut names = Vec::new(); + let mut types = Vec::new(); + let mut data: Vec> = Vec::new(); + + // Initialize data vectors for each column + for col in columns { + names.push(col.name().to_string()); + types.push(map_clickhouse_type(col.sql_type())); + data.push(Vec::new()); + } + + let mut row_count = 0; + for row in block.rows() { + for (col_idx, col) in columns.iter().enumerate() { + let value = match col.sql_type() { + SqlType::Int8 | SqlType::Int16 | SqlType::Int32 => { + match row.get::(col.name()) { + Ok(val) => val.to_string(), + Err(_) => "NULL".to_string() + } + }, + SqlType::Int64 => { + match row.get::(col.name()) { + Ok(val) => val.to_string(), + Err(_) => "NULL".to_string() + } + }, + SqlType::UInt8 | SqlType::UInt16 | SqlType::UInt32 => { + match row.get::(col.name()) { + Ok(val) => val.to_string(), + Err(_) => "NULL".to_string() + } + }, + SqlType::UInt64 => { + match row.get::(col.name()) { + Ok(val) => val.to_string(), + Err(_) => "NULL".to_string() + } + }, + SqlType::Float32 => { + match row.get::(col.name()) { + Ok(val) => val.to_string(), + Err(_) => "NULL".to_string() + } + }, + SqlType::Float64 => { + match row.get::(col.name()) { + Ok(val) => val.to_string(), + Err(_) => "NULL".to_string() + } + }, + _ => { + match row.get::(col.name()) { + Ok(val) => val, + Err(_) => "NULL".to_string() + } + } + }; + data[col_idx].push(value); + } + row_count += 1; + } + + Ok::<(Vec>, Vec, Vec, usize), Box>((data, names, types, row_count)) + })?; + + let (block_data, column_names, column_types, total_rows) = result; + + unsafe { + // println!("Storing result data"); + (*data) = ClickHouseScanInitData { + runtime, + block_data: Some(block_data), + column_types: Some(column_types), + column_names: Some(column_names), + current_row: 0, + total_rows, + done: false, + }; + } + + Ok(()) + } + + unsafe fn func(func: &FunctionInfo, output: &mut DataChunkHandle) -> Result<(), Box> { + // println!("Executing ClickHouseScanVTab function..."); + let init_data = func.get_init_data::(); + + if init_data.is_null() { + return Err("Invalid init data pointer".into()); + } + + unsafe { + if (*init_data).done || (*init_data).current_row >= (*init_data).total_rows { + output.set_len(0); + (*init_data).done = true; + return Ok(()); + } + + // Get references to the Option vectors + let block_data = match (*init_data).block_data.as_ref() { + Some(data) => data, + None => return Err("Block data is not available".into()), + }; + + let column_types = match (*init_data).column_types.as_ref() { + Some(types) => types, + None => return Err("Column types are not available".into()), + }; + + let batch_size = 1024.min((*init_data).total_rows - (*init_data).current_row); + + for col_idx in 0..column_types.len() { + let mut vector = output.flat_vector(col_idx); + let type_id = &column_types[col_idx]; + + match type_id { + LogicalTypeId::Integer | LogicalTypeId::UInteger => { + let slice = vector.as_mut_slice::(); + for row_offset in 0..batch_size { + let row_idx = (*init_data).current_row + row_offset; + if let Ok(val) = block_data[col_idx][row_idx].parse::() { + slice[row_offset] = val; + } else { + slice[row_offset] = 0; // Default value for parse failure + } + } + }, + _ => { + for row_offset in 0..batch_size { + let row_idx = (*init_data).current_row + row_offset; + vector.insert(row_offset, block_data[col_idx][row_idx].as_str()); + } + } + } + } + + (*init_data).current_row += batch_size; + output.set_len(batch_size); + } + Ok(()) + } + + fn parameters() -> Option> { + Some(vec![LogicalTypeHandle::from(LogicalTypeId::Varchar)]) + } +} + +pub fn register_clickhouse_scan(con: &Connection) -> Result<(), Box> { + // println!("Registering ClickHouseScanVTab..."); + con.register_table_function::("clickhouse_scan")?; + Ok(()) +} diff --git a/src/lib.rs b/src/lib.rs index f2651ea..2519733 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,6 +8,8 @@ use duckdb_loadable_macros::duckdb_entrypoint_c_api; use libduckdb_sys as ffi; use byteorder::{ReadBytesExt, LittleEndian}; +mod clickhouse_scan; + #[allow(dead_code)] #[derive(Debug)] enum ColumnType { @@ -394,5 +396,7 @@ impl VTab for ClickHouseVTab { #[duckdb_entrypoint_c_api(ext_name = "chsql_native", min_duckdb_version = "v0.0.1")] pub unsafe fn extension_entrypoint(con: Connection) -> Result<(), Box> { con.register_table_function::("clickhouse_native")?; + // Register new table function + clickhouse_scan::register_clickhouse_scan(&con)?; Ok(()) } diff --git a/test/sql/chsql_native.test b/test/sql/chsql_native.test index a38262b..7ccdfb5 100644 --- a/test/sql/chsql_native.test +++ b/test/sql/chsql_native.test @@ -18,3 +18,9 @@ query I SELECT count(*) FROM clickhouse_native('./numbers.clickhouse'); ---- 1 + +# Remote +query I +SELECT * FROM clickhouse_scan("SELECT 1"); +---- +1