From cb58fbc703468845b36cff85e0f20bee5facd34f Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Sun, 25 Feb 2024 15:36:32 +0100 Subject: [PATCH 01/38] WIP --- poetry.lock | 5997 +++++++++++++------------- pyproject.toml | 1 + sources/sql_database/exceptions.py | 46 + sources/sql_database/pg_cdc_utils.py | 250 ++ sources/sql_database_pipeline.py | 69 +- 5 files changed, 3279 insertions(+), 3084 deletions(-) create mode 100644 sources/sql_database/exceptions.py create mode 100644 sources/sql_database/pg_cdc_utils.py diff --git a/poetry.lock b/poetry.lock index 5af4adf6e..8d68a9f37 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,10 +1,15 @@ +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. + [[package]] name = "adlfs" version = "2023.9.0" description = "Access Azure Datalake Gen1 with fsspec and dask" -category = "dev" optional = false python-versions = ">=3.8" +files = [ + {file = "adlfs-2023.9.0-py3-none-any.whl", hash = "sha256:e2cff62b8128578c6d1b9da1660ad4c8a5a8cb0d491bba416b529563c65dc5d2"}, + {file = "adlfs-2023.9.0.tar.gz", hash = "sha256:1ce70ffa39f7cffc3efbbd9f79b444958eb5d9de9981442b06e47472d2089d4b"}, +] [package.dependencies] aiohttp = ">=3.7.0" @@ -21,9 +26,12 @@ docs = ["furo", "myst-parser", "numpydoc", "sphinx"] name = "aiobotocore" version = "2.5.4" description = "Async client for aws services using botocore and aiohttp" -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "aiobotocore-2.5.4-py3-none-any.whl", hash = "sha256:4b32218728ca3d0be83835b604603a0cd6c329066e884bb78149334267f92440"}, + {file = "aiobotocore-2.5.4.tar.gz", hash = "sha256:60341f19eda77e41e1ab11eef171b5a98b5dbdb90804f5334b6f90e560e31fae"}, +] [package.dependencies] aiohttp = ">=3.3.1,<4.0.0" @@ -39,9 +47,97 @@ boto3 = ["boto3 (>=1.28.17,<1.28.18)"] name = "aiohttp" version = "3.8.6" description = "Async http client/server framework (asyncio)" -category = "main" optional = false python-versions = ">=3.6" +files = [ + {file = "aiohttp-3.8.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:41d55fc043954cddbbd82503d9cc3f4814a40bcef30b3569bc7b5e34130718c1"}, + {file = "aiohttp-3.8.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1d84166673694841d8953f0a8d0c90e1087739d24632fe86b1a08819168b4566"}, + {file = "aiohttp-3.8.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:253bf92b744b3170eb4c4ca2fa58f9c4b87aeb1df42f71d4e78815e6e8b73c9e"}, + {file = "aiohttp-3.8.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3fd194939b1f764d6bb05490987bfe104287bbf51b8d862261ccf66f48fb4096"}, + {file = "aiohttp-3.8.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6c5f938d199a6fdbdc10bbb9447496561c3a9a565b43be564648d81e1102ac22"}, + {file = "aiohttp-3.8.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2817b2f66ca82ee699acd90e05c95e79bbf1dc986abb62b61ec8aaf851e81c93"}, + {file = "aiohttp-3.8.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0fa375b3d34e71ccccf172cab401cd94a72de7a8cc01847a7b3386204093bb47"}, + {file = "aiohttp-3.8.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9de50a199b7710fa2904be5a4a9b51af587ab24c8e540a7243ab737b45844543"}, + {file = "aiohttp-3.8.6-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e1d8cb0b56b3587c5c01de3bf2f600f186da7e7b5f7353d1bf26a8ddca57f965"}, + {file = "aiohttp-3.8.6-cp310-cp310-musllinux_1_1_i686.whl", hash = 
"sha256:8e31e9db1bee8b4f407b77fd2507337a0a80665ad7b6c749d08df595d88f1cf5"}, + {file = "aiohttp-3.8.6-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:7bc88fc494b1f0311d67f29fee6fd636606f4697e8cc793a2d912ac5b19aa38d"}, + {file = "aiohttp-3.8.6-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:ec00c3305788e04bf6d29d42e504560e159ccaf0be30c09203b468a6c1ccd3b2"}, + {file = "aiohttp-3.8.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ad1407db8f2f49329729564f71685557157bfa42b48f4b93e53721a16eb813ed"}, + {file = "aiohttp-3.8.6-cp310-cp310-win32.whl", hash = "sha256:ccc360e87341ad47c777f5723f68adbb52b37ab450c8bc3ca9ca1f3e849e5fe2"}, + {file = "aiohttp-3.8.6-cp310-cp310-win_amd64.whl", hash = "sha256:93c15c8e48e5e7b89d5cb4613479d144fda8344e2d886cf694fd36db4cc86865"}, + {file = "aiohttp-3.8.6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6e2f9cc8e5328f829f6e1fb74a0a3a939b14e67e80832975e01929e320386b34"}, + {file = "aiohttp-3.8.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e6a00ffcc173e765e200ceefb06399ba09c06db97f401f920513a10c803604ca"}, + {file = "aiohttp-3.8.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:41bdc2ba359032e36c0e9de5a3bd00d6fb7ea558a6ce6b70acedf0da86458321"}, + {file = "aiohttp-3.8.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14cd52ccf40006c7a6cd34a0f8663734e5363fd981807173faf3a017e202fec9"}, + {file = "aiohttp-3.8.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2d5b785c792802e7b275c420d84f3397668e9d49ab1cb52bd916b3b3ffcf09ad"}, + {file = "aiohttp-3.8.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1bed815f3dc3d915c5c1e556c397c8667826fbc1b935d95b0ad680787896a358"}, + {file = "aiohttp-3.8.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96603a562b546632441926cd1293cfcb5b69f0b4159e6077f7c7dbdfb686af4d"}, + {file = "aiohttp-3.8.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d76e8b13161a202d14c9584590c4df4d068c9567c99506497bdd67eaedf36403"}, + {file = "aiohttp-3.8.6-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e3f1e3f1a1751bb62b4a1b7f4e435afcdade6c17a4fd9b9d43607cebd242924a"}, + {file = "aiohttp-3.8.6-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:76b36b3124f0223903609944a3c8bf28a599b2cc0ce0be60b45211c8e9be97f8"}, + {file = "aiohttp-3.8.6-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:a2ece4af1f3c967a4390c284797ab595a9f1bc1130ef8b01828915a05a6ae684"}, + {file = "aiohttp-3.8.6-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:16d330b3b9db87c3883e565340d292638a878236418b23cc8b9b11a054aaa887"}, + {file = "aiohttp-3.8.6-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:42c89579f82e49db436b69c938ab3e1559e5a4409eb8639eb4143989bc390f2f"}, + {file = "aiohttp-3.8.6-cp311-cp311-win32.whl", hash = "sha256:efd2fcf7e7b9d7ab16e6b7d54205beded0a9c8566cb30f09c1abe42b4e22bdcb"}, + {file = "aiohttp-3.8.6-cp311-cp311-win_amd64.whl", hash = "sha256:3b2ab182fc28e7a81f6c70bfbd829045d9480063f5ab06f6e601a3eddbbd49a0"}, + {file = "aiohttp-3.8.6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:fdee8405931b0615220e5ddf8cd7edd8592c606a8e4ca2a00704883c396e4479"}, + {file = "aiohttp-3.8.6-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d25036d161c4fe2225d1abff2bd52c34ed0b1099f02c208cd34d8c05729882f0"}, + {file = "aiohttp-3.8.6-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:5d791245a894be071d5ab04bbb4850534261a7d4fd363b094a7b9963e8cdbd31"}, + {file = "aiohttp-3.8.6-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0cccd1de239afa866e4ce5c789b3032442f19c261c7d8a01183fd956b1935349"}, + {file = "aiohttp-3.8.6-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f13f60d78224f0dace220d8ab4ef1dbc37115eeeab8c06804fec11bec2bbd07"}, + {file = "aiohttp-3.8.6-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8a9b5a0606faca4f6cc0d338359d6fa137104c337f489cd135bb7fbdbccb1e39"}, + {file = "aiohttp-3.8.6-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:13da35c9ceb847732bf5c6c5781dcf4780e14392e5d3b3c689f6d22f8e15ae31"}, + {file = "aiohttp-3.8.6-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:4d4cbe4ffa9d05f46a28252efc5941e0462792930caa370a6efaf491f412bc66"}, + {file = "aiohttp-3.8.6-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:229852e147f44da0241954fc6cb910ba074e597f06789c867cb7fb0621e0ba7a"}, + {file = "aiohttp-3.8.6-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:713103a8bdde61d13490adf47171a1039fd880113981e55401a0f7b42c37d071"}, + {file = "aiohttp-3.8.6-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:45ad816b2c8e3b60b510f30dbd37fe74fd4a772248a52bb021f6fd65dff809b6"}, + {file = "aiohttp-3.8.6-cp36-cp36m-win32.whl", hash = "sha256:2b8d4e166e600dcfbff51919c7a3789ff6ca8b3ecce16e1d9c96d95dd569eb4c"}, + {file = "aiohttp-3.8.6-cp36-cp36m-win_amd64.whl", hash = "sha256:0912ed87fee967940aacc5306d3aa8ba3a459fcd12add0b407081fbefc931e53"}, + {file = "aiohttp-3.8.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e2a988a0c673c2e12084f5e6ba3392d76c75ddb8ebc6c7e9ead68248101cd446"}, + {file = "aiohttp-3.8.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ebf3fd9f141700b510d4b190094db0ce37ac6361a6806c153c161dc6c041ccda"}, + {file = "aiohttp-3.8.6-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3161ce82ab85acd267c8f4b14aa226047a6bee1e4e6adb74b798bd42c6ae1f80"}, + {file = "aiohttp-3.8.6-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d95fc1bf33a9a81469aa760617b5971331cdd74370d1214f0b3109272c0e1e3c"}, + {file = "aiohttp-3.8.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c43ecfef7deaf0617cee936836518e7424ee12cb709883f2c9a1adda63cc460"}, + {file = "aiohttp-3.8.6-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca80e1b90a05a4f476547f904992ae81eda5c2c85c66ee4195bb8f9c5fb47f28"}, + {file = "aiohttp-3.8.6-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:90c72ebb7cb3a08a7f40061079817133f502a160561d0675b0a6adf231382c92"}, + {file = "aiohttp-3.8.6-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:bb54c54510e47a8c7c8e63454a6acc817519337b2b78606c4e840871a3e15349"}, + {file = "aiohttp-3.8.6-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:de6a1c9f6803b90e20869e6b99c2c18cef5cc691363954c93cb9adeb26d9f3ae"}, + {file = "aiohttp-3.8.6-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:a3628b6c7b880b181a3ae0a0683698513874df63783fd89de99b7b7539e3e8a8"}, + {file = "aiohttp-3.8.6-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:fc37e9aef10a696a5a4474802930079ccfc14d9f9c10b4662169671ff034b7df"}, + {file = "aiohttp-3.8.6-cp37-cp37m-win32.whl", hash = "sha256:f8ef51e459eb2ad8e7a66c1d6440c808485840ad55ecc3cafefadea47d1b1ba2"}, + {file = "aiohttp-3.8.6-cp37-cp37m-win_amd64.whl", hash = 
"sha256:b2fe42e523be344124c6c8ef32a011444e869dc5f883c591ed87f84339de5976"}, + {file = "aiohttp-3.8.6-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:9e2ee0ac5a1f5c7dd3197de309adfb99ac4617ff02b0603fd1e65b07dc772e4b"}, + {file = "aiohttp-3.8.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:01770d8c04bd8db568abb636c1fdd4f7140b284b8b3e0b4584f070180c1e5c62"}, + {file = "aiohttp-3.8.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3c68330a59506254b556b99a91857428cab98b2f84061260a67865f7f52899f5"}, + {file = "aiohttp-3.8.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89341b2c19fb5eac30c341133ae2cc3544d40d9b1892749cdd25892bbc6ac951"}, + {file = "aiohttp-3.8.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:71783b0b6455ac8f34b5ec99d83e686892c50498d5d00b8e56d47f41b38fbe04"}, + {file = "aiohttp-3.8.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f628dbf3c91e12f4d6c8b3f092069567d8eb17814aebba3d7d60c149391aee3a"}, + {file = "aiohttp-3.8.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b04691bc6601ef47c88f0255043df6f570ada1a9ebef99c34bd0b72866c217ae"}, + {file = "aiohttp-3.8.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7ee912f7e78287516df155f69da575a0ba33b02dd7c1d6614dbc9463f43066e3"}, + {file = "aiohttp-3.8.6-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9c19b26acdd08dd239e0d3669a3dddafd600902e37881f13fbd8a53943079dbc"}, + {file = "aiohttp-3.8.6-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:99c5ac4ad492b4a19fc132306cd57075c28446ec2ed970973bbf036bcda1bcc6"}, + {file = "aiohttp-3.8.6-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:f0f03211fd14a6a0aed2997d4b1c013d49fb7b50eeb9ffdf5e51f23cfe2c77fa"}, + {file = "aiohttp-3.8.6-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:8d399dade330c53b4106160f75f55407e9ae7505263ea86f2ccca6bfcbdb4921"}, + {file = "aiohttp-3.8.6-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ec4fd86658c6a8964d75426517dc01cbf840bbf32d055ce64a9e63a40fd7b771"}, + {file = "aiohttp-3.8.6-cp38-cp38-win32.whl", hash = "sha256:33164093be11fcef3ce2571a0dccd9041c9a93fa3bde86569d7b03120d276c6f"}, + {file = "aiohttp-3.8.6-cp38-cp38-win_amd64.whl", hash = "sha256:bdf70bfe5a1414ba9afb9d49f0c912dc524cf60141102f3a11143ba3d291870f"}, + {file = "aiohttp-3.8.6-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:d52d5dc7c6682b720280f9d9db41d36ebe4791622c842e258c9206232251ab2b"}, + {file = "aiohttp-3.8.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4ac39027011414dbd3d87f7edb31680e1f430834c8cef029f11c66dad0670aa5"}, + {file = "aiohttp-3.8.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3f5c7ce535a1d2429a634310e308fb7d718905487257060e5d4598e29dc17f0b"}, + {file = "aiohttp-3.8.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b30e963f9e0d52c28f284d554a9469af073030030cef8693106d918b2ca92f54"}, + {file = "aiohttp-3.8.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:918810ef188f84152af6b938254911055a72e0f935b5fbc4c1a4ed0b0584aed1"}, + {file = "aiohttp-3.8.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:002f23e6ea8d3dd8d149e569fd580c999232b5fbc601c48d55398fbc2e582e8c"}, + {file = "aiohttp-3.8.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4fcf3eabd3fd1a5e6092d1242295fa37d0354b2eb2077e6eb670accad78e40e1"}, + {file = 
"aiohttp-3.8.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:255ba9d6d5ff1a382bb9a578cd563605aa69bec845680e21c44afc2670607a95"}, + {file = "aiohttp-3.8.6-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d67f8baed00870aa390ea2590798766256f31dc5ed3ecc737debb6e97e2ede78"}, + {file = "aiohttp-3.8.6-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:86f20cee0f0a317c76573b627b954c412ea766d6ada1a9fcf1b805763ae7feeb"}, + {file = "aiohttp-3.8.6-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:39a312d0e991690ccc1a61f1e9e42daa519dcc34ad03eb6f826d94c1190190dd"}, + {file = "aiohttp-3.8.6-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:e827d48cf802de06d9c935088c2924e3c7e7533377d66b6f31ed175c1620e05e"}, + {file = "aiohttp-3.8.6-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bd111d7fc5591ddf377a408ed9067045259ff2770f37e2d94e6478d0f3fc0c17"}, + {file = "aiohttp-3.8.6-cp39-cp39-win32.whl", hash = "sha256:caf486ac1e689dda3502567eb89ffe02876546599bbf915ec94b1fa424eeffd4"}, + {file = "aiohttp-3.8.6-cp39-cp39-win_amd64.whl", hash = "sha256:3f0e27e5b733803333bb2371249f41cf42bae8884863e8e8965ec69bebe53132"}, + {file = "aiohttp-3.8.6.tar.gz", hash = "sha256:b0cf2a4501bff9330a8a5248b4ce951851e415bdcce9dc158e76cfd55e15085c"}, +] [package.dependencies] aiosignal = ">=1.1.2" @@ -59,9 +155,12 @@ speedups = ["Brotli", "aiodns", "cchardet"] name = "aioitertools" version = "0.11.0" description = "itertools and builtins for AsyncIO and mixed iterables" -category = "main" optional = false python-versions = ">=3.6" +files = [ + {file = "aioitertools-0.11.0-py3-none-any.whl", hash = "sha256:04b95e3dab25b449def24d7df809411c10e62aab0cbe31a50ca4e68748c43394"}, + {file = "aioitertools-0.11.0.tar.gz", hash = "sha256:42c68b8dd3a69c2bf7f2233bf7df4bb58b557bca5252ac02ed5187bbc67d6831"}, +] [package.dependencies] typing_extensions = {version = ">=4.0", markers = "python_version < \"3.10\""} @@ -70,9 +169,12 @@ typing_extensions = {version = ">=4.0", markers = "python_version < \"3.10\""} name = "aiosignal" version = "1.3.1" description = "aiosignal: a list of registered asynchronous callbacks" -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, + {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, +] [package.dependencies] frozenlist = ">=1.1.0" @@ -81,9 +183,12 @@ frozenlist = ">=1.1.0" name = "anyio" version = "4.0.0" description = "High level compatibility layer for multiple asynchronous event loop implementations" -category = "dev" optional = false python-versions = ">=3.8" +files = [ + {file = "anyio-4.0.0-py3-none-any.whl", hash = "sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f"}, + {file = "anyio-4.0.0.tar.gz", hash = "sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a"}, +] [package.dependencies] exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""} @@ -99,29 +204,38 @@ trio = ["trio (>=0.22)"] name = "argilla" version = "0.0.1" description = "" -category = "dev" optional = false python-versions = "*" +files = [ + {file = "argilla-0.0.1-py3-none-any.whl", hash = "sha256:8bdc3c505bcfb47ba4b91f5658034eae53bf7d4f9317980397605c0c55817396"}, + {file = "argilla-0.0.1.tar.gz", hash = "sha256:5017854754e89f573b31af25b25b803f51cea9ca1fa0bcf00505dee1f45cf7c9"}, +] [[package]] name = 
"asana" version = "3.2.2" description = "Asana API client" -category = "dev" optional = false python-versions = "*" +files = [ + {file = "asana-3.2.2-py2.py3-none-any.whl", hash = "sha256:e8426ae5f5cda2c27d29874145acb589b91e673a84e3fbd45404679499d9604a"}, + {file = "asana-3.2.2.tar.gz", hash = "sha256:3a0c64ad5baaa8c52465fe400cedbc873b2127a77df135af518fd8da1af8d6b9"}, +] [package.dependencies] -requests = ">=2.20.0,<3.0.0" +requests = ">=2.20.0,<3.dev0" requests-oauthlib = ">=0.8.0,<2.0" [[package]] name = "astatine" version = "0.3.3" description = "Some handy helper functions for Python's AST module." -category = "dev" optional = false python-versions = ">=3.6" +files = [ + {file = "astatine-0.3.3-py3-none-any.whl", hash = "sha256:6d8c914f01fbea252cb8f31563f2e766a9ab03c02b9bcc37d18f7d9138828401"}, + {file = "astatine-0.3.3.tar.gz", hash = "sha256:0c58a7844b5890ff16da07dbfeb187341d8324cb4378940f89d795cbebebce08"}, +] [package.dependencies] asttokens = ">=1.1" @@ -131,9 +245,12 @@ domdf-python-tools = ">=2.7.0" name = "asttokens" version = "2.4.0" description = "Annotate AST trees with source code positions" -category = "dev" optional = false python-versions = "*" +files = [ + {file = "asttokens-2.4.0-py2.py3-none-any.whl", hash = "sha256:cf8fc9e61a86461aa9fb161a14a0841a03c405fa829ac6b202670b3495d2ce69"}, + {file = "asttokens-2.4.0.tar.gz", hash = "sha256:2e0171b991b2c959acc6c49318049236844a5da1d65ba2672c4880c1c894834e"}, +] [package.dependencies] six = ">=1.12.0" @@ -145,9 +262,12 @@ test = ["astroid", "pytest"] name = "astunparse" version = "1.6.3" description = "An AST unparser for Python" -category = "main" optional = false python-versions = "*" +files = [ + {file = "astunparse-1.6.3-py2.py3-none-any.whl", hash = "sha256:c2652417f2c8b5bb325c885ae329bdf3f86424075c4fd1a128674bc6fba4b8e8"}, + {file = "astunparse-1.6.3.tar.gz", hash = "sha256:5ad93a8456f0d084c3456d059fd9a92cce667963232cbf763eac3bc5b7940872"}, +] [package.dependencies] six = ">=1.6.1,<2.0" @@ -157,17 +277,23 @@ wheel = ">=0.23.0,<1.0" name = "async-timeout" version = "4.0.3" description = "Timeout context manager for asyncio programs" -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, + {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, +] [[package]] name = "attrs" version = "23.1.0" description = "Classes Without Boilerplate" -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"}, + {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"}, +] [package.extras] cov = ["attrs[tests]", "coverage[toml] (>=5.3)"] @@ -180,9 +306,12 @@ tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pyte name = "azure-core" version = "1.29.4" description = "Microsoft Azure Core Library for Python" -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "azure-core-1.29.4.tar.gz", hash = "sha256:500b3aa9bf2e90c5ccc88bb105d056114ca0ce7d0ce73afb8bc4d714b2fc7568"}, + {file = "azure_core-1.29.4-py3-none-any.whl", hash = "sha256:b03261bcba22c0b9290faf9999cedd23e849ed2577feee90515694cea6bc74bf"}, +] [package.dependencies] requests = ">=2.18.4" @@ -196,9 +325,12 @@ aio = ["aiohttp 
(>=3.0)"] name = "azure-datalake-store" version = "0.0.53" description = "Azure Data Lake Store Filesystem Client Library for Python" -category = "dev" optional = false python-versions = "*" +files = [ + {file = "azure-datalake-store-0.0.53.tar.gz", hash = "sha256:05b6de62ee3f2a0a6e6941e6933b792b800c3e7f6ffce2fc324bc19875757393"}, + {file = "azure_datalake_store-0.0.53-py2.py3-none-any.whl", hash = "sha256:a30c902a6e360aa47d7f69f086b426729784e71c536f330b691647a51dc42b2b"}, +] [package.dependencies] cffi = "*" @@ -209,9 +341,12 @@ requests = ">=2.20.0" name = "azure-identity" version = "1.14.1" description = "Microsoft Azure Identity Library for Python" -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "azure-identity-1.14.1.zip", hash = "sha256:48e2a9dbdc59b4f095f841d867d9a8cbe4c1cdbbad8251e055561afd47b4a9b8"}, + {file = "azure_identity-1.14.1-py3-none-any.whl", hash = "sha256:3a5bef8e9c3281e864e869739be8d67424bff616cddae96b546ca2a5168d863d"}, +] [package.dependencies] azure-core = ">=1.11.0,<2.0.0" @@ -223,9 +358,12 @@ msal-extensions = ">=0.3.0,<2.0.0" name = "azure-storage-blob" version = "12.18.3" description = "Microsoft Azure Blob Storage Client Library for Python" -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "azure-storage-blob-12.18.3.tar.gz", hash = "sha256:d8ced0deee3367fa3d4f3d1a03cd9edadf4440c0a371f503d623fa6c807554ee"}, + {file = "azure_storage_blob-12.18.3-py3-none-any.whl", hash = "sha256:c278dde2ac41857a68d615c9f2b36d894ba877a7e84d62795603c7e79d0bb5e9"}, +] [package.dependencies] azure-core = ">=1.28.0,<2.0.0" @@ -240,17 +378,23 @@ aio = ["azure-core[aio] (>=1.28.0,<2.0.0)"] name = "backoff" version = "2.2.1" description = "Function decoration for backoff and retry" -category = "dev" optional = false python-versions = ">=3.7,<4.0" +files = [ + {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"}, + {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, +] [[package]] name = "bandit" version = "1.7.5" description = "Security oriented static analyser for python code." -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "bandit-1.7.5-py3-none-any.whl", hash = "sha256:75665181dc1e0096369112541a056c59d1c5f66f9bb74a8d686c3c362b83f549"}, + {file = "bandit-1.7.5.tar.gz", hash = "sha256:bdfc739baa03b880c2d15d0431b31c658ffc348e907fe197e54e0389dd59e11e"}, +] [package.dependencies] colorama = {version = ">=0.3.9", markers = "platform_system == \"Windows\""} @@ -268,9 +412,32 @@ yaml = ["PyYAML"] name = "black" version = "23.9.1" description = "The uncompromising code formatter." 
-category = "dev" optional = false python-versions = ">=3.8" +files = [ + {file = "black-23.9.1-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:d6bc09188020c9ac2555a498949401ab35bb6bf76d4e0f8ee251694664df6301"}, + {file = "black-23.9.1-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:13ef033794029b85dfea8032c9d3b92b42b526f1ff4bf13b2182ce4e917f5100"}, + {file = "black-23.9.1-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:75a2dc41b183d4872d3a500d2b9c9016e67ed95738a3624f4751a0cb4818fe71"}, + {file = "black-23.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13a2e4a93bb8ca74a749b6974925c27219bb3df4d42fc45e948a5d9feb5122b7"}, + {file = "black-23.9.1-cp310-cp310-win_amd64.whl", hash = "sha256:adc3e4442eef57f99b5590b245a328aad19c99552e0bdc7f0b04db6656debd80"}, + {file = "black-23.9.1-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:8431445bf62d2a914b541da7ab3e2b4f3bc052d2ccbf157ebad18ea126efb91f"}, + {file = "black-23.9.1-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:8fc1ddcf83f996247505db6b715294eba56ea9372e107fd54963c7553f2b6dfe"}, + {file = "black-23.9.1-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:7d30ec46de88091e4316b17ae58bbbfc12b2de05e069030f6b747dfc649ad186"}, + {file = "black-23.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:031e8c69f3d3b09e1aa471a926a1eeb0b9071f80b17689a655f7885ac9325a6f"}, + {file = "black-23.9.1-cp311-cp311-win_amd64.whl", hash = "sha256:538efb451cd50f43aba394e9ec7ad55a37598faae3348d723b59ea8e91616300"}, + {file = "black-23.9.1-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:638619a559280de0c2aa4d76f504891c9860bb8fa214267358f0a20f27c12948"}, + {file = "black-23.9.1-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:a732b82747235e0542c03bf352c126052c0fbc458d8a239a94701175b17d4855"}, + {file = "black-23.9.1-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:cf3a4d00e4cdb6734b64bf23cd4341421e8953615cba6b3670453737a72ec204"}, + {file = "black-23.9.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf99f3de8b3273a8317681d8194ea222f10e0133a24a7548c73ce44ea1679377"}, + {file = "black-23.9.1-cp38-cp38-win_amd64.whl", hash = "sha256:14f04c990259576acd093871e7e9b14918eb28f1866f91968ff5524293f9c573"}, + {file = "black-23.9.1-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:c619f063c2d68f19b2d7270f4cf3192cb81c9ec5bc5ba02df91471d0b88c4c5c"}, + {file = "black-23.9.1-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:6a3b50e4b93f43b34a9d3ef00d9b6728b4a722c997c99ab09102fd5efdb88325"}, + {file = "black-23.9.1-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:c46767e8df1b7beefb0899c4a95fb43058fa8500b6db144f4ff3ca38eb2f6393"}, + {file = "black-23.9.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50254ebfa56aa46a9fdd5d651f9637485068a1adf42270148cd101cdf56e0ad9"}, + {file = "black-23.9.1-cp39-cp39-win_amd64.whl", hash = "sha256:403397c033adbc45c2bd41747da1f7fc7eaa44efbee256b53842470d4ac5a70f"}, + {file = "black-23.9.1-py3-none-any.whl", hash = "sha256:6ccd59584cc834b6d127628713e4b6b968e5f79572da66284532525a042549f9"}, + {file = "black-23.9.1.tar.gz", hash = "sha256:24b6b3ff5c6d9ea08a8888f6977eae858e1f340d7260cf56d70a49823236b62d"}, +] [package.dependencies] click = ">=8.0.0" @@ -291,9 +458,12 @@ uvloop = ["uvloop (>=0.15.2)"] name = "botocore" version = "1.31.17" description = "Low-level, data-driven core of boto 3." 
-category = "main" optional = false python-versions = ">= 3.7" +files = [ + {file = "botocore-1.31.17-py3-none-any.whl", hash = "sha256:6ac34a1d34aa3750e78b77b8596617e2bab938964694d651939dba2cbde2c12b"}, + {file = "botocore-1.31.17.tar.gz", hash = "sha256:396459065dba4339eb4da4ec8b4e6599728eb89b7caaceea199e26f7d824a41c"}, +] [package.dependencies] jmespath = ">=0.7.1,<2.0.0" @@ -307,25 +477,84 @@ crt = ["awscrt (==0.16.26)"] name = "cachetools" version = "5.3.1" description = "Extensible memoizing collections and decorators" -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "cachetools-5.3.1-py3-none-any.whl", hash = "sha256:95ef631eeaea14ba2e36f06437f36463aac3a096799e876ee55e5cdccb102590"}, + {file = "cachetools-5.3.1.tar.gz", hash = "sha256:dce83f2d9b4e1f732a8cd44af8e8fab2dbe46201467fc98b3ef8f269092bf62b"}, +] [[package]] name = "certifi" version = "2023.7.22" description = "Python package for providing Mozilla's CA Bundle." -category = "main" optional = false python-versions = ">=3.6" +files = [ + {file = "certifi-2023.7.22-py3-none-any.whl", hash = "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"}, + {file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"}, +] [[package]] name = "cffi" version = "1.16.0" description = "Foreign Function Interface for Python calling C code." -category = "main" optional = false python-versions = ">=3.8" +files = [ + {file = "cffi-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088"}, + {file = "cffi-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614"}, + {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743"}, + {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d"}, + {file = "cffi-1.16.0-cp310-cp310-win32.whl", hash = "sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a"}, + {file = "cffi-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1"}, + {file = "cffi-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404"}, + {file = "cffi-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417"}, + {file = 
"cffi-1.16.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e"}, + {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc"}, + {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb"}, + {file = "cffi-1.16.0-cp311-cp311-win32.whl", hash = "sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab"}, + {file = "cffi-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba"}, + {file = "cffi-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956"}, + {file = "cffi-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969"}, + {file = "cffi-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520"}, + {file = "cffi-1.16.0-cp312-cp312-win32.whl", hash = "sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b"}, + {file = "cffi-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235"}, + {file = "cffi-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0c9ef6ff37e974b73c25eecc13952c55bceed9112be2d9d938ded8e856138bcc"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a09582f178759ee8128d9270cd1344154fd473bb77d94ce0aeb2a93ebf0feaf0"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e760191dd42581e023a68b758769e2da259b5d52e3103c6060ddc02c9edb8d7b"}, + {file = 
"cffi-1.16.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:80876338e19c951fdfed6198e70bc88f1c9758b94578d5a7c4c91a87af3cf31c"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a6a14b17d7e17fa0d207ac08642c8820f84f25ce17a442fd15e27ea18d67c59b"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6602bc8dc6f3a9e02b6c22c4fc1e47aa50f8f8e6d3f78a5e16ac33ef5fefa324"}, + {file = "cffi-1.16.0-cp38-cp38-win32.whl", hash = "sha256:131fd094d1065b19540c3d72594260f118b231090295d8c34e19a7bbcf2e860a"}, + {file = "cffi-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:31d13b0f99e0836b7ff893d37af07366ebc90b678b6664c955b54561fc36ef36"}, + {file = "cffi-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed"}, + {file = "cffi-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098"}, + {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000"}, + {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe"}, + {file = "cffi-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4"}, + {file = "cffi-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8"}, + {file = "cffi-1.16.0.tar.gz", hash = "sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0"}, +] [package.dependencies] pycparser = "*" @@ -334,52 +563,152 @@ pycparser = "*" name = "chardet" version = "5.2.0" description = "Universal encoding detector for Python 3" -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970"}, + {file = "chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7"}, +] [[package]] name = "charset-normalizer" version = "3.3.0" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." -category = "main" optional = false python-versions = ">=3.7.0" - -[[package]] -name = "chromadb" -version = "0.3.29" -description = "Chroma." 
-category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -clickhouse-connect = ">=0.5.7" -duckdb = ">=0.7.1" -fastapi = "0.85.1" -graphlib-backport = {version = ">=1.0.3", markers = "python_version < \"3.9\""} -hnswlib = ">=0.7" -numpy = ">=1.21.6" -onnxruntime = ">=1.14.1" -overrides = ">=7.3.1" -pandas = ">=1.3" -posthog = ">=2.4.0" -pulsar-client = ">=3.1.0" -pydantic = ">=1.9,<2.0" -requests = ">=2.28" -tokenizers = ">=0.13.2" -tqdm = ">=4.65.0" -typing-extensions = ">=4.5.0" +files = [ + {file = "charset-normalizer-3.3.0.tar.gz", hash = "sha256:63563193aec44bce707e0c5ca64ff69fa72ed7cf34ce6e11d5127555756fd2f6"}, + {file = "charset_normalizer-3.3.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:effe5406c9bd748a871dbcaf3ac69167c38d72db8c9baf3ff954c344f31c4cbe"}, + {file = "charset_normalizer-3.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4162918ef3098851fcd8a628bf9b6a98d10c380725df9e04caf5ca6dd48c847a"}, + {file = "charset_normalizer-3.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0570d21da019941634a531444364f2482e8db0b3425fcd5ac0c36565a64142c8"}, + {file = "charset_normalizer-3.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5707a746c6083a3a74b46b3a631d78d129edab06195a92a8ece755aac25a3f3d"}, + {file = "charset_normalizer-3.3.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:278c296c6f96fa686d74eb449ea1697f3c03dc28b75f873b65b5201806346a69"}, + {file = "charset_normalizer-3.3.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a4b71f4d1765639372a3b32d2638197f5cd5221b19531f9245fcc9ee62d38f56"}, + {file = "charset_normalizer-3.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5969baeaea61c97efa706b9b107dcba02784b1601c74ac84f2a532ea079403e"}, + {file = "charset_normalizer-3.3.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a3f93dab657839dfa61025056606600a11d0b696d79386f974e459a3fbc568ec"}, + {file = "charset_normalizer-3.3.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:db756e48f9c5c607b5e33dd36b1d5872d0422e960145b08ab0ec7fd420e9d649"}, + {file = "charset_normalizer-3.3.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:232ac332403e37e4a03d209a3f92ed9071f7d3dbda70e2a5e9cff1c4ba9f0678"}, + {file = "charset_normalizer-3.3.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e5c1502d4ace69a179305abb3f0bb6141cbe4714bc9b31d427329a95acfc8bdd"}, + {file = "charset_normalizer-3.3.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:2502dd2a736c879c0f0d3e2161e74d9907231e25d35794584b1ca5284e43f596"}, + {file = "charset_normalizer-3.3.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23e8565ab7ff33218530bc817922fae827420f143479b753104ab801145b1d5b"}, + {file = "charset_normalizer-3.3.0-cp310-cp310-win32.whl", hash = "sha256:1872d01ac8c618a8da634e232f24793883d6e456a66593135aeafe3784b0848d"}, + {file = "charset_normalizer-3.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:557b21a44ceac6c6b9773bc65aa1b4cc3e248a5ad2f5b914b91579a32e22204d"}, + {file = "charset_normalizer-3.3.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d7eff0f27edc5afa9e405f7165f85a6d782d308f3b6b9d96016c010597958e63"}, + {file = "charset_normalizer-3.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6a685067d05e46641d5d1623d7c7fdf15a357546cbb2f71b0ebde91b175ffc3e"}, + {file = "charset_normalizer-3.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:0d3d5b7db9ed8a2b11a774db2bbea7ba1884430a205dbd54a32d61d7c2a190fa"}, + {file = "charset_normalizer-3.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2935ffc78db9645cb2086c2f8f4cfd23d9b73cc0dc80334bc30aac6f03f68f8c"}, + {file = "charset_normalizer-3.3.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9fe359b2e3a7729010060fbca442ca225280c16e923b37db0e955ac2a2b72a05"}, + {file = "charset_normalizer-3.3.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:380c4bde80bce25c6e4f77b19386f5ec9db230df9f2f2ac1e5ad7af2caa70459"}, + {file = "charset_normalizer-3.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f0d1e3732768fecb052d90d62b220af62ead5748ac51ef61e7b32c266cac9293"}, + {file = "charset_normalizer-3.3.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1b2919306936ac6efb3aed1fbf81039f7087ddadb3160882a57ee2ff74fd2382"}, + {file = "charset_normalizer-3.3.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f8888e31e3a85943743f8fc15e71536bda1c81d5aa36d014a3c0c44481d7db6e"}, + {file = "charset_normalizer-3.3.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:82eb849f085624f6a607538ee7b83a6d8126df6d2f7d3b319cb837b289123078"}, + {file = "charset_normalizer-3.3.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7b8b8bf1189b3ba9b8de5c8db4d541b406611a71a955bbbd7385bbc45fcb786c"}, + {file = "charset_normalizer-3.3.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:5adf257bd58c1b8632046bbe43ee38c04e1038e9d37de9c57a94d6bd6ce5da34"}, + {file = "charset_normalizer-3.3.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c350354efb159b8767a6244c166f66e67506e06c8924ed74669b2c70bc8735b1"}, + {file = "charset_normalizer-3.3.0-cp311-cp311-win32.whl", hash = "sha256:02af06682e3590ab952599fbadac535ede5d60d78848e555aa58d0c0abbde786"}, + {file = "charset_normalizer-3.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:86d1f65ac145e2c9ed71d8ffb1905e9bba3a91ae29ba55b4c46ae6fc31d7c0d4"}, + {file = "charset_normalizer-3.3.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:3b447982ad46348c02cb90d230b75ac34e9886273df3a93eec0539308a6296d7"}, + {file = "charset_normalizer-3.3.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:abf0d9f45ea5fb95051c8bfe43cb40cda383772f7e5023a83cc481ca2604d74e"}, + {file = "charset_normalizer-3.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b09719a17a2301178fac4470d54b1680b18a5048b481cb8890e1ef820cb80455"}, + {file = "charset_normalizer-3.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b3d9b48ee6e3967b7901c052b670c7dda6deb812c309439adaffdec55c6d7b78"}, + {file = "charset_normalizer-3.3.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:edfe077ab09442d4ef3c52cb1f9dab89bff02f4524afc0acf2d46be17dc479f5"}, + {file = "charset_normalizer-3.3.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3debd1150027933210c2fc321527c2299118aa929c2f5a0a80ab6953e3bd1908"}, + {file = "charset_normalizer-3.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86f63face3a527284f7bb8a9d4f78988e3c06823f7bea2bd6f0e0e9298ca0403"}, + {file = "charset_normalizer-3.3.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:24817cb02cbef7cd499f7c9a2735286b4782bd47a5b3516a0e84c50eab44b98e"}, + {file = 
"charset_normalizer-3.3.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c71f16da1ed8949774ef79f4a0260d28b83b3a50c6576f8f4f0288d109777989"}, + {file = "charset_normalizer-3.3.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:9cf3126b85822c4e53aa28c7ec9869b924d6fcfb76e77a45c44b83d91afd74f9"}, + {file = "charset_normalizer-3.3.0-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:b3b2316b25644b23b54a6f6401074cebcecd1244c0b8e80111c9a3f1c8e83d65"}, + {file = "charset_normalizer-3.3.0-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:03680bb39035fbcffe828eae9c3f8afc0428c91d38e7d61aa992ef7a59fb120e"}, + {file = "charset_normalizer-3.3.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4cc152c5dd831641e995764f9f0b6589519f6f5123258ccaca8c6d34572fefa8"}, + {file = "charset_normalizer-3.3.0-cp312-cp312-win32.whl", hash = "sha256:b8f3307af845803fb0b060ab76cf6dd3a13adc15b6b451f54281d25911eb92df"}, + {file = "charset_normalizer-3.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:8eaf82f0eccd1505cf39a45a6bd0a8cf1c70dcfc30dba338207a969d91b965c0"}, + {file = "charset_normalizer-3.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:dc45229747b67ffc441b3de2f3ae5e62877a282ea828a5bdb67883c4ee4a8810"}, + {file = "charset_normalizer-3.3.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f4a0033ce9a76e391542c182f0d48d084855b5fcba5010f707c8e8c34663d77"}, + {file = "charset_normalizer-3.3.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ada214c6fa40f8d800e575de6b91a40d0548139e5dc457d2ebb61470abf50186"}, + {file = "charset_normalizer-3.3.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b1121de0e9d6e6ca08289583d7491e7fcb18a439305b34a30b20d8215922d43c"}, + {file = "charset_normalizer-3.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1063da2c85b95f2d1a430f1c33b55c9c17ffaf5e612e10aeaad641c55a9e2b9d"}, + {file = "charset_normalizer-3.3.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:70f1d09c0d7748b73290b29219e854b3207aea922f839437870d8cc2168e31cc"}, + {file = "charset_normalizer-3.3.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:250c9eb0f4600361dd80d46112213dff2286231d92d3e52af1e5a6083d10cad9"}, + {file = "charset_normalizer-3.3.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:750b446b2ffce1739e8578576092179160f6d26bd5e23eb1789c4d64d5af7dc7"}, + {file = "charset_normalizer-3.3.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:fc52b79d83a3fe3a360902d3f5d79073a993597d48114c29485e9431092905d8"}, + {file = "charset_normalizer-3.3.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:588245972aca710b5b68802c8cad9edaa98589b1b42ad2b53accd6910dad3545"}, + {file = "charset_normalizer-3.3.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e39c7eb31e3f5b1f88caff88bcff1b7f8334975b46f6ac6e9fc725d829bc35d4"}, + {file = "charset_normalizer-3.3.0-cp37-cp37m-win32.whl", hash = "sha256:abecce40dfebbfa6abf8e324e1860092eeca6f7375c8c4e655a8afb61af58f2c"}, + {file = "charset_normalizer-3.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:24a91a981f185721542a0b7c92e9054b7ab4fea0508a795846bc5b0abf8118d4"}, + {file = "charset_normalizer-3.3.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:67b8cc9574bb518ec76dc8e705d4c39ae78bb96237cb533edac149352c1f39fe"}, + {file = "charset_normalizer-3.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ac71b2977fb90c35d41c9453116e283fac47bb9096ad917b8819ca8b943abecd"}, + {file = 
"charset_normalizer-3.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3ae38d325b512f63f8da31f826e6cb6c367336f95e418137286ba362925c877e"}, + {file = "charset_normalizer-3.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:542da1178c1c6af8873e143910e2269add130a299c9106eef2594e15dae5e482"}, + {file = "charset_normalizer-3.3.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:30a85aed0b864ac88309b7d94be09f6046c834ef60762a8833b660139cfbad13"}, + {file = "charset_normalizer-3.3.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aae32c93e0f64469f74ccc730a7cb21c7610af3a775157e50bbd38f816536b38"}, + {file = "charset_normalizer-3.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15b26ddf78d57f1d143bdf32e820fd8935d36abe8a25eb9ec0b5a71c82eb3895"}, + {file = "charset_normalizer-3.3.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7f5d10bae5d78e4551b7be7a9b29643a95aded9d0f602aa2ba584f0388e7a557"}, + {file = "charset_normalizer-3.3.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:249c6470a2b60935bafd1d1d13cd613f8cd8388d53461c67397ee6a0f5dce741"}, + {file = "charset_normalizer-3.3.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:c5a74c359b2d47d26cdbbc7845e9662d6b08a1e915eb015d044729e92e7050b7"}, + {file = "charset_normalizer-3.3.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:b5bcf60a228acae568e9911f410f9d9e0d43197d030ae5799e20dca8df588287"}, + {file = "charset_normalizer-3.3.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:187d18082694a29005ba2944c882344b6748d5be69e3a89bf3cc9d878e548d5a"}, + {file = "charset_normalizer-3.3.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:81bf654678e575403736b85ba3a7867e31c2c30a69bc57fe88e3ace52fb17b89"}, + {file = "charset_normalizer-3.3.0-cp38-cp38-win32.whl", hash = "sha256:85a32721ddde63c9df9ebb0d2045b9691d9750cb139c161c80e500d210f5e26e"}, + {file = "charset_normalizer-3.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:468d2a840567b13a590e67dd276c570f8de00ed767ecc611994c301d0f8c014f"}, + {file = "charset_normalizer-3.3.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e0fc42822278451bc13a2e8626cf2218ba570f27856b536e00cfa53099724828"}, + {file = "charset_normalizer-3.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:09c77f964f351a7369cc343911e0df63e762e42bac24cd7d18525961c81754f4"}, + {file = "charset_normalizer-3.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:12ebea541c44fdc88ccb794a13fe861cc5e35d64ed689513a5c03d05b53b7c82"}, + {file = "charset_normalizer-3.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:805dfea4ca10411a5296bcc75638017215a93ffb584c9e344731eef0dcfb026a"}, + {file = "charset_normalizer-3.3.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:96c2b49eb6a72c0e4991d62406e365d87067ca14c1a729a870d22354e6f68115"}, + {file = "charset_normalizer-3.3.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aaf7b34c5bc56b38c931a54f7952f1ff0ae77a2e82496583b247f7c969eb1479"}, + {file = "charset_normalizer-3.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:619d1c96099be5823db34fe89e2582b336b5b074a7f47f819d6b3a57ff7bdb86"}, + {file = "charset_normalizer-3.3.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a0ac5e7015a5920cfce654c06618ec40c33e12801711da6b4258af59a8eff00a"}, + {file = 
"charset_normalizer-3.3.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:93aa7eef6ee71c629b51ef873991d6911b906d7312c6e8e99790c0f33c576f89"}, + {file = "charset_normalizer-3.3.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7966951325782121e67c81299a031f4c115615e68046f79b85856b86ebffc4cd"}, + {file = "charset_normalizer-3.3.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:02673e456dc5ab13659f85196c534dc596d4ef260e4d86e856c3b2773ce09843"}, + {file = "charset_normalizer-3.3.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:c2af80fb58f0f24b3f3adcb9148e6203fa67dd3f61c4af146ecad033024dde43"}, + {file = "charset_normalizer-3.3.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:153e7b6e724761741e0974fc4dcd406d35ba70b92bfe3fedcb497226c93b9da7"}, + {file = "charset_normalizer-3.3.0-cp39-cp39-win32.whl", hash = "sha256:d47ecf253780c90ee181d4d871cd655a789da937454045b17b5798da9393901a"}, + {file = "charset_normalizer-3.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:d97d85fa63f315a8bdaba2af9a6a686e0eceab77b3089af45133252618e70884"}, + {file = "charset_normalizer-3.3.0-py3-none-any.whl", hash = "sha256:e46cd37076971c1040fc8c41273a8b3e2c624ce4f2be3f5dfcb7a430c1d3acc2"}, +] + +[[package]] +name = "chromadb" +version = "0.3.29" +description = "Chroma." +optional = false +python-versions = ">=3.7" +files = [ + {file = "chromadb-0.3.29-py3-none-any.whl", hash = "sha256:d681a3e4f3284715dd146774be84cad3d2f8c529bd004ba249e1d3deb70ac68e"}, + {file = "chromadb-0.3.29.tar.gz", hash = "sha256:29d47835da494fc1b58da40abb1435689d4ba1c93df6c64664a5d91521cb80e9"}, +] + +[package.dependencies] +clickhouse-connect = ">=0.5.7" +duckdb = ">=0.7.1" +fastapi = "0.85.1" +graphlib-backport = {version = ">=1.0.3", markers = "python_version < \"3.9\""} +hnswlib = ">=0.7" +numpy = ">=1.21.6" +onnxruntime = ">=1.14.1" +overrides = ">=7.3.1" +pandas = ">=1.3" +posthog = ">=2.4.0" +pulsar-client = ">=3.1.0" +pydantic = ">=1.9,<2.0" +requests = ">=2.28" +tokenizers = ">=0.13.2" +tqdm = ">=4.65.0" +typing-extensions = ">=4.5.0" uvicorn = {version = ">=0.18.3", extras = ["standard"]} [[package]] name = "click" version = "8.1.7" description = "Composable command line interface toolkit" -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, + {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, +] [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} @@ -388,9 +717,75 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} name = "clickhouse-connect" version = "0.6.14" description = "ClickHouse Database Core Driver for Python, Pandas, and Superset" -category = "dev" optional = false python-versions = "~=3.7" +files = [ + {file = "clickhouse-connect-0.6.14.tar.gz", hash = "sha256:0531bbd5b8bdee616bf1cca5ddcb0af86db12e2b48fd39257a8ecdf32200bd57"}, + {file = "clickhouse_connect-0.6.14-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:04affbd255fb8b1e4a882ddc1336c86530976d05578f47bb65e3a53471d291e4"}, + {file = "clickhouse_connect-0.6.14-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f5bd61f2665f1890fa632b1181df2900ea838cf152cd9a3f775841ea2deab680"}, + {file = "clickhouse_connect-0.6.14-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79897a0987008993f32737e17045a5c1982f9193f7511a3832a7ba3429cbf6b4"}, + {file = 
"clickhouse_connect-0.6.14-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa95c8a96bdff593924407b074d616ee8a1bfb989579c17b330c6f3b27febfe3"}, + {file = "clickhouse_connect-0.6.14-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:501c0d843be30c86719b61089fb1de6298ac44b3670594f0a1cb0dc3ad97651e"}, + {file = "clickhouse_connect-0.6.14-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:1ec9672c9ed9d5e62f66ac14d6470b9b6be9946d6d24ddac87376437863b8f59"}, + {file = "clickhouse_connect-0.6.14-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:92173354a6c7c5862fab09dab338197b86a192e0c117137e899e8cf92cc3b5b7"}, + {file = "clickhouse_connect-0.6.14-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:757b4c05ebf10bdcb916334c3021ee571a61238907cdeee8c54bcf0550cd0d19"}, + {file = "clickhouse_connect-0.6.14-cp310-cp310-win32.whl", hash = "sha256:2e74badf6c7569e1a0ad32f3be250a3ebf28a9df3b15c9709104e5f050486016"}, + {file = "clickhouse_connect-0.6.14-cp310-cp310-win_amd64.whl", hash = "sha256:7b56c422467df5a0b2790e0943b747639f1f172fac7f8d9585adb3302c961fb1"}, + {file = "clickhouse_connect-0.6.14-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d2aa6d28d79eb5ca94d7c756ec4dc599d2354897f5ef40fd0d8bdc579a81dd94"}, + {file = "clickhouse_connect-0.6.14-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:70cd5b2e0d80dc030355d09db213c73caa78ef259f2b04ce30c1c8cb513bf45b"}, + {file = "clickhouse_connect-0.6.14-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:826c85e37555443af945a0d977598814ba7cb09447b0cdd167eae57dfd3f0724"}, + {file = "clickhouse_connect-0.6.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3cdb1f843d134a1e30828900bc51c9c1b4f4e638aac693767685e512fb095af5"}, + {file = "clickhouse_connect-0.6.14-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:10a8ea6ca6e0d6b1af50078413e280f271559c462a8644541002e44c2cb5c371"}, + {file = "clickhouse_connect-0.6.14-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8b72a5e5d54069dff419a6ec9bbc7f3896fe558551cae6a2b2cba60eaa0607a3"}, + {file = "clickhouse_connect-0.6.14-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c531ed454ca9b6d85e739de3770a82eec2087ed2cb9660fb8ff0e62f7f1446cc"}, + {file = "clickhouse_connect-0.6.14-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:ae6ebf7c507f9d0fece9d1e387c9eec77762693f91647bca18f588cf1d594d24"}, + {file = "clickhouse_connect-0.6.14-cp311-cp311-win32.whl", hash = "sha256:cf1e3067c2da8525b6f59a37f8e13cd6c4524f439be8fd7d8fa03f007f96c863"}, + {file = "clickhouse_connect-0.6.14-cp311-cp311-win_amd64.whl", hash = "sha256:15a040210877cc34155943c7870bf78247d4d4fa3bd4e0595ca22e97760679b7"}, + {file = "clickhouse_connect-0.6.14-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:eb91e4ee0435088fc8bd36de51a93ff9286a514d82ac373b57b2d6cad4655d77"}, + {file = "clickhouse_connect-0.6.14-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48108bb1cfe99b6ff60344838859aec1315213dfa618f6ca4b92c0c6e5ae8d41"}, + {file = "clickhouse_connect-0.6.14-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c75d4bd8ef0b90f9e89ea70c16ff099278e4bb8f1e045008376ac34c6122b73d"}, + {file = "clickhouse_connect-0.6.14-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:897f40eda84e9c45d0bdaf3a9e638e614e236a4a5eeab5cddd920857f9f8f22a"}, + {file = 
"clickhouse_connect-0.6.14-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:5ecc88656df05ae49e70062aee7022982eec3f87fb14db97c25276fef6633d7c"}, + {file = "clickhouse_connect-0.6.14-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:76cec48265774ae3fa61a77b290dcc8385aad4312a8d7dfcaffb9fc00f79458e"}, + {file = "clickhouse_connect-0.6.14-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:dba280e00ec4cfe0e4d69f88baa9a0491bc1ed83ec57336e5197adae8d42d0c9"}, + {file = "clickhouse_connect-0.6.14-cp37-cp37m-win32.whl", hash = "sha256:6c77f537e04747702e009c05f4a7f6f96cbe1696bb89d29f72e39e7370924836"}, + {file = "clickhouse_connect-0.6.14-cp37-cp37m-win_amd64.whl", hash = "sha256:d0eceaff68a53f71384bb9aee7fc1630f68ac10538727c8516ae0af1103f2580"}, + {file = "clickhouse_connect-0.6.14-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9dfa09948caeed539cdd019a1e341a379a1dcacdd755b278d12484b4a703afa3"}, + {file = "clickhouse_connect-0.6.14-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a30d99cb1fd57b8fed4449632e51d48386d0eec1673f905572c5fc7059215c20"}, + {file = "clickhouse_connect-0.6.14-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:93e88de4fe66ae2b1c15726760cc87a703e4d1162de52a19c8d8b57a4429f08e"}, + {file = "clickhouse_connect-0.6.14-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:03d721de610beae823068665d6c604a5f390a99e7b2354264b17136a3a520b13"}, + {file = "clickhouse_connect-0.6.14-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a627762f2f692e226b3cb574a83133844213c6507c6313d3fefd8a3de08e5798"}, + {file = "clickhouse_connect-0.6.14-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:62a596f8d9db8592477a2032c329be7449ea32d133cdc4e5d6f804e251b8617a"}, + {file = "clickhouse_connect-0.6.14-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:e8ab9e5a61968c328a0fdc254b02b96142ebb4ec2bc1623f9498538f0ebfdc7c"}, + {file = "clickhouse_connect-0.6.14-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6c21fe379b1b8822eb9644600e38220b5c4b530fd0f2b1da824a0918120a8f01"}, + {file = "clickhouse_connect-0.6.14-cp38-cp38-win32.whl", hash = "sha256:2a17b336781d3fbb67ed556918c17e63c7d462709aa6a953bb3410ddb67fd7f4"}, + {file = "clickhouse_connect-0.6.14-cp38-cp38-win_amd64.whl", hash = "sha256:838a008c0f7d911ab81f741ea27a64ef7bdcc2508698b70f018987dfc742ffa9"}, + {file = "clickhouse_connect-0.6.14-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:083649a97c3f366f66f0f2578b9f88d86c1d3a40b9015c9403db524fda36a952"}, + {file = "clickhouse_connect-0.6.14-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9e9bd6849852b2c55e51a477e10bc8b61990c5f37f31cce5ea6fc970b447b5af"}, + {file = "clickhouse_connect-0.6.14-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9152c45423f488cf6229bce1f9e695cd81e7ffcd3ae0f1e40e5e62079b18d4a5"}, + {file = "clickhouse_connect-0.6.14-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:341e068d4a6a423ed22fb3b96cfe16be0d6305943c3fb1cc48251b7d9729931d"}, + {file = "clickhouse_connect-0.6.14-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ead7acb841524bd7a73b1f10592a36e901d63bc89af3270ab76b89a11d44fe20"}, + {file = "clickhouse_connect-0.6.14-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:8bce432f72dcf6679c2d0bac4e3a82a126389ad7951d316f213109cee6925c7c"}, + {file = "clickhouse_connect-0.6.14-cp39-cp39-musllinux_1_1_i686.whl", hash = 
"sha256:1f403499f169574cafb05888dfdaf18065cc49ff1321e5e108c504c8c220e172"}, + {file = "clickhouse_connect-0.6.14-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3189fcd339bfd7ae4e703ff40b110b9740d6b1ec8385ed8bd1547663fd046578"}, + {file = "clickhouse_connect-0.6.14-cp39-cp39-win32.whl", hash = "sha256:a30de3f0997a9157e840c2d4e07fd9c6fc6e359f1ff9f3a46386b5abdca73c1a"}, + {file = "clickhouse_connect-0.6.14-cp39-cp39-win_amd64.whl", hash = "sha256:c3476a95780374e94dfba2a28093d15f8370bfa6f4cb46a02e0af8813e5f7368"}, + {file = "clickhouse_connect-0.6.14-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:22affe46983e67e3923e9330336d21e9ec4b4812b6fbeb1865514145b3870170"}, + {file = "clickhouse_connect-0.6.14-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62727090af8875631115911f58442967386b31cd4efa93c951c2aa7e57d1ce4b"}, + {file = "clickhouse_connect-0.6.14-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ee4ea5ac58de0580f2e12b46cfd2f8d13c1e690378bf9775bfed0c935232de71"}, + {file = "clickhouse_connect-0.6.14-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a126fe486dd02fa5f8adb0b9d8fd0fc701fb73b2275e1040ed210afadd189f90"}, + {file = "clickhouse_connect-0.6.14-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:de6bf773c8776033ca5fb5a6a376729ae69afdd0b19a71d1460d1a221fc5a627"}, + {file = "clickhouse_connect-0.6.14-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:d27d2c9698d1acb550ac8c30c4d9440c7d826a16444e4aea4dacf11ed7ec8988"}, + {file = "clickhouse_connect-0.6.14-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f57efbe536dfbfb7e10dd16ced6fe02441fb174450760f0b29b2b60d23c6462f"}, + {file = "clickhouse_connect-0.6.14-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c134483da38a3d3e38c44da9f3d519d73e177998052d36129e21863c7a3497ee"}, + {file = "clickhouse_connect-0.6.14-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b2d6ae7ccb4ca3d310c2971ead9839935890e40da8602dcc92ecda9bbbb24366"}, + {file = "clickhouse_connect-0.6.14-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:0acf6b69b11b757d60545b0ccac3df4980f69351994e30074df84729bb5af5d1"}, + {file = "clickhouse_connect-0.6.14-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:e043b3b606749d23eca7601a1a44f188c6f117ae57a2852c66c21f11b7296fe4"}, + {file = "clickhouse_connect-0.6.14-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38a887dfef3f3914454c7d7a428db8063b1678c66678cbabcd6368f0b67876f1"}, + {file = "clickhouse_connect-0.6.14-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e255e7c9c38fb9bceefc659374d04914ef2222a6f121fccf86a865b81110e96b"}, + {file = "clickhouse_connect-0.6.14-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2be9a6ba1d3055bb6956be218ffecfa3bfbe47121dfa34467815aa883f15d159"}, + {file = "clickhouse_connect-0.6.14-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:59faa034fdd58c1e7c8b2f4a033e9c611a0c58e193339cdd62d9d91a62f11195"}, +] [package.dependencies] certifi = "*" @@ -410,17 +805,23 @@ sqlalchemy = ["sqlalchemy (>1.3.21,<2.0)"] name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." 
-category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] [[package]] name = "coloredlogs" version = "15.0.1" description = "Colored terminal output for Python's logging module" -category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934"}, + {file = "coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0"}, +] [package.dependencies] humanfriendly = ">=9.1" @@ -432,9 +833,44 @@ cron = ["capturer (>=2.4)"] name = "confluent-kafka" version = "2.3.0" description = "Confluent's Python client for Apache Kafka" -category = "dev" optional = false python-versions = "*" +files = [ + {file = "confluent-kafka-2.3.0.tar.gz", hash = "sha256:4069e7b56e0baf9db18c053a605213f0ab2d8f23715dca7b3bd97108df446ced"}, + {file = "confluent_kafka-2.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5df845755cd3ebb9165ca00fd1d3a7d514c61e84d9fcbe7babb91193fe9b369c"}, + {file = "confluent_kafka-2.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9ab2217875b731bd390582952e0f9cbe3e7b34774490f01afca70728f0d8b469"}, + {file = "confluent_kafka-2.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:62046e8a75c7a6883a0f1f4a635573fd7e1665eeacace65e7f6d59cbaa94697d"}, + {file = "confluent_kafka-2.3.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:1eba38061e9ed1c0a369c129bf01d07499286cc3cb295398b88a7037c14371fb"}, + {file = "confluent_kafka-2.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:a6abece28598fa2b59d2b9399fcec03440aaa73fd207fdad048a6030d7e897e1"}, + {file = "confluent_kafka-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d55fbdcd75586dd17fe3fe64f4b4efa1c93ce9dd09c275de46f75772826e8860"}, + {file = "confluent_kafka-2.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ec17b26d6155feeaded4a435ba949095aea9699afb65309d8f22e55722f53c48"}, + {file = "confluent_kafka-2.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9b42bf1b75fdd9aa20c77b27f166f6289440ac649f70622a0117a8e7aa6169d"}, + {file = "confluent_kafka-2.3.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:7f9f4099aaf2c5daae828d2f356e4277d0ef0485ec883dbe395f0c0e054450d0"}, + {file = "confluent_kafka-2.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:1c6b29d57df99dabd45e67fd0aa46f17f195b057734ad84cf9cfdc2542855c10"}, + {file = "confluent_kafka-2.3.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:6b46ce75bda0c092da103dbd55cb0ba429c73c232e70b476b19a0ab247ec9057"}, + {file = "confluent_kafka-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:af60af786a7b8cbeafea51a9416664b96b0f5ef6243172b0bc59e5f75e8bd86a"}, + {file = "confluent_kafka-2.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e08b601e09a584c6a4a8c323a71e92fca31a8826ed33b5b95b26783b7a996026"}, + {file = "confluent_kafka-2.3.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:7fd1ab257d4fa0e2a98529e4eb2102cf8352ad6b3d22110d6cf0bb1f598893d9"}, + {file = "confluent_kafka-2.3.0-cp312-cp312-win_amd64.whl", hash = 
"sha256:1ccf6483d86535627cad7b94982ea95d9fa9ae04ddb552e097c1211ffcde5ea7"}, + {file = "confluent_kafka-2.3.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:030fb237927ec2296882a9bb96237ebf86e48388166b15ec0bbf3fdeb48df81a"}, + {file = "confluent_kafka-2.3.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc24c57a52c206648685e1c536afb8447d1cbbbf3871cacebccf2e5b67bdf535"}, + {file = "confluent_kafka-2.3.0-cp36-cp36m-manylinux_2_28_aarch64.whl", hash = "sha256:25292a9a8ef7765c85636851d6c4d5e5e98d6ead627b59637b24a5779e8a4b02"}, + {file = "confluent_kafka-2.3.0-cp36-cp36m-win_amd64.whl", hash = "sha256:d634d4d9914b0a28ec3e37ab7b150173aa34c81fd5bd0b4dcac972b520ad56cc"}, + {file = "confluent_kafka-2.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:ebf460d90478bcd1b4564023a5b081c6e5390b28dbabbb17ee664e223830465d"}, + {file = "confluent_kafka-2.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cec97f8c6564b16504d30fe42c22fd4a86c406dbcd45c337b93c21e876e20628"}, + {file = "confluent_kafka-2.3.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:128ddb28c19ab57c18c0e3d8209d089b6b90ff111b20108764f6798468432693"}, + {file = "confluent_kafka-2.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:0470dc5e56e639693149961409bc6b663df94d68ceae296ae9c42e079fe65d00"}, + {file = "confluent_kafka-2.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b539064fef35386936a0d2dadf8a82b8b0ae325af95d9263a2431b82671c4702"}, + {file = "confluent_kafka-2.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4f9998f781a1da0c9dcb5506792a39799cb54e28c6f986ddc73e362887042f7c"}, + {file = "confluent_kafka-2.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f175e11facaf12130abd5d2d471db39d7cc89126c4d991527cf14e3da22c635c"}, + {file = "confluent_kafka-2.3.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:f9842720ed0debcf4620710e01d356681a4812441f1ff49664fc205d1f9120e5"}, + {file = "confluent_kafka-2.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:cf015e547b82a74a87d7363d0d42e4cd0ca23b01cdb479639a340f385581ea04"}, + {file = "confluent_kafka-2.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e5c740ead14a2510e15f63e67b19d48ae48a7f30ef4823d5af125bad528033d1"}, + {file = "confluent_kafka-2.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6ae5e6a6dcd5ce85b9153c21c9f0b83e0cc88a5955b5334079db76c2267deb63"}, + {file = "confluent_kafka-2.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca36a8d1d49fd55cca1b7ec3090ca2684a933e63f196f0e3e506194b189fc31e"}, + {file = "confluent_kafka-2.3.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:210f2d346d1006e9b95c5204f7255735d4cb5ec962a3d1a68ac60c02e2763ae4"}, + {file = "confluent_kafka-2.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:cb279e369121e07ccb419220fc039127345a9e5f72f4abf7dda0e2e06a12b604"}, +] [package.extras] avro = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0)", "fastavro (>=1.0)", "requests"] @@ -448,9 +884,33 @@ schema-registry = ["requests"] name = "cryptography" version = "41.0.4" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
-category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "cryptography-41.0.4-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:80907d3faa55dc5434a16579952ac6da800935cd98d14dbd62f6f042c7f5e839"}, + {file = "cryptography-41.0.4-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:35c00f637cd0b9d5b6c6bd11b6c3359194a8eba9c46d4e875a3660e3b400005f"}, + {file = "cryptography-41.0.4-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cecfefa17042941f94ab54f769c8ce0fe14beff2694e9ac684176a2535bf9714"}, + {file = "cryptography-41.0.4-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e40211b4923ba5a6dc9769eab704bdb3fbb58d56c5b336d30996c24fcf12aadb"}, + {file = "cryptography-41.0.4-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:23a25c09dfd0d9f28da2352503b23e086f8e78096b9fd585d1d14eca01613e13"}, + {file = "cryptography-41.0.4-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:2ed09183922d66c4ec5fdaa59b4d14e105c084dd0febd27452de8f6f74704143"}, + {file = "cryptography-41.0.4-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:5a0f09cefded00e648a127048119f77bc2b2ec61e736660b5789e638f43cc397"}, + {file = "cryptography-41.0.4-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:9eeb77214afae972a00dee47382d2591abe77bdae166bda672fb1e24702a3860"}, + {file = "cryptography-41.0.4-cp37-abi3-win32.whl", hash = "sha256:3b224890962a2d7b57cf5eeb16ccaafba6083f7b811829f00476309bce2fe0fd"}, + {file = "cryptography-41.0.4-cp37-abi3-win_amd64.whl", hash = "sha256:c880eba5175f4307129784eca96f4e70b88e57aa3f680aeba3bab0e980b0f37d"}, + {file = "cryptography-41.0.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:004b6ccc95943f6a9ad3142cfabcc769d7ee38a3f60fb0dddbfb431f818c3a67"}, + {file = "cryptography-41.0.4-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:86defa8d248c3fa029da68ce61fe735432b047e32179883bdb1e79ed9bb8195e"}, + {file = "cryptography-41.0.4-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:37480760ae08065437e6573d14be973112c9e6dcaf5f11d00147ee74f37a3829"}, + {file = "cryptography-41.0.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:b5f4dfe950ff0479f1f00eda09c18798d4f49b98f4e2006d644b3301682ebdca"}, + {file = "cryptography-41.0.4-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:7e53db173370dea832190870e975a1e09c86a879b613948f09eb49324218c14d"}, + {file = "cryptography-41.0.4-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5b72205a360f3b6176485a333256b9bcd48700fc755fef51c8e7e67c4b63e3ac"}, + {file = "cryptography-41.0.4-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:93530900d14c37a46ce3d6c9e6fd35dbe5f5601bf6b3a5c325c7bffc030344d9"}, + {file = "cryptography-41.0.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:efc8ad4e6fc4f1752ebfb58aefece8b4e3c4cae940b0994d43649bdfce8d0d4f"}, + {file = "cryptography-41.0.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c3391bd8e6de35f6f1140e50aaeb3e2b3d6a9012536ca23ab0d9c35ec18c8a91"}, + {file = "cryptography-41.0.4-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:0d9409894f495d465fe6fda92cb70e8323e9648af912d5b9141d616df40a87b8"}, + {file = "cryptography-41.0.4-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:8ac4f9ead4bbd0bc8ab2d318f97d85147167a488be0e08814a37eb2f439d5cf6"}, + {file = "cryptography-41.0.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:047c4603aeb4bbd8db2756e38f5b8bd7e94318c047cfe4efeb5d715e08b49311"}, + {file = "cryptography-41.0.4.tar.gz", hash = 
"sha256:7febc3094125fc126a7f6fb1f420d0da639f3f32cb15c8ff0dc3997c4549f51a"}, +] [package.dependencies] cffi = ">=1.12" @@ -469,9 +929,11 @@ test-randomorder = ["pytest-randomly"] name = "curlify" version = "2.2.1" description = "Library to convert python requests object to curl command." -category = "dev" optional = false python-versions = "*" +files = [ + {file = "curlify-2.2.1.tar.gz", hash = "sha256:0d3f02e7235faf952de8ef45ef469845196d30632d5838bcd5aee217726ddd6d"}, +] [package.dependencies] requests = "*" @@ -480,10 +942,13 @@ requests = "*" name = "dataclasses-json" version = "0.5.9" description = "Easily serialize dataclasses to and from JSON" -category = "dev" optional = false python-versions = ">=3.6" - +files = [ + {file = "dataclasses-json-0.5.9.tar.gz", hash = "sha256:e9ac87b73edc0141aafbce02b44e93553c3123ad574958f0fe52a534b6707e8e"}, + {file = "dataclasses_json-0.5.9-py3-none-any.whl", hash = "sha256:1280542631df1c375b7bc92e5b86d39e06c44760d7e3571a537b3b8acabf2f0c"}, +] + [package.dependencies] marshmallow = ">=3.3.0,<4.0.0" marshmallow-enum = ">=1.5.1,<2.0.0" @@ -496,17 +961,23 @@ dev = ["flake8", "hypothesis", "ipython", "mypy (>=0.710)", "portray", "pytest ( name = "decorator" version = "5.1.1" description = "Decorators for Humans" -category = "main" optional = false python-versions = ">=3.5" +files = [ + {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, + {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, +] [[package]] name = "dlt" version = "0.4.4" description = "dlt is an open-source python-first scalable data loading library that does not require any backend to run." -category = "main" optional = false python-versions = ">=3.8.1,<3.13" +files = [ + {file = "dlt-0.4.4-py3-none-any.whl", hash = "sha256:dfa1d0fd1ba5e2741f0d58314ca56aad26ec25032039bc3fa5d873d4611d8568"}, + {file = "dlt-0.4.4.tar.gz", hash = "sha256:9a9619f78fe06cc157a23179b4fb17a059606e8c980756ea0652b167b91356fa"}, +] [package.dependencies] astunparse = ">=1.6.3" @@ -571,9 +1042,12 @@ weaviate = ["weaviate-client (>=3.22)"] name = "dnspython" version = "2.4.2" description = "DNS toolkit" -category = "dev" optional = false python-versions = ">=3.8,<4.0" +files = [ + {file = "dnspython-2.4.2-py3-none-any.whl", hash = "sha256:57c6fbaaeaaf39c891292012060beb141791735dbb4004798328fc2c467402d8"}, + {file = "dnspython-2.4.2.tar.gz", hash = "sha256:8dcfae8c7460a2f84b4072e26f1c9f4101ca20c071649cb7c34e8b6a93d58984"}, +] [package.extras] dnssec = ["cryptography (>=2.6,<42.0)"] @@ -587,9 +1061,12 @@ wmi = ["wmi (>=1.5.1,<2.0.0)"] name = "domdf-python-tools" version = "3.6.1" description = "Helpful functions for Python 🐍 🛠️" -category = "dev" optional = false python-versions = ">=3.6" +files = [ + {file = "domdf_python_tools-3.6.1-py3-none-any.whl", hash = "sha256:e18158460850957f18e740eb94ede56f580ddb0cb162ab9d9834ed8bbb1b6431"}, + {file = "domdf_python_tools-3.6.1.tar.gz", hash = "sha256:acc04563d23bce4d437dd08af6b9bea788328c412772a044d8ca428a7ad861be"}, +] [package.dependencies] importlib-metadata = {version = ">=3.6.0", markers = "python_version < \"3.9\""} @@ -604,25 +1081,84 @@ dates = ["pytz (>=2019.1)"] name = "duckdb" version = "0.8.1" description = "DuckDB embedded database" -category = "main" optional = false python-versions = "*" +files = [ + {file = "duckdb-0.8.1-cp310-cp310-macosx_10_9_universal2.whl", hash = 
"sha256:14781d21580ee72aba1f5dcae7734674c9b6c078dd60470a08b2b420d15b996d"}, + {file = "duckdb-0.8.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f13bf7ab0e56ddd2014ef762ae4ee5ea4df5a69545ce1191b8d7df8118ba3167"}, + {file = "duckdb-0.8.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4032042d8363e55365bbca3faafc6dc336ed2aad088f10ae1a534ebc5bcc181"}, + {file = "duckdb-0.8.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:31a71bd8f0b0ca77c27fa89b99349ef22599ffefe1e7684ae2e1aa2904a08684"}, + {file = "duckdb-0.8.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:24568d6e48f3dbbf4a933109e323507a46b9399ed24c5d4388c4987ddc694fd0"}, + {file = "duckdb-0.8.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:297226c0dadaa07f7c5ae7cbdb9adba9567db7b16693dbd1b406b739ce0d7924"}, + {file = "duckdb-0.8.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:5792cf777ece2c0591194006b4d3e531f720186102492872cb32ddb9363919cf"}, + {file = "duckdb-0.8.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:12803f9f41582b68921d6b21f95ba7a51e1d8f36832b7d8006186f58c3d1b344"}, + {file = "duckdb-0.8.1-cp310-cp310-win32.whl", hash = "sha256:d0953d5a2355ddc49095e7aef1392b7f59c5be5cec8cdc98b9d9dc1f01e7ce2b"}, + {file = "duckdb-0.8.1-cp310-cp310-win_amd64.whl", hash = "sha256:6e6583c98a7d6637e83bcadfbd86e1f183917ea539f23b6b41178f32f813a5eb"}, + {file = "duckdb-0.8.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:fad7ed0d4415f633d955ac24717fa13a500012b600751d4edb050b75fb940c25"}, + {file = "duckdb-0.8.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:81ae602f34d38d9c48dd60f94b89f28df3ef346830978441b83c5b4eae131d08"}, + {file = "duckdb-0.8.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7d75cfe563aaa058d3b4ccaaa371c6271e00e3070df5de72361fd161b2fe6780"}, + {file = "duckdb-0.8.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8dbb55e7a3336f2462e5e916fc128c47fe1c03b6208d6bd413ac11ed95132aa0"}, + {file = "duckdb-0.8.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a6df53efd63b6fdf04657385a791a4e3c4fb94bfd5db181c4843e2c46b04fef5"}, + {file = "duckdb-0.8.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b188b80b70d1159b17c9baaf541c1799c1ce8b2af4add179a9eed8e2616be96"}, + {file = "duckdb-0.8.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:5ad481ee353f31250b45d64b4a104e53b21415577943aa8f84d0af266dc9af85"}, + {file = "duckdb-0.8.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d1d1b1729993611b1892509d21c21628917625cdbe824a61ce891baadf684b32"}, + {file = "duckdb-0.8.1-cp311-cp311-win32.whl", hash = "sha256:2d8f9cc301e8455a4f89aa1088b8a2d628f0c1f158d4cf9bc78971ed88d82eea"}, + {file = "duckdb-0.8.1-cp311-cp311-win_amd64.whl", hash = "sha256:07457a43605223f62d93d2a5a66b3f97731f79bbbe81fdd5b79954306122f612"}, + {file = "duckdb-0.8.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:d2c8062c3e978dbcd80d712ca3e307de8a06bd4f343aa457d7dd7294692a3842"}, + {file = "duckdb-0.8.1-cp36-cp36m-win32.whl", hash = "sha256:fad486c65ae944eae2de0d590a0a4fb91a9893df98411d66cab03359f9cba39b"}, + {file = "duckdb-0.8.1-cp36-cp36m-win_amd64.whl", hash = "sha256:86fa4506622c52d2df93089c8e7075f1c4d0ba56f4bf27faebde8725355edf32"}, + {file = "duckdb-0.8.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:60e07a62782f88420046e30cc0e3de842d0901c4fd5b8e4d28b73826ec0c3f5e"}, + {file = "duckdb-0.8.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash 
= "sha256:f18563675977f8cbf03748efee0165b4c8ef64e0cbe48366f78e2914d82138bb"}, + {file = "duckdb-0.8.1-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:16e179443832bea8439ae4dff93cf1e42c545144ead7a4ef5f473e373eea925a"}, + {file = "duckdb-0.8.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a413d5267cb41a1afe69d30dd6d4842c588256a6fed7554c7e07dad251ede095"}, + {file = "duckdb-0.8.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:3784680df59eadd683b0a4c2375d451a64470ca54bd171c01e36951962b1d332"}, + {file = "duckdb-0.8.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:67a1725c2b01f9b53571ecf3f92959b652f60156c1c48fb35798302e39b3c1a2"}, + {file = "duckdb-0.8.1-cp37-cp37m-win32.whl", hash = "sha256:197d37e2588c5ad063e79819054eedb7550d43bf1a557d03ba8f8f67f71acc42"}, + {file = "duckdb-0.8.1-cp37-cp37m-win_amd64.whl", hash = "sha256:3843feb79edf100800f5037c32d5d5a5474fb94b32ace66c707b96605e7c16b2"}, + {file = "duckdb-0.8.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:624c889b0f2d656794757b3cc4fc58030d5e285f5ad2ef9fba1ea34a01dab7fb"}, + {file = "duckdb-0.8.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:fcbe3742d77eb5add2d617d487266d825e663270ef90253366137a47eaab9448"}, + {file = "duckdb-0.8.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:47516c9299d09e9dbba097b9fb339b389313c4941da5c54109df01df0f05e78c"}, + {file = "duckdb-0.8.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf1ba718b7522d34399446ebd5d4b9fcac0b56b6ac07bfebf618fd190ec37c1d"}, + {file = "duckdb-0.8.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e36e35d38a9ae798fe8cf6a839e81494d5b634af89f4ec9483f4d0a313fc6bdb"}, + {file = "duckdb-0.8.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23493313f88ce6e708a512daacad13e83e6d1ea0be204b175df1348f7fc78671"}, + {file = "duckdb-0.8.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1fb9bf0b6f63616c8a4b9a6a32789045e98c108df100e6bac783dc1e36073737"}, + {file = "duckdb-0.8.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:12fc13ecd5eddd28b203b9e3999040d3a7374a8f4b833b04bd26b8c5685c2635"}, + {file = "duckdb-0.8.1-cp38-cp38-win32.whl", hash = "sha256:a12bf4b18306c9cb2c9ba50520317e6cf2de861f121d6f0678505fa83468c627"}, + {file = "duckdb-0.8.1-cp38-cp38-win_amd64.whl", hash = "sha256:e4e809358b9559c00caac4233e0e2014f3f55cd753a31c4bcbbd1b55ad0d35e4"}, + {file = "duckdb-0.8.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7acedfc00d97fbdb8c3d120418c41ef3cb86ef59367f3a9a30dff24470d38680"}, + {file = "duckdb-0.8.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:99bfe264059cdc1e318769103f656f98e819cd4e231cd76c1d1a0327f3e5cef8"}, + {file = "duckdb-0.8.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:538b225f361066231bc6cd66c04a5561de3eea56115a5dd773e99e5d47eb1b89"}, + {file = "duckdb-0.8.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ae0be3f71a18cd8492d05d0fc1bc67d01d5a9457b04822d025b0fc8ee6efe32e"}, + {file = "duckdb-0.8.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cd82ba63b58672e46c8ec60bc9946aa4dd7b77f21c1ba09633d8847ad9eb0d7b"}, + {file = "duckdb-0.8.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:780a34559aaec8354e83aa4b7b31b3555f1b2cf75728bf5ce11b89a950f5cdd9"}, + {file = "duckdb-0.8.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:01f0d4e9f7103523672bda8d3f77f440b3e0155dd3b2f24997bc0c77f8deb460"}, + {file = 
"duckdb-0.8.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:31f692decb98c2d57891da27180201d9e93bb470a3051fcf413e8da65bca37a5"}, + {file = "duckdb-0.8.1-cp39-cp39-win32.whl", hash = "sha256:e7fe93449cd309bbc67d1bf6f6392a6118e94a9a4479ab8a80518742e855370a"}, + {file = "duckdb-0.8.1-cp39-cp39-win_amd64.whl", hash = "sha256:81d670bc6807672f038332d9bf587037aabdd741b0810de191984325ed307abd"}, + {file = "duckdb-0.8.1.tar.gz", hash = "sha256:a54d37f4abc2afc4f92314aaa56ecf215a411f40af4bffe1e86bd25e62aceee9"}, +] [[package]] name = "et-xmlfile" version = "1.1.0" description = "An implementation of lxml.xmlfile for the standard library" -category = "dev" optional = false python-versions = ">=3.6" +files = [ + {file = "et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada"}, + {file = "et_xmlfile-1.1.0.tar.gz", hash = "sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c"}, +] [[package]] name = "exceptiongroup" version = "1.1.3" description = "Backport of PEP 654 (exception groups)" -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.1.3-py3-none-any.whl", hash = "sha256:343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3"}, + {file = "exceptiongroup-1.1.3.tar.gz", hash = "sha256:097acd85d473d75af5bb98e41b61ff7fe35efe6675e4f9370ec6ec5126d160e9"}, +] [package.extras] test = ["pytest (>=6)"] @@ -631,9 +1167,12 @@ test = ["pytest (>=6)"] name = "facebook-business" version = "17.0.4" description = "Facebook Business SDK" -category = "dev" optional = false python-versions = "*" +files = [ + {file = "facebook_business-17.0.4-py3-none-any.whl", hash = "sha256:c3a4afbe019c1fd2454eeeefb4e895ed3276d506115fbf9a993135f6af1c1a88"}, + {file = "facebook_business-17.0.4.tar.gz", hash = "sha256:52b516a237ab4cbf083053d3cc062995ff4732fca487b46543c4eab3bdbbf188"}, +] [package.dependencies] aiohttp = {version = "*", markers = "python_version >= \"3.5.3\""} @@ -646,9 +1185,12 @@ six = ">=1.7.3" name = "fastapi" version = "0.85.1" description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "fastapi-0.85.1-py3-none-any.whl", hash = "sha256:de3166b6b1163dc22da4dc4ebdc3192fcbac7700dd1870a1afa44de636a636b5"}, + {file = "fastapi-0.85.1.tar.gz", hash = "sha256:1facd097189682a4ff11cbd01334a992e51b56be663b2bd50c2c09523624f144"}, +] [package.dependencies] pydantic = ">=1.6.2,<1.7 || >1.7,<1.7.1 || >1.7.1,<1.7.2 || >1.7.2,<1.7.3 || >1.7.3,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0" @@ -664,9 +1206,12 @@ test = ["anyio[trio] (>=3.2.1,<4.0.0)", "black (==22.8.0)", "databases[sqlite] ( name = "filelock" version = "3.12.4" description = "A platform independent file lock." -category = "dev" optional = false python-versions = ">=3.8" +files = [ + {file = "filelock-3.12.4-py3-none-any.whl", hash = "sha256:08c21d87ded6e2b9da6728c3dff51baf1dcecf973b768ef35bcbc3447edb9ad4"}, + {file = "filelock-3.12.4.tar.gz", hash = "sha256:2e6f249f1f3654291606e046b09f1fd5eac39b360664c27f5aad072012f8bcbd"}, +] [package.extras] docs = ["furo (>=2023.7.26)", "sphinx (>=7.1.2)", "sphinx-autodoc-typehints (>=1.24)"] @@ -677,17 +1222,23 @@ typing = ["typing-extensions (>=4.7.1)"] name = "filetype" version = "1.2.0" description = "Infer file type and MIME type of any file/buffer. No external dependencies." 
-category = "dev" optional = false python-versions = "*" +files = [ + {file = "filetype-1.2.0-py2.py3-none-any.whl", hash = "sha256:7ce71b6880181241cf7ac8697a2f1eb6a8bd9b429f7ad6d27b8db9ba5f1c2d25"}, + {file = "filetype-1.2.0.tar.gz", hash = "sha256:66b56cd6474bf41d8c54660347d37afcc3f7d1970648de365c102ef77548aadb"}, +] [[package]] name = "flake8" version = "6.1.0" description = "the modular source code checker: pep8 pyflakes and co" -category = "dev" optional = false python-versions = ">=3.8.1" +files = [ + {file = "flake8-6.1.0-py2.py3-none-any.whl", hash = "sha256:ffdfce58ea94c6580c77888a86506937f9a1a227dfcd15f245d694ae20a6b6e5"}, + {file = "flake8-6.1.0.tar.gz", hash = "sha256:d5b3857f07c030bdb5bf41c7f53799571d75c4491748a3adcd47de929e34cd23"}, +] [package.dependencies] mccabe = ">=0.7.0,<0.8.0" @@ -698,9 +1249,12 @@ pyflakes = ">=3.1.0,<3.2.0" name = "flake8-bugbear" version = "22.12.6" description = "A plugin for flake8 finding likely bugs and design problems in your program. Contains warnings that don't belong in pyflakes and pycodestyle." -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "flake8-bugbear-22.12.6.tar.gz", hash = "sha256:4cdb2c06e229971104443ae293e75e64c6107798229202fbe4f4091427a30ac0"}, + {file = "flake8_bugbear-22.12.6-py3-none-any.whl", hash = "sha256:b69a510634f8a9c298dfda2b18a8036455e6b19ecac4fe582e4d7a0abfa50a30"}, +] [package.dependencies] attrs = ">=19.2.0" @@ -713,9 +1267,12 @@ dev = ["coverage", "hypothesis", "hypothesmith (>=0.2)", "pre-commit", "tox"] name = "flake8-builtins" version = "2.1.0" description = "Check for python builtins being used as variables or parameters." -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "flake8-builtins-2.1.0.tar.gz", hash = "sha256:12ff1ee96dd4e1f3141141ee6c45a5c7d3b3c440d0949e9b8d345c42b39c51d4"}, + {file = "flake8_builtins-2.1.0-py3-none-any.whl", hash = "sha256:469e8f03d6d0edf4b1e62b6d5a97dce4598592c8a13ec8f0952e7a185eba50a1"}, +] [package.dependencies] flake8 = "*" @@ -727,9 +1284,12 @@ test = ["pytest"] name = "flake8-encodings" version = "0.5.0.post1" description = "A Flake8 plugin to identify incorrect use of encodings." -category = "dev" optional = false python-versions = ">=3.6" +files = [ + {file = "flake8_encodings-0.5.0.post1-py3-none-any.whl", hash = "sha256:d2fecca0e89ba09c86e5d61cf6bdb1b337f0d74746aac67bbcf0c517b4cb6cba"}, + {file = "flake8_encodings-0.5.0.post1.tar.gz", hash = "sha256:082c0163325c85b438a8106e876283b5ed3cbfc53e68d89130d70be8be4c9977"}, +] [package.dependencies] astatine = ">=0.3.1" @@ -745,9 +1305,12 @@ classes = ["jedi (>=0.18.0)"] name = "flake8-helper" version = "0.2.1" description = "A helper library for Flake8 plugins." -category = "dev" optional = false python-versions = ">=3.6" +files = [ + {file = "flake8_helper-0.2.1-py3-none-any.whl", hash = "sha256:9123cdf351ad32ee8a51b85036052302c478122d62fb512c0773e111b3d05241"}, + {file = "flake8_helper-0.2.1.tar.gz", hash = "sha256:479f86d1c52df8e49ff876ecd3873242699f93eeece7e6675cdca9c37c9b0a16"}, +] [package.dependencies] flake8 = ">=3.8.4" @@ -756,9 +1319,12 @@ flake8 = ">=3.8.4" name = "flake8-tidy-imports" version = "4.10.0" description = "A flake8 plugin that helps you write tidier imports." 
-category = "dev" optional = false python-versions = ">=3.8" +files = [ + {file = "flake8_tidy_imports-4.10.0-py3-none-any.whl", hash = "sha256:b0387fb2ea200441bd142309e716fb7b8f4b0937bdf5f8b7c0c118a5f5e2b8ed"}, + {file = "flake8_tidy_imports-4.10.0.tar.gz", hash = "sha256:bd6cf86465402d2b86903009b748d85a628e599e17b76e810c9857e3a2815173"}, +] [package.dependencies] flake8 = ">=3.8.0" @@ -767,2845 +1333,20 @@ flake8 = ">=3.8.0" name = "flatbuffers" version = "23.5.26" description = "The FlatBuffers serialization format for Python" -category = "dev" optional = false python-versions = "*" - -[[package]] -name = "frozenlist" -version = "1.4.0" -description = "A list-like structure which implements collections.abc.MutableSequence" -category = "main" -optional = false -python-versions = ">=3.8" - -[[package]] -name = "fsspec" -version = "2023.9.2" -description = "File-system specification" -category = "main" -optional = false -python-versions = ">=3.8" - -[package.extras] -abfs = ["adlfs"] -adl = ["adlfs"] -arrow = ["pyarrow (>=1)"] -dask = ["dask", "distributed"] -devel = ["pytest", "pytest-cov"] -dropbox = ["dropbox", "dropboxdrivefs", "requests"] -full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"] -fuse = ["fusepy"] -gcs = ["gcsfs"] -git = ["pygit2"] -github = ["requests"] -gs = ["gcsfs"] -gui = ["panel"] -hdfs = ["pyarrow (>=1)"] -http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "requests"] -libarchive = ["libarchive-c"] -oci = ["ocifs"] -s3 = ["s3fs"] -sftp = ["paramiko"] -smb = ["smbprotocol"] -ssh = ["paramiko"] -tqdm = ["tqdm"] - -[[package]] -name = "gcsfs" -version = "2023.9.2" -description = "Convenient Filesystem interface over GCS" -category = "main" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -aiohttp = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1" -decorator = ">4.1.2" -fsspec = "2023.9.2" -google-auth = ">=1.2" -google-auth-oauthlib = "*" -google-cloud-storage = "*" -requests = "*" - -[package.extras] -crc = ["crcmod"] -gcsfuse = ["fusepy"] - -[[package]] -name = "gitdb" -version = "4.0.10" -description = "Git Object Database" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -smmap = ">=3.0.1,<6" - -[[package]] -name = "gitpython" -version = "3.1.37" -description = "GitPython is a Python library used to interact with Git repositories" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -gitdb = ">=4.0.1,<5" - -[package.extras] -test = ["black", "coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mypy", "pre-commit", "pytest", "pytest-cov", "pytest-sugar"] - -[[package]] -name = "giturlparse" -version = "0.12.0" -description = "A Git URL parsing module (supports parsing and rewriting)" -category = "main" -optional = false -python-versions = ">=3.8" - -[[package]] -name = "google-analytics-data" -version = "0.16.3" -description = "Google Analytics Data API client library" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -google-api-core = {version = ">=1.34.0,<2.0.0 || >=2.11.0,<3.0.0dev", extras = ["grpc"]} -proto-plus = [ - {version = ">=1.22.0,<2.0.0dev", markers = "python_version < \"3.11\""}, - {version = ">=1.22.2,<2.0.0dev", markers = "python_version >= \"3.11\""}, -] -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || 
>4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" - -[[package]] -name = "google-api-core" -version = "2.12.0" -description = "Google API client core library" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -google-auth = ">=2.14.1,<3.0.dev0" -googleapis-common-protos = ">=1.56.2,<2.0.dev0" -grpcio = [ - {version = ">=1.33.2,<2.0dev", optional = true, markers = "extra == \"grpc\""}, - {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\""}, -] -grpcio-status = [ - {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "extra == \"grpc\""}, - {version = ">=1.49.1,<2.0.dev0", optional = true, markers = "python_version >= \"3.11\""}, -] -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0.dev0" -requests = ">=2.18.0,<3.0.0.dev0" - -[package.extras] -grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "grpcio-status (>=1.33.2,<2.0.dev0)", "grpcio-status (>=1.49.1,<2.0.dev0)"] -grpcgcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] -grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] - -[[package]] -name = "google-api-python-client" -version = "2.103.0" -description = "Google API Client Library for Python" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -google-api-core = ">=1.31.5,<2.0.0 || >2.3.0,<3.0.0.dev0" -google-auth = ">=1.19.0,<3.0.0.dev0" -google-auth-httplib2 = ">=0.1.0" -httplib2 = ">=0.15.0,<1.dev0" -uritemplate = ">=3.0.1,<5" - -[[package]] -name = "google-auth" -version = "2.23.3" -description = "Google Authentication Library" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -cachetools = ">=2.0.0,<6.0" -pyasn1-modules = ">=0.2.1" -rsa = ">=3.1.4,<5" - -[package.extras] -aiohttp = ["aiohttp (>=3.6.2,<4.0.0.dev0)", "requests (>=2.20.0,<3.0.0.dev0)"] -enterprise-cert = ["cryptography (==36.0.2)", "pyopenssl (==22.0.0)"] -pyopenssl = ["cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"] -reauth = ["pyu2f (>=0.1.5)"] -requests = ["requests (>=2.20.0,<3.0.0.dev0)"] - -[[package]] -name = "google-auth-httplib2" -version = "0.1.1" -description = "Google Authentication Library: httplib2 transport" -category = "dev" -optional = false -python-versions = "*" - -[package.dependencies] -google-auth = "*" -httplib2 = ">=0.19.0" - -[[package]] -name = "google-auth-oauthlib" -version = "1.1.0" -description = "Google Authentication Library" -category = "main" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -google-auth = ">=2.15.0" -requests-oauthlib = ">=0.7.0" - -[package.extras] -tool = ["click (>=6.0.0)"] - -[[package]] -name = "google-cloud-bigquery" -version = "3.12.0" -description = "Google BigQuery API client library" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -google-api-core = {version = ">=1.31.5,<2.0.0 || >2.3.0,<3.0.0dev", extras = ["grpc"]} -google-cloud-core = ">=1.6.0,<3.0.0dev" -google-resumable-media = ">=0.6.0,<3.0dev" -grpcio = [ - {version = ">=1.47.0,<2.0dev", markers = "python_version < \"3.11\""}, - {version = ">=1.49.1,<2.0dev", markers = "python_version >= \"3.11\""}, -] -packaging = ">=20.0.0" -proto-plus = ">=1.15.0,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || 
>4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" -python-dateutil = ">=2.7.2,<3.0dev" -requests = ">=2.21.0,<3.0.0dev" - -[package.extras] -all = ["Shapely (>=1.8.4,<2.0dev)", "db-dtypes (>=0.3.0,<2.0.0dev)", "geopandas (>=0.9.0,<1.0dev)", "google-cloud-bigquery-storage (>=2.6.0,<3.0.0dev)", "grpcio (>=1.47.0,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "ipykernel (>=6.0.0)", "ipython (>=7.23.1,!=8.1.0)", "ipywidgets (>=7.7.0)", "opentelemetry-api (>=1.1.0)", "opentelemetry-instrumentation (>=0.20b0)", "opentelemetry-sdk (>=1.1.0)", "pandas (>=1.1.0)", "pyarrow (>=3.0.0)", "tqdm (>=4.7.4,<5.0.0dev)"] -bqstorage = ["google-cloud-bigquery-storage (>=2.6.0,<3.0.0dev)", "grpcio (>=1.47.0,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "pyarrow (>=3.0.0)"] -geopandas = ["Shapely (>=1.8.4,<2.0dev)", "geopandas (>=0.9.0,<1.0dev)"] -ipython = ["ipykernel (>=6.0.0)", "ipython (>=7.23.1,!=8.1.0)"] -ipywidgets = ["ipykernel (>=6.0.0)", "ipywidgets (>=7.7.0)"] -opentelemetry = ["opentelemetry-api (>=1.1.0)", "opentelemetry-instrumentation (>=0.20b0)", "opentelemetry-sdk (>=1.1.0)"] -pandas = ["db-dtypes (>=0.3.0,<2.0.0dev)", "pandas (>=1.1.0)", "pyarrow (>=3.0.0)"] -tqdm = ["tqdm (>=4.7.4,<5.0.0dev)"] - -[[package]] -name = "google-cloud-core" -version = "2.3.3" -description = "Google Cloud API client core library" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -google-api-core = ">=1.31.6,<2.0.0 || >2.3.0,<3.0.0dev" -google-auth = ">=1.25.0,<3.0dev" - -[package.extras] -grpc = ["grpcio (>=1.38.0,<2.0dev)"] - -[[package]] -name = "google-cloud-storage" -version = "2.12.0" -description = "Google Cloud Storage API client library" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -google-api-core = ">=1.31.5,<2.0.0 || >2.3.0,<3.0.0dev" -google-auth = ">=2.23.3,<3.0dev" -google-cloud-core = ">=2.3.0,<3.0dev" -google-crc32c = ">=1.0,<2.0dev" -google-resumable-media = ">=2.6.0" -requests = ">=2.18.0,<3.0.0dev" - -[package.extras] -protobuf = ["protobuf (<5.0.0dev)"] - -[[package]] -name = "google-crc32c" -version = "1.5.0" -description = "A python wrapper of the C library 'Google CRC32C'" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.extras] -testing = ["pytest"] - -[[package]] -name = "google-resumable-media" -version = "2.6.0" -description = "Utilities for Google Media Downloads and Resumable Uploads" -category = "main" -optional = false -python-versions = ">= 3.7" - -[package.dependencies] -google-crc32c = ">=1.0,<2.0dev" - -[package.extras] -aiohttp = ["aiohttp (>=3.6.2,<4.0.0dev)", "google-auth (>=1.22.0,<2.0dev)"] -requests = ["requests (>=2.18.0,<3.0.0dev)"] - -[[package]] -name = "googleapis-common-protos" -version = "1.61.0" -description = "Common protobufs used in Google APIs" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0.dev0" - -[package.extras] -grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"] - -[[package]] -name = "graphlib-backport" -version = "1.0.3" -description = "Backport of the Python 3.9 graphlib module for Python 3.6+" -category = "dev" -optional = false -python-versions = ">=3.6,<4.0" - -[[package]] -name = "greenlet" -version = "2.0.2" -description = "Lightweight in-process concurrent programming" -category = "main" -optional = false -python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" - 
-[package.extras] -docs = ["Sphinx", "docutils (<0.18)"] -test = ["objgraph", "psutil"] - -[[package]] -name = "grpcio" -version = "1.59.0" -description = "HTTP/2-based RPC framework" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.extras] -protobuf = ["grpcio-tools (>=1.59.0)"] - -[[package]] -name = "grpcio-status" -version = "1.59.0" -description = "Status proto mapping for gRPC" -category = "main" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -googleapis-common-protos = ">=1.5.5" -grpcio = ">=1.59.0" -protobuf = ">=4.21.6" - -[[package]] -name = "h11" -version = "0.14.0" -description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" -category = "dev" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "hexbytes" -version = "0.3.1" -description = "hexbytes: Python `bytes` subclass that decodes hex, with a readable console output" -category = "main" -optional = false -python-versions = ">=3.7, <4" - -[package.extras] -dev = ["black (>=22)", "bumpversion (>=0.5.3)", "eth-utils (>=1.0.1,<3)", "flake8 (==6.0.0)", "flake8-bugbear (==23.3.23)", "hypothesis (>=3.44.24,<=6.31.6)", "ipython", "isort (>=5.10.1)", "mypy (==0.971)", "pydocstyle (>=5.0.0)", "pytest (>=7.0.0)", "pytest-watch (>=4.1.0)", "pytest-xdist (>=2.4.0)", "sphinx (>=5.0.0)", "sphinx-rtd-theme (>=1.0.0)", "towncrier (>=21,<22)", "tox (>=4.0.0)", "twine", "wheel"] -doc = ["sphinx (>=5.0.0)", "sphinx-rtd-theme (>=1.0.0)", "towncrier (>=21,<22)"] -lint = ["black (>=22)", "flake8 (==6.0.0)", "flake8-bugbear (==23.3.23)", "isort (>=5.10.1)", "mypy (==0.971)", "pydocstyle (>=5.0.0)"] -test = ["eth-utils (>=1.0.1,<3)", "hypothesis (>=3.44.24,<=6.31.6)", "pytest (>=7.0.0)", "pytest-xdist (>=2.4.0)"] - -[[package]] -name = "hnswlib" -version = "0.7.0" -description = "hnswlib" -category = "dev" -optional = false -python-versions = "*" - -[package.dependencies] -numpy = "*" - -[[package]] -name = "httplib2" -version = "0.22.0" -description = "A comprehensive HTTP client library." -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" - -[package.dependencies] -pyparsing = {version = ">=2.4.2,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.0.2 || >3.0.2,<3.0.3 || >3.0.3,<4", markers = "python_version > \"3.0\""} - -[[package]] -name = "httptools" -version = "0.6.0" -description = "A collection of framework independent HTTP protocol utils." 
-category = "dev" -optional = false -python-versions = ">=3.5.0" - -[package.extras] -test = ["Cython (>=0.29.24,<0.30.0)"] - -[[package]] -name = "huggingface-hub" -version = "0.17.3" -description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" -category = "dev" -optional = false -python-versions = ">=3.8.0" - -[package.dependencies] -filelock = "*" -fsspec = "*" -packaging = ">=20.9" -pyyaml = ">=5.1" -requests = "*" -tqdm = ">=4.42.1" -typing-extensions = ">=3.7.4.3" - -[package.extras] -all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "black (==23.7)", "gradio", "jedi", "mypy (==1.5.1)", "numpy", "pydantic (<2.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)"] -cli = ["InquirerPy (==0.3.4)"] -dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "black (==23.7)", "gradio", "jedi", "mypy (==1.5.1)", "numpy", "pydantic (<2.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)"] -docs = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "black (==23.7)", "gradio", "hf-doc-builder", "jedi", "mypy (==1.5.1)", "numpy", "pydantic (<2.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)", "watchdog"] -fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"] -inference = ["aiohttp", "pydantic (<2.0)"] -quality = ["black (==23.7)", "mypy (==1.5.1)", "ruff (>=0.0.241)"] -tensorflow = ["graphviz", "pydot", "tensorflow"] -testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jedi", "numpy", "pydantic (<2.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"] -torch = ["torch"] -typing = ["pydantic (<2.0)", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] - -[[package]] -name = "humanfriendly" -version = "10.0" -description = "Human friendly output for text interfaces using Python" -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" - -[package.dependencies] -pyreadline3 = {version = "*", markers = "sys_platform == \"win32\" and python_version >= \"3.8\""} - -[[package]] -name = "humanize" -version = "4.8.0" -description = "Python humanize utilities" -category = "main" -optional = false -python-versions = ">=3.8" - -[package.extras] -tests = ["freezegun", "pytest", "pytest-cov"] - -[[package]] -name = "idna" -version = "3.4" -description = "Internationalized Domain Names in Applications (IDNA)" -category = "main" -optional = false -python-versions = ">=3.5" - -[[package]] -name = "importlib-metadata" -version = "6.8.0" -description = "Read metadata from Python packages" -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -zipp = ">=0.5" - -[package.extras] -docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -perf = ["ipython"] -testing = ["flufl.flake8", 
"importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)", "pytest-ruff"] - -[[package]] -name = "inflection" -version = "0.5.1" -description = "A port of Ruby on Rails inflector to Python" -category = "dev" -optional = false -python-versions = ">=3.5" - -[[package]] -name = "iniconfig" -version = "2.0.0" -description = "brain-dead simple config-ini parsing" -category = "dev" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "isodate" -version = "0.6.1" -description = "An ISO 8601 date/time/duration parser and formatter" -category = "dev" -optional = false -python-versions = "*" - -[package.dependencies] -six = "*" - -[[package]] -name = "jmespath" -version = "1.0.1" -description = "JSON Matching Expressions" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "joblib" -version = "1.3.2" -description = "Lightweight pipelining with Python functions" -category = "dev" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "jsonpath-ng" -version = "1.6.0" -description = "A final implementation of JSONPath for Python that aims to be standard compliant, including arithmetic and binary comparison operators and providing clear AST for metaprogramming." -category = "main" -optional = false -python-versions = "*" - -[package.dependencies] -ply = "*" - -[[package]] -name = "langchain" -version = "0.0.219" -description = "Building applications with LLMs through composability" -category = "dev" -optional = false -python-versions = ">=3.8.1,<4.0" - -[package.dependencies] -aiohttp = ">=3.8.3,<4.0.0" -async-timeout = {version = ">=4.0.0,<5.0.0", markers = "python_version < \"3.11\""} -dataclasses-json = ">=0.5.7,<0.6.0" -langchainplus-sdk = ">=0.0.17" -numexpr = ">=2.8.4,<3.0.0" -numpy = ">=1,<2" -openapi-schema-pydantic = ">=1.2,<2.0" -pydantic = ">=1,<2" -PyYAML = ">=5.4.1" -requests = ">=2,<3" -SQLAlchemy = ">=1.4,<3" -tenacity = ">=8.1.0,<9.0.0" - -[package.extras] -all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.2.6,<0.3.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.3,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (==9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=3,<4)", "deeplake (>=3.6.2,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jina (>=3.14,<4.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.1.dev3,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "momento (>=1.5.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<3.0.0)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "octoai-sdk (>=0.1.1,<0.2.0)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect 
(>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.1.2,<2.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "spacy (>=3,<4)", "steamship (>=2.16.9,<3.0.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.4.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"] -azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0a20230509004)", "openai (>=0,<1)"] -clarifai = ["clarifai (==9.1.0)"] -cohere = ["cohere (>=3,<4)"] -docarray = ["docarray[hnswlib] (>=0.32.0,<0.33.0)"] -embeddings = ["sentence-transformers (>=2,<3)"] -extended-testing = ["atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "chardet (>=5.1.0,<6.0.0)", "esprima (>=4.0.1,<5.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "jq (>=1.4.1,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "openai (>=0,<1)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "streamlit (>=1.18.0,<2.0.0)", "telethon (>=1.28.5,<2.0.0)", "tqdm (>=4.48.0)", "zep-python (>=0.31)"] -javascript = ["esprima (>=4.0.1,<5.0.0)"] -llms = ["anthropic (>=0.2.6,<0.3.0)", "clarifai (==9.1.0)", "cohere (>=3,<4)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (>=0,<1)", "openllm (>=0.1.6)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)"] -openai = ["openai (>=0,<1)", "tiktoken (>=0.3.2,<0.4.0)"] -qdrant = ["qdrant-client (>=1.1.2,<2.0.0)"] -text-helpers = ["chardet (>=5.1.0,<6.0.0)"] - -[[package]] -name = "langchainplus-sdk" -version = "0.0.20" -description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." -category = "dev" -optional = false -python-versions = ">=3.8.1,<4.0" - -[package.dependencies] -pydantic = ">=1,<2" -requests = ">=2,<3" -tenacity = ">=8.1.0,<9.0.0" - -[[package]] -name = "lxml" -version = "4.9.3" -description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." 
-category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*" - -[package.extras] -cssselect = ["cssselect (>=0.7)"] -html5 = ["html5lib"] -htmlsoup = ["BeautifulSoup4"] -source = ["Cython (>=0.29.35)"] - -[[package]] -name = "lz4" -version = "4.3.2" -description = "LZ4 Bindings for Python" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.extras] -docs = ["sphinx (>=1.6.0)", "sphinx-bootstrap-theme"] -flake8 = ["flake8"] -tests = ["psutil", "pytest (!=3.3.0)", "pytest-cov"] - -[[package]] -name = "makefun" -version = "1.15.1" -description = "Small library to dynamically create python functions." -category = "main" -optional = false -python-versions = "*" - -[[package]] -name = "markdown" -version = "3.5" -description = "Python implementation of John Gruber's Markdown." -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -importlib-metadata = {version = ">=4.4", markers = "python_version < \"3.10\""} - -[package.extras] -docs = ["mdx-gh-links (>=0.2)", "mkdocs (>=1.5)", "mkdocs-gen-files", "mkdocs-literate-nav", "mkdocs-nature (>=0.6)", "mkdocs-section-index", "mkdocstrings[python]"] -testing = ["coverage", "pyyaml"] - -[[package]] -name = "markdown-it-py" -version = "3.0.0" -description = "Python port of markdown-it. Markdown parsing, done right!" -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -mdurl = ">=0.1,<1.0" - -[package.extras] -benchmarking = ["psutil", "pytest", "pytest-benchmark"] -code-style = ["pre-commit (>=3.0,<4.0)"] -compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"] -linkify = ["linkify-it-py (>=1,<3)"] -plugins = ["mdit-py-plugins"] -profiling = ["gprof2dot"] -rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] -testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] - -[[package]] -name = "marshmallow" -version = "3.20.1" -description = "A lightweight library for converting complex datatypes to and from native Python datatypes." -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -packaging = ">=17.0" - -[package.extras] -dev = ["flake8 (==6.0.0)", "flake8-bugbear (==23.7.10)", "mypy (==1.4.1)", "pre-commit (>=2.4,<4.0)", "pytest", "pytz", "simplejson", "tox"] -docs = ["alabaster (==0.7.13)", "autodocsumm (==0.2.11)", "sphinx (==7.0.1)", "sphinx-issues (==3.0.1)", "sphinx-version-warning (==1.1.2)"] -lint = ["flake8 (==6.0.0)", "flake8-bugbear (==23.7.10)", "mypy (==1.4.1)", "pre-commit (>=2.4,<4.0)"] -tests = ["pytest", "pytz", "simplejson"] - -[[package]] -name = "marshmallow-enum" -version = "1.5.1" -description = "Enum field for Marshmallow" -category = "dev" -optional = false -python-versions = "*" - -[package.dependencies] -marshmallow = ">=2.0.0" - -[[package]] -name = "mccabe" -version = "0.7.0" -description = "McCabe checker, plugin for flake8" -category = "dev" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "mdurl" -version = "0.1.2" -description = "Markdown URL utilities" -category = "dev" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "mimesis" -version = "7.1.0" -description = "Mimesis: Fake Data Generator." 
-category = "dev" -optional = false -python-versions = ">=3.8,<4.0" - -[[package]] -name = "monotonic" -version = "1.6" -description = "An implementation of time.monotonic() for Python 2 & < 3.3" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "more-itertools" -version = "10.1.0" -description = "More routines for operating on iterables, beyond itertools" -category = "dev" -optional = false -python-versions = ">=3.8" - -[[package]] -name = "mpmath" -version = "1.3.0" -description = "Python library for arbitrary-precision floating-point arithmetic" -category = "dev" -optional = false -python-versions = "*" - -[package.extras] -develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"] -docs = ["sphinx"] -gmpy = ["gmpy2 (>=2.1.0a4)"] -tests = ["pytest (>=4.6)"] - -[[package]] -name = "msal" -version = "1.24.1" -description = "The Microsoft Authentication Library (MSAL) for Python library" -category = "dev" -optional = false -python-versions = ">=2.7" - -[package.dependencies] -cryptography = ">=0.6,<44" -PyJWT = {version = ">=1.0.0,<3", extras = ["crypto"]} -requests = ">=2.0.0,<3" - -[package.extras] -broker = ["pymsalruntime (>=0.13.2,<0.14)"] - -[[package]] -name = "msal-extensions" -version = "1.0.0" -description = "Microsoft Authentication Library extensions (MSAL EX) provides a persistence API that can save your data on disk, encrypted on Windows, macOS and Linux. Concurrent data access will be coordinated by a file lock mechanism." -category = "dev" -optional = false -python-versions = "*" - -[package.dependencies] -msal = ">=0.4.1,<2.0.0" -portalocker = [ - {version = ">=1.0,<3", markers = "python_version >= \"3.5\" and platform_system != \"Windows\""}, - {version = ">=1.6,<3", markers = "python_version >= \"3.5\" and platform_system == \"Windows\""}, -] - -[[package]] -name = "msg-parser" -version = "1.2.0" -description = "This module enables reading, parsing and converting Microsoft Outlook MSG E-Mail files." -category = "dev" -optional = false -python-versions = ">=3.4" - -[package.dependencies] -olefile = ">=0.46" - -[package.extras] -rtf = ["compressed-rtf (>=1.0.5)"] - -[[package]] -name = "multidict" -version = "6.0.4" -description = "multidict implementation" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "mypy" -version = "1.6.1" -description = "Optional static typing for Python" -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -mypy-extensions = ">=1.0.0" -tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} -typing-extensions = ">=4.1.0" - -[package.extras] -dmypy = ["psutil (>=4.0)"] -install-types = ["pip"] -reports = ["lxml"] - -[[package]] -name = "mypy-extensions" -version = "1.0.0" -description = "Type system extensions for programs checked with the mypy type checker." -category = "dev" -optional = false -python-versions = ">=3.5" - -[[package]] -name = "natsort" -version = "8.4.0" -description = "Simple yet flexible natural sorting in Python." 
-category = "dev" -optional = false -python-versions = ">=3.7" - -[package.extras] -fast = ["fastnumbers (>=2.0.0)"] -icu = ["PyICU (>=1.0.0)"] - -[[package]] -name = "nltk" -version = "3.8.1" -description = "Natural Language Toolkit" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -click = "*" -joblib = "*" -regex = ">=2021.8.3" -tqdm = "*" - -[package.extras] -all = ["matplotlib", "numpy", "pyparsing", "python-crfsuite", "requests", "scikit-learn", "scipy", "twython"] -corenlp = ["requests"] -machine-learning = ["numpy", "python-crfsuite", "scikit-learn", "scipy"] -plot = ["matplotlib"] -tgrep = ["pyparsing"] -twitter = ["twython"] - -[[package]] -name = "numexpr" -version = "2.8.6" -description = "Fast numerical expression evaluator for NumPy" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -numpy = ">=1.13.3" - -[[package]] -name = "numpy" -version = "1.24.4" -description = "Fundamental package for array computing in Python" -category = "main" -optional = false -python-versions = ">=3.8" - -[[package]] -name = "oauthlib" -version = "3.2.2" -description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" -category = "main" -optional = false -python-versions = ">=3.6" - -[package.extras] -rsa = ["cryptography (>=3.0.0)"] -signals = ["blinker (>=1.4.0)"] -signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] - -[[package]] -name = "olefile" -version = "0.46" -description = "Python package to parse, read and write Microsoft OLE2 files (Structured Storage or Compound Document, Microsoft Office)" -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" - -[[package]] -name = "onnxruntime" -version = "1.16.1" -description = "ONNX Runtime is a runtime accelerator for Machine Learning models" -category = "dev" -optional = false -python-versions = "*" - -[package.dependencies] -coloredlogs = "*" -flatbuffers = "*" -numpy = ">=1.21.6" -packaging = "*" -protobuf = "*" -sympy = "*" - -[[package]] -name = "openai" -version = "0.27.10" -description = "Python client library for the OpenAI API" -category = "dev" -optional = false -python-versions = ">=3.7.1" - -[package.dependencies] -aiohttp = "*" -requests = ">=2.20" -tqdm = "*" - -[package.extras] -datalib = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] -dev = ["black (>=21.6b0,<22.0)", "pytest (>=6.0.0,<7.0.0)", "pytest-asyncio", "pytest-mock"] -embeddings = ["matplotlib", "numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "plotly", "scikit-learn (>=1.0.2)", "scipy", "tenacity (>=8.0.1)"] -wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "wandb"] - -[[package]] -name = "openapi-schema-pydantic" -version = "1.2.4" -description = "OpenAPI (v3) specification schema as pydantic class" -category = "dev" -optional = false -python-versions = ">=3.6.1" - -[package.dependencies] -pydantic = ">=1.8.2" - -[[package]] -name = "openpyxl" -version = "3.1.2" -description = "A Python library to read/write Excel 2010 xlsx/xlsm files" -category = "dev" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -et-xmlfile = "*" - -[[package]] -name = "orjson" -version = "3.9.9" -description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" -category = "main" -optional = false -python-versions = ">=3.8" - -[[package]] -name = "overrides" -version = 
"7.4.0" -description = "A decorator to automatically detect mismatch when overriding a method." -category = "dev" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "packaging" -version = "23.2" -description = "Core utilities for Python packages" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "pandas" -version = "2.0.3" -description = "Powerful data structures for data analysis, time series, and statistics" -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -numpy = [ - {version = ">=1.20.3", markers = "python_version < \"3.10\""}, - {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, - {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, -] -python-dateutil = ">=2.8.2" -pytz = ">=2020.1" -tzdata = ">=2022.1" - -[package.extras] -all = ["PyQt5 (>=5.15.1)", "SQLAlchemy (>=1.4.16)", "beautifulsoup4 (>=4.9.3)", "bottleneck (>=1.3.2)", "brotlipy (>=0.7.0)", "fastparquet (>=0.6.3)", "fsspec (>=2021.07.0)", "gcsfs (>=2021.07.0)", "html5lib (>=1.1)", "hypothesis (>=6.34.2)", "jinja2 (>=3.0.0)", "lxml (>=4.6.3)", "matplotlib (>=3.6.1)", "numba (>=0.53.1)", "numexpr (>=2.7.3)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pandas-gbq (>=0.15.0)", "psycopg2 (>=2.8.6)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)", "python-snappy (>=0.6.0)", "pyxlsb (>=1.0.8)", "qtpy (>=2.2.0)", "s3fs (>=2021.08.0)", "scipy (>=1.7.1)", "tables (>=3.6.1)", "tabulate (>=0.8.9)", "xarray (>=0.21.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=1.4.3)", "zstandard (>=0.15.2)"] -aws = ["s3fs (>=2021.08.0)"] -clipboard = ["PyQt5 (>=5.15.1)", "qtpy (>=2.2.0)"] -compression = ["brotlipy (>=0.7.0)", "python-snappy (>=0.6.0)", "zstandard (>=0.15.2)"] -computation = ["scipy (>=1.7.1)", "xarray (>=0.21.0)"] -excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pyxlsb (>=1.0.8)", "xlrd (>=2.0.1)", "xlsxwriter (>=1.4.3)"] -feather = ["pyarrow (>=7.0.0)"] -fss = ["fsspec (>=2021.07.0)"] -gcp = ["gcsfs (>=2021.07.0)", "pandas-gbq (>=0.15.0)"] -hdf5 = ["tables (>=3.6.1)"] -html = ["beautifulsoup4 (>=4.9.3)", "html5lib (>=1.1)", "lxml (>=4.6.3)"] -mysql = ["SQLAlchemy (>=1.4.16)", "pymysql (>=1.0.2)"] -output-formatting = ["jinja2 (>=3.0.0)", "tabulate (>=0.8.9)"] -parquet = ["pyarrow (>=7.0.0)"] -performance = ["bottleneck (>=1.3.2)", "numba (>=0.53.1)", "numexpr (>=2.7.1)"] -plot = ["matplotlib (>=3.6.1)"] -postgresql = ["SQLAlchemy (>=1.4.16)", "psycopg2 (>=2.8.6)"] -spss = ["pyreadstat (>=1.1.2)"] -sql-other = ["SQLAlchemy (>=1.4.16)"] -test = ["hypothesis (>=6.34.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"] -xml = ["lxml (>=4.6.3)"] - -[[package]] -name = "pandas-stubs" -version = "2.0.2.230605" -description = "Type annotations for pandas" -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -numpy = ">=1.24.3" -types-pytz = ">=2022.1.1" - -[[package]] -name = "pathspec" -version = "0.11.2" -description = "Utility library for gitignore style pattern matching of file paths." -category = "dev" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "pathvalidate" -version = "3.2.0" -description = "pathvalidate is a Python library to sanitize/validate a string such as filenames/file-paths/etc." 
-category = "main" -optional = false -python-versions = ">=3.7" - -[package.extras] -docs = ["Sphinx (>=2.4)", "sphinx-rtd-theme (>=1.2.2)", "urllib3 (<2)"] -test = ["Faker (>=1.0.8)", "allpairspy (>=2)", "click (>=6.2)", "pytest (>=6.0.1)", "pytest-discord (>=0.1.4)", "pytest-md-report (>=0.4.1)"] - -[[package]] -name = "pbr" -version = "5.11.1" -description = "Python Build Reasonableness" -category = "dev" -optional = false -python-versions = ">=2.6" - -[[package]] -name = "pdf2image" -version = "1.16.3" -description = "A wrapper around the pdftoppm and pdftocairo command line tools to convert PDF to a PIL Image list." -category = "dev" -optional = false -python-versions = "*" - -[package.dependencies] -pillow = "*" - -[[package]] -name = "pdfminer-six" -version = "20221105" -description = "PDF parser and analyzer" -category = "dev" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -charset-normalizer = ">=2.0.0" -cryptography = ">=36.0.0" - -[package.extras] -dev = ["black", "mypy (==0.931)", "nox", "pytest"] -docs = ["sphinx", "sphinx-argparse"] -image = ["Pillow"] - -[[package]] -name = "pendulum" -version = "2.1.2" -description = "Python datetimes made easy" -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" - -[package.dependencies] -python-dateutil = ">=2.6,<3.0" -pytzdata = ">=2020.1" - -[[package]] -name = "pillow" -version = "9.5.0" -description = "Python Imaging Library (Fork)" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.extras] -docs = ["furo", "olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinx-removed-in", "sphinxext-opengraph"] -tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] - -[[package]] -name = "platformdirs" -version = "3.11.0" -description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.extras] -docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.1)", "sphinx-autodoc-typehints (>=1.24)"] -test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)"] - -[[package]] -name = "pluggy" -version = "1.3.0" -description = "plugin and hook calling mechanisms for python" -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.extras] -dev = ["pre-commit", "tox"] -testing = ["pytest", "pytest-benchmark"] - -[[package]] -name = "ply" -version = "3.11" -description = "Python Lex & Yacc" -category = "main" -optional = false -python-versions = "*" - -[[package]] -name = "portalocker" -version = "2.8.2" -description = "Wraps the portalocker recipe for easy usage" -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -pywin32 = {version = ">=226", markers = "platform_system == \"Windows\""} - -[package.extras] -docs = ["sphinx (>=1.7.1)"] -redis = ["redis"] -tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "pytest-timeout (>=2.1.0)", "redis", "sphinx (>=6.0.0)", "types-redis"] - -[[package]] -name = "posthog" -version = "3.0.2" -description = "Integrate PostHog into any python application." 
-category = "dev" -optional = false -python-versions = "*" - -[package.dependencies] -backoff = ">=1.10.0" -monotonic = ">=1.5" -python-dateutil = ">2.1" -requests = ">=2.7,<3.0" -six = ">=1.5" - -[package.extras] -dev = ["black", "flake8", "flake8-print", "isort", "pre-commit"] -sentry = ["django", "sentry-sdk"] -test = ["coverage", "flake8", "freezegun (==0.3.15)", "mock (>=2.0.0)", "pylint", "pytest"] - -[[package]] -name = "proto-plus" -version = "1.22.3" -description = "Beautiful, Pythonic protocol buffers." -category = "main" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -protobuf = ">=3.19.0,<5.0.0dev" - -[package.extras] -testing = ["google-api-core[grpc] (>=1.31.5)"] - -[[package]] -name = "protobuf" -version = "4.24.4" -description = "" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "psycopg2-binary" -version = "2.9.9" -description = "psycopg2 - Python-PostgreSQL Database Adapter" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "psycopg2cffi" -version = "2.9.0" -description = ".. image:: https://travis-ci.org/chtd/psycopg2cffi.svg?branch=master" -category = "main" -optional = false -python-versions = "*" - -[package.dependencies] -cffi = ">=1.0" -six = "*" - -[[package]] -name = "pulsar-client" -version = "3.3.0" -description = "Apache Pulsar Python client library" -category = "dev" -optional = false -python-versions = "*" - -[package.dependencies] -certifi = "*" - -[package.extras] -all = ["apache-bookkeeper-client (>=4.16.1)", "fastavro (==1.7.3)", "grpcio (>=1.8.2)", "prometheus-client", "protobuf (>=3.6.1,<=3.20.3)", "ratelimit"] -avro = ["fastavro (==1.7.3)"] -functions = ["apache-bookkeeper-client (>=4.16.1)", "grpcio (>=1.8.2)", "prometheus-client", "protobuf (>=3.6.1,<=3.20.3)", "ratelimit"] - -[[package]] -name = "pyairtable" -version = "2.1.0.post1" -description = "Python Client for the Airtable API" -category = "dev" -optional = false -python-versions = "*" - -[package.dependencies] -inflection = "*" -pydantic = "*" -requests = ">=2.22.0" -typing-extensions = "*" -urllib3 = ">=1.26" - -[[package]] -name = "pyarrow" -version = "13.0.0" -description = "Python library for Apache Arrow" -category = "main" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -numpy = ">=1.16.6" - -[[package]] -name = "pyasn1" -version = "0.5.0" -description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" -category = "main" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" - -[[package]] -name = "pyasn1-modules" -version = "0.3.0" -description = "A collection of ASN.1-based protocols modules" -category = "main" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" - -[package.dependencies] -pyasn1 = ">=0.4.6,<0.6.0" - -[[package]] -name = "pycodestyle" -version = "2.11.1" -description = "Python style guide checker" -category = "dev" -optional = false -python-versions = ">=3.8" - -[[package]] -name = "pycountry" -version = "22.3.5" -description = "ISO country, subdivision, language, currency and script definitions and their translations" -category = "dev" -optional = false -python-versions = ">=3.6, <4" - -[package.dependencies] -setuptools = "*" - -[[package]] -name = "pycparser" -version = "2.21" -description = "C parser in Python" -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" - -[[package]] -name = 
"pydantic" -version = "1.10.13" -description = "Data validation and settings management using python type hints" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -typing-extensions = ">=4.2.0" - -[package.extras] -dotenv = ["python-dotenv (>=0.10.4)"] -email = ["email-validator (>=1.0.3)"] - -[[package]] -name = "pyflakes" -version = "3.1.0" -description = "passive checker of Python programs" -category = "dev" -optional = false -python-versions = ">=3.8" - -[[package]] -name = "pygments" -version = "2.16.1" -description = "Pygments is a syntax highlighting package written in Python." -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.extras] -plugins = ["importlib-metadata"] - -[[package]] -name = "pyjwt" -version = "2.8.0" -description = "JSON Web Token implementation in Python" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -cryptography = {version = ">=3.4.0", optional = true, markers = "extra == \"crypto\""} - -[package.extras] -crypto = ["cryptography (>=3.4.0)"] -dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pytest (>=6.0.0,<7.0.0)", "sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] -docs = ["sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] -tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] - -[[package]] -name = "pymongo" -version = "4.5.0" -description = "Python driver for MongoDB " -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -dnspython = ">=1.16.0,<3.0.0" - -[package.extras] -aws = ["pymongo-auth-aws (<2.0.0)"] -encryption = ["certifi", "pymongo[aws]", "pymongocrypt (>=1.6.0,<2.0.0)"] -gssapi = ["pykerberos", "winkerberos (>=0.5.0)"] -ocsp = ["certifi", "cryptography (>=2.5)", "pyopenssl (>=17.2.0)", "requests (<3.0.0)", "service-identity (>=18.1.0)"] -snappy = ["python-snappy"] -zstd = ["zstandard"] - -[[package]] -name = "pymysql" -version = "1.1.0" -description = "Pure Python MySQL Driver" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.extras] -ed25519 = ["PyNaCl (>=1.4.0)"] -rsa = ["cryptography"] - -[[package]] -name = "pypandoc" -version = "1.11" -description = "Thin wrapper for pandoc." -category = "dev" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "pyparsing" -version = "3.1.1" -description = "pyparsing module - Classes and methods to define and execute parsing grammars" -category = "dev" -optional = false -python-versions = ">=3.6.8" - -[package.extras] -diagrams = ["jinja2", "railroad-diagrams"] - -[[package]] -name = "pypdf2" -version = "3.0.1" -description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files" -category = "dev" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -typing_extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""} - -[package.extras] -crypto = ["PyCryptodome"] -dev = ["black", "flit", "pip-tools", "pre-commit (<2.18.0)", "pytest-cov", "wheel"] -docs = ["myst_parser", "sphinx", "sphinx_rtd_theme"] -full = ["Pillow", "PyCryptodome"] -image = ["Pillow"] - -[[package]] -name = "pyreadline3" -version = "3.4.1" -description = "A python implementation of GNU readline." 
-category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "pytest" -version = "7.4.2" -description = "pytest: simple powerful testing with Python" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -colorama = {version = "*", markers = "sys_platform == \"win32\""} -exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} -iniconfig = "*" -packaging = "*" -pluggy = ">=0.12,<2.0" -tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} - -[package.extras] -testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] - -[[package]] -name = "python-dateutil" -version = "2.8.2" -description = "Extensions to the standard Python datetime module" -category = "main" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" - -[package.dependencies] -six = ">=1.5" - -[[package]] -name = "python-docx" -version = "1.0.1" -description = "Create, read, and update Microsoft Word .docx files." -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -lxml = ">=3.1.0" -typing-extensions = "*" - -[[package]] -name = "python-dotenv" -version = "1.0.0" -description = "Read key-value pairs from a .env file and set them as environment variables" -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.extras] -cli = ["click (>=5.0)"] - -[[package]] -name = "python-magic" -version = "0.4.27" -description = "File type identification using libmagic" -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" - -[[package]] -name = "python-pptx" -version = "0.6.22" -description = "Generate and manipulate Open XML PowerPoint (.pptx) files" -category = "dev" -optional = false -python-versions = "*" - -[package.dependencies] -lxml = ">=3.1.0" -Pillow = ">=3.3.2,<=9.5.0" -XlsxWriter = ">=0.5.7" - -[[package]] -name = "pytz" -version = "2023.3.post1" -description = "World timezone definitions, modern and historical" -category = "main" -optional = false -python-versions = "*" - -[[package]] -name = "pytzdata" -version = "2020.1" -description = "The Olson timezone database for Python." -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" - -[[package]] -name = "pywin32" -version = "306" -description = "Python for Window Extensions" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "pyyaml" -version = "6.0.1" -description = "YAML parser and emitter for Python" -category = "main" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "regex" -version = "2023.10.3" -description = "Alternative regular expression module, to replace re." -category = "dev" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "requests" -version = "2.31.0" -description = "Python HTTP for Humans." 
-category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -certifi = ">=2017.4.17" -charset-normalizer = ">=2,<4" -idna = ">=2.5,<4" -urllib3 = ">=1.21.1,<3" - -[package.extras] -socks = ["PySocks (>=1.5.6,!=1.5.7)"] -use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] - -[[package]] -name = "requests-file" -version = "1.5.1" -description = "File transport adapter for Requests" -category = "dev" -optional = false -python-versions = "*" - -[package.dependencies] -requests = ">=1.0.0" -six = "*" - -[[package]] -name = "requests-mock" -version = "1.11.0" -description = "Mock out responses from the requests package" -category = "dev" -optional = false -python-versions = "*" - -[package.dependencies] -requests = ">=2.3,<3" -six = "*" - -[package.extras] -fixture = ["fixtures"] -test = ["fixtures", "mock", "purl", "pytest", "requests-futures", "sphinx", "testtools"] - -[[package]] -name = "requests-oauthlib" -version = "1.3.1" -description = "OAuthlib authentication support for Requests." -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" - -[package.dependencies] -oauthlib = ">=3.0.0" -requests = ">=2.0.0" - -[package.extras] -rsa = ["oauthlib[signedtoken] (>=3.0.0)"] - -[[package]] -name = "requests-toolbelt" -version = "1.0.0" -description = "A utility belt for advanced users of python-requests" -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" - -[package.dependencies] -requests = ">=2.0.1,<3.0.0" - -[[package]] -name = "requirements-parser" -version = "0.5.0" -description = "This is a small Python module for parsing Pip requirement files." -category = "main" -optional = false -python-versions = ">=3.6,<4.0" - -[package.dependencies] -types-setuptools = ">=57.0.0" - -[[package]] -name = "rich" -version = "13.6.0" -description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" -category = "dev" -optional = false -python-versions = ">=3.7.0" - -[package.dependencies] -markdown-it-py = ">=2.2.0" -pygments = ">=2.13.0,<3.0.0" -typing-extensions = {version = ">=4.0.0,<5.0", markers = "python_version < \"3.9\""} - -[package.extras] -jupyter = ["ipywidgets (>=7.5.1,<9)"] - -[[package]] -name = "rsa" -version = "4.9" -description = "Pure-Python RSA implementation" -category = "main" -optional = false -python-versions = ">=3.6,<4" - -[package.dependencies] -pyasn1 = ">=0.1.3" - -[[package]] -name = "s3fs" -version = "2023.9.2" -description = "Convenient Filesystem interface over S3" -category = "main" -optional = false -python-versions = ">= 3.8" - -[package.dependencies] -aiobotocore = ">=2.5.4,<2.6.0" -aiohttp = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1" -fsspec = "2023.9.2" - -[package.extras] -awscli = ["aiobotocore[awscli] (>=2.5.4,<2.6.0)"] -boto3 = ["aiobotocore[boto3] (>=2.5.4,<2.6.0)"] - -[[package]] -name = "semver" -version = "3.0.2" -description = "Python helper for Semantic Versioning (https://semver.org)" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "setuptools" -version = "68.2.2" -description = "Easily download, build, install, upgrade, and uninstall Python packages" -category = "main" -optional = false -python-versions = ">=3.8" - -[package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-lint", 
"sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] -testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] -testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.1)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] - -[[package]] -name = "simple-salesforce" -version = "1.12.5" -description = "A basic Salesforce.com REST API client." -category = "dev" -optional = false -python-versions = "*" - -[package.dependencies] -cryptography = "*" -more-itertools = "*" -pendulum = "*" -pyjwt = "*" -requests = ">=2.22.0" -zeep = "*" - -[[package]] -name = "simplejson" -version = "3.19.2" -description = "Simple, fast, extensible JSON encoder/decoder for Python" -category = "main" -optional = false -python-versions = ">=2.5, !=3.0.*, !=3.1.*, !=3.2.*" - -[[package]] -name = "six" -version = "1.16.0" -description = "Python 2 and 3 compatibility utilities" -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" - -[[package]] -name = "smmap" -version = "5.0.1" -description = "A pure Python implementation of a sliding window memory map manager" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "sniffio" -version = "1.3.0" -description = "Sniff out which async library your code is running under" -category = "dev" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "sqlalchemy" -version = "2.0.22" -description = "Database Abstraction Library" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -greenlet = {version = "!=0.4.17", markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\""} -typing-extensions = ">=4.2.0" - -[package.extras] -aiomysql = ["aiomysql (>=0.2.0)", "greenlet (!=0.4.17)"] -aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing-extensions (!=3.10.0.1)"] -asyncio = ["greenlet (!=0.4.17)"] -asyncmy = ["asyncmy (>=0.2.3,!=0.2.4,!=0.2.6)", "greenlet (!=0.4.17)"] -mariadb-connector = ["mariadb (>=1.0.1,!=1.1.2,!=1.1.5)"] -mssql = ["pyodbc"] -mssql-pymssql = ["pymssql"] -mssql-pyodbc = ["pyodbc"] -mypy = ["mypy (>=0.910)"] -mysql = ["mysqlclient (>=1.4.0)"] -mysql-connector = ["mysql-connector-python"] -oracle = ["cx-oracle (>=7)"] -oracle-oracledb = ["oracledb (>=1.0.1)"] -postgresql = ["psycopg2 (>=2.7)"] -postgresql-asyncpg = ["asyncpg", "greenlet (!=0.4.17)"] -postgresql-pg8000 = ["pg8000 (>=1.29.1)"] -postgresql-psycopg = ["psycopg (>=3.0.7)"] -postgresql-psycopg2binary = ["psycopg2-binary"] -postgresql-psycopg2cffi = ["psycopg2cffi"] -postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"] -pymysql = ["pymysql"] -sqlcipher = ["sqlcipher3-binary"] - -[[package]] -name = "starlette" -version = "0.20.4" -description = "The little ASGI library that shines." 
-category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -anyio = ">=3.4.0,<5" -typing-extensions = {version = ">=3.10.0", markers = "python_version < \"3.10\""} - -[package.extras] -full = ["itsdangerous", "jinja2", "python-multipart", "pyyaml", "requests"] - -[[package]] -name = "stevedore" -version = "5.1.0" -description = "Manage dynamic plugins for Python applications" -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -pbr = ">=2.0.0,<2.1.0 || >2.1.0" - -[[package]] -name = "stripe" -version = "5.5.0" -description = "Python bindings for the Stripe API" -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" - -[package.dependencies] -requests = {version = ">=2.20", markers = "python_version >= \"3.0\""} - -[[package]] -name = "sympy" -version = "1.12" -description = "Computer algebra system (CAS) in Python" -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -mpmath = ">=0.19" - -[[package]] -name = "tabulate" -version = "0.9.0" -description = "Pretty-print tabular data" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.extras] -widechars = ["wcwidth"] - -[[package]] -name = "tenacity" -version = "8.2.3" -description = "Retry code until it succeeds" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.extras] -doc = ["reno", "sphinx", "tornado (>=4.5)"] - -[[package]] -name = "tiktoken" -version = "0.4.0" -description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -regex = ">=2022.1.18" -requests = ">=2.26.0" - -[package.extras] -blobfile = ["blobfile (>=2)"] - -[[package]] -name = "tokenizers" -version = "0.14.1" -description = "" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -huggingface_hub = ">=0.16.4,<0.18" - -[package.extras] -dev = ["tokenizers[testing]"] -docs = ["setuptools_rust", "sphinx", "sphinx_rtd_theme"] -testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] - -[[package]] -name = "tomli" -version = "2.0.1" -description = "A lil' TOML parser" -category = "dev" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "tomlkit" -version = "0.12.1" -description = "Style preserving TOML library" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "tqdm" -version = "4.66.1" -description = "Fast, Extensible Progress Meter" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -colorama = {version = "*", markers = "platform_system == \"Windows\""} - -[package.extras] -dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] -notebook = ["ipywidgets (>=6)"] -slack = ["slack-sdk"] -telegram = ["requests"] - -[[package]] -name = "types-pytz" -version = "2023.3.1.1" -description = "Typing stubs for pytz" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "types-requests" -version = "2.31.0.6" -description = "Typing stubs for requests" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -types-urllib3 = "*" - -[[package]] -name = "types-setuptools" -version = "68.2.0.0" -description = "Typing stubs for setuptools" -category = "main" -optional = false -python-versions = "*" - -[[package]] -name = "types-stripe" -version = "3.5.2.14" -description = "Typing stubs 
for stripe" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "types-urllib3" -version = "1.26.25.14" -description = "Typing stubs for urllib3" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "typing-extensions" -version = "4.8.0" -description = "Backported and Experimental Type Hints for Python 3.8+" -category = "main" -optional = false -python-versions = ">=3.8" - -[[package]] -name = "typing-inspect" -version = "0.9.0" -description = "Runtime inspection utilities for typing module." -category = "dev" -optional = false -python-versions = "*" - -[package.dependencies] -mypy-extensions = ">=0.3.0" -typing-extensions = ">=3.7.4" - -[[package]] -name = "tzdata" -version = "2023.3" -description = "Provider of IANA time zone data" -category = "main" -optional = false -python-versions = ">=2" - -[[package]] -name = "unstructured" -version = "0.7.12" -description = "A library that prepares raw documents for downstream ML tasks." -category = "dev" -optional = false -python-versions = ">=3.7.0" - -[package.dependencies] -argilla = "*" -chardet = "*" -filetype = "*" -lxml = "*" -markdown = "*" -msg-parser = "*" -nltk = "*" -openpyxl = "*" -pandas = "*" -pdf2image = "*" -"pdfminer.six" = "*" -pillow = "*" -pypandoc = "*" -python-docx = "*" -python-magic = "*" -python-pptx = "*" -requests = "*" -tabulate = "*" -xlrd = "*" - -[package.extras] -azure = ["adlfs", "fsspec"] -discord = ["discord-py"] -dropbox = ["dropboxdrivefs", "fsspec"] -gcs = ["fsspec", "gcsfs"] -github = ["pygithub (==1.58.2)"] -gitlab = ["python-gitlab"] -google-drive = ["google-api-python-client"] -huggingface = ["langdetect", "sacremoses", "sentencepiece", "torch", "transformers"] -local-inference = ["unstructured-inference (==0.5.4)"] -reddit = ["praw"] -s3 = ["fsspec", "s3fs"] -slack = ["slack-sdk"] -wikipedia = ["wikipedia"] - -[[package]] -name = "uritemplate" -version = "4.1.1" -description = "Implementation of RFC 6570 URI Templates" -category = "dev" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "urllib3" -version = "1.26.17" -description = "HTTP library with thread-safe connection pooling, file post, and more." -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" - -[package.extras] -brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] -secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] -socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] - -[[package]] -name = "uvicorn" -version = "0.23.2" -description = "The lightning-fast ASGI server." 
-category = "dev" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -click = ">=7.0" -colorama = {version = ">=0.4", optional = true, markers = "sys_platform == \"win32\" and extra == \"standard\""} -h11 = ">=0.8" -httptools = {version = ">=0.5.0", optional = true, markers = "extra == \"standard\""} -python-dotenv = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} -pyyaml = {version = ">=5.1", optional = true, markers = "extra == \"standard\""} -typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} -uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\" and extra == \"standard\""} -watchfiles = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} -websockets = {version = ">=10.4", optional = true, markers = "extra == \"standard\""} - -[package.extras] -standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] - -[[package]] -name = "uvloop" -version = "0.18.0" -description = "Fast implementation of asyncio event loop on top of libuv" -category = "dev" -optional = false -python-versions = ">=3.7.0" - -[package.extras] -docs = ["Sphinx (>=4.1.2,<4.2.0)", "sphinx-rtd-theme (>=0.5.2,<0.6.0)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)"] -test = ["Cython (>=0.29.36,<0.30.0)", "aiohttp (==3.9.0b0)", "aiohttp (>=3.8.1)", "flake8 (>=5.0,<6.0)", "mypy (>=0.800)", "psutil", "pyOpenSSL (>=23.0.0,<23.1.0)", "pycodestyle (>=2.9.0,<2.10.0)"] - -[[package]] -name = "watchfiles" -version = "0.21.0" -description = "Simple, modern and high performance file watching and code reload in python." -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -anyio = ">=3.0.0" - -[[package]] -name = "websockets" -version = "11.0.3" -description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" -category = "dev" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "wheel" -version = "0.41.2" -description = "A built-package format for Python" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.extras] -test = ["pytest (>=6.0.0)", "setuptools (>=65)"] - -[[package]] -name = "win-precise-time" -version = "1.4.2" -description = "" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "wrapt" -version = "1.15.0" -description = "Module for decorators, wrappers and monkey patching." -category = "main" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" - -[[package]] -name = "xlrd" -version = "2.0.1" -description = "Library for developers to extract data from Microsoft Excel (tm) .xls spreadsheet files" -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" - -[package.extras] -build = ["twine", "wheel"] -docs = ["sphinx"] -test = ["pytest", "pytest-cov"] - -[[package]] -name = "xlsxwriter" -version = "3.1.7" -description = "A Python module for creating Excel XLSX files." 
-category = "dev" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "yarl" -version = "1.9.2" -description = "Yet another URL library" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -idna = ">=2.0" -multidict = ">=4.0" - -[[package]] -name = "zeep" -version = "4.2.1" -description = "A Python SOAP client" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -attrs = ">=17.2.0" -isodate = ">=0.5.4" -lxml = ">=4.6.0" -platformdirs = ">=1.4.0" -pytz = "*" -requests = ">=2.7.0" -requests-file = ">=1.5.1" -requests-toolbelt = ">=0.7.1" - -[package.extras] -async = ["httpx (>=0.15.0)"] -docs = ["sphinx (>=1.4.0)"] -test = ["coverage[toml] (==5.2.1)", "flake8 (==3.8.3)", "flake8-blind-except (==0.1.1)", "flake8-debugger (==3.2.1)", "flake8-imports (==0.1.1)", "freezegun (==0.3.15)", "isort (==5.3.2)", "pretend (==1.0.9)", "pytest (==6.2.5)", "pytest-asyncio", "pytest-cov (==2.8.1)", "pytest-httpx", "requests-mock (>=0.7.0)"] -xmlsec = ["xmlsec (>=0.6.1)"] - -[[package]] -name = "zipp" -version = "3.17.0" -description = "Backport of pathlib-compatible object wrapper for zip files" -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] -testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"] - -[[package]] -name = "zstandard" -version = "0.21.0" -description = "Zstandard bindings for Python" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\""} - -[package.extras] -cffi = ["cffi (>=1.11)"] - -[metadata] -lock-version = "1.1" -python-versions = ">=3.8.1,<3.13" -content-hash = "373607ba2955222555af9ae89fd0a180c75de2225344b0f2a633204340c98aa7" - -[metadata.files] -adlfs = [ - {file = "adlfs-2023.9.0-py3-none-any.whl", hash = "sha256:e2cff62b8128578c6d1b9da1660ad4c8a5a8cb0d491bba416b529563c65dc5d2"}, - {file = "adlfs-2023.9.0.tar.gz", hash = "sha256:1ce70ffa39f7cffc3efbbd9f79b444958eb5d9de9981442b06e47472d2089d4b"}, -] -aiobotocore = [ - {file = "aiobotocore-2.5.4-py3-none-any.whl", hash = "sha256:4b32218728ca3d0be83835b604603a0cd6c329066e884bb78149334267f92440"}, - {file = "aiobotocore-2.5.4.tar.gz", hash = "sha256:60341f19eda77e41e1ab11eef171b5a98b5dbdb90804f5334b6f90e560e31fae"}, -] -aiohttp = [ - {file = "aiohttp-3.8.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:41d55fc043954cddbbd82503d9cc3f4814a40bcef30b3569bc7b5e34130718c1"}, - {file = "aiohttp-3.8.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1d84166673694841d8953f0a8d0c90e1087739d24632fe86b1a08819168b4566"}, - {file = "aiohttp-3.8.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:253bf92b744b3170eb4c4ca2fa58f9c4b87aeb1df42f71d4e78815e6e8b73c9e"}, - {file = "aiohttp-3.8.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3fd194939b1f764d6bb05490987bfe104287bbf51b8d862261ccf66f48fb4096"}, - {file = "aiohttp-3.8.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6c5f938d199a6fdbdc10bbb9447496561c3a9a565b43be564648d81e1102ac22"}, - {file = 
"aiohttp-3.8.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2817b2f66ca82ee699acd90e05c95e79bbf1dc986abb62b61ec8aaf851e81c93"}, - {file = "aiohttp-3.8.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0fa375b3d34e71ccccf172cab401cd94a72de7a8cc01847a7b3386204093bb47"}, - {file = "aiohttp-3.8.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9de50a199b7710fa2904be5a4a9b51af587ab24c8e540a7243ab737b45844543"}, - {file = "aiohttp-3.8.6-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e1d8cb0b56b3587c5c01de3bf2f600f186da7e7b5f7353d1bf26a8ddca57f965"}, - {file = "aiohttp-3.8.6-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:8e31e9db1bee8b4f407b77fd2507337a0a80665ad7b6c749d08df595d88f1cf5"}, - {file = "aiohttp-3.8.6-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:7bc88fc494b1f0311d67f29fee6fd636606f4697e8cc793a2d912ac5b19aa38d"}, - {file = "aiohttp-3.8.6-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:ec00c3305788e04bf6d29d42e504560e159ccaf0be30c09203b468a6c1ccd3b2"}, - {file = "aiohttp-3.8.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ad1407db8f2f49329729564f71685557157bfa42b48f4b93e53721a16eb813ed"}, - {file = "aiohttp-3.8.6-cp310-cp310-win32.whl", hash = "sha256:ccc360e87341ad47c777f5723f68adbb52b37ab450c8bc3ca9ca1f3e849e5fe2"}, - {file = "aiohttp-3.8.6-cp310-cp310-win_amd64.whl", hash = "sha256:93c15c8e48e5e7b89d5cb4613479d144fda8344e2d886cf694fd36db4cc86865"}, - {file = "aiohttp-3.8.6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6e2f9cc8e5328f829f6e1fb74a0a3a939b14e67e80832975e01929e320386b34"}, - {file = "aiohttp-3.8.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e6a00ffcc173e765e200ceefb06399ba09c06db97f401f920513a10c803604ca"}, - {file = "aiohttp-3.8.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:41bdc2ba359032e36c0e9de5a3bd00d6fb7ea558a6ce6b70acedf0da86458321"}, - {file = "aiohttp-3.8.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14cd52ccf40006c7a6cd34a0f8663734e5363fd981807173faf3a017e202fec9"}, - {file = "aiohttp-3.8.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2d5b785c792802e7b275c420d84f3397668e9d49ab1cb52bd916b3b3ffcf09ad"}, - {file = "aiohttp-3.8.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1bed815f3dc3d915c5c1e556c397c8667826fbc1b935d95b0ad680787896a358"}, - {file = "aiohttp-3.8.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96603a562b546632441926cd1293cfcb5b69f0b4159e6077f7c7dbdfb686af4d"}, - {file = "aiohttp-3.8.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d76e8b13161a202d14c9584590c4df4d068c9567c99506497bdd67eaedf36403"}, - {file = "aiohttp-3.8.6-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e3f1e3f1a1751bb62b4a1b7f4e435afcdade6c17a4fd9b9d43607cebd242924a"}, - {file = "aiohttp-3.8.6-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:76b36b3124f0223903609944a3c8bf28a599b2cc0ce0be60b45211c8e9be97f8"}, - {file = "aiohttp-3.8.6-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:a2ece4af1f3c967a4390c284797ab595a9f1bc1130ef8b01828915a05a6ae684"}, - {file = "aiohttp-3.8.6-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:16d330b3b9db87c3883e565340d292638a878236418b23cc8b9b11a054aaa887"}, - {file = "aiohttp-3.8.6-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:42c89579f82e49db436b69c938ab3e1559e5a4409eb8639eb4143989bc390f2f"}, - {file = "aiohttp-3.8.6-cp311-cp311-win32.whl", hash = "sha256:efd2fcf7e7b9d7ab16e6b7d54205beded0a9c8566cb30f09c1abe42b4e22bdcb"}, - {file = "aiohttp-3.8.6-cp311-cp311-win_amd64.whl", hash = "sha256:3b2ab182fc28e7a81f6c70bfbd829045d9480063f5ab06f6e601a3eddbbd49a0"}, - {file = "aiohttp-3.8.6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:fdee8405931b0615220e5ddf8cd7edd8592c606a8e4ca2a00704883c396e4479"}, - {file = "aiohttp-3.8.6-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d25036d161c4fe2225d1abff2bd52c34ed0b1099f02c208cd34d8c05729882f0"}, - {file = "aiohttp-3.8.6-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5d791245a894be071d5ab04bbb4850534261a7d4fd363b094a7b9963e8cdbd31"}, - {file = "aiohttp-3.8.6-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0cccd1de239afa866e4ce5c789b3032442f19c261c7d8a01183fd956b1935349"}, - {file = "aiohttp-3.8.6-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f13f60d78224f0dace220d8ab4ef1dbc37115eeeab8c06804fec11bec2bbd07"}, - {file = "aiohttp-3.8.6-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8a9b5a0606faca4f6cc0d338359d6fa137104c337f489cd135bb7fbdbccb1e39"}, - {file = "aiohttp-3.8.6-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:13da35c9ceb847732bf5c6c5781dcf4780e14392e5d3b3c689f6d22f8e15ae31"}, - {file = "aiohttp-3.8.6-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:4d4cbe4ffa9d05f46a28252efc5941e0462792930caa370a6efaf491f412bc66"}, - {file = "aiohttp-3.8.6-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:229852e147f44da0241954fc6cb910ba074e597f06789c867cb7fb0621e0ba7a"}, - {file = "aiohttp-3.8.6-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:713103a8bdde61d13490adf47171a1039fd880113981e55401a0f7b42c37d071"}, - {file = "aiohttp-3.8.6-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:45ad816b2c8e3b60b510f30dbd37fe74fd4a772248a52bb021f6fd65dff809b6"}, - {file = "aiohttp-3.8.6-cp36-cp36m-win32.whl", hash = "sha256:2b8d4e166e600dcfbff51919c7a3789ff6ca8b3ecce16e1d9c96d95dd569eb4c"}, - {file = "aiohttp-3.8.6-cp36-cp36m-win_amd64.whl", hash = "sha256:0912ed87fee967940aacc5306d3aa8ba3a459fcd12add0b407081fbefc931e53"}, - {file = "aiohttp-3.8.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e2a988a0c673c2e12084f5e6ba3392d76c75ddb8ebc6c7e9ead68248101cd446"}, - {file = "aiohttp-3.8.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ebf3fd9f141700b510d4b190094db0ce37ac6361a6806c153c161dc6c041ccda"}, - {file = "aiohttp-3.8.6-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3161ce82ab85acd267c8f4b14aa226047a6bee1e4e6adb74b798bd42c6ae1f80"}, - {file = "aiohttp-3.8.6-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d95fc1bf33a9a81469aa760617b5971331cdd74370d1214f0b3109272c0e1e3c"}, - {file = "aiohttp-3.8.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c43ecfef7deaf0617cee936836518e7424ee12cb709883f2c9a1adda63cc460"}, - {file = "aiohttp-3.8.6-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca80e1b90a05a4f476547f904992ae81eda5c2c85c66ee4195bb8f9c5fb47f28"}, - {file = "aiohttp-3.8.6-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:90c72ebb7cb3a08a7f40061079817133f502a160561d0675b0a6adf231382c92"}, - {file = 
"aiohttp-3.8.6-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:bb54c54510e47a8c7c8e63454a6acc817519337b2b78606c4e840871a3e15349"}, - {file = "aiohttp-3.8.6-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:de6a1c9f6803b90e20869e6b99c2c18cef5cc691363954c93cb9adeb26d9f3ae"}, - {file = "aiohttp-3.8.6-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:a3628b6c7b880b181a3ae0a0683698513874df63783fd89de99b7b7539e3e8a8"}, - {file = "aiohttp-3.8.6-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:fc37e9aef10a696a5a4474802930079ccfc14d9f9c10b4662169671ff034b7df"}, - {file = "aiohttp-3.8.6-cp37-cp37m-win32.whl", hash = "sha256:f8ef51e459eb2ad8e7a66c1d6440c808485840ad55ecc3cafefadea47d1b1ba2"}, - {file = "aiohttp-3.8.6-cp37-cp37m-win_amd64.whl", hash = "sha256:b2fe42e523be344124c6c8ef32a011444e869dc5f883c591ed87f84339de5976"}, - {file = "aiohttp-3.8.6-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:9e2ee0ac5a1f5c7dd3197de309adfb99ac4617ff02b0603fd1e65b07dc772e4b"}, - {file = "aiohttp-3.8.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:01770d8c04bd8db568abb636c1fdd4f7140b284b8b3e0b4584f070180c1e5c62"}, - {file = "aiohttp-3.8.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3c68330a59506254b556b99a91857428cab98b2f84061260a67865f7f52899f5"}, - {file = "aiohttp-3.8.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89341b2c19fb5eac30c341133ae2cc3544d40d9b1892749cdd25892bbc6ac951"}, - {file = "aiohttp-3.8.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:71783b0b6455ac8f34b5ec99d83e686892c50498d5d00b8e56d47f41b38fbe04"}, - {file = "aiohttp-3.8.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f628dbf3c91e12f4d6c8b3f092069567d8eb17814aebba3d7d60c149391aee3a"}, - {file = "aiohttp-3.8.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b04691bc6601ef47c88f0255043df6f570ada1a9ebef99c34bd0b72866c217ae"}, - {file = "aiohttp-3.8.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7ee912f7e78287516df155f69da575a0ba33b02dd7c1d6614dbc9463f43066e3"}, - {file = "aiohttp-3.8.6-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9c19b26acdd08dd239e0d3669a3dddafd600902e37881f13fbd8a53943079dbc"}, - {file = "aiohttp-3.8.6-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:99c5ac4ad492b4a19fc132306cd57075c28446ec2ed970973bbf036bcda1bcc6"}, - {file = "aiohttp-3.8.6-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:f0f03211fd14a6a0aed2997d4b1c013d49fb7b50eeb9ffdf5e51f23cfe2c77fa"}, - {file = "aiohttp-3.8.6-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:8d399dade330c53b4106160f75f55407e9ae7505263ea86f2ccca6bfcbdb4921"}, - {file = "aiohttp-3.8.6-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ec4fd86658c6a8964d75426517dc01cbf840bbf32d055ce64a9e63a40fd7b771"}, - {file = "aiohttp-3.8.6-cp38-cp38-win32.whl", hash = "sha256:33164093be11fcef3ce2571a0dccd9041c9a93fa3bde86569d7b03120d276c6f"}, - {file = "aiohttp-3.8.6-cp38-cp38-win_amd64.whl", hash = "sha256:bdf70bfe5a1414ba9afb9d49f0c912dc524cf60141102f3a11143ba3d291870f"}, - {file = "aiohttp-3.8.6-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:d52d5dc7c6682b720280f9d9db41d36ebe4791622c842e258c9206232251ab2b"}, - {file = "aiohttp-3.8.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4ac39027011414dbd3d87f7edb31680e1f430834c8cef029f11c66dad0670aa5"}, - {file = "aiohttp-3.8.6-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:3f5c7ce535a1d2429a634310e308fb7d718905487257060e5d4598e29dc17f0b"}, - {file = "aiohttp-3.8.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b30e963f9e0d52c28f284d554a9469af073030030cef8693106d918b2ca92f54"}, - {file = "aiohttp-3.8.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:918810ef188f84152af6b938254911055a72e0f935b5fbc4c1a4ed0b0584aed1"}, - {file = "aiohttp-3.8.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:002f23e6ea8d3dd8d149e569fd580c999232b5fbc601c48d55398fbc2e582e8c"}, - {file = "aiohttp-3.8.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4fcf3eabd3fd1a5e6092d1242295fa37d0354b2eb2077e6eb670accad78e40e1"}, - {file = "aiohttp-3.8.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:255ba9d6d5ff1a382bb9a578cd563605aa69bec845680e21c44afc2670607a95"}, - {file = "aiohttp-3.8.6-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d67f8baed00870aa390ea2590798766256f31dc5ed3ecc737debb6e97e2ede78"}, - {file = "aiohttp-3.8.6-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:86f20cee0f0a317c76573b627b954c412ea766d6ada1a9fcf1b805763ae7feeb"}, - {file = "aiohttp-3.8.6-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:39a312d0e991690ccc1a61f1e9e42daa519dcc34ad03eb6f826d94c1190190dd"}, - {file = "aiohttp-3.8.6-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:e827d48cf802de06d9c935088c2924e3c7e7533377d66b6f31ed175c1620e05e"}, - {file = "aiohttp-3.8.6-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bd111d7fc5591ddf377a408ed9067045259ff2770f37e2d94e6478d0f3fc0c17"}, - {file = "aiohttp-3.8.6-cp39-cp39-win32.whl", hash = "sha256:caf486ac1e689dda3502567eb89ffe02876546599bbf915ec94b1fa424eeffd4"}, - {file = "aiohttp-3.8.6-cp39-cp39-win_amd64.whl", hash = "sha256:3f0e27e5b733803333bb2371249f41cf42bae8884863e8e8965ec69bebe53132"}, - {file = "aiohttp-3.8.6.tar.gz", hash = "sha256:b0cf2a4501bff9330a8a5248b4ce951851e415bdcce9dc158e76cfd55e15085c"}, -] -aioitertools = [ - {file = "aioitertools-0.11.0-py3-none-any.whl", hash = "sha256:04b95e3dab25b449def24d7df809411c10e62aab0cbe31a50ca4e68748c43394"}, - {file = "aioitertools-0.11.0.tar.gz", hash = "sha256:42c68b8dd3a69c2bf7f2233bf7df4bb58b557bca5252ac02ed5187bbc67d6831"}, -] -aiosignal = [ - {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, - {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, -] -anyio = [ - {file = "anyio-4.0.0-py3-none-any.whl", hash = "sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f"}, - {file = "anyio-4.0.0.tar.gz", hash = "sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a"}, -] -argilla = [ - {file = "argilla-0.0.1-py3-none-any.whl", hash = "sha256:8bdc3c505bcfb47ba4b91f5658034eae53bf7d4f9317980397605c0c55817396"}, - {file = "argilla-0.0.1.tar.gz", hash = "sha256:5017854754e89f573b31af25b25b803f51cea9ca1fa0bcf00505dee1f45cf7c9"}, -] -asana = [ - {file = "asana-3.2.2-py2.py3-none-any.whl", hash = "sha256:e8426ae5f5cda2c27d29874145acb589b91e673a84e3fbd45404679499d9604a"}, - {file = "asana-3.2.2.tar.gz", hash = "sha256:3a0c64ad5baaa8c52465fe400cedbc873b2127a77df135af518fd8da1af8d6b9"}, -] -astatine = [ - {file = "astatine-0.3.3-py3-none-any.whl", hash = "sha256:6d8c914f01fbea252cb8f31563f2e766a9ab03c02b9bcc37d18f7d9138828401"}, - {file = "astatine-0.3.3.tar.gz", hash = 
"sha256:0c58a7844b5890ff16da07dbfeb187341d8324cb4378940f89d795cbebebce08"}, -] -asttokens = [ - {file = "asttokens-2.4.0-py2.py3-none-any.whl", hash = "sha256:cf8fc9e61a86461aa9fb161a14a0841a03c405fa829ac6b202670b3495d2ce69"}, - {file = "asttokens-2.4.0.tar.gz", hash = "sha256:2e0171b991b2c959acc6c49318049236844a5da1d65ba2672c4880c1c894834e"}, -] -astunparse = [ - {file = "astunparse-1.6.3-py2.py3-none-any.whl", hash = "sha256:c2652417f2c8b5bb325c885ae329bdf3f86424075c4fd1a128674bc6fba4b8e8"}, - {file = "astunparse-1.6.3.tar.gz", hash = "sha256:5ad93a8456f0d084c3456d059fd9a92cce667963232cbf763eac3bc5b7940872"}, -] -async-timeout = [ - {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, - {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, -] -attrs = [ - {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"}, - {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"}, -] -azure-core = [ - {file = "azure-core-1.29.4.tar.gz", hash = "sha256:500b3aa9bf2e90c5ccc88bb105d056114ca0ce7d0ce73afb8bc4d714b2fc7568"}, - {file = "azure_core-1.29.4-py3-none-any.whl", hash = "sha256:b03261bcba22c0b9290faf9999cedd23e849ed2577feee90515694cea6bc74bf"}, -] -azure-datalake-store = [ - {file = "azure-datalake-store-0.0.53.tar.gz", hash = "sha256:05b6de62ee3f2a0a6e6941e6933b792b800c3e7f6ffce2fc324bc19875757393"}, - {file = "azure_datalake_store-0.0.53-py2.py3-none-any.whl", hash = "sha256:a30c902a6e360aa47d7f69f086b426729784e71c536f330b691647a51dc42b2b"}, -] -azure-identity = [ - {file = "azure-identity-1.14.1.zip", hash = "sha256:48e2a9dbdc59b4f095f841d867d9a8cbe4c1cdbbad8251e055561afd47b4a9b8"}, - {file = "azure_identity-1.14.1-py3-none-any.whl", hash = "sha256:3a5bef8e9c3281e864e869739be8d67424bff616cddae96b546ca2a5168d863d"}, -] -azure-storage-blob = [ - {file = "azure-storage-blob-12.18.3.tar.gz", hash = "sha256:d8ced0deee3367fa3d4f3d1a03cd9edadf4440c0a371f503d623fa6c807554ee"}, - {file = "azure_storage_blob-12.18.3-py3-none-any.whl", hash = "sha256:c278dde2ac41857a68d615c9f2b36d894ba877a7e84d62795603c7e79d0bb5e9"}, -] -backoff = [ - {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"}, - {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, -] -bandit = [ - {file = "bandit-1.7.5-py3-none-any.whl", hash = "sha256:75665181dc1e0096369112541a056c59d1c5f66f9bb74a8d686c3c362b83f549"}, - {file = "bandit-1.7.5.tar.gz", hash = "sha256:bdfc739baa03b880c2d15d0431b31c658ffc348e907fe197e54e0389dd59e11e"}, -] -black = [ - {file = "black-23.9.1-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:d6bc09188020c9ac2555a498949401ab35bb6bf76d4e0f8ee251694664df6301"}, - {file = "black-23.9.1-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:13ef033794029b85dfea8032c9d3b92b42b526f1ff4bf13b2182ce4e917f5100"}, - {file = "black-23.9.1-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:75a2dc41b183d4872d3a500d2b9c9016e67ed95738a3624f4751a0cb4818fe71"}, - {file = "black-23.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13a2e4a93bb8ca74a749b6974925c27219bb3df4d42fc45e948a5d9feb5122b7"}, - {file = "black-23.9.1-cp310-cp310-win_amd64.whl", hash = 
"sha256:adc3e4442eef57f99b5590b245a328aad19c99552e0bdc7f0b04db6656debd80"}, - {file = "black-23.9.1-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:8431445bf62d2a914b541da7ab3e2b4f3bc052d2ccbf157ebad18ea126efb91f"}, - {file = "black-23.9.1-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:8fc1ddcf83f996247505db6b715294eba56ea9372e107fd54963c7553f2b6dfe"}, - {file = "black-23.9.1-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:7d30ec46de88091e4316b17ae58bbbfc12b2de05e069030f6b747dfc649ad186"}, - {file = "black-23.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:031e8c69f3d3b09e1aa471a926a1eeb0b9071f80b17689a655f7885ac9325a6f"}, - {file = "black-23.9.1-cp311-cp311-win_amd64.whl", hash = "sha256:538efb451cd50f43aba394e9ec7ad55a37598faae3348d723b59ea8e91616300"}, - {file = "black-23.9.1-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:638619a559280de0c2aa4d76f504891c9860bb8fa214267358f0a20f27c12948"}, - {file = "black-23.9.1-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:a732b82747235e0542c03bf352c126052c0fbc458d8a239a94701175b17d4855"}, - {file = "black-23.9.1-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:cf3a4d00e4cdb6734b64bf23cd4341421e8953615cba6b3670453737a72ec204"}, - {file = "black-23.9.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf99f3de8b3273a8317681d8194ea222f10e0133a24a7548c73ce44ea1679377"}, - {file = "black-23.9.1-cp38-cp38-win_amd64.whl", hash = "sha256:14f04c990259576acd093871e7e9b14918eb28f1866f91968ff5524293f9c573"}, - {file = "black-23.9.1-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:c619f063c2d68f19b2d7270f4cf3192cb81c9ec5bc5ba02df91471d0b88c4c5c"}, - {file = "black-23.9.1-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:6a3b50e4b93f43b34a9d3ef00d9b6728b4a722c997c99ab09102fd5efdb88325"}, - {file = "black-23.9.1-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:c46767e8df1b7beefb0899c4a95fb43058fa8500b6db144f4ff3ca38eb2f6393"}, - {file = "black-23.9.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50254ebfa56aa46a9fdd5d651f9637485068a1adf42270148cd101cdf56e0ad9"}, - {file = "black-23.9.1-cp39-cp39-win_amd64.whl", hash = "sha256:403397c033adbc45c2bd41747da1f7fc7eaa44efbee256b53842470d4ac5a70f"}, - {file = "black-23.9.1-py3-none-any.whl", hash = "sha256:6ccd59584cc834b6d127628713e4b6b968e5f79572da66284532525a042549f9"}, - {file = "black-23.9.1.tar.gz", hash = "sha256:24b6b3ff5c6d9ea08a8888f6977eae858e1f340d7260cf56d70a49823236b62d"}, -] -botocore = [ - {file = "botocore-1.31.17-py3-none-any.whl", hash = "sha256:6ac34a1d34aa3750e78b77b8596617e2bab938964694d651939dba2cbde2c12b"}, - {file = "botocore-1.31.17.tar.gz", hash = "sha256:396459065dba4339eb4da4ec8b4e6599728eb89b7caaceea199e26f7d824a41c"}, -] -cachetools = [ - {file = "cachetools-5.3.1-py3-none-any.whl", hash = "sha256:95ef631eeaea14ba2e36f06437f36463aac3a096799e876ee55e5cdccb102590"}, - {file = "cachetools-5.3.1.tar.gz", hash = "sha256:dce83f2d9b4e1f732a8cd44af8e8fab2dbe46201467fc98b3ef8f269092bf62b"}, -] -certifi = [ - {file = "certifi-2023.7.22-py3-none-any.whl", hash = "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"}, - {file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"}, -] -cffi = [ - {file = "cffi-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088"}, - {file = "cffi-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9"}, - {file = "cffi-1.16.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673"}, - {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896"}, - {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684"}, - {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7"}, - {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614"}, - {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743"}, - {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d"}, - {file = "cffi-1.16.0-cp310-cp310-win32.whl", hash = "sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a"}, - {file = "cffi-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1"}, - {file = "cffi-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404"}, - {file = "cffi-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417"}, - {file = "cffi-1.16.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627"}, - {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936"}, - {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d"}, - {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56"}, - {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e"}, - {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc"}, - {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb"}, - {file = "cffi-1.16.0-cp311-cp311-win32.whl", hash = "sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab"}, - {file = "cffi-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba"}, - {file = "cffi-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956"}, - {file = "cffi-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e"}, - {file = 
"cffi-1.16.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e"}, - {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2"}, - {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357"}, - {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6"}, - {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969"}, - {file = "cffi-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520"}, - {file = "cffi-1.16.0-cp312-cp312-win32.whl", hash = "sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b"}, - {file = "cffi-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235"}, - {file = "cffi-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0c9ef6ff37e974b73c25eecc13952c55bceed9112be2d9d938ded8e856138bcc"}, - {file = "cffi-1.16.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a09582f178759ee8128d9270cd1344154fd473bb77d94ce0aeb2a93ebf0feaf0"}, - {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e760191dd42581e023a68b758769e2da259b5d52e3103c6060ddc02c9edb8d7b"}, - {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:80876338e19c951fdfed6198e70bc88f1c9758b94578d5a7c4c91a87af3cf31c"}, - {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a6a14b17d7e17fa0d207ac08642c8820f84f25ce17a442fd15e27ea18d67c59b"}, - {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6602bc8dc6f3a9e02b6c22c4fc1e47aa50f8f8e6d3f78a5e16ac33ef5fefa324"}, - {file = "cffi-1.16.0-cp38-cp38-win32.whl", hash = "sha256:131fd094d1065b19540c3d72594260f118b231090295d8c34e19a7bbcf2e860a"}, - {file = "cffi-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:31d13b0f99e0836b7ff893d37af07366ebc90b678b6664c955b54561fc36ef36"}, - {file = "cffi-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed"}, - {file = "cffi-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2"}, - {file = "cffi-1.16.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872"}, - {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8"}, - {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f"}, - {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4"}, - {file = 
"cffi-1.16.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098"}, - {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000"}, - {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe"}, - {file = "cffi-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4"}, - {file = "cffi-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8"}, - {file = "cffi-1.16.0.tar.gz", hash = "sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0"}, -] -chardet = [ - {file = "chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970"}, - {file = "chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7"}, -] -charset-normalizer = [ - {file = "charset-normalizer-3.3.0.tar.gz", hash = "sha256:63563193aec44bce707e0c5ca64ff69fa72ed7cf34ce6e11d5127555756fd2f6"}, - {file = "charset_normalizer-3.3.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:effe5406c9bd748a871dbcaf3ac69167c38d72db8c9baf3ff954c344f31c4cbe"}, - {file = "charset_normalizer-3.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4162918ef3098851fcd8a628bf9b6a98d10c380725df9e04caf5ca6dd48c847a"}, - {file = "charset_normalizer-3.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0570d21da019941634a531444364f2482e8db0b3425fcd5ac0c36565a64142c8"}, - {file = "charset_normalizer-3.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5707a746c6083a3a74b46b3a631d78d129edab06195a92a8ece755aac25a3f3d"}, - {file = "charset_normalizer-3.3.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:278c296c6f96fa686d74eb449ea1697f3c03dc28b75f873b65b5201806346a69"}, - {file = "charset_normalizer-3.3.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a4b71f4d1765639372a3b32d2638197f5cd5221b19531f9245fcc9ee62d38f56"}, - {file = "charset_normalizer-3.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5969baeaea61c97efa706b9b107dcba02784b1601c74ac84f2a532ea079403e"}, - {file = "charset_normalizer-3.3.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a3f93dab657839dfa61025056606600a11d0b696d79386f974e459a3fbc568ec"}, - {file = "charset_normalizer-3.3.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:db756e48f9c5c607b5e33dd36b1d5872d0422e960145b08ab0ec7fd420e9d649"}, - {file = "charset_normalizer-3.3.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:232ac332403e37e4a03d209a3f92ed9071f7d3dbda70e2a5e9cff1c4ba9f0678"}, - {file = "charset_normalizer-3.3.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e5c1502d4ace69a179305abb3f0bb6141cbe4714bc9b31d427329a95acfc8bdd"}, - {file = "charset_normalizer-3.3.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:2502dd2a736c879c0f0d3e2161e74d9907231e25d35794584b1ca5284e43f596"}, - {file = "charset_normalizer-3.3.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23e8565ab7ff33218530bc817922fae827420f143479b753104ab801145b1d5b"}, - {file = "charset_normalizer-3.3.0-cp310-cp310-win32.whl", hash = 
"sha256:1872d01ac8c618a8da634e232f24793883d6e456a66593135aeafe3784b0848d"}, - {file = "charset_normalizer-3.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:557b21a44ceac6c6b9773bc65aa1b4cc3e248a5ad2f5b914b91579a32e22204d"}, - {file = "charset_normalizer-3.3.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d7eff0f27edc5afa9e405f7165f85a6d782d308f3b6b9d96016c010597958e63"}, - {file = "charset_normalizer-3.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6a685067d05e46641d5d1623d7c7fdf15a357546cbb2f71b0ebde91b175ffc3e"}, - {file = "charset_normalizer-3.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0d3d5b7db9ed8a2b11a774db2bbea7ba1884430a205dbd54a32d61d7c2a190fa"}, - {file = "charset_normalizer-3.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2935ffc78db9645cb2086c2f8f4cfd23d9b73cc0dc80334bc30aac6f03f68f8c"}, - {file = "charset_normalizer-3.3.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9fe359b2e3a7729010060fbca442ca225280c16e923b37db0e955ac2a2b72a05"}, - {file = "charset_normalizer-3.3.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:380c4bde80bce25c6e4f77b19386f5ec9db230df9f2f2ac1e5ad7af2caa70459"}, - {file = "charset_normalizer-3.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f0d1e3732768fecb052d90d62b220af62ead5748ac51ef61e7b32c266cac9293"}, - {file = "charset_normalizer-3.3.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1b2919306936ac6efb3aed1fbf81039f7087ddadb3160882a57ee2ff74fd2382"}, - {file = "charset_normalizer-3.3.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f8888e31e3a85943743f8fc15e71536bda1c81d5aa36d014a3c0c44481d7db6e"}, - {file = "charset_normalizer-3.3.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:82eb849f085624f6a607538ee7b83a6d8126df6d2f7d3b319cb837b289123078"}, - {file = "charset_normalizer-3.3.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7b8b8bf1189b3ba9b8de5c8db4d541b406611a71a955bbbd7385bbc45fcb786c"}, - {file = "charset_normalizer-3.3.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:5adf257bd58c1b8632046bbe43ee38c04e1038e9d37de9c57a94d6bd6ce5da34"}, - {file = "charset_normalizer-3.3.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c350354efb159b8767a6244c166f66e67506e06c8924ed74669b2c70bc8735b1"}, - {file = "charset_normalizer-3.3.0-cp311-cp311-win32.whl", hash = "sha256:02af06682e3590ab952599fbadac535ede5d60d78848e555aa58d0c0abbde786"}, - {file = "charset_normalizer-3.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:86d1f65ac145e2c9ed71d8ffb1905e9bba3a91ae29ba55b4c46ae6fc31d7c0d4"}, - {file = "charset_normalizer-3.3.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:3b447982ad46348c02cb90d230b75ac34e9886273df3a93eec0539308a6296d7"}, - {file = "charset_normalizer-3.3.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:abf0d9f45ea5fb95051c8bfe43cb40cda383772f7e5023a83cc481ca2604d74e"}, - {file = "charset_normalizer-3.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b09719a17a2301178fac4470d54b1680b18a5048b481cb8890e1ef820cb80455"}, - {file = "charset_normalizer-3.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b3d9b48ee6e3967b7901c052b670c7dda6deb812c309439adaffdec55c6d7b78"}, - {file = "charset_normalizer-3.3.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:edfe077ab09442d4ef3c52cb1f9dab89bff02f4524afc0acf2d46be17dc479f5"}, - {file = 
"charset_normalizer-3.3.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3debd1150027933210c2fc321527c2299118aa929c2f5a0a80ab6953e3bd1908"}, - {file = "charset_normalizer-3.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86f63face3a527284f7bb8a9d4f78988e3c06823f7bea2bd6f0e0e9298ca0403"}, - {file = "charset_normalizer-3.3.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:24817cb02cbef7cd499f7c9a2735286b4782bd47a5b3516a0e84c50eab44b98e"}, - {file = "charset_normalizer-3.3.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c71f16da1ed8949774ef79f4a0260d28b83b3a50c6576f8f4f0288d109777989"}, - {file = "charset_normalizer-3.3.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:9cf3126b85822c4e53aa28c7ec9869b924d6fcfb76e77a45c44b83d91afd74f9"}, - {file = "charset_normalizer-3.3.0-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:b3b2316b25644b23b54a6f6401074cebcecd1244c0b8e80111c9a3f1c8e83d65"}, - {file = "charset_normalizer-3.3.0-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:03680bb39035fbcffe828eae9c3f8afc0428c91d38e7d61aa992ef7a59fb120e"}, - {file = "charset_normalizer-3.3.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4cc152c5dd831641e995764f9f0b6589519f6f5123258ccaca8c6d34572fefa8"}, - {file = "charset_normalizer-3.3.0-cp312-cp312-win32.whl", hash = "sha256:b8f3307af845803fb0b060ab76cf6dd3a13adc15b6b451f54281d25911eb92df"}, - {file = "charset_normalizer-3.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:8eaf82f0eccd1505cf39a45a6bd0a8cf1c70dcfc30dba338207a969d91b965c0"}, - {file = "charset_normalizer-3.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:dc45229747b67ffc441b3de2f3ae5e62877a282ea828a5bdb67883c4ee4a8810"}, - {file = "charset_normalizer-3.3.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f4a0033ce9a76e391542c182f0d48d084855b5fcba5010f707c8e8c34663d77"}, - {file = "charset_normalizer-3.3.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ada214c6fa40f8d800e575de6b91a40d0548139e5dc457d2ebb61470abf50186"}, - {file = "charset_normalizer-3.3.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b1121de0e9d6e6ca08289583d7491e7fcb18a439305b34a30b20d8215922d43c"}, - {file = "charset_normalizer-3.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1063da2c85b95f2d1a430f1c33b55c9c17ffaf5e612e10aeaad641c55a9e2b9d"}, - {file = "charset_normalizer-3.3.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:70f1d09c0d7748b73290b29219e854b3207aea922f839437870d8cc2168e31cc"}, - {file = "charset_normalizer-3.3.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:250c9eb0f4600361dd80d46112213dff2286231d92d3e52af1e5a6083d10cad9"}, - {file = "charset_normalizer-3.3.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:750b446b2ffce1739e8578576092179160f6d26bd5e23eb1789c4d64d5af7dc7"}, - {file = "charset_normalizer-3.3.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:fc52b79d83a3fe3a360902d3f5d79073a993597d48114c29485e9431092905d8"}, - {file = "charset_normalizer-3.3.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:588245972aca710b5b68802c8cad9edaa98589b1b42ad2b53accd6910dad3545"}, - {file = "charset_normalizer-3.3.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e39c7eb31e3f5b1f88caff88bcff1b7f8334975b46f6ac6e9fc725d829bc35d4"}, - {file = 
"charset_normalizer-3.3.0-cp37-cp37m-win32.whl", hash = "sha256:abecce40dfebbfa6abf8e324e1860092eeca6f7375c8c4e655a8afb61af58f2c"}, - {file = "charset_normalizer-3.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:24a91a981f185721542a0b7c92e9054b7ab4fea0508a795846bc5b0abf8118d4"}, - {file = "charset_normalizer-3.3.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:67b8cc9574bb518ec76dc8e705d4c39ae78bb96237cb533edac149352c1f39fe"}, - {file = "charset_normalizer-3.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ac71b2977fb90c35d41c9453116e283fac47bb9096ad917b8819ca8b943abecd"}, - {file = "charset_normalizer-3.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3ae38d325b512f63f8da31f826e6cb6c367336f95e418137286ba362925c877e"}, - {file = "charset_normalizer-3.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:542da1178c1c6af8873e143910e2269add130a299c9106eef2594e15dae5e482"}, - {file = "charset_normalizer-3.3.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:30a85aed0b864ac88309b7d94be09f6046c834ef60762a8833b660139cfbad13"}, - {file = "charset_normalizer-3.3.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aae32c93e0f64469f74ccc730a7cb21c7610af3a775157e50bbd38f816536b38"}, - {file = "charset_normalizer-3.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15b26ddf78d57f1d143bdf32e820fd8935d36abe8a25eb9ec0b5a71c82eb3895"}, - {file = "charset_normalizer-3.3.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7f5d10bae5d78e4551b7be7a9b29643a95aded9d0f602aa2ba584f0388e7a557"}, - {file = "charset_normalizer-3.3.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:249c6470a2b60935bafd1d1d13cd613f8cd8388d53461c67397ee6a0f5dce741"}, - {file = "charset_normalizer-3.3.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:c5a74c359b2d47d26cdbbc7845e9662d6b08a1e915eb015d044729e92e7050b7"}, - {file = "charset_normalizer-3.3.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:b5bcf60a228acae568e9911f410f9d9e0d43197d030ae5799e20dca8df588287"}, - {file = "charset_normalizer-3.3.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:187d18082694a29005ba2944c882344b6748d5be69e3a89bf3cc9d878e548d5a"}, - {file = "charset_normalizer-3.3.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:81bf654678e575403736b85ba3a7867e31c2c30a69bc57fe88e3ace52fb17b89"}, - {file = "charset_normalizer-3.3.0-cp38-cp38-win32.whl", hash = "sha256:85a32721ddde63c9df9ebb0d2045b9691d9750cb139c161c80e500d210f5e26e"}, - {file = "charset_normalizer-3.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:468d2a840567b13a590e67dd276c570f8de00ed767ecc611994c301d0f8c014f"}, - {file = "charset_normalizer-3.3.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e0fc42822278451bc13a2e8626cf2218ba570f27856b536e00cfa53099724828"}, - {file = "charset_normalizer-3.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:09c77f964f351a7369cc343911e0df63e762e42bac24cd7d18525961c81754f4"}, - {file = "charset_normalizer-3.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:12ebea541c44fdc88ccb794a13fe861cc5e35d64ed689513a5c03d05b53b7c82"}, - {file = "charset_normalizer-3.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:805dfea4ca10411a5296bcc75638017215a93ffb584c9e344731eef0dcfb026a"}, - {file = "charset_normalizer-3.3.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:96c2b49eb6a72c0e4991d62406e365d87067ca14c1a729a870d22354e6f68115"}, - {file = "charset_normalizer-3.3.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aaf7b34c5bc56b38c931a54f7952f1ff0ae77a2e82496583b247f7c969eb1479"}, - {file = "charset_normalizer-3.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:619d1c96099be5823db34fe89e2582b336b5b074a7f47f819d6b3a57ff7bdb86"}, - {file = "charset_normalizer-3.3.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a0ac5e7015a5920cfce654c06618ec40c33e12801711da6b4258af59a8eff00a"}, - {file = "charset_normalizer-3.3.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:93aa7eef6ee71c629b51ef873991d6911b906d7312c6e8e99790c0f33c576f89"}, - {file = "charset_normalizer-3.3.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7966951325782121e67c81299a031f4c115615e68046f79b85856b86ebffc4cd"}, - {file = "charset_normalizer-3.3.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:02673e456dc5ab13659f85196c534dc596d4ef260e4d86e856c3b2773ce09843"}, - {file = "charset_normalizer-3.3.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:c2af80fb58f0f24b3f3adcb9148e6203fa67dd3f61c4af146ecad033024dde43"}, - {file = "charset_normalizer-3.3.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:153e7b6e724761741e0974fc4dcd406d35ba70b92bfe3fedcb497226c93b9da7"}, - {file = "charset_normalizer-3.3.0-cp39-cp39-win32.whl", hash = "sha256:d47ecf253780c90ee181d4d871cd655a789da937454045b17b5798da9393901a"}, - {file = "charset_normalizer-3.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:d97d85fa63f315a8bdaba2af9a6a686e0eceab77b3089af45133252618e70884"}, - {file = "charset_normalizer-3.3.0-py3-none-any.whl", hash = "sha256:e46cd37076971c1040fc8c41273a8b3e2c624ce4f2be3f5dfcb7a430c1d3acc2"}, -] -chromadb = [ - {file = "chromadb-0.3.29-py3-none-any.whl", hash = "sha256:d681a3e4f3284715dd146774be84cad3d2f8c529bd004ba249e1d3deb70ac68e"}, - {file = "chromadb-0.3.29.tar.gz", hash = "sha256:29d47835da494fc1b58da40abb1435689d4ba1c93df6c64664a5d91521cb80e9"}, -] -click = [ - {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, - {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, -] -clickhouse-connect = [ - {file = "clickhouse-connect-0.6.14.tar.gz", hash = "sha256:0531bbd5b8bdee616bf1cca5ddcb0af86db12e2b48fd39257a8ecdf32200bd57"}, - {file = "clickhouse_connect-0.6.14-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:04affbd255fb8b1e4a882ddc1336c86530976d05578f47bb65e3a53471d291e4"}, - {file = "clickhouse_connect-0.6.14-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f5bd61f2665f1890fa632b1181df2900ea838cf152cd9a3f775841ea2deab680"}, - {file = "clickhouse_connect-0.6.14-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79897a0987008993f32737e17045a5c1982f9193f7511a3832a7ba3429cbf6b4"}, - {file = "clickhouse_connect-0.6.14-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa95c8a96bdff593924407b074d616ee8a1bfb989579c17b330c6f3b27febfe3"}, - {file = "clickhouse_connect-0.6.14-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:501c0d843be30c86719b61089fb1de6298ac44b3670594f0a1cb0dc3ad97651e"}, - {file = "clickhouse_connect-0.6.14-cp310-cp310-musllinux_1_1_aarch64.whl", hash = 
"sha256:1ec9672c9ed9d5e62f66ac14d6470b9b6be9946d6d24ddac87376437863b8f59"}, - {file = "clickhouse_connect-0.6.14-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:92173354a6c7c5862fab09dab338197b86a192e0c117137e899e8cf92cc3b5b7"}, - {file = "clickhouse_connect-0.6.14-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:757b4c05ebf10bdcb916334c3021ee571a61238907cdeee8c54bcf0550cd0d19"}, - {file = "clickhouse_connect-0.6.14-cp310-cp310-win32.whl", hash = "sha256:2e74badf6c7569e1a0ad32f3be250a3ebf28a9df3b15c9709104e5f050486016"}, - {file = "clickhouse_connect-0.6.14-cp310-cp310-win_amd64.whl", hash = "sha256:7b56c422467df5a0b2790e0943b747639f1f172fac7f8d9585adb3302c961fb1"}, - {file = "clickhouse_connect-0.6.14-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d2aa6d28d79eb5ca94d7c756ec4dc599d2354897f5ef40fd0d8bdc579a81dd94"}, - {file = "clickhouse_connect-0.6.14-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:70cd5b2e0d80dc030355d09db213c73caa78ef259f2b04ce30c1c8cb513bf45b"}, - {file = "clickhouse_connect-0.6.14-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:826c85e37555443af945a0d977598814ba7cb09447b0cdd167eae57dfd3f0724"}, - {file = "clickhouse_connect-0.6.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3cdb1f843d134a1e30828900bc51c9c1b4f4e638aac693767685e512fb095af5"}, - {file = "clickhouse_connect-0.6.14-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:10a8ea6ca6e0d6b1af50078413e280f271559c462a8644541002e44c2cb5c371"}, - {file = "clickhouse_connect-0.6.14-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8b72a5e5d54069dff419a6ec9bbc7f3896fe558551cae6a2b2cba60eaa0607a3"}, - {file = "clickhouse_connect-0.6.14-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c531ed454ca9b6d85e739de3770a82eec2087ed2cb9660fb8ff0e62f7f1446cc"}, - {file = "clickhouse_connect-0.6.14-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:ae6ebf7c507f9d0fece9d1e387c9eec77762693f91647bca18f588cf1d594d24"}, - {file = "clickhouse_connect-0.6.14-cp311-cp311-win32.whl", hash = "sha256:cf1e3067c2da8525b6f59a37f8e13cd6c4524f439be8fd7d8fa03f007f96c863"}, - {file = "clickhouse_connect-0.6.14-cp311-cp311-win_amd64.whl", hash = "sha256:15a040210877cc34155943c7870bf78247d4d4fa3bd4e0595ca22e97760679b7"}, - {file = "clickhouse_connect-0.6.14-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:eb91e4ee0435088fc8bd36de51a93ff9286a514d82ac373b57b2d6cad4655d77"}, - {file = "clickhouse_connect-0.6.14-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48108bb1cfe99b6ff60344838859aec1315213dfa618f6ca4b92c0c6e5ae8d41"}, - {file = "clickhouse_connect-0.6.14-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c75d4bd8ef0b90f9e89ea70c16ff099278e4bb8f1e045008376ac34c6122b73d"}, - {file = "clickhouse_connect-0.6.14-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:897f40eda84e9c45d0bdaf3a9e638e614e236a4a5eeab5cddd920857f9f8f22a"}, - {file = "clickhouse_connect-0.6.14-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:5ecc88656df05ae49e70062aee7022982eec3f87fb14db97c25276fef6633d7c"}, - {file = "clickhouse_connect-0.6.14-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:76cec48265774ae3fa61a77b290dcc8385aad4312a8d7dfcaffb9fc00f79458e"}, - {file = "clickhouse_connect-0.6.14-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:dba280e00ec4cfe0e4d69f88baa9a0491bc1ed83ec57336e5197adae8d42d0c9"}, - {file = 
"clickhouse_connect-0.6.14-cp37-cp37m-win32.whl", hash = "sha256:6c77f537e04747702e009c05f4a7f6f96cbe1696bb89d29f72e39e7370924836"}, - {file = "clickhouse_connect-0.6.14-cp37-cp37m-win_amd64.whl", hash = "sha256:d0eceaff68a53f71384bb9aee7fc1630f68ac10538727c8516ae0af1103f2580"}, - {file = "clickhouse_connect-0.6.14-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9dfa09948caeed539cdd019a1e341a379a1dcacdd755b278d12484b4a703afa3"}, - {file = "clickhouse_connect-0.6.14-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a30d99cb1fd57b8fed4449632e51d48386d0eec1673f905572c5fc7059215c20"}, - {file = "clickhouse_connect-0.6.14-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:93e88de4fe66ae2b1c15726760cc87a703e4d1162de52a19c8d8b57a4429f08e"}, - {file = "clickhouse_connect-0.6.14-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:03d721de610beae823068665d6c604a5f390a99e7b2354264b17136a3a520b13"}, - {file = "clickhouse_connect-0.6.14-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a627762f2f692e226b3cb574a83133844213c6507c6313d3fefd8a3de08e5798"}, - {file = "clickhouse_connect-0.6.14-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:62a596f8d9db8592477a2032c329be7449ea32d133cdc4e5d6f804e251b8617a"}, - {file = "clickhouse_connect-0.6.14-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:e8ab9e5a61968c328a0fdc254b02b96142ebb4ec2bc1623f9498538f0ebfdc7c"}, - {file = "clickhouse_connect-0.6.14-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6c21fe379b1b8822eb9644600e38220b5c4b530fd0f2b1da824a0918120a8f01"}, - {file = "clickhouse_connect-0.6.14-cp38-cp38-win32.whl", hash = "sha256:2a17b336781d3fbb67ed556918c17e63c7d462709aa6a953bb3410ddb67fd7f4"}, - {file = "clickhouse_connect-0.6.14-cp38-cp38-win_amd64.whl", hash = "sha256:838a008c0f7d911ab81f741ea27a64ef7bdcc2508698b70f018987dfc742ffa9"}, - {file = "clickhouse_connect-0.6.14-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:083649a97c3f366f66f0f2578b9f88d86c1d3a40b9015c9403db524fda36a952"}, - {file = "clickhouse_connect-0.6.14-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9e9bd6849852b2c55e51a477e10bc8b61990c5f37f31cce5ea6fc970b447b5af"}, - {file = "clickhouse_connect-0.6.14-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9152c45423f488cf6229bce1f9e695cd81e7ffcd3ae0f1e40e5e62079b18d4a5"}, - {file = "clickhouse_connect-0.6.14-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:341e068d4a6a423ed22fb3b96cfe16be0d6305943c3fb1cc48251b7d9729931d"}, - {file = "clickhouse_connect-0.6.14-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ead7acb841524bd7a73b1f10592a36e901d63bc89af3270ab76b89a11d44fe20"}, - {file = "clickhouse_connect-0.6.14-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:8bce432f72dcf6679c2d0bac4e3a82a126389ad7951d316f213109cee6925c7c"}, - {file = "clickhouse_connect-0.6.14-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:1f403499f169574cafb05888dfdaf18065cc49ff1321e5e108c504c8c220e172"}, - {file = "clickhouse_connect-0.6.14-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3189fcd339bfd7ae4e703ff40b110b9740d6b1ec8385ed8bd1547663fd046578"}, - {file = "clickhouse_connect-0.6.14-cp39-cp39-win32.whl", hash = "sha256:a30de3f0997a9157e840c2d4e07fd9c6fc6e359f1ff9f3a46386b5abdca73c1a"}, - {file = "clickhouse_connect-0.6.14-cp39-cp39-win_amd64.whl", hash = "sha256:c3476a95780374e94dfba2a28093d15f8370bfa6f4cb46a02e0af8813e5f7368"}, 
- {file = "clickhouse_connect-0.6.14-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:22affe46983e67e3923e9330336d21e9ec4b4812b6fbeb1865514145b3870170"}, - {file = "clickhouse_connect-0.6.14-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62727090af8875631115911f58442967386b31cd4efa93c951c2aa7e57d1ce4b"}, - {file = "clickhouse_connect-0.6.14-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ee4ea5ac58de0580f2e12b46cfd2f8d13c1e690378bf9775bfed0c935232de71"}, - {file = "clickhouse_connect-0.6.14-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a126fe486dd02fa5f8adb0b9d8fd0fc701fb73b2275e1040ed210afadd189f90"}, - {file = "clickhouse_connect-0.6.14-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:de6bf773c8776033ca5fb5a6a376729ae69afdd0b19a71d1460d1a221fc5a627"}, - {file = "clickhouse_connect-0.6.14-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:d27d2c9698d1acb550ac8c30c4d9440c7d826a16444e4aea4dacf11ed7ec8988"}, - {file = "clickhouse_connect-0.6.14-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f57efbe536dfbfb7e10dd16ced6fe02441fb174450760f0b29b2b60d23c6462f"}, - {file = "clickhouse_connect-0.6.14-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c134483da38a3d3e38c44da9f3d519d73e177998052d36129e21863c7a3497ee"}, - {file = "clickhouse_connect-0.6.14-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b2d6ae7ccb4ca3d310c2971ead9839935890e40da8602dcc92ecda9bbbb24366"}, - {file = "clickhouse_connect-0.6.14-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:0acf6b69b11b757d60545b0ccac3df4980f69351994e30074df84729bb5af5d1"}, - {file = "clickhouse_connect-0.6.14-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:e043b3b606749d23eca7601a1a44f188c6f117ae57a2852c66c21f11b7296fe4"}, - {file = "clickhouse_connect-0.6.14-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38a887dfef3f3914454c7d7a428db8063b1678c66678cbabcd6368f0b67876f1"}, - {file = "clickhouse_connect-0.6.14-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e255e7c9c38fb9bceefc659374d04914ef2222a6f121fccf86a865b81110e96b"}, - {file = "clickhouse_connect-0.6.14-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2be9a6ba1d3055bb6956be218ffecfa3bfbe47121dfa34467815aa883f15d159"}, - {file = "clickhouse_connect-0.6.14-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:59faa034fdd58c1e7c8b2f4a033e9c611a0c58e193339cdd62d9d91a62f11195"}, -] -colorama = [ - {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, - {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, -] -coloredlogs = [ - {file = "coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934"}, - {file = "coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0"}, -] -confluent-kafka = [ - {file = "confluent-kafka-2.3.0.tar.gz", hash = "sha256:4069e7b56e0baf9db18c053a605213f0ab2d8f23715dca7b3bd97108df446ced"}, - {file = "confluent_kafka-2.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:5df845755cd3ebb9165ca00fd1d3a7d514c61e84d9fcbe7babb91193fe9b369c"}, - {file = "confluent_kafka-2.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9ab2217875b731bd390582952e0f9cbe3e7b34774490f01afca70728f0d8b469"}, - {file = "confluent_kafka-2.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:62046e8a75c7a6883a0f1f4a635573fd7e1665eeacace65e7f6d59cbaa94697d"}, - {file = "confluent_kafka-2.3.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:1eba38061e9ed1c0a369c129bf01d07499286cc3cb295398b88a7037c14371fb"}, - {file = "confluent_kafka-2.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:a6abece28598fa2b59d2b9399fcec03440aaa73fd207fdad048a6030d7e897e1"}, - {file = "confluent_kafka-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d55fbdcd75586dd17fe3fe64f4b4efa1c93ce9dd09c275de46f75772826e8860"}, - {file = "confluent_kafka-2.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ec17b26d6155feeaded4a435ba949095aea9699afb65309d8f22e55722f53c48"}, - {file = "confluent_kafka-2.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9b42bf1b75fdd9aa20c77b27f166f6289440ac649f70622a0117a8e7aa6169d"}, - {file = "confluent_kafka-2.3.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:7f9f4099aaf2c5daae828d2f356e4277d0ef0485ec883dbe395f0c0e054450d0"}, - {file = "confluent_kafka-2.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:1c6b29d57df99dabd45e67fd0aa46f17f195b057734ad84cf9cfdc2542855c10"}, - {file = "confluent_kafka-2.3.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:6b46ce75bda0c092da103dbd55cb0ba429c73c232e70b476b19a0ab247ec9057"}, - {file = "confluent_kafka-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:af60af786a7b8cbeafea51a9416664b96b0f5ef6243172b0bc59e5f75e8bd86a"}, - {file = "confluent_kafka-2.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e08b601e09a584c6a4a8c323a71e92fca31a8826ed33b5b95b26783b7a996026"}, - {file = "confluent_kafka-2.3.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:7fd1ab257d4fa0e2a98529e4eb2102cf8352ad6b3d22110d6cf0bb1f598893d9"}, - {file = "confluent_kafka-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:1ccf6483d86535627cad7b94982ea95d9fa9ae04ddb552e097c1211ffcde5ea7"}, - {file = "confluent_kafka-2.3.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:030fb237927ec2296882a9bb96237ebf86e48388166b15ec0bbf3fdeb48df81a"}, - {file = "confluent_kafka-2.3.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc24c57a52c206648685e1c536afb8447d1cbbbf3871cacebccf2e5b67bdf535"}, - {file = "confluent_kafka-2.3.0-cp36-cp36m-manylinux_2_28_aarch64.whl", hash = "sha256:25292a9a8ef7765c85636851d6c4d5e5e98d6ead627b59637b24a5779e8a4b02"}, - {file = "confluent_kafka-2.3.0-cp36-cp36m-win_amd64.whl", hash = "sha256:d634d4d9914b0a28ec3e37ab7b150173aa34c81fd5bd0b4dcac972b520ad56cc"}, - {file = "confluent_kafka-2.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:ebf460d90478bcd1b4564023a5b081c6e5390b28dbabbb17ee664e223830465d"}, - {file = "confluent_kafka-2.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cec97f8c6564b16504d30fe42c22fd4a86c406dbcd45c337b93c21e876e20628"}, - {file = "confluent_kafka-2.3.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:128ddb28c19ab57c18c0e3d8209d089b6b90ff111b20108764f6798468432693"}, - {file = "confluent_kafka-2.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:0470dc5e56e639693149961409bc6b663df94d68ceae296ae9c42e079fe65d00"}, - {file = 
"confluent_kafka-2.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b539064fef35386936a0d2dadf8a82b8b0ae325af95d9263a2431b82671c4702"}, - {file = "confluent_kafka-2.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4f9998f781a1da0c9dcb5506792a39799cb54e28c6f986ddc73e362887042f7c"}, - {file = "confluent_kafka-2.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f175e11facaf12130abd5d2d471db39d7cc89126c4d991527cf14e3da22c635c"}, - {file = "confluent_kafka-2.3.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:f9842720ed0debcf4620710e01d356681a4812441f1ff49664fc205d1f9120e5"}, - {file = "confluent_kafka-2.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:cf015e547b82a74a87d7363d0d42e4cd0ca23b01cdb479639a340f385581ea04"}, - {file = "confluent_kafka-2.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e5c740ead14a2510e15f63e67b19d48ae48a7f30ef4823d5af125bad528033d1"}, - {file = "confluent_kafka-2.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6ae5e6a6dcd5ce85b9153c21c9f0b83e0cc88a5955b5334079db76c2267deb63"}, - {file = "confluent_kafka-2.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca36a8d1d49fd55cca1b7ec3090ca2684a933e63f196f0e3e506194b189fc31e"}, - {file = "confluent_kafka-2.3.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:210f2d346d1006e9b95c5204f7255735d4cb5ec962a3d1a68ac60c02e2763ae4"}, - {file = "confluent_kafka-2.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:cb279e369121e07ccb419220fc039127345a9e5f72f4abf7dda0e2e06a12b604"}, -] -cryptography = [ - {file = "cryptography-41.0.4-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:80907d3faa55dc5434a16579952ac6da800935cd98d14dbd62f6f042c7f5e839"}, - {file = "cryptography-41.0.4-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:35c00f637cd0b9d5b6c6bd11b6c3359194a8eba9c46d4e875a3660e3b400005f"}, - {file = "cryptography-41.0.4-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cecfefa17042941f94ab54f769c8ce0fe14beff2694e9ac684176a2535bf9714"}, - {file = "cryptography-41.0.4-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e40211b4923ba5a6dc9769eab704bdb3fbb58d56c5b336d30996c24fcf12aadb"}, - {file = "cryptography-41.0.4-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:23a25c09dfd0d9f28da2352503b23e086f8e78096b9fd585d1d14eca01613e13"}, - {file = "cryptography-41.0.4-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:2ed09183922d66c4ec5fdaa59b4d14e105c084dd0febd27452de8f6f74704143"}, - {file = "cryptography-41.0.4-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:5a0f09cefded00e648a127048119f77bc2b2ec61e736660b5789e638f43cc397"}, - {file = "cryptography-41.0.4-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:9eeb77214afae972a00dee47382d2591abe77bdae166bda672fb1e24702a3860"}, - {file = "cryptography-41.0.4-cp37-abi3-win32.whl", hash = "sha256:3b224890962a2d7b57cf5eeb16ccaafba6083f7b811829f00476309bce2fe0fd"}, - {file = "cryptography-41.0.4-cp37-abi3-win_amd64.whl", hash = "sha256:c880eba5175f4307129784eca96f4e70b88e57aa3f680aeba3bab0e980b0f37d"}, - {file = "cryptography-41.0.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:004b6ccc95943f6a9ad3142cfabcc769d7ee38a3f60fb0dddbfb431f818c3a67"}, - {file = "cryptography-41.0.4-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:86defa8d248c3fa029da68ce61fe735432b047e32179883bdb1e79ed9bb8195e"}, - {file = "cryptography-41.0.4-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = 
"sha256:37480760ae08065437e6573d14be973112c9e6dcaf5f11d00147ee74f37a3829"}, - {file = "cryptography-41.0.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:b5f4dfe950ff0479f1f00eda09c18798d4f49b98f4e2006d644b3301682ebdca"}, - {file = "cryptography-41.0.4-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:7e53db173370dea832190870e975a1e09c86a879b613948f09eb49324218c14d"}, - {file = "cryptography-41.0.4-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5b72205a360f3b6176485a333256b9bcd48700fc755fef51c8e7e67c4b63e3ac"}, - {file = "cryptography-41.0.4-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:93530900d14c37a46ce3d6c9e6fd35dbe5f5601bf6b3a5c325c7bffc030344d9"}, - {file = "cryptography-41.0.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:efc8ad4e6fc4f1752ebfb58aefece8b4e3c4cae940b0994d43649bdfce8d0d4f"}, - {file = "cryptography-41.0.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c3391bd8e6de35f6f1140e50aaeb3e2b3d6a9012536ca23ab0d9c35ec18c8a91"}, - {file = "cryptography-41.0.4-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:0d9409894f495d465fe6fda92cb70e8323e9648af912d5b9141d616df40a87b8"}, - {file = "cryptography-41.0.4-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:8ac4f9ead4bbd0bc8ab2d318f97d85147167a488be0e08814a37eb2f439d5cf6"}, - {file = "cryptography-41.0.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:047c4603aeb4bbd8db2756e38f5b8bd7e94318c047cfe4efeb5d715e08b49311"}, - {file = "cryptography-41.0.4.tar.gz", hash = "sha256:7febc3094125fc126a7f6fb1f420d0da639f3f32cb15c8ff0dc3997c4549f51a"}, -] -curlify = [ - {file = "curlify-2.2.1.tar.gz", hash = "sha256:0d3f02e7235faf952de8ef45ef469845196d30632d5838bcd5aee217726ddd6d"}, -] -dataclasses-json = [ - {file = "dataclasses-json-0.5.9.tar.gz", hash = "sha256:e9ac87b73edc0141aafbce02b44e93553c3123ad574958f0fe52a534b6707e8e"}, - {file = "dataclasses_json-0.5.9-py3-none-any.whl", hash = "sha256:1280542631df1c375b7bc92e5b86d39e06c44760d7e3571a537b3b8acabf2f0c"}, -] -decorator = [ - {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, - {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, -] -dlt = [ - {file = "dlt-0.4.4-py3-none-any.whl", hash = "sha256:dfa1d0fd1ba5e2741f0d58314ca56aad26ec25032039bc3fa5d873d4611d8568"}, - {file = "dlt-0.4.4.tar.gz", hash = "sha256:9a9619f78fe06cc157a23179b4fb17a059606e8c980756ea0652b167b91356fa"}, -] -dnspython = [ - {file = "dnspython-2.4.2-py3-none-any.whl", hash = "sha256:57c6fbaaeaaf39c891292012060beb141791735dbb4004798328fc2c467402d8"}, - {file = "dnspython-2.4.2.tar.gz", hash = "sha256:8dcfae8c7460a2f84b4072e26f1c9f4101ca20c071649cb7c34e8b6a93d58984"}, -] -domdf-python-tools = [ - {file = "domdf_python_tools-3.6.1-py3-none-any.whl", hash = "sha256:e18158460850957f18e740eb94ede56f580ddb0cb162ab9d9834ed8bbb1b6431"}, - {file = "domdf_python_tools-3.6.1.tar.gz", hash = "sha256:acc04563d23bce4d437dd08af6b9bea788328c412772a044d8ca428a7ad861be"}, -] -duckdb = [ - {file = "duckdb-0.8.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:14781d21580ee72aba1f5dcae7734674c9b6c078dd60470a08b2b420d15b996d"}, - {file = "duckdb-0.8.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f13bf7ab0e56ddd2014ef762ae4ee5ea4df5a69545ce1191b8d7df8118ba3167"}, - {file = "duckdb-0.8.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4032042d8363e55365bbca3faafc6dc336ed2aad088f10ae1a534ebc5bcc181"}, - {file = 
"duckdb-0.8.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:31a71bd8f0b0ca77c27fa89b99349ef22599ffefe1e7684ae2e1aa2904a08684"}, - {file = "duckdb-0.8.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:24568d6e48f3dbbf4a933109e323507a46b9399ed24c5d4388c4987ddc694fd0"}, - {file = "duckdb-0.8.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:297226c0dadaa07f7c5ae7cbdb9adba9567db7b16693dbd1b406b739ce0d7924"}, - {file = "duckdb-0.8.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:5792cf777ece2c0591194006b4d3e531f720186102492872cb32ddb9363919cf"}, - {file = "duckdb-0.8.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:12803f9f41582b68921d6b21f95ba7a51e1d8f36832b7d8006186f58c3d1b344"}, - {file = "duckdb-0.8.1-cp310-cp310-win32.whl", hash = "sha256:d0953d5a2355ddc49095e7aef1392b7f59c5be5cec8cdc98b9d9dc1f01e7ce2b"}, - {file = "duckdb-0.8.1-cp310-cp310-win_amd64.whl", hash = "sha256:6e6583c98a7d6637e83bcadfbd86e1f183917ea539f23b6b41178f32f813a5eb"}, - {file = "duckdb-0.8.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:fad7ed0d4415f633d955ac24717fa13a500012b600751d4edb050b75fb940c25"}, - {file = "duckdb-0.8.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:81ae602f34d38d9c48dd60f94b89f28df3ef346830978441b83c5b4eae131d08"}, - {file = "duckdb-0.8.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7d75cfe563aaa058d3b4ccaaa371c6271e00e3070df5de72361fd161b2fe6780"}, - {file = "duckdb-0.8.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8dbb55e7a3336f2462e5e916fc128c47fe1c03b6208d6bd413ac11ed95132aa0"}, - {file = "duckdb-0.8.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a6df53efd63b6fdf04657385a791a4e3c4fb94bfd5db181c4843e2c46b04fef5"}, - {file = "duckdb-0.8.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b188b80b70d1159b17c9baaf541c1799c1ce8b2af4add179a9eed8e2616be96"}, - {file = "duckdb-0.8.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:5ad481ee353f31250b45d64b4a104e53b21415577943aa8f84d0af266dc9af85"}, - {file = "duckdb-0.8.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d1d1b1729993611b1892509d21c21628917625cdbe824a61ce891baadf684b32"}, - {file = "duckdb-0.8.1-cp311-cp311-win32.whl", hash = "sha256:2d8f9cc301e8455a4f89aa1088b8a2d628f0c1f158d4cf9bc78971ed88d82eea"}, - {file = "duckdb-0.8.1-cp311-cp311-win_amd64.whl", hash = "sha256:07457a43605223f62d93d2a5a66b3f97731f79bbbe81fdd5b79954306122f612"}, - {file = "duckdb-0.8.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:d2c8062c3e978dbcd80d712ca3e307de8a06bd4f343aa457d7dd7294692a3842"}, - {file = "duckdb-0.8.1-cp36-cp36m-win32.whl", hash = "sha256:fad486c65ae944eae2de0d590a0a4fb91a9893df98411d66cab03359f9cba39b"}, - {file = "duckdb-0.8.1-cp36-cp36m-win_amd64.whl", hash = "sha256:86fa4506622c52d2df93089c8e7075f1c4d0ba56f4bf27faebde8725355edf32"}, - {file = "duckdb-0.8.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:60e07a62782f88420046e30cc0e3de842d0901c4fd5b8e4d28b73826ec0c3f5e"}, - {file = "duckdb-0.8.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f18563675977f8cbf03748efee0165b4c8ef64e0cbe48366f78e2914d82138bb"}, - {file = "duckdb-0.8.1-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:16e179443832bea8439ae4dff93cf1e42c545144ead7a4ef5f473e373eea925a"}, - {file = "duckdb-0.8.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:a413d5267cb41a1afe69d30dd6d4842c588256a6fed7554c7e07dad251ede095"}, - {file = "duckdb-0.8.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:3784680df59eadd683b0a4c2375d451a64470ca54bd171c01e36951962b1d332"}, - {file = "duckdb-0.8.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:67a1725c2b01f9b53571ecf3f92959b652f60156c1c48fb35798302e39b3c1a2"}, - {file = "duckdb-0.8.1-cp37-cp37m-win32.whl", hash = "sha256:197d37e2588c5ad063e79819054eedb7550d43bf1a557d03ba8f8f67f71acc42"}, - {file = "duckdb-0.8.1-cp37-cp37m-win_amd64.whl", hash = "sha256:3843feb79edf100800f5037c32d5d5a5474fb94b32ace66c707b96605e7c16b2"}, - {file = "duckdb-0.8.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:624c889b0f2d656794757b3cc4fc58030d5e285f5ad2ef9fba1ea34a01dab7fb"}, - {file = "duckdb-0.8.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:fcbe3742d77eb5add2d617d487266d825e663270ef90253366137a47eaab9448"}, - {file = "duckdb-0.8.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:47516c9299d09e9dbba097b9fb339b389313c4941da5c54109df01df0f05e78c"}, - {file = "duckdb-0.8.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf1ba718b7522d34399446ebd5d4b9fcac0b56b6ac07bfebf618fd190ec37c1d"}, - {file = "duckdb-0.8.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e36e35d38a9ae798fe8cf6a839e81494d5b634af89f4ec9483f4d0a313fc6bdb"}, - {file = "duckdb-0.8.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23493313f88ce6e708a512daacad13e83e6d1ea0be204b175df1348f7fc78671"}, - {file = "duckdb-0.8.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1fb9bf0b6f63616c8a4b9a6a32789045e98c108df100e6bac783dc1e36073737"}, - {file = "duckdb-0.8.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:12fc13ecd5eddd28b203b9e3999040d3a7374a8f4b833b04bd26b8c5685c2635"}, - {file = "duckdb-0.8.1-cp38-cp38-win32.whl", hash = "sha256:a12bf4b18306c9cb2c9ba50520317e6cf2de861f121d6f0678505fa83468c627"}, - {file = "duckdb-0.8.1-cp38-cp38-win_amd64.whl", hash = "sha256:e4e809358b9559c00caac4233e0e2014f3f55cd753a31c4bcbbd1b55ad0d35e4"}, - {file = "duckdb-0.8.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7acedfc00d97fbdb8c3d120418c41ef3cb86ef59367f3a9a30dff24470d38680"}, - {file = "duckdb-0.8.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:99bfe264059cdc1e318769103f656f98e819cd4e231cd76c1d1a0327f3e5cef8"}, - {file = "duckdb-0.8.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:538b225f361066231bc6cd66c04a5561de3eea56115a5dd773e99e5d47eb1b89"}, - {file = "duckdb-0.8.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ae0be3f71a18cd8492d05d0fc1bc67d01d5a9457b04822d025b0fc8ee6efe32e"}, - {file = "duckdb-0.8.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cd82ba63b58672e46c8ec60bc9946aa4dd7b77f21c1ba09633d8847ad9eb0d7b"}, - {file = "duckdb-0.8.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:780a34559aaec8354e83aa4b7b31b3555f1b2cf75728bf5ce11b89a950f5cdd9"}, - {file = "duckdb-0.8.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:01f0d4e9f7103523672bda8d3f77f440b3e0155dd3b2f24997bc0c77f8deb460"}, - {file = "duckdb-0.8.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:31f692decb98c2d57891da27180201d9e93bb470a3051fcf413e8da65bca37a5"}, - {file = "duckdb-0.8.1-cp39-cp39-win32.whl", hash = "sha256:e7fe93449cd309bbc67d1bf6f6392a6118e94a9a4479ab8a80518742e855370a"}, - {file = "duckdb-0.8.1-cp39-cp39-win_amd64.whl", hash = 
"sha256:81d670bc6807672f038332d9bf587037aabdd741b0810de191984325ed307abd"}, - {file = "duckdb-0.8.1.tar.gz", hash = "sha256:a54d37f4abc2afc4f92314aaa56ecf215a411f40af4bffe1e86bd25e62aceee9"}, -] -et-xmlfile = [ - {file = "et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada"}, - {file = "et_xmlfile-1.1.0.tar.gz", hash = "sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c"}, -] -exceptiongroup = [ - {file = "exceptiongroup-1.1.3-py3-none-any.whl", hash = "sha256:343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3"}, - {file = "exceptiongroup-1.1.3.tar.gz", hash = "sha256:097acd85d473d75af5bb98e41b61ff7fe35efe6675e4f9370ec6ec5126d160e9"}, -] -facebook-business = [ - {file = "facebook_business-17.0.4-py3-none-any.whl", hash = "sha256:c3a4afbe019c1fd2454eeeefb4e895ed3276d506115fbf9a993135f6af1c1a88"}, - {file = "facebook_business-17.0.4.tar.gz", hash = "sha256:52b516a237ab4cbf083053d3cc062995ff4732fca487b46543c4eab3bdbbf188"}, -] -fastapi = [ - {file = "fastapi-0.85.1-py3-none-any.whl", hash = "sha256:de3166b6b1163dc22da4dc4ebdc3192fcbac7700dd1870a1afa44de636a636b5"}, - {file = "fastapi-0.85.1.tar.gz", hash = "sha256:1facd097189682a4ff11cbd01334a992e51b56be663b2bd50c2c09523624f144"}, -] -filelock = [ - {file = "filelock-3.12.4-py3-none-any.whl", hash = "sha256:08c21d87ded6e2b9da6728c3dff51baf1dcecf973b768ef35bcbc3447edb9ad4"}, - {file = "filelock-3.12.4.tar.gz", hash = "sha256:2e6f249f1f3654291606e046b09f1fd5eac39b360664c27f5aad072012f8bcbd"}, -] -filetype = [ - {file = "filetype-1.2.0-py2.py3-none-any.whl", hash = "sha256:7ce71b6880181241cf7ac8697a2f1eb6a8bd9b429f7ad6d27b8db9ba5f1c2d25"}, - {file = "filetype-1.2.0.tar.gz", hash = "sha256:66b56cd6474bf41d8c54660347d37afcc3f7d1970648de365c102ef77548aadb"}, -] -flake8 = [ - {file = "flake8-6.1.0-py2.py3-none-any.whl", hash = "sha256:ffdfce58ea94c6580c77888a86506937f9a1a227dfcd15f245d694ae20a6b6e5"}, - {file = "flake8-6.1.0.tar.gz", hash = "sha256:d5b3857f07c030bdb5bf41c7f53799571d75c4491748a3adcd47de929e34cd23"}, -] -flake8-bugbear = [ - {file = "flake8-bugbear-22.12.6.tar.gz", hash = "sha256:4cdb2c06e229971104443ae293e75e64c6107798229202fbe4f4091427a30ac0"}, - {file = "flake8_bugbear-22.12.6-py3-none-any.whl", hash = "sha256:b69a510634f8a9c298dfda2b18a8036455e6b19ecac4fe582e4d7a0abfa50a30"}, -] -flake8-builtins = [ - {file = "flake8-builtins-2.1.0.tar.gz", hash = "sha256:12ff1ee96dd4e1f3141141ee6c45a5c7d3b3c440d0949e9b8d345c42b39c51d4"}, - {file = "flake8_builtins-2.1.0-py3-none-any.whl", hash = "sha256:469e8f03d6d0edf4b1e62b6d5a97dce4598592c8a13ec8f0952e7a185eba50a1"}, -] -flake8-encodings = [ - {file = "flake8_encodings-0.5.0.post1-py3-none-any.whl", hash = "sha256:d2fecca0e89ba09c86e5d61cf6bdb1b337f0d74746aac67bbcf0c517b4cb6cba"}, - {file = "flake8_encodings-0.5.0.post1.tar.gz", hash = "sha256:082c0163325c85b438a8106e876283b5ed3cbfc53e68d89130d70be8be4c9977"}, -] -flake8-helper = [ - {file = "flake8_helper-0.2.1-py3-none-any.whl", hash = "sha256:9123cdf351ad32ee8a51b85036052302c478122d62fb512c0773e111b3d05241"}, - {file = "flake8_helper-0.2.1.tar.gz", hash = "sha256:479f86d1c52df8e49ff876ecd3873242699f93eeece7e6675cdca9c37c9b0a16"}, -] -flake8-tidy-imports = [ - {file = "flake8_tidy_imports-4.10.0-py3-none-any.whl", hash = "sha256:b0387fb2ea200441bd142309e716fb7b8f4b0937bdf5f8b7c0c118a5f5e2b8ed"}, - {file = "flake8_tidy_imports-4.10.0.tar.gz", hash = "sha256:bd6cf86465402d2b86903009b748d85a628e599e17b76e810c9857e3a2815173"}, -] -flatbuffers = [ 
+files = [ {file = "flatbuffers-23.5.26-py2.py3-none-any.whl", hash = "sha256:c0ff356da363087b915fde4b8b45bdda73432fc17cddb3c8157472eab1422ad1"}, {file = "flatbuffers-23.5.26.tar.gz", hash = "sha256:9ea1144cac05ce5d86e2859f431c6cd5e66cd9c78c558317c7955fb8d4c78d89"}, ] -frozenlist = [ + +[[package]] +name = "frozenlist" +version = "1.4.0" +description = "A list-like structure which implements collections.abc.MutableSequence" +optional = false +python-versions = ">=3.8" +files = [ {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:764226ceef3125e53ea2cb275000e309c0aa5464d43bd72abd661e27fffc26ab"}, {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d6484756b12f40003c6128bfcc3fa9f0d49a687e171186c2d85ec82e3758c559"}, {file = "frozenlist-1.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9ac08e601308e41eb533f232dbf6b7e4cea762f9f84f6357136eed926c15d12c"}, @@ -3668,63 +1409,313 @@ frozenlist = [ {file = "frozenlist-1.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:6221d84d463fb110bdd7619b69cb43878a11d51cbb9394ae3105d082d5199167"}, {file = "frozenlist-1.4.0.tar.gz", hash = "sha256:09163bdf0b2907454042edb19f887c6d33806adc71fbd54afc14908bfdc22251"}, ] -fsspec = [ + +[[package]] +name = "fsspec" +version = "2023.9.2" +description = "File-system specification" +optional = false +python-versions = ">=3.8" +files = [ {file = "fsspec-2023.9.2-py3-none-any.whl", hash = "sha256:603dbc52c75b84da501b9b2ec8c11e1f61c25984c4a0dda1f129ef391fbfc9b4"}, {file = "fsspec-2023.9.2.tar.gz", hash = "sha256:80bfb8c70cc27b2178cc62a935ecf242fc6e8c3fb801f9c571fc01b1e715ba7d"}, ] -gcsfs = [ + +[package.extras] +abfs = ["adlfs"] +adl = ["adlfs"] +arrow = ["pyarrow (>=1)"] +dask = ["dask", "distributed"] +devel = ["pytest", "pytest-cov"] +dropbox = ["dropbox", "dropboxdrivefs", "requests"] +full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"] +fuse = ["fusepy"] +gcs = ["gcsfs"] +git = ["pygit2"] +github = ["requests"] +gs = ["gcsfs"] +gui = ["panel"] +hdfs = ["pyarrow (>=1)"] +http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "requests"] +libarchive = ["libarchive-c"] +oci = ["ocifs"] +s3 = ["s3fs"] +sftp = ["paramiko"] +smb = ["smbprotocol"] +ssh = ["paramiko"] +tqdm = ["tqdm"] + +[[package]] +name = "gcsfs" +version = "2023.9.2" +description = "Convenient Filesystem interface over GCS" +optional = false +python-versions = ">=3.8" +files = [ {file = "gcsfs-2023.9.2-py2.py3-none-any.whl", hash = "sha256:b3e61d07b0ecf3e04627b0cc0df30ee728bc49e31d42de180815601041e62c1b"}, {file = "gcsfs-2023.9.2.tar.gz", hash = "sha256:7ca430816fa99b3df428506b557f08dbafab563a048393747507d0809fa4576b"}, ] -gitdb = [ + +[package.dependencies] +aiohttp = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1" +decorator = ">4.1.2" +fsspec = "2023.9.2" +google-auth = ">=1.2" +google-auth-oauthlib = "*" +google-cloud-storage = "*" +requests = "*" + +[package.extras] +crc = ["crcmod"] +gcsfuse = ["fusepy"] + +[[package]] +name = "gitdb" +version = "4.0.10" +description = "Git Object Database" +optional = false +python-versions = ">=3.7" +files = [ {file = "gitdb-4.0.10-py3-none-any.whl", hash = "sha256:c286cf298426064079ed96a9e4a9d39e7f3e9bf15ba60701e95f5492f28415c7"}, {file = "gitdb-4.0.10.tar.gz", hash = "sha256:6eb990b69df4e15bad899ea868dc46572c3f75339735663b81de79b06f17eb9a"}, ] -gitpython = [ + +[package.dependencies] +smmap = 
">=3.0.1,<6" + +[[package]] +name = "gitpython" +version = "3.1.37" +description = "GitPython is a Python library used to interact with Git repositories" +optional = false +python-versions = ">=3.7" +files = [ {file = "GitPython-3.1.37-py3-none-any.whl", hash = "sha256:5f4c4187de49616d710a77e98ddf17b4782060a1788df441846bddefbb89ab33"}, {file = "GitPython-3.1.37.tar.gz", hash = "sha256:f9b9ddc0761c125d5780eab2d64be4873fc6817c2899cbcb34b02344bdc7bc54"}, ] -giturlparse = [ + +[package.dependencies] +gitdb = ">=4.0.1,<5" + +[package.extras] +test = ["black", "coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mypy", "pre-commit", "pytest", "pytest-cov", "pytest-sugar"] + +[[package]] +name = "giturlparse" +version = "0.12.0" +description = "A Git URL parsing module (supports parsing and rewriting)" +optional = false +python-versions = ">=3.8" +files = [ {file = "giturlparse-0.12.0-py2.py3-none-any.whl", hash = "sha256:412b74f2855f1da2fefa89fd8dde62df48476077a72fc19b62039554d27360eb"}, {file = "giturlparse-0.12.0.tar.gz", hash = "sha256:c0fff7c21acc435491b1779566e038757a205c1ffdcb47e4f81ea52ad8c3859a"}, ] -google-analytics-data = [ + +[[package]] +name = "google-analytics-data" +version = "0.16.3" +description = "Google Analytics Data API client library" +optional = false +python-versions = ">=3.7" +files = [ {file = "google-analytics-data-0.16.3.tar.gz", hash = "sha256:f29431ec63ab462f7a9b42227521d148c877307c629e308c284025ad834aab52"}, {file = "google_analytics_data-0.16.3-py2.py3-none-any.whl", hash = "sha256:bb73f36707a5a2966e87c9439c25cd8004d58305b0ef01c6f2f50128c08feb13"}, ] -google-api-core = [ + +[package.dependencies] +google-api-core = {version = ">=1.34.0,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} +proto-plus = [ + {version = ">=1.22.0,<2.0.0dev", markers = "python_version < \"3.11\""}, + {version = ">=1.22.2,<2.0.0dev", markers = "python_version >= \"3.11\""}, +] +protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" + +[[package]] +name = "google-api-core" +version = "2.12.0" +description = "Google API client core library" +optional = false +python-versions = ">=3.7" +files = [ {file = "google-api-core-2.12.0.tar.gz", hash = "sha256:c22e01b1e3c4dcd90998494879612c38d0a3411d1f7b679eb89e2abe3ce1f553"}, {file = "google_api_core-2.12.0-py3-none-any.whl", hash = "sha256:ec6054f7d64ad13b41e43d96f735acbd763b0f3b695dabaa2d579673f6a6e160"}, ] -google-api-python-client = [ + +[package.dependencies] +google-auth = ">=2.14.1,<3.0.dev0" +googleapis-common-protos = ">=1.56.2,<2.0.dev0" +grpcio = [ + {version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, + {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, +] +grpcio-status = [ + {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, + {version = ">=1.49.1,<2.0.dev0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, +] +protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0.dev0" +requests = ">=2.18.0,<3.0.0.dev0" + +[package.extras] +grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "grpcio-status (>=1.33.2,<2.0.dev0)", "grpcio-status (>=1.49.1,<2.0.dev0)"] +grpcgcp = 
["grpcio-gcp (>=0.2.2,<1.0.dev0)"] +grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] + +[[package]] +name = "google-api-python-client" +version = "2.103.0" +description = "Google API Client Library for Python" +optional = false +python-versions = ">=3.7" +files = [ {file = "google-api-python-client-2.103.0.tar.gz", hash = "sha256:5b48dc23913b9a1b447991add03f27c335831559b5a870c522316eae671caf44"}, {file = "google_api_python_client-2.103.0-py2.py3-none-any.whl", hash = "sha256:5d6cf80cc34598a85b73e7e689e6eb1ba34f342095aeab9ec408f94521382a7c"}, ] -google-auth = [ + +[package.dependencies] +google-api-core = ">=1.31.5,<2.0.dev0 || >2.3.0,<3.0.0.dev0" +google-auth = ">=1.19.0,<3.0.0.dev0" +google-auth-httplib2 = ">=0.1.0" +httplib2 = ">=0.15.0,<1.dev0" +uritemplate = ">=3.0.1,<5" + +[[package]] +name = "google-auth" +version = "2.23.3" +description = "Google Authentication Library" +optional = false +python-versions = ">=3.7" +files = [ {file = "google-auth-2.23.3.tar.gz", hash = "sha256:6864247895eea5d13b9c57c9e03abb49cb94ce2dc7c58e91cba3248c7477c9e3"}, {file = "google_auth-2.23.3-py2.py3-none-any.whl", hash = "sha256:a8f4608e65c244ead9e0538f181a96c6e11199ec114d41f1d7b1bffa96937bda"}, ] -google-auth-httplib2 = [ + +[package.dependencies] +cachetools = ">=2.0.0,<6.0" +pyasn1-modules = ">=0.2.1" +rsa = ">=3.1.4,<5" + +[package.extras] +aiohttp = ["aiohttp (>=3.6.2,<4.0.0.dev0)", "requests (>=2.20.0,<3.0.0.dev0)"] +enterprise-cert = ["cryptography (==36.0.2)", "pyopenssl (==22.0.0)"] +pyopenssl = ["cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"] +reauth = ["pyu2f (>=0.1.5)"] +requests = ["requests (>=2.20.0,<3.0.0.dev0)"] + +[[package]] +name = "google-auth-httplib2" +version = "0.1.1" +description = "Google Authentication Library: httplib2 transport" +optional = false +python-versions = "*" +files = [ {file = "google-auth-httplib2-0.1.1.tar.gz", hash = "sha256:c64bc555fdc6dd788ea62ecf7bccffcf497bf77244887a3f3d7a5a02f8e3fc29"}, {file = "google_auth_httplib2-0.1.1-py2.py3-none-any.whl", hash = "sha256:42c50900b8e4dcdf8222364d1f0efe32b8421fb6ed72f2613f12f75cc933478c"}, ] -google-auth-oauthlib = [ + +[package.dependencies] +google-auth = "*" +httplib2 = ">=0.19.0" + +[[package]] +name = "google-auth-oauthlib" +version = "1.1.0" +description = "Google Authentication Library" +optional = false +python-versions = ">=3.6" +files = [ {file = "google-auth-oauthlib-1.1.0.tar.gz", hash = "sha256:83ea8c3b0881e453790baff4448e8a6112ac8778d1de9da0b68010b843937afb"}, {file = "google_auth_oauthlib-1.1.0-py2.py3-none-any.whl", hash = "sha256:089c6e587d36f4803ac7e0720c045c6a8b1fd1790088b8424975b90d0ee61c12"}, ] -google-cloud-bigquery = [ + +[package.dependencies] +google-auth = ">=2.15.0" +requests-oauthlib = ">=0.7.0" + +[package.extras] +tool = ["click (>=6.0.0)"] + +[[package]] +name = "google-cloud-bigquery" +version = "3.12.0" +description = "Google BigQuery API client library" +optional = false +python-versions = ">=3.7" +files = [ {file = "google-cloud-bigquery-3.12.0.tar.gz", hash = "sha256:1af93c5c28a18b13190bce479b793aaae56c0ecf7e1da73b4ba5798eca054a07"}, {file = "google_cloud_bigquery-3.12.0-py2.py3-none-any.whl", hash = "sha256:e68851addfe0394ab7662bd58aa0e5a4527e7156568cc0410129ccb8a460a009"}, ] -google-cloud-core = [ + +[package.dependencies] +google-api-core = {version = ">=1.31.5,<2.0.dev0 || >2.3.0,<3.0.0dev", extras = ["grpc"]} +google-cloud-core = ">=1.6.0,<3.0.0dev" +google-resumable-media = ">=0.6.0,<3.0dev" +grpcio = [ + {version = ">=1.47.0,<2.0dev", markers = "python_version < \"3.11\""}, + 
{version = ">=1.49.1,<2.0dev", markers = "python_version >= \"3.11\""}, +] +packaging = ">=20.0.0" +proto-plus = ">=1.15.0,<2.0.0dev" +protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" +python-dateutil = ">=2.7.2,<3.0dev" +requests = ">=2.21.0,<3.0.0dev" + +[package.extras] +all = ["Shapely (>=1.8.4,<2.0dev)", "db-dtypes (>=0.3.0,<2.0.0dev)", "geopandas (>=0.9.0,<1.0dev)", "google-cloud-bigquery-storage (>=2.6.0,<3.0.0dev)", "grpcio (>=1.47.0,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "ipykernel (>=6.0.0)", "ipython (>=7.23.1,!=8.1.0)", "ipywidgets (>=7.7.0)", "opentelemetry-api (>=1.1.0)", "opentelemetry-instrumentation (>=0.20b0)", "opentelemetry-sdk (>=1.1.0)", "pandas (>=1.1.0)", "pyarrow (>=3.0.0)", "tqdm (>=4.7.4,<5.0.0dev)"] +bqstorage = ["google-cloud-bigquery-storage (>=2.6.0,<3.0.0dev)", "grpcio (>=1.47.0,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "pyarrow (>=3.0.0)"] +geopandas = ["Shapely (>=1.8.4,<2.0dev)", "geopandas (>=0.9.0,<1.0dev)"] +ipython = ["ipykernel (>=6.0.0)", "ipython (>=7.23.1,!=8.1.0)"] +ipywidgets = ["ipykernel (>=6.0.0)", "ipywidgets (>=7.7.0)"] +opentelemetry = ["opentelemetry-api (>=1.1.0)", "opentelemetry-instrumentation (>=0.20b0)", "opentelemetry-sdk (>=1.1.0)"] +pandas = ["db-dtypes (>=0.3.0,<2.0.0dev)", "pandas (>=1.1.0)", "pyarrow (>=3.0.0)"] +tqdm = ["tqdm (>=4.7.4,<5.0.0dev)"] + +[[package]] +name = "google-cloud-core" +version = "2.3.3" +description = "Google Cloud API client core library" +optional = false +python-versions = ">=3.7" +files = [ {file = "google-cloud-core-2.3.3.tar.gz", hash = "sha256:37b80273c8d7eee1ae816b3a20ae43585ea50506cb0e60f3cf5be5f87f1373cb"}, {file = "google_cloud_core-2.3.3-py2.py3-none-any.whl", hash = "sha256:fbd11cad3e98a7e5b0343dc07cb1039a5ffd7a5bb96e1f1e27cee4bda4a90863"}, ] -google-cloud-storage = [ + +[package.dependencies] +google-api-core = ">=1.31.6,<2.0.dev0 || >2.3.0,<3.0.0dev" +google-auth = ">=1.25.0,<3.0dev" + +[package.extras] +grpc = ["grpcio (>=1.38.0,<2.0dev)"] + +[[package]] +name = "google-cloud-storage" +version = "2.12.0" +description = "Google Cloud Storage API client library" +optional = false +python-versions = ">=3.7" +files = [ {file = "google-cloud-storage-2.12.0.tar.gz", hash = "sha256:57c0bcda2f5e11f008a155d8636d8381d5abab46b58e0cae0e46dd5e595e6b46"}, {file = "google_cloud_storage-2.12.0-py2.py3-none-any.whl", hash = "sha256:bc52563439d42981b6e21b071a76da2791672776eda3ba99d13a8061ebbd6e5e"}, ] -google-crc32c = [ + +[package.dependencies] +google-api-core = ">=1.31.5,<2.0.dev0 || >2.3.0,<3.0.0dev" +google-auth = ">=2.23.3,<3.0dev" +google-cloud-core = ">=2.3.0,<3.0dev" +google-crc32c = ">=1.0,<2.0dev" +google-resumable-media = ">=2.6.0" +requests = ">=2.18.0,<3.0.0dev" + +[package.extras] +protobuf = ["protobuf (<5.0.0dev)"] + +[[package]] +name = "google-crc32c" +version = "1.5.0" +description = "A python wrapper of the C library 'Google CRC32C'" +optional = false +python-versions = ">=3.7" +files = [ {file = "google-crc32c-1.5.0.tar.gz", hash = "sha256:89284716bc6a5a415d4eaa11b1726d2d60a0cd12aadf5439828353662ede9dd7"}, {file = "google_crc32c-1.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:596d1f98fc70232fcb6590c439f43b350cb762fb5d61ce7b0e9db4539654cc13"}, {file = "google_crc32c-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:be82c3c8cfb15b30f36768797a640e800513793d6ae1724aaaafe5bf86f8f346"}, @@ -3794,24 +1785,69 @@ google-crc32c = [ {file = 
"google_crc32c-1.5.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:635f5d4dd18758a1fbd1049a8e8d2fee4ffed124462d837d1a02a0e009c3ab31"}, {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:c672d99a345849301784604bfeaeba4db0c7aae50b95be04dd651fd2a7310b93"}, ] -google-resumable-media = [ + +[package.extras] +testing = ["pytest"] + +[[package]] +name = "google-resumable-media" +version = "2.6.0" +description = "Utilities for Google Media Downloads and Resumable Uploads" +optional = false +python-versions = ">= 3.7" +files = [ {file = "google-resumable-media-2.6.0.tar.gz", hash = "sha256:972852f6c65f933e15a4a210c2b96930763b47197cdf4aa5f5bea435efb626e7"}, {file = "google_resumable_media-2.6.0-py2.py3-none-any.whl", hash = "sha256:fc03d344381970f79eebb632a3c18bb1828593a2dc5572b5f90115ef7d11e81b"}, ] -googleapis-common-protos = [ + +[package.dependencies] +google-crc32c = ">=1.0,<2.0dev" + +[package.extras] +aiohttp = ["aiohttp (>=3.6.2,<4.0.0dev)", "google-auth (>=1.22.0,<2.0dev)"] +requests = ["requests (>=2.18.0,<3.0.0dev)"] + +[[package]] +name = "googleapis-common-protos" +version = "1.61.0" +description = "Common protobufs used in Google APIs" +optional = false +python-versions = ">=3.7" +files = [ {file = "googleapis-common-protos-1.61.0.tar.gz", hash = "sha256:8a64866a97f6304a7179873a465d6eee97b7a24ec6cfd78e0f575e96b821240b"}, {file = "googleapis_common_protos-1.61.0-py2.py3-none-any.whl", hash = "sha256:22f1915393bb3245343f6efe87f6fe868532efc12aa26b391b15132e1279f1c0"}, ] -graphlib-backport = [ + +[package.dependencies] +protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0.dev0" + +[package.extras] +grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"] + +[[package]] +name = "graphlib-backport" +version = "1.0.3" +description = "Backport of the Python 3.9 graphlib module for Python 3.6+" +optional = false +python-versions = ">=3.6,<4.0" +files = [ {file = "graphlib_backport-1.0.3-py3-none-any.whl", hash = "sha256:24246967b9e7e6a91550bc770e6169585d35aa32790258579a8a3899a8c18fde"}, {file = "graphlib_backport-1.0.3.tar.gz", hash = "sha256:7bb8fc7757b8ae4e6d8000a26cd49e9232aaa9a3aa57edb478474b8424bfaae2"}, ] -greenlet = [ + +[[package]] +name = "greenlet" +version = "2.0.2" +description = "Lightweight in-process concurrent programming" +optional = false +python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" +files = [ {file = "greenlet-2.0.2-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:bdfea8c661e80d3c1c99ad7c3ff74e6e87184895bbaca6ee8cc61209f8b9b85d"}, {file = "greenlet-2.0.2-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:9d14b83fab60d5e8abe587d51c75b252bcc21683f24699ada8fb275d7712f5a9"}, {file = "greenlet-2.0.2-cp27-cp27m-win32.whl", hash = "sha256:6c3acb79b0bfd4fe733dff8bc62695283b57949ebcca05ae5c129eb606ff2d74"}, {file = "greenlet-2.0.2-cp27-cp27m-win_amd64.whl", hash = "sha256:283737e0da3f08bd637b5ad058507e578dd462db259f7f6e4c5c365ba4ee9343"}, {file = "greenlet-2.0.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:d27ec7509b9c18b6d73f2f5ede2622441de812e7b1a80bbd446cb0633bd3d5ae"}, + {file = "greenlet-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d967650d3f56af314b72df7089d96cda1083a7fc2da05b375d2bc48c82ab3f3c"}, {file = "greenlet-2.0.2-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:30bcf80dda7f15ac77ba5af2b961bdd9dbc77fd4ac6105cee85b0d0a5fcf74df"}, {file = 
"greenlet-2.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26fbfce90728d82bc9e6c38ea4d038cba20b7faf8a0ca53a9c07b67318d46088"}, {file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9190f09060ea4debddd24665d6804b995a9c122ef5917ab26e1566dcc712ceeb"}, @@ -3820,6 +1856,7 @@ greenlet = [ {file = "greenlet-2.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:76ae285c8104046b3a7f06b42f29c7b73f77683df18c49ab5af7983994c2dd91"}, {file = "greenlet-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:2d4686f195e32d36b4d7cf2d166857dbd0ee9f3d20ae349b6bf8afc8485b3645"}, {file = "greenlet-2.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c4302695ad8027363e96311df24ee28978162cdcdd2006476c43970b384a244c"}, + {file = "greenlet-2.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d4606a527e30548153be1a9f155f4e283d109ffba663a15856089fb55f933e47"}, {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c48f54ef8e05f04d6eff74b8233f6063cb1ed960243eacc474ee73a2ea8573ca"}, {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a1846f1b999e78e13837c93c778dcfc3365902cfb8d1bdb7dd73ead37059f0d0"}, {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a06ad5312349fec0ab944664b01d26f8d1f05009566339ac6f63f56589bc1a2"}, @@ -3849,6 +1886,7 @@ greenlet = [ {file = "greenlet-2.0.2-cp37-cp37m-win32.whl", hash = "sha256:3f6ea9bd35eb450837a3d80e77b517ea5bc56b4647f5502cd28de13675ee12f7"}, {file = "greenlet-2.0.2-cp37-cp37m-win_amd64.whl", hash = "sha256:7492e2b7bd7c9b9916388d9df23fa49d9b88ac0640db0a5b4ecc2b653bf451e3"}, {file = "greenlet-2.0.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:b864ba53912b6c3ab6bcb2beb19f19edd01a6bfcbdfe1f37ddd1778abfe75a30"}, + {file = "greenlet-2.0.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1087300cf9700bbf455b1b97e24db18f2f77b55302a68272c56209d5587c12d1"}, {file = "greenlet-2.0.2-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:ba2956617f1c42598a308a84c6cf021a90ff3862eddafd20c3333d50f0edb45b"}, {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc3a569657468b6f3fb60587e48356fe512c1754ca05a564f11366ac9e306526"}, {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8eab883b3b2a38cc1e050819ef06a7e6344d4a990d24d45bc6f2cf959045a45b"}, @@ -3857,6 +1895,7 @@ greenlet = [ {file = "greenlet-2.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b0ef99cdbe2b682b9ccbb964743a6aca37905fda5e0452e5ee239b1654d37f2a"}, {file = "greenlet-2.0.2-cp38-cp38-win32.whl", hash = "sha256:b80f600eddddce72320dbbc8e3784d16bd3fb7b517e82476d8da921f27d4b249"}, {file = "greenlet-2.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:4d2e11331fc0c02b6e84b0d28ece3a36e0548ee1a1ce9ddde03752d9b79bba40"}, + {file = "greenlet-2.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8512a0c38cfd4e66a858ddd1b17705587900dd760c6003998e9472b77b56d417"}, {file = "greenlet-2.0.2-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:88d9ab96491d38a5ab7c56dd7a3cc37d83336ecc564e4e8816dbed12e5aaefc8"}, {file = "greenlet-2.0.2-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:561091a7be172ab497a3527602d467e2b3fbe75f9e783d8b8ce403fa414f71a6"}, {file = "greenlet-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:971ce5e14dc5e73715755d0ca2975ac88cfdaefcaab078a284fea6cfabf866df"}, @@ 
-3868,7 +1907,18 @@ greenlet = [ {file = "greenlet-2.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:db1a39669102a1d8d12b57de2bb7e2ec9066a6f2b3da35ae511ff93b01b5d564"}, {file = "greenlet-2.0.2.tar.gz", hash = "sha256:e7c8dc13af7db097bed64a051d2dd49e9f0af495c26995c00a9ee842690d34c0"}, ] -grpcio = [ + +[package.extras] +docs = ["Sphinx", "docutils (<0.18)"] +test = ["objgraph", "psutil"] + +[[package]] +name = "grpcio" +version = "1.59.0" +description = "HTTP/2-based RPC framework" +optional = false +python-versions = ">=3.7" +files = [ {file = "grpcio-1.59.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:225e5fa61c35eeaebb4e7491cd2d768cd8eb6ed00f2664fa83a58f29418b39fd"}, {file = "grpcio-1.59.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:b95ec8ecc4f703f5caaa8d96e93e40c7f589bad299a2617bdb8becbcce525539"}, {file = "grpcio-1.59.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:1a839ba86764cc48226f50b924216000c79779c563a301586a107bda9cbe9dcf"}, @@ -3924,26 +1974,88 @@ grpcio = [ {file = "grpcio-1.59.0-cp39-cp39-win_amd64.whl", hash = "sha256:38823bd088c69f59966f594d087d3a929d1ef310506bee9e3648317660d65b81"}, {file = "grpcio-1.59.0.tar.gz", hash = "sha256:acf70a63cf09dd494000007b798aff88a436e1c03b394995ce450be437b8e54f"}, ] -grpcio-status = [ + +[package.extras] +protobuf = ["grpcio-tools (>=1.59.0)"] + +[[package]] +name = "grpcio-status" +version = "1.59.0" +description = "Status proto mapping for gRPC" +optional = false +python-versions = ">=3.6" +files = [ {file = "grpcio-status-1.59.0.tar.gz", hash = "sha256:f93b9c33e0a26162ef8431bfcffcc3e1fb217ccd8d7b5b3061b6e9f813e698b5"}, {file = "grpcio_status-1.59.0-py3-none-any.whl", hash = "sha256:cb5a222b14a80ee050bff9676623822e953bff0c50d2d29180de723652fdf10d"}, ] -h11 = [ + +[package.dependencies] +googleapis-common-protos = ">=1.5.5" +grpcio = ">=1.59.0" +protobuf = ">=4.21.6" + +[[package]] +name = "h11" +version = "0.14.0" +description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +optional = false +python-versions = ">=3.7" +files = [ {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, ] -hexbytes = [ + +[[package]] +name = "hexbytes" +version = "0.3.1" +description = "hexbytes: Python `bytes` subclass that decodes hex, with a readable console output" +optional = false +python-versions = ">=3.7, <4" +files = [ {file = "hexbytes-0.3.1-py3-none-any.whl", hash = "sha256:383595ad75026cf00abd570f44b368c6cdac0c6becfae5c39ff88829877f8a59"}, {file = "hexbytes-0.3.1.tar.gz", hash = "sha256:a3fe35c6831ee8fafd048c4c086b986075fc14fd46258fa24ecb8d65745f9a9d"}, ] -hnswlib = [ + +[package.extras] +dev = ["black (>=22)", "bumpversion (>=0.5.3)", "eth-utils (>=1.0.1,<3)", "flake8 (==6.0.0)", "flake8-bugbear (==23.3.23)", "hypothesis (>=3.44.24,<=6.31.6)", "ipython", "isort (>=5.10.1)", "mypy (==0.971)", "pydocstyle (>=5.0.0)", "pytest (>=7.0.0)", "pytest-watch (>=4.1.0)", "pytest-xdist (>=2.4.0)", "sphinx (>=5.0.0)", "sphinx-rtd-theme (>=1.0.0)", "towncrier (>=21,<22)", "tox (>=4.0.0)", "twine", "wheel"] +doc = ["sphinx (>=5.0.0)", "sphinx-rtd-theme (>=1.0.0)", "towncrier (>=21,<22)"] +lint = ["black (>=22)", "flake8 (==6.0.0)", "flake8-bugbear (==23.3.23)", "isort (>=5.10.1)", "mypy (==0.971)", "pydocstyle (>=5.0.0)"] +test = ["eth-utils (>=1.0.1,<3)", "hypothesis (>=3.44.24,<=6.31.6)", "pytest (>=7.0.0)", "pytest-xdist (>=2.4.0)"] + +[[package]] 
+name = "hnswlib" +version = "0.7.0" +description = "hnswlib" +optional = false +python-versions = "*" +files = [ {file = "hnswlib-0.7.0.tar.gz", hash = "sha256:bc459668e7e44bb7454b256b90c98c5af750653919d9a91698dafcf416cf64c4"}, ] -httplib2 = [ + +[package.dependencies] +numpy = "*" + +[[package]] +name = "httplib2" +version = "0.22.0" +description = "A comprehensive HTTP client library." +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ {file = "httplib2-0.22.0-py3-none-any.whl", hash = "sha256:14ae0a53c1ba8f3d37e9e27cf37eabb0fb9980f435ba405d546948b009dd64dc"}, {file = "httplib2-0.22.0.tar.gz", hash = "sha256:d7a10bc5ef5ab08322488bde8c726eeee5c8618723fdb399597ec58f3d82df81"}, ] -httptools = [ + +[package.dependencies] +pyparsing = {version = ">=2.4.2,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.0.2 || >3.0.2,<3.0.3 || >3.0.3,<4", markers = "python_version > \"3.0\""} + +[[package]] +name = "httptools" +version = "0.6.0" +description = "A collection of framework independent HTTP protocol utils." +optional = false +python-versions = ">=3.5.0" +files = [ {file = "httptools-0.6.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:818325afee467d483bfab1647a72054246d29f9053fd17cc4b86cda09cc60339"}, {file = "httptools-0.6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72205730bf1be875003692ca54a4a7c35fac77b4746008966061d9d41a61b0f5"}, {file = "httptools-0.6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33eb1d4e609c835966e969a31b1dedf5ba16b38cab356c2ce4f3e33ffa94cad3"}, @@ -3980,59 +2092,235 @@ httptools = [ {file = "httptools-0.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:23b09537086a5a611fad5696fc8963d67c7e7f98cb329d38ee114d588b0b74cd"}, {file = "httptools-0.6.0.tar.gz", hash = "sha256:9fc6e409ad38cbd68b177cd5158fc4042c796b82ca88d99ec78f07bed6c6b796"}, ] -huggingface-hub = [ + +[package.extras] +test = ["Cython (>=0.29.24,<0.30.0)"] + +[[package]] +name = "huggingface-hub" +version = "0.17.3" +description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" +optional = false +python-versions = ">=3.8.0" +files = [ {file = "huggingface_hub-0.17.3-py3-none-any.whl", hash = "sha256:545eb3665f6ac587add946e73984148f2ea5c7877eac2e845549730570c1933a"}, {file = "huggingface_hub-0.17.3.tar.gz", hash = "sha256:40439632b211311f788964602bf8b0d9d6b7a2314fba4e8d67b2ce3ecea0e3fd"}, ] -humanfriendly = [ + +[package.dependencies] +filelock = "*" +fsspec = "*" +packaging = ">=20.9" +pyyaml = ">=5.1" +requests = "*" +tqdm = ">=4.42.1" +typing-extensions = ">=3.7.4.3" + +[package.extras] +all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "black (==23.7)", "gradio", "jedi", "mypy (==1.5.1)", "numpy", "pydantic (<2.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)"] +cli = ["InquirerPy (==0.3.4)"] +dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "black (==23.7)", "gradio", "jedi", "mypy (==1.5.1)", "numpy", "pydantic (<2.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)"] +docs = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "black (==23.7)", "gradio", "hf-doc-builder", "jedi", "mypy 
(==1.5.1)", "numpy", "pydantic (<2.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)", "watchdog"] +fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"] +inference = ["aiohttp", "pydantic (<2.0)"] +quality = ["black (==23.7)", "mypy (==1.5.1)", "ruff (>=0.0.241)"] +tensorflow = ["graphviz", "pydot", "tensorflow"] +testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jedi", "numpy", "pydantic (<2.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"] +torch = ["torch"] +typing = ["pydantic (<2.0)", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] + +[[package]] +name = "humanfriendly" +version = "10.0" +description = "Human friendly output for text interfaces using Python" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ {file = "humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477"}, {file = "humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc"}, ] -humanize = [ + +[package.dependencies] +pyreadline3 = {version = "*", markers = "sys_platform == \"win32\" and python_version >= \"3.8\""} + +[[package]] +name = "humanize" +version = "4.8.0" +description = "Python humanize utilities" +optional = false +python-versions = ">=3.8" +files = [ {file = "humanize-4.8.0-py3-none-any.whl", hash = "sha256:8bc9e2bb9315e61ec06bf690151ae35aeb65651ab091266941edf97c90836404"}, {file = "humanize-4.8.0.tar.gz", hash = "sha256:9783373bf1eec713a770ecaa7c2d7a7902c98398009dfa3d8a2df91eec9311e8"}, ] -idna = [ + +[package.extras] +tests = ["freezegun", "pytest", "pytest-cov"] + +[[package]] +name = "idna" +version = "3.4" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.5" +files = [ {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, ] -importlib-metadata = [ + +[[package]] +name = "importlib-metadata" +version = "6.8.0" +description = "Read metadata from Python packages" +optional = false +python-versions = ">=3.8" +files = [ {file = "importlib_metadata-6.8.0-py3-none-any.whl", hash = "sha256:3ebb78df84a805d7698245025b975d9d67053cd94c79245ba4b3eb694abe68bb"}, {file = "importlib_metadata-6.8.0.tar.gz", hash = "sha256:dbace7892d8c0c4ac1ad096662232f831d4e64f4c4545bd53016a3e9d4654743"}, ] -inflection = [ + +[package.dependencies] +zipp = ">=0.5" + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +perf = ["ipython"] +testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)", "pytest-ruff"] + +[[package]] +name = "inflection" +version = "0.5.1" +description = "A port of Ruby on Rails inflector to Python" +optional = false +python-versions = ">=3.5" +files = [ {file = "inflection-0.5.1-py2.py3-none-any.whl", 
hash = "sha256:f38b2b640938a4f35ade69ac3d053042959b62a0f1076a5bbaa1b9526605a8a2"}, {file = "inflection-0.5.1.tar.gz", hash = "sha256:1a29730d366e996aaacffb2f1f1cb9593dc38e2ddd30c91250c6dde09ea9b417"}, ] -iniconfig = [ + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] -isodate = [ + +[[package]] +name = "isodate" +version = "0.6.1" +description = "An ISO 8601 date/time/duration parser and formatter" +optional = false +python-versions = "*" +files = [ {file = "isodate-0.6.1-py2.py3-none-any.whl", hash = "sha256:0751eece944162659049d35f4f549ed815792b38793f07cf73381c1c87cbed96"}, {file = "isodate-0.6.1.tar.gz", hash = "sha256:48c5881de7e8b0a0d648cb024c8062dc84e7b840ed81e864c7614fd3c127bde9"}, ] -jmespath = [ + +[package.dependencies] +six = "*" + +[[package]] +name = "jmespath" +version = "1.0.1" +description = "JSON Matching Expressions" +optional = false +python-versions = ">=3.7" +files = [ {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, ] -joblib = [ + +[[package]] +name = "joblib" +version = "1.3.2" +description = "Lightweight pipelining with Python functions" +optional = false +python-versions = ">=3.7" +files = [ {file = "joblib-1.3.2-py3-none-any.whl", hash = "sha256:ef4331c65f239985f3f2220ecc87db222f08fd22097a3dd5698f693875f8cbb9"}, {file = "joblib-1.3.2.tar.gz", hash = "sha256:92f865e621e17784e7955080b6d042489e3b8e294949cc44c6eac304f59772b1"}, ] -jsonpath-ng = [ + +[[package]] +name = "jsonpath-ng" +version = "1.6.0" +description = "A final implementation of JSONPath for Python that aims to be standard compliant, including arithmetic and binary comparison operators and providing clear AST for metaprogramming." 
+optional = false +python-versions = "*" +files = [ {file = "jsonpath-ng-1.6.0.tar.gz", hash = "sha256:5483f8e9d74c39c9abfab554c070ae783c1c8cbadf5df60d561bc705ac68a07e"}, {file = "jsonpath_ng-1.6.0-py3-none-any.whl", hash = "sha256:6fd04833412c4b3d9299edf369542f5e67095ca84efa17cbb7f06a34958adc9f"}, ] -langchain = [ + +[package.dependencies] +ply = "*" + +[[package]] +name = "langchain" +version = "0.0.219" +description = "Building applications with LLMs through composability" +optional = false +python-versions = ">=3.8.1,<4.0" +files = [ {file = "langchain-0.0.219-py3-none-any.whl", hash = "sha256:1f08a00e622f1c75087d6013f34e82be3f8dd1859266eb583a0fd7bc045090cf"}, {file = "langchain-0.0.219.tar.gz", hash = "sha256:842f8212939e5ac4005906d2215574ffb3e34d2fe28f5bc0f46eb3b28fb29c5d"}, ] -langchainplus-sdk = [ + +[package.dependencies] +aiohttp = ">=3.8.3,<4.0.0" +async-timeout = {version = ">=4.0.0,<5.0.0", markers = "python_version < \"3.11\""} +dataclasses-json = ">=0.5.7,<0.6.0" +langchainplus-sdk = ">=0.0.17" +numexpr = ">=2.8.4,<3.0.0" +numpy = ">=1,<2" +openapi-schema-pydantic = ">=1.2,<2.0" +pydantic = ">=1,<2" +PyYAML = ">=5.4.1" +requests = ">=2,<3" +SQLAlchemy = ">=1.4,<3" +tenacity = ">=8.1.0,<9.0.0" + +[package.extras] +all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.2.6,<0.3.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.3,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (==9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=3,<4)", "deeplake (>=3.6.2,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jina (>=3.14,<4.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.1.dev3,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "momento (>=1.5.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<3.0.0)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "octoai-sdk (>=0.1.1,<0.2.0)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.1.2,<2.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "spacy (>=3,<4)", "steamship (>=2.16.9,<3.0.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.4.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"] +azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core 
(>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0a20230509004)", "openai (>=0,<1)"] +clarifai = ["clarifai (==9.1.0)"] +cohere = ["cohere (>=3,<4)"] +docarray = ["docarray[hnswlib] (>=0.32.0,<0.33.0)"] +embeddings = ["sentence-transformers (>=2,<3)"] +extended-testing = ["atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "chardet (>=5.1.0,<6.0.0)", "esprima (>=4.0.1,<5.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "jq (>=1.4.1,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "openai (>=0,<1)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "streamlit (>=1.18.0,<2.0.0)", "telethon (>=1.28.5,<2.0.0)", "tqdm (>=4.48.0)", "zep-python (>=0.31)"] +javascript = ["esprima (>=4.0.1,<5.0.0)"] +llms = ["anthropic (>=0.2.6,<0.3.0)", "clarifai (==9.1.0)", "cohere (>=3,<4)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (>=0,<1)", "openllm (>=0.1.6)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)"] +openai = ["openai (>=0,<1)", "tiktoken (>=0.3.2,<0.4.0)"] +qdrant = ["qdrant-client (>=1.1.2,<2.0.0)"] +text-helpers = ["chardet (>=5.1.0,<6.0.0)"] + +[[package]] +name = "langchainplus-sdk" +version = "0.0.20" +description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." +optional = false +python-versions = ">=3.8.1,<4.0" +files = [ {file = "langchainplus_sdk-0.0.20-py3-none-any.whl", hash = "sha256:07a869d476755803aa04c4986ce78d00c2fe4ff584c0eaa57d7570c9664188db"}, {file = "langchainplus_sdk-0.0.20.tar.gz", hash = "sha256:3d300e2e3290f68cc9d842c059f9458deba60e776c9e790309688cad1bfbb219"}, ] -lxml = [ + +[package.dependencies] +pydantic = ">=1,<2" +requests = ">=2,<3" +tenacity = ">=8.1.0,<9.0.0" + +[[package]] +name = "lxml" +version = "4.9.3" +description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." 
+optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*" +files = [ {file = "lxml-4.9.3-cp27-cp27m-macosx_11_0_x86_64.whl", hash = "sha256:b0a545b46b526d418eb91754565ba5b63b1c0b12f9bd2f808c852d9b4b2f9b5c"}, {file = "lxml-4.9.3-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:075b731ddd9e7f68ad24c635374211376aa05a281673ede86cbe1d1b3455279d"}, {file = "lxml-4.9.3-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:1e224d5755dba2f4a9498e150c43792392ac9b5380aa1b845f98a1618c94eeef"}, @@ -4126,7 +2414,20 @@ lxml = [ {file = "lxml-4.9.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:f3df3db1d336b9356dd3112eae5f5c2b8b377f3bc826848567f10bfddfee77e9"}, {file = "lxml-4.9.3.tar.gz", hash = "sha256:48628bd53a426c9eb9bc066a923acaa0878d1e86129fd5359aee99285f4eed9c"}, ] -lz4 = [ + +[package.extras] +cssselect = ["cssselect (>=0.7)"] +html5 = ["html5lib"] +htmlsoup = ["BeautifulSoup4"] +source = ["Cython (>=0.29.35)"] + +[[package]] +name = "lz4" +version = "4.3.2" +description = "LZ4 Bindings for Python" +optional = false +python-versions = ">=3.7" +files = [ {file = "lz4-4.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1c4c100d99eed7c08d4e8852dd11e7d1ec47a3340f49e3a96f8dfbba17ffb300"}, {file = "lz4-4.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:edd8987d8415b5dad25e797043936d91535017237f72fa456601be1479386c92"}, {file = "lz4-4.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f7c50542b4ddceb74ab4f8b3435327a0861f06257ca501d59067a6a482535a77"}, @@ -4163,63 +2464,232 @@ lz4 = [ {file = "lz4-4.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:4caedeb19e3ede6c7a178968b800f910db6503cb4cb1e9cc9221157572139b49"}, {file = "lz4-4.3.2.tar.gz", hash = "sha256:e1431d84a9cfb23e6773e72078ce8e65cad6745816d4cbf9ae67da5ea419acda"}, ] -makefun = [ + +[package.extras] +docs = ["sphinx (>=1.6.0)", "sphinx-bootstrap-theme"] +flake8 = ["flake8"] +tests = ["psutil", "pytest (!=3.3.0)", "pytest-cov"] + +[[package]] +name = "makefun" +version = "1.15.1" +description = "Small library to dynamically create python functions." +optional = false +python-versions = "*" +files = [ {file = "makefun-1.15.1-py2.py3-none-any.whl", hash = "sha256:a63cfc7b47a539c76d97bd4fdb833c7d0461e759fd1225f580cb4be6200294d4"}, {file = "makefun-1.15.1.tar.gz", hash = "sha256:40b0f118b6ded0d8d78c78f1eb679b8b6b2462e3c1b3e05fb1b2da8cd46b48a5"}, ] -markdown = [ + +[[package]] +name = "markdown" +version = "3.5" +description = "Python implementation of John Gruber's Markdown." +optional = false +python-versions = ">=3.8" +files = [ {file = "Markdown-3.5-py3-none-any.whl", hash = "sha256:4afb124395ce5fc34e6d9886dab977fd9ae987fc6e85689f08278cf0c69d4bf3"}, {file = "Markdown-3.5.tar.gz", hash = "sha256:a807eb2e4778d9156c8f07876c6e4d50b5494c5665c4834f67b06459dfd877b3"}, ] -markdown-it-py = [ + +[package.dependencies] +importlib-metadata = {version = ">=4.4", markers = "python_version < \"3.10\""} + +[package.extras] +docs = ["mdx-gh-links (>=0.2)", "mkdocs (>=1.5)", "mkdocs-gen-files", "mkdocs-literate-nav", "mkdocs-nature (>=0.6)", "mkdocs-section-index", "mkdocstrings[python]"] +testing = ["coverage", "pyyaml"] + +[[package]] +name = "markdown-it-py" +version = "3.0.0" +description = "Python port of markdown-it. Markdown parsing, done right!" 
+optional = false +python-versions = ">=3.8" +files = [ {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, ] -marshmallow = [ + +[package.dependencies] +mdurl = ">=0.1,<1.0" + +[package.extras] +benchmarking = ["psutil", "pytest", "pytest-benchmark"] +code-style = ["pre-commit (>=3.0,<4.0)"] +compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"] +linkify = ["linkify-it-py (>=1,<3)"] +plugins = ["mdit-py-plugins"] +profiling = ["gprof2dot"] +rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] + +[[package]] +name = "marshmallow" +version = "3.20.1" +description = "A lightweight library for converting complex datatypes to and from native Python datatypes." +optional = false +python-versions = ">=3.8" +files = [ {file = "marshmallow-3.20.1-py3-none-any.whl", hash = "sha256:684939db93e80ad3561392f47be0230743131560a41c5110684c16e21ade0a5c"}, {file = "marshmallow-3.20.1.tar.gz", hash = "sha256:5d2371bbe42000f2b3fb5eaa065224df7d8f8597bc19a1bbfa5bfe7fba8da889"}, ] -marshmallow-enum = [ + +[package.dependencies] +packaging = ">=17.0" + +[package.extras] +dev = ["flake8 (==6.0.0)", "flake8-bugbear (==23.7.10)", "mypy (==1.4.1)", "pre-commit (>=2.4,<4.0)", "pytest", "pytz", "simplejson", "tox"] +docs = ["alabaster (==0.7.13)", "autodocsumm (==0.2.11)", "sphinx (==7.0.1)", "sphinx-issues (==3.0.1)", "sphinx-version-warning (==1.1.2)"] +lint = ["flake8 (==6.0.0)", "flake8-bugbear (==23.7.10)", "mypy (==1.4.1)", "pre-commit (>=2.4,<4.0)"] +tests = ["pytest", "pytz", "simplejson"] + +[[package]] +name = "marshmallow-enum" +version = "1.5.1" +description = "Enum field for Marshmallow" +optional = false +python-versions = "*" +files = [ {file = "marshmallow-enum-1.5.1.tar.gz", hash = "sha256:38e697e11f45a8e64b4a1e664000897c659b60aa57bfa18d44e226a9920b6e58"}, {file = "marshmallow_enum-1.5.1-py2.py3-none-any.whl", hash = "sha256:57161ab3dbfde4f57adeb12090f39592e992b9c86d206d02f6bd03ebec60f072"}, ] -mccabe = [ + +[package.dependencies] +marshmallow = ">=2.0.0" + +[[package]] +name = "mccabe" +version = "0.7.0" +description = "McCabe checker, plugin for flake8" +optional = false +python-versions = ">=3.6" +files = [ {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, ] -mdurl = [ + +[[package]] +name = "mdurl" +version = "0.1.2" +description = "Markdown URL utilities" +optional = false +python-versions = ">=3.7" +files = [ {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, ] -mimesis = [ + +[[package]] +name = "mimesis" +version = "7.1.0" +description = "Mimesis: Fake Data Generator." 
+optional = false +python-versions = ">=3.8,<4.0" +files = [ {file = "mimesis-7.1.0-py3-none-any.whl", hash = "sha256:da65bea6d6d5d5d87d5c008e6b23ef5f96a49cce436d9f8708dabb5152da0290"}, {file = "mimesis-7.1.0.tar.gz", hash = "sha256:c83b55d35536d7e9b9700a596b7ccfb639a740e3e1fb5e08062e8ab2a67dcb37"}, ] -monotonic = [ + +[[package]] +name = "monotonic" +version = "1.6" +description = "An implementation of time.monotonic() for Python 2 & < 3.3" +optional = false +python-versions = "*" +files = [ {file = "monotonic-1.6-py2.py3-none-any.whl", hash = "sha256:68687e19a14f11f26d140dd5c86f3dba4bf5df58003000ed467e0e2a69bca96c"}, {file = "monotonic-1.6.tar.gz", hash = "sha256:3a55207bcfed53ddd5c5bae174524062935efed17792e9de2ad0205ce9ad63f7"}, ] -more-itertools = [ + +[[package]] +name = "more-itertools" +version = "10.1.0" +description = "More routines for operating on iterables, beyond itertools" +optional = false +python-versions = ">=3.8" +files = [ {file = "more-itertools-10.1.0.tar.gz", hash = "sha256:626c369fa0eb37bac0291bce8259b332fd59ac792fa5497b59837309cd5b114a"}, {file = "more_itertools-10.1.0-py3-none-any.whl", hash = "sha256:64e0735fcfdc6f3464ea133afe8ea4483b1c5fe3a3d69852e6503b43a0b222e6"}, ] -mpmath = [ + +[[package]] +name = "mpmath" +version = "1.3.0" +description = "Python library for arbitrary-precision floating-point arithmetic" +optional = false +python-versions = "*" +files = [ {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, ] -msal = [ + +[package.extras] +develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"] +docs = ["sphinx"] +gmpy = ["gmpy2 (>=2.1.0a4)"] +tests = ["pytest (>=4.6)"] + +[[package]] +name = "msal" +version = "1.24.1" +description = "The Microsoft Authentication Library (MSAL) for Python library" +optional = false +python-versions = ">=2.7" +files = [ {file = "msal-1.24.1-py2.py3-none-any.whl", hash = "sha256:ce4320688f95c301ee74a4d0e9dbcfe029a63663a8cc61756f40d0d0d36574ad"}, {file = "msal-1.24.1.tar.gz", hash = "sha256:aa0972884b3c6fdec53d9a0bd15c12e5bd7b71ac1b66d746f54d128709f3f8f8"}, ] -msal-extensions = [ + +[package.dependencies] +cryptography = ">=0.6,<44" +PyJWT = {version = ">=1.0.0,<3", extras = ["crypto"]} +requests = ">=2.0.0,<3" + +[package.extras] +broker = ["pymsalruntime (>=0.13.2,<0.14)"] + +[[package]] +name = "msal-extensions" +version = "1.0.0" +description = "Microsoft Authentication Library extensions (MSAL EX) provides a persistence API that can save your data on disk, encrypted on Windows, macOS and Linux. Concurrent data access will be coordinated by a file lock mechanism." +optional = false +python-versions = "*" +files = [ {file = "msal-extensions-1.0.0.tar.gz", hash = "sha256:c676aba56b0cce3783de1b5c5ecfe828db998167875126ca4b47dc6436451354"}, {file = "msal_extensions-1.0.0-py2.py3-none-any.whl", hash = "sha256:91e3db9620b822d0ed2b4d1850056a0f133cba04455e62f11612e40f5502f2ee"}, ] -msg-parser = [ + +[package.dependencies] +msal = ">=0.4.1,<2.0.0" +portalocker = [ + {version = ">=1.0,<3", markers = "python_version >= \"3.5\" and platform_system != \"Windows\""}, + {version = ">=1.6,<3", markers = "python_version >= \"3.5\" and platform_system == \"Windows\""}, +] + +[[package]] +name = "msg-parser" +version = "1.2.0" +description = "This module enables reading, parsing and converting Microsoft Outlook MSG E-Mail files." 
+optional = false +python-versions = ">=3.4" +files = [ {file = "msg_parser-1.2.0-py2.py3-none-any.whl", hash = "sha256:d47a2f0b2a359cb189fad83cc991b63ea781ecc70d91410324273fbf93e95375"}, {file = "msg_parser-1.2.0.tar.gz", hash = "sha256:0de858d4fcebb6c8f6f028da83a17a20fe01cdce67c490779cf43b3b0162aa66"}, ] -multidict = [ + +[package.dependencies] +olefile = ">=0.46" + +[package.extras] +rtf = ["compressed-rtf (>=1.0.5)"] + +[[package]] +name = "multidict" +version = "6.0.4" +description = "multidict implementation" +optional = false +python-versions = ">=3.7" +files = [ {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b1a97283e0c85772d613878028fec909f003993e1007eafa715b24b377cb9b8"}, {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eeb6dcc05e911516ae3d1f207d4b0520d07f54484c49dfc294d6e7d63b734171"}, {file = "multidict-6.0.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d6d635d5209b82a3492508cf5b365f3446afb65ae7ebd755e70e18f287b0adf7"}, @@ -4295,7 +2765,14 @@ multidict = [ {file = "multidict-6.0.4-cp39-cp39-win_amd64.whl", hash = "sha256:33029f5734336aa0d4c0384525da0387ef89148dc7191aae00ca5fb23d7aafc2"}, {file = "multidict-6.0.4.tar.gz", hash = "sha256:3666906492efb76453c0e7b97f2cf459b0682e7402c0489a95484965dbc1da49"}, ] -mypy = [ + +[[package]] +name = "mypy" +version = "1.6.1" +description = "Optional static typing for Python" +optional = false +python-versions = ">=3.8" +files = [ {file = "mypy-1.6.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e5012e5cc2ac628177eaac0e83d622b2dd499e28253d4107a08ecc59ede3fc2c"}, {file = "mypy-1.6.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d8fbb68711905f8912e5af474ca8b78d077447d8f3918997fecbf26943ff3cbb"}, {file = "mypy-1.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21a1ad938fee7d2d96ca666c77b7c494c3c5bd88dff792220e1afbebb2925b5e"}, @@ -4324,19 +2801,75 @@ mypy = [ {file = "mypy-1.6.1-py3-none-any.whl", hash = "sha256:4cbe68ef919c28ea561165206a2dcb68591c50f3bcf777932323bc208d949cf1"}, {file = "mypy-1.6.1.tar.gz", hash = "sha256:4d01c00d09a0be62a4ca3f933e315455bde83f37f892ba4b08ce92f3cf44bcc1"}, ] -mypy-extensions = [ + +[package.dependencies] +mypy-extensions = ">=1.0.0" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = ">=4.1.0" + +[package.extras] +dmypy = ["psutil (>=4.0)"] +install-types = ["pip"] +reports = ["lxml"] + +[[package]] +name = "mypy-extensions" +version = "1.0.0" +description = "Type system extensions for programs checked with the mypy type checker." +optional = false +python-versions = ">=3.5" +files = [ {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] -natsort = [ + +[[package]] +name = "natsort" +version = "8.4.0" +description = "Simple yet flexible natural sorting in Python." 
+optional = false +python-versions = ">=3.7" +files = [ {file = "natsort-8.4.0-py3-none-any.whl", hash = "sha256:4732914fb471f56b5cce04d7bae6f164a592c7712e1c85f9ef585e197299521c"}, {file = "natsort-8.4.0.tar.gz", hash = "sha256:45312c4a0e5507593da193dedd04abb1469253b601ecaf63445ad80f0a1ea581"}, ] -nltk = [ + +[package.extras] +fast = ["fastnumbers (>=2.0.0)"] +icu = ["PyICU (>=1.0.0)"] + +[[package]] +name = "nltk" +version = "3.8.1" +description = "Natural Language Toolkit" +optional = false +python-versions = ">=3.7" +files = [ {file = "nltk-3.8.1-py3-none-any.whl", hash = "sha256:fd5c9109f976fa86bcadba8f91e47f5e9293bd034474752e92a520f81c93dda5"}, {file = "nltk-3.8.1.zip", hash = "sha256:1834da3d0682cba4f2cede2f9aad6b0fafb6461ba451db0efb6f9c39798d64d3"}, ] -numexpr = [ + +[package.dependencies] +click = "*" +joblib = "*" +regex = ">=2021.8.3" +tqdm = "*" + +[package.extras] +all = ["matplotlib", "numpy", "pyparsing", "python-crfsuite", "requests", "scikit-learn", "scipy", "twython"] +corenlp = ["requests"] +machine-learning = ["numpy", "python-crfsuite", "scikit-learn", "scipy"] +plot = ["matplotlib"] +tgrep = ["pyparsing"] +twitter = ["twython"] + +[[package]] +name = "numexpr" +version = "2.8.6" +description = "Fast numerical expression evaluator for NumPy" +optional = false +python-versions = ">=3.7" +files = [ {file = "numexpr-2.8.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:80acbfefb68bd92e708e09f0a02b29e04d388b9ae72f9fcd57988aca172a7833"}, {file = "numexpr-2.8.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6e884687da8af5955dc9beb6a12d469675c90b8fb38b6c93668c989cfc2cd982"}, {file = "numexpr-2.8.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9ef7e8aaa84fce3aba2e65f243d14a9f8cc92aafd5d90d67283815febfe43eeb"}, @@ -4368,7 +2901,17 @@ numexpr = [ {file = "numexpr-2.8.6-cp39-cp39-win_amd64.whl", hash = "sha256:6d7003497d82ef19458dce380b36a99343b96a3bd5773465c2d898bf8f5a38f9"}, {file = "numexpr-2.8.6.tar.gz", hash = "sha256:6336f8dba3f456e41a4ffc3c97eb63d89c73589ff6e1707141224b930263260d"}, ] -numpy = [ + +[package.dependencies] +numpy = ">=1.13.3" + +[[package]] +name = "numpy" +version = "1.24.4" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = ">=3.8" +files = [ {file = "numpy-1.24.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0bfb52d2169d58c1cdb8cc1f16989101639b34c7d3ce60ed70b19c63eba0b64"}, {file = "numpy-1.24.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ed094d4f0c177b1b8e7aa9cba7d6ceed51c0e569a5318ac0ca9a090680a6a1b1"}, {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79fc682a374c4a8ed08b331bef9c5f582585d1048fa6d80bc6c35bc384eee9b4"}, @@ -4398,14 +2941,40 @@ numpy = [ {file = "numpy-1.24.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e98f220aa76ca2a977fe435f5b04d7b3470c0a2e6312907b37ba6068f26787f2"}, {file = "numpy-1.24.4.tar.gz", hash = "sha256:80f5e3a4e498641401868df4208b74581206afbee7cf7b8329daae82676d9463"}, ] -oauthlib = [ + +[[package]] +name = "oauthlib" +version = "3.2.2" +description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" +optional = false +python-versions = ">=3.6" +files = [ {file = "oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca"}, {file = "oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918"}, ] -olefile = [ + +[package.extras] +rsa 
= ["cryptography (>=3.0.0)"] +signals = ["blinker (>=1.4.0)"] +signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] + +[[package]] +name = "olefile" +version = "0.46" +description = "Python package to parse, read and write Microsoft OLE2 files (Structured Storage or Compound Document, Microsoft Office)" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ {file = "olefile-0.46.zip", hash = "sha256:133b031eaf8fd2c9399b78b8bc5b8fcbe4c31e85295749bb17a87cba8f3c3964"}, ] -onnxruntime = [ + +[[package]] +name = "onnxruntime" +version = "1.16.1" +description = "ONNX Runtime is a runtime accelerator for Machine Learning models" +optional = false +python-versions = "*" +files = [ {file = "onnxruntime-1.16.1-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:28b2c7f444b4119950b69370801cd66067f403d19cbaf2a444735d7c269cce4a"}, {file = "onnxruntime-1.16.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c24e04f33e7899f6aebb03ed51e51d346c1f906b05c5569d58ac9a12d38a2f58"}, {file = "onnxruntime-1.16.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fa93b166f2d97063dc9f33c5118c5729a4a5dd5617296b6dbef42f9047b3e81"}, @@ -4431,19 +3000,72 @@ onnxruntime = [ {file = "onnxruntime-1.16.1-cp39-cp39-win32.whl", hash = "sha256:85771adb75190db9364b25ddec353ebf07635b83eb94b64ed014f1f6d57a3857"}, {file = "onnxruntime-1.16.1-cp39-cp39-win_amd64.whl", hash = "sha256:d32d2b30799c1f950123c60ae8390818381fd5f88bdf3627eeca10071c155dc5"}, ] -openai = [ + +[package.dependencies] +coloredlogs = "*" +flatbuffers = "*" +numpy = ">=1.21.6" +packaging = "*" +protobuf = "*" +sympy = "*" + +[[package]] +name = "openai" +version = "0.27.10" +description = "Python client library for the OpenAI API" +optional = false +python-versions = ">=3.7.1" +files = [ {file = "openai-0.27.10-py3-none-any.whl", hash = "sha256:beabd1757e3286fa166dde3b70ebb5ad8081af046876b47c14c41e203ed22a14"}, {file = "openai-0.27.10.tar.gz", hash = "sha256:60e09edf7100080283688748c6803b7b3b52d5a55d21890f3815292a0552d83b"}, ] -openapi-schema-pydantic = [ + +[package.dependencies] +aiohttp = "*" +requests = ">=2.20" +tqdm = "*" + +[package.extras] +datalib = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] +dev = ["black (>=21.6b0,<22.0)", "pytest (==6.*)", "pytest-asyncio", "pytest-mock"] +embeddings = ["matplotlib", "numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "plotly", "scikit-learn (>=1.0.2)", "scipy", "tenacity (>=8.0.1)"] +wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "wandb"] + +[[package]] +name = "openapi-schema-pydantic" +version = "1.2.4" +description = "OpenAPI (v3) specification schema as pydantic class" +optional = false +python-versions = ">=3.6.1" +files = [ {file = "openapi-schema-pydantic-1.2.4.tar.gz", hash = "sha256:3e22cf58b74a69f752cc7e5f1537f6e44164282db2700cbbcd3bb99ddd065196"}, {file = "openapi_schema_pydantic-1.2.4-py3-none-any.whl", hash = "sha256:a932ecc5dcbb308950282088956e94dea069c9823c84e507d64f6b622222098c"}, ] -openpyxl = [ + +[package.dependencies] +pydantic = ">=1.8.2" + +[[package]] +name = "openpyxl" +version = "3.1.2" +description = "A Python library to read/write Excel 2010 xlsx/xlsm files" +optional = false +python-versions = ">=3.6" +files = [ {file = "openpyxl-3.1.2-py2.py3-none-any.whl", hash = "sha256:f91456ead12ab3c6c2e9491cf33ba6d08357d802192379bb482f1033ade496f5"}, {file = "openpyxl-3.1.2.tar.gz", hash = 
"sha256:a6f5977418eff3b2d5500d54d9db50c8277a368436f4e4f8ddb1be3422870184"}, ] -orjson = [ + +[package.dependencies] +et-xmlfile = "*" + +[[package]] +name = "orjson" +version = "3.9.9" +description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" +optional = false +python-versions = ">=3.8" +files = [ {file = "orjson-3.9.9-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:f28090060a31f4d11221f9ba48b2273b0d04b702f4dcaa197c38c64ce639cc51"}, {file = "orjson-3.9.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8038ba245d0c0a6337cfb6747ea0c51fe18b0cf1a4bc943d530fd66799fae33d"}, {file = "orjson-3.9.9-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:543b36df56db195739c70d645ecd43e49b44d5ead5f8f645d2782af118249b37"}, @@ -4495,15 +3117,36 @@ orjson = [ {file = "orjson-3.9.9-cp39-none-win_amd64.whl", hash = "sha256:920814e02e3dd7af12f0262bbc18b9fe353f75a0d0c237f6a67d270da1a1bb44"}, {file = "orjson-3.9.9.tar.gz", hash = "sha256:02e693843c2959befdd82d1ebae8b05ed12d1cb821605d5f9fe9f98ca5c9fd2b"}, ] -overrides = [ + +[[package]] +name = "overrides" +version = "7.4.0" +description = "A decorator to automatically detect mismatch when overriding a method." +optional = false +python-versions = ">=3.6" +files = [ {file = "overrides-7.4.0-py3-none-any.whl", hash = "sha256:3ad24583f86d6d7a49049695efe9933e67ba62f0c7625d53c59fa832ce4b8b7d"}, {file = "overrides-7.4.0.tar.gz", hash = "sha256:9502a3cca51f4fac40b5feca985b6703a5c1f6ad815588a7ca9e285b9dca6757"}, ] -packaging = [ + +[[package]] +name = "packaging" +version = "23.2" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.7" +files = [ {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, ] -pandas = [ + +[[package]] +name = "pandas" +version = "2.0.3" +description = "Powerful data structures for data analysis, time series, and statistics" +optional = false +python-versions = ">=3.8" +files = [ {file = "pandas-2.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e4c7c9f27a4185304c7caf96dc7d91bc60bc162221152de697c98eb0b2648dd8"}, {file = "pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f167beed68918d62bffb6ec64f2e1d8a7d297a038f86d4aed056b9493fca407f"}, {file = "pandas-2.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce0c6f76a0f1ba361551f3e6dceaff06bde7514a374aa43e33b588ec10420183"}, @@ -4530,31 +3173,133 @@ pandas = [ {file = "pandas-2.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:1168574b036cd8b93abc746171c9b4f1b83467438a5e45909fed645cf8692dbc"}, {file = "pandas-2.0.3.tar.gz", hash = "sha256:c02f372a88e0d17f36d3093a644c73cfc1788e876a7c4bcb4020a77512e2043c"}, ] -pandas-stubs = [ + +[package.dependencies] +numpy = [ + {version = ">=1.20.3", markers = "python_version < \"3.10\""}, + {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, + {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, +] +python-dateutil = ">=2.8.2" +pytz = ">=2020.1" +tzdata = ">=2022.1" + +[package.extras] +all = ["PyQt5 (>=5.15.1)", "SQLAlchemy (>=1.4.16)", "beautifulsoup4 (>=4.9.3)", "bottleneck (>=1.3.2)", "brotlipy (>=0.7.0)", "fastparquet (>=0.6.3)", "fsspec (>=2021.07.0)", "gcsfs (>=2021.07.0)", "html5lib (>=1.1)", 
"hypothesis (>=6.34.2)", "jinja2 (>=3.0.0)", "lxml (>=4.6.3)", "matplotlib (>=3.6.1)", "numba (>=0.53.1)", "numexpr (>=2.7.3)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pandas-gbq (>=0.15.0)", "psycopg2 (>=2.8.6)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)", "python-snappy (>=0.6.0)", "pyxlsb (>=1.0.8)", "qtpy (>=2.2.0)", "s3fs (>=2021.08.0)", "scipy (>=1.7.1)", "tables (>=3.6.1)", "tabulate (>=0.8.9)", "xarray (>=0.21.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=1.4.3)", "zstandard (>=0.15.2)"] +aws = ["s3fs (>=2021.08.0)"] +clipboard = ["PyQt5 (>=5.15.1)", "qtpy (>=2.2.0)"] +compression = ["brotlipy (>=0.7.0)", "python-snappy (>=0.6.0)", "zstandard (>=0.15.2)"] +computation = ["scipy (>=1.7.1)", "xarray (>=0.21.0)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pyxlsb (>=1.0.8)", "xlrd (>=2.0.1)", "xlsxwriter (>=1.4.3)"] +feather = ["pyarrow (>=7.0.0)"] +fss = ["fsspec (>=2021.07.0)"] +gcp = ["gcsfs (>=2021.07.0)", "pandas-gbq (>=0.15.0)"] +hdf5 = ["tables (>=3.6.1)"] +html = ["beautifulsoup4 (>=4.9.3)", "html5lib (>=1.1)", "lxml (>=4.6.3)"] +mysql = ["SQLAlchemy (>=1.4.16)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.0.0)", "tabulate (>=0.8.9)"] +parquet = ["pyarrow (>=7.0.0)"] +performance = ["bottleneck (>=1.3.2)", "numba (>=0.53.1)", "numexpr (>=2.7.1)"] +plot = ["matplotlib (>=3.6.1)"] +postgresql = ["SQLAlchemy (>=1.4.16)", "psycopg2 (>=2.8.6)"] +spss = ["pyreadstat (>=1.1.2)"] +sql-other = ["SQLAlchemy (>=1.4.16)"] +test = ["hypothesis (>=6.34.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.6.3)"] + +[[package]] +name = "pandas-stubs" +version = "2.0.2.230605" +description = "Type annotations for pandas" +optional = false +python-versions = ">=3.8" +files = [ {file = "pandas_stubs-2.0.2.230605-py3-none-any.whl", hash = "sha256:39106b602f3cb6dc5f728b84e1b32bde6ecf41ee34ee714c66228009609fbada"}, {file = "pandas_stubs-2.0.2.230605.tar.gz", hash = "sha256:624c7bb06d38145a44b61be459ccd19b038e0bf20364a025ecaab78fea65e858"}, ] -pathspec = [ + +[package.dependencies] +numpy = ">=1.24.3" +types-pytz = ">=2022.1.1" + +[[package]] +name = "pathspec" +version = "0.11.2" +description = "Utility library for gitignore style pattern matching of file paths." +optional = false +python-versions = ">=3.7" +files = [ {file = "pathspec-0.11.2-py3-none-any.whl", hash = "sha256:1d6ed233af05e679efb96b1851550ea95bbb64b7c490b0f5aa52996c11e92a20"}, {file = "pathspec-0.11.2.tar.gz", hash = "sha256:e0d8d0ac2f12da61956eb2306b69f9469b42f4deb0f3cb6ed47b9cce9996ced3"}, ] -pathvalidate = [ + +[[package]] +name = "pathvalidate" +version = "3.2.0" +description = "pathvalidate is a Python library to sanitize/validate a string such as filenames/file-paths/etc." 
+optional = false +python-versions = ">=3.7" +files = [ {file = "pathvalidate-3.2.0-py3-none-any.whl", hash = "sha256:cc593caa6299b22b37f228148257997e2fa850eea2daf7e4cc9205cef6908dee"}, {file = "pathvalidate-3.2.0.tar.gz", hash = "sha256:5e8378cf6712bff67fbe7a8307d99fa8c1a0cb28aa477056f8fc374f0dff24ad"}, ] -pbr = [ + +[package.extras] +docs = ["Sphinx (>=2.4)", "sphinx-rtd-theme (>=1.2.2)", "urllib3 (<2)"] +test = ["Faker (>=1.0.8)", "allpairspy (>=2)", "click (>=6.2)", "pytest (>=6.0.1)", "pytest-discord (>=0.1.4)", "pytest-md-report (>=0.4.1)"] + +[[package]] +name = "pbr" +version = "5.11.1" +description = "Python Build Reasonableness" +optional = false +python-versions = ">=2.6" +files = [ {file = "pbr-5.11.1-py2.py3-none-any.whl", hash = "sha256:567f09558bae2b3ab53cb3c1e2e33e726ff3338e7bae3db5dc954b3a44eef12b"}, {file = "pbr-5.11.1.tar.gz", hash = "sha256:aefc51675b0b533d56bb5fd1c8c6c0522fe31896679882e1c4c63d5e4a0fccb3"}, ] -pdf2image = [ + +[[package]] +name = "pdf2image" +version = "1.16.3" +description = "A wrapper around the pdftoppm and pdftocairo command line tools to convert PDF to a PIL Image list." +optional = false +python-versions = "*" +files = [ {file = "pdf2image-1.16.3-py3-none-any.whl", hash = "sha256:b6154164af3677211c22cbb38b2bd778b43aca02758e962fe1e231f6d3b0e380"}, {file = "pdf2image-1.16.3.tar.gz", hash = "sha256:74208810c2cef4d9e347769b8e62a52303982ddb4f2dfd744c7ab4b940ae287e"}, ] -pdfminer-six = [ + +[package.dependencies] +pillow = "*" + +[[package]] +name = "pdfminer-six" +version = "20221105" +description = "PDF parser and analyzer" +optional = false +python-versions = ">=3.6" +files = [ {file = "pdfminer.six-20221105-py3-none-any.whl", hash = "sha256:1eaddd712d5b2732f8ac8486824533514f8ba12a0787b3d5fe1e686cd826532d"}, {file = "pdfminer.six-20221105.tar.gz", hash = "sha256:8448ab7b939d18b64820478ecac5394f482d7a79f5f7eaa7703c6c959c175e1d"}, ] -pendulum = [ + +[package.dependencies] +charset-normalizer = ">=2.0.0" +cryptography = ">=36.0.0" + +[package.extras] +dev = ["black", "mypy (==0.931)", "nox", "pytest"] +docs = ["sphinx", "sphinx-argparse"] +image = ["Pillow"] + +[[package]] +name = "pendulum" +version = "2.1.2" +description = "Python datetimes made easy" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ {file = "pendulum-2.1.2-cp27-cp27m-macosx_10_15_x86_64.whl", hash = "sha256:b6c352f4bd32dff1ea7066bd31ad0f71f8d8100b9ff709fb343f3b86cee43efe"}, {file = "pendulum-2.1.2-cp27-cp27m-win_amd64.whl", hash = "sha256:318f72f62e8e23cd6660dbafe1e346950281a9aed144b5c596b2ddabc1d19739"}, {file = "pendulum-2.1.2-cp35-cp35m-macosx_10_15_x86_64.whl", hash = "sha256:0731f0c661a3cb779d398803655494893c9f581f6488048b3fb629c2342b5394"}, @@ -4577,7 +3322,18 @@ pendulum = [ {file = "pendulum-2.1.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:94b1fc947bfe38579b28e1cccb36f7e28a15e841f30384b5ad6c5e31055c85d7"}, {file = "pendulum-2.1.2.tar.gz", hash = "sha256:b06a0ca1bfe41c990bbf0c029f0b6501a7f2ec4e38bfec730712015e8860f207"}, ] -pillow = [ + +[package.dependencies] +python-dateutil = ">=2.6,<3.0" +pytzdata = ">=2020.1" + +[[package]] +name = "pillow" +version = "9.5.0" +description = "Python Imaging Library (Fork)" +optional = false +python-versions = ">=3.7" +files = [ {file = "Pillow-9.5.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:ace6ca218308447b9077c14ea4ef381ba0b67ee78d64046b3f19cf4e1139ad16"}, {file = "Pillow-9.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:d3d403753c9d5adc04d4694d35cf0391f0f3d57c8e0030aac09d7678fa8030aa"}, {file = "Pillow-9.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ba1b81ee69573fe7124881762bb4cd2e4b6ed9dd28c9c60a632902fe8db8b38"}, @@ -4645,31 +3401,118 @@ pillow = [ {file = "Pillow-9.5.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:1e7723bd90ef94eda669a3c2c19d549874dd5badaeefabefd26053304abe5799"}, {file = "Pillow-9.5.0.tar.gz", hash = "sha256:bf548479d336726d7a0eceb6e767e179fbde37833ae42794602631a070d630f1"}, ] -platformdirs = [ + +[package.extras] +docs = ["furo", "olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinx-removed-in", "sphinxext-opengraph"] +tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] + +[[package]] +name = "platformdirs" +version = "3.11.0" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +optional = false +python-versions = ">=3.7" +files = [ {file = "platformdirs-3.11.0-py3-none-any.whl", hash = "sha256:e9d171d00af68be50e9202731309c4e658fd8bc76f55c11c7dd760d023bda68e"}, {file = "platformdirs-3.11.0.tar.gz", hash = "sha256:cf8ee52a3afdb965072dcc652433e0c7e3e40cf5ea1477cd4b3b1d2eb75495b3"}, ] -pluggy = [ + +[package.extras] +docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.1)", "sphinx-autodoc-typehints (>=1.24)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)"] + +[[package]] +name = "pluggy" +version = "1.3.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"}, {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"}, ] -ply = [ + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "ply" +version = "3.11" +description = "Python Lex & Yacc" +optional = false +python-versions = "*" +files = [ {file = "ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce"}, {file = "ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3"}, ] -portalocker = [ + +[[package]] +name = "portalocker" +version = "2.8.2" +description = "Wraps the portalocker recipe for easy usage" +optional = false +python-versions = ">=3.8" +files = [ {file = "portalocker-2.8.2-py3-none-any.whl", hash = "sha256:cfb86acc09b9aa7c3b43594e19be1345b9d16af3feb08bf92f23d4dce513a28e"}, {file = "portalocker-2.8.2.tar.gz", hash = "sha256:2b035aa7828e46c58e9b31390ee1f169b98e1066ab10b9a6a861fe7e25ee4f33"}, ] -posthog = [ + +[package.dependencies] +pywin32 = {version = ">=226", markers = "platform_system == \"Windows\""} + +[package.extras] +docs = ["sphinx (>=1.7.1)"] +redis = ["redis"] +tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "pytest-timeout (>=2.1.0)", "redis", "sphinx (>=6.0.0)", "types-redis"] + +[[package]] +name = "posthog" +version = "3.0.2" +description = "Integrate PostHog into any python application." 
+optional = false +python-versions = "*" +files = [ {file = "posthog-3.0.2-py2.py3-none-any.whl", hash = "sha256:a8c0af6f2401fbe50f90e68c4143d0824b54e872de036b1c2f23b5abb39d88ce"}, {file = "posthog-3.0.2.tar.gz", hash = "sha256:701fba6e446a4de687c6e861b587e7b7741955ad624bf34fe013c06a0fec6fb3"}, ] -proto-plus = [ + +[package.dependencies] +backoff = ">=1.10.0" +monotonic = ">=1.5" +python-dateutil = ">2.1" +requests = ">=2.7,<3.0" +six = ">=1.5" + +[package.extras] +dev = ["black", "flake8", "flake8-print", "isort", "pre-commit"] +sentry = ["django", "sentry-sdk"] +test = ["coverage", "flake8", "freezegun (==0.3.15)", "mock (>=2.0.0)", "pylint", "pytest"] + +[[package]] +name = "proto-plus" +version = "1.22.3" +description = "Beautiful, Pythonic protocol buffers." +optional = false +python-versions = ">=3.6" +files = [ {file = "proto-plus-1.22.3.tar.gz", hash = "sha256:fdcd09713cbd42480740d2fe29c990f7fbd885a67efc328aa8be6ee3e9f76a6b"}, {file = "proto_plus-1.22.3-py3-none-any.whl", hash = "sha256:a49cd903bc0b6ab41f76bf65510439d56ca76f868adf0274e738bfdd096894df"}, ] -protobuf = [ + +[package.dependencies] +protobuf = ">=3.19.0,<5.0.0dev" + +[package.extras] +testing = ["google-api-core[grpc] (>=1.31.5)"] + +[[package]] +name = "protobuf" +version = "4.24.4" +description = "" +optional = false +python-versions = ">=3.7" +files = [ {file = "protobuf-4.24.4-cp310-abi3-win32.whl", hash = "sha256:ec9912d5cb6714a5710e28e592ee1093d68c5ebfeda61983b3f40331da0b1ebb"}, {file = "protobuf-4.24.4-cp310-abi3-win_amd64.whl", hash = "sha256:1badab72aa8a3a2b812eacfede5020472e16c6b2212d737cefd685884c191085"}, {file = "protobuf-4.24.4-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:8e61a27f362369c2f33248a0ff6896c20dcd47b5d48239cb9720134bef6082e4"}, @@ -4684,7 +3527,36 @@ protobuf = [ {file = "protobuf-4.24.4-py3-none-any.whl", hash = "sha256:80797ce7424f8c8d2f2547e2d42bfbb6c08230ce5832d6c099a37335c9c90a92"}, {file = "protobuf-4.24.4.tar.gz", hash = "sha256:5a70731910cd9104762161719c3d883c960151eea077134458503723b60e3667"}, ] -psycopg2-binary = [ + +[[package]] +name = "psycopg2" +version = "2.9.9" +description = "psycopg2 - Python-PostgreSQL Database Adapter" +optional = false +python-versions = ">=3.7" +files = [ + {file = "psycopg2-2.9.9-cp310-cp310-win32.whl", hash = "sha256:38a8dcc6856f569068b47de286b472b7c473ac7977243593a288ebce0dc89516"}, + {file = "psycopg2-2.9.9-cp310-cp310-win_amd64.whl", hash = "sha256:426f9f29bde126913a20a96ff8ce7d73fd8a216cfb323b1f04da402d452853c3"}, + {file = "psycopg2-2.9.9-cp311-cp311-win32.whl", hash = "sha256:ade01303ccf7ae12c356a5e10911c9e1c51136003a9a1d92f7aa9d010fb98372"}, + {file = "psycopg2-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:121081ea2e76729acfb0673ff33755e8703d45e926e416cb59bae3a86c6a4981"}, + {file = "psycopg2-2.9.9-cp312-cp312-win32.whl", hash = "sha256:d735786acc7dd25815e89cc4ad529a43af779db2e25aa7c626de864127e5a024"}, + {file = "psycopg2-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:a7653d00b732afb6fc597e29c50ad28087dcb4fbfb28e86092277a559ae4e693"}, + {file = "psycopg2-2.9.9-cp37-cp37m-win32.whl", hash = "sha256:5e0d98cade4f0e0304d7d6f25bbfbc5bd186e07b38eac65379309c4ca3193efa"}, + {file = "psycopg2-2.9.9-cp37-cp37m-win_amd64.whl", hash = "sha256:7e2dacf8b009a1c1e843b5213a87f7c544b2b042476ed7755be813eaf4e8347a"}, + {file = "psycopg2-2.9.9-cp38-cp38-win32.whl", hash = "sha256:ff432630e510709564c01dafdbe996cb552e0b9f3f065eb89bdce5bd31fabf4c"}, + {file = "psycopg2-2.9.9-cp38-cp38-win_amd64.whl", hash = 
"sha256:bac58c024c9922c23550af2a581998624d6e02350f4ae9c5f0bc642c633a2d5e"}, + {file = "psycopg2-2.9.9-cp39-cp39-win32.whl", hash = "sha256:c92811b2d4c9b6ea0285942b2e7cac98a59e166d59c588fe5cfe1eda58e72d59"}, + {file = "psycopg2-2.9.9-cp39-cp39-win_amd64.whl", hash = "sha256:de80739447af31525feddeb8effd640782cf5998e1a4e9192ebdf829717e3913"}, + {file = "psycopg2-2.9.9.tar.gz", hash = "sha256:d1454bde93fb1e224166811694d600e746430c006fbb031ea06ecc2ea41bf156"}, +] + +[[package]] +name = "psycopg2-binary" +version = "2.9.9" +description = "psycopg2 - Python-PostgreSQL Database Adapter" +optional = false +python-versions = ">=3.7" +files = [ {file = "psycopg2-binary-2.9.9.tar.gz", hash = "sha256:7f01846810177d829c7692f1f5ada8096762d9172af1b1a28d4ab5b77c923c1c"}, {file = "psycopg2_binary-2.9.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c2470da5418b76232f02a2fcd2229537bb2d5a7096674ce61859c3229f2eb202"}, {file = "psycopg2_binary-2.9.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c6af2a6d4b7ee9615cbb162b0738f6e1fd1f5c3eda7e5da17861eacf4c717ea7"}, @@ -4711,6 +3583,7 @@ psycopg2-binary = [ {file = "psycopg2_binary-2.9.9-cp311-cp311-win32.whl", hash = "sha256:dc4926288b2a3e9fd7b50dc6a1909a13bbdadfc67d93f3374d984e56f885579d"}, {file = "psycopg2_binary-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:b76bedd166805480ab069612119ea636f5ab8f8771e640ae103e05a4aae3e417"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:8532fd6e6e2dc57bcb3bc90b079c60de896d2128c5d9d6f24a63875a95a088cf"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b0605eaed3eb239e87df0d5e3c6489daae3f7388d455d0c0b4df899519c6a38d"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f8544b092a29a6ddd72f3556a9fcf249ec412e10ad28be6a0c0d948924f2212"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d423c8d8a3c82d08fe8af900ad5b613ce3632a1249fd6a223941d0735fce493"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e5afae772c00980525f6d6ecf7cbca55676296b580c0e6abb407f15f3706996"}, @@ -4719,6 +3592,8 @@ psycopg2-binary = [ {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:cb16c65dcb648d0a43a2521f2f0a2300f40639f6f8c1ecbc662141e4e3e1ee07"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:911dda9c487075abd54e644ccdf5e5c16773470a6a5d3826fda76699410066fb"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:57fede879f08d23c85140a360c6a77709113efd1c993923c59fde17aa27599fe"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-win32.whl", hash = "sha256:64cf30263844fa208851ebb13b0732ce674d8ec6a0c86a4e160495d299ba3c93"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:81ff62668af011f9a48787564ab7eded4e9fb17a4a6a74af5ffa6a457400d2ab"}, {file = "psycopg2_binary-2.9.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2293b001e319ab0d869d660a704942c9e2cce19745262a8aba2115ef41a0a42a"}, {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03ef7df18daf2c4c07e2695e8cfd5ee7f748a1d54d802330985a78d2a5a6dca9"}, {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a602ea5aff39bb9fac6308e9c9d82b9a35c2bf288e184a816002c9fae930b77"}, @@ -4755,10 +3630,28 @@ psycopg2-binary = [ {file = 
"psycopg2_binary-2.9.9-cp39-cp39-win32.whl", hash = "sha256:9dba73be7305b399924709b91682299794887cbbd88e38226ed9f6712eabee90"}, {file = "psycopg2_binary-2.9.9-cp39-cp39-win_amd64.whl", hash = "sha256:f7ae5d65ccfbebdfa761585228eb4d0df3a8b15cfb53bd953e713e09fbb12957"}, ] -psycopg2cffi = [ + +[[package]] +name = "psycopg2cffi" +version = "2.9.0" +description = ".. image:: https://travis-ci.org/chtd/psycopg2cffi.svg?branch=master" +optional = false +python-versions = "*" +files = [ {file = "psycopg2cffi-2.9.0.tar.gz", hash = "sha256:7e272edcd837de3a1d12b62185eb85c45a19feda9e62fa1b120c54f9e8d35c52"}, ] -pulsar-client = [ + +[package.dependencies] +cffi = ">=1.0" +six = "*" + +[[package]] +name = "pulsar-client" +version = "3.3.0" +description = "Apache Pulsar Python client library" +optional = false +python-versions = "*" +files = [ {file = "pulsar_client-3.3.0-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:c31afd3e67a044ff93177df89e08febf214cc965e95ede097d9fe8755af00e01"}, {file = "pulsar_client-3.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f66982284571674b215324cc26b5c2f7c56c7043113c47a7084cb70d67a8afb"}, {file = "pulsar_client-3.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7fe50a06f81c48a75a9b95c27a6446260039adca71d9face273740de96b2efca"}, @@ -4790,11 +3683,40 @@ pulsar-client = [ {file = "pulsar_client-3.3.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7e147e5ba460c1818bc05254279a885b4e552bcafb8961d40e31f98d5ff46628"}, {file = "pulsar_client-3.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:694530af1d6c75fb81456fb509778c1868adee31e997ddece6e21678200182ea"}, ] -pyairtable = [ + +[package.dependencies] +certifi = "*" + +[package.extras] +all = ["apache-bookkeeper-client (>=4.16.1)", "fastavro (==1.7.3)", "grpcio (>=1.8.2)", "prometheus-client", "protobuf (>=3.6.1,<=3.20.3)", "ratelimit"] +avro = ["fastavro (==1.7.3)"] +functions = ["apache-bookkeeper-client (>=4.16.1)", "grpcio (>=1.8.2)", "prometheus-client", "protobuf (>=3.6.1,<=3.20.3)", "ratelimit"] + +[[package]] +name = "pyairtable" +version = "2.1.0.post1" +description = "Python Client for the Airtable API" +optional = false +python-versions = "*" +files = [ {file = "pyairtable-2.1.0.post1-py2.py3-none-any.whl", hash = "sha256:a80eb85f7c020bf41679bb00ca57da11aeaa43769afbc73619276798a2ca182e"}, {file = "pyairtable-2.1.0.post1.tar.gz", hash = "sha256:e588249e68cf338dcdca9908537ed16d5a22ae72345ec930022b230ba96e5f84"}, ] -pyarrow = [ + +[package.dependencies] +inflection = "*" +pydantic = "*" +requests = ">=2.22.0" +typing-extensions = "*" +urllib3 = ">=1.26" + +[[package]] +name = "pyarrow" +version = "13.0.0" +description = "Python library for Apache Arrow" +optional = false +python-versions = ">=3.8" +files = [ {file = "pyarrow-13.0.0-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:1afcc2c33f31f6fb25c92d50a86b7a9f076d38acbcb6f9e74349636109550148"}, {file = "pyarrow-13.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:70fa38cdc66b2fc1349a082987f2b499d51d072faaa6b600f71931150de2e0e3"}, {file = "pyarrow-13.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd57b13a6466822498238877892a9b287b0a58c2e81e4bdb0b596dbb151cbb73"}, @@ -4825,26 +3747,77 @@ pyarrow = [ {file = "pyarrow-13.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:c51afd87c35c8331b56f796eff954b9c7f8d4b7fef5903daf4e05fcf017d23a8"}, {file = "pyarrow-13.0.0.tar.gz", hash = "sha256:83333726e83ed44b0ac94d8d7a21bbdee4a05029c3b1e8db58a863eec8fd8a33"}, ] -pyasn1 
= [ + +[package.dependencies] +numpy = ">=1.16.6" + +[[package]] +name = "pyasn1" +version = "0.5.0" +description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +files = [ {file = "pyasn1-0.5.0-py2.py3-none-any.whl", hash = "sha256:87a2121042a1ac9358cabcaf1d07680ff97ee6404333bacca15f76aa8ad01a57"}, {file = "pyasn1-0.5.0.tar.gz", hash = "sha256:97b7290ca68e62a832558ec3976f15cbf911bf5d7c7039d8b861c2a0ece69fde"}, ] -pyasn1-modules = [ + +[[package]] +name = "pyasn1-modules" +version = "0.3.0" +description = "A collection of ASN.1-based protocols modules" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +files = [ {file = "pyasn1_modules-0.3.0-py2.py3-none-any.whl", hash = "sha256:d3ccd6ed470d9ffbc716be08bd90efbd44d0734bc9303818f7336070984a162d"}, {file = "pyasn1_modules-0.3.0.tar.gz", hash = "sha256:5bd01446b736eb9d31512a30d46c1ac3395d676c6f3cafa4c03eb54b9925631c"}, ] -pycodestyle = [ + +[package.dependencies] +pyasn1 = ">=0.4.6,<0.6.0" + +[[package]] +name = "pycodestyle" +version = "2.11.1" +description = "Python style guide checker" +optional = false +python-versions = ">=3.8" +files = [ {file = "pycodestyle-2.11.1-py2.py3-none-any.whl", hash = "sha256:44fe31000b2d866f2e41841b18528a505fbd7fef9017b04eff4e2648a0fadc67"}, {file = "pycodestyle-2.11.1.tar.gz", hash = "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f"}, ] -pycountry = [ + +[[package]] +name = "pycountry" +version = "22.3.5" +description = "ISO country, subdivision, language, currency and script definitions and their translations" +optional = false +python-versions = ">=3.6, <4" +files = [ {file = "pycountry-22.3.5.tar.gz", hash = "sha256:b2163a246c585894d808f18783e19137cb70a0c18fb36748dc01fc6f109c1646"}, ] -pycparser = [ + +[package.dependencies] +setuptools = "*" + +[[package]] +name = "pycparser" +version = "2.21" +description = "C parser in Python" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, ] -pydantic = [ + +[[package]] +name = "pydantic" +version = "1.10.13" +description = "Data validation and settings management using python type hints" +optional = false +python-versions = ">=3.7" +files = [ {file = "pydantic-1.10.13-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:efff03cc7a4f29d9009d1c96ceb1e7a70a65cfe86e89d34e4a5f2ab1e5693737"}, {file = "pydantic-1.10.13-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3ecea2b9d80e5333303eeb77e180b90e95eea8f765d08c3d278cd56b00345d01"}, {file = "pydantic-1.10.13-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1740068fd8e2ef6eb27a20e5651df000978edce6da6803c2bef0bc74540f9548"}, @@ -4882,19 +3855,66 @@ pydantic = [ {file = "pydantic-1.10.13-py3-none-any.whl", hash = "sha256:b87326822e71bd5f313e7d3bfdc77ac3247035ac10b0c0618bd99dcf95b1e687"}, {file = "pydantic-1.10.13.tar.gz", hash = "sha256:32c8b48dcd3b2ac4e78b0ba4af3a2c2eb6048cb75202f0ea7b34feb740efc340"}, ] -pyflakes = [ + +[package.dependencies] +typing-extensions = ">=4.2.0" + +[package.extras] +dotenv = ["python-dotenv (>=0.10.4)"] +email = ["email-validator (>=1.0.3)"] + +[[package]] +name = "pyflakes" +version = "3.1.0" 
+description = "passive checker of Python programs" +optional = false +python-versions = ">=3.8" +files = [ {file = "pyflakes-3.1.0-py2.py3-none-any.whl", hash = "sha256:4132f6d49cb4dae6819e5379898f2b8cce3c5f23994194c24b77d5da2e36f774"}, {file = "pyflakes-3.1.0.tar.gz", hash = "sha256:a0aae034c444db0071aa077972ba4768d40c830d9539fd45bf4cd3f8f6992efc"}, ] -pygments = [ + +[[package]] +name = "pygments" +version = "2.16.1" +description = "Pygments is a syntax highlighting package written in Python." +optional = false +python-versions = ">=3.7" +files = [ {file = "Pygments-2.16.1-py3-none-any.whl", hash = "sha256:13fc09fa63bc8d8671a6d247e1eb303c4b343eaee81d861f3404db2935653692"}, {file = "Pygments-2.16.1.tar.gz", hash = "sha256:1daff0494820c69bc8941e407aa20f577374ee88364ee10a98fdbe0aece96e29"}, ] -pyjwt = [ + +[package.extras] +plugins = ["importlib-metadata"] + +[[package]] +name = "pyjwt" +version = "2.8.0" +description = "JSON Web Token implementation in Python" +optional = false +python-versions = ">=3.7" +files = [ {file = "PyJWT-2.8.0-py3-none-any.whl", hash = "sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320"}, {file = "PyJWT-2.8.0.tar.gz", hash = "sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de"}, ] -pymongo = [ + +[package.dependencies] +cryptography = {version = ">=3.4.0", optional = true, markers = "extra == \"crypto\""} + +[package.extras] +crypto = ["cryptography (>=3.4.0)"] +dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pytest (>=6.0.0,<7.0.0)", "sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] +docs = ["sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] +tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] + +[[package]] +name = "pymongo" +version = "4.5.0" +description = "Python driver for MongoDB " +optional = false +python-versions = ">=3.7" +files = [ {file = "pymongo-4.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2d4fa1b01fa7e5b7bb8d312e3542e211b320eb7a4e3d8dc884327039d93cb9e0"}, {file = "pymongo-4.5.0-cp310-cp310-manylinux1_i686.whl", hash = "sha256:dfcd2b9f510411de615ccedd47462dae80e82fdc09fe9ab0f0f32f11cf57eeb5"}, {file = "pymongo-4.5.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:3e33064f1984db412b34d51496f4ea785a9cff621c67de58e09fb28da6468a52"}, @@ -4925,6 +3945,7 @@ pymongo = [ {file = "pymongo-4.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6422b6763b016f2ef2beedded0e546d6aa6ba87910f9244d86e0ac7690f75c96"}, {file = "pymongo-4.5.0-cp312-cp312-win32.whl", hash = "sha256:77cfff95c1fafd09e940b3fdcb7b65f11442662fad611d0e69b4dd5d17a81c60"}, {file = "pymongo-4.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:e57d859b972c75ee44ea2ef4758f12821243e99de814030f69a3decb2aa86807"}, + {file = "pymongo-4.5.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8443f3a8ab2d929efa761c6ebce39a6c1dca1c9ac186ebf11b62c8fe1aef53f4"}, {file = "pymongo-4.5.0-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:2b0176f9233a5927084c79ff80b51bd70bfd57e4f3d564f50f80238e797f0c8a"}, {file = "pymongo-4.5.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:89b3f2da57a27913d15d2a07d58482f33d0a5b28abd20b8e643ab4d625e36257"}, {file = "pymongo-4.5.0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:5caee7bd08c3d36ec54617832b44985bd70c4cbd77c5b313de6f7fce0bb34f93"}, @@ -4977,59 +3998,226 @@ pymongo = [ {file = "pymongo-4.5.0-cp39-cp39-win_amd64.whl", hash = 
"sha256:63d8019eee119df308a075b8a7bdb06d4720bf791e2b73d5ab0e7473c115d79c"}, {file = "pymongo-4.5.0.tar.gz", hash = "sha256:681f252e43b3ef054ca9161635f81b730f4d8cadd28b3f2b2004f5a72f853982"}, ] -pymysql = [ + +[package.dependencies] +dnspython = ">=1.16.0,<3.0.0" + +[package.extras] +aws = ["pymongo-auth-aws (<2.0.0)"] +encryption = ["certifi", "pymongo[aws]", "pymongocrypt (>=1.6.0,<2.0.0)"] +gssapi = ["pykerberos", "winkerberos (>=0.5.0)"] +ocsp = ["certifi", "cryptography (>=2.5)", "pyopenssl (>=17.2.0)", "requests (<3.0.0)", "service-identity (>=18.1.0)"] +snappy = ["python-snappy"] +zstd = ["zstandard"] + +[[package]] +name = "pymysql" +version = "1.1.0" +description = "Pure Python MySQL Driver" +optional = false +python-versions = ">=3.7" +files = [ {file = "PyMySQL-1.1.0-py3-none-any.whl", hash = "sha256:8969ec6d763c856f7073c4c64662882675702efcb114b4bcbb955aea3a069fa7"}, {file = "PyMySQL-1.1.0.tar.gz", hash = "sha256:4f13a7df8bf36a51e81dd9f3605fede45a4878fe02f9236349fd82a3f0612f96"}, ] -pypandoc = [ + +[package.extras] +ed25519 = ["PyNaCl (>=1.4.0)"] +rsa = ["cryptography"] + +[[package]] +name = "pypandoc" +version = "1.11" +description = "Thin wrapper for pandoc." +optional = false +python-versions = ">=3.6" +files = [ {file = "pypandoc-1.11-py3-none-any.whl", hash = "sha256:b260596934e9cfc6513056110a7c8600171d414f90558bf4407e68b209be8007"}, {file = "pypandoc-1.11.tar.gz", hash = "sha256:7f6d68db0e57e0f6961bec2190897118c4d305fc2d31c22cd16037f22ee084a5"}, ] -pyparsing = [ + +[[package]] +name = "pyparsing" +version = "3.1.1" +description = "pyparsing module - Classes and methods to define and execute parsing grammars" +optional = false +python-versions = ">=3.6.8" +files = [ {file = "pyparsing-3.1.1-py3-none-any.whl", hash = "sha256:32c7c0b711493c72ff18a981d24f28aaf9c1fb7ed5e9667c9e84e3db623bdbfb"}, {file = "pyparsing-3.1.1.tar.gz", hash = "sha256:ede28a1a32462f5a9705e07aea48001a08f7cf81a021585011deba701581a0db"}, ] -pypdf2 = [ + +[package.extras] +diagrams = ["jinja2", "railroad-diagrams"] + +[[package]] +name = "pypdf2" +version = "3.0.1" +description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files" +optional = false +python-versions = ">=3.6" +files = [ {file = "PyPDF2-3.0.1.tar.gz", hash = "sha256:a74408f69ba6271f71b9352ef4ed03dc53a31aa404d29b5d31f53bfecfee1440"}, {file = "pypdf2-3.0.1-py3-none-any.whl", hash = "sha256:d16e4205cfee272fbdc0568b68d82be796540b1537508cef59388f839c191928"}, ] -pyreadline3 = [ + +[package.dependencies] +typing_extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""} + +[package.extras] +crypto = ["PyCryptodome"] +dev = ["black", "flit", "pip-tools", "pre-commit (<2.18.0)", "pytest-cov", "wheel"] +docs = ["myst_parser", "sphinx", "sphinx_rtd_theme"] +full = ["Pillow", "PyCryptodome"] +image = ["Pillow"] + +[[package]] +name = "pypgoutput" +version = "0.0.3" +description = "PostgreSQL CDC library using pgoutput and python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pypgoutput-0.0.3-py3-none-any.whl", hash = "sha256:8790f83cc15f4e52e9df9fea6a42cfb86d9839ac5c93b16587bd0950873904e0"}, + {file = "pypgoutput-0.0.3.tar.gz", hash = "sha256:0866a11ee4938a234bdac58624646760109348614b436612978b3496aae4fddb"}, +] + +[package.dependencies] +psycopg2 = "*" +pydantic = "*" + +[[package]] +name = "pyreadline3" +version = "3.4.1" +description = "A python implementation of GNU readline." 
+optional = false +python-versions = "*" +files = [ {file = "pyreadline3-3.4.1-py3-none-any.whl", hash = "sha256:b0efb6516fd4fb07b45949053826a62fa4cb353db5be2bbb4a7aa1fdd1e345fb"}, {file = "pyreadline3-3.4.1.tar.gz", hash = "sha256:6f3d1f7b8a31ba32b73917cefc1f28cc660562f39aea8646d30bd6eff21f7bae"}, ] -pytest = [ + +[[package]] +name = "pytest" +version = "7.4.2" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.7" +files = [ {file = "pytest-7.4.2-py3-none-any.whl", hash = "sha256:1d881c6124e08ff0a1bb75ba3ec0bfd8b5354a01c194ddd5a0a870a48d99b002"}, {file = "pytest-7.4.2.tar.gz", hash = "sha256:a766259cfab564a2ad52cb1aae1b881a75c3eb7e34ca3779697c23ed47c47069"}, ] -python-dateutil = [ + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} + +[package.extras] +testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, ] -python-docx = [ + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "python-docx" +version = "1.0.1" +description = "Create, read, and update Microsoft Word .docx files." 
+optional = false +python-versions = ">=3.7" +files = [ {file = "python-docx-1.0.1.tar.gz", hash = "sha256:255148e15a4414244ec75f50e92d19864e52a7416768c65491707a7414659524"}, {file = "python_docx-1.0.1-py3-none-any.whl", hash = "sha256:851340c49b36f917a1838a44c602a5a0702c0c3507b9890969545732dc10d2d1"}, ] -python-dotenv = [ + +[package.dependencies] +lxml = ">=3.1.0" +typing-extensions = "*" + +[[package]] +name = "python-dotenv" +version = "1.0.0" +description = "Read key-value pairs from a .env file and set them as environment variables" +optional = false +python-versions = ">=3.8" +files = [ {file = "python-dotenv-1.0.0.tar.gz", hash = "sha256:a8df96034aae6d2d50a4ebe8216326c61c3eb64836776504fcca410e5937a3ba"}, {file = "python_dotenv-1.0.0-py3-none-any.whl", hash = "sha256:f5971a9226b701070a4bf2c38c89e5a3f0d64de8debda981d1db98583009122a"}, ] -python-magic = [ + +[package.extras] +cli = ["click (>=5.0)"] + +[[package]] +name = "python-magic" +version = "0.4.27" +description = "File type identification using libmagic" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ {file = "python-magic-0.4.27.tar.gz", hash = "sha256:c1ba14b08e4a5f5c31a302b7721239695b2f0f058d125bd5ce1ee36b9d9d3c3b"}, {file = "python_magic-0.4.27-py2.py3-none-any.whl", hash = "sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3"}, ] -python-pptx = [ + +[[package]] +name = "python-pptx" +version = "0.6.22" +description = "Generate and manipulate Open XML PowerPoint (.pptx) files" +optional = false +python-versions = "*" +files = [ {file = "python-pptx-0.6.22.tar.gz", hash = "sha256:38f8ee92dde31d24b4562560e61b0357e5d97ecf75c4352ae6616d5a32978654"}, {file = "python_pptx-0.6.22-py3-none-any.whl", hash = "sha256:3d097c29e08de2da1fc3c6752169087065efa4153216e77fc1b27dff1bcdcb46"}, ] -pytz = [ + +[package.dependencies] +lxml = ">=3.1.0" +Pillow = ">=3.3.2,<=9.5.0" +XlsxWriter = ">=0.5.7" + +[[package]] +name = "pytz" +version = "2023.3.post1" +description = "World timezone definitions, modern and historical" +optional = false +python-versions = "*" +files = [ {file = "pytz-2023.3.post1-py2.py3-none-any.whl", hash = "sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7"}, {file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"}, ] -pytzdata = [ + +[[package]] +name = "pytzdata" +version = "2020.1" +description = "The Olson timezone database for Python." 
+optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ {file = "pytzdata-2020.1-py2.py3-none-any.whl", hash = "sha256:e1e14750bcf95016381e4d472bad004eef710f2d6417240904070b3d6654485f"}, {file = "pytzdata-2020.1.tar.gz", hash = "sha256:3efa13b335a00a8de1d345ae41ec78dd11c9f8807f522d39850f2dd828681540"}, ] -pywin32 = [ + +[[package]] +name = "pywin32" +version = "306" +description = "Python for Window Extensions" +optional = false +python-versions = "*" +files = [ {file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"}, {file = "pywin32-306-cp310-cp310-win_amd64.whl", hash = "sha256:84f4471dbca1887ea3803d8848a1616429ac94a4a8d05f4bc9c5dcfd42ca99c8"}, {file = "pywin32-306-cp311-cp311-win32.whl", hash = "sha256:e65028133d15b64d2ed8f06dd9fbc268352478d4f9289e69c190ecd6818b6407"}, @@ -5045,12 +4233,20 @@ pywin32 = [ {file = "pywin32-306-cp39-cp39-win32.whl", hash = "sha256:e25fd5b485b55ac9c057f67d94bc203f3f6595078d1fb3b458c9c28b7153a802"}, {file = "pywin32-306-cp39-cp39-win_amd64.whl", hash = "sha256:39b61c15272833b5c329a2989999dcae836b1eed650252ab1b7bfbe1d59f30f4"}, ] -pyyaml = [ + +[[package]] +name = "pyyaml" +version = "6.0.1" +description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.6" +files = [ {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -5058,8 +4254,15 @@ pyyaml = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = 
"sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -5076,6 +4279,7 @@ pyyaml = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -5083,11 +4287,19 @@ pyyaml = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, ] -regex = [ + +[[package]] 
+name = "regex" +version = "2023.10.3" +description = "Alternative regular expression module, to replace re." +optional = false +python-versions = ">=3.7" +files = [ {file = "regex-2023.10.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4c34d4f73ea738223a094d8e0ffd6d2c1a1b4c175da34d6b0de3d8d69bee6bcc"}, {file = "regex-2023.10.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a8f4e49fc3ce020f65411432183e6775f24e02dff617281094ba6ab079ef0915"}, {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4cd1bccf99d3ef1ab6ba835308ad85be040e6a11b0977ef7ea8c8005f01a3c29"}, @@ -5177,55 +4389,214 @@ regex = [ {file = "regex-2023.10.3-cp39-cp39-win_amd64.whl", hash = "sha256:adbccd17dcaff65704c856bd29951c58a1bd4b2b0f8ad6b826dbd543fe740988"}, {file = "regex-2023.10.3.tar.gz", hash = "sha256:3fef4f844d2290ee0ba57addcec17eec9e3df73f10a2748485dfd6a3a188cc0f"}, ] -requests = [ + +[[package]] +name = "requests" +version = "2.31.0" +description = "Python HTTP for Humans." +optional = false +python-versions = ">=3.7" +files = [ {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, ] -requests-file = [ + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "requests-file" +version = "1.5.1" +description = "File transport adapter for Requests" +optional = false +python-versions = "*" +files = [ {file = "requests-file-1.5.1.tar.gz", hash = "sha256:07d74208d3389d01c38ab89ef403af0cfec63957d53a0081d8eca738d0247d8e"}, {file = "requests_file-1.5.1-py2.py3-none-any.whl", hash = "sha256:dfe5dae75c12481f68ba353183c53a65e6044c923e64c24b2209f6c7570ca953"}, ] -requests-mock = [ + +[package.dependencies] +requests = ">=1.0.0" +six = "*" + +[[package]] +name = "requests-mock" +version = "1.11.0" +description = "Mock out responses from the requests package" +optional = false +python-versions = "*" +files = [ {file = "requests-mock-1.11.0.tar.gz", hash = "sha256:ef10b572b489a5f28e09b708697208c4a3b2b89ef80a9f01584340ea357ec3c4"}, {file = "requests_mock-1.11.0-py2.py3-none-any.whl", hash = "sha256:f7fae383f228633f6bececebdab236c478ace2284d6292c6e7e2867b9ab74d15"}, ] -requests-oauthlib = [ + +[package.dependencies] +requests = ">=2.3,<3" +six = "*" + +[package.extras] +fixture = ["fixtures"] +test = ["fixtures", "mock", "purl", "pytest", "requests-futures", "sphinx", "testtools"] + +[[package]] +name = "requests-oauthlib" +version = "1.3.1" +description = "OAuthlib authentication support for Requests." 
+optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ {file = "requests-oauthlib-1.3.1.tar.gz", hash = "sha256:75beac4a47881eeb94d5ea5d6ad31ef88856affe2332b9aafb52c6452ccf0d7a"}, {file = "requests_oauthlib-1.3.1-py2.py3-none-any.whl", hash = "sha256:2577c501a2fb8d05a304c09d090d6e47c306fef15809d102b327cf8364bddab5"}, ] -requests-toolbelt = [ + +[package.dependencies] +oauthlib = ">=3.0.0" +requests = ">=2.0.0" + +[package.extras] +rsa = ["oauthlib[signedtoken] (>=3.0.0)"] + +[[package]] +name = "requests-toolbelt" +version = "1.0.0" +description = "A utility belt for advanced users of python-requests" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ {file = "requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"}, {file = "requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06"}, ] -requirements-parser = [ + +[package.dependencies] +requests = ">=2.0.1,<3.0.0" + +[[package]] +name = "requirements-parser" +version = "0.5.0" +description = "This is a small Python module for parsing Pip requirement files." +optional = false +python-versions = ">=3.6,<4.0" +files = [ {file = "requirements-parser-0.5.0.tar.gz", hash = "sha256:3336f3a3ae23e06d3f0f88595e4052396e3adf91688787f637e5d2ca1a904069"}, {file = "requirements_parser-0.5.0-py3-none-any.whl", hash = "sha256:e7fcdcd04f2049e73a9fb150d8a0f9d51ce4108f5f7cbeac74c484e17b12bcd9"}, ] -rich = [ + +[package.dependencies] +types-setuptools = ">=57.0.0" + +[[package]] +name = "rich" +version = "13.6.0" +description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" +optional = false +python-versions = ">=3.7.0" +files = [ {file = "rich-13.6.0-py3-none-any.whl", hash = "sha256:2b38e2fe9ca72c9a00170a1a2d20c63c790d0e10ef1fe35eba76e1e7b1d7d245"}, {file = "rich-13.6.0.tar.gz", hash = "sha256:5c14d22737e6d5084ef4771b62d5d4363165b403455a30a1c8ca39dc7b644bef"}, ] -rsa = [ + +[package.dependencies] +markdown-it-py = ">=2.2.0" +pygments = ">=2.13.0,<3.0.0" +typing-extensions = {version = ">=4.0.0,<5.0", markers = "python_version < \"3.9\""} + +[package.extras] +jupyter = ["ipywidgets (>=7.5.1,<9)"] + +[[package]] +name = "rsa" +version = "4.9" +description = "Pure-Python RSA implementation" +optional = false +python-versions = ">=3.6,<4" +files = [ {file = "rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7"}, {file = "rsa-4.9.tar.gz", hash = "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"}, ] -s3fs = [ + +[package.dependencies] +pyasn1 = ">=0.1.3" + +[[package]] +name = "s3fs" +version = "2023.9.2" +description = "Convenient Filesystem interface over S3" +optional = false +python-versions = ">= 3.8" +files = [ {file = "s3fs-2023.9.2-py3-none-any.whl", hash = "sha256:d0e0ad0267820f4e9ff16556e004e6759010e92378aebe2ac5d71419a6ff5387"}, {file = "s3fs-2023.9.2.tar.gz", hash = "sha256:64cccead32a816422dd9ae1d693c5d6354d99f64ae26c56388f1d8e1c7858321"}, ] -semver = [ + +[package.dependencies] +aiobotocore = ">=2.5.4,<2.6.0" +aiohttp = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1" +fsspec = "2023.9.2" + +[package.extras] +awscli = ["aiobotocore[awscli] (>=2.5.4,<2.6.0)"] +boto3 = ["aiobotocore[boto3] (>=2.5.4,<2.6.0)"] + +[[package]] +name = "semver" +version = "3.0.2" +description = "Python helper for Semantic Versioning 
(https://semver.org)" +optional = false +python-versions = ">=3.7" +files = [ {file = "semver-3.0.2-py3-none-any.whl", hash = "sha256:b1ea4686fe70b981f85359eda33199d60c53964284e0cfb4977d243e37cf4bf4"}, {file = "semver-3.0.2.tar.gz", hash = "sha256:6253adb39c70f6e51afed2fa7152bcd414c411286088fb4b9effb133885ab4cc"}, ] -setuptools = [ + +[[package]] +name = "setuptools" +version = "68.2.2" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +optional = false +python-versions = ">=3.8" +files = [ {file = "setuptools-68.2.2-py3-none-any.whl", hash = "sha256:b454a35605876da60632df1a60f736524eb73cc47bbc9f3f1ef1b644de74fd2a"}, {file = "setuptools-68.2.2.tar.gz", hash = "sha256:4ac1475276d2f1c48684874089fefcd83bd7162ddaafb81fac866ba0db282a87"}, ] -simple-salesforce = [ + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.1)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] + +[[package]] +name = "simple-salesforce" +version = "1.12.5" +description = "A basic Salesforce.com REST API client." 
+optional = false +python-versions = "*" +files = [ {file = "simple-salesforce-1.12.5.tar.gz", hash = "sha256:ef65f72438e3b215619f6835d3d4356e147adf3a7ece6896d239127dd6aefcd1"}, {file = "simple_salesforce-1.12.5-py2.py3-none-any.whl", hash = "sha256:07029575385d04132babfd6e19c1c8068c859d616a45dab07bbf9875bdc5ab93"}, ] -simplejson = [ + +[package.dependencies] +cryptography = "*" +more-itertools = "*" +pendulum = "*" +pyjwt = "*" +requests = ">=2.22.0" +zeep = "*" + +[[package]] +name = "simplejson" +version = "3.19.2" +description = "Simple, fast, extensible JSON encoder/decoder for Python" +optional = false +python-versions = ">=2.5, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ {file = "simplejson-3.19.2-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:3471e95110dcaf901db16063b2e40fb394f8a9e99b3fe9ee3acc6f6ef72183a2"}, {file = "simplejson-3.19.2-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:3194cd0d2c959062b94094c0a9f8780ffd38417a5322450a0db0ca1a23e7fbd2"}, {file = "simplejson-3.19.2-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:8a390e56a7963e3946ff2049ee1eb218380e87c8a0e7608f7f8790ba19390867"}, @@ -5325,19 +4696,47 @@ simplejson = [ {file = "simplejson-3.19.2-py3-none-any.whl", hash = "sha256:bcedf4cae0d47839fee7de344f96b5694ca53c786f28b5f773d4f0b265a159eb"}, {file = "simplejson-3.19.2.tar.gz", hash = "sha256:9eb442a2442ce417801c912df68e1f6ccfcd41577ae7274953ab3ad24ef7d82c"}, ] -six = [ + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] -smmap = [ + +[[package]] +name = "smmap" +version = "5.0.1" +description = "A pure Python implementation of a sliding window memory map manager" +optional = false +python-versions = ">=3.7" +files = [ {file = "smmap-5.0.1-py3-none-any.whl", hash = "sha256:e6d8668fa5f93e706934a62d7b4db19c8d9eb8cf2adbb75ef1b675aa332b69da"}, {file = "smmap-5.0.1.tar.gz", hash = "sha256:dceeb6c0028fdb6734471eb07c0cd2aae706ccaecab45965ee83f11c8d3b1f62"}, ] -sniffio = [ + +[[package]] +name = "sniffio" +version = "1.3.0" +description = "Sniff out which async library your code is running under" +optional = false +python-versions = ">=3.7" +files = [ {file = "sniffio-1.3.0-py3-none-any.whl", hash = "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"}, {file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"}, ] -sqlalchemy = [ + +[[package]] +name = "sqlalchemy" +version = "2.0.22" +description = "Database Abstraction Library" +optional = false +python-versions = ">=3.7" +files = [ {file = "SQLAlchemy-2.0.22-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f146c61ae128ab43ea3a0955de1af7e1633942c2b2b4985ac51cc292daf33222"}, {file = "SQLAlchemy-2.0.22-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:875de9414393e778b655a3d97d60465eb3fae7c919e88b70cc10b40b9f56042d"}, {file = "SQLAlchemy-2.0.22-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:13790cb42f917c45c9c850b39b9941539ca8ee7917dacf099cc0b569f3d40da7"}, @@ -5388,31 +4787,130 @@ sqlalchemy = [ {file = "SQLAlchemy-2.0.22-py3-none-any.whl", hash = "sha256:3076740335e4aaadd7deb3fe6dcb96b3015f1613bd190a4e1634e1b99b02ec86"}, {file = 
"SQLAlchemy-2.0.22.tar.gz", hash = "sha256:5434cc601aa17570d79e5377f5fd45ff92f9379e2abed0be5e8c2fba8d353d2b"}, ] -starlette = [ + +[package.dependencies] +greenlet = {version = "!=0.4.17", markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\""} +typing-extensions = ">=4.2.0" + +[package.extras] +aiomysql = ["aiomysql (>=0.2.0)", "greenlet (!=0.4.17)"] +aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing-extensions (!=3.10.0.1)"] +asyncio = ["greenlet (!=0.4.17)"] +asyncmy = ["asyncmy (>=0.2.3,!=0.2.4,!=0.2.6)", "greenlet (!=0.4.17)"] +mariadb-connector = ["mariadb (>=1.0.1,!=1.1.2,!=1.1.5)"] +mssql = ["pyodbc"] +mssql-pymssql = ["pymssql"] +mssql-pyodbc = ["pyodbc"] +mypy = ["mypy (>=0.910)"] +mysql = ["mysqlclient (>=1.4.0)"] +mysql-connector = ["mysql-connector-python"] +oracle = ["cx-oracle (>=7)"] +oracle-oracledb = ["oracledb (>=1.0.1)"] +postgresql = ["psycopg2 (>=2.7)"] +postgresql-asyncpg = ["asyncpg", "greenlet (!=0.4.17)"] +postgresql-pg8000 = ["pg8000 (>=1.29.1)"] +postgresql-psycopg = ["psycopg (>=3.0.7)"] +postgresql-psycopg2binary = ["psycopg2-binary"] +postgresql-psycopg2cffi = ["psycopg2cffi"] +postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"] +pymysql = ["pymysql"] +sqlcipher = ["sqlcipher3-binary"] + +[[package]] +name = "starlette" +version = "0.20.4" +description = "The little ASGI library that shines." +optional = false +python-versions = ">=3.7" +files = [ {file = "starlette-0.20.4-py3-none-any.whl", hash = "sha256:c0414d5a56297d37f3db96a84034d61ce29889b9eaccf65eb98a0b39441fcaa3"}, {file = "starlette-0.20.4.tar.gz", hash = "sha256:42fcf3122f998fefce3e2c5ad7e5edbf0f02cf685d646a83a08d404726af5084"}, ] -stevedore = [ + +[package.dependencies] +anyio = ">=3.4.0,<5" +typing-extensions = {version = ">=3.10.0", markers = "python_version < \"3.10\""} + +[package.extras] +full = ["itsdangerous", "jinja2", "python-multipart", "pyyaml", "requests"] + +[[package]] +name = "stevedore" +version = "5.1.0" +description = "Manage dynamic plugins for Python applications" +optional = false +python-versions = ">=3.8" +files = [ {file = "stevedore-5.1.0-py3-none-any.whl", hash = "sha256:8cc040628f3cea5d7128f2e76cf486b2251a4e543c7b938f58d9a377f6694a2d"}, {file = "stevedore-5.1.0.tar.gz", hash = "sha256:a54534acf9b89bc7ed264807013b505bf07f74dbe4bcfa37d32bd063870b087c"}, ] -stripe = [ + +[package.dependencies] +pbr = ">=2.0.0,<2.1.0 || >2.1.0" + +[[package]] +name = "stripe" +version = "5.5.0" +description = "Python bindings for the Stripe API" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ {file = "stripe-5.5.0-py2.py3-none-any.whl", hash = "sha256:b4947da66dbb3de8969004ba6398f9a019c6b1b3ffe6aa88d5b07ac560a52b28"}, {file = "stripe-5.5.0.tar.gz", hash = "sha256:04a9732b37a46228ecf0e496163a3edd93596b0e6200029fbc48911638627e19"}, ] -sympy = [ + +[package.dependencies] +requests = {version = ">=2.20", markers = "python_version >= \"3.0\""} + +[[package]] +name = "sympy" +version = "1.12" +description = "Computer algebra system (CAS) in Python" +optional = false +python-versions = ">=3.8" +files = [ {file = "sympy-1.12-py3-none-any.whl", hash = "sha256:c3588cd4295d0c0f603d0f2ae780587e64e2efeedb3521e46b9bb1d08d184fa5"}, {file = "sympy-1.12.tar.gz", hash = "sha256:ebf595c8dac3e0fdc4152c51878b498396ec7f30e7a914d6071e674d49420fb8"}, ] -tabulate = [ + 
+[package.dependencies] +mpmath = ">=0.19" + +[[package]] +name = "tabulate" +version = "0.9.0" +description = "Pretty-print tabular data" +optional = false +python-versions = ">=3.7" +files = [ {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"}, {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"}, ] -tenacity = [ + +[package.extras] +widechars = ["wcwidth"] + +[[package]] +name = "tenacity" +version = "8.2.3" +description = "Retry code until it succeeds" +optional = false +python-versions = ">=3.7" +files = [ {file = "tenacity-8.2.3-py3-none-any.whl", hash = "sha256:ce510e327a630c9e1beaf17d42e6ffacc88185044ad85cf74c0a8887c6a0f88c"}, {file = "tenacity-8.2.3.tar.gz", hash = "sha256:5398ef0d78e63f40007c1fb4c0bff96e1911394d2fa8d194f77619c05ff6cc8a"}, ] -tiktoken = [ + +[package.extras] +doc = ["reno", "sphinx", "tornado (>=4.5)"] + +[[package]] +name = "tiktoken" +version = "0.4.0" +description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" +optional = false +python-versions = ">=3.8" +files = [ {file = "tiktoken-0.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:176cad7f053d2cc82ce7e2a7c883ccc6971840a4b5276740d0b732a2b2011f8a"}, {file = "tiktoken-0.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:450d504892b3ac80207700266ee87c932df8efea54e05cefe8613edc963c1285"}, {file = "tiktoken-0.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00d662de1e7986d129139faf15e6a6ee7665ee103440769b8dedf3e7ba6ac37f"}, @@ -5443,7 +4941,21 @@ tiktoken = [ {file = "tiktoken-0.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:c835d0ee1f84a5aa04921717754eadbc0f0a56cf613f78dfc1cf9ad35f6c3fea"}, {file = "tiktoken-0.4.0.tar.gz", hash = "sha256:59b20a819969735b48161ced9b92f05dc4519c17be4015cfb73b65270a243620"}, ] -tokenizers = [ + +[package.dependencies] +regex = ">=2022.1.18" +requests = ">=2.26.0" + +[package.extras] +blobfile = ["blobfile (>=2)"] + +[[package]] +name = "tokenizers" +version = "0.14.1" +description = "" +optional = false +python-versions = ">=3.7" +files = [ {file = "tokenizers-0.14.1-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:04ec1134a18ede355a05641cdc7700f17280e01f69f2f315769f02f7e295cf1e"}, {file = "tokenizers-0.14.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:638abedb39375f0ddce2de536fc9c976639b2d1b7202d715c2e7a25f0ebfd091"}, {file = "tokenizers-0.14.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:901635098565773a44f74068639d265f19deaaca47ea77b428fd9bee13a61d87"}, @@ -5543,67 +5055,259 @@ tokenizers = [ {file = "tokenizers-0.14.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:628b654ba555b2ba9111c0936d558b14bfc9d5f57b8c323b02fc846036b38b2f"}, {file = "tokenizers-0.14.1.tar.gz", hash = "sha256:ea3b3f8908a9a5b9d6fc632b5f012ece7240031c44c6d4764809f33736534166"}, ] -tomli = [ + +[package.dependencies] +huggingface_hub = ">=0.16.4,<0.18" + +[package.extras] +dev = ["tokenizers[testing]"] +docs = ["setuptools_rust", "sphinx", "sphinx_rtd_theme"] +testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] + +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, {file = "tomli-2.0.1.tar.gz", hash = 
"sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] -tomlkit = [ + +[[package]] +name = "tomlkit" +version = "0.12.1" +description = "Style preserving TOML library" +optional = false +python-versions = ">=3.7" +files = [ {file = "tomlkit-0.12.1-py3-none-any.whl", hash = "sha256:712cbd236609acc6a3e2e97253dfc52d4c2082982a88f61b640ecf0817eab899"}, {file = "tomlkit-0.12.1.tar.gz", hash = "sha256:38e1ff8edb991273ec9f6181244a6a391ac30e9f5098e7535640ea6be97a7c86"}, ] -tqdm = [ + +[[package]] +name = "tqdm" +version = "4.66.1" +description = "Fast, Extensible Progress Meter" +optional = false +python-versions = ">=3.7" +files = [ {file = "tqdm-4.66.1-py3-none-any.whl", hash = "sha256:d302b3c5b53d47bce91fea46679d9c3c6508cf6332229aa1e7d8653723793386"}, {file = "tqdm-4.66.1.tar.gz", hash = "sha256:d88e651f9db8d8551a62556d3cff9e3034274ca5d66e93197cf2490e2dcb69c7"}, ] -types-pytz = [ + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] + +[[package]] +name = "types-pytz" +version = "2023.3.1.1" +description = "Typing stubs for pytz" +optional = false +python-versions = "*" +files = [ {file = "types-pytz-2023.3.1.1.tar.gz", hash = "sha256:cc23d0192cd49c8f6bba44ee0c81e4586a8f30204970fc0894d209a6b08dab9a"}, {file = "types_pytz-2023.3.1.1-py3-none-any.whl", hash = "sha256:1999a123a3dc0e39a2ef6d19f3f8584211de9e6a77fe7a0259f04a524e90a5cf"}, ] -types-requests = [ + +[[package]] +name = "types-requests" +version = "2.31.0.6" +description = "Typing stubs for requests" +optional = false +python-versions = ">=3.7" +files = [ {file = "types-requests-2.31.0.6.tar.gz", hash = "sha256:cd74ce3b53c461f1228a9b783929ac73a666658f223e28ed29753771477b3bd0"}, {file = "types_requests-2.31.0.6-py3-none-any.whl", hash = "sha256:a2db9cb228a81da8348b49ad6db3f5519452dd20a9c1e1a868c83c5fe88fd1a9"}, ] -types-setuptools = [ + +[package.dependencies] +types-urllib3 = "*" + +[[package]] +name = "types-setuptools" +version = "68.2.0.0" +description = "Typing stubs for setuptools" +optional = false +python-versions = "*" +files = [ {file = "types-setuptools-68.2.0.0.tar.gz", hash = "sha256:a4216f1e2ef29d089877b3af3ab2acf489eb869ccaf905125c69d2dc3932fd85"}, {file = "types_setuptools-68.2.0.0-py3-none-any.whl", hash = "sha256:77edcc843e53f8fc83bb1a840684841f3dc804ec94562623bfa2ea70d5a2ba1b"}, ] -types-stripe = [ + +[[package]] +name = "types-stripe" +version = "3.5.2.14" +description = "Typing stubs for stripe" +optional = false +python-versions = "*" +files = [ {file = "types-stripe-3.5.2.14.tar.gz", hash = "sha256:bcc020aa5ba9acd796b9f2ac21f044c8e377ce2c0f570057f0f64c4b4637bbe7"}, {file = "types_stripe-3.5.2.14-py3-none-any.whl", hash = "sha256:f5f1249f72a35ada1db95523edc7e8f7b543dc8434b2ff23eaa9ec2e251c2e59"}, ] -types-urllib3 = [ + +[[package]] +name = "types-urllib3" +version = "1.26.25.14" +description = "Typing stubs for urllib3" +optional = false +python-versions = "*" +files = [ {file = "types-urllib3-1.26.25.14.tar.gz", hash = "sha256:229b7f577c951b8c1b92c1bc2b2fdb0b49847bd2af6d1cc2a2e3dd340f3bda8f"}, {file = "types_urllib3-1.26.25.14-py3-none-any.whl", hash = "sha256:9683bbb7fb72e32bfe9d2be6e04875fbe1b3eeec3cbb4ea231435aa7fd6b4f0e"}, ] -typing-extensions = [ + +[[package]] +name = "typing-extensions" +version = "4.8.0" +description = "Backported and Experimental Type Hints for Python 3.8+" 
+optional = false +python-versions = ">=3.8" +files = [ {file = "typing_extensions-4.8.0-py3-none-any.whl", hash = "sha256:8f92fc8806f9a6b641eaa5318da32b44d401efaac0f6678c9bc448ba3605faa0"}, {file = "typing_extensions-4.8.0.tar.gz", hash = "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"}, ] -typing-inspect = [ + +[[package]] +name = "typing-inspect" +version = "0.9.0" +description = "Runtime inspection utilities for typing module." +optional = false +python-versions = "*" +files = [ {file = "typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f"}, {file = "typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78"}, ] -tzdata = [ + +[package.dependencies] +mypy-extensions = ">=0.3.0" +typing-extensions = ">=3.7.4" + +[[package]] +name = "tzdata" +version = "2023.3" +description = "Provider of IANA time zone data" +optional = false +python-versions = ">=2" +files = [ {file = "tzdata-2023.3-py2.py3-none-any.whl", hash = "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"}, {file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"}, ] -unstructured = [ + +[[package]] +name = "unstructured" +version = "0.7.12" +description = "A library that prepares raw documents for downstream ML tasks." +optional = false +python-versions = ">=3.7.0" +files = [ {file = "unstructured-0.7.12-py3-none-any.whl", hash = "sha256:6dec4f23574e213f30bccb680a4fb84c95617092ce4abf5d8955cc71af402fef"}, {file = "unstructured-0.7.12.tar.gz", hash = "sha256:3dcddea34f52e1070f38fd10063b3b0f64bc4cbe5b778d6b86b5d33262d625cd"}, ] -uritemplate = [ + +[package.dependencies] +argilla = "*" +chardet = "*" +filetype = "*" +lxml = "*" +markdown = "*" +msg-parser = "*" +nltk = "*" +openpyxl = "*" +pandas = "*" +pdf2image = "*" +"pdfminer.six" = "*" +pillow = "*" +pypandoc = "*" +python-docx = "*" +python-magic = "*" +python-pptx = "*" +requests = "*" +tabulate = "*" +xlrd = "*" + +[package.extras] +azure = ["adlfs", "fsspec"] +discord = ["discord-py"] +dropbox = ["dropboxdrivefs", "fsspec"] +gcs = ["fsspec", "gcsfs"] +github = ["pygithub (==1.58.2)"] +gitlab = ["python-gitlab"] +google-drive = ["google-api-python-client"] +huggingface = ["langdetect", "sacremoses", "sentencepiece", "torch", "transformers"] +local-inference = ["unstructured-inference (==0.5.4)"] +reddit = ["praw"] +s3 = ["fsspec", "s3fs"] +slack = ["slack-sdk"] +wikipedia = ["wikipedia"] + +[[package]] +name = "uritemplate" +version = "4.1.1" +description = "Implementation of RFC 6570 URI Templates" +optional = false +python-versions = ">=3.6" +files = [ {file = "uritemplate-4.1.1-py2.py3-none-any.whl", hash = "sha256:830c08b8d99bdd312ea4ead05994a38e8936266f84b9a7878232db50b044e02e"}, {file = "uritemplate-4.1.1.tar.gz", hash = "sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0"}, ] -urllib3 = [ + +[[package]] +name = "urllib3" +version = "1.26.17" +description = "HTTP library with thread-safe connection pooling, file post, and more." 
+optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +files = [ {file = "urllib3-1.26.17-py2.py3-none-any.whl", hash = "sha256:94a757d178c9be92ef5539b8840d48dc9cf1b2709c9d6b588232a055c524458b"}, {file = "urllib3-1.26.17.tar.gz", hash = "sha256:24d6a242c28d29af46c3fae832c36db3bbebcc533dd1bb549172cd739c82df21"}, ] -uvicorn = [ + +[package.extras] +brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] + +[[package]] +name = "uvicorn" +version = "0.23.2" +description = "The lightning-fast ASGI server." +optional = false +python-versions = ">=3.8" +files = [ {file = "uvicorn-0.23.2-py3-none-any.whl", hash = "sha256:1f9be6558f01239d4fdf22ef8126c39cb1ad0addf76c40e760549d2c2f43ab53"}, {file = "uvicorn-0.23.2.tar.gz", hash = "sha256:4d3cc12d7727ba72b64d12d3cc7743124074c0a69f7b201512fc50c3e3f1569a"}, ] -uvloop = [ + +[package.dependencies] +click = ">=7.0" +colorama = {version = ">=0.4", optional = true, markers = "sys_platform == \"win32\" and extra == \"standard\""} +h11 = ">=0.8" +httptools = {version = ">=0.5.0", optional = true, markers = "extra == \"standard\""} +python-dotenv = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} +pyyaml = {version = ">=5.1", optional = true, markers = "extra == \"standard\""} +typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} +uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "(sys_platform != \"win32\" and sys_platform != \"cygwin\") and platform_python_implementation != \"PyPy\" and extra == \"standard\""} +watchfiles = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} +websockets = {version = ">=10.4", optional = true, markers = "extra == \"standard\""} + +[package.extras] +standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] + +[[package]] +name = "uvloop" +version = "0.18.0" +description = "Fast implementation of asyncio event loop on top of libuv" +optional = false +python-versions = ">=3.7.0" +files = [ {file = "uvloop-0.18.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1f354d669586fca96a9a688c585b6257706d216177ac457c92e15709acaece10"}, {file = "uvloop-0.18.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:280904236a5b333a273292b3bcdcbfe173690f69901365b973fa35be302d7781"}, {file = "uvloop-0.18.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad79cd30c7e7484bdf6e315f3296f564b3ee2f453134a23ffc80d00e63b3b59e"}, @@ -5641,7 +5345,18 @@ uvloop = [ {file = "uvloop-0.18.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:db1fcbad5deb9551e011ca589c5e7258b5afa78598174ac37a5f15ddcfb4ac7b"}, {file = "uvloop-0.18.0.tar.gz", hash = "sha256:d5d1135beffe9cd95d0350f19e2716bc38be47d5df296d7cc46e3b7557c0d1ff"}, ] -watchfiles = [ + +[package.extras] +docs = ["Sphinx (>=4.1.2,<4.2.0)", "sphinx-rtd-theme (>=0.5.2,<0.6.0)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)"] +test = ["Cython (>=0.29.36,<0.30.0)", "aiohttp (==3.9.0b0)", "aiohttp (>=3.8.1)", "flake8 (>=5.0,<6.0)", "mypy (>=0.800)", "psutil", "pyOpenSSL (>=23.0.0,<23.1.0)", "pycodestyle (>=2.9.0,<2.10.0)"] + +[[package]] +name = "watchfiles" +version = "0.21.0" +description = 
"Simple, modern and high performance file watching and code reload in python." +optional = false +python-versions = ">=3.8" +files = [ {file = "watchfiles-0.21.0-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:27b4035013f1ea49c6c0b42d983133b136637a527e48c132d368eb19bf1ac6aa"}, {file = "watchfiles-0.21.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c81818595eff6e92535ff32825f31c116f867f64ff8cdf6562cd1d6b2e1e8f3e"}, {file = "watchfiles-0.21.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6c107ea3cf2bd07199d66f156e3ea756d1b84dfd43b542b2d870b77868c98c03"}, @@ -5718,7 +5433,17 @@ watchfiles = [ {file = "watchfiles-0.21.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:43babacef21c519bc6631c5fce2a61eccdfc011b4bcb9047255e9620732c8097"}, {file = "watchfiles-0.21.0.tar.gz", hash = "sha256:c76c635fabf542bb78524905718c39f736a98e5ab25b23ec6d4abede1a85a6a3"}, ] -websockets = [ + +[package.dependencies] +anyio = ">=3.0.0" + +[[package]] +name = "websockets" +version = "11.0.3" +description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" +optional = false +python-versions = ">=3.7" +files = [ {file = "websockets-11.0.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3ccc8a0c387629aec40f2fc9fdcb4b9d5431954f934da3eaf16cdc94f67dbfac"}, {file = "websockets-11.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d67ac60a307f760c6e65dad586f556dde58e683fab03323221a4e530ead6f74d"}, {file = "websockets-11.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:84d27a4832cc1a0ee07cdcf2b0629a8a72db73f4cf6de6f0904f6661227f256f"}, @@ -5790,11 +5515,28 @@ websockets = [ {file = "websockets-11.0.3-py3-none-any.whl", hash = "sha256:6681ba9e7f8f3b19440921e99efbb40fc89f26cd71bf539e45d8c8a25c976dc6"}, {file = "websockets-11.0.3.tar.gz", hash = "sha256:88fc51d9a26b10fc331be344f1781224a375b78488fc343620184e95a4b27016"}, ] -wheel = [ + +[[package]] +name = "wheel" +version = "0.41.2" +description = "A built-package format for Python" +optional = false +python-versions = ">=3.7" +files = [ {file = "wheel-0.41.2-py3-none-any.whl", hash = "sha256:75909db2664838d015e3d9139004ee16711748a52c8f336b52882266540215d8"}, {file = "wheel-0.41.2.tar.gz", hash = "sha256:0c5ac5ff2afb79ac23ab82bab027a0be7b5dbcf2e54dc50efe4bf507de1f7985"}, ] -win-precise-time = [ + +[package.extras] +test = ["pytest (>=6.0.0)", "setuptools (>=65)"] + +[[package]] +name = "win-precise-time" +version = "1.4.2" +description = "" +optional = false +python-versions = ">=3.7" +files = [ {file = "win-precise-time-1.4.2.tar.gz", hash = "sha256:89274785cbc5f2997e01675206da3203835a442c60fd97798415c6b3c179c0b9"}, {file = "win_precise_time-1.4.2-cp310-cp310-win32.whl", hash = "sha256:7fa13a2247c2ef41cd5e9b930f40716eacc7fc1f079ea72853bd5613fe087a1a"}, {file = "win_precise_time-1.4.2-cp310-cp310-win_amd64.whl", hash = "sha256:bb8e44b0fc35fde268e8a781cdcd9f47d47abcd8089465d2d1d1063976411c8e"}, @@ -5809,7 +5551,14 @@ win-precise-time = [ {file = "win_precise_time-1.4.2-cp39-cp39-win32.whl", hash = "sha256:50d11a6ff92e1be96a8d4bee99ff6dc07a0ea0e2a392b0956bb2192e334f41ba"}, {file = "win_precise_time-1.4.2-cp39-cp39-win_amd64.whl", hash = "sha256:3f510fa92d9c39ea533c983e1d62c7bc66fdf0a3e3c3bdda48d4ebb634ff7034"}, ] -wrapt = [ + +[[package]] +name = "wrapt" +version = "1.15.0" +description = "Module for decorators, wrappers and monkey patching." 
+optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" +files = [ {file = "wrapt-1.15.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:ca1cccf838cd28d5a0883b342474c630ac48cac5df0ee6eacc9c7290f76b11c1"}, {file = "wrapt-1.15.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:e826aadda3cae59295b95343db8f3d965fb31059da7de01ee8d1c40a60398b29"}, {file = "wrapt-1.15.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:5fc8e02f5984a55d2c653f5fea93531e9836abbd84342c1d1e17abc4a15084c2"}, @@ -5886,15 +5635,41 @@ wrapt = [ {file = "wrapt-1.15.0-py3-none-any.whl", hash = "sha256:64b1df0f83706b4ef4cfb4fb0e4c2669100fd7ecacfb59e091fad300d4e04640"}, {file = "wrapt-1.15.0.tar.gz", hash = "sha256:d06730c6aed78cee4126234cf2d071e01b44b915e725a6cb439a879ec9754a3a"}, ] -xlrd = [ + +[[package]] +name = "xlrd" +version = "2.0.1" +description = "Library for developers to extract data from Microsoft Excel (tm) .xls spreadsheet files" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +files = [ {file = "xlrd-2.0.1-py2.py3-none-any.whl", hash = "sha256:6a33ee89877bd9abc1158129f6e94be74e2679636b8a205b43b85206c3f0bbdd"}, {file = "xlrd-2.0.1.tar.gz", hash = "sha256:f72f148f54442c6b056bf931dbc34f986fd0c3b0b6b5a58d013c9aef274d0c88"}, ] -xlsxwriter = [ + +[package.extras] +build = ["twine", "wheel"] +docs = ["sphinx"] +test = ["pytest", "pytest-cov"] + +[[package]] +name = "xlsxwriter" +version = "3.1.7" +description = "A Python module for creating Excel XLSX files." +optional = false +python-versions = ">=3.6" +files = [ {file = "XlsxWriter-3.1.7-py3-none-any.whl", hash = "sha256:8c730c4beb468696c4160aa1d6d168fb4c1a20dd972b212cd8cc1e74ddeab1b6"}, {file = "XlsxWriter-3.1.7.tar.gz", hash = "sha256:353042efb0f8551ce72baa087e98228f3394fcb380e8b96313edf1eec8d50823"}, ] -yarl = [ + +[[package]] +name = "yarl" +version = "1.9.2" +description = "Yet another URL library" +optional = false +python-versions = ">=3.7" +files = [ {file = "yarl-1.9.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8c2ad583743d16ddbdf6bb14b5cd76bf43b0d0006e918809d5d4ddf7bde8dd82"}, {file = "yarl-1.9.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:82aa6264b36c50acfb2424ad5ca537a2060ab6de158a5bd2a72a032cc75b9eb8"}, {file = "yarl-1.9.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c0c77533b5ed4bcc38e943178ccae29b9bcf48ffd1063f5821192f23a1bd27b9"}, @@ -5970,15 +5745,60 @@ yarl = [ {file = "yarl-1.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:61016e7d582bc46a5378ffdd02cd0314fb8ba52f40f9cf4d9a5e7dbef88dee18"}, {file = "yarl-1.9.2.tar.gz", hash = "sha256:04ab9d4b9f587c06d801c2abfe9317b77cdf996c65a90d5e84ecc45010823571"}, ] -zeep = [ + +[package.dependencies] +idna = ">=2.0" +multidict = ">=4.0" + +[[package]] +name = "zeep" +version = "4.2.1" +description = "A Python SOAP client" +optional = false +python-versions = ">=3.7" +files = [ {file = "zeep-4.2.1-py3-none-any.whl", hash = "sha256:6754feb4c34a4b6d65fbc359252bf6654dcce3937bf1d95aae4402a60a8f5939"}, {file = "zeep-4.2.1.tar.gz", hash = "sha256:72093acfdb1d8360ed400869b73fbf1882b95c4287f798084c42ee0c1ff0e425"}, ] -zipp = [ + +[package.dependencies] +attrs = ">=17.2.0" +isodate = ">=0.5.4" +lxml = ">=4.6.0" +platformdirs = ">=1.4.0" +pytz = "*" +requests = ">=2.7.0" +requests-file = ">=1.5.1" +requests-toolbelt = ">=0.7.1" + +[package.extras] +async = ["httpx (>=0.15.0)"] +docs = ["sphinx (>=1.4.0)"] +test = ["coverage[toml] (==5.2.1)", "flake8 (==3.8.3)", "flake8-blind-except (==0.1.1)", "flake8-debugger 
(==3.2.1)", "flake8-imports (==0.1.1)", "freezegun (==0.3.15)", "isort (==5.3.2)", "pretend (==1.0.9)", "pytest (==6.2.5)", "pytest-asyncio", "pytest-cov (==2.8.1)", "pytest-httpx", "requests-mock (>=0.7.0)"] +xmlsec = ["xmlsec (>=0.6.1)"] + +[[package]] +name = "zipp" +version = "3.17.0" +description = "Backport of pathlib-compatible object wrapper for zip files" +optional = false +python-versions = ">=3.8" +files = [ {file = "zipp-3.17.0-py3-none-any.whl", hash = "sha256:0e923e726174922dce09c53c59ad483ff7bbb8e572e00c7f7c46b88556409f31"}, {file = "zipp-3.17.0.tar.gz", hash = "sha256:84e64a1c28cf7e91ed2078bb8cc8c259cb19b76942096c8d7b84947690cabaf0"}, ] -zstandard = [ + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"] + +[[package]] +name = "zstandard" +version = "0.21.0" +description = "Zstandard bindings for Python" +optional = false +python-versions = ">=3.7" +files = [ {file = "zstandard-0.21.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:649a67643257e3b2cff1c0a73130609679a5673bf389564bc6d4b164d822a7ce"}, {file = "zstandard-0.21.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:144a4fe4be2e747bf9c646deab212666e39048faa4372abb6a250dab0f347a29"}, {file = "zstandard-0.21.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b72060402524ab91e075881f6b6b3f37ab715663313030d0ce983da44960a86f"}, @@ -6023,3 +5843,14 @@ zstandard = [ {file = "zstandard-0.21.0-cp39-cp39-win_amd64.whl", hash = "sha256:a8d200617d5c876221304b0e3fe43307adde291b4a897e7b0617a61611dfff6a"}, {file = "zstandard-0.21.0.tar.gz", hash = "sha256:f08e3a10d01a247877e4cb61a82a319ea746c356a3786558bed2481e6c405546"}, ] + +[package.dependencies] +cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\""} + +[package.extras] +cffi = ["cffi (>=1.11)"] + +[metadata] +lock-version = "2.0" +python-versions = ">=3.8.1,<3.13" +content-hash = "914945af0371b45dcaf91021d80adf793bd9732f686e94a9b6fd06b3783480f4" diff --git a/pyproject.toml b/pyproject.toml index f2cf9895d..886293ae7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,7 @@ confluent-kafka = "^2.3.0" [tool.poetry.group.sql_database.dependencies] sqlalchemy = ">=1.4" pymysql = "^1.0.3" +pypgoutput = "0.0.3" [tool.poetry.group.google_sheets.dependencies] google-api-python-client = "^2.78.0" diff --git a/sources/sql_database/exceptions.py b/sources/sql_database/exceptions.py new file mode 100644 index 000000000..6b23bc4b0 --- /dev/null +++ b/sources/sql_database/exceptions.py @@ -0,0 +1,46 @@ +class ReplicationConnectionError(Exception): + def __init__(self, msg: str = "") -> None: + msg = "Failed creating a connection for logical replication. " + msg + super().__init__(msg) + + +class CreatePublicationError(Exception): + def __init__(self, msg: str = "") -> None: + msg = "Failed creating publication for logical replication. " + msg + super().__init__(msg) + + +class CreatePublicationInsufficientPrivilegeError(CreatePublicationError): + def __init__(self, user: str, database: str) -> None: + self.user = user + self.database = database + super().__init__( + f'Make sure the user "{user}" has the CREATE privilege for database "{database}".' 
+ ) + + +class AddTableToPublicationError(Exception): + def __init__(self, table_name:str, publication_name: str, msg: str = "") -> None: + self.table_name = table_name + self.publication_name = publication_name + msg = f'Failed adding table "{table_name}" to publication "{publication_name}". ' + msg + super().__init__(msg) + + +class AddTableToPublicationInsufficientPrivilegeError(AddTableToPublicationError): + def __init__(self, table_name:str, publication_name: str, user: str) -> None: + self.table_name = table_name + self.publication_name = publication_name + super().__init__( + table_name, + publication_name, + f'Make sure the user "{user}" is owner of table "{table_name}".' + ) + + +class ReplicationSlotDoesNotExistError(Exception): + def __init__(self, slot_name: str) -> None: + self.slot_name = slot_name + super().__init__( + f'The replication slot "{slot_name}" does not exist on the Postgres instance.' + ) \ No newline at end of file diff --git a/sources/sql_database/pg_cdc_utils.py b/sources/sql_database/pg_cdc_utils.py new file mode 100644 index 000000000..863bdb7b8 --- /dev/null +++ b/sources/sql_database/pg_cdc_utils.py @@ -0,0 +1,250 @@ +from typing import Optional, Any, Dict + +import psycopg2 +from psycopg2.extras import LogicalReplicationConnection, ReplicationCursor +import pypgoutput +from pypgoutput.decoders import ( + Begin, Commit, Relation, Insert, Update, Delete, Truncate +) + +from dlt.common import logger +from dlt.common.typing import TDataItem, TDataItems +from dlt.common.schema.typing import TColumnSchema +from dlt.common.data_types.typing import TDataType + +from .exceptions import ( + ReplicationConnectionError, + CreatePublicationError, + CreatePublicationInsufficientPrivilegeError, + AddTableToPublicationError, + AddTableToPublicationInsufficientPrivilegeError, + ReplicationSlotDoesNotExistError, +) + + +def pg_to_dlt_type_mapper(pg_type_oid: int) -> TDataType: + """Maps postgres data type OID to dlt data type.""" + + def psy_to_dlt_type_mapper(psycopg2_type) -> TDataType: + """Maps psycopg2 data type to dlt data type.""" + + if psycopg2_type == psycopg2._psycopg.BOOLEAN: + return "bool" + return "bigint" + + psy = psycopg2.extensions.string_types[pg_type_oid] + return psy_to_dlt_type_mapper(psy) + + +def to_dlt_column_schema(col: pypgoutput.decoders.ColumnType) -> TColumnSchema: + """Converts pypgoutput ColumnType to dlt column schema.""" + + return { + "name": col.name, + "primary_key": bool(col.part_of_pkey), + "data_type": pg_to_dlt_type_mapper(col.type_id), + # "atttypmod": col.atttypmod + } + + +def cast_pg_to_json(val: Optional[str], data_type: TDataType) -> Any: + """Converts pgoutput's value string representation into JSON value.""" + + if data_type == "bool": + if val == "t": + return True + elif val == "f": + return False + return None + return val + + +def rep_conn( + database: str, user: str, password: str, host: str, port: str = 5432 +) -> psycopg2._psycopg.connection: + """Returns Postgres replication connection. + + Raises an error if the user does not have the LOGIN or REPLICATION attribute assigned. + """ + + try: + return psycopg2.connect( + database=database, + user=user, + password=password, + host=host, + port=port, + connection_factory=LogicalReplicationConnection, + ) + except psycopg2.OperationalError as e: + raise ReplicationConnectionError( + f'Make sure the user "{user}" has the LOGIN and REPLICATION attributes assigned.' 
+ ) from e + except Exception as e: + raise ReplicationConnectionError from e + + +def get_dsn_dict(conn: psycopg2._psycopg.connection) -> Dict[str, str]: + """Returns connection DSN as dictionary.""" + + return dict([pair.split("=") for pair in conn.dsn.split(" ")]) + + +def create_publication(name: str, cur: ReplicationCursor) -> None: + """Creates a publication for logical replication if it doesn't exist yet. + + Raises error if the user does not have the CREATE privilege for the database. + """ + + try: + cur.execute(f"CREATE PUBLICATION {name};") + except psycopg2.errors.DuplicateObject: # the publication already exists + pass + except psycopg2.errors.InsufficientPrivilege as e: + dsn = get_dsn_dict(cur.connection) + raise CreatePublicationInsufficientPrivilegeError(dsn["user"], dsn["dbname"]) from e + except Exception as e: + raise CreatePublicationError from e + + +def add_table_to_publication(name: str, publication_name: str, cur: ReplicationCursor) -> None: + """Adds a table to a publication for logical replication if the table is not a member yet. + + Raises error if the user is not owner of the table. + """ + + try: + cur.execute(f"ALTER PUBLICATION {publication_name} ADD TABLE {name};") + except psycopg2.errors.DuplicateObject: # table is already member of publication + pass + except psycopg2.errors.InsufficientPrivilege as e: + raise AddTableToPublicationInsufficientPrivilegeError( + name, publication_name, get_dsn_dict(cur.connection)["user"] + ) from e + except Exception as e: + raise AddTableToPublicationError(name, publication_name) from e + + +def create_replication_slot(name: str, cur: ReplicationCursor, output_plugin: str = "pgoutput") -> None: + """Creates a replication slot if it doesn't exist yet.""" + + try: + cur.create_replication_slot(name, output_plugin=output_plugin) + except psycopg2.errors.DuplicateObject: # the replication slot already exists + pass + + +def get_max_lsn(slot_name: str, options: Dict[str, str], cur: ReplicationCursor) -> Optional[int]: + """Returns maximum Log Sequence Number (LSN) in replication slot. + + Returns None if the replication slot is empty. + + Raises error if the replication slot does not exist. 
+ """ + + options_str = ", ".join( + f"'{x}'" for xs in list(map(list, options.items())) for x in xs # comma-separated value string + ) + try: + cur.execute( + # subtract '0/0' to convert pg_lsn type to int (https://stackoverflow.com/a/73738472) + "SELECT MAX(lsn) - '0/0' AS max_lsn " + f"FROM pg_logical_slot_peek_binary_changes('{slot_name}', NULL, NULL, {options_str});" + ) + return cur.fetchone()[0] + except psycopg2.errors.UndefinedObject as e: + raise ReplicationSlotDoesNotExistError(slot_name) from e + + +def cdc_rows( + conn: LogicalReplicationConnection, + publication_name: str, + slot_name: str, + upto_lsn: int, +) -> TDataItems: + + + cur = conn.cursor() + + + + + # drop_slot = False + # if drop_slot: + # try: + # cur.drop_replication_slot(slot_name) + # except psycopg2.ProgrammingError as e: + # print("e:", e) + + + options = {'publication_names': publication_name, 'proto_version': '1'} + # try: + # cur.start_replication(slot_name=slot_name, decode=False, options=options) + # except psycopg2.ProgrammingError: + # cur.create_replication_slot(slot_name, output_plugin='pgoutput') + # cur.start_replication(slot_name=slot_name, decode=False, options=options) + + + consumer = ReplicationConsumer(upto_lsn) + + try: + if consumer.upto_lsn is not None: + cur.consume_stream(consumer) + except psycopg2.extras.StopReplication: + print("StopReplication") + finally: + cur.close() + yield consumer.data_items + + +class ReplicationConsumer(object): + def __init__(self, upto_lsn): + self.upto_lsn = upto_lsn + self.relations = dict() + self.data_items: TDataItems = [] + + + def __call__(self, msg: psycopg2.extras.ReplicationMessage): + self.process_msg(msg) + # msg.cursor.send_feedback(flush_lsn=msg.data_start, force=True) + if msg.data_start == self.upto_lsn: + raise psycopg2.extras.StopReplication + + + def process_msg(self, msg: psycopg2.extras.ReplicationMessage): + op = (msg.payload[:1]).decode('utf-8') + if op == 'B': + print(Begin(msg.payload)) + elif op == "C": + print(Commit(msg.payload)) + elif op == "R": + self.process_relation(Relation(msg.payload)) + elif op == "I": + self.process_insert(Insert(msg.payload)) + elif op == "U": + print(Update(msg.payload)) + elif op == 'D': + print(Delete(msg.payload)) + elif op == 'T': + print(Truncate(msg.payload)) + else: + pass + + + def process_relation(self, decoded_msg): + column_schema_list = [to_dlt_column_schema(c) for c in decoded_msg.columns] + relation = {"name": decoded_msg.relation_name, "columns": column_schema_list} + self.relations[decoded_msg.relation_id] = relation + + + def process_insert(self, decoded_msg): + column_schema_list = self.relations[decoded_msg.relation_id]["columns"] + column_data_list = decoded_msg.new_tuple.column_data + columns = zip(column_schema_list, column_data_list) + + data_item: TDataItem = { + schema["name"]: cast_pg_to_json( + data.col_data, schema["data_type"] + ) for (schema, data) in columns + } + self.data_items.append(data_item) diff --git a/sources/sql_database_pipeline.py b/sources/sql_database_pipeline.py index 3511cd0e1..15f349fe7 100644 --- a/sources/sql_database_pipeline.py +++ b/sources/sql_database_pipeline.py @@ -206,12 +206,79 @@ def read_sql_x( print(info) +def tmp() -> None: + from sql_database.pg_cdc_utils import ( + cdc_rows, + rep_conn, + create_publication, + add_table_to_publication, + create_replication_slot, + get_max_lsn, + ) + + DATABASE = "dlt_data" + USER = "replication_reader" # CREATE USER replication_reader WITH PASSWORD 'replication_reader' LOGIN REPLICATION; + # USER = 
"loader" + PASSWORD = "replication_reader" + # PASSWORD = "loader" + HOST = "LOCALHOST" + PORT = '5432' + + + conn = rep_conn( + database=DATABASE, + user=USER, + password=PASSWORD, + host=HOST, + port=PORT, + ) + + publication_name = "foo" + table_name = "tmp" + slot_name = "bar" # "foo" + options = {'publication_names': publication_name, 'proto_version': '1'} + + cur = conn.cursor() + # cur.drop_replication_slot(slot_name) + create_replication_slot(slot_name, cur) + create_publication(publication_name, cur) + add_table_to_publication(table_name, publication_name, cur) + max_lsn = get_max_lsn(slot_name, options, cur) + print(max_lsn) + cur.start_replication(slot_name=slot_name, decode=False, options=options) + + assert False + slot_name = "foo" + + for row in cdc_rows(conn): + print(row) + + # pipeline = dlt.pipeline( + # pipeline_name="tmp", + # destination='duckdb', + # dataset_name="tmp", + # pipelines_dir="tmp", + # full_refresh=True, + # ) + + # tbl = sql_table( + # credentials="postgresql://loader:loader@localhost:5432/dlt_data", + # schema="tmp", + # table="tmp", + # ) + + # pipeline.run(tbl) + # print(pipeline.last_trace.last_normalize_info) + # print(pipeline.default_schema.to_pretty_yaml()) + + if __name__ == "__main__": # Load selected tables with different settings # load_select_tables_from_database() # load a table and select columns - select_columns() + # select_columns() + tmp() # Load tables with the standalone table resource # load_standalone_table_resource() From d218a3aec035f24a57fb129d77b7b65222a596a4 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Mon, 4 Mar 2024 00:28:46 +0100 Subject: [PATCH 02/38] WIP --- poetry.lock | 85 ++- pyproject.toml | 3 +- sources/.dlt/example.secrets.toml | 5 +- sources/sql_database/exceptions.py | 46 -- sources/sql_database/pg_cdc_utils.py | 250 --------- sources/sql_database/pg_replication/README.md | 13 + .../sql_database/pg_replication/__init__.py | 64 +++ .../sql_database/pg_replication/helpers.py | 528 ++++++++++++++++++ .../pg_replication/schema_types.py | 95 ++++ sources/sql_database/requirements.txt | 4 +- sources/sql_database_pipeline.py | 69 +-- tests/sql_database/pg_replication/__init__.py | 0 tests/sql_database/pg_replication/cases.py | 94 ++++ .../pg_replication/test_pg_replication.py | 157 ++++++ tests/utils.py | 11 +- 15 files changed, 1049 insertions(+), 375 deletions(-) delete mode 100644 sources/sql_database/exceptions.py delete mode 100644 sources/sql_database/pg_cdc_utils.py create mode 100644 sources/sql_database/pg_replication/README.md create mode 100644 sources/sql_database/pg_replication/__init__.py create mode 100644 sources/sql_database/pg_replication/helpers.py create mode 100644 sources/sql_database/pg_replication/schema_types.py create mode 100644 tests/sql_database/pg_replication/__init__.py create mode 100644 tests/sql_database/pg_replication/cases.py create mode 100644 tests/sql_database/pg_replication/test_pg_replication.py diff --git a/poetry.lock b/poetry.lock index 8d68a9f37..41397765b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -970,20 +970,23 @@ files = [ [[package]] name = "dlt" -version = "0.4.4" +version = "0.4.5" description = "dlt is an open-source python-first scalable data loading library that does not require any backend to run." 
optional = false python-versions = ">=3.8.1,<3.13" files = [ - {file = "dlt-0.4.4-py3-none-any.whl", hash = "sha256:dfa1d0fd1ba5e2741f0d58314ca56aad26ec25032039bc3fa5d873d4611d8568"}, - {file = "dlt-0.4.4.tar.gz", hash = "sha256:9a9619f78fe06cc157a23179b4fb17a059606e8c980756ea0652b167b91356fa"}, + {file = "dlt-0.4.5-py3-none-any.whl", hash = "sha256:622fb4a687f583efec2b2c6fa4c9561864a7caf620adb3ba4edb13e9df24eeca"}, + {file = "dlt-0.4.5.tar.gz", hash = "sha256:5d6ae7f510084d10c820bc3ca91c1d0708aaf55238d5587ca266583a3f2ea1ae"}, ] [package.dependencies] astunparse = ">=1.6.3" botocore = {version = ">=1.28", optional = true, markers = "extra == \"filesystem\" or extra == \"s3\" or extra == \"athena\""} click = ">=7.1" -duckdb = {version = ">=0.6.1,<0.10.0", optional = true, markers = "extra == \"duckdb\" or extra == \"motherduck\""} +duckdb = [ + {version = ">=0.6.1,<0.10.0", optional = true, markers = "python_version >= \"3.8\" and python_version < \"3.12\" and extra == \"duckdb\" or python_version >= \"3.8\" and python_version < \"3.12\" and extra == \"motherduck\""}, + {version = ">=0.10.0,<0.11.0", optional = true, markers = "python_version >= \"3.12\" and extra == \"duckdb\" or python_version >= \"3.12\" and extra == \"motherduck\""}, +] fsspec = ">=2022.4.0" gcsfs = {version = ">=2022.4.0", optional = true, markers = "extra == \"gcp\" or extra == \"bigquery\" or extra == \"gs\""} gitpython = ">=3.1.29" @@ -1023,11 +1026,11 @@ bigquery = ["gcsfs (>=2022.4.0)", "google-cloud-bigquery (>=2.26.0)", "grpcio (> cli = ["cron-descriptor (>=1.2.32)", "pipdeptree (>=2.9.0,<2.10)"] databricks = ["databricks-sql-connector (>=2.9.3,<3.0.0)"] dbt = ["dbt-athena-community (>=1.2.0)", "dbt-bigquery (>=1.2.0)", "dbt-core (>=1.2.0)", "dbt-databricks (>=1.7.3,<2.0.0)", "dbt-duckdb (>=1.2.0)", "dbt-redshift (>=1.2.0)", "dbt-snowflake (>=1.2.0)"] -duckdb = ["duckdb (>=0.6.1,<0.10.0)"] +duckdb = ["duckdb (>=0.10.0,<0.11.0)", "duckdb (>=0.6.1,<0.10.0)"] filesystem = ["botocore (>=1.28)", "s3fs (>=2022.4.0)"] gcp = ["gcsfs (>=2022.4.0)", "google-cloud-bigquery (>=2.26.0)", "grpcio (>=1.50.0)"] gs = ["gcsfs (>=2022.4.0)"] -motherduck = ["duckdb (>=0.6.1,<0.10.0)", "pyarrow (>=12.0.0)"] +motherduck = ["duckdb (>=0.10.0,<0.11.0)", "duckdb (>=0.6.1,<0.10.0)", "pyarrow (>=12.0.0)"] mssql = ["pyodbc (>=4.0.39,<5.0.0)"] parquet = ["pyarrow (>=12.0.0)"] postgres = ["psycopg2-binary (>=2.9.1)", "psycopg2cffi (>=2.9.0)"] @@ -1138,6 +1141,62 @@ files = [ {file = "duckdb-0.8.1.tar.gz", hash = "sha256:a54d37f4abc2afc4f92314aaa56ecf215a411f40af4bffe1e86bd25e62aceee9"}, ] +[[package]] +name = "duckdb" +version = "0.10.0" +description = "DuckDB in-process database" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "duckdb-0.10.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:bd0ffb3fddef0f72a150e4d76e10942a84a1a0447d10907df1621b90d6668060"}, + {file = "duckdb-0.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f3d709d5c7c1a12b5e10d0b05fa916c670cd2b50178e3696faa0cc16048a1745"}, + {file = "duckdb-0.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9114aa22ec5d591a20ce5184be90f49d8e5b5348ceaab21e102c54560d07a5f8"}, + {file = "duckdb-0.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77a37877efadf39caf7cadde0f430fedf762751b9c54750c821e2f1316705a21"}, + {file = "duckdb-0.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87cbc9e1d9c3fc9f14307bea757f99f15f46843c0ab13a6061354410824ed41f"}, + {file = 
"duckdb-0.10.0-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f0bfec79fed387201550517d325dff4fad2705020bc139d936cab08b9e845662"}, + {file = "duckdb-0.10.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c5622134d2d9796b15e09de810e450859d4beb46d9b861357ec9ae40a61b775c"}, + {file = "duckdb-0.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:089ee8e831ccaef1b73fc89c43b661567175eed0115454880bafed5e35cda702"}, + {file = "duckdb-0.10.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a05af63747f1d7021995f0811c333dee7316cec3b06c0d3e4741b9bdb678dd21"}, + {file = "duckdb-0.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:072d6eba5d8a59e0069a8b5b4252fed8a21f9fe3f85a9129d186a39b3d0aea03"}, + {file = "duckdb-0.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a77b85668f59b919042832e4659538337f1c7f197123076c5311f1c9cf077df7"}, + {file = "duckdb-0.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:96a666f1d2da65d03199a977aec246920920a5ea1da76b70ae02bd4fb1ffc48c"}, + {file = "duckdb-0.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ec76a4262b783628d26612d184834852d9c92fb203e91af789100c17e3d7173"}, + {file = "duckdb-0.10.0-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:009dd9d2cdbd3b061a9efbdfc79f2d1a8377bcf49f1e5f430138621f8c083a6c"}, + {file = "duckdb-0.10.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:878f06766088090dad4a2e5ee0081555242b2e8dcb29415ecc97e388cf0cf8d8"}, + {file = "duckdb-0.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:713ff0a1fb63a6d60f454acf67f31656549fb5d63f21ac68314e4f522daa1a89"}, + {file = "duckdb-0.10.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:9c0ee450dfedfb52dd4957244e31820feef17228da31af6d052979450a80fd19"}, + {file = "duckdb-0.10.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ff79b2ea9994398b545c0d10601cd73565fbd09f8951b3d8003c7c5c0cebc7cb"}, + {file = "duckdb-0.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6bdf1aa71b924ef651062e6b8ff9981ad85bec89598294af8a072062c5717340"}, + {file = "duckdb-0.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0265bbc8216be3ced7b377ba8847128a3fc0ef99798a3c4557c1b88e3a01c23"}, + {file = "duckdb-0.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d418a315a07707a693bd985274c0f8c4dd77015d9ef5d8d3da4cc1942fd82e0"}, + {file = "duckdb-0.10.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2828475a292e68c71855190b818aded6bce7328f79e38c04a0c75f8f1c0ceef0"}, + {file = "duckdb-0.10.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:c3aaeaae2eba97035c65f31ffdb18202c951337bf2b3d53d77ce1da8ae2ecf51"}, + {file = "duckdb-0.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:c51790aaaea97d8e4a58a114c371ed8d2c4e1ca7cbf29e3bdab6d8ccfc5afc1e"}, + {file = "duckdb-0.10.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8af1ae7cc77a12206b6c47ade191882cc8f49f750bb3e72bb86ac1d4fa89926a"}, + {file = "duckdb-0.10.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa4f7e8e8dc0e376aeb280b83f2584d0e25ec38985c27d19f3107b2edc4f4a97"}, + {file = "duckdb-0.10.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28ae942a79fad913defa912b56483cd7827a4e7721f4ce4bc9025b746ecb3c89"}, + {file = "duckdb-0.10.0-cp37-cp37m-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:01b57802898091455ca2a32c1335aac1e398da77c99e8a96a1e5de09f6a0add9"}, + {file = "duckdb-0.10.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:52e1ad4a55fa153d320c367046b9500578192e01c6d04308ba8b540441736f2c"}, + {file = "duckdb-0.10.0-cp37-cp37m-win_amd64.whl", hash = "sha256:904c47d04095af745e989c853f0bfc0776913dfc40dfbd2da7afdbbb5f67fed0"}, + {file = "duckdb-0.10.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:184ae7ea5874f3b8fa51ab0f1519bdd088a0b78c32080ee272b1d137e2c8fd9c"}, + {file = "duckdb-0.10.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bd33982ecc9bac727a032d6cedced9f19033cbad56647147408891eb51a6cb37"}, + {file = "duckdb-0.10.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f59bf0949899105dd5f8864cb48139bfb78454a8c017b8258ba2b5e90acf7afc"}, + {file = "duckdb-0.10.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:395f3b18948001e35dceb48a4423d574e38656606d033eef375408b539e7b076"}, + {file = "duckdb-0.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b8eb2b803be7ee1df70435c33b03a4598cdaf676cd67ad782b288dcff65d781"}, + {file = "duckdb-0.10.0-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:31b2ddd331801064326c8e3587a4db8a31d02aef11332c168f45b3bd92effb41"}, + {file = "duckdb-0.10.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:c8b89e76a041424b8c2026c5dc1f74b53fbbc6c6f650d563259885ab2e7d093d"}, + {file = "duckdb-0.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:79084a82f16c0a54f6bfb7ded5600400c2daa90eb0d83337d81a56924eaee5d4"}, + {file = "duckdb-0.10.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:79799b3a270dcd9070f677ba510f1e66b112df3068425691bac97c5e278929c7"}, + {file = "duckdb-0.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e8fc394bfe3434920cdbcfbdd0ac3ba40902faa1dbda088db0ba44003a45318a"}, + {file = "duckdb-0.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c116605551b4abf5786243a59bcef02bd69cc51837d0c57cafaa68cdc428aa0c"}, + {file = "duckdb-0.10.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3191170c3b0a43b0c12644800326f5afdea00d5a4621d59dbbd0c1059139e140"}, + {file = "duckdb-0.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fee69a50eb93c72dc77e7ab1fabe0c38d21a52c5da44a86aa217081e38f9f1bd"}, + {file = "duckdb-0.10.0-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c5f449e87dacb16b0d145dbe65fa6fdb5a55b2b6911a46d74876e445dd395bac"}, + {file = "duckdb-0.10.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4487d0df221b17ea4177ad08131bc606b35f25cfadf890987833055b9d10cdf6"}, + {file = "duckdb-0.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:c099ae2ff8fe939fda62da81704f91e2f92ac45e48dc0e37c679c9d243d01e65"}, + {file = "duckdb-0.10.0.tar.gz", hash = "sha256:c02bcc128002aa79e3c9d89b9de25e062d1096a8793bc0d7932317b7977f6845"}, +] + [[package]] name = "et-xmlfile" version = "1.1.0" @@ -4259,6 +4318,7 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -5106,6 +5166,17 @@ notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] telegram = ["requests"] +[[package]] +name = "types-psycopg2" +version = "2.9.21.20240218" +description = "Typing stubs for psycopg2" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-psycopg2-2.9.21.20240218.tar.gz", hash = "sha256:3084cd807038a62c80fb5be78b41d855b48a060316101ea59fd85c302efb57d4"}, + {file = "types_psycopg2-2.9.21.20240218-py3-none-any.whl", hash = "sha256:cac96264e063cbce28dee337a973d39e6df4ca671252343cb4f8e5ef6db5e67d"}, +] + [[package]] name = "types-pytz" version = "2023.3.1.1" @@ -5853,4 +5924,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.13" -content-hash = "914945af0371b45dcaf91021d80adf793bd9732f686e94a9b6fd06b3783480f4" +content-hash = "96e90dd7c2bc7d9d152dd2180ed4c41a4892cd3678817143fd6d78dc05f5b9fd" diff --git a/pyproject.toml b/pyproject.toml index 886293ae7..1178ba0a5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ packages = [{include = "sources"}] [tool.poetry.dependencies] python = ">=3.8.1,<3.13" -dlt = {version = "0.4.4", allow-prereleases = true, extras = ["redshift", "bigquery", "postgres", "duckdb", "s3", "gs"]} +dlt = {version = "0.4.5", allow-prereleases = true, extras = ["redshift", "bigquery", "postgres", "duckdb", "s3", "gs"]} [tool.poetry.group.dev.dependencies] mypy = "1.6.1" @@ -31,6 +31,7 @@ black = "^23.3.0" pypdf2 = "^3.0.1" greenlet = "<3.0.0" confluent-kafka = "^2.3.0" +types-psycopg2 = "^2.9.0" [tool.poetry.group.sql_database.dependencies] sqlalchemy = ">=1.4" diff --git a/sources/.dlt/example.secrets.toml b/sources/.dlt/example.secrets.toml index 63126e597..343af2cb4 100644 --- a/sources/.dlt/example.secrets.toml +++ b/sources/.dlt/example.secrets.toml @@ -19,4 +19,7 @@ location = "US" ## chess pipeline # the section below defines secrets for "chess_dlt_config_example" source in chess/__init__.py [sources.chess] -secret_str="secret string" # a string secret \ No newline at end of file +secret_str="secret string" # a string secret + +## local postgres as source +sql_database.credentials="postgresql://loader:loader@localhost:5432/dlt_data" \ No newline at end of file diff --git a/sources/sql_database/exceptions.py b/sources/sql_database/exceptions.py deleted file mode 100644 index 6b23bc4b0..000000000 --- a/sources/sql_database/exceptions.py +++ /dev/null @@ -1,46 +0,0 @@ -class ReplicationConnectionError(Exception): - def __init__(self, msg: str = "") -> None: - msg = "Failed creating a connection for logical replication. " + msg - super().__init__(msg) - - -class CreatePublicationError(Exception): - def __init__(self, msg: str = "") -> None: - msg = "Failed creating publication for logical replication. 
" + msg - super().__init__(msg) - - -class CreatePublicationInsufficientPrivilegeError(CreatePublicationError): - def __init__(self, user: str, database: str) -> None: - self.user = user - self.database = database - super().__init__( - f'Make sure the user "{user}" has the CREATE privilege for database "{database}".' - ) - - -class AddTableToPublicationError(Exception): - def __init__(self, table_name:str, publication_name: str, msg: str = "") -> None: - self.table_name = table_name - self.publication_name = publication_name - msg = f'Failed adding table "{table_name}" to publication "{publication_name}". ' + msg - super().__init__(msg) - - -class AddTableToPublicationInsufficientPrivilegeError(AddTableToPublicationError): - def __init__(self, table_name:str, publication_name: str, user: str) -> None: - self.table_name = table_name - self.publication_name = publication_name - super().__init__( - table_name, - publication_name, - f'Make sure the user "{user}" is owner of table "{table_name}".' - ) - - -class ReplicationSlotDoesNotExistError(Exception): - def __init__(self, slot_name: str) -> None: - self.slot_name = slot_name - super().__init__( - f'The replication slot "{slot_name}" does not exist on the Postgres instance.' - ) \ No newline at end of file diff --git a/sources/sql_database/pg_cdc_utils.py b/sources/sql_database/pg_cdc_utils.py deleted file mode 100644 index 863bdb7b8..000000000 --- a/sources/sql_database/pg_cdc_utils.py +++ /dev/null @@ -1,250 +0,0 @@ -from typing import Optional, Any, Dict - -import psycopg2 -from psycopg2.extras import LogicalReplicationConnection, ReplicationCursor -import pypgoutput -from pypgoutput.decoders import ( - Begin, Commit, Relation, Insert, Update, Delete, Truncate -) - -from dlt.common import logger -from dlt.common.typing import TDataItem, TDataItems -from dlt.common.schema.typing import TColumnSchema -from dlt.common.data_types.typing import TDataType - -from .exceptions import ( - ReplicationConnectionError, - CreatePublicationError, - CreatePublicationInsufficientPrivilegeError, - AddTableToPublicationError, - AddTableToPublicationInsufficientPrivilegeError, - ReplicationSlotDoesNotExistError, -) - - -def pg_to_dlt_type_mapper(pg_type_oid: int) -> TDataType: - """Maps postgres data type OID to dlt data type.""" - - def psy_to_dlt_type_mapper(psycopg2_type) -> TDataType: - """Maps psycopg2 data type to dlt data type.""" - - if psycopg2_type == psycopg2._psycopg.BOOLEAN: - return "bool" - return "bigint" - - psy = psycopg2.extensions.string_types[pg_type_oid] - return psy_to_dlt_type_mapper(psy) - - -def to_dlt_column_schema(col: pypgoutput.decoders.ColumnType) -> TColumnSchema: - """Converts pypgoutput ColumnType to dlt column schema.""" - - return { - "name": col.name, - "primary_key": bool(col.part_of_pkey), - "data_type": pg_to_dlt_type_mapper(col.type_id), - # "atttypmod": col.atttypmod - } - - -def cast_pg_to_json(val: Optional[str], data_type: TDataType) -> Any: - """Converts pgoutput's value string representation into JSON value.""" - - if data_type == "bool": - if val == "t": - return True - elif val == "f": - return False - return None - return val - - -def rep_conn( - database: str, user: str, password: str, host: str, port: str = 5432 -) -> psycopg2._psycopg.connection: - """Returns Postgres replication connection. - - Raises an error if the user does not have the LOGIN or REPLICATION attribute assigned. 
- """ - - try: - return psycopg2.connect( - database=database, - user=user, - password=password, - host=host, - port=port, - connection_factory=LogicalReplicationConnection, - ) - except psycopg2.OperationalError as e: - raise ReplicationConnectionError( - f'Make sure the user "{user}" has the LOGIN and REPLICATION attributes assigned.' - ) from e - except Exception as e: - raise ReplicationConnectionError from e - - -def get_dsn_dict(conn: psycopg2._psycopg.connection) -> Dict[str, str]: - """Returns connection DSN as dictionary.""" - - return dict([pair.split("=") for pair in conn.dsn.split(" ")]) - - -def create_publication(name: str, cur: ReplicationCursor) -> None: - """Creates a publication for logical replication if it doesn't exist yet. - - Raises error if the user does not have the CREATE privilege for the database. - """ - - try: - cur.execute(f"CREATE PUBLICATION {name};") - except psycopg2.errors.DuplicateObject: # the publication already exists - pass - except psycopg2.errors.InsufficientPrivilege as e: - dsn = get_dsn_dict(cur.connection) - raise CreatePublicationInsufficientPrivilegeError(dsn["user"], dsn["dbname"]) from e - except Exception as e: - raise CreatePublicationError from e - - -def add_table_to_publication(name: str, publication_name: str, cur: ReplicationCursor) -> None: - """Adds a table to a publication for logical replication if the table is not a member yet. - - Raises error if the user is not owner of the table. - """ - - try: - cur.execute(f"ALTER PUBLICATION {publication_name} ADD TABLE {name};") - except psycopg2.errors.DuplicateObject: # table is already member of publication - pass - except psycopg2.errors.InsufficientPrivilege as e: - raise AddTableToPublicationInsufficientPrivilegeError( - name, publication_name, get_dsn_dict(cur.connection)["user"] - ) from e - except Exception as e: - raise AddTableToPublicationError(name, publication_name) from e - - -def create_replication_slot(name: str, cur: ReplicationCursor, output_plugin: str = "pgoutput") -> None: - """Creates a replication slot if it doesn't exist yet.""" - - try: - cur.create_replication_slot(name, output_plugin=output_plugin) - except psycopg2.errors.DuplicateObject: # the replication slot already exists - pass - - -def get_max_lsn(slot_name: str, options: Dict[str, str], cur: ReplicationCursor) -> Optional[int]: - """Returns maximum Log Sequence Number (LSN) in replication slot. - - Returns None if the replication slot is empty. - - Raises error if the replication slot does not exist. 
- """ - - options_str = ", ".join( - f"'{x}'" for xs in list(map(list, options.items())) for x in xs # comma-separated value string - ) - try: - cur.execute( - # subtract '0/0' to convert pg_lsn type to int (https://stackoverflow.com/a/73738472) - "SELECT MAX(lsn) - '0/0' AS max_lsn " - f"FROM pg_logical_slot_peek_binary_changes('{slot_name}', NULL, NULL, {options_str});" - ) - return cur.fetchone()[0] - except psycopg2.errors.UndefinedObject as e: - raise ReplicationSlotDoesNotExistError(slot_name) from e - - -def cdc_rows( - conn: LogicalReplicationConnection, - publication_name: str, - slot_name: str, - upto_lsn: int, -) -> TDataItems: - - - cur = conn.cursor() - - - - - # drop_slot = False - # if drop_slot: - # try: - # cur.drop_replication_slot(slot_name) - # except psycopg2.ProgrammingError as e: - # print("e:", e) - - - options = {'publication_names': publication_name, 'proto_version': '1'} - # try: - # cur.start_replication(slot_name=slot_name, decode=False, options=options) - # except psycopg2.ProgrammingError: - # cur.create_replication_slot(slot_name, output_plugin='pgoutput') - # cur.start_replication(slot_name=slot_name, decode=False, options=options) - - - consumer = ReplicationConsumer(upto_lsn) - - try: - if consumer.upto_lsn is not None: - cur.consume_stream(consumer) - except psycopg2.extras.StopReplication: - print("StopReplication") - finally: - cur.close() - yield consumer.data_items - - -class ReplicationConsumer(object): - def __init__(self, upto_lsn): - self.upto_lsn = upto_lsn - self.relations = dict() - self.data_items: TDataItems = [] - - - def __call__(self, msg: psycopg2.extras.ReplicationMessage): - self.process_msg(msg) - # msg.cursor.send_feedback(flush_lsn=msg.data_start, force=True) - if msg.data_start == self.upto_lsn: - raise psycopg2.extras.StopReplication - - - def process_msg(self, msg: psycopg2.extras.ReplicationMessage): - op = (msg.payload[:1]).decode('utf-8') - if op == 'B': - print(Begin(msg.payload)) - elif op == "C": - print(Commit(msg.payload)) - elif op == "R": - self.process_relation(Relation(msg.payload)) - elif op == "I": - self.process_insert(Insert(msg.payload)) - elif op == "U": - print(Update(msg.payload)) - elif op == 'D': - print(Delete(msg.payload)) - elif op == 'T': - print(Truncate(msg.payload)) - else: - pass - - - def process_relation(self, decoded_msg): - column_schema_list = [to_dlt_column_schema(c) for c in decoded_msg.columns] - relation = {"name": decoded_msg.relation_name, "columns": column_schema_list} - self.relations[decoded_msg.relation_id] = relation - - - def process_insert(self, decoded_msg): - column_schema_list = self.relations[decoded_msg.relation_id]["columns"] - column_data_list = decoded_msg.new_tuple.column_data - columns = zip(column_schema_list, column_data_list) - - data_item: TDataItem = { - schema["name"]: cast_pg_to_json( - data.col_data, schema["data_type"] - ) for (schema, data) in columns - } - self.data_items.append(data_item) diff --git a/sources/sql_database/pg_replication/README.md b/sources/sql_database/pg_replication/README.md new file mode 100644 index 000000000..ba8d2bed9 --- /dev/null +++ b/sources/sql_database/pg_replication/README.md @@ -0,0 +1,13 @@ +## Prerequisites + +The Postgres user needs to have the `LOGIN` and `REPLICATION` attributes assigned: + +```sql +CREATE ROLE replication_user WITH LOGIN REPLICATION; +``` + +It also needs `CREATE` privilege on the database: + +```sql +GRANT CREATE ON DATABASE dlt_data TO replication_user; +``` diff --git 
a/sources/sql_database/pg_replication/__init__.py b/sources/sql_database/pg_replication/__init__.py
new file mode 100644
index 000000000..3e3a1e133
--- /dev/null
+++ b/sources/sql_database/pg_replication/__init__.py
@@ -0,0 +1,64 @@
+from typing import Optional, Sequence
+
+import dlt
+
+from dlt.common.schema.typing import TColumnNames
+from dlt.sources import DltResource
+from dlt.sources.credentials import ConnectionStringCredentials
+
+from .helpers import table_replication_items, TableChangesResourceConfiguration
+
+
+@dlt.sources.config.with_config(
+    sections=("sources", "sql_database"),
+    spec=TableChangesResourceConfiguration,
+)
+def table_changes(
+    credentials: ConnectionStringCredentials = dlt.secrets.value,
+    table: str = dlt.config.value,
+    primary_key: TColumnNames = None,
+    include_columns: Optional[Sequence[str]] = dlt.config.value,
+    slot_name: str = dlt.config.value,
+    publication_name: str = dlt.config.value,
+    upto_lsn: Optional[int] = None,
+) -> DltResource:
+    """Returns a dlt resource that yields data items for changes in a postgres table.
+
+    Relies on a dedicated replication slot and publication that publishes DML
+    operations (i.e. `insert`, `update`, and/or `delete`) for the table (helper
+    method `init_table_replication` can be used to set this up).
+    Uses `merge` write disposition to merge changes into destination table(s).
+
+    Args:
+        credentials (ConnectionStringCredentials): Postgres database credentials.
+        table (str): Name of the table that is replicated.
+        primary_key (TColumnNames): Names of one or multiple columns serving as
+            primary key on the table. Used to deduplicate data items in the `merge`
+            operation.
+        include_columns (Optional[Sequence[str]]): Optional sequence of names of
+            columns to include in the generated data items. Any columns not in the
+            sequence are excluded. If not provided, all columns are included.
+        slot_name (str): Name of the replication slot to consume replication
+            messages from. Each table is expected to have a dedicated slot.
+        publication_name (str): Name of the publication that publishes DML operations
+            for the table. Each table is expected to have a dedicated publication.
+        upto_lsn (Optional[int]): Optional integer LSN value up to which the replication
+            slot is consumed. If not provided, all messages in the slot are consumed,
+            ensuring all new changes in the source table are included.
+
+    Returns:
+        DltResource that yields data items for changes in the postgres table.
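+
+    A minimal usage sketch (the table, slot, and publication names are placeholders;
+    `pipeline` is an existing dlt pipeline and credentials are read from config):
+
+        changes = table_changes(
+            table="items",
+            primary_key="col1",
+            slot_name="_dlt_slot_my_schema_items",
+            publication_name="_dlt_pub_my_schema_items",
+        )
+        pipeline.run(changes)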
+ """ + return dlt.resource( + table_replication_items, + name=table, + write_disposition="merge", + primary_key=primary_key, + columns={"lsn": {"dedup_sort": "desc"}}, + )( + credentials=credentials, + slot_name=slot_name, + publication_name=publication_name, + include_columns=include_columns, + upto_lsn=upto_lsn, + ) diff --git a/sources/sql_database/pg_replication/helpers.py b/sources/sql_database/pg_replication/helpers.py new file mode 100644 index 000000000..fea9ca950 --- /dev/null +++ b/sources/sql_database/pg_replication/helpers.py @@ -0,0 +1,528 @@ +from typing import Optional, Dict, Iterator, Union, List, Tuple, Sequence, Any +from dlt.common.pendulum import pendulum +from copy import deepcopy +from dataclasses import dataclass, field + +import psycopg2 +from psycopg2.extras import ( + LogicalReplicationConnection, + ReplicationCursor, + ReplicationMessage, + StopReplication, +) +from pypgoutput.decoders import ( # type: ignore[import-untyped] + Begin, + Commit, + Relation, + Insert, + Update, + Delete, + Truncate, + ColumnData, +) + +import dlt + +from dlt.common import logger +from dlt.common.typing import TDataItem +from dlt.common.schema.typing import TTableSchema, TTableSchemaColumns +from dlt.common.data_writers.escape import escape_postgres_identifier +from dlt.common.configuration.specs import BaseConfiguration, configspec +from dlt.extract.typing import DataItemWithMeta +from dlt.sources import DltResource +from dlt.sources.credentials import ConnectionStringCredentials + +from .. import sql_table + +from .schema_types import _to_dlt_column_schema, _to_dlt_val + + +@configspec +class InitTableReplicationConfiguration(BaseConfiguration): + credentials: ConnectionStringCredentials + table: str + schema: str + include_columns: Optional[Sequence[str]] + + +@configspec +class TableChangesResourceConfiguration(BaseConfiguration): + credentials: ConnectionStringCredentials + table: str + slot_name: str + publication_name: str + + +@dlt.sources.config.with_config( + sections=("sources", "sql_database"), + spec=InitTableReplicationConfiguration, +) +def init_table_replication( + credentials: ConnectionStringCredentials = dlt.secrets.value, + table: str = dlt.config.value, + schema: str = dlt.config.value, + publish: str = "insert, update, delete", + persist_snapshot: bool = False, + include_columns: Optional[Sequence[str]] = dlt.config.value, + reset: bool = False, +) -> Tuple[str, str, Optional[DltResource]]: + """Initializes replication for a table. + + Creates a replication slot and publication dedicated to the table if they do not exist yet. + Does nothing if the slot and publication already exist and `reset` is set to `False`. + + Args: + credentials (ConnectionStringCredentials): Postgres database credentials. + table (str): Name of the table to initialize replication for. + schema (str): Name of the schema the table belongs to. + publish (str): Comma-separated string of DML operations. Can be used to + control which changes are included in the publication. Allowed operations + are `insert`, `update`, and `delete`. `truncate` is currently not + supported—messages of that type are ignored. + E.g. `publish="insert"` will create a publication that only publishes insert operations. + persist_snapshot (bool): Whether the table state in the exported snapshot + is persisted to a table. If true, a snapshot table is created in Postgres + and a DltResource object for this table is returned. 
The DltResource + can be used to perform an initial load of all data present in the table + at the moment the replication slot got created. + include_columns (Optional[Sequence[str]]): Optional sequence of names of + columns to include in the snapshot table. Any columns not in the sequence + are excluded. If not provided, all columns are included in the table. + reset (bool): Whether replication for the table is reset. Only relevant + if a replication slot and publication already exist for the table. If + set to True, the existing slot and publication are dropped and recreated. + + Returns: + Tuple with the names of the created slot and publication, and optionally + a table snapshot resource. + """ + slot_name, publication_name = _gen_table_replication_references(table, schema) + cur = _get_rep_conn(credentials).cursor() + if reset: + drop_replication_slot(slot_name, cur) + drop_publication(publication_name, cur) + create_publication(publication_name, cur, publish) + add_table_to_publication(table, schema, publication_name, cur) + rep_slot = create_replication_slot(slot_name, cur) + table_snapshot: DltResource = None + if persist_snapshot: + if rep_slot is None: + logger.warning( + "Cannot persist snapshot because it does not exist. " + f'The replication slot "{slot_name}" already existed prior to calling this function.' + ) + else: + # need separate session to read the snapshot: https://stackoverflow.com/q/75852587 + cur_snap = _get_conn(credentials).cursor() + snapshot_table_name = persist_snapshot_table( + snapshot_name=rep_slot["snapshot_name"], + table=table, + schema=schema, + cur=cur_snap, + include_columns=include_columns, + ) + table_snapshot = sql_table( + credentials=credentials, + table=snapshot_table_name, + schema=schema, + ) + table_snapshot.apply_hints(table_name=table) + return (slot_name, publication_name, table_snapshot) + + +def create_publication( + name: str, + cur: ReplicationCursor, + publish: str = "insert, update, delete", +) -> None: + """Creates a publication for logical replication if it doesn't exist yet. + + Does nothing if the publication already exists. + Raises error if the user does not have the CREATE privilege for the database. + """ + esc_name = escape_postgres_identifier(name) + try: + cur.execute(f"CREATE PUBLICATION {esc_name} WITH (publish = '{publish}');") + logger.info( + f"Successfully created publication {esc_name} with publish = '{publish}'." + ) + except psycopg2.errors.DuplicateObject: # the publication already exists + logger.info(f'Publication "{name}" already exists.') + + +def add_table_to_publication( + table_name: str, + schema_name: str, + publication_name: str, + cur: ReplicationCursor, +) -> None: + """Adds a table to a publication for logical replication if the table is not a member yet. + + Raises error if the user is not owner of the table. + """ + qual_name = _make_qualified_table_name(table_name, schema_name) + esc_pub_name = escape_postgres_identifier(publication_name) + try: + cur.execute(f"ALTER PUBLICATION {esc_pub_name} ADD TABLE {qual_name};") + logger.info( + f"Successfully added table {qual_name} to publication {esc_pub_name}." 
+ ) + except psycopg2.errors.DuplicateObject: # table is already member of publication + pass + + +def create_replication_slot( # type: ignore[return] + name: str, cur: ReplicationCursor, output_plugin: str = "pgoutput" +) -> Optional[Dict[str, str]]: + """Creates a replication slot if it doesn't exist yet.""" + try: + cur.create_replication_slot(name, output_plugin=output_plugin) + logger.info(f'Successfully created replication slot "{name}".') + result = cur.fetchone() + return { + "slot_name": result[0], + "consistent_point": result[1], + "snapshot_name": result[2], + "output_plugin": result[3], + } + except psycopg2.errors.DuplicateObject: # the replication slot already exists + logger.info( + f'Replication slot "{name}" cannot be created because it already exists.' + ) + + +def drop_replication_slot(name: str, cur: ReplicationCursor) -> None: + """Drops a replication slot if it exists.""" + try: + cur.drop_replication_slot(name) + logger.warning(f'Successfully dropped replication slot "{name}".') + except psycopg2.errors.UndefinedObject: # the replication slot does not exist + logger.warning( + f'Replication slot "{name}" cannot be dropped because it does not exist.' + ) + + +def drop_publication(name: str, cur: ReplicationCursor) -> None: + """Drops a publication if it exists.""" + esc_name = escape_postgres_identifier(name) + try: + cur.execute(f"DROP PUBLICATION {esc_name};") + cur.connection.commit() + logger.warning(f"Successfully dropped publication {esc_name}.") + except psycopg2.errors.UndefinedObject: # the publication does not exist + logger.warning( + f"Publication {esc_name} cannot be dropped because it does not exist." + ) + + +def persist_snapshot_table( + snapshot_name: str, + table: str, + schema: str, + cur: psycopg2.extensions.cursor, + include_columns: Optional[Sequence[str]] = None, +) -> str: + """Persists exported snapshot table state. + + Reads snapshot table content and copies it into new table. + """ + col_str = "*" + if include_columns is not None: + col_str = ", ".join(map(escape_postgres_identifier, include_columns)) + snapshot_table_name = f"{table}_snapshot_{snapshot_name}" + snapshot_qual_name = _make_qualified_table_name(snapshot_table_name, schema) + qual_name = _make_qualified_table_name(table, schema) + cur.execute( + f""" + START TRANSACTION ISOLATION LEVEL REPEATABLE READ; + SET TRANSACTION SNAPSHOT '{snapshot_name}'; + CREATE TABLE {snapshot_qual_name} AS SELECT {col_str} FROM {qual_name}; + """ + ) + cur.connection.commit() + logger.info(f"Successfully persisted snapshot table state in {snapshot_qual_name}.") + return snapshot_table_name + + +def get_max_lsn( + slot_name: str, + options: Dict[str, str], + credentials: ConnectionStringCredentials, +) -> Optional[int]: + """Returns maximum Log Sequence Number (LSN) in replication slot. + + Returns None if the replication slot is empty. + Does not consume the slot, i.e. messages are not flushed. + Raises error if the replication slot or publication does not exist. 
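+
+    An illustrative call (slot and publication names are placeholders; the
+    `options` dictionary mirrors the one built by `table_replication_items`):
+
+        get_max_lsn(
+            slot_name="_dlt_slot_my_schema_items",
+            options={"publication_names": "_dlt_pub_my_schema_items", "proto_version": "1"},
+            credentials=credentials,
+        )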
+ """ + # comma-separated value string + options_str = ", ".join( + f"'{x}'" for xs in list(map(list, options.items())) for x in xs # type: ignore[arg-type] + ) + cur = _get_conn(credentials).cursor() + cur.execute( + "SELECT MAX(lsn) - '0/0' AS max_lsn " # subtract '0/0' to convert pg_lsn type to int (https://stackoverflow.com/a/73738472) + f"FROM pg_logical_slot_peek_binary_changes('{slot_name}', NULL, NULL, {options_str});" + ) + lsn: int = cur.fetchone()[0] + cur.connection.close() + return lsn + + +def lsn_int_to_hex(lsn: int) -> str: + """Convert integer LSN to postgres' hexadecimal representation.""" + # https://stackoverflow.com/questions/66797767/lsn-external-representation. + hex_val = format(lsn, "x") + if len(hex_val) > 8: + return hex_val[:-8] + "/" + hex_val[-8:] + return "0/" + format(lsn, "x").zfill(8) + + +def advance_slot( + upto_lsn: int, + slot_name: str, + credentials: ConnectionStringCredentials, +) -> None: + """Advances position in the replication slot. + + Flushes all messages upto (and including) the message with LSN = `upto_lsn`. + This function is used as alternative to psycopg2's `send_feedback` method, because + the behavior of that method seems odd when used outside of `consume_stream`. + """ + if upto_lsn != 0: + cur = _get_conn(credentials).cursor() + cur.execute( + f"SELECT * FROM pg_replication_slot_advance('{slot_name}', '{lsn_int_to_hex(upto_lsn)}');" + ) + cur.connection.close() + + +def table_replication_items( + credentials: ConnectionStringCredentials, + slot_name: str, + publication_name: str, + include_columns: Optional[Sequence[str]] = None, + upto_lsn: Optional[int] = None, +) -> Iterator[Union[TDataItem, DataItemWithMeta]]: + """Yields data items from generator. + + Maintains LSN of last consumed message in state to track progress. + At start of the run, advances the slot upto last consumed message in previous run. + """ + # advance slot + flush_lsn = dlt.current.resource_state().setdefault("last_commit_lsn", 0) + advance_slot(flush_lsn, slot_name, credentials) + + # use max LSN in slot if `upto_lsn` was not provided + options = {"publication_names": publication_name, "proto_version": "1"} + if upto_lsn is None: + upto_lsn = get_max_lsn(slot_name, options, credentials) + + # check if there's anything to consume + if upto_lsn is not None and upto_lsn > flush_lsn: + gen = ItemGenerator(credentials, slot_name, options, upto_lsn, include_columns) + yield from gen + dlt.current.resource_state()["last_commit_lsn"] = gen.last_commit_lsn + + +def _get_conn( + credentials: ConnectionStringCredentials, connection_factory: Optional[Any] = None +) -> Union[psycopg2.extensions.connection, LogicalReplicationConnection]: + # returns a psycopg2 connection + return psycopg2.connect( # type: ignore[no-any-return] + database=credentials.database, + user=credentials.username, + password=credentials.password, + host=credentials.host, + port=credentials.port, + connection_factory=connection_factory, + ) + + +def _get_rep_conn( + credentials: ConnectionStringCredentials, +) -> LogicalReplicationConnection: + # returns a psycopg2 LogicalReplicationConnection + return _get_conn(credentials, LogicalReplicationConnection) # type: ignore[return-value] + + +def _make_qualified_table_name(table_name: str, schema_name: str) -> str: + return ( + escape_postgres_identifier(schema_name) + + "." 
+ + escape_postgres_identifier(table_name) + ) + + +def _gen_table_replication_references( + table_name: str, schema_name: str +) -> Tuple[str, str]: + # generate replication slot and publication names dedicated to a single table + slot_name = f"_dlt_slot_{schema_name}_{table_name}" + publication_name = f"_dlt_pub_{schema_name}_{table_name}" + return (slot_name, publication_name) + + +@dataclass +class ItemGenerator: + credentials: ConnectionStringCredentials + slot_name: str + options: Dict[str, str] + upto_lsn: Optional[int] = None + include_columns: Optional[Sequence[str]] = None + last_commit_lsn: Optional[int] = field(default=None, init=False) + + def __iter__(self) -> Iterator[Union[TDataItem, DataItemWithMeta]]: + """Consumes messages from replication slot and generates data items. + + Does not advance the slot. + Maintains LSN of last consumed Commit message in object state. + """ + cur = _get_rep_conn(self.credentials).cursor() + try: + consumed_all = False + consumer = MessageConsumer(self.upto_lsn, self.include_columns) + cur.start_replication( + slot_name=self.slot_name, decode=False, options=self.options + ) + cur.consume_stream(consumer) + except StopReplication: + # all messages upto `upto_lsn` have been successfully consumed + consumed_all = True + finally: + cur.connection.close() + if consumed_all: + for i in consumer.data_items: + yield i + self.last_commit_lsn = consumer.last_commit_lsn + + +class MessageConsumer: + """Consumes messages from a ReplicationCursor. + + Assumes all messages passed to __call__ have the same relation_id, i.e. they + belong to the same table. + """ + + def __init__( + self, + upto_lsn: int, + include_columns: Optional[Sequence[str]] = None, + ) -> None: + self.upto_lsn = upto_lsn + self.include_columns = include_columns + + # data_items attribute maintains all data items + self.data_items: List[Union[TDataItem, DataItemWithMeta]] = [] + # other attributes only maintain last-seen values + self.last_table_schema: TTableSchema + self.last_commit_ts: pendulum.DateTime + self.last_commit_lsn = None + + def __call__(self, msg: ReplicationMessage) -> None: + """Processes message received from stream. + + Breaks out of stream when `upto_lsn` is reached. + """ + self.process_msg(msg) + if msg.data_start == self.upto_lsn: + raise StopReplication + + def process_msg(self, msg: ReplicationMessage) -> None: + """Processes encoded replication message. + + Identifies message type and decodes accordingly. + Message treatment is different for various message types. + """ + op = (msg.payload[:1]).decode("utf-8") + if op == "B": + self.last_commit_ts = Begin(msg.payload).commit_ts + elif op == "C": + self.last_commit_lsn = msg.data_start + elif op == "R": + self.process_relation(Relation(msg.payload)) + elif op == "I": + self.process_change(Insert(msg.payload), msg.data_start) + elif op == "U": + self.process_change(Update(msg.payload), msg.data_start) + elif op == "D": + self.process_change(Delete(msg.payload), msg.data_start) + elif op == "T": + logger.warning( + "The truncate operation is currently not supported. " + "Truncate replication messages are ignored." + ) + + def process_relation(self, decoded_msg: Relation) -> None: + """Processes a replication message of type Relation. + + Stores table schema information from Relation message in object state + and adds meta data item to the relation's list of data items to update + the table schema. 
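+
+        For example, a Relation message for a hypothetical table "items" with
+        columns "id" and "val" is stored roughly as:
+
+            {"name": "items", "columns": {"id": {...}, "val": {...}}}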
+ """ + # store table schema information + columns = {c.name: _to_dlt_column_schema(c) for c in decoded_msg.columns} + self.last_table_schema = {"name": decoded_msg.relation_name, "columns": columns} + # add meta data item to update table schema + meta_data_item = self.gen_meta_data_item(columns, self.include_columns) + self.data_items.append(meta_data_item) + + def process_change( + self, decoded_msg: Union[Insert, Update, Delete], msg_start_lsn: int + ) -> None: + """Processes replication message of type Insert, Update, or Delete. + + Adds data item for inserted/updated/deleted record to instance attribute. + """ + if isinstance(decoded_msg, (Insert, Update)): + column_data = decoded_msg.new_tuple.column_data + elif isinstance(decoded_msg, Delete): + column_data = decoded_msg.old_tuple.column_data + data_item = self.gen_data_item( + data=column_data, + schema=self.last_table_schema["columns"], + lsn=msg_start_lsn, + commit_ts=self.last_commit_ts, + for_delete=isinstance(decoded_msg, Delete), + include_columns=self.include_columns, + ) + self.data_items.append(data_item) + + @staticmethod + def gen_data_item( + data: List[ColumnData], + schema: TTableSchemaColumns, + lsn: int, + commit_ts: pendulum.DateTime, + for_delete: bool, + include_columns: Optional[Sequence[str]] = None, + ) -> TDataItem: + """Generates data item from replication message data and corresponding metadata.""" + columns = zip(schema.values(), data) + data_item = { + schema["name"]: _to_dlt_val( + data.col_data, schema["data_type"], data.col_data_category + ) + for (schema, data) in columns + } + if include_columns is not None: + data_item = {k: v for k, v in data_item.items() if k in include_columns} + data_item["lsn"] = lsn + if for_delete: + data_item["deleted_ts"] = commit_ts + return data_item + + @staticmethod + def gen_meta_data_item( + columns: TTableSchemaColumns, + include_columns: Optional[Sequence[str]] = None, + ) -> DataItemWithMeta: + """Returns a data item containing only metadata, no data.""" + _columns = deepcopy(columns) + if include_columns is not None: + _columns = {k: v for k, v in columns.items() if k in include_columns} + return dlt.mark.with_hints( + [], + # write disposition needs to be explicitly set, else it defaults to "append" + dlt.mark.make_hints(write_disposition="merge", columns=_columns), + ) diff --git a/sources/sql_database/pg_replication/schema_types.py b/sources/sql_database/pg_replication/schema_types.py new file mode 100644 index 000000000..3f3d10d12 --- /dev/null +++ b/sources/sql_database/pg_replication/schema_types.py @@ -0,0 +1,95 @@ +import json +from typing import Optional, Any + +import pypgoutput # type: ignore[import-untyped] + +from dlt.common.data_types.typing import TDataType +from dlt.common.data_types.type_helpers import coerce_value +from dlt.common.schema.typing import TColumnSchema, TColumnType +from dlt.destinations.impl.postgres import capabilities +from dlt.destinations.impl.postgres.postgres import PostgresTypeMapper + + +# maps postgres type OID to type string +_PG_TYPES = { + 16: "boolean", + 17: "bytea", + 20: "bigint", + 21: "smallint", + 23: "integer", + 701: "double precision", + 1043: "character varying", + 1082: "date", + 1083: "time without time zone", + 1184: "timestamp with time zone", + 1700: "numeric", + 3802: "jsonb", +} + + +def _get_precision(type_id: int, atttypmod: int) -> Optional[int]: + # get precision from postgres type attributes: https://stackoverflow.com/a/3351120 + if type_id == 21: # smallint + return 16 + elif type_id == 23: # 
integer + return 32 + elif type_id == 20: # bigint + return 64 + if atttypmod != -1: + if type_id == 1700: # numeric + return ((atttypmod - 4) >> 16) & 65535 + elif type_id in ( + 1083, + 1184, + ): # time without time zone, timestamp with time zone + return atttypmod + elif type_id == 1043: # character varying + return atttypmod - 4 + return None + + +def _get_scale(type_id: int, atttypmod: int) -> Optional[int]: + # get scale from postgres type attributes: https://stackoverflow.com/a/3351120 + if atttypmod != -1: + if type_id in (21, 23, 20): # smallint, integer, bigint + return 0 + if type_id == 1700: # numeric + return (atttypmod - 4) & 65535 + return None + + +def _to_dlt_column_type(type_id: int, atttypmod: int) -> TColumnType: + # converts postgres type to dlt column type + pg_type = _PG_TYPES[type_id] + precision = _get_precision(type_id, atttypmod) + scale = _get_scale(type_id, atttypmod) + mapper = PostgresTypeMapper(capabilities()) + return mapper.from_db_type(pg_type, precision, scale) + + +def _to_dlt_column_schema(col: pypgoutput.decoders.ColumnType) -> TColumnSchema: + # converts pypgoutput ColumnType to dlt column schema + dlt_column_type = _to_dlt_column_type(col.type_id, col.atttypmod) + partial_column_schema = { + "name": col.name, + "primary_key": bool(col.part_of_pkey), + "nullable": not bool(col.part_of_pkey), + } + return {**dlt_column_type, **partial_column_schema} # type: ignore[typeddict-item] + + +def _to_dlt_val(val: str, data_type: TDataType, byte1: str) -> Any: + # converts pgoutput's text-formatted value into dlt-compatible data value + if byte1 == "n": + return None + elif byte1 == "t": + if data_type == "binary": + # https://www.postgresql.org/docs/current/datatype-binary.html#DATATYPE-BINARY-BYTEA-HEX-FORMAT + return bytes.fromhex(val.replace("\\x", "")) + elif data_type == "complex": + return json.loads(val) + return coerce_value(data_type, "text", val) + else: + raise ValueError( + f"Byte1 in replication message must be 'n' or 't', not '{byte1}'." 
+ ) diff --git a/sources/sql_database/requirements.txt b/sources/sql_database/requirements.txt index a9590fa6e..bc048d6cb 100644 --- a/sources/sql_database/requirements.txt +++ b/sources/sql_database/requirements.txt @@ -1,2 +1,4 @@ sqlalchemy>=1.4 -dlt>=0.3.5 +dlt>=0.4.5 +psycopg2>=2.9.9 +pypgoutput==0.0.3 diff --git a/sources/sql_database_pipeline.py b/sources/sql_database_pipeline.py index 15f349fe7..3511cd0e1 100644 --- a/sources/sql_database_pipeline.py +++ b/sources/sql_database_pipeline.py @@ -206,79 +206,12 @@ def read_sql_x( print(info) -def tmp() -> None: - from sql_database.pg_cdc_utils import ( - cdc_rows, - rep_conn, - create_publication, - add_table_to_publication, - create_replication_slot, - get_max_lsn, - ) - - DATABASE = "dlt_data" - USER = "replication_reader" # CREATE USER replication_reader WITH PASSWORD 'replication_reader' LOGIN REPLICATION; - # USER = "loader" - PASSWORD = "replication_reader" - # PASSWORD = "loader" - HOST = "LOCALHOST" - PORT = '5432' - - - conn = rep_conn( - database=DATABASE, - user=USER, - password=PASSWORD, - host=HOST, - port=PORT, - ) - - publication_name = "foo" - table_name = "tmp" - slot_name = "bar" # "foo" - options = {'publication_names': publication_name, 'proto_version': '1'} - - cur = conn.cursor() - # cur.drop_replication_slot(slot_name) - create_replication_slot(slot_name, cur) - create_publication(publication_name, cur) - add_table_to_publication(table_name, publication_name, cur) - max_lsn = get_max_lsn(slot_name, options, cur) - print(max_lsn) - cur.start_replication(slot_name=slot_name, decode=False, options=options) - - assert False - slot_name = "foo" - - for row in cdc_rows(conn): - print(row) - - # pipeline = dlt.pipeline( - # pipeline_name="tmp", - # destination='duckdb', - # dataset_name="tmp", - # pipelines_dir="tmp", - # full_refresh=True, - # ) - - # tbl = sql_table( - # credentials="postgresql://loader:loader@localhost:5432/dlt_data", - # schema="tmp", - # table="tmp", - # ) - - # pipeline.run(tbl) - # print(pipeline.last_trace.last_normalize_info) - # print(pipeline.default_schema.to_pretty_yaml()) - - if __name__ == "__main__": # Load selected tables with different settings # load_select_tables_from_database() # load a table and select columns - # select_columns() - tmp() + select_columns() # Load tables with the standalone table resource # load_standalone_table_resource() diff --git a/tests/sql_database/pg_replication/__init__.py b/tests/sql_database/pg_replication/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/sql_database/pg_replication/cases.py b/tests/sql_database/pg_replication/cases.py new file mode 100644 index 000000000..0514fcd86 --- /dev/null +++ b/tests/sql_database/pg_replication/cases.py @@ -0,0 +1,94 @@ +from typing import Iterator, List + +from dlt.common import Decimal +from dlt.common.schema import TColumnSchema, TTableSchemaColumns + + +TABLE_ROW_ALL_DATA_TYPES = { + "col1": 989127831, + "col2": 898912.821982, + "col3": True, + "col4": "2022-05-23T13:26:45.176451+00:00", + "col5": "string data \n \r \x8e 🦆", + "col6": Decimal("2323.34"), + "col7": b"binary data \n \r \x8e", + # "col8": 2**56 + 92093890840, # TODO: uncommment and make it work + "col9": { + "complex": [1, 2, 3, "a"], + "link": ( + "?commen\ntU\nrn=urn%3Ali%3Acomment%3A%28acti\012 \6" + " \\vity%3A69'08444473\n\n551163392%2C6n \r \x8e9085" + ), + }, + "col10": "2023-02-27", + "col11": "13:26:45.176451", + "col1_null": None, + "col2_null": None, + "col3_null": None, + "col4_null": None, + "col5_null": 
None, + "col6_null": None, + "col7_null": None, + "col8_null": None, + "col9_null": None, + "col10_null": None, + "col11_null": None, + "col1_precision": 22324, + "col4_precision": "2022-05-23T13:26:46.167231+00:00", + "col5_precision": "string data 2 \n \r \x8e 🦆", + "col6_precision": Decimal("2323.34"), + "col7_precision": b"binary data 2 \n \r \x8e", + "col11_precision": "13:26:45.176451", +} +TABLE_UPDATE: List[TColumnSchema] = [ + {"name": "col1", "data_type": "bigint", "nullable": False}, + {"name": "col2", "data_type": "double", "nullable": False}, + {"name": "col3", "data_type": "bool", "nullable": False}, + {"name": "col4", "data_type": "timestamp", "nullable": False}, + {"name": "col5", "data_type": "text", "nullable": False}, + {"name": "col6", "data_type": "decimal", "nullable": False}, + {"name": "col7", "data_type": "binary", "nullable": False}, + # {"name": "col8", "data_type": "wei", "nullable": False}, + {"name": "col9", "data_type": "complex", "nullable": False, "variant": True}, + {"name": "col10", "data_type": "date", "nullable": False}, + {"name": "col11", "data_type": "time", "nullable": False}, + {"name": "col1_null", "data_type": "bigint", "nullable": True}, + {"name": "col2_null", "data_type": "double", "nullable": True}, + {"name": "col3_null", "data_type": "bool", "nullable": True}, + {"name": "col4_null", "data_type": "timestamp", "nullable": True}, + {"name": "col5_null", "data_type": "text", "nullable": True}, + {"name": "col6_null", "data_type": "decimal", "nullable": True}, + {"name": "col7_null", "data_type": "binary", "nullable": True}, + {"name": "col8_null", "data_type": "wei", "nullable": True}, + {"name": "col9_null", "data_type": "complex", "nullable": True, "variant": True}, + {"name": "col10_null", "data_type": "date", "nullable": True}, + {"name": "col11_null", "data_type": "time", "nullable": True}, + { + "name": "col1_precision", + "data_type": "bigint", + "precision": 16, + "nullable": False, + }, + { + "name": "col4_precision", + "data_type": "timestamp", + "precision": 3, + "nullable": False, + }, + {"name": "col5_precision", "data_type": "text", "precision": 25, "nullable": False}, + { + "name": "col6_precision", + "data_type": "decimal", + "precision": 6, + "scale": 2, + "nullable": False, + }, + { + "name": "col7_precision", + "data_type": "binary", + "precision": 19, + "nullable": False, + }, + {"name": "col11_precision", "data_type": "time", "precision": 3, "nullable": False}, +] +TABLE_UPDATE_COLUMNS_SCHEMA: TTableSchemaColumns = {t["name"]: t for t in TABLE_UPDATE} diff --git a/tests/sql_database/pg_replication/test_pg_replication.py b/tests/sql_database/pg_replication/test_pg_replication.py new file mode 100644 index 000000000..6e6eed663 --- /dev/null +++ b/tests/sql_database/pg_replication/test_pg_replication.py @@ -0,0 +1,157 @@ +import pytest + +from typing import Iterator +from copy import deepcopy + +import dlt + +from tests.utils import ( + ALL_DESTINATIONS, + assert_load_info, + load_table_counts, + select_data, +) +from sources.sql_database.pg_replication import table_changes +from sources.sql_database.pg_replication.helpers import ( + init_table_replication, + _gen_table_replication_references, +) + +from .cases import TABLE_ROW_ALL_DATA_TYPES, TABLE_UPDATE_COLUMNS_SCHEMA + + +TABLE_NAME = "items" + + +@pytest.fixture() +def src_pl() -> Iterator[dlt.Pipeline]: + # setup + src_pl = dlt.pipeline( + destination="postgres", dataset_name="src_pl", full_refresh=True + ) + yield src_pl + # teardown + with src_pl.sql_client() as c: + 
c.drop_dataset() + slot_name, publication_name = _gen_table_replication_references( + TABLE_NAME, src_pl.dataset_name + ) + c.execute_sql(f"SELECT pg_drop_replication_slot('{slot_name}');") + c.execute_sql(f"DROP PUBLICATION IF EXISTS {publication_name};") + with c.with_staging_dataset(staging=True): + c.drop_dataset() + + +@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) +def test_all_data_types(src_pl: dlt.Pipeline, destination_name: str): + # resource to load data into postgres source table + @dlt.resource( + name=TABLE_NAME, + primary_key="col1", # will not be physically applied on Postgres table + write_disposition="merge", + columns=TABLE_UPDATE_COLUMNS_SCHEMA, + ) + def items(data): + yield data + + # create postgres table with single record containing all data types + data = TABLE_ROW_ALL_DATA_TYPES + src_pl.run(items(data)) + + # add primary key that serves as REPLICA IDENTITY, necessary when publishing UPDATEs and/or DELETEs + with src_pl.sql_client() as c: + qual_name = c.make_qualified_table_name(TABLE_NAME) + c.execute_sql(f"ALTER TABLE {qual_name} ADD PRIMARY KEY (col1);") + + # excludes dlt system columns from replication + include_columns = data.keys() + + # initialize table replication, persist snapshot for initial load + slot_name, publication_name, table_snapshot = init_table_replication( + table=TABLE_NAME, + schema=src_pl.dataset_name, + persist_snapshot=True, + include_columns=include_columns, + ) + table_snapshot.apply_hints( + columns=TABLE_UPDATE_COLUMNS_SCHEMA + ) # TODO: automatically get column schema from source table? + + # initial load + dest_pl = dlt.pipeline( + destination=destination_name, dataset_name="dest_pl", full_refresh=True + ) + info = dest_pl.run(table_snapshot) + assert_load_info(info) + assert load_table_counts(dest_pl, TABLE_NAME)[TABLE_NAME] == 1 + + # insert two records in postgres table + r1 = deepcopy(TABLE_ROW_ALL_DATA_TYPES) + r2 = deepcopy(TABLE_ROW_ALL_DATA_TYPES) + r1["col1"] = 1 + r2["col1"] = 2 + src_pl.run(items([r1, r2])) + + r = table_changes( + table=TABLE_NAME, + primary_key="col1", + include_columns=include_columns, + slot_name=slot_name, + publication_name=publication_name, + ) + info = dest_pl.run(r) + assert_load_info(info) + assert load_table_counts(dest_pl, TABLE_NAME)[TABLE_NAME] == 3 + + # compare observed with expected column types + observed = dest_pl.default_schema.get_table("items")["columns"] + for name, expected in TABLE_UPDATE_COLUMNS_SCHEMA.items(): + assert observed[name]["data_type"] == expected["data_type"] + # postgres bytea does not have precision + if expected.get("precision") is not None and expected["data_type"] != "binary": + assert observed[name]["precision"] == expected["precision"] + + # update two records in postgres table + # this does two deletes and two inserts because dlt implements "merge" and "delete-and-insert" + # as such, postgres will create four replication messages: two of type Delete and two of type Insert + r1["col2"] = 1.5 + r2["col3"] = False + src_pl.run(items([r1, r2])) + + info = dest_pl.run(r) + assert_load_info(info) + assert load_table_counts(dest_pl, TABLE_NAME)[TABLE_NAME] == 3 + + # compare observed records with expected records + qual_name = dest_pl.sql_client().make_qualified_table_name(TABLE_NAME) + observed = [ + {"col1": row[0], "col2": row[1], "col3": row[2]} + for row in select_data(dest_pl, f"SELECT col1, col2, col3 FROM {qual_name}") + ] + expected = [ + {"col1": 1, "col2": 1.5, "col3": True}, + {"col1": 2, "col2": 898912.821982, "col3": False}, + 
{"col1": 989127831, "col2": 898912.821982, "col3": True}, + ] + assert sorted(observed, key=lambda d: d["col1"]) == expected + + # now do an actual update, so postgres will create a replication message of type Update + with src_pl.sql_client() as c: + qual_name = src_pl.sql_client().make_qualified_table_name(TABLE_NAME) + c.execute_sql(f"UPDATE {qual_name} SET col2 = 2.5 WHERE col1 = 989127831;") + + # load the change to the destination + info = dest_pl.run(r) + assert_load_info(info) + assert load_table_counts(dest_pl, TABLE_NAME)[TABLE_NAME] == 3 + + # compare observed records with expected records + qual_name = dest_pl.sql_client().make_qualified_table_name(TABLE_NAME) + observed = [ + {"col1": row[0], "col2": row[1], "col3": row[2]} + for row in select_data( + dest_pl, f"SELECT col1, col2, col3 FROM {qual_name} WHERE col1 = 989127831;" + ) + ] + expected = [{"col1": 989127831, "col2": 2.5, "col3": True}] + assert sorted(observed, key=lambda d: d["col1"]) == expected diff --git a/tests/utils.py b/tests/utils.py index 4fd8a8bfc..f9216dce7 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,7 +1,7 @@ import os import platform import pytest -from typing import Any, Iterator, List +from typing import Any, Iterator, List, Sequence from os import environ from unittest.mock import patch @@ -222,3 +222,12 @@ def load_table_distinct_counts( with c.execute_query(query) as cur: rows = list(cur.fetchall()) return {r[0]: r[1] for r in rows} + + +def select_data( + p: dlt.Pipeline, sql: str, schema_name: str = None +) -> List[Sequence[Any]]: + """Returns select `sql` results as list.""" + with p.sql_client(schema_name=schema_name) as c: + with c.execute_query(sql) as cur: + return list(cur.fetchall()) From fa9a4c1e1bb54bbe01eb250b31aaece5403b52ce Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Mon, 4 Mar 2024 02:11:06 +0100 Subject: [PATCH 03/38] move config to correct position --- sources/.dlt/example.secrets.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sources/.dlt/example.secrets.toml b/sources/.dlt/example.secrets.toml index 343af2cb4..5743ce4a7 100644 --- a/sources/.dlt/example.secrets.toml +++ b/sources/.dlt/example.secrets.toml @@ -16,10 +16,10 @@ location = "US" ### Sources [sources] +## local postgres as source +sql_database.credentials="postgresql://loader:loader@localhost:5432/dlt_data" + ## chess pipeline # the section below defines secrets for "chess_dlt_config_example" source in chess/__init__.py [sources.chess] secret_str="secret string" # a string secret - -## local postgres as source -sql_database.credentials="postgresql://loader:loader@localhost:5432/dlt_data" \ No newline at end of file From 4cdf8231e2d81aaca10eabb25d90ff02c80f26e2 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Tue, 12 Mar 2024 03:37:26 +0100 Subject: [PATCH 04/38] extend SQLAlchemy type mapping --- sources/sql_database/schema_types.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sources/sql_database/schema_types.py b/sources/sql_database/schema_types.py index a93ff162d..044b74e18 100644 --- a/sources/sql_database/schema_types.py +++ b/sources/sql_database/schema_types.py @@ -36,6 +36,12 @@ def sqla_col_to_column_schema(sql_col: ColumnAny) -> Optional[TColumnSchema]: precision=sql_t.precision, scale=sql_t.scale, ) + elif isinstance(sql_t, sqltypes.Float): + col = dict(name=sql_col.name, data_type="double") + elif isinstance(sql_t, sqltypes.Boolean): + col = dict(name=sql_col.name, data_type="bool") + elif isinstance(sql_t, sqltypes.JSON): + col = 
dict(name=sql_col.name, data_type="complex") elif isinstance(sql_t, sqltypes.String): col = dict(name=sql_col.name, data_type="text", precision=sql_t.length) elif isinstance(sql_t, sqltypes._Binary): From 880881282c7e62efc4e89be77f34940995a2fbae Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Tue, 12 Mar 2024 03:38:47 +0100 Subject: [PATCH 05/38] add initial support for postgres replication --- poetry.lock | 2 +- pyproject.toml | 3 + .../pg_replication/README.md | 0 sources/pg_replication/__init__.py | 161 ++++++ sources/pg_replication/exceptions.py | 2 + .../pg_replication/helpers.py | 376 +++++++++++--- sources/pg_replication/requirements.txt | 3 + .../pg_replication/schema_types.py | 38 +- sources/pg_replication_pipeline.py | 46 ++ .../sql_database/pg_replication/__init__.py | 64 --- sources/sql_database/requirements.txt | 5 +- .../pg_replication/__init__.py | 0 .../pg_replication/cases.py | 6 +- tests/pg_replication/conftest.py | 47 ++ tests/pg_replication/test_pg_replication.py | 478 ++++++++++++++++++ tests/pg_replication/utils.py | 9 + .../pg_replication/test_pg_replication.py | 157 ------ 17 files changed, 1079 insertions(+), 318 deletions(-) rename sources/{sql_database => }/pg_replication/README.md (100%) create mode 100644 sources/pg_replication/__init__.py create mode 100644 sources/pg_replication/exceptions.py rename sources/{sql_database => }/pg_replication/helpers.py (58%) create mode 100644 sources/pg_replication/requirements.txt rename sources/{sql_database => }/pg_replication/schema_types.py (71%) create mode 100644 sources/pg_replication_pipeline.py delete mode 100644 sources/sql_database/pg_replication/__init__.py rename tests/{sql_database => }/pg_replication/__init__.py (100%) rename tests/{sql_database => }/pg_replication/cases.py (96%) create mode 100644 tests/pg_replication/conftest.py create mode 100644 tests/pg_replication/test_pg_replication.py create mode 100644 tests/pg_replication/utils.py delete mode 100644 tests/sql_database/pg_replication/test_pg_replication.py diff --git a/poetry.lock b/poetry.lock index 41397765b..2b0f4a425 100644 --- a/poetry.lock +++ b/poetry.lock @@ -5924,4 +5924,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.13" -content-hash = "96e90dd7c2bc7d9d152dd2180ed4c41a4892cd3678817143fd6d78dc05f5b9fd" +content-hash = "d548d9158f0e06e6a1cca4d8b65bbfe2b75caa1787490391c1dee3aeb2ae17dd" diff --git a/pyproject.toml b/pyproject.toml index 1178ba0a5..efba11b50 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,9 @@ types-psycopg2 = "^2.9.0" [tool.poetry.group.sql_database.dependencies] sqlalchemy = ">=1.4" pymysql = "^1.0.3" + +[tool.poetry.group.pg_replication.dependencies] +psycopg2 = ">=2.9.9" pypgoutput = "0.0.3" [tool.poetry.group.google_sheets.dependencies] diff --git a/sources/sql_database/pg_replication/README.md b/sources/pg_replication/README.md similarity index 100% rename from sources/sql_database/pg_replication/README.md rename to sources/pg_replication/README.md diff --git a/sources/pg_replication/__init__.py b/sources/pg_replication/__init__.py new file mode 100644 index 000000000..21b3f4c68 --- /dev/null +++ b/sources/pg_replication/__init__.py @@ -0,0 +1,161 @@ +from typing import Optional, Sequence, Dict, Iterable + +import dlt + +from dlt.common.schema.typing import TAnySchemaColumns +from dlt.sources import DltResource +from dlt.sources.credentials import ConnectionStringCredentials +from dlt.extract.typing import TTableHintTemplate + +from .helpers import ( + 
init_table_replication,
+    table_changes,
+    snapshot_and_changes,
+    PgReplicationCredentialsConfiguration,
+    InitTableReplicationConfig,
+    ReplicatedTableConfig,
+)
+
+
+@dlt.sources.config.with_config(
+    sections=("sources", "pg_replication"), spec=PgReplicationCredentialsConfiguration
+)
+def replicated_table(
+    table_name: str,
+    schema_name: str,
+    credentials: ConnectionStringCredentials = dlt.secrets.value,
+    columns: TTableHintTemplate[TAnySchemaColumns] = None,
+    include_columns: Optional[Sequence[str]] = None,
+    init_conf: Optional[InitTableReplicationConfig] = None,
+    slot_name: Optional[str] = None,
+    pub_name: Optional[str] = None,
+    table_snapshot: Optional[DltResource] = None,
+    target_batch_size: int = 1000,
+    flush_slot: bool = True,
+) -> DltResource:
+    """Returns a dlt resource that yields data for a replicated postgres table.
+
+    This resource handles both the one-off initial load and subsequent changes
+    to the table. The initial load is optional and enabled by setting "persist_snapshot"
+    to True in `init_conf`, or by providing `table_snapshot` directly.
+
+    Replication initialization is handled automatically if `slot_name` is not provided.
+    In that case, this function calls `init_table_replication`, which creates a
+    new dedicated replication slot and publication for the table, or returns the names of
+    the replication slot and publication if they already exist.
+
+    Args:
+        table_name (str): Name of the table to replicate.
+        schema_name (str): Name of the schema the table belongs to.
+        credentials (ConnectionStringCredentials): Postgres database credentials.
+        columns (TTableHintTemplate[TAnySchemaColumns]): Column hints for the DltResource.
+        include_columns (Optional[Sequence[str]]): Sequence of names of
+            columns to include in the replicated table. Any columns not in the sequence
+            are excluded. If not provided, all columns are included in the table.
+        init_conf (Optional[InitTableReplicationConfig]): Dictionary to configure
+            the initialization of the table replication. Key-value pairs in the dictionary
+            are passed as keyword arguments to `init_table_replication`. Allowed keys
+            are "publish", "persist_snapshot", and "reset".
+            Example: {"publish": "insert, update", "persist_snapshot": True}.
+        slot_name (Optional[str]): Name of the replication slot to consume replication
+            messages from. Each table is expected to have a dedicated slot. If not
+            provided, `init_table_replication` is called, which creates a new replication slot
+            and publication for the table, or returns the names of the replication slot
+            and publication if they already exist.
+        pub_name (Optional[str]): Name of the publication that publishes DML operations
+            for the table. Each table is expected to have a dedicated publication.
+        table_snapshot (Optional[DltResource]): Resource yielding data items from
+            a snapshot exported during creation of a replication slot. This resource
+            can be created using `init_table_replication`. Only relevant if the
+            initialization of the table replication is not handled by this function.
+        target_batch_size (int): Desired number of data items (including metadata
+            items) yielded in a batch. This argument can be used to limit the data
+            items in memory. Note that the number of data items yielded can be (far)
+            greater than `target_batch_size`, because all messages belonging to the same
+            transaction are always processed in the same batch, regardless of the number
+            of messages in the transaction and regardless of the value of `target_batch_size`.
+ The number of data items can also be smaller than `min_batch_size` when + the replication slot is exhausted before a batch is full. This argument + is passed to `table_changes` and does not apply to the `table_snapshot` resource. + flush_slot (bool): Whether processed messages are discarded from the replication + slot. The recommended value is True. Be careful when setting this argument + to False—not flushing can eventually lead to a “disk full” condition on the server, + because the server retains all the WAL segments that might be needed to + stream the changes via all of the currently open replication slots. + + Returns: + DltResource that yields data items for the initial load and subsequent + changes in the postgres table. + """ + if slot_name is None: + slot_name, pub_name, table_snapshot = init_table_replication( + table_name=table_name, + schema_name=schema_name, + columns=columns, + include_columns=include_columns, + **(dict() if init_conf is None else init_conf) + ) + + changes = table_changes( + credentials=credentials, + table_name=table_name, + schema_name=schema_name, + include_columns=include_columns, + target_batch_size=target_batch_size, + slot_name=slot_name, + pub_name=pub_name, + flush_slot=flush_slot, + ) + + # include `dedup_sort` and `hard_delete` hints defined on table_changes resource + if columns is None: + columns = changes.columns + else: + columns = {**columns, **changes.columns} # type: ignore[dict-item] + + # return combined resource that first yields from table_snapshot, then from table_changes + resource_name = schema_name + "_" + table_name + return dlt.resource( + # combined, + snapshot_and_changes, + name=resource_name, + table_name=table_name, + write_disposition=changes.write_disposition, + columns=columns, + primary_key=changes._hints.get("primary_key"), + )(resource_name, table_snapshot, changes) + + +@dlt.source(name="pg_replication") +def pg_replication_source( + table_names: Sequence[str] = dlt.config.value, + schema_name: str = dlt.config.value, + credentials: ConnectionStringCredentials = dlt.secrets.value, + conf: Optional[Dict[str, ReplicatedTableConfig]] = None, +) -> Iterable[DltResource]: + """A dlt source that yields resources for one or more replicated postgres tables. + + Args: + table_names (Sequence[str]): Sequences of names of tables to replicate. + schema_name (str): Name of the schema the tables belong to. + credentials (ConnectionStringCredentials): Postgres database credentials. + conf (Dict[str, ReplicatedTableConfig]): Mapping from table names to + ReplicatedTableConfig objects, which are configuration dictionaries for + the resources created for individual tables. Key-value pairs in the dictionary + are passed as keyword arguments to `replicated_table`. Allowed keys + are "columns", "include_columns", "target_batch_size", "init_conf", "slot_name", + "pub_name", "flush_slot", and "table_snapshot". See the documentation for + the `replicated_table` function for an explanation of these arguments. + Example: {"table_x": {"include_columns": ["id", "val"]}, "table_y": + {"init_conf": {"publish": "insert", "persist_snapshot": True}}}. + + Yields: + A DltResource for each replicated postgres table. 
+ """ + for table_name in table_names: + yield replicated_table( + table_name=table_name, + schema_name=schema_name, + credentials=credentials, + **(dict() if conf is None or table_name not in conf else conf[table_name]) + ) diff --git a/sources/pg_replication/exceptions.py b/sources/pg_replication/exceptions.py new file mode 100644 index 000000000..6edf03da1 --- /dev/null +++ b/sources/pg_replication/exceptions.py @@ -0,0 +1,2 @@ +class NoPrimaryKeyException(Exception): + pass diff --git a/sources/sql_database/pg_replication/helpers.py b/sources/pg_replication/helpers.py similarity index 58% rename from sources/sql_database/pg_replication/helpers.py rename to sources/pg_replication/helpers.py index fea9ca950..d4777340f 100644 --- a/sources/sql_database/pg_replication/helpers.py +++ b/sources/pg_replication/helpers.py @@ -1,5 +1,14 @@ -from typing import Optional, Dict, Iterator, Union, List, Tuple, Sequence, Any -from dlt.common.pendulum import pendulum +from typing import ( + Optional, + Dict, + Iterator, + Union, + List, + Tuple, + Sequence, + Any, + TypedDict, +) from copy import deepcopy from dataclasses import dataclass, field @@ -25,45 +34,59 @@ from dlt.common import logger from dlt.common.typing import TDataItem -from dlt.common.schema.typing import TTableSchema, TTableSchemaColumns +from dlt.common.pendulum import pendulum +from dlt.common.schema.typing import ( + TTableSchema, + TTableSchemaColumns, + TAnySchemaColumns, + TColumnNames, +) from dlt.common.data_writers.escape import escape_postgres_identifier from dlt.common.configuration.specs import BaseConfiguration, configspec -from dlt.extract.typing import DataItemWithMeta -from dlt.sources import DltResource +from dlt.extract.typing import DataItemWithMeta, TTableHintTemplate +from dlt.extract.resource import DltResource from dlt.sources.credentials import ConnectionStringCredentials -from .. 
import sql_table +try: + from ..sql_database import sql_table # type: ignore[import-untyped] +except Exception: + from sql_database import sql_table from .schema_types import _to_dlt_column_schema, _to_dlt_val +from .exceptions import NoPrimaryKeyException @configspec -class InitTableReplicationConfiguration(BaseConfiguration): +class PgReplicationCredentialsConfiguration(BaseConfiguration): credentials: ConnectionStringCredentials - table: str - schema: str - include_columns: Optional[Sequence[str]] -@configspec -class TableChangesResourceConfiguration(BaseConfiguration): - credentials: ConnectionStringCredentials - table: str - slot_name: str - publication_name: str +class InitTableReplicationConfig(TypedDict, total=False): + publish: str + persist_snapshot: bool + reset: bool -@dlt.sources.config.with_config( - sections=("sources", "sql_database"), - spec=InitTableReplicationConfiguration, -) +class ReplicatedTableConfig(TypedDict, total=False): + columns: TTableHintTemplate[TAnySchemaColumns] + include_columns: Optional[Sequence[str]] + target_batch_size: int + init_conf: Optional[InitTableReplicationConfig] + slot_name: Optional[str] + pub_name: Optional[str] + flush_slot: bool + table_snapshot: Optional[DltResource] + + +@dlt.sources.config.with_config(sections=("sources", "pg_replication")) def init_table_replication( + table_name: str, + schema_name: str, credentials: ConnectionStringCredentials = dlt.secrets.value, - table: str = dlt.config.value, - schema: str = dlt.config.value, publish: str = "insert, update, delete", persist_snapshot: bool = False, - include_columns: Optional[Sequence[str]] = dlt.config.value, + columns: TTableHintTemplate[TAnySchemaColumns] = None, + include_columns: Optional[Sequence[str]] = None, reset: bool = False, ) -> Tuple[str, str, Optional[DltResource]]: """Initializes replication for a table. @@ -73,8 +96,8 @@ def init_table_replication( Args: credentials (ConnectionStringCredentials): Postgres database credentials. - table (str): Name of the table to initialize replication for. - schema (str): Name of the schema the table belongs to. + table_name (str): Name of the table to initialize replication for. + schema_name (str): Name of the schema the table belongs to. publish (str): Comma-separated string of DML operations. Can be used to control which changes are included in the publication. Allowed operations are `insert`, `update`, and `delete`. `truncate` is currently not @@ -96,15 +119,14 @@ def init_table_replication( Tuple with the names of the created slot and publication, and optionally a table snapshot resource. 
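For orientation, the initialization above boils down to creating a publication and a dedicated logical replication slot on the Postgres side. A minimal sketch of the equivalent manual setup with plain psycopg2 follows; the connection string, publication/slot names, and table identifier are made up for illustration and are not taken from this source:

```python
import psycopg2

# illustrative connection and identifiers only
conn = psycopg2.connect("dbname=dlt_data user=replication_user")
conn.autocommit = True
with conn.cursor() as cur:
    # the publication controls which DML operations are published for the table
    cur.execute("CREATE PUBLICATION my_pub WITH (publish = 'insert, update, delete');")
    cur.execute('ALTER PUBLICATION my_pub ADD TABLE "my_schema"."my_table";')
    # a dedicated logical replication slot, decoded with the pgoutput plugin
    cur.execute("SELECT * FROM pg_create_logical_replication_slot('my_slot', 'pgoutput');")
    print(cur.fetchone())  # e.g. ('my_slot', '0/16B6FA0')
conn.close()
```

The helper itself goes through a replication connection instead, so it can use the snapshot exported when the slot is created for the optional initial load.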
""" - slot_name, publication_name = _gen_table_replication_references(table, schema) + slot_name, pub_name = _gen_table_replication_references(table_name, schema_name) cur = _get_rep_conn(credentials).cursor() if reset: drop_replication_slot(slot_name, cur) - drop_publication(publication_name, cur) - create_publication(publication_name, cur, publish) - add_table_to_publication(table, schema, publication_name, cur) + drop_publication(pub_name, cur) + create_publication(pub_name, cur, publish) + add_table_to_publication(table_name, schema_name, pub_name, cur) rep_slot = create_replication_slot(slot_name, cur) - table_snapshot: DltResource = None if persist_snapshot: if rep_slot is None: logger.warning( @@ -116,18 +138,129 @@ def init_table_replication( cur_snap = _get_conn(credentials).cursor() snapshot_table_name = persist_snapshot_table( snapshot_name=rep_slot["snapshot_name"], - table=table, - schema=schema, + table_name=table_name, + schema_name=schema_name, cur=cur_snap, include_columns=include_columns, ) - table_snapshot = sql_table( + resource = sql_table( credentials=credentials, table=snapshot_table_name, - schema=schema, + schema=schema_name, + detect_precision_hints=True, ) - table_snapshot.apply_hints(table_name=table) - return (slot_name, publication_name, table_snapshot) + resource.apply_hints( + table_name=table_name, + write_disposition="append" if publish == "insert" else "merge", + columns=columns, + ) + return (slot_name, pub_name, resource) + return (slot_name, pub_name, None) + + +@dlt.sources.config.with_config(sections=("sources", "pg_replication")) +def table_changes( + table_name: str, + schema_name: str, + credentials: ConnectionStringCredentials = dlt.secrets.value, + include_columns: Optional[Sequence[str]] = None, + target_batch_size: int = 1000, + slot_name: Optional[str] = None, + pub_name: Optional[str] = None, + flush_slot: bool = True, +) -> DltResource: + """Returns a dlt resource that yields data items for changes in a postgres table. + + Relies on a dedicated replication slot and publication that publishes DML + operations (i.e. `insert`, `update`, and/or `delete`) for the table (helper + method `init_table_replication` can be used to set this up). + Uses `merge` write disposition to merge changes into destination table(s). + + Args: + credentials (ConnectionStringCredentials): Postgres database credentials. + table_name (str): Name of the table that is replicated + primary_key (TColumnNames): Names of one or multiple columns serving as + primary key on the table. Used to deduplicate data items in the `merge` + operation. + include_columns (Optional[Sequence[str]]): Optional sequence of names of + columns to include in the generated data itemes. Any columns not in the + sequence are excluded. If not provided, all columns are included. + target_batch_size (int): Desired number of data items (including metadata + items) yielded in a batch. This argument can be used to limit the data + items in memory. Note that the number of data items yielded can be (far) + greater than `min_batch_size`, because all messages belonging to the same + transaction are always processed in the same batch, regardless of the number + of messages in the transaction and regardless of the value of `min_batch_size`. + The number of data items can also be smaller than `min_batch_size` when + the replication slot is exhausted before a batch is full. + slot_name (str): Name of the replication slot to consume replication + messages from. Each table is expected to have a dedicated slot. 
+ pub_name (str): Name of the publication that published DML operations + for the table. Each table is expected to have a dedicated publication. + flush_slot (bool): Whether processed messages are discarded from the replication + slot. The recommended value is True. Be careful when setting this argument + to False—not flushing can eventually lead to a “disk full” condition on the server, + because the server retains all the WAL segments that might be needed to + stream the changes via all of the currently open replication slots. + + Returns: + DltResource that yields data items for changes in the postgres table. + """ + write_disposition = "append" + columns: TTableSchemaColumns = {"lsn": {"data_type": "bigint"}} + primary_key = _get_pk(table_name, schema_name, credentials) + + if slot_name is None: + slot_name, pub_name = _gen_table_replication_references(table_name, schema_name) + if pub_name is not None: + pub_ops = get_pub_ops(pub_name, credentials) + if pub_ops["update"] or pub_ops["delete"]: + if primary_key is None: + raise NoPrimaryKeyException( + f'Table "{schema_name}.{table_name}" does not have a primary key. ' + "A primary key is required as REPLICA IDENTITY when INSERT " + "and/or DELETE operations are published." + ) + write_disposition = "merge" + columns["lsn"]["dedup_sort"] = "desc" + if pub_ops["delete"]: + columns["deleted_ts"] = {"hard_delete": True, "data_type": "timestamp"} + + return dlt.resource( # type: ignore[no-any-return, call-overload] + table_replication_items, + name=schema_name + "_" + table_name, + table_name=table_name, + write_disposition=write_disposition, + primary_key=primary_key, + columns=columns, + )( + credentials=credentials, + slot_name=slot_name, + pub_name=pub_name, + include_columns=include_columns, + target_batch_size=target_batch_size, + flush_slot=flush_slot, + ) + + +def snapshot_and_changes( + resource_name: str, + table_snapshot: Optional[DltResource], + table_changes: DltResource, +) -> Iterator[Union[TDataItem, DataItemWithMeta]]: + """Generator for data items for a replicated postgres table. + + First yields from snapshot resource (if provided and if not yielded before) + to do the initial load, then from changes resource to process subsequent DML + operations on the table. + """ + snapshot_yielded = dlt.current.resource_state(resource_name).setdefault( + "snapshot_yielded", False + ) + if table_snapshot is not None and not snapshot_yielded: + yield from table_snapshot + dlt.current.resource_state(resource_name)["snapshot_yielded"] = True + yield from table_changes def create_publication( @@ -153,7 +286,7 @@ def create_publication( def add_table_to_publication( table_name: str, schema_name: str, - publication_name: str, + pub_name: str, cur: ReplicationCursor, ) -> None: """Adds a table to a publication for logical replication if the table is not a member yet. @@ -161,7 +294,7 @@ def add_table_to_publication( Raises error if the user is not owner of the table. 
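As a side note, membership can also be checked against the catalog before attempting the `ALTER PUBLICATION`. A small, hypothetical helper (not part of this source) sketching that check:

```python
def is_table_in_publication(cur, pub_name: str, schema_name: str, table_name: str) -> bool:
    """Returns True if the table is already a member of the publication."""
    cur.execute(
        "SELECT 1 FROM pg_publication_tables"
        " WHERE pubname = %s AND schemaname = %s AND tablename = %s;",
        (pub_name, schema_name, table_name),
    )
    return cur.fetchone() is not None
```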
""" qual_name = _make_qualified_table_name(table_name, schema_name) - esc_pub_name = escape_postgres_identifier(publication_name) + esc_pub_name = escape_postgres_identifier(pub_name) try: cur.execute(f"ALTER PUBLICATION {esc_pub_name} ADD TABLE {qual_name};") logger.info( @@ -217,8 +350,8 @@ def drop_publication(name: str, cur: ReplicationCursor) -> None: def persist_snapshot_table( snapshot_name: str, - table: str, - schema: str, + table_name: str, + schema_name: str, cur: psycopg2.extensions.cursor, include_columns: Optional[Sequence[str]] = None, ) -> str: @@ -229,9 +362,9 @@ def persist_snapshot_table( col_str = "*" if include_columns is not None: col_str = ", ".join(map(escape_postgres_identifier, include_columns)) - snapshot_table_name = f"{table}_snapshot_{snapshot_name}" - snapshot_qual_name = _make_qualified_table_name(snapshot_table_name, schema) - qual_name = _make_qualified_table_name(table, schema) + snapshot_table_name = f"{table_name}_snapshot_{snapshot_name}" + snapshot_qual_name = _make_qualified_table_name(snapshot_table_name, schema_name) + qual_name = _make_qualified_table_name(table_name, schema_name) cur.execute( f""" START TRANSACTION ISOLATION LEVEL REPEATABLE READ; @@ -269,13 +402,33 @@ def get_max_lsn( return lsn +def get_pub_ops( + pub_name: str, + credentials: ConnectionStringCredentials, +) -> Dict[str, bool]: + cur = _get_conn(credentials).cursor() + cur.execute( + f""" + SELECT pubinsert, pubupdate, pubdelete, pubtruncate + FROM pg_publication WHERE pubname = '{pub_name}' + """ + ) + result = cur.fetchone() + cur.connection.close() + if result is None: + raise ValueError(f'Publication "{pub_name}" does not exist.') + return { + "insert": result[0], + "update": result[1], + "delete": result[2], + "truncate": result[3], + } + + def lsn_int_to_hex(lsn: int) -> str: """Convert integer LSN to postgres' hexadecimal representation.""" # https://stackoverflow.com/questions/66797767/lsn-external-representation. - hex_val = format(lsn, "x") - if len(hex_val) > 8: - return hex_val[:-8] + "/" + hex_val[-8:] - return "0/" + format(lsn, "x").zfill(8) + return f"{lsn >> 32 & 4294967295:X}/{lsn & 4294967295:08X}" def advance_slot( @@ -300,35 +453,51 @@ def advance_slot( def table_replication_items( credentials: ConnectionStringCredentials, slot_name: str, - publication_name: str, + pub_name: str, include_columns: Optional[Sequence[str]] = None, - upto_lsn: Optional[int] = None, + target_batch_size: int = 1000, + flush_slot: bool = True, ) -> Iterator[Union[TDataItem, DataItemWithMeta]]: """Yields data items from generator. Maintains LSN of last consumed message in state to track progress. At start of the run, advances the slot upto last consumed message in previous run. + Processes in batches to limit memory usage. 
""" - # advance slot - flush_lsn = dlt.current.resource_state().setdefault("last_commit_lsn", 0) - advance_slot(flush_lsn, slot_name, credentials) - - # use max LSN in slot if `upto_lsn` was not provided - options = {"publication_names": publication_name, "proto_version": "1"} + # start where we left off in previous run + start_lsn = dlt.current.resource_state().get("last_commit_lsn", 0) + if flush_slot: + advance_slot(start_lsn, slot_name, credentials) + + # continue until last message in replication slot + options = {"publication_names": pub_name, "proto_version": "1"} + upto_lsn = get_max_lsn(slot_name, options, credentials) if upto_lsn is None: - upto_lsn = get_max_lsn(slot_name, options, credentials) - - # check if there's anything to consume - if upto_lsn is not None and upto_lsn > flush_lsn: - gen = ItemGenerator(credentials, slot_name, options, upto_lsn, include_columns) + return "Replication slot is empty." + + # generate items in batches + while True: + gen = ItemGenerator( + credentials=credentials, + slot_name=slot_name, + options=options, + start_lsn=start_lsn, + upto_lsn=upto_lsn, + target_batch_size=target_batch_size, + include_columns=include_columns, + ) yield from gen - dlt.current.resource_state()["last_commit_lsn"] = gen.last_commit_lsn + if gen.generated_all: + dlt.current.resource_state()["last_commit_lsn"] = gen.last_commit_lsn + break + start_lsn = gen.last_commit_lsn def _get_conn( - credentials: ConnectionStringCredentials, connection_factory: Optional[Any] = None + credentials: ConnectionStringCredentials, + connection_factory: Optional[Any] = None, ) -> Union[psycopg2.extensions.connection, LogicalReplicationConnection]: - # returns a psycopg2 connection + """Returns a psycopg2 connection to interact with postgres.""" return psycopg2.connect( # type: ignore[no-any-return] database=credentials.database, user=credentials.username, @@ -342,11 +511,12 @@ def _get_conn( def _get_rep_conn( credentials: ConnectionStringCredentials, ) -> LogicalReplicationConnection: - # returns a psycopg2 LogicalReplicationConnection + """Returns a psycopg2 LogicalReplicationConnection to interact with postgres replication functionality.""" return _get_conn(credentials, LogicalReplicationConnection) # type: ignore[return-value] def _make_qualified_table_name(table_name: str, schema_name: str) -> str: + """Escapes and combines a schema and table name.""" return ( escape_postgres_identifier(schema_name) + "." @@ -357,10 +527,40 @@ def _make_qualified_table_name(table_name: str, schema_name: str) -> str: def _gen_table_replication_references( table_name: str, schema_name: str ) -> Tuple[str, str]: - # generate replication slot and publication names dedicated to a single table + """Generates replication slot and publication names dedicated to a single table.""" slot_name = f"_dlt_slot_{schema_name}_{table_name}" - publication_name = f"_dlt_pub_{schema_name}_{table_name}" - return (slot_name, publication_name) + pub_name = f"_dlt_pub_{schema_name}_{table_name}" + return (slot_name, pub_name) + + +def _get_pk( + table_name: str, + schema_name: str, + credentials: ConnectionStringCredentials, +) -> Optional[TColumnNames]: + """Returns primary key column(s) for postgres table. + + Returns None if no primary key columns exist. 
+ """ + qual_name = _make_qualified_table_name(table_name, schema_name) + cur = _get_conn(credentials).cursor() + # https://wiki.postgresql.org/wiki/Retrieve_primary_key_columns + cur.execute( + f""" + SELECT a.attname + FROM pg_index i + JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey) + WHERE i.indrelid = '{qual_name}'::regclass + AND i.indisprimary; + """ + ) + result = [tup[0] for tup in cur.fetchall()] + cur.connection.close() + if len(result) == 0: + return None + elif len(result) == 1: + return result[0] # type: ignore[no-any-return] + return result @dataclass @@ -368,9 +568,12 @@ class ItemGenerator: credentials: ConnectionStringCredentials slot_name: str options: Dict[str, str] + start_lsn: int = 0 upto_lsn: Optional[int] = None + target_batch_size: int = 1000 include_columns: Optional[Sequence[str]] = None last_commit_lsn: Optional[int] = field(default=None, init=False) + generated_all: bool = False def __iter__(self) -> Iterator[Union[TDataItem, DataItemWithMeta]]: """Consumes messages from replication slot and generates data items. @@ -378,23 +581,26 @@ def __iter__(self) -> Iterator[Union[TDataItem, DataItemWithMeta]]: Does not advance the slot. Maintains LSN of last consumed Commit message in object state. """ - cur = _get_rep_conn(self.credentials).cursor() try: - consumed_all = False - consumer = MessageConsumer(self.upto_lsn, self.include_columns) + cur = _get_rep_conn(self.credentials).cursor() cur.start_replication( - slot_name=self.slot_name, decode=False, options=self.options + slot_name=self.slot_name, + start_lsn=self.start_lsn, + decode=False, + options=self.options, + ) + consumer = MessageConsumer( + self.upto_lsn, self.target_batch_size, self.include_columns ) cur.consume_stream(consumer) - except StopReplication: - # all messages upto `upto_lsn` have been successfully consumed - consumed_all = True + except StopReplication: # completed batch or reached `upto_lsn` + pass finally: cur.connection.close() - if consumed_all: - for i in consumer.data_items: - yield i - self.last_commit_lsn = consumer.last_commit_lsn + self.last_commit_lsn = consumer.last_commit_lsn + for i in consumer.data_items: + yield i + self.generated_all = consumer.consumed_all class MessageConsumer: @@ -407,11 +613,14 @@ class MessageConsumer: def __init__( self, upto_lsn: int, + target_batch_size: int = 1000, include_columns: Optional[Sequence[str]] = None, ) -> None: self.upto_lsn = upto_lsn + self.target_batch_size = target_batch_size self.include_columns = include_columns + self.consumed_all: bool = False # data_items attribute maintains all data items self.data_items: List[Union[TDataItem, DataItemWithMeta]] = [] # other attributes only maintain last-seen values @@ -425,8 +634,6 @@ def __call__(self, msg: ReplicationMessage) -> None: Breaks out of stream when `upto_lsn` is reached. """ self.process_msg(msg) - if msg.data_start == self.upto_lsn: - raise StopReplication def process_msg(self, msg: ReplicationMessage) -> None: """Processes encoded replication message. 
@@ -439,6 +646,10 @@ def process_msg(self, msg: ReplicationMessage) -> None: self.last_commit_ts = Begin(msg.payload).commit_ts elif op == "C": self.last_commit_lsn = msg.data_start + if msg.data_start >= self.upto_lsn: + self.consumed_all = True + if self.consumed_all or len(self.data_items) >= self.target_batch_size: + raise StopReplication elif op == "R": self.process_relation(Relation(msg.payload)) elif op == "I": @@ -501,7 +712,10 @@ def gen_data_item( columns = zip(schema.values(), data) data_item = { schema["name"]: _to_dlt_val( - data.col_data, schema["data_type"], data.col_data_category + val=data.col_data, + data_type=schema["data_type"], + byte1=data.col_data_category, + for_delete=for_delete, ) for (schema, data) in columns } diff --git a/sources/pg_replication/requirements.txt b/sources/pg_replication/requirements.txt new file mode 100644 index 000000000..5f4179973 --- /dev/null +++ b/sources/pg_replication/requirements.txt @@ -0,0 +1,3 @@ +dlt>=0.4.5 +psycopg2>=2.9.9 +pypgoutput==0.0.3 \ No newline at end of file diff --git a/sources/sql_database/pg_replication/schema_types.py b/sources/pg_replication/schema_types.py similarity index 71% rename from sources/sql_database/pg_replication/schema_types.py rename to sources/pg_replication/schema_types.py index 3f3d10d12..cf79bcda5 100644 --- a/sources/sql_database/pg_replication/schema_types.py +++ b/sources/pg_replication/schema_types.py @@ -1,8 +1,9 @@ import json -from typing import Optional, Any +from typing import Optional, Any, Dict import pypgoutput # type: ignore[import-untyped] +from dlt.common import Decimal from dlt.common.data_types.typing import TDataType from dlt.common.data_types.type_helpers import coerce_value from dlt.common.schema.typing import TColumnSchema, TColumnType @@ -10,8 +11,22 @@ from dlt.destinations.impl.postgres.postgres import PostgresTypeMapper +_DUMMY_VALS: Dict[TDataType, Any] = { + "bigint": 0, + "binary": b" ", + "bool": True, + "complex": [0], + "date": "2000-01-01", + "decimal": Decimal(0), + "double": 0.0, + "text": "", + "time": "00:00:00", + "timestamp": "2000-01-01T00:00:00", + "wei": 0, +} + # maps postgres type OID to type string -_PG_TYPES = { +_PG_TYPES: Dict[int, str] = { 16: "boolean", 17: "bytea", 20: "bigint", @@ -28,7 +43,8 @@ def _get_precision(type_id: int, atttypmod: int) -> Optional[int]: - # get precision from postgres type attributes: https://stackoverflow.com/a/3351120 + """Get precision from postgres type attributes.""" + # https://stackoverflow.com/a/3351120 if type_id == 21: # smallint return 16 elif type_id == 23: # integer @@ -49,7 +65,8 @@ def _get_precision(type_id: int, atttypmod: int) -> Optional[int]: def _get_scale(type_id: int, atttypmod: int) -> Optional[int]: - # get scale from postgres type attributes: https://stackoverflow.com/a/3351120 + """Get scale from postgres type attributes.""" + # https://stackoverflow.com/a/3351120 if atttypmod != -1: if type_id in (21, 23, 20): # smallint, integer, bigint return 0 @@ -59,7 +76,7 @@ def _get_scale(type_id: int, atttypmod: int) -> Optional[int]: def _to_dlt_column_type(type_id: int, atttypmod: int) -> TColumnType: - # converts postgres type to dlt column type + """Converts postgres type to dlt column type.""" pg_type = _PG_TYPES[type_id] precision = _get_precision(type_id, atttypmod) scale = _get_scale(type_id, atttypmod) @@ -68,19 +85,22 @@ def _to_dlt_column_type(type_id: int, atttypmod: int) -> TColumnType: def _to_dlt_column_schema(col: pypgoutput.decoders.ColumnType) -> TColumnSchema: - # converts pypgoutput 
ColumnType to dlt column schema + """Converts pypgoutput ColumnType to dlt column schema.""" dlt_column_type = _to_dlt_column_type(col.type_id, col.atttypmod) partial_column_schema = { "name": col.name, "primary_key": bool(col.part_of_pkey), - "nullable": not bool(col.part_of_pkey), + # "nullable": not bool(col.part_of_pkey), } return {**dlt_column_type, **partial_column_schema} # type: ignore[typeddict-item] -def _to_dlt_val(val: str, data_type: TDataType, byte1: str) -> Any: - # converts pgoutput's text-formatted value into dlt-compatible data value +def _to_dlt_val(val: str, data_type: TDataType, byte1: str, for_delete: bool) -> Any: + """Converts pgoutput's text-formatted value into dlt-compatible data value.""" if byte1 == "n": + if for_delete: + # replace None with dummy value to prevent NOT NULL violations in staging table + return _DUMMY_VALS[data_type] return None elif byte1 == "t": if data_type == "binary": diff --git a/sources/pg_replication_pipeline.py b/sources/pg_replication_pipeline.py new file mode 100644 index 000000000..3b68a9771 --- /dev/null +++ b/sources/pg_replication_pipeline.py @@ -0,0 +1,46 @@ +import dlt + +from pg_replication import pg_replication_source, replicated_table + + +def replicate_single_table() -> None: + pipeline = dlt.pipeline( + pipeline_name="pg_replication_pipeline", + destination="duckdb", + dataset_name="postgres_table", + ) + + table_resource = replicated_table( + table_name="your_table", + schema_name="your_schema", + init_conf={"persist_snapshot": True}, # this enables an initial load + ) + + info = pipeline.run(table_resource) + print(info) + + +def replicate_multiple_tables() -> None: + pipeline = dlt.pipeline( + pipeline_name="pg_replication_pipeline", + destination="duckdb", + dataset_name="postgres_tables", + ) + + replication_source = pg_replication_source( + table_names=["table_x", "table_y", "table_z"], + schema_name="your_schema", + conf={ + "table_x": {"include_columns": ["col_1", "col_2"]}, + "table_y": {"init_conf": {"publish": "insert"}}, + "table_z": {"init_conf": {"persist_snapshot": True}}, + }, + ) + + info = pipeline.run(replication_source) + print(info) + + +if __name__ == "__main__": + replicate_single_table() + # replicate_multiple_tables() diff --git a/sources/sql_database/pg_replication/__init__.py b/sources/sql_database/pg_replication/__init__.py deleted file mode 100644 index 3e3a1e133..000000000 --- a/sources/sql_database/pg_replication/__init__.py +++ /dev/null @@ -1,64 +0,0 @@ -from typing import Optional, Sequence - -import dlt - -from dlt.common.schema.typing import TColumnNames -from dlt.sources import DltResource -from dlt.sources.credentials import ConnectionStringCredentials - -from .helpers import table_replication_items, TableChangesResourceConfiguration - - -@dlt.sources.config.with_config( - sections=("sources", "sql_database"), - spec=TableChangesResourceConfiguration, -) -def table_changes( - credentials: ConnectionStringCredentials = dlt.secrets.value, - table: str = dlt.config.value, - primary_key: TColumnNames = None, - include_columns: Optional[Sequence[str]] = dlt.config.value, - slot_name: str = dlt.config.value, - publication_name: str = dlt.config.value, - upto_lsn: Optional[int] = None, -) -> DltResource: - """Returns a dlt resource that yields data items for changes in a postgres table. - - Relies on a dedicated replication slot and publication that publishes DML - operations (i.e. 
`insert`, `update`, and/or `delete`) for the table (helper - method `init_table_replication` can be used to set this up). - Uses `merge` write disposition to merge changes into destination table(s). - - Args: - credentials (ConnectionStringCredentials): Postgres database credentials. - table (str): Name of the table that is replicated - primary_key (TColumnNames): Names of one or multiple columns serving as - primary key on the table. Used to deduplicate data items in the `merge` - operation. - include_columns (Optional[Sequence[str]]): Optional sequence of names of - columns to include in the generated data itemes. Any columns not in the - sequence are excluded. If not provided, all columns are included. - slot_name (str): Name of the replication slot to consume replication - messages from. Each table is expected to have a dedicated slot. - publication_name (str): Name of the publication that published DML operations - for the table. Each table is expected to have a dedicated publication. - upto_lsn Optional[int]: Optional integer LSN value upto which the replication - slot is consumed. If not provided, all messages in the slot are consumed, - ensuring all new changes in the source table are included. - - Returns: - DltResource that yields data items for changes in the postgres table. - """ - return dlt.resource( - table_replication_items, - name=table, - write_disposition="merge", - primary_key=primary_key, - columns={"lsn": {"dedup_sort": "desc"}}, - )( - credentials=credentials, - slot_name=slot_name, - publication_name=publication_name, - include_columns=include_columns, - upto_lsn=upto_lsn, - ) diff --git a/sources/sql_database/requirements.txt b/sources/sql_database/requirements.txt index bc048d6cb..c81667399 100644 --- a/sources/sql_database/requirements.txt +++ b/sources/sql_database/requirements.txt @@ -1,4 +1,3 @@ sqlalchemy>=1.4 -dlt>=0.4.5 -psycopg2>=2.9.9 -pypgoutput==0.0.3 +dlt>=0.3.5 +pymysql>=1.0.0 \ No newline at end of file diff --git a/tests/sql_database/pg_replication/__init__.py b/tests/pg_replication/__init__.py similarity index 100% rename from tests/sql_database/pg_replication/__init__.py rename to tests/pg_replication/__init__.py diff --git a/tests/sql_database/pg_replication/cases.py b/tests/pg_replication/cases.py similarity index 96% rename from tests/sql_database/pg_replication/cases.py rename to tests/pg_replication/cases.py index 0514fcd86..a17efcad7 100644 --- a/tests/sql_database/pg_replication/cases.py +++ b/tests/pg_replication/cases.py @@ -1,4 +1,4 @@ -from typing import Iterator, List +from typing import List from dlt.common import Decimal from dlt.common.schema import TColumnSchema, TTableSchemaColumns @@ -29,7 +29,7 @@ "col5_null": None, "col6_null": None, "col7_null": None, - "col8_null": None, + # "col8_null": None, "col9_null": None, "col10_null": None, "col11_null": None, @@ -59,7 +59,7 @@ {"name": "col5_null", "data_type": "text", "nullable": True}, {"name": "col6_null", "data_type": "decimal", "nullable": True}, {"name": "col7_null", "data_type": "binary", "nullable": True}, - {"name": "col8_null", "data_type": "wei", "nullable": True}, + # {"name": "col8_null", "data_type": "wei", "nullable": True}, {"name": "col9_null", "data_type": "complex", "nullable": True, "variant": True}, {"name": "col10_null", "data_type": "date", "nullable": True}, {"name": "col11_null", "data_type": "time", "nullable": True}, diff --git a/tests/pg_replication/conftest.py b/tests/pg_replication/conftest.py new file mode 100644 index 000000000..2d0e25107 --- /dev/null 
+++ b/tests/pg_replication/conftest.py @@ -0,0 +1,47 @@ +import pytest + +from typing import Iterator + +import dlt + + +TABLE_NAME = "items" + + +@pytest.fixture() +def src_pl() -> Iterator[dlt.Pipeline]: + # setup + src_pl = dlt.pipeline( + pipeline_name="src_pl", destination="postgres", full_refresh=True + ) + yield src_pl + # teardown + with src_pl.sql_client() as c: + # drop tables + try: + c.drop_dataset() + except Exception as e: + print(e) + with c.with_staging_dataset(staging=True): + try: + c.drop_dataset() + except Exception as e: + print(e) + # drop replication slots + slot_names = [ + tup[0] + for tup in c.execute_sql( + f"SELECT slot_name FROM pg_replication_slots WHERE slot_name LIKE '_dlt_slot_{src_pl.dataset_name}_%'" + ) + ] + for slot_name in slot_names: + c.execute_sql(f"SELECT pg_drop_replication_slot('{slot_name}');") + # drop publications + pub_names = [ + tup[0] + for tup in c.execute_sql( + f"SELECT pubname FROM pg_publication WHERE pubname LIKE '_dlt_pub_{src_pl.dataset_name}_%'" + ) + ] + for pub_name in pub_names: + c.execute_sql(f"DROP PUBLICATION IF EXISTS {pub_name};") diff --git a/tests/pg_replication/test_pg_replication.py b/tests/pg_replication/test_pg_replication.py new file mode 100644 index 000000000..400779bf3 --- /dev/null +++ b/tests/pg_replication/test_pg_replication.py @@ -0,0 +1,478 @@ +import pytest + +from copy import deepcopy + +import dlt +from dlt.extract.resource import DltResource + +from tests.utils import ( + ALL_DESTINATIONS, + assert_load_info, + load_table_counts, + select_data, +) +from sources.pg_replication import ( + table_changes, + replicated_table, + pg_replication_source, +) +from sources.pg_replication.helpers import ( + init_table_replication, + _gen_table_replication_references, +) + +from .cases import TABLE_ROW_ALL_DATA_TYPES, TABLE_UPDATE_COLUMNS_SCHEMA +from .conftest import TABLE_NAME +from .utils import add_pk + + +@pytest.mark.parametrize("persist_snapshot", [True, False]) +def test_init_table_replication(src_pl: dlt.Pipeline, persist_snapshot: bool): + # resource to load data into postgres source table + @dlt.resource(table_name=TABLE_NAME, primary_key="id", write_disposition="merge") + def items(): + yield {"id": 1, "val": True} + + # create postgres table with single record + src_pl.run(items()) + + # initialize table replication for table_x, persist snapshot for initial load + slot_name, pub_name, table_snapshot = init_table_replication( + table_name=TABLE_NAME, + schema_name=src_pl.dataset_name, + persist_snapshot=persist_snapshot, + ) + expected_slot_name, expected_pub_name = _gen_table_replication_references( + TABLE_NAME, src_pl.dataset_name + ) + assert slot_name == expected_slot_name + assert pub_name == expected_pub_name + if persist_snapshot: + assert isinstance(table_snapshot, DltResource) + else: + assert table_snapshot is None + + # initialize table replication again + # method should return names of existing replication slot and publication name + # `table_snapshot` should be None—also when `persist_snapshot` is True, because + # a snapshot is only created when the slot is created + slot_name, pub_name, table_snapshot = init_table_replication( + table_name=TABLE_NAME, + schema_name=src_pl.dataset_name, + persist_snapshot=persist_snapshot, + ) + assert slot_name == expected_slot_name + assert pub_name == expected_pub_name + assert table_snapshot is None + + # initialize table replication again, now use `reset` arg to drop and + # recreate the slot and publication + # since there is a new slot, a 
`table_snapshot` should be returned when + # `persist_snapshot` is True + slot_name, pub_name, table_snapshot = init_table_replication( + table_name=TABLE_NAME, + schema_name=src_pl.dataset_name, + persist_snapshot=persist_snapshot, + reset=True, + ) + expected_slot_name, expected_pub_name = _gen_table_replication_references( + TABLE_NAME, src_pl.dataset_name + ) + assert slot_name == expected_slot_name + assert pub_name == expected_pub_name + if persist_snapshot: + assert isinstance(table_snapshot, DltResource) + else: + assert table_snapshot is None + + +@pytest.mark.parametrize("publish", ["insert", "insert, update, delete"]) +def test_write_disposition(src_pl: dlt.Pipeline, publish: str): + # resource to load data into postgres source table + @dlt.resource(name=TABLE_NAME, primary_key="id", write_disposition="merge") + def items(data): + yield data + + # create postgres table with single record + src_pl.run(items({"id": 1, "val": True})) + + if publish == "insert, update, delete": + add_pk(src_pl.sql_client, TABLE_NAME, "id") + + # initialize replication, create resources for snapshot and changes + _, _, table_snapshot = init_table_replication( + table_name=TABLE_NAME, + schema_name=src_pl.dataset_name, + publish=publish, + persist_snapshot=True, + ) + changes = table_changes(table_name=TABLE_NAME, schema_name=src_pl.dataset_name) + + # assert write dispositions + expected_write_disposition = "append" if publish == "insert" else "merge" + assert table_snapshot.write_disposition == expected_write_disposition + assert changes.write_disposition == expected_write_disposition + + # also check replicated_table resource + rep_tbl = replicated_table( + table_name=TABLE_NAME, + schema_name=src_pl.dataset_name, + init_conf={"publish": publish, "persist_snapshot": True, "reset": True}, + ) + assert rep_tbl.write_disposition == expected_write_disposition + + +@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) +@pytest.mark.parametrize("give_hints", [True, False]) +@pytest.mark.parametrize("explicit_init", [True, False]) +def test_all_data_types( + src_pl: dlt.Pipeline, + destination_name: str, + give_hints: bool, + explicit_init: bool, +): + data = deepcopy(TABLE_ROW_ALL_DATA_TYPES) + column_schema = deepcopy(TABLE_UPDATE_COLUMNS_SCHEMA) + + # resource to load data into postgres source table + @dlt.resource( + name=TABLE_NAME, + primary_key="col1", + write_disposition="merge", + columns=column_schema, + ) + def items(data): + yield data + + # create postgres table with single record containing all data types + src_pl.run(items(data)) + add_pk(src_pl.sql_client, TABLE_NAME, "col1") + + # excludes dlt system columns from replication + include_columns = data.keys() + + if explicit_init: + slot_name, pub_name, table_snapshot = init_table_replication( + table_name=TABLE_NAME, + schema_name=src_pl.dataset_name, + columns=column_schema if give_hints else None, + include_columns=include_columns, + persist_snapshot=True, + ) + else: + # init will be done inside `replicated_table` function + slot_name = None + pub_name = None + table_snapshot = None + + rep_tbl = replicated_table( + table_name=TABLE_NAME, + schema_name=src_pl.dataset_name, + columns=column_schema if give_hints else None, + include_columns=include_columns, + init_conf={"persist_snapshot": True}, + slot_name=slot_name, + pub_name=pub_name, + table_snapshot=table_snapshot, + ) + + # initial load + dest_pl = dlt.pipeline( + pipeline_name="dest_pl", destination=destination_name, full_refresh=True + ) + info = dest_pl.run(rep_tbl) + 
assert_load_info(info) + assert load_table_counts(dest_pl, TABLE_NAME)[TABLE_NAME] == 1 + + # insert two records in postgres table + r1 = deepcopy(data) + r2 = deepcopy(data) + r1["col1"] = 1 + r2["col1"] = 2 + src_pl.run(items([r1, r2])) + + info = dest_pl.run(rep_tbl) + assert_load_info(info) + assert load_table_counts(dest_pl, TABLE_NAME)[TABLE_NAME] == 3 + + if give_hints: + # compare observed with expected column types + observed = dest_pl.default_schema.get_table("items")["columns"] + for name, expected in column_schema.items(): + assert observed[name]["data_type"] == expected["data_type"] + # postgres bytea does not have precision + if ( + expected.get("precision") is not None + and expected["data_type"] != "binary" + ): + assert observed[name]["precision"] == expected["precision"] + + # update two records in postgres table + # this does two deletes and two inserts because dlt implements "merge" as "delete-and-insert" + # as such, postgres will create four replication messages: two of type Delete and two of type Insert + r1["col2"] = 1.5 + r2["col3"] = False + src_pl.run(items([r1, r2])) + + info = dest_pl.run(rep_tbl) + assert_load_info(info) + assert load_table_counts(dest_pl, TABLE_NAME)[TABLE_NAME] == 3 + + # compare observed records with expected records + qual_name = dest_pl.sql_client().make_qualified_table_name(TABLE_NAME) + observed = [ + {"col1": row[0], "col2": row[1], "col3": row[2]} + for row in select_data(dest_pl, f"SELECT col1, col2, col3 FROM {qual_name}") + ] + expected = [ + {"col1": 1, "col2": 1.5, "col3": True}, + {"col1": 2, "col2": 898912.821982, "col3": False}, + {"col1": 989127831, "col2": 898912.821982, "col3": True}, + ] + assert sorted(observed, key=lambda d: d["col1"]) == expected + + # now do an actual update, so postgres will create a replication message of type Update + with src_pl.sql_client() as c: + qual_name = src_pl.sql_client().make_qualified_table_name(TABLE_NAME) + c.execute_sql(f"UPDATE {qual_name} SET col2 = 2.5 WHERE col1 = 989127831;") + + # load the change to the destination + info = dest_pl.run(rep_tbl) + assert_load_info(info) + assert load_table_counts(dest_pl, TABLE_NAME)[TABLE_NAME] == 3 + + # compare observed records with expected records + qual_name = dest_pl.sql_client().make_qualified_table_name(TABLE_NAME) + observed = [ + {"col1": row[0], "col2": row[1], "col3": row[2]} + for row in select_data( + dest_pl, f"SELECT col1, col2, col3 FROM {qual_name} WHERE col1 = 989127831;" + ) + ] + expected = [{"col1": 989127831, "col2": 2.5, "col3": True}] + assert sorted(observed, key=lambda d: d["col1"]) == expected + + +@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) +def test_insert_only(src_pl: dlt.Pipeline, destination_name: str): + # resource to load data into postgres source table + @dlt.resource(name=TABLE_NAME, write_disposition="append") + def items(data): + yield data + + # create postgres table with single record + data = {"id": 1, "foo": "bar"} + src_pl.run(items(data)) + + # excludes dlt system columns from replication + include_columns = data.keys() + + # initialize table replication, persist snapshot for initial load + slot_name, pub_name, table_snapshot = init_table_replication( + table_name=TABLE_NAME, + schema_name=src_pl.dataset_name, + publish="insert", + persist_snapshot=True, + include_columns=include_columns, + ) + + # initial load + dest_pl = dlt.pipeline( + pipeline_name="dest_pl", destination=destination_name, full_refresh=True + ) + extract_info = dest_pl.extract(table_snapshot) + assert 
extract_info.asdict()["job_metrics"][0]["items_count"] == 1 + + # insert a record in postgres table + data = {"id": 2, "foo": "bar"} + src_pl.run(items(data)) + + # create resource for table changes + changes = table_changes( + table_name=TABLE_NAME, + schema_name=src_pl.dataset_name, + include_columns=include_columns, + slot_name=slot_name, + pub_name=pub_name, + ) + + # extract items from resource + extract_info = dest_pl.extract(changes) + assert extract_info.asdict()["job_metrics"][0]["items_count"] == 1 + + # do an update and a delete—these operations should not lead to items in the resource + with src_pl.sql_client() as c: + qual_name = src_pl.sql_client().make_qualified_table_name(TABLE_NAME) + c.execute_sql(f"UPDATE {qual_name} SET foo = 'baz' WHERE id = 2;") + c.execute_sql(f"DELETE FROM {qual_name} WHERE id = 2;") + extract_info = dest_pl.extract(changes) + assert extract_info.asdict()["job_metrics"] == [] + + +@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) +def test_batching(src_pl: dlt.Pipeline, destination_name: str): + # this test asserts the number of data items yielded by the `table_changes` + # resource is not affected by `target_batch_size` and the number of replication + # messages per transaction + + # resource to load data into postgres source table + @dlt.resource(name=TABLE_NAME, primary_key="id", write_disposition="merge") + def items(data): + yield data + + # create postgres table with single record + data = {"id": 1000, "val": True} + src_pl.run(items(data)) + add_pk(src_pl.sql_client, TABLE_NAME, "id") + + # excludes dlt system columns from replication + include_columns = data.keys() + + # initialize table replication + slot_name, pub_name, _ = init_table_replication( + table_name=TABLE_NAME, + schema_name=src_pl.dataset_name, + persist_snapshot=False, + include_columns=include_columns, + ) + + # create destination pipeline and resource + dest_pl = dlt.pipeline( + pipeline_name="dest_pl", destination=destination_name, full_refresh=True + ) + changes = table_changes( + table_name=TABLE_NAME, + schema_name=src_pl.dataset_name, + include_columns=include_columns, + target_batch_size=50, + slot_name=slot_name, + pub_name=pub_name, + ) + + # insert 100 records into source table in one transaction + batch = [{**r, **{"id": key}} for r in [data] for key in range(1, 101)] + src_pl.run(items(batch)) + extract_info = dest_pl.extract(changes) + assert extract_info.asdict()["job_metrics"][0]["items_count"] == 100 + + # insert 100 records into source table in 5 transactions + batch = [{**r, **{"id": key}} for r in [data] for key in range(101, 121)] + src_pl.run(items(batch)) + batch = [{**r, **{"id": key}} for r in [data] for key in range(121, 141)] + src_pl.run(items(batch)) + batch = [{**r, **{"id": key}} for r in [data] for key in range(141, 161)] + src_pl.run(items(batch)) + batch = [{**r, **{"id": key}} for r in [data] for key in range(161, 181)] + src_pl.run(items(batch)) + batch = [{**r, **{"id": key}} for r in [data] for key in range(181, 201)] + src_pl.run(items(batch)) + extract_info = dest_pl.extract(changes) + assert extract_info.asdict()["job_metrics"][0]["items_count"] == 100 + + +@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) +@pytest.mark.parametrize("give_conf", [True, False]) +def test_source_multiple_tables( + src_pl: dlt.Pipeline, destination_name: str, give_conf: bool +): + # resources to load data into postgres source tables + @dlt.resource(primary_key="id_w", write_disposition="merge") + def items_w(data): + yield data + + 
@dlt.resource(primary_key="id_x", write_disposition="merge") + def items_x(data): + yield data + + @dlt.resource(primary_key="id_y", write_disposition="merge") + def items_y(data): + yield data + + @dlt.resource(primary_key="id_z", write_disposition="merge") + def items_z(data): + yield data + + # create two postgres tables with single record + resources = [ + items_w({"id_w": 1, "val": [1, 2, 3]}), + items_x({"id_x": 1, "val": "foo"}), + items_y([{"id_y": 1, "val": True}, {"id_y": 2, "val": False}]), + items_z({"id_z": 1, "val": "2024-03-11"}), + ] + src_pl.run(resources) + add_pk(src_pl.sql_client, "items_w", "id_w") + add_pk(src_pl.sql_client, "items_x", "id_x") + add_pk(src_pl.sql_client, "items_y", "id_y") + if not give_conf: + add_pk(src_pl.sql_client, "items_z", "id_z") + + conf = ( + { + "items_x": { + "include_columns": ["id_x", "val"], + "init_conf": {"persist_snapshot": True}, + }, + "items_y": { + "include_columns": ["id_y", "val"], + "init_conf": {"persist_snapshot": False}, + }, + "items_z": { + "include_columns": ["id_z", "val"], + "init_conf": {"persist_snapshot": True, "publish": "insert"}, + }, + } + if give_conf + else None + ) + + rep_tbls = pg_replication_source( + table_names=["items_w", "items_x", "items_y", "items_z"], + schema_name=src_pl.dataset_name, + conf=conf, + ) + dest_pl = dlt.pipeline( + pipeline_name="dest_pl", destination=destination_name, full_refresh=True + ) + info = dest_pl.run(rep_tbls) + assert_load_info(info) + if give_conf: + assert load_table_counts(dest_pl, "items_x", "items_z") == { + "items_x": 1, + "items_z": 1, + } + with pytest.raises(dlt.destinations.exceptions.DatabaseUndefinedRelation): + # "items_w" table does not exist because we didn't specify "persist_snapshot" and it defaults to False + load_table_counts(dest_pl, "items_w") + with pytest.raises(dlt.destinations.exceptions.DatabaseUndefinedRelation): + # "items_y" table does not exist because we set "persist_snapshot" to False + load_table_counts(dest_pl, "items_y") + + # insert one record in both postgres tables + resources = [ + items_w({"id_w": 2, "val": [1, 2]}), + items_x({"id_x": 2, "val": "foo"}), + items_y({"id_y": 3, "val": True}), + items_z({"id_z": 2, "val": "2000-01-01"}), + ] + src_pl.run(resources) + + info = dest_pl.run(rep_tbls) + assert_load_info(info) + if give_conf: + assert load_table_counts( + dest_pl, "items_w", "items_x", "items_y", "items_z" + ) == { + "items_w": 1, + "items_x": 2, + "items_y": 1, + "items_z": 2, + } + else: + assert load_table_counts( + dest_pl, "items_w", "items_x", "items_y", "items_z" + ) == { + "items_w": 1, + "items_x": 1, + "items_y": 1, + "items_z": 1, + } diff --git a/tests/pg_replication/utils.py b/tests/pg_replication/utils.py new file mode 100644 index 000000000..78ab1e67f --- /dev/null +++ b/tests/pg_replication/utils.py @@ -0,0 +1,9 @@ +def add_pk(sql_client, table_name: str, column_name: str) -> None: + """Adds primary key to postgres table. + + In the context of replication, the primary key serves as REPLICA IDENTITY. + A REPLICA IDENTITY is required when publishing UPDATEs and/or DELETEs. 
+ """ + with sql_client() as c: + qual_name = c.make_qualified_table_name(table_name) + c.execute_sql(f"ALTER TABLE {qual_name} ADD PRIMARY KEY ({column_name});") diff --git a/tests/sql_database/pg_replication/test_pg_replication.py b/tests/sql_database/pg_replication/test_pg_replication.py deleted file mode 100644 index 6e6eed663..000000000 --- a/tests/sql_database/pg_replication/test_pg_replication.py +++ /dev/null @@ -1,157 +0,0 @@ -import pytest - -from typing import Iterator -from copy import deepcopy - -import dlt - -from tests.utils import ( - ALL_DESTINATIONS, - assert_load_info, - load_table_counts, - select_data, -) -from sources.sql_database.pg_replication import table_changes -from sources.sql_database.pg_replication.helpers import ( - init_table_replication, - _gen_table_replication_references, -) - -from .cases import TABLE_ROW_ALL_DATA_TYPES, TABLE_UPDATE_COLUMNS_SCHEMA - - -TABLE_NAME = "items" - - -@pytest.fixture() -def src_pl() -> Iterator[dlt.Pipeline]: - # setup - src_pl = dlt.pipeline( - destination="postgres", dataset_name="src_pl", full_refresh=True - ) - yield src_pl - # teardown - with src_pl.sql_client() as c: - c.drop_dataset() - slot_name, publication_name = _gen_table_replication_references( - TABLE_NAME, src_pl.dataset_name - ) - c.execute_sql(f"SELECT pg_drop_replication_slot('{slot_name}');") - c.execute_sql(f"DROP PUBLICATION IF EXISTS {publication_name};") - with c.with_staging_dataset(staging=True): - c.drop_dataset() - - -@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) -def test_all_data_types(src_pl: dlt.Pipeline, destination_name: str): - # resource to load data into postgres source table - @dlt.resource( - name=TABLE_NAME, - primary_key="col1", # will not be physically applied on Postgres table - write_disposition="merge", - columns=TABLE_UPDATE_COLUMNS_SCHEMA, - ) - def items(data): - yield data - - # create postgres table with single record containing all data types - data = TABLE_ROW_ALL_DATA_TYPES - src_pl.run(items(data)) - - # add primary key that serves as REPLICA IDENTITY, necessary when publishing UPDATEs and/or DELETEs - with src_pl.sql_client() as c: - qual_name = c.make_qualified_table_name(TABLE_NAME) - c.execute_sql(f"ALTER TABLE {qual_name} ADD PRIMARY KEY (col1);") - - # excludes dlt system columns from replication - include_columns = data.keys() - - # initialize table replication, persist snapshot for initial load - slot_name, publication_name, table_snapshot = init_table_replication( - table=TABLE_NAME, - schema=src_pl.dataset_name, - persist_snapshot=True, - include_columns=include_columns, - ) - table_snapshot.apply_hints( - columns=TABLE_UPDATE_COLUMNS_SCHEMA - ) # TODO: automatically get column schema from source table? 
- - # initial load - dest_pl = dlt.pipeline( - destination=destination_name, dataset_name="dest_pl", full_refresh=True - ) - info = dest_pl.run(table_snapshot) - assert_load_info(info) - assert load_table_counts(dest_pl, TABLE_NAME)[TABLE_NAME] == 1 - - # insert two records in postgres table - r1 = deepcopy(TABLE_ROW_ALL_DATA_TYPES) - r2 = deepcopy(TABLE_ROW_ALL_DATA_TYPES) - r1["col1"] = 1 - r2["col1"] = 2 - src_pl.run(items([r1, r2])) - - r = table_changes( - table=TABLE_NAME, - primary_key="col1", - include_columns=include_columns, - slot_name=slot_name, - publication_name=publication_name, - ) - info = dest_pl.run(r) - assert_load_info(info) - assert load_table_counts(dest_pl, TABLE_NAME)[TABLE_NAME] == 3 - - # compare observed with expected column types - observed = dest_pl.default_schema.get_table("items")["columns"] - for name, expected in TABLE_UPDATE_COLUMNS_SCHEMA.items(): - assert observed[name]["data_type"] == expected["data_type"] - # postgres bytea does not have precision - if expected.get("precision") is not None and expected["data_type"] != "binary": - assert observed[name]["precision"] == expected["precision"] - - # update two records in postgres table - # this does two deletes and two inserts because dlt implements "merge" and "delete-and-insert" - # as such, postgres will create four replication messages: two of type Delete and two of type Insert - r1["col2"] = 1.5 - r2["col3"] = False - src_pl.run(items([r1, r2])) - - info = dest_pl.run(r) - assert_load_info(info) - assert load_table_counts(dest_pl, TABLE_NAME)[TABLE_NAME] == 3 - - # compare observed records with expected records - qual_name = dest_pl.sql_client().make_qualified_table_name(TABLE_NAME) - observed = [ - {"col1": row[0], "col2": row[1], "col3": row[2]} - for row in select_data(dest_pl, f"SELECT col1, col2, col3 FROM {qual_name}") - ] - expected = [ - {"col1": 1, "col2": 1.5, "col3": True}, - {"col1": 2, "col2": 898912.821982, "col3": False}, - {"col1": 989127831, "col2": 898912.821982, "col3": True}, - ] - assert sorted(observed, key=lambda d: d["col1"]) == expected - - # now do an actual update, so postgres will create a replication message of type Update - with src_pl.sql_client() as c: - qual_name = src_pl.sql_client().make_qualified_table_name(TABLE_NAME) - c.execute_sql(f"UPDATE {qual_name} SET col2 = 2.5 WHERE col1 = 989127831;") - - # load the change to the destination - info = dest_pl.run(r) - assert_load_info(info) - assert load_table_counts(dest_pl, TABLE_NAME)[TABLE_NAME] == 3 - - # compare observed records with expected records - qual_name = dest_pl.sql_client().make_qualified_table_name(TABLE_NAME) - observed = [ - {"col1": row[0], "col2": row[1], "col3": row[2]} - for row in select_data( - dest_pl, f"SELECT col1, col2, col3 FROM {qual_name} WHERE col1 = 989127831;" - ) - ] - expected = [{"col1": 989127831, "col2": 2.5, "col3": True}] - assert sorted(observed, key=lambda d: d["col1"]) == expected From 1914acfa205fd82ada77adc33d5ccacc9d103fc6 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Tue, 12 Mar 2024 03:48:00 +0100 Subject: [PATCH 06/38] add credentials instruction --- sources/pg_replication/README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sources/pg_replication/README.md b/sources/pg_replication/README.md index ba8d2bed9..275a2dc93 100644 --- a/sources/pg_replication/README.md +++ b/sources/pg_replication/README.md @@ -11,3 +11,12 @@ It also needs `CREATE` privilege on the database: ```sql GRANT CREATE ON DATABASE dlt_data TO replication_user; ``` + +## Add 
credentials +1. Open `.dlt/secrets.toml`. +2. Enter the credentials + + ```toml + [sources.pg_replication] + credentials="postgresql://replication_user:<>@localhost:5432/dlt_data" + ``` \ No newline at end of file From cc6a11d8830ea0f1aef8efe7ebf645b8d45954e2 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Tue, 12 Mar 2024 04:13:34 +0100 Subject: [PATCH 07/38] undo adding secret --- sources/.dlt/example.secrets.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/sources/.dlt/example.secrets.toml b/sources/.dlt/example.secrets.toml index 5743ce4a7..a0e8963e0 100644 --- a/sources/.dlt/example.secrets.toml +++ b/sources/.dlt/example.secrets.toml @@ -16,9 +16,6 @@ location = "US" ### Sources [sources] -## local postgres as source -sql_database.credentials="postgresql://loader:loader@localhost:5432/dlt_data" - ## chess pipeline # the section below defines secrets for "chess_dlt_config_example" source in chess/__init__.py [sources.chess] From f815361ea4fbb3aaa0f1823864ccb3d268f27650 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Tue, 12 Mar 2024 04:13:43 +0100 Subject: [PATCH 08/38] add module docstring --- sources/pg_replication/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sources/pg_replication/__init__.py b/sources/pg_replication/__init__.py index 21b3f4c68..7b93ed339 100644 --- a/sources/pg_replication/__init__.py +++ b/sources/pg_replication/__init__.py @@ -1,3 +1,5 @@ +"""Replicates postgres tables in batch using logical decoding.""" + from typing import Optional, Sequence, Dict, Iterable import dlt From a318fee0c8aa3119cbaba588fb0cd002802ab629 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Wed, 13 Mar 2024 12:44:40 +0100 Subject: [PATCH 09/38] use from import to prevent AttributeError when running test_dlt_init.py --- sources/pg_replication/schema_types.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sources/pg_replication/schema_types.py b/sources/pg_replication/schema_types.py index cf79bcda5..835bca0a5 100644 --- a/sources/pg_replication/schema_types.py +++ b/sources/pg_replication/schema_types.py @@ -1,7 +1,7 @@ import json from typing import Optional, Any, Dict -import pypgoutput # type: ignore[import-untyped] +from pypgoutput.decoders import ColumnType # type: ignore[import-untyped] from dlt.common import Decimal from dlt.common.data_types.typing import TDataType @@ -84,7 +84,7 @@ def _to_dlt_column_type(type_id: int, atttypmod: int) -> TColumnType: return mapper.from_db_type(pg_type, precision, scale) -def _to_dlt_column_schema(col: pypgoutput.decoders.ColumnType) -> TColumnSchema: +def _to_dlt_column_schema(col: ColumnType) -> TColumnSchema: """Converts pypgoutput ColumnType to dlt column schema.""" dlt_column_type = _to_dlt_column_type(col.type_id, col.atttypmod) partial_column_schema = { From 8aed399ece6ff7885574e2e811ac11c2fc5ea7bb Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Fri, 15 Mar 2024 16:10:47 +0100 Subject: [PATCH 10/38] enable multiple tables per publication --- sources/pg_replication/__init__.py | 175 +---- sources/pg_replication/helpers.py | 381 ++++------ sources/pg_replication/schema_types.py | 1 - sources/pg_replication_pipeline.py | 46 -- tests/pg_replication/conftest.py | 25 +- tests/pg_replication/test_pg_replication.py | 802 ++++++++++++-------- tests/pg_replication/utils.py | 29 + 7 files changed, 678 insertions(+), 781 deletions(-) delete mode 100644 sources/pg_replication_pipeline.py diff --git a/sources/pg_replication/__init__.py b/sources/pg_replication/__init__.py index 
7b93ed339..72a8a7a2b 100644 --- a/sources/pg_replication/__init__.py +++ b/sources/pg_replication/__init__.py @@ -1,163 +1,54 @@ """Replicates postgres tables in batch using logical decoding.""" -from typing import Optional, Sequence, Dict, Iterable +from typing import Dict, Sequence, Optional import dlt -from dlt.common.schema.typing import TAnySchemaColumns -from dlt.sources import DltResource +from dlt.common.schema.typing import ( + TTableSchemaColumns, + TAnySchemaColumns, + TWriteDisposition, +) from dlt.sources.credentials import ConnectionStringCredentials +from dlt.extract.resource import DltResource from dlt.extract.typing import TTableHintTemplate -from .helpers import ( - init_table_replication, - table_changes, - snapshot_and_changes, - PgReplicationCredentialsConfiguration, - InitTableReplicationConfig, - ReplicatedTableConfig, -) +from .helpers import _gen_replication_resource_name, get_pub_ops, replication_items -@dlt.sources.config.with_config( - sections=("sources", "pg_replication"), spec=PgReplicationCredentialsConfiguration -) -def replicated_table( - table_name: str, - schema_name: str, +@dlt.sources.config.with_config(sections=("sources", "pg_replication")) +def replication_resource( + slot_name: str, + pub_name: str, credentials: ConnectionStringCredentials = dlt.secrets.value, - columns: TTableHintTemplate[TAnySchemaColumns] = None, - include_columns: Optional[Sequence[str]] = None, - init_conf: Optional[InitTableReplicationConfig] = None, - slot_name: Optional[str] = None, - pub_name: Optional[str] = None, - table_snapshot: Optional[DltResource] = None, + include_columns: Optional[Dict[str, Sequence[str]]] = None, + columns: Optional[Dict[str, TTableHintTemplate[TAnySchemaColumns]]] = None, target_batch_size: int = 1000, flush_slot: bool = True, ) -> DltResource: - """Returns a dlt resource that yields data for a replicated postgres table. - - This resource handles both the one-off initial load and subsequent changes - to the table. The initial load is optional and enabled by setting "persist_snapshot" - to True in `init_conf`, or by providing `table_snapshot` directly. - - Replication initialization is handled automatically if `slot_name` is not provided. - In that case, this function calls `init_table_replication`, which creates a - new dedicated replication slot and publication for the table, or returns the names of - the replication slot and publication if they already exist. - - Args: - table_name (str): Name of the table to replicate. - schema_name (str): Name of the schema the table belongs to. - credentials (ConnectionStringCredentials): Postgres database credentials. - columns (TTableHintTemplate[TAnySchemaColumns]): Column hints for the DltResource. - include_columns (Optional[Sequence[str]]): Sequence of names of - columns to include in the replicated table. Any columns not in the sequence - are excluded. If not provided, all columns are included in the table. - init_conf (Optional[InitTableReplicationConfig]): Dictionary to configure - the initialization of the table replication. Key-value pairs in the dictionary - are passed as keyword arguments to `init_table_replication`. Allowed keys - are "publish", "persist_snapshot", and "reset". - Example: {"publish": "insert, update", "persist_snapshot": True}. - slot_name (Optional[str]): Name of the replication slot to consume replication - messages from. Each table is expected to have a dedicated slot. 
If not - provided, `init_table_replication` is called, which creates a new replication slot - and publication for the table, or returns the names of the replication slot - and publication if they already exist. - pub_name (Optional[str]): Name of the publication that published DML operations - for the table. Each table is expected to have a dedicated publication. - table_snapshot (Optional[DltResource]): Resource yielding data items from - a snapshot exported during creation of a replication slot. This resource - can be created using `init_table_replication`. Only relevant if the - initialization of the table replication is not handled by this function. - target_batch_size (int): Desired number of data items (including metadata - items) yielded in a batch. This argument can be used to limit the data - items in memory. Note that the number of data items yielded can be (far) - greater than `min_batch_size`, because all messages belonging to the same - transaction are always processed in the same batch, regardless of the number - of messages in the transaction and regardless of the value of `min_batch_size`. - The number of data items can also be smaller than `min_batch_size` when - the replication slot is exhausted before a batch is full. This argument - is passed to `table_changes` and does not apply to the `table_snapshot` resource. - flush_slot (bool): Whether processed messages are discarded from the replication - slot. The recommended value is True. Be careful when setting this argument - to False—not flushing can eventually lead to a “disk full” condition on the server, - because the server retains all the WAL segments that might be needed to - stream the changes via all of the currently open replication slots. + write_disposition: TWriteDisposition = "append" + rep_cols: TTableSchemaColumns = {"lsn": {"data_type": "bigint"}} + resource_name = _gen_replication_resource_name(slot_name, pub_name) - Returns: - DltResource that yields data items for the initial load and subsequent - changes in the postgres table. 
- """ - if slot_name is None: - slot_name, pub_name, table_snapshot = init_table_replication( - table_name=table_name, - schema_name=schema_name, - columns=columns, - include_columns=include_columns, - **(dict() if init_conf is None else init_conf) - ) + pub_ops = get_pub_ops(pub_name, credentials) + if pub_ops["update"] or pub_ops["delete"]: + write_disposition = "merge" + rep_cols["lsn"]["dedup_sort"] = "desc" + if pub_ops["delete"]: + rep_cols["deleted_ts"] = {"hard_delete": True, "data_type": "timestamp"} - changes = table_changes( + return dlt.resource( + replication_items, + name=resource_name, + write_disposition=write_disposition, + columns=rep_cols, + )( credentials=credentials, - table_name=table_name, - schema_name=schema_name, - include_columns=include_columns, - target_batch_size=target_batch_size, slot_name=slot_name, pub_name=pub_name, + include_columns=include_columns, + columns=columns, + target_batch_size=target_batch_size, flush_slot=flush_slot, + write_disposition=write_disposition, ) - - # include `dedup_sort` and `hard_delete` hints defined on table_changes resource - if columns is None: - columns = changes.columns - else: - columns = {**columns, **changes.columns} # type: ignore[dict-item] - - # return combined resource that first yields from table_snapshot, then from table_changes - resource_name = schema_name + "_" + table_name - return dlt.resource( - # combined, - snapshot_and_changes, - name=resource_name, - table_name=table_name, - write_disposition=changes.write_disposition, - columns=columns, - primary_key=changes._hints.get("primary_key"), - )(resource_name, table_snapshot, changes) - - -@dlt.source(name="pg_replication") -def pg_replication_source( - table_names: Sequence[str] = dlt.config.value, - schema_name: str = dlt.config.value, - credentials: ConnectionStringCredentials = dlt.secrets.value, - conf: Optional[Dict[str, ReplicatedTableConfig]] = None, -) -> Iterable[DltResource]: - """A dlt source that yields resources for one or more replicated postgres tables. - - Args: - table_names (Sequence[str]): Sequences of names of tables to replicate. - schema_name (str): Name of the schema the tables belong to. - credentials (ConnectionStringCredentials): Postgres database credentials. - conf (Dict[str, ReplicatedTableConfig]): Mapping from table names to - ReplicatedTableConfig objects, which are configuration dictionaries for - the resources created for individual tables. Key-value pairs in the dictionary - are passed as keyword arguments to `replicated_table`. Allowed keys - are "columns", "include_columns", "target_batch_size", "init_conf", "slot_name", - "pub_name", "flush_slot", and "table_snapshot". See the documentation for - the `replicated_table` function for an explanation of these arguments. - Example: {"table_x": {"include_columns": ["id", "val"]}, "table_y": - {"init_conf": {"publish": "insert", "persist_snapshot": True}}}. - - Yields: - A DltResource for each replicated postgres table. 
- """ - for table_name in table_names: - yield replicated_table( - table_name=table_name, - schema_name=schema_name, - credentials=credentials, - **(dict() if conf is None or table_name not in conf else conf[table_name]) - ) diff --git a/sources/pg_replication/helpers.py b/sources/pg_replication/helpers.py index d4777340f..a4f8fc93c 100644 --- a/sources/pg_replication/helpers.py +++ b/sources/pg_replication/helpers.py @@ -4,15 +4,13 @@ Iterator, Union, List, - Tuple, Sequence, Any, - TypedDict, ) -from copy import deepcopy from dataclasses import dataclass, field import psycopg2 +from psycopg2.extensions import cursor from psycopg2.extras import ( LogicalReplicationConnection, ReplicationCursor, @@ -40,7 +38,9 @@ TTableSchemaColumns, TAnySchemaColumns, TColumnNames, + TWriteDisposition, ) +from dlt.common.schema.utils import get_columns_names_with_prop from dlt.common.data_writers.escape import escape_postgres_identifier from dlt.common.configuration.specs import BaseConfiguration, configspec from dlt.extract.typing import DataItemWithMeta, TTableHintTemplate @@ -53,219 +53,95 @@ from sql_database import sql_table from .schema_types import _to_dlt_column_schema, _to_dlt_val -from .exceptions import NoPrimaryKeyException - - -@configspec -class PgReplicationCredentialsConfiguration(BaseConfiguration): - credentials: ConnectionStringCredentials - - -class InitTableReplicationConfig(TypedDict, total=False): - publish: str - persist_snapshot: bool - reset: bool - - -class ReplicatedTableConfig(TypedDict, total=False): - columns: TTableHintTemplate[TAnySchemaColumns] - include_columns: Optional[Sequence[str]] - target_batch_size: int - init_conf: Optional[InitTableReplicationConfig] - slot_name: Optional[str] - pub_name: Optional[str] - flush_slot: bool - table_snapshot: Optional[DltResource] @dlt.sources.config.with_config(sections=("sources", "pg_replication")) -def init_table_replication( - table_name: str, +def init_replication( + table_names: Union[str, Sequence[str]], schema_name: str, + slot_name: str, + pub_name: str, credentials: ConnectionStringCredentials = dlt.secrets.value, publish: str = "insert, update, delete", - persist_snapshot: bool = False, - columns: TTableHintTemplate[TAnySchemaColumns] = None, - include_columns: Optional[Sequence[str]] = None, + persist_snapshots: bool = False, + include_columns: Optional[Dict[str, Sequence[str]]] = None, + columns: Optional[Dict[str, TTableHintTemplate[TAnySchemaColumns]]] = None, reset: bool = False, -) -> Tuple[str, str, Optional[DltResource]]: - """Initializes replication for a table. - - Creates a replication slot and publication dedicated to the table if they do not exist yet. - Does nothing if the slot and publication already exist and `reset` is set to `False`. - - Args: - credentials (ConnectionStringCredentials): Postgres database credentials. - table_name (str): Name of the table to initialize replication for. - schema_name (str): Name of the schema the table belongs to. - publish (str): Comma-separated string of DML operations. Can be used to - control which changes are included in the publication. Allowed operations - are `insert`, `update`, and `delete`. `truncate` is currently not - supported—messages of that type are ignored. - E.g. `publish="insert"` will create a publication that only publishes insert operations. - persist_snapshot (bool): Whether the table state in the exported snapshot - is persisted to a table. If true, a snapshot table is created in Postgres - and a DltResource object for this table is returned. 
The DltResource - can be used to perform an initial load of all data present in the table - at the moment the replication slot got created. - include_columns (Optional[Sequence[str]]): Optional sequence of names of - columns to include in the snapshot table. Any columns not in the sequence - are excluded. If not provided, all columns are included in the table. - reset (bool): Whether replication for the table is reset. Only relevant - if a replication slot and publication already exist for the table. If - set to True, the existing slot and publication are dropped and recreated. - - Returns: - Tuple with the names of the created slot and publication, and optionally - a table snapshot resource. - """ - slot_name, pub_name = _gen_table_replication_references(table_name, schema_name) +) -> Optional[List[DltResource]]: + if isinstance(table_names, str): + table_names = [table_names] cur = _get_rep_conn(credentials).cursor() if reset: drop_replication_slot(slot_name, cur) drop_publication(pub_name, cur) create_publication(pub_name, cur, publish) - add_table_to_publication(table_name, schema_name, pub_name, cur) - rep_slot = create_replication_slot(slot_name, cur) - if persist_snapshot: - if rep_slot is None: - logger.warning( - "Cannot persist snapshot because it does not exist. " + add_tables_to_publication(table_names, schema_name, pub_name, cur) + slot = create_replication_slot(slot_name, cur) + if persist_snapshots: + if slot is None: + logger.info( + "Cannot persist snapshots because they do not exist. " f'The replication slot "{slot_name}" already existed prior to calling this function.' ) else: # need separate session to read the snapshot: https://stackoverflow.com/q/75852587 cur_snap = _get_conn(credentials).cursor() - snapshot_table_name = persist_snapshot_table( - snapshot_name=rep_slot["snapshot_name"], - table_name=table_name, - schema_name=schema_name, - cur=cur_snap, - include_columns=include_columns, - ) - resource = sql_table( - credentials=credentials, - table=snapshot_table_name, - schema=schema_name, - detect_precision_hints=True, - ) - resource.apply_hints( - table_name=table_name, - write_disposition="append" if publish == "insert" else "merge", - columns=columns, - ) - return (slot_name, pub_name, resource) - return (slot_name, pub_name, None) + snapshot_table_names = [ + persist_snapshot_table( + snapshot_name=slot["snapshot_name"], + table_name=table_name, + schema_name=schema_name, + cur=cur_snap, + include_columns=None + if include_columns is None + else include_columns.get(table_name), + ) + for table_name in table_names + ] + snapshot_table_resources = [ + snapshot_table_resource( + snapshot_table_name=snapshot_table_name, + schema_name=schema_name, + table_name=table_name, + write_disposition="append" if publish == "insert" else "merge", + columns=None if columns is None else columns.get(table_name), + credentials=credentials, + ) + for table_name, snapshot_table_name in zip( + table_names, snapshot_table_names + ) + ] + return snapshot_table_resources + return None -@dlt.sources.config.with_config(sections=("sources", "pg_replication")) -def table_changes( - table_name: str, +def snapshot_table_resource( + snapshot_table_name: str, schema_name: str, + table_name: str, + write_disposition: TWriteDisposition, + columns: TTableHintTemplate[TAnySchemaColumns] = None, credentials: ConnectionStringCredentials = dlt.secrets.value, - include_columns: Optional[Sequence[str]] = None, - target_batch_size: int = 1000, - slot_name: Optional[str] = None, - pub_name: Optional[str] 
= None, - flush_slot: bool = True, ) -> DltResource: - """Returns a dlt resource that yields data items for changes in a postgres table. - - Relies on a dedicated replication slot and publication that publishes DML - operations (i.e. `insert`, `update`, and/or `delete`) for the table (helper - method `init_table_replication` can be used to set this up). - Uses `merge` write disposition to merge changes into destination table(s). - - Args: - credentials (ConnectionStringCredentials): Postgres database credentials. - table_name (str): Name of the table that is replicated - primary_key (TColumnNames): Names of one or multiple columns serving as - primary key on the table. Used to deduplicate data items in the `merge` - operation. - include_columns (Optional[Sequence[str]]): Optional sequence of names of - columns to include in the generated data itemes. Any columns not in the - sequence are excluded. If not provided, all columns are included. - target_batch_size (int): Desired number of data items (including metadata - items) yielded in a batch. This argument can be used to limit the data - items in memory. Note that the number of data items yielded can be (far) - greater than `min_batch_size`, because all messages belonging to the same - transaction are always processed in the same batch, regardless of the number - of messages in the transaction and regardless of the value of `min_batch_size`. - The number of data items can also be smaller than `min_batch_size` when - the replication slot is exhausted before a batch is full. - slot_name (str): Name of the replication slot to consume replication - messages from. Each table is expected to have a dedicated slot. - pub_name (str): Name of the publication that published DML operations - for the table. Each table is expected to have a dedicated publication. - flush_slot (bool): Whether processed messages are discarded from the replication - slot. The recommended value is True. Be careful when setting this argument - to False—not flushing can eventually lead to a “disk full” condition on the server, - because the server retains all the WAL segments that might be needed to - stream the changes via all of the currently open replication slots. - - Returns: - DltResource that yields data items for changes in the postgres table. - """ - write_disposition = "append" - columns: TTableSchemaColumns = {"lsn": {"data_type": "bigint"}} + resource: DltResource = sql_table( + credentials=credentials, + table=snapshot_table_name, + schema=schema_name, + detect_precision_hints=True, + ) primary_key = _get_pk(table_name, schema_name, credentials) - - if slot_name is None: - slot_name, pub_name = _gen_table_replication_references(table_name, schema_name) - if pub_name is not None: - pub_ops = get_pub_ops(pub_name, credentials) - if pub_ops["update"] or pub_ops["delete"]: - if primary_key is None: - raise NoPrimaryKeyException( - f'Table "{schema_name}.{table_name}" does not have a primary key. ' - "A primary key is required as REPLICA IDENTITY when INSERT " - "and/or DELETE operations are published." 
- ) - write_disposition = "merge" - columns["lsn"]["dedup_sort"] = "desc" - if pub_ops["delete"]: - columns["deleted_ts"] = {"hard_delete": True, "data_type": "timestamp"} - - return dlt.resource( # type: ignore[no-any-return, call-overload] - table_replication_items, - name=schema_name + "_" + table_name, + resource.apply_hints( table_name=table_name, write_disposition=write_disposition, - primary_key=primary_key, columns=columns, - )( - credentials=credentials, - slot_name=slot_name, - pub_name=pub_name, - include_columns=include_columns, - target_batch_size=target_batch_size, - flush_slot=flush_slot, - ) - - -def snapshot_and_changes( - resource_name: str, - table_snapshot: Optional[DltResource], - table_changes: DltResource, -) -> Iterator[Union[TDataItem, DataItemWithMeta]]: - """Generator for data items for a replicated postgres table. - - First yields from snapshot resource (if provided and if not yielded before) - to do the initial load, then from changes resource to process subsequent DML - operations on the table. - """ - snapshot_yielded = dlt.current.resource_state(resource_name).setdefault( - "snapshot_yielded", False + primary_key=primary_key, ) - if table_snapshot is not None and not snapshot_yielded: - yield from table_snapshot - dlt.current.resource_state(resource_name)["snapshot_yielded"] = True - yield from table_changes + return resource def create_publication( name: str, - cur: ReplicationCursor, + cur: cursor, publish: str = "insert, update, delete", ) -> None: """Creates a publication for logical replication if it doesn't exist yet. @@ -287,7 +163,7 @@ def add_table_to_publication( table_name: str, schema_name: str, pub_name: str, - cur: ReplicationCursor, + cur: cursor, ) -> None: """Adds a table to a publication for logical replication if the table is not a member yet. @@ -304,6 +180,18 @@ def add_table_to_publication( pass +def add_tables_to_publication( + table_names: Union[str, Sequence[str]], + schema_name: str, + pub_name: str, + cur: cursor, +) -> None: + if isinstance(table_names, str): + table_names = table_names + for table_name in table_names: + add_table_to_publication(table_name, schema_name, pub_name, cur) + + def create_replication_slot( # type: ignore[return] name: str, cur: ReplicationCursor, output_plugin: str = "pgoutput" ) -> Optional[Dict[str, str]]: @@ -328,9 +216,9 @@ def drop_replication_slot(name: str, cur: ReplicationCursor) -> None: """Drops a replication slot if it exists.""" try: cur.drop_replication_slot(name) - logger.warning(f'Successfully dropped replication slot "{name}".') + logger.info(f'Successfully dropped replication slot "{name}".') except psycopg2.errors.UndefinedObject: # the replication slot does not exist - logger.warning( + logger.info( f'Replication slot "{name}" cannot be dropped because it does not exist.' ) @@ -341,9 +229,9 @@ def drop_publication(name: str, cur: ReplicationCursor) -> None: try: cur.execute(f"DROP PUBLICATION {esc_name};") cur.connection.commit() - logger.warning(f"Successfully dropped publication {esc_name}.") + logger.info(f"Successfully dropped publication {esc_name}.") except psycopg2.errors.UndefinedObject: # the publication does not exist - logger.warning( + logger.info( f"Publication {esc_name} cannot be dropped because it does not exist." ) @@ -352,7 +240,7 @@ def persist_snapshot_table( snapshot_name: str, table_name: str, schema_name: str, - cur: psycopg2.extensions.cursor, + cur: cursor, include_columns: Optional[Sequence[str]] = None, ) -> str: """Persists exported snapshot table state. 
@@ -450,13 +338,15 @@ def advance_slot( cur.connection.close() -def table_replication_items( +def replication_items( credentials: ConnectionStringCredentials, slot_name: str, pub_name: str, - include_columns: Optional[Sequence[str]] = None, + include_columns: Optional[Dict[str, Sequence[str]]] = None, + columns: Optional[Dict[str, TTableHintTemplate[TAnySchemaColumns]]] = None, target_batch_size: int = 1000, flush_slot: bool = True, + write_disposition: TWriteDisposition = "append", # TODO: remove after https://github.com/dlt-hub/dlt/issues/1031 has been released ) -> Iterator[Union[TDataItem, DataItemWithMeta]]: """Yields data items from generator. @@ -465,7 +355,8 @@ def table_replication_items( Processes in batches to limit memory usage. """ # start where we left off in previous run - start_lsn = dlt.current.resource_state().get("last_commit_lsn", 0) + resource_name = _gen_replication_resource_name(slot_name, pub_name) + start_lsn = dlt.current.resource_state(resource_name).get("last_commit_lsn", 0) if flush_slot: advance_slot(start_lsn, slot_name, credentials) @@ -481,14 +372,18 @@ def table_replication_items( credentials=credentials, slot_name=slot_name, options=options, - start_lsn=start_lsn, upto_lsn=upto_lsn, + start_lsn=start_lsn, target_batch_size=target_batch_size, include_columns=include_columns, + columns=columns, + write_disposition=write_disposition, # TODO: remove after https://github.com/dlt-hub/dlt/issues/1031 has been released ) yield from gen if gen.generated_all: - dlt.current.resource_state()["last_commit_lsn"] = gen.last_commit_lsn + dlt.current.resource_state(resource_name)[ + "last_commit_lsn" + ] = gen.last_commit_lsn break start_lsn = gen.last_commit_lsn @@ -524,13 +419,8 @@ def _make_qualified_table_name(table_name: str, schema_name: str) -> str: ) -def _gen_table_replication_references( - table_name: str, schema_name: str -) -> Tuple[str, str]: - """Generates replication slot and publication names dedicated to a single table.""" - slot_name = f"_dlt_slot_{schema_name}_{table_name}" - pub_name = f"_dlt_pub_{schema_name}_{table_name}" - return (slot_name, pub_name) +def _gen_replication_resource_name(slot_name: str, pub_name: str) -> str: + return slot_name + "_" + pub_name def _get_pk( @@ -568,12 +458,14 @@ class ItemGenerator: credentials: ConnectionStringCredentials slot_name: str options: Dict[str, str] + upto_lsn: int start_lsn: int = 0 - upto_lsn: Optional[int] = None target_batch_size: int = 1000 - include_columns: Optional[Sequence[str]] = None + include_columns: Optional[Dict[str, Sequence[str]]] = (None,) # type: ignore[assignment] + columns: Optional[Dict[str, TTableHintTemplate[TAnySchemaColumns]]] = (None,) # type: ignore[assignment] last_commit_lsn: Optional[int] = field(default=None, init=False) generated_all: bool = False + write_disposition: TWriteDisposition = "append" # TODO: remove after https://github.com/dlt-hub/dlt/issues/1031 has been released def __iter__(self) -> Iterator[Union[TDataItem, DataItemWithMeta]]: """Consumes messages from replication slot and generates data items. 
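
For orientation, a rough illustration (not part of the patch) of the change items that `ItemGenerator` and `MessageConsumer` yield for a hypothetical table with primary key `id` whose publication includes update and delete; in the real items `deleted_ts` is a timestamp object and the primary key arrives as a resource hint.

```python
# Illustrative values only. With write_disposition="merge", "lsn" hinted with
# dedup_sort="desc" and "deleted_ts" hinted with hard_delete=True (see the
# replication_resource hunk above), dlt keeps the newest row per key and
# applies hard deletes when loading these items.
insert_item = {"id": 1, "val": "foo", "lsn": 1001}
update_item = {"id": 1, "val": "bar", "lsn": 1002}  # higher lsn wins on merge
delete_item = {"id": 1, "lsn": 1003, "deleted_ts": "2024-03-15T12:00:00+00:00"}
```
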
@@ -590,7 +482,10 @@ def __iter__(self) -> Iterator[Union[TDataItem, DataItemWithMeta]]: options=self.options, ) consumer = MessageConsumer( - self.upto_lsn, self.target_batch_size, self.include_columns + upto_lsn=self.upto_lsn, + target_batch_size=self.target_batch_size, + include_columns=self.include_columns, + columns=self.columns, ) cur.consume_stream(consumer) except StopReplication: # completed batch or reached `upto_lsn` @@ -599,32 +494,37 @@ def __iter__(self) -> Iterator[Union[TDataItem, DataItemWithMeta]]: cur.connection.close() self.last_commit_lsn = consumer.last_commit_lsn for i in consumer.data_items: + i.meta.hints[ + "write_disposition" + ] = ( + self.write_disposition + ) # TODO: remove after https://github.com/dlt-hub/dlt/issues/1031 has been released yield i self.generated_all = consumer.consumed_all class MessageConsumer: - """Consumes messages from a ReplicationCursor. - - Assumes all messages passed to __call__ have the same relation_id, i.e. they - belong to the same table. - """ + """Consumes messages from a ReplicationCursor.""" def __init__( self, upto_lsn: int, target_batch_size: int = 1000, - include_columns: Optional[Sequence[str]] = None, + include_columns: Optional[Dict[str, Sequence[str]]] = None, + columns: Optional[Dict[str, TTableHintTemplate[TAnySchemaColumns]]] = None, ) -> None: self.upto_lsn = upto_lsn self.target_batch_size = target_batch_size self.include_columns = include_columns + self.columns = columns self.consumed_all: bool = False # data_items attribute maintains all data items self.data_items: List[Union[TDataItem, DataItemWithMeta]] = [] # other attributes only maintain last-seen values - self.last_table_schema: TTableSchema + self.last_table_schema: Dict[ + int, TTableSchema + ] = dict() # maps relation_id to table schema self.last_commit_ts: pendulum.DateTime self.last_commit_lsn = None @@ -667,16 +567,14 @@ def process_msg(self, msg: ReplicationMessage) -> None: def process_relation(self, decoded_msg: Relation) -> None: """Processes a replication message of type Relation. - Stores table schema information from Relation message in object state - and adds meta data item to the relation's list of data items to update - the table schema. + Stores table schema information from Relation message in object state. 
""" # store table schema information columns = {c.name: _to_dlt_column_schema(c) for c in decoded_msg.columns} - self.last_table_schema = {"name": decoded_msg.relation_name, "columns": columns} - # add meta data item to update table schema - meta_data_item = self.gen_meta_data_item(columns, self.include_columns) - self.data_items.append(meta_data_item) + self.last_table_schema[decoded_msg.relation_id] = { + "name": decoded_msg.relation_name, + "columns": columns, + } def process_change( self, decoded_msg: Union[Insert, Update, Delete], msg_start_lsn: int @@ -689,27 +587,36 @@ def process_change( column_data = decoded_msg.new_tuple.column_data elif isinstance(decoded_msg, Delete): column_data = decoded_msg.old_tuple.column_data + table_name = self.last_table_schema[decoded_msg.relation_id]["name"] data_item = self.gen_data_item( + table_name=self.last_table_schema[decoded_msg.relation_id]["name"], data=column_data, - schema=self.last_table_schema["columns"], + column_schema=self.last_table_schema[decoded_msg.relation_id]["columns"], lsn=msg_start_lsn, commit_ts=self.last_commit_ts, for_delete=isinstance(decoded_msg, Delete), - include_columns=self.include_columns, + include_columns=None + if self.include_columns is None + else self.include_columns.get(table_name), + column_hints=None if self.columns is None else self.columns.get(table_name), ) self.data_items.append(data_item) @staticmethod def gen_data_item( + table_name: str, data: List[ColumnData], - schema: TTableSchemaColumns, + column_schema: TTableSchemaColumns, lsn: int, commit_ts: pendulum.DateTime, for_delete: bool, include_columns: Optional[Sequence[str]] = None, + column_hints: TTableHintTemplate[TAnySchemaColumns] = None, ) -> TDataItem: """Generates data item from replication message data and corresponding metadata.""" - columns = zip(schema.values(), data) + pairs = zip(column_schema.values(), data) + if include_columns is not None: + pairs = [(schema, data) for (schema, data) in pairs if schema["name"] in include_columns] # type: ignore[assignment] data_item = { schema["name"]: _to_dlt_val( val=data.col_data, @@ -717,26 +624,18 @@ def gen_data_item( byte1=data.col_data_category, for_delete=for_delete, ) - for (schema, data) in columns + for (schema, data) in pairs } - if include_columns is not None: - data_item = {k: v for k, v in data_item.items() if k in include_columns} data_item["lsn"] = lsn if for_delete: data_item["deleted_ts"] = commit_ts - return data_item - - @staticmethod - def gen_meta_data_item( - columns: TTableSchemaColumns, - include_columns: Optional[Sequence[str]] = None, - ) -> DataItemWithMeta: - """Returns a data item containing only metadata, no data.""" - _columns = deepcopy(columns) - if include_columns is not None: - _columns = {k: v for k, v in columns.items() if k in include_columns} return dlt.mark.with_hints( - [], - # write disposition needs to be explicitly set, else it defaults to "append" - dlt.mark.make_hints(write_disposition="merge", columns=_columns), + data_item, + dlt.mark.make_hints( + table_name=table_name, + primary_key=get_columns_names_with_prop( + {"columns": column_schema}, "primary_key" + ), + columns=column_hints, + ), ) diff --git a/sources/pg_replication/schema_types.py b/sources/pg_replication/schema_types.py index 835bca0a5..0e533b11a 100644 --- a/sources/pg_replication/schema_types.py +++ b/sources/pg_replication/schema_types.py @@ -90,7 +90,6 @@ def _to_dlt_column_schema(col: ColumnType) -> TColumnSchema: partial_column_schema = { "name": col.name, "primary_key": 
bool(col.part_of_pkey), - # "nullable": not bool(col.part_of_pkey), } return {**dlt_column_type, **partial_column_schema} # type: ignore[typeddict-item] diff --git a/sources/pg_replication_pipeline.py b/sources/pg_replication_pipeline.py deleted file mode 100644 index 3b68a9771..000000000 --- a/sources/pg_replication_pipeline.py +++ /dev/null @@ -1,46 +0,0 @@ -import dlt - -from pg_replication import pg_replication_source, replicated_table - - -def replicate_single_table() -> None: - pipeline = dlt.pipeline( - pipeline_name="pg_replication_pipeline", - destination="duckdb", - dataset_name="postgres_table", - ) - - table_resource = replicated_table( - table_name="your_table", - schema_name="your_schema", - init_conf={"persist_snapshot": True}, # this enables an initial load - ) - - info = pipeline.run(table_resource) - print(info) - - -def replicate_multiple_tables() -> None: - pipeline = dlt.pipeline( - pipeline_name="pg_replication_pipeline", - destination="duckdb", - dataset_name="postgres_tables", - ) - - replication_source = pg_replication_source( - table_names=["table_x", "table_y", "table_z"], - schema_name="your_schema", - conf={ - "table_x": {"include_columns": ["col_1", "col_2"]}, - "table_y": {"init_conf": {"publish": "insert"}}, - "table_z": {"init_conf": {"persist_snapshot": True}}, - }, - ) - - info = pipeline.run(replication_source) - print(info) - - -if __name__ == "__main__": - replicate_single_table() - # replicate_multiple_tables() diff --git a/tests/pg_replication/conftest.py b/tests/pg_replication/conftest.py index 2d0e25107..0878453ad 100644 --- a/tests/pg_replication/conftest.py +++ b/tests/pg_replication/conftest.py @@ -5,9 +5,6 @@ import dlt -TABLE_NAME = "items" - - @pytest.fixture() def src_pl() -> Iterator[dlt.Pipeline]: # setup @@ -27,21 +24,7 @@ def src_pl() -> Iterator[dlt.Pipeline]: c.drop_dataset() except Exception as e: print(e) - # drop replication slots - slot_names = [ - tup[0] - for tup in c.execute_sql( - f"SELECT slot_name FROM pg_replication_slots WHERE slot_name LIKE '_dlt_slot_{src_pl.dataset_name}_%'" - ) - ] - for slot_name in slot_names: - c.execute_sql(f"SELECT pg_drop_replication_slot('{slot_name}');") - # drop publications - pub_names = [ - tup[0] - for tup in c.execute_sql( - f"SELECT pubname FROM pg_publication WHERE pubname LIKE '_dlt_pub_{src_pl.dataset_name}_%'" - ) - ] - for pub_name in pub_names: - c.execute_sql(f"DROP PUBLICATION IF EXISTS {pub_name};") + # drop replication slot + c.execute_sql("SELECT pg_drop_replication_slot('test_slot');") + # drop publication + c.execute_sql("DROP PUBLICATION IF EXISTS test_pub;") diff --git a/tests/pg_replication/test_pg_replication.py b/tests/pg_replication/test_pg_replication.py index 400779bf3..9d38ec302 100644 --- a/tests/pg_replication/test_pg_replication.py +++ b/tests/pg_replication/test_pg_replication.py @@ -3,186 +3,294 @@ from copy import deepcopy import dlt -from dlt.extract.resource import DltResource +from dlt.destinations.job_client_impl import SqlJobClientBase from tests.utils import ( ALL_DESTINATIONS, assert_load_info, load_table_counts, - select_data, -) -from sources.pg_replication import ( - table_changes, - replicated_table, - pg_replication_source, -) -from sources.pg_replication.helpers import ( - init_table_replication, - _gen_table_replication_references, ) +from sources.pg_replication import replication_resource +from sources.pg_replication.helpers import init_replication from .cases import TABLE_ROW_ALL_DATA_TYPES, TABLE_UPDATE_COLUMNS_SCHEMA -from .conftest import 
TABLE_NAME -from .utils import add_pk +from .utils import add_pk, assert_loaded_data -@pytest.mark.parametrize("persist_snapshot", [True, False]) -def test_init_table_replication(src_pl: dlt.Pipeline, persist_snapshot: bool): - # resource to load data into postgres source table - @dlt.resource(table_name=TABLE_NAME, primary_key="id", write_disposition="merge") - def items(): - yield {"id": 1, "val": True} +@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) +def test_core_functionality(src_pl: dlt.Pipeline, destination_name: str) -> None: + @dlt.resource(write_disposition="merge", primary_key="id_x") + def tbl_x(data): + yield data - # create postgres table with single record - src_pl.run(items()) + @dlt.resource(write_disposition="merge", primary_key="id_y") + def tbl_y(data): + yield data - # initialize table replication for table_x, persist snapshot for initial load - slot_name, pub_name, table_snapshot = init_table_replication( - table_name=TABLE_NAME, + src_pl.run( + [ + tbl_x({"id_x": 1, "val_x": "foo"}), + tbl_y({"id_y": 1, "val_y": True}), + ] + ) + add_pk(src_pl.sql_client, "tbl_x", "id_x") + add_pk(src_pl.sql_client, "tbl_y", "id_y") + + slot_name = "test_slot" + pub_name = "test_pub" + + snapshots = init_replication( + table_names=("tbl_x", "tbl_y"), schema_name=src_pl.dataset_name, - persist_snapshot=persist_snapshot, + slot_name=slot_name, + pub_name=pub_name, + persist_snapshots=True, ) - expected_slot_name, expected_pub_name = _gen_table_replication_references( - TABLE_NAME, src_pl.dataset_name + + changes = replication_resource(slot_name, pub_name) + + src_pl.run( + [ + tbl_x([{"id_x": 2, "val_x": "bar"}, {"id_x": 3, "val_x": "baz"}]), + tbl_y({"id_y": 2, "val_y": False}), + ] ) - assert slot_name == expected_slot_name - assert pub_name == expected_pub_name - if persist_snapshot: - assert isinstance(table_snapshot, DltResource) - else: - assert table_snapshot is None - - # initialize table replication again - # method should return names of existing replication slot and publication name - # `table_snapshot` should be None—also when `persist_snapshot` is True, because - # a snapshot is only created when the slot is created - slot_name, pub_name, table_snapshot = init_table_replication( - table_name=TABLE_NAME, - schema_name=src_pl.dataset_name, - persist_snapshot=persist_snapshot, + + dest_pl = dlt.pipeline( + pipeline_name="dest_pl", destination=destination_name, full_refresh=True ) - assert slot_name == expected_slot_name - assert pub_name == expected_pub_name - assert table_snapshot is None - - # initialize table replication again, now use `reset` arg to drop and - # recreate the slot and publication - # since there is a new slot, a `table_snapshot` should be returned when - # `persist_snapshot` is True - slot_name, pub_name, table_snapshot = init_table_replication( - table_name=TABLE_NAME, + + # initial load + info = dest_pl.run(snapshots) + assert_load_info(info) + assert load_table_counts(dest_pl, "tbl_x", "tbl_y") == {"tbl_x": 1, "tbl_y": 1} + exp_tbl_x = [{"id_x": 1, "val_x": "foo"}] + exp_tbl_y = [{"id_y": 1, "val_y": True}] + assert_loaded_data(dest_pl, "tbl_x", ["id_x", "val_x"], exp_tbl_x, "id_x") + assert_loaded_data(dest_pl, "tbl_y", ["id_y", "val_y"], exp_tbl_y, "id_y") + + # process changes + info = dest_pl.run(changes) + assert_load_info(info) + assert load_table_counts(dest_pl, "tbl_x", "tbl_y") == {"tbl_x": 3, "tbl_y": 2} + exp_tbl_x = [ + {"id_x": 1, "val_x": "foo"}, + {"id_x": 2, "val_x": "bar"}, + {"id_x": 3, "val_x": "baz"}, + ] + exp_tbl_y 
= [{"id_y": 1, "val_y": True}, {"id_y": 2, "val_y": False}] + assert_loaded_data(dest_pl, "tbl_x", ["id_x", "val_x"], exp_tbl_x, "id_x") + assert_loaded_data(dest_pl, "tbl_y", ["id_y", "val_y"], exp_tbl_y, "id_y") + + # change single table + src_pl.run(tbl_y({"id_y": 3, "val_y": True})) + + # process changes + info = dest_pl.run(changes) + assert_load_info(info) + assert load_table_counts(dest_pl, "tbl_x", "tbl_y") == {"tbl_x": 3, "tbl_y": 3} + exp_tbl_y = [ + {"id_y": 1, "val_y": True}, + {"id_y": 2, "val_y": False}, + {"id_y": 3, "val_y": True}, + ] + assert_loaded_data(dest_pl, "tbl_x", ["id_x", "val_x"], exp_tbl_x, "id_x") + assert_loaded_data(dest_pl, "tbl_y", ["id_y", "val_y"], exp_tbl_y, "id_y") + + # update tables + with src_pl.sql_client() as c: + qual_name = src_pl.sql_client().make_qualified_table_name("tbl_x") + c.execute_sql(f"UPDATE {qual_name} SET val_x = 'foo_updated' WHERE id_x = 1;") + qual_name = src_pl.sql_client().make_qualified_table_name("tbl_y") + c.execute_sql(f"UPDATE {qual_name} SET val_y = false WHERE id_y = 1;") + + # process changes + info = dest_pl.run(changes) + assert_load_info(info) + assert load_table_counts(dest_pl, "tbl_x", "tbl_y") == {"tbl_x": 3, "tbl_y": 3} + exp_tbl_x = [ + {"id_x": 1, "val_x": "foo_updated"}, + {"id_x": 2, "val_x": "bar"}, + {"id_x": 3, "val_x": "baz"}, + ] + exp_tbl_y = [ + {"id_y": 1, "val_y": False}, + {"id_y": 2, "val_y": False}, + {"id_y": 3, "val_y": True}, + ] + assert_loaded_data(dest_pl, "tbl_x", ["id_x", "val_x"], exp_tbl_x, "id_x") + assert_loaded_data(dest_pl, "tbl_y", ["id_y", "val_y"], exp_tbl_y, "id_y") + + # delete from table + with src_pl.sql_client() as c: + qual_name = src_pl.sql_client().make_qualified_table_name("tbl_x") + c.execute_sql(f"DELETE FROM {qual_name} WHERE id_x = 1;") + + # process changes + info = dest_pl.run(changes) + assert_load_info(info) + assert load_table_counts(dest_pl, "tbl_x", "tbl_y") == {"tbl_x": 2, "tbl_y": 3} + exp_tbl_x = [{"id_x": 2, "val_x": "bar"}, {"id_x": 3, "val_x": "baz"}] + exp_tbl_y = [ + {"id_y": 1, "val_y": False}, + {"id_y": 2, "val_y": False}, + {"id_y": 3, "val_y": True}, + ] + assert_loaded_data(dest_pl, "tbl_x", ["id_x", "val_x"], exp_tbl_x, "id_x") + assert_loaded_data(dest_pl, "tbl_y", ["id_y", "val_y"], exp_tbl_y, "id_y") + + +@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) +def test_without_init_load(src_pl: dlt.Pipeline, destination_name: str) -> None: + @dlt.resource(write_disposition="merge", primary_key="id_x") + def tbl_x(data): + yield data + + @dlt.resource(write_disposition="merge", primary_key="id_y") + def tbl_y(data): + yield data + + # create postgres table + # since we're skipping initial load, these records should not be in the replicated table + src_pl.run( + [ + tbl_x({"id_x": 1, "val_x": "foo"}), + tbl_y({"id_y": 1, "val_y": True}), + ] + ) + add_pk(src_pl.sql_client, "tbl_x", "id_x") + add_pk(src_pl.sql_client, "tbl_y", "id_y") + + # initialize replication and create resource for changes + slot_name = "test_slot" + pub_name = "test_pub" + init_replication( + table_names=("tbl_x", "tbl_y"), schema_name=src_pl.dataset_name, - persist_snapshot=persist_snapshot, - reset=True, + slot_name=slot_name, + pub_name=pub_name, ) - expected_slot_name, expected_pub_name = _gen_table_replication_references( - TABLE_NAME, src_pl.dataset_name + changes = replication_resource(slot_name, pub_name) + + # change postgres table after replication has been initialized + # these records should be in the replicated table + src_pl.run( + [ + tbl_x([{"id_x": 
2, "val_x": "bar"}, {"id_x": 3, "val_x": "baz"}]), + tbl_y({"id_y": 2, "val_y": False}), + ] ) - assert slot_name == expected_slot_name - assert pub_name == expected_pub_name - if persist_snapshot: - assert isinstance(table_snapshot, DltResource) - else: - assert table_snapshot is None + # load changes to destination and assert expectations + dest_pl = dlt.pipeline( + pipeline_name="dest_pl", destination=destination_name, full_refresh=True + ) + info = dest_pl.run(changes) + assert_load_info(info) + assert load_table_counts(dest_pl, "tbl_x", "tbl_y") == {"tbl_x": 2, "tbl_y": 1} + exp_tbl_x = [{"id_x": 2, "val_x": "bar"}, {"id_x": 3, "val_x": "baz"}] + exp_tbl_y = [{"id_y": 2, "val_y": False}] + assert_loaded_data(dest_pl, "tbl_x", ["id_x", "val_x"], exp_tbl_x, "id_x") + assert_loaded_data(dest_pl, "tbl_y", ["id_y", "val_y"], exp_tbl_y, "id_y") -@pytest.mark.parametrize("publish", ["insert", "insert, update, delete"]) -def test_write_disposition(src_pl: dlt.Pipeline, publish: str): - # resource to load data into postgres source table - @dlt.resource(name=TABLE_NAME, primary_key="id", write_disposition="merge") + # delete from table + with src_pl.sql_client() as c: + qual_name = src_pl.sql_client().make_qualified_table_name("tbl_x") + c.execute_sql(f"DELETE FROM {qual_name} WHERE id_x = 2;") + + # process change and assert expectations + info = dest_pl.run(changes) + assert_load_info(info) + assert load_table_counts(dest_pl, "tbl_x", "tbl_y") == {"tbl_x": 1, "tbl_y": 1} + exp_tbl_x = [{"id_x": 3, "val_x": "baz"}] + exp_tbl_y = [{"id_y": 2, "val_y": False}] + assert_loaded_data(dest_pl, "tbl_x", ["id_x", "val_x"], exp_tbl_x, "id_x") + assert_loaded_data(dest_pl, "tbl_y", ["id_y", "val_y"], exp_tbl_y, "id_y") + + +def test_insert_only(src_pl: dlt.Pipeline) -> None: def items(data): yield data # create postgres table with single record - src_pl.run(items({"id": 1, "val": True})) - - if publish == "insert, update, delete": - add_pk(src_pl.sql_client, TABLE_NAME, "id") + src_pl.run(items({"id": 1, "foo": "bar"})) - # initialize replication, create resources for snapshot and changes - _, _, table_snapshot = init_table_replication( - table_name=TABLE_NAME, + # initialize replication and create resource for changes + slot_name = "test_slot" + pub_name = "test_pub" + init_replication( + table_names="items", schema_name=src_pl.dataset_name, - publish=publish, - persist_snapshot=True, + slot_name=slot_name, + pub_name=pub_name, + publish="insert", ) - changes = table_changes(table_name=TABLE_NAME, schema_name=src_pl.dataset_name) + changes = replication_resource(slot_name, pub_name) - # assert write dispositions - expected_write_disposition = "append" if publish == "insert" else "merge" - assert table_snapshot.write_disposition == expected_write_disposition - assert changes.write_disposition == expected_write_disposition + # insert a record in postgres table + src_pl.run(items({"id": 2, "foo": "bar"})) - # also check replicated_table resource - rep_tbl = replicated_table( - table_name=TABLE_NAME, - schema_name=src_pl.dataset_name, - init_conf={"publish": publish, "persist_snapshot": True, "reset": True}, - ) - assert rep_tbl.write_disposition == expected_write_disposition + # extract items from resource + dest_pl = dlt.pipeline(pipeline_name="dest_pl", full_refresh=True) + extract_info = dest_pl.extract(changes) + assert extract_info.asdict()["job_metrics"][0]["items_count"] == 1 + + # do an update and a delete—these operations should not lead to items in the resource + with src_pl.sql_client() as c: + 
qual_name = src_pl.sql_client().make_qualified_table_name("items") + c.execute_sql(f"UPDATE {qual_name} SET foo = 'baz' WHERE id = 2;") + c.execute_sql(f"DELETE FROM {qual_name} WHERE id = 2;") + extract_info = dest_pl.extract(changes) + assert extract_info.asdict()["job_metrics"] == [] @pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) @pytest.mark.parametrize("give_hints", [True, False]) -@pytest.mark.parametrize("explicit_init", [True, False]) +@pytest.mark.parametrize("init_load", [True, False]) def test_all_data_types( src_pl: dlt.Pipeline, destination_name: str, give_hints: bool, - explicit_init: bool, -): + init_load: bool, +) -> None: data = deepcopy(TABLE_ROW_ALL_DATA_TYPES) column_schema = deepcopy(TABLE_UPDATE_COLUMNS_SCHEMA) # resource to load data into postgres source table - @dlt.resource( - name=TABLE_NAME, - primary_key="col1", - write_disposition="merge", - columns=column_schema, - ) + @dlt.resource(primary_key="col1", write_disposition="merge", columns=column_schema) def items(data): yield data # create postgres table with single record containing all data types src_pl.run(items(data)) - add_pk(src_pl.sql_client, TABLE_NAME, "col1") - - # excludes dlt system columns from replication - include_columns = data.keys() - - if explicit_init: - slot_name, pub_name, table_snapshot = init_table_replication( - table_name=TABLE_NAME, - schema_name=src_pl.dataset_name, - columns=column_schema if give_hints else None, - include_columns=include_columns, - persist_snapshot=True, - ) - else: - # init will be done inside `replicated_table` function - slot_name = None - pub_name = None - table_snapshot = None - - rep_tbl = replicated_table( - table_name=TABLE_NAME, + add_pk(src_pl.sql_client, "items", "col1") + + # initialize replication and create resources + slot_name = "test_slot" + pub_name = "test_pub" + snapshots = init_replication( + table_names="items", schema_name=src_pl.dataset_name, - columns=column_schema if give_hints else None, - include_columns=include_columns, - init_conf={"persist_snapshot": True}, slot_name=slot_name, pub_name=pub_name, - table_snapshot=table_snapshot, + persist_snapshots=init_load, + columns={"items": column_schema} if give_hints else None, + ) + + changes = replication_resource( + slot_name=slot_name, + pub_name=pub_name, + columns={"items": column_schema} if give_hints else None, ) # initial load dest_pl = dlt.pipeline( pipeline_name="dest_pl", destination=destination_name, full_refresh=True ) - info = dest_pl.run(rep_tbl) - assert_load_info(info) - assert load_table_counts(dest_pl, TABLE_NAME)[TABLE_NAME] == 1 + if init_load: + info = dest_pl.run(snapshots[0]) + assert_load_info(info) + assert load_table_counts(dest_pl, "items")["items"] == 1 # insert two records in postgres table r1 = deepcopy(data) @@ -191,9 +299,9 @@ def items(data): r2["col1"] = 2 src_pl.run(items([r1, r2])) - info = dest_pl.run(rep_tbl) + info = dest_pl.run(changes) assert_load_info(info) - assert load_table_counts(dest_pl, TABLE_NAME)[TABLE_NAME] == 3 + assert load_table_counts(dest_pl, "items")["items"] == 3 if init_load else 2 if give_hints: # compare observed with expected column types @@ -214,265 +322,299 @@ def items(data): r2["col3"] = False src_pl.run(items([r1, r2])) - info = dest_pl.run(rep_tbl) + # process changes and assert expectations + info = dest_pl.run(changes) assert_load_info(info) - assert load_table_counts(dest_pl, TABLE_NAME)[TABLE_NAME] == 3 - - # compare observed records with expected records - qual_name = 
dest_pl.sql_client().make_qualified_table_name(TABLE_NAME) - observed = [ - {"col1": row[0], "col2": row[1], "col3": row[2]} - for row in select_data(dest_pl, f"SELECT col1, col2, col3 FROM {qual_name}") - ] - expected = [ + assert load_table_counts(dest_pl, "items")["items"] == 3 if init_load else 2 + exp = [ {"col1": 1, "col2": 1.5, "col3": True}, {"col1": 2, "col2": 898912.821982, "col3": False}, - {"col1": 989127831, "col2": 898912.821982, "col3": True}, + { + "col1": 989127831, + "col2": 898912.821982, + "col3": True, + }, # only present with init load ] - assert sorted(observed, key=lambda d: d["col1"]) == expected + if not init_load: + del exp[-1] + assert_loaded_data(dest_pl, "items", ["col1", "col2", "col3"], exp, "col1") # now do an actual update, so postgres will create a replication message of type Update with src_pl.sql_client() as c: - qual_name = src_pl.sql_client().make_qualified_table_name(TABLE_NAME) - c.execute_sql(f"UPDATE {qual_name} SET col2 = 2.5 WHERE col1 = 989127831;") + qual_name = src_pl.sql_client().make_qualified_table_name("items") + c.execute_sql(f"UPDATE {qual_name} SET col2 = 2.5 WHERE col1 = 2;") - # load the change to the destination - info = dest_pl.run(rep_tbl) + # process change and assert expectation + info = dest_pl.run(changes) assert_load_info(info) - assert load_table_counts(dest_pl, TABLE_NAME)[TABLE_NAME] == 3 - - # compare observed records with expected records - qual_name = dest_pl.sql_client().make_qualified_table_name(TABLE_NAME) - observed = [ - {"col1": row[0], "col2": row[1], "col3": row[2]} - for row in select_data( - dest_pl, f"SELECT col1, col2, col3 FROM {qual_name} WHERE col1 = 989127831;" - ) - ] - expected = [{"col1": 989127831, "col2": 2.5, "col3": True}] - assert sorted(observed, key=lambda d: d["col1"]) == expected + assert load_table_counts(dest_pl, "items")["items"] == 3 if init_load else 2 + exp = [{"col1": 2, "col2": 2.5, "col3": False}] + assert_loaded_data( + dest_pl, "items", ["col1", "col2", "col3"], exp, "col1", "col1 = 2" + ) -@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) -def test_insert_only(src_pl: dlt.Pipeline, destination_name: str): - # resource to load data into postgres source table - @dlt.resource(name=TABLE_NAME, write_disposition="append") +@pytest.mark.parametrize("publish", ["insert", "insert, update, delete"]) +def test_write_disposition(src_pl: dlt.Pipeline, publish: str) -> None: + @dlt.resource def items(data): yield data - # create postgres table with single record - data = {"id": 1, "foo": "bar"} - src_pl.run(items(data)) - - # excludes dlt system columns from replication - include_columns = data.keys() + # create postgres table + src_pl.run(items({"id": 1, "val": True})) - # initialize table replication, persist snapshot for initial load - slot_name, pub_name, table_snapshot = init_table_replication( - table_name=TABLE_NAME, + # create resources + slot_name = "test_slot" + pub_name = "test_pub" + snapshots = init_replication( + table_names="items", schema_name=src_pl.dataset_name, - publish="insert", - persist_snapshot=True, - include_columns=include_columns, + slot_name=slot_name, + pub_name=pub_name, + publish=publish, + persist_snapshots=True, ) + changes = replication_resource(slot_name, pub_name) - # initial load - dest_pl = dlt.pipeline( - pipeline_name="dest_pl", destination=destination_name, full_refresh=True - ) - extract_info = dest_pl.extract(table_snapshot) - assert extract_info.asdict()["job_metrics"][0]["items_count"] == 1 + # assert write dispositions + 
expected_write_disposition = "append" if publish == "insert" else "merge" + assert snapshots[0].write_disposition == expected_write_disposition + assert changes.write_disposition == expected_write_disposition - # insert a record in postgres table - data = {"id": 2, "foo": "bar"} - src_pl.run(items(data)) - # create resource for table changes - changes = table_changes( - table_name=TABLE_NAME, +@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) +@pytest.mark.parametrize("init_load", [True, False]) +def test_include_columns( + src_pl: dlt.Pipeline, destination_name: str, init_load: bool +) -> None: + def get_cols(pipeline: dlt.Pipeline, table_name: str) -> set: + with pipeline.destination_client(pipeline.default_schema_name) as client: + client: SqlJobClientBase + return { + k + for k in client.get_storage_table(table_name)[1].keys() + if not k.startswith("_dlt_") + } + + @dlt.resource + def tbl_x(data): + yield data + + @dlt.resource + def tbl_y(data): + yield data + + @dlt.resource + def tbl_z(data): + yield data + + # create three postgres tables + src_pl.run( + [ + tbl_x({"id_x": 1, "val_x": "foo", "another_col_x": 1}), + tbl_y({"id_y": 1, "val_y": "foo", "another_col_y": 1}), + tbl_z({"id_z": 1, "val_z": "foo", "another_col_z": 1}), + ] + ) + + # initialize replication and create resources + slot_name = "test_slot" + pub_name = "test_pub" + include_columns = { + "tbl_x": ["id_x", "val_x"], + "tbl_y": ["id_y", "val_y"], + # tbl_z is not specified, hence all columns should be included + } + snapshots = init_replication( + table_names=("tbl_x", "tbl_y", "tbl_z"), schema_name=src_pl.dataset_name, - include_columns=include_columns, slot_name=slot_name, pub_name=pub_name, + publish="insert", + persist_snapshots=init_load, + include_columns=include_columns, + ) + changes = replication_resource( + slot_name=slot_name, pub_name=pub_name, include_columns=include_columns ) - # extract items from resource - extract_info = dest_pl.extract(changes) - assert extract_info.asdict()["job_metrics"][0]["items_count"] == 1 + # update three postgres tables + src_pl.run( + [ + tbl_x({"id_x": 2, "val_x": "foo", "another_col_x": 1}), + tbl_y({"id_y": 2, "val_y": "foo", "another_col_y": 1}), + tbl_z({"id_z": 2, "val_z": "foo", "another_col_z": 1}), + ] + ) - # do an update and a delete—these operations should not lead to items in the resource - with src_pl.sql_client() as c: - qual_name = src_pl.sql_client().make_qualified_table_name(TABLE_NAME) - c.execute_sql(f"UPDATE {qual_name} SET foo = 'baz' WHERE id = 2;") - c.execute_sql(f"DELETE FROM {qual_name} WHERE id = 2;") - extract_info = dest_pl.extract(changes) - assert extract_info.asdict()["job_metrics"] == [] + # load to destination and assert column expectations + dest_pl = dlt.pipeline( + pipeline_name="dest_pl", destination=destination_name, full_refresh=True + ) + if init_load: + dest_pl.run(snapshots) + assert get_cols(dest_pl, "tbl_x") == {"id_x", "val_x"} + assert get_cols(dest_pl, "tbl_y") == {"id_y", "val_y"} + assert get_cols(dest_pl, "tbl_z") == {"id_z", "val_z", "another_col_z"} + dest_pl.run(changes) + assert get_cols(dest_pl, "tbl_x") == {"id_x", "val_x", "lsn"} + assert get_cols(dest_pl, "tbl_y") == {"id_y", "val_y", "lsn"} + assert get_cols(dest_pl, "tbl_z") == {"id_z", "val_z", "another_col_z", "lsn"} @pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) -def test_batching(src_pl: dlt.Pipeline, destination_name: str): - # this test asserts the number of data items yielded by the `table_changes` - # resource is not 
affected by `target_batch_size` and the number of replication - # messages per transaction +@pytest.mark.parametrize("init_load", [True, False]) +def test_column_hints( + src_pl: dlt.Pipeline, destination_name: str, init_load: bool +) -> None: + def get_cols(pipeline: dlt.Pipeline, table_name: str) -> set: + with pipeline.destination_client(pipeline.default_schema_name) as client: + client: SqlJobClientBase + return { + k + for k in client.get_storage_table(table_name)[1].keys() + if not k.startswith("_dlt_") + } + + @dlt.resource + def tbl_x(data): + yield data - # resource to load data into postgres source table - @dlt.resource(name=TABLE_NAME, primary_key="id", write_disposition="merge") - def items(data): + @dlt.resource + def tbl_y(data): yield data - # create postgres table with single record - data = {"id": 1000, "val": True} - src_pl.run(items(data)) - add_pk(src_pl.sql_client, TABLE_NAME, "id") + @dlt.resource + def tbl_z(data): + yield data - # excludes dlt system columns from replication - include_columns = data.keys() + # create three postgres tables + src_pl.run( + [ + tbl_x({"id_x": 1, "val_x": "foo", "another_col_x": 1}), + tbl_y({"id_y": 1, "val_y": "foo", "another_col_y": 1}), + tbl_z({"id_z": 1, "val_z": "foo", "another_col_z": 1}), + ] + ) - # initialize table replication - slot_name, pub_name, _ = init_table_replication( - table_name=TABLE_NAME, + # initialize replication and create resources + slot_name = "test_slot" + pub_name = "test_pub" + column_hints = { + "tbl_x": {"another_col_x": {"data_type": "double"}}, + "tbl_y": {"another_col_y": {"precision": 32}}, + # tbl_z is not specified, hence all columns should be included + } + snapshots = init_replication( + table_names=("tbl_x", "tbl_y", "tbl_z"), schema_name=src_pl.dataset_name, - persist_snapshot=False, - include_columns=include_columns, + slot_name=slot_name, + pub_name=pub_name, + publish="insert", + persist_snapshots=init_load, + columns=column_hints, + ) + changes = replication_resource( + slot_name=slot_name, pub_name=pub_name, columns=column_hints ) - # create destination pipeline and resource + # update three postgres tables + src_pl.run( + [ + tbl_x({"id_x": 2, "val_x": "foo", "another_col_x": 1}), + tbl_y({"id_y": 2, "val_y": "foo", "another_col_y": 1}), + tbl_z({"id_z": 2, "val_z": "foo", "another_col_z": 1}), + ] + ) + + # load to destination and assert column expectations dest_pl = dlt.pipeline( pipeline_name="dest_pl", destination=destination_name, full_refresh=True ) - changes = table_changes( - table_name=TABLE_NAME, + if init_load: + dest_pl.run(snapshots) + assert ( + dest_pl.default_schema.get_table_columns("tbl_x")["another_col_x"][ + "data_type" + ] + == "double" + ) + assert ( + dest_pl.default_schema.get_table_columns("tbl_y")["another_col_y"][ + "precision" + ] + == 32 + ) + assert ( + dest_pl.default_schema.get_table_columns("tbl_z")["another_col_z"][ + "data_type" + ] + == "bigint" + ) + dest_pl.run(changes) + assert ( + dest_pl.default_schema.get_table_columns("tbl_x")["another_col_x"]["data_type"] + == "double" + ) + assert ( + dest_pl.default_schema.get_table_columns("tbl_y")["another_col_y"]["precision"] + == 32 + ) + assert ( + dest_pl.default_schema.get_table_columns("tbl_z")["another_col_z"]["data_type"] + == "bigint" + ) + + # the tests below should pass, but they don't because of a bug that causes + # column hints to be added to other tables when dispatching to multiple tables + assert "another_col_x" not in dest_pl.default_schema.get_table_columns("tbl_y") + assert 
"another_col_x" not in dest_pl.default_schema.get_table_columns("tbl_z") + assert "another_col_y" not in dest_pl.default_schema.get_table_columns( + "tbl_x", include_incomplete=True + ) + assert "another_col_y" not in dest_pl.default_schema.get_table_columns( + "tbl_z", include_incomplete=True + ) + + +def test_batching(src_pl: dlt.Pipeline) -> None: + # this test asserts the number of data items yielded by the replication resource + # is not affected by `target_batch_size` and the number of replication messages per transaction + + # create postgres table with single record + data = {"id": 1000, "val": True} + src_pl.run([data], table_name="items") + + # initialize replication and create resource for changes + slot_name = "test_slot" + pub_name = "test_pub" + init_replication( + table_names="items", schema_name=src_pl.dataset_name, - include_columns=include_columns, - target_batch_size=50, slot_name=slot_name, pub_name=pub_name, ) + changes = replication_resource(slot_name, pub_name, target_batch_size=50) + + # create destination pipeline and resource + dest_pl = dlt.pipeline(pipeline_name="dest_pl", full_refresh=True) # insert 100 records into source table in one transaction batch = [{**r, **{"id": key}} for r in [data] for key in range(1, 101)] - src_pl.run(items(batch)) + src_pl.run(batch, table_name="items") extract_info = dest_pl.extract(changes) assert extract_info.asdict()["job_metrics"][0]["items_count"] == 100 # insert 100 records into source table in 5 transactions batch = [{**r, **{"id": key}} for r in [data] for key in range(101, 121)] - src_pl.run(items(batch)) + src_pl.run(batch, table_name="items") batch = [{**r, **{"id": key}} for r in [data] for key in range(121, 141)] - src_pl.run(items(batch)) + src_pl.run(batch, table_name="items") batch = [{**r, **{"id": key}} for r in [data] for key in range(141, 161)] - src_pl.run(items(batch)) + src_pl.run(batch, table_name="items") batch = [{**r, **{"id": key}} for r in [data] for key in range(161, 181)] - src_pl.run(items(batch)) + src_pl.run(batch, table_name="items") batch = [{**r, **{"id": key}} for r in [data] for key in range(181, 201)] - src_pl.run(items(batch)) + src_pl.run(batch, table_name="items") extract_info = dest_pl.extract(changes) assert extract_info.asdict()["job_metrics"][0]["items_count"] == 100 - - -@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) -@pytest.mark.parametrize("give_conf", [True, False]) -def test_source_multiple_tables( - src_pl: dlt.Pipeline, destination_name: str, give_conf: bool -): - # resources to load data into postgres source tables - @dlt.resource(primary_key="id_w", write_disposition="merge") - def items_w(data): - yield data - - @dlt.resource(primary_key="id_x", write_disposition="merge") - def items_x(data): - yield data - - @dlt.resource(primary_key="id_y", write_disposition="merge") - def items_y(data): - yield data - - @dlt.resource(primary_key="id_z", write_disposition="merge") - def items_z(data): - yield data - - # create two postgres tables with single record - resources = [ - items_w({"id_w": 1, "val": [1, 2, 3]}), - items_x({"id_x": 1, "val": "foo"}), - items_y([{"id_y": 1, "val": True}, {"id_y": 2, "val": False}]), - items_z({"id_z": 1, "val": "2024-03-11"}), - ] - src_pl.run(resources) - add_pk(src_pl.sql_client, "items_w", "id_w") - add_pk(src_pl.sql_client, "items_x", "id_x") - add_pk(src_pl.sql_client, "items_y", "id_y") - if not give_conf: - add_pk(src_pl.sql_client, "items_z", "id_z") - - conf = ( - { - "items_x": { - "include_columns": ["id_x", "val"], - 
"init_conf": {"persist_snapshot": True}, - }, - "items_y": { - "include_columns": ["id_y", "val"], - "init_conf": {"persist_snapshot": False}, - }, - "items_z": { - "include_columns": ["id_z", "val"], - "init_conf": {"persist_snapshot": True, "publish": "insert"}, - }, - } - if give_conf - else None - ) - - rep_tbls = pg_replication_source( - table_names=["items_w", "items_x", "items_y", "items_z"], - schema_name=src_pl.dataset_name, - conf=conf, - ) - dest_pl = dlt.pipeline( - pipeline_name="dest_pl", destination=destination_name, full_refresh=True - ) - info = dest_pl.run(rep_tbls) - assert_load_info(info) - if give_conf: - assert load_table_counts(dest_pl, "items_x", "items_z") == { - "items_x": 1, - "items_z": 1, - } - with pytest.raises(dlt.destinations.exceptions.DatabaseUndefinedRelation): - # "items_w" table does not exist because we didn't specify "persist_snapshot" and it defaults to False - load_table_counts(dest_pl, "items_w") - with pytest.raises(dlt.destinations.exceptions.DatabaseUndefinedRelation): - # "items_y" table does not exist because we set "persist_snapshot" to False - load_table_counts(dest_pl, "items_y") - - # insert one record in both postgres tables - resources = [ - items_w({"id_w": 2, "val": [1, 2]}), - items_x({"id_x": 2, "val": "foo"}), - items_y({"id_y": 3, "val": True}), - items_z({"id_z": 2, "val": "2000-01-01"}), - ] - src_pl.run(resources) - - info = dest_pl.run(rep_tbls) - assert_load_info(info) - if give_conf: - assert load_table_counts( - dest_pl, "items_w", "items_x", "items_y", "items_z" - ) == { - "items_w": 1, - "items_x": 2, - "items_y": 1, - "items_z": 2, - } - else: - assert load_table_counts( - dest_pl, "items_w", "items_x", "items_y", "items_z" - ) == { - "items_w": 1, - "items_x": 1, - "items_y": 1, - "items_z": 1, - } diff --git a/tests/pg_replication/utils.py b/tests/pg_replication/utils.py index 78ab1e67f..75542aa91 100644 --- a/tests/pg_replication/utils.py +++ b/tests/pg_replication/utils.py @@ -1,3 +1,11 @@ +from typing import Sequence, List, Dict, Any, Optional + +from dlt import Pipeline +from dlt.common.data_writers.escape import escape_postgres_identifier + +from tests.utils import select_data + + def add_pk(sql_client, table_name: str, column_name: str) -> None: """Adds primary key to postgres table. 
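
For illustration, a minimal sketch of how the tests use this helper (pipeline, table, and column names here are illustrative): the source table gets an explicit primary key before update/delete replication is exercised, since tables created by dlt have none and published `update`/`delete` messages need a replica identity to merge on.

```python
import dlt

from tests.pg_replication.utils import add_pk

# illustrative source pipeline feeding the Postgres instance under test
src_pl = dlt.pipeline(
    pipeline_name="src_pl", destination="postgres", dataset_name="test_schema"
)
src_pl.run([{"id": 1, "val": True}], table_name="items")

# add a primary key to the source table; it acts as replica identity
# for the update/delete messages published during replication
add_pk(src_pl.sql_client, "items", "id")
```
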
@@ -7,3 +15,24 @@ def add_pk(sql_client, table_name: str, column_name: str) -> None: with sql_client() as c: qual_name = c.make_qualified_table_name(table_name) c.execute_sql(f"ALTER TABLE {qual_name} ADD PRIMARY KEY ({column_name});") + + +def assert_loaded_data( + pipeline: Pipeline, + table_name: str, + column_names: Sequence[str], + expectation: List[Dict[str, Any]], + sort_column_name: str, + where_clause: Optional[str] = None, +) -> None: + """Asserts loaded data meets expectation.""" + qual_name = pipeline.sql_client().make_qualified_table_name(table_name) + column_str = ", ".join(map(escape_postgres_identifier, column_names)) + qry = f"SELECT {column_str} FROM {qual_name}" + if where_clause is not None: + qry += " WHERE " + where_clause + observation = [ + {column_name: row[idx] for idx, column_name in enumerate(column_names)} + for row in select_data(pipeline, qry) + ] + assert sorted(observation, key=lambda d: d[sort_column_name]) == expectation From 9fc3c393888e7d6438998902b6975e0553ab4032 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Sat, 16 Mar 2024 20:07:46 +0100 Subject: [PATCH 11/38] add support for schema replication --- sources/pg_replication/helpers.py | 39 +++++++-- tests/pg_replication/test_pg_replication.py | 96 +++++++++++++++------ 2 files changed, 105 insertions(+), 30 deletions(-) diff --git a/sources/pg_replication/helpers.py b/sources/pg_replication/helpers.py index a4f8fc93c..dd3c523cb 100644 --- a/sources/pg_replication/helpers.py +++ b/sources/pg_replication/helpers.py @@ -57,10 +57,10 @@ @dlt.sources.config.with_config(sections=("sources", "pg_replication")) def init_replication( - table_names: Union[str, Sequence[str]], - schema_name: str, slot_name: str, pub_name: str, + schema_name: str, + table_names: Union[str, Sequence[str]] = None, credentials: ConnectionStringCredentials = dlt.secrets.value, publish: str = "insert, update, delete", persist_snapshots: bool = False, @@ -75,7 +75,10 @@ def init_replication( drop_replication_slot(slot_name, cur) drop_publication(pub_name, cur) create_publication(pub_name, cur, publish) - add_tables_to_publication(table_names, schema_name, pub_name, cur) + if table_names is None: + add_schema_to_publication(schema_name, pub_name, cur) + else: + add_tables_to_publication(table_names, schema_name, pub_name, cur) slot = create_replication_slot(slot_name, cur) if persist_snapshots: if slot is None: @@ -176,8 +179,10 @@ def add_table_to_publication( logger.info( f"Successfully added table {qual_name} to publication {esc_pub_name}." ) - except psycopg2.errors.DuplicateObject: # table is already member of publication - pass + except psycopg2.errors.DuplicateObject: + logger.info( + f"Table {qual_name} is already a member of publication {esc_pub_name}." + ) def add_tables_to_publication( @@ -192,6 +197,30 @@ def add_tables_to_publication( add_table_to_publication(table_name, schema_name, pub_name, cur) +def add_schema_to_publication( + schema_name: str, + pub_name: str, + cur: cursor, +) -> None: + """Adds a schema to a publication for logical replication if the schema is not a member yet. + + Raises error if the user is not a superuser. + """ + esc_schema_name = escape_postgres_identifier(schema_name) + esc_pub_name = escape_postgres_identifier(pub_name) + try: + cur.execute( + f"ALTER PUBLICATION {esc_pub_name} ADD TABLES IN SCHEMA {esc_schema_name};" + ) + logger.info( + f"Successfully added schema {esc_schema_name} to publication {esc_pub_name}." 
+ ) + except psycopg2.errors.DuplicateObject: + logger.info( + f"Schema {esc_schema_name} is already a member of publication {esc_pub_name}." + ) + + def create_replication_slot( # type: ignore[return] name: str, cur: ReplicationCursor, output_plugin: str = "pgoutput" ) -> Optional[Dict[str, str]]: diff --git a/tests/pg_replication/test_pg_replication.py b/tests/pg_replication/test_pg_replication.py index 9d38ec302..122054482 100644 --- a/tests/pg_replication/test_pg_replication.py +++ b/tests/pg_replication/test_pg_replication.py @@ -40,10 +40,10 @@ def tbl_y(data): pub_name = "test_pub" snapshots = init_replication( - table_names=("tbl_x", "tbl_y"), - schema_name=src_pl.dataset_name, slot_name=slot_name, pub_name=pub_name, + schema_name=src_pl.dataset_name, + table_names=("tbl_x", "tbl_y"), persist_snapshots=True, ) @@ -165,10 +165,10 @@ def tbl_y(data): slot_name = "test_slot" pub_name = "test_pub" init_replication( - table_names=("tbl_x", "tbl_y"), - schema_name=src_pl.dataset_name, slot_name=slot_name, pub_name=pub_name, + schema_name=src_pl.dataset_name, + table_names=("tbl_x", "tbl_y"), ) changes = replication_resource(slot_name, pub_name) @@ -219,10 +219,10 @@ def items(data): slot_name = "test_slot" pub_name = "test_pub" init_replication( - table_names="items", - schema_name=src_pl.dataset_name, slot_name=slot_name, pub_name=pub_name, + schema_name=src_pl.dataset_name, + table_names="items", publish="insert", ) changes = replication_resource(slot_name, pub_name) @@ -269,10 +269,10 @@ def items(data): slot_name = "test_slot" pub_name = "test_pub" snapshots = init_replication( - table_names="items", - schema_name=src_pl.dataset_name, slot_name=slot_name, pub_name=pub_name, + schema_name=src_pl.dataset_name, + table_names="items", persist_snapshots=init_load, columns={"items": column_schema} if give_hints else None, ) @@ -367,10 +367,10 @@ def items(data): slot_name = "test_slot" pub_name = "test_pub" snapshots = init_replication( - table_names="items", - schema_name=src_pl.dataset_name, slot_name=slot_name, pub_name=pub_name, + schema_name=src_pl.dataset_name, + table_names="items", publish=publish, persist_snapshots=True, ) @@ -426,10 +426,10 @@ def tbl_z(data): # tbl_z is not specified, hence all columns should be included } snapshots = init_replication( - table_names=("tbl_x", "tbl_y", "tbl_z"), - schema_name=src_pl.dataset_name, slot_name=slot_name, pub_name=pub_name, + schema_name=src_pl.dataset_name, + table_names=("tbl_x", "tbl_y", "tbl_z"), publish="insert", persist_snapshots=init_load, include_columns=include_columns, @@ -467,15 +467,6 @@ def tbl_z(data): def test_column_hints( src_pl: dlt.Pipeline, destination_name: str, init_load: bool ) -> None: - def get_cols(pipeline: dlt.Pipeline, table_name: str) -> set: - with pipeline.destination_client(pipeline.default_schema_name) as client: - client: SqlJobClientBase - return { - k - for k in client.get_storage_table(table_name)[1].keys() - if not k.startswith("_dlt_") - } - @dlt.resource def tbl_x(data): yield data @@ -506,10 +497,10 @@ def tbl_z(data): # tbl_z is not specified, hence all columns should be included } snapshots = init_replication( - table_names=("tbl_x", "tbl_y", "tbl_z"), - schema_name=src_pl.dataset_name, slot_name=slot_name, pub_name=pub_name, + schema_name=src_pl.dataset_name, + table_names=("tbl_x", "tbl_y", "tbl_z"), publish="insert", persist_snapshots=init_load, columns=column_hints, @@ -589,10 +580,10 @@ def test_batching(src_pl: dlt.Pipeline) -> None: slot_name = "test_slot" pub_name = "test_pub" 
init_replication( - table_names="items", - schema_name=src_pl.dataset_name, slot_name=slot_name, pub_name=pub_name, + schema_name=src_pl.dataset_name, + table_names="items", ) changes = replication_resource(slot_name, pub_name, target_batch_size=50) @@ -618,3 +609,58 @@ def test_batching(src_pl: dlt.Pipeline) -> None: src_pl.run(batch, table_name="items") extract_info = dest_pl.extract(changes) assert extract_info.asdict()["job_metrics"][0]["items_count"] == 100 + + +def test_replicate_schema(src_pl: dlt.Pipeline) -> None: + @dlt.resource + def tbl_x(data): + yield data + + @dlt.resource + def tbl_y(data): + yield data + + @dlt.resource + def tbl_z(data): + yield data + + # create two postgres tables + src_pl.run( + [ + tbl_x({"id_x": 1, "val_x": "foo"}), + tbl_y({"id_y": 1, "val_y": "foo"}), + ] + ) + + # initialize replication and create resource + slot_name = "test_slot" + pub_name = "test_pub" + init_replication( + slot_name=slot_name, + pub_name=pub_name, + schema_name=src_pl.dataset_name, # we only specify `schema_name`, not `table_names` + publish="insert", + ) + changes = replication_resource(slot_name, pub_name) + + # change source tables and load to destination + src_pl.run( + [ + tbl_x({"id_x": 2, "val_x": "foo"}), + tbl_y({"id_y": 2, "val_y": "foo"}), + ] + ) + dest_pl = dlt.pipeline(pipeline_name="dest_pl", full_refresh=True) + dest_pl.extract(changes) + assert set(dest_pl.default_schema.data_table_names()) == {"tbl_x", "tbl_y"} + + # introduce new table in source and assert it gets included in the replication + src_pl.run( + [ + tbl_x({"id_x": 3, "val_x": "foo"}), + tbl_y({"id_y": 3, "val_y": "foo"}), + tbl_z({"id_z": 1, "val_z": "foo"}), + ] + ) + dest_pl.extract(changes) + assert set(dest_pl.default_schema.data_table_names()) == {"tbl_x", "tbl_y", "tbl_z"} From 8c2f9053fce93f2b3e85e9b290f332d22dcfc718 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Sat, 16 Mar 2024 22:15:33 +0100 Subject: [PATCH 12/38] add support for unmapped data types --- sources/pg_replication/schema_types.py | 11 +++-- tests/pg_replication/test_pg_replication.py | 45 ++++++++++++++++++++- 2 files changed, 52 insertions(+), 4 deletions(-) diff --git a/sources/pg_replication/schema_types.py b/sources/pg_replication/schema_types.py index 0e533b11a..c5f2faff3 100644 --- a/sources/pg_replication/schema_types.py +++ b/sources/pg_replication/schema_types.py @@ -24,8 +24,9 @@ "timestamp": "2000-01-01T00:00:00", "wei": 0, } +"""Dummy values used to replace NULLs in NOT NULL colums in key-only delete records.""" + -# maps postgres type OID to type string _PG_TYPES: Dict[int, str] = { 16: "boolean", 17: "bytea", @@ -40,6 +41,7 @@ 1700: "numeric", 3802: "jsonb", } +"""Maps postgres type OID to type string. Only includes types present in PostgresTypeMapper.""" def _get_precision(type_id: int, atttypmod: int) -> Optional[int]: @@ -76,8 +78,11 @@ def _get_scale(type_id: int, atttypmod: int) -> Optional[int]: def _to_dlt_column_type(type_id: int, atttypmod: int) -> TColumnType: - """Converts postgres type to dlt column type.""" - pg_type = _PG_TYPES[type_id] + """Converts postgres type OID to dlt column type. + + Type OIDs not in _PG_TYPES mapping default to "text" type. 
+ """ + pg_type = _PG_TYPES.get(type_id) precision = _get_precision(type_id, atttypmod) scale = _get_scale(type_id, atttypmod) mapper = PostgresTypeMapper(capabilities()) diff --git a/tests/pg_replication/test_pg_replication.py b/tests/pg_replication/test_pg_replication.py index 122054482..b399bc298 100644 --- a/tests/pg_replication/test_pg_replication.py +++ b/tests/pg_replication/test_pg_replication.py @@ -247,12 +247,14 @@ def items(data): @pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) @pytest.mark.parametrize("give_hints", [True, False]) @pytest.mark.parametrize("init_load", [True, False]) -def test_all_data_types( +def test_mapped_data_types( src_pl: dlt.Pipeline, destination_name: str, give_hints: bool, init_load: bool, ) -> None: + """Assert common data types (the ones mapped in PostgresTypeMapper) are properly handled.""" + data = deepcopy(TABLE_ROW_ALL_DATA_TYPES) column_schema = deepcopy(TABLE_UPDATE_COLUMNS_SCHEMA) @@ -354,6 +356,47 @@ def items(data): ) +@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) +def test_unmapped_data_types(src_pl: dlt.Pipeline, destination_name: str) -> None: + """Assert postgres data types that aren't explicitly mapped default to "text" type.""" + + # create postgres table with some unmapped types + with src_pl.sql_client() as c: + c.create_dataset() + c.execute_sql( + "CREATE TABLE data_types (bit_col bit(1), box_col box, uuid_col uuid);" + ) + + # initialize replication and create resource + slot_name = "test_slot" + pub_name = "test_pub" + init_replication( + slot_name=slot_name, + pub_name=pub_name, + schema_name=src_pl.dataset_name, + table_names="data_types", + publish="insert", + ) + changes = replication_resource(slot_name, pub_name) + + # insert record in source table to create replication item + with src_pl.sql_client() as c: + c.execute_sql( + "INSERT INTO data_types VALUES (B'1', box '((1,1), (0,0))', gen_random_uuid());" + ) + + # run destination pipeline and assert resulting data types + dest_pl = dlt.pipeline( + pipeline_name="dest_pl", destination=destination_name, full_refresh=True + ) + dest_pl.extract(changes) + dest_pl.normalize() + columns = dest_pl.default_schema.get_table_columns("data_types") + assert columns["bit_col"]["data_type"] == "text" + assert columns["box_col"]["data_type"] == "text" + assert columns["uuid_col"]["data_type"] == "text" + + @pytest.mark.parametrize("publish", ["insert", "insert, update, delete"]) def test_write_disposition(src_pl: dlt.Pipeline, publish: str) -> None: @dlt.resource From a0af6058fe50b92e29ff409b58395280fd3eddb2 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Sun, 17 Mar 2024 17:31:07 +0100 Subject: [PATCH 13/38] add test for init_replication --- tests/pg_replication/test_pg_replication.py | 134 +++++++++++++++----- 1 file changed, 104 insertions(+), 30 deletions(-) diff --git a/tests/pg_replication/test_pg_replication.py b/tests/pg_replication/test_pg_replication.py index b399bc298..718e57239 100644 --- a/tests/pg_replication/test_pg_replication.py +++ b/tests/pg_replication/test_pg_replication.py @@ -1,5 +1,6 @@ import pytest +from typing import Set from copy import deepcopy import dlt @@ -611,47 +612,77 @@ def tbl_z(data): ) -def test_batching(src_pl: dlt.Pipeline) -> None: - # this test asserts the number of data items yielded by the replication resource - # is not affected by `target_batch_size` and the number of replication messages per transaction +def test_init_replication(src_pl: dlt.Pipeline) -> None: + def get_table_names_in_pub() -> Set[str]: 
+ with src_pl.sql_client() as c: + result = c.execute_sql( + f"SELECT tablename FROM pg_publication_tables WHERE pubname = '{pub_name}';" + ) + return {tup[0] for tup in result} - # create postgres table with single record - data = {"id": 1000, "val": True} - src_pl.run([data], table_name="items") + @dlt.resource + def tbl_x(data): + yield data - # initialize replication and create resource for changes + @dlt.resource + def tbl_y(data): + yield data + + @dlt.resource + def tbl_z(data): + yield data + + # create three postgres tables + src_pl.run( + [ + tbl_x({"id_x": 1, "val_x": "foo"}), + tbl_y({"id_y": 1, "val_y": "foo"}), + tbl_z({"id_z": 1, "val_z": "foo"}), + ] + ) + + # initialize replication with a single table slot_name = "test_slot" pub_name = "test_pub" - init_replication( + snapshots = init_replication( slot_name=slot_name, pub_name=pub_name, schema_name=src_pl.dataset_name, - table_names="items", + table_names="tbl_x", + persist_snapshots=True, ) - changes = replication_resource(slot_name, pub_name, target_batch_size=50) + assert len(snapshots) == 1 + assert get_table_names_in_pub() == {"tbl_x"} - # create destination pipeline and resource - dest_pl = dlt.pipeline(pipeline_name="dest_pl", full_refresh=True) + # adding another table is supported, but snapshot tables won't be persisted + snapshots = init_replication( + slot_name=slot_name, + pub_name=pub_name, + schema_name=src_pl.dataset_name, + table_names=("tbl_x", "tbl_y"), + persist_snapshots=True, + ) + assert snapshots is None + assert get_table_names_in_pub() == {"tbl_x", "tbl_y"} - # insert 100 records into source table in one transaction - batch = [{**r, **{"id": key}} for r in [data] for key in range(1, 101)] - src_pl.run(batch, table_name="items") - extract_info = dest_pl.extract(changes) - assert extract_info.asdict()["job_metrics"][0]["items_count"] == 100 + # removing a table is not supported + init_replication( + slot_name=slot_name, + pub_name=pub_name, + schema_name=src_pl.dataset_name, + table_names="tbl_x", # "tbl_y" is no longer provided + ) + # "tbl_y" is still in the publication + assert get_table_names_in_pub() == {"tbl_x", "tbl_y"} - # insert 100 records into source table in 5 transactions - batch = [{**r, **{"id": key}} for r in [data] for key in range(101, 121)] - src_pl.run(batch, table_name="items") - batch = [{**r, **{"id": key}} for r in [data] for key in range(121, 141)] - src_pl.run(batch, table_name="items") - batch = [{**r, **{"id": key}} for r in [data] for key in range(141, 161)] - src_pl.run(batch, table_name="items") - batch = [{**r, **{"id": key}} for r in [data] for key in range(161, 181)] - src_pl.run(batch, table_name="items") - batch = [{**r, **{"id": key}} for r in [data] for key in range(181, 201)] - src_pl.run(batch, table_name="items") - extract_info = dest_pl.extract(changes) - assert extract_info.asdict()["job_metrics"][0]["items_count"] == 100 + # switching to whole schema replication is supported by omitting `table_names` + init_replication( + slot_name=slot_name, + pub_name=pub_name, + schema_name=src_pl.dataset_name, + ) + # includes dlt system tables + assert get_table_names_in_pub() >= {"tbl_x", "tbl_y", "tbl_z"} def test_replicate_schema(src_pl: dlt.Pipeline) -> None: @@ -707,3 +738,46 @@ def tbl_z(data): ) dest_pl.extract(changes) assert set(dest_pl.default_schema.data_table_names()) == {"tbl_x", "tbl_y", "tbl_z"} + + +def test_batching(src_pl: dlt.Pipeline) -> None: + # this test asserts the number of data items yielded by the replication resource + # is not affected by 
`target_batch_size` and the number of replication messages per transaction + + # create postgres table with single record + data = {"id": 1000, "val": True} + src_pl.run([data], table_name="items") + + # initialize replication and create resource for changes + slot_name = "test_slot" + pub_name = "test_pub" + init_replication( + slot_name=slot_name, + pub_name=pub_name, + schema_name=src_pl.dataset_name, + table_names="items", + ) + changes = replication_resource(slot_name, pub_name, target_batch_size=50) + + # create destination pipeline and resource + dest_pl = dlt.pipeline(pipeline_name="dest_pl", full_refresh=True) + + # insert 100 records into source table in one transaction + batch = [{**r, **{"id": key}} for r in [data] for key in range(1, 101)] + src_pl.run(batch, table_name="items") + extract_info = dest_pl.extract(changes) + assert extract_info.asdict()["job_metrics"][0]["items_count"] == 100 + + # insert 100 records into source table in 5 transactions + batch = [{**r, **{"id": key}} for r in [data] for key in range(101, 121)] + src_pl.run(batch, table_name="items") + batch = [{**r, **{"id": key}} for r in [data] for key in range(121, 141)] + src_pl.run(batch, table_name="items") + batch = [{**r, **{"id": key}} for r in [data] for key in range(141, 161)] + src_pl.run(batch, table_name="items") + batch = [{**r, **{"id": key}} for r in [data] for key in range(161, 181)] + src_pl.run(batch, table_name="items") + batch = [{**r, **{"id": key}} for r in [data] for key in range(181, 201)] + src_pl.run(batch, table_name="items") + extract_info = dest_pl.extract(changes) + assert extract_info.asdict()["job_metrics"][0]["items_count"] == 100 From 051830c8be2df07de3c0b1a8e34b9fa03fe80769 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Sun, 17 Mar 2024 18:35:34 +0100 Subject: [PATCH 14/38] update docstrings --- sources/pg_replication/__init__.py | 48 ++++++++++ sources/pg_replication/helpers.py | 148 ++++++++++++++++++++++------- 2 files changed, 163 insertions(+), 33 deletions(-) diff --git a/sources/pg_replication/__init__.py b/sources/pg_replication/__init__.py index 72a8a7a2b..98159c4b3 100644 --- a/sources/pg_replication/__init__.py +++ b/sources/pg_replication/__init__.py @@ -26,6 +26,54 @@ def replication_resource( target_batch_size: int = 1000, flush_slot: bool = True, ) -> DltResource: + """Returns a dlt resource that yields data items for changes in one or more postgres tables. + + Relies on a replication slot and publication that publishes DML operations + (i.e. `insert`, `update`, and/or `delete`). Helper `init_replication` can be + used to set this up. + + Uses `append` write disposition when the publication only publishes `insert` + operations, `merge` otherwise. + + Args: + slot_name (str): Name of the replication slot to consume replication messages from. + pub_name (str): Name of the publication that publishes DML operations for the table(s). + credentials (ConnectionStringCredentials): Postgres database credentials. + include_columns (Optional[Dict[str, Sequence[str]]]): Maps table name(s) to + sequence of names of columns to include in the generated data items. + Any column not in the sequence is excluded. If not provided, all columns + are included. For example: + ``` + include_columns={ + "table_x": ["col_a", "col_c"], + "table_y": ["col_x", "col_y", "col_z"], + } + ``` + columns (Optional[Dict[str, TTableHintTemplate[TAnySchemaColumns]]]): Maps + table name(s) to column hints to apply on the replicated table(s). 
For example:
+            ```
+            columns={
+                "table_x": {"col_a": {"data_type": "complex"}},
+                "table_y": {"col_y": {"precision": 32}},
+            }
+            ```
+        target_batch_size (int): Desired number of data items yielded in a batch.
+            Can be used to limit the data items in memory. Note that the number of
+            data items yielded can be (far) greater than `target_batch_size`, because
+            all messages belonging to the same transaction are always processed in
+            the same batch, regardless of the number of messages in the transaction
+            and regardless of the value of `target_batch_size`. The number of data
+            items can also be smaller than `target_batch_size` when the replication
+            slot is exhausted before a batch is full.
+        flush_slot (bool): Whether processed messages are discarded from the replication
+            slot. Recommended value is True. Be careful when setting False—not flushing
+            can eventually lead to a “disk full” condition on the server, because
+            the server retains all the WAL segments that might be needed to stream
+            the changes via all of the currently open replication slots.
+
+    Returns:
+        DltResource that yields data items for changes published in the publication.
+    """
     write_disposition: TWriteDisposition = "append"
     rep_cols: TTableSchemaColumns = {"lsn": {"data_type": "bigint"}}
     resource_name = _gen_replication_resource_name(slot_name, pub_name)
diff --git a/sources/pg_replication/helpers.py b/sources/pg_replication/helpers.py
index dd3c523cb..1c251a80e 100644
--- a/sources/pg_replication/helpers.py
+++ b/sources/pg_replication/helpers.py
@@ -60,7 +60,7 @@ def init_replication(
     slot_name: str,
     pub_name: str,
     schema_name: str,
-    table_names: Union[str, Sequence[str]] = None,
+    table_names: Optional[Union[str, Sequence[str]]] = None,
     credentials: ConnectionStringCredentials = dlt.secrets.value,
     publish: str = "insert, update, delete",
     persist_snapshots: bool = False,
@@ -68,6 +68,69 @@ def init_replication(
     columns: Optional[Dict[str, TTableHintTemplate[TAnySchemaColumns]]] = None,
     reset: bool = False,
 ) -> Optional[List[DltResource]]:
+    """Initializes replication for one, several, or all tables within a schema.
+
+    Can be called repeatedly with the same `slot_name` and `pub_name`:
+    - creates a replication slot and publication with provided names if they do not exist yet
+    - skips creation of slot and publication if they already exist (unless `reset` is set to `True`)
+    - supports addition of new tables by extending `table_names`
+    - removing tables is not supported, i.e. excluding a table from `table_names`
+    will not remove it from the publication
+    - switching from a table selection to an entire schema is possible by omitting
+    the `table_names` argument
+    - changing `publish` has no effect (altering the published DML operations is not supported)
+    - table snapshots can only be persisted on the first call (because the snapshot
+    is exported when the slot is created)
+
+    Args:
+        slot_name (str): Name of the replication slot to create if it does not exist yet.
+        pub_name (str): Name of the publication to create if it does not exist yet.
+        schema_name (str): Name of the schema to replicate tables from.
+        table_names (Optional[Union[str, Sequence[str]]]): Name(s) of the table(s)
+            to include in the publication. If not provided, all tables in the schema
+            are included (also tables added to the schema after the publication was created).
+        credentials (ConnectionStringCredentials): Postgres database credentials.
+        publish (str): Comma-separated string of DML operations. 
Can be used to + control which changes are included in the publication. Allowed operations + are `insert`, `update`, and `delete`. `truncate` is currently not + supported—messages of that type are ignored. + E.g. `publish="insert"` will create a publication that only publishes insert operations. + persist_snapshots (bool): Whether the table states in the snapshot exported + during replication slot creation are persisted to tables. If true, a + snapshot table is created in Postgres for all included tables, and corresponding + resources (`DltResource` objects) for these tables are created and returned. + The resources can be used to perform an initial load of all data present + in the tables at the moment the replication slot got created. + include_columns (Optional[Dict[str, Sequence[str]]]): Maps table name(s) to + sequence of names of columns to include in the snapshot table(s). + Any column not in the sequence is excluded. If not provided, all columns + are included. For example: + ``` + include_columns={ + "table_x": ["col_a", "col_c"], + "table_y": ["col_x", "col_y", "col_z"], + } + ``` + Argument is only used if `persist_snapshots` is `True`. + columns (Optional[Dict[str, TTableHintTemplate[TAnySchemaColumns]]]): Maps + table name(s) to column hints to apply on the snapshot table resource(s). + For example: + ``` + columns={ + "table_x": {"col_a": {"data_type": "complex"}}, + "table_y": {"col_y": {"precision": 32}}, + } + ``` + Argument is only used if `persist_snapshots` is `True`. + reset (bool): If set to True, the existing slot and publication are dropped + and recreated. Has no effect if a slot and publication with the provided + names do not yet exist. + + Returns: + None if `persist_snapshots` is `False`. A list of `DltResource` objects for + the snapshot tables if `persist_snapshots` is `True` and the replication + slot did not yet exist. + """ if isinstance(table_names, str): table_names = [table_names] cur = _get_rep_conn(credentials).cursor() @@ -118,30 +181,6 @@ def init_replication( return None -def snapshot_table_resource( - snapshot_table_name: str, - schema_name: str, - table_name: str, - write_disposition: TWriteDisposition, - columns: TTableHintTemplate[TAnySchemaColumns] = None, - credentials: ConnectionStringCredentials = dlt.secrets.value, -) -> DltResource: - resource: DltResource = sql_table( - credentials=credentials, - table=snapshot_table_name, - schema=schema_name, - detect_precision_hints=True, - ) - primary_key = _get_pk(table_name, schema_name, credentials) - resource.apply_hints( - table_name=table_name, - write_disposition=write_disposition, - columns=columns, - primary_key=primary_key, - ) - return resource - - def create_publication( name: str, cur: cursor, @@ -168,8 +207,9 @@ def add_table_to_publication( pub_name: str, cur: cursor, ) -> None: - """Adds a table to a publication for logical replication if the table is not a member yet. + """Adds a table to a publication for logical replication. + Does nothing if the table is already a member of the publication. Raises error if the user is not owner of the table. """ qual_name = _make_qualified_table_name(table_name, schema_name) @@ -191,6 +231,10 @@ def add_tables_to_publication( pub_name: str, cur: cursor, ) -> None: + """Adds one or multiple tables to a publication for logical replication. + + Calls `add_table_to_publication` for each table in `table_names`. 
+ """ if isinstance(table_names, str): table_names = table_names for table_name in table_names: @@ -294,6 +338,35 @@ def persist_snapshot_table( return snapshot_table_name +def snapshot_table_resource( + snapshot_table_name: str, + schema_name: str, + table_name: str, + write_disposition: TWriteDisposition, + columns: TTableHintTemplate[TAnySchemaColumns] = None, + credentials: ConnectionStringCredentials = dlt.secrets.value, +) -> DltResource: + """Returns a resource for a persisted snapshot table. + + Can be used to perform an initial load of the table, so all data that + existed in the table prior to initializing replication is also captured. + """ + resource: DltResource = sql_table( + credentials=credentials, + table=snapshot_table_name, + schema=schema_name, + detect_precision_hints=True, + ) + primary_key = _get_pk(table_name, schema_name, credentials) + resource.apply_hints( + table_name=table_name, + write_disposition=write_disposition, + columns=columns, + primary_key=primary_key, + ) + return resource + + def get_max_lsn( slot_name: str, options: Dict[str, str], @@ -323,6 +396,7 @@ def get_pub_ops( pub_name: str, credentials: ConnectionStringCredentials, ) -> Dict[str, bool]: + """Returns dictionary of DML operations and their publish status.""" cur = _get_conn(credentials).cursor() cur.execute( f""" @@ -449,6 +523,10 @@ def _make_qualified_table_name(table_name: str, schema_name: str) -> str: def _gen_replication_resource_name(slot_name: str, pub_name: str) -> str: + """Generates name for a resource used for replication. + + Based on names of replication slot and publication the resource consumes from. + """ return slot_name + "_" + pub_name @@ -497,10 +575,11 @@ class ItemGenerator: write_disposition: TWriteDisposition = "append" # TODO: remove after https://github.com/dlt-hub/dlt/issues/1031 has been released def __iter__(self) -> Iterator[Union[TDataItem, DataItemWithMeta]]: - """Consumes messages from replication slot and generates data items. + """Yields replication messages from MessageConsumer. - Does not advance the slot. + Starts replication of messages published by the publication from the replication slot. Maintains LSN of last consumed Commit message in object state. + Does not advance the slot. """ try: cur = _get_rep_conn(self.credentials).cursor() @@ -533,7 +612,12 @@ def __iter__(self) -> Iterator[Union[TDataItem, DataItemWithMeta]]: class MessageConsumer: - """Consumes messages from a ReplicationCursor.""" + """Consumes messages from a ReplicationCursor sequentially. + + Generates data item for each `insert`, `update`, and `delete` message. + Processes in batches to limit memory usage. + Maintains message data needed by subsequent messages in internal state. + """ def __init__( self, @@ -558,10 +642,7 @@ def __init__( self.last_commit_lsn = None def __call__(self, msg: ReplicationMessage) -> None: - """Processes message received from stream. - - Breaks out of stream when `upto_lsn` is reached. - """ + """Processes message received from stream.""" self.process_msg(msg) def process_msg(self, msg: ReplicationMessage) -> None: @@ -569,6 +650,7 @@ def process_msg(self, msg: ReplicationMessage) -> None: Identifies message type and decodes accordingly. Message treatment is different for various message types. + Breaks out of stream when `upto_lsn` or `target_batch_size` is reached. 
""" op = (msg.payload[:1]).decode("utf-8") if op == "B": From 656989a4c4b2e0108672dc967805d23cae8e4e7f Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Sun, 17 Mar 2024 19:16:20 +0100 Subject: [PATCH 15/38] return resource instead of single-element list --- sources/pg_replication/helpers.py | 10 ++++++---- tests/pg_replication/test_pg_replication.py | 12 ++++++------ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/sources/pg_replication/helpers.py b/sources/pg_replication/helpers.py index 1c251a80e..87070fda7 100644 --- a/sources/pg_replication/helpers.py +++ b/sources/pg_replication/helpers.py @@ -67,7 +67,7 @@ def init_replication( include_columns: Optional[Dict[str, Sequence[str]]] = None, columns: Optional[Dict[str, TTableHintTemplate[TAnySchemaColumns]]] = None, reset: bool = False, -) -> Optional[List[DltResource]]: +) -> Optional[Union[DltResource, List[DltResource]]]: """Initializes replication for one, several, or all tables within a schema. Can be called repeatedly with the same `slot_name` and `pub_name`: @@ -127,9 +127,9 @@ def init_replication( names do not yet exist. Returns: - None if `persist_snapshots` is `False`. A list of `DltResource` objects for - the snapshot tables if `persist_snapshots` is `True` and the replication - slot did not yet exist. + - None if `persist_snapshots` is `False` + - a `DltResource` object or a list of `DltResource` objects for the snapshot + table(s) if `persist_snapshots` is `True` and the replication slot did not yet exist """ if isinstance(table_names, str): table_names = [table_names] @@ -177,6 +177,8 @@ def init_replication( table_names, snapshot_table_names ) ] + if len(snapshot_table_resources) == 1: + return snapshot_table_resources[0] return snapshot_table_resources return None diff --git a/tests/pg_replication/test_pg_replication.py b/tests/pg_replication/test_pg_replication.py index 718e57239..a09cd0582 100644 --- a/tests/pg_replication/test_pg_replication.py +++ b/tests/pg_replication/test_pg_replication.py @@ -271,7 +271,7 @@ def items(data): # initialize replication and create resources slot_name = "test_slot" pub_name = "test_pub" - snapshots = init_replication( + snapshot = init_replication( slot_name=slot_name, pub_name=pub_name, schema_name=src_pl.dataset_name, @@ -291,7 +291,7 @@ def items(data): pipeline_name="dest_pl", destination=destination_name, full_refresh=True ) if init_load: - info = dest_pl.run(snapshots[0]) + info = dest_pl.run(snapshot) assert_load_info(info) assert load_table_counts(dest_pl, "items")["items"] == 1 @@ -410,7 +410,7 @@ def items(data): # create resources slot_name = "test_slot" pub_name = "test_pub" - snapshots = init_replication( + snapshot = init_replication( slot_name=slot_name, pub_name=pub_name, schema_name=src_pl.dataset_name, @@ -422,7 +422,7 @@ def items(data): # assert write dispositions expected_write_disposition = "append" if publish == "insert" else "merge" - assert snapshots[0].write_disposition == expected_write_disposition + assert snapshot.write_disposition == expected_write_disposition assert changes.write_disposition == expected_write_disposition @@ -644,14 +644,14 @@ def tbl_z(data): # initialize replication with a single table slot_name = "test_slot" pub_name = "test_pub" - snapshots = init_replication( + snapshot = init_replication( slot_name=slot_name, pub_name=pub_name, schema_name=src_pl.dataset_name, table_names="tbl_x", persist_snapshots=True, ) - assert len(snapshots) == 1 + assert snapshot is not None assert get_table_names_in_pub() == {"tbl_x"} # 
adding another table is supported, but snapshot tables won't be persisted From d014645b418ebad7b9044dcf3e112a105107d9bc Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Mon, 18 Mar 2024 13:18:03 +0100 Subject: [PATCH 16/38] add example pipeline --- sources/pg_replication/helpers.py | 5 +- sources/pg_replication_pipeline.py | 91 ++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 1 deletion(-) create mode 100644 sources/pg_replication_pipeline.py diff --git a/sources/pg_replication/helpers.py b/sources/pg_replication/helpers.py index 87070fda7..f22f681a0 100644 --- a/sources/pg_replication/helpers.py +++ b/sources/pg_replication/helpers.py @@ -511,7 +511,10 @@ def _get_conn( def _get_rep_conn( credentials: ConnectionStringCredentials, ) -> LogicalReplicationConnection: - """Returns a psycopg2 LogicalReplicationConnection to interact with postgres replication functionality.""" + """Returns a psycopg2 LogicalReplicationConnection to interact with postgres replication functionality. + + Raises error if the user does not have the REPLICATION attribute assigned. + """ return _get_conn(credentials, LogicalReplicationConnection) # type: ignore[return-value] diff --git a/sources/pg_replication_pipeline.py b/sources/pg_replication_pipeline.py new file mode 100644 index 000000000..5578045f0 --- /dev/null +++ b/sources/pg_replication_pipeline.py @@ -0,0 +1,91 @@ +import dlt + + +from pg_replication import replication_resource +from pg_replication.helpers import init_replication + + +def replicate_single_table() -> None: + """Sets up replication for a single Postgres table and loads changes into a destination. + + Demonstrates basic usage of `init_replication` helper and `replication_resource` resource. + Uses `src_pl` to create and change the replicated Postgres table—this + is only for demonstration purposes, you won't need this when you run in production + as you'll probably have another process feeding your Postgres instance. 
+ """ + # create source and destination pipelines + src_pl = dlt.pipeline( + pipeline_name="replicate_single_table_src_pl", + destination="postgres", + dataset_name="replicate_single_table", + full_refresh=True, + ) + dest_pl = dlt.pipeline( + pipeline_name="replicate_single_table_dest_pl", + destination="duckdb", + dataset_name="replicate_single_table", + full_refresh=True, + ) + + # create table in source to demonstrate replication + create_source_table( + src_pl, "CREATE TABLE {table_name} (id integer PRIMARY KEY, val bool);" + ) + + # initialize replication for the source table—this creates a replication slot and publication + slot_name = "replicate_single_table_slot" + pub_name = "replicate_single_table_pub" + init_replication( # requires the Postgres user to have the REPLICATION attribute assigned + slot_name=slot_name, + pub_name=pub_name, + schema_name=src_pl.dataset_name, + table_names="my_source_table", + reset=True, + ) + + # create a resource that generates items for each change in the source table + changes = replication_resource(slot_name, pub_name) + + # insert two records in source table and propagate changes to destination + change_source_table( + src_pl, "INSERT INTO {table_name} VALUES (1, true), (2, false);" + ) + dest_pl.run(changes) + show_destination_table(dest_pl) + + # update record in source table and propagate change to destination + change_source_table(src_pl, "UPDATE {table_name} SET val = true WHERE id = 2;") + dest_pl.run(changes) + show_destination_table(dest_pl) + + # delete record from source table and propagate change to destination + change_source_table(src_pl, "DELETE FROM {table_name} WHERE id = 2;") + dest_pl.run(changes) + show_destination_table(dest_pl) + + +# define some helper methods to make examples more readable + + +def create_source_table(src_pl: dlt.Pipeline, sql: str) -> None: + with src_pl.sql_client() as c: + c.create_dataset() + qual_name = c.make_qualified_table_name("my_source_table") + c.execute_sql(sql.format(table_name=qual_name)) + + +def change_source_table(src_pl: dlt.Pipeline, sql: str) -> None: + with src_pl.sql_client() as c: + qual_name = c.make_qualified_table_name("my_source_table") + c.execute_sql(sql.format(table_name=qual_name)) + + +def show_destination_table(dest_pl: dlt.Pipeline) -> None: + with dest_pl.sql_client() as c: + dest_qual_name = c.make_qualified_table_name("my_source_table") + dest_records = c.execute_sql(f"SELECT id, val FROM {dest_qual_name};") + print(dest_records) + + +if __name__ == "__main__": + replicate_single_table() From 269422e2cde5dea7a709ddba40db7ad82c9258fe Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Mon, 18 Mar 2024 16:38:30 +0100 Subject: [PATCH 17/38] add more example pipelines --- sources/pg_replication_pipeline.py | 219 +++++++++++++++++++++++++++-- 1 file changed, 207 insertions(+), 12 deletions(-) diff --git a/sources/pg_replication_pipeline.py b/sources/pg_replication_pipeline.py index 5578045f0..fc7343b97 100644 --- a/sources/pg_replication_pipeline.py +++ b/sources/pg_replication_pipeline.py @@ -27,14 +27,14 @@ def replicate_single_table() -> None: full_refresh=True, ) - # create table in source to demonstrate replication + # create table "my_source_table" in source to demonstrate replication create_source_table( src_pl, "CREATE TABLE {table_name} (id integer PRIMARY KEY, val bool);" ) # initialize replication for the source table—this creates a replication slot and publication - slot_name = "replicate_single_table_slot" - pub_name = "replicate_single_table_pub" + 
slot_name = "example_slot" + pub_name = "example_pub" init_replication( # requires the Postgres user to have the REPLICATION attribute assigned slot_name=slot_name, pub_name=pub_name, @@ -64,28 +64,223 @@ def replicate_single_table() -> None: show_destination_table(dest_pl) +def replicate_with_initial_load() -> None: + """Sets up replication with initial load. + + Demonstrates usage of `persist_snapshots` argument and snapshot resource + returned by `init_replication` helper. + """ + # create source and destination pipelines + src_pl = dlt.pipeline( + pipeline_name="replicate_with_initial_load_src_pl", + destination="postgres", + dataset_name="replicate_with_initial_load", + full_refresh=True, + ) + dest_pl = dlt.pipeline( + pipeline_name="replicate_with_initial_load_dest_pl", + destination="duckdb", + dataset_name="replicate_with_initial_load", + full_refresh=True, + ) + + # create table "my_source_table" in source to demonstrate replication + create_source_table( + src_pl, "CREATE TABLE {table_name} (id integer PRIMARY KEY, val bool);" + ) + + # insert records before initializing replication + change_source_table( + src_pl, "INSERT INTO {table_name} VALUES (1, true), (2, false);" + ) + + # initialize replication for the source table + slot_name = "example_slot" + pub_name = "example_pub" + snapshot = init_replication( # requires the Postgres user to have the REPLICATION attribute assigned + slot_name=slot_name, + pub_name=pub_name, + schema_name=src_pl.dataset_name, + table_names="my_source_table", + persist_snapshots=True, # persist snapshot table(s) and let function return resource(s) for initial load + reset=True, + ) + + # perform initial load to capture all records present in source table prior to replication initialization + dest_pl.run(snapshot) + show_destination_table(dest_pl) + + # insert record in source table and propagate change to destination + change_source_table(src_pl, "INSERT INTO {table_name} VALUES (3, true);") + changes = replication_resource(slot_name, pub_name) + dest_pl.run(changes) + show_destination_table(dest_pl) + + +def replicate_entire_schema() -> None: + """Demonstrates setup and usage of schema replication.""" + # create source and destination pipelines + src_pl = dlt.pipeline( + pipeline_name="replicate_entire_schema_src_pl", + destination="postgres", + dataset_name="replicate_entire_schema", + full_refresh=True, + ) + dest_pl = dlt.pipeline( + pipeline_name="replicate_entire_schema_dest_pl", + destination="duckdb", + dataset_name="replicate_entire_schema", + full_refresh=True, + ) + + # create two source tables to demonstrate schema replication + create_source_table( + src_pl, + "CREATE TABLE {table_name} (id integer PRIMARY KEY, val bool);", + "tbl_x", + ) + create_source_table( + src_pl, + "CREATE TABLE {table_name} (id integer PRIMARY KEY, val varchar);", + "tbl_y", + ) + + # initialize schema replication by omitting the `table_names` argument + slot_name = "example_slot" + pub_name = "example_pub" + init_replication( # initializing schema replication requires the Postgres user to be a superuser + slot_name=slot_name, + pub_name=pub_name, + schema_name=src_pl.dataset_name, + reset=True, + ) + + # create a resource that generates items for each change in the schema's tables + changes = replication_resource(slot_name, pub_name) + + # insert records in source tables and propagate changes to destination + change_source_table( + src_pl, "INSERT INTO {table_name} VALUES (1, true), (2, false);", "tbl_x" + ) + change_source_table(src_pl, "INSERT INTO 
{table_name} VALUES (1, 'foo');", "tbl_y") + dest_pl.run(changes) + show_destination_table(dest_pl, "tbl_x") + show_destination_table(dest_pl, "tbl_y") + + # tables added to the schema later are also included in the replication + create_source_table( + src_pl, "CREATE TABLE {table_name} (id integer PRIMARY KEY, val date);", "tbl_z" + ) + change_source_table( + src_pl, "INSERT INTO {table_name} VALUES (1, '2023-03-18');", "tbl_z" + ) + dest_pl.run(changes) + show_destination_table(dest_pl, "tbl_z") + + +def replicate_with_column_selection() -> None: + """Sets up replication with column selection. + + Demonstrates usage of `include_columns` argument. + """ + # create source and destination pipelines + src_pl = dlt.pipeline( + pipeline_name="replicate_with_column_selection_src_pl", + destination="postgres", + dataset_name="replicate_with_column_selection", + full_refresh=True, + ) + dest_pl = dlt.pipeline( + pipeline_name="replicate_with_column_selection_dest_pl", + destination="duckdb", + dataset_name="replicate_with_column_selection", + full_refresh=True, + ) + + # create two source tables to demonstrate schema replication + create_source_table( + src_pl, + "CREATE TABLE {table_name} (c1 integer PRIMARY KEY, c2 bool, c3 varchar);", + "tbl_x", + ) + create_source_table( + src_pl, + "CREATE TABLE {table_name} (c1 integer PRIMARY KEY, c2 bool, c3 varchar);", + "tbl_y", + ) + + # initialize schema replication by omitting the `table_names` argument + slot_name = "example_slot" + pub_name = "example_pub" + init_replication( # requires the Postgres user to have the REPLICATION attribute assigned + slot_name=slot_name, + pub_name=pub_name, + schema_name=src_pl.dataset_name, + table_names=("tbl_x", "tbl_y"), + reset=True, + ) + + # create a resource that generates items for each change in the schema's tables + changes = replication_resource( + slot_name=slot_name, + pub_name=pub_name, + include_columns={ + "tbl_x": ("c1", "c2") + }, # columns not specified here are excluded from generated data items + ) + + # insert records in source tables and propagate changes to destination + change_source_table( + src_pl, "INSERT INTO {table_name} VALUES (1, true, 'foo');", "tbl_x" + ) + change_source_table( + src_pl, "INSERT INTO {table_name} VALUES (1, false, 'bar');", "tbl_y" + ) + dest_pl.run(changes) + + # show columns in schema for both tables + # column c3 is not in the schema for tbl_x because we did not include it + # tbl_y does have column c3 because we didn't specify include columns for this table and by default all columns are included + print("tbl_x", ":", list(dest_pl.default_schema.get_table_columns("tbl_x").keys())) + print("tbl_y", ":", list(dest_pl.default_schema.get_table_columns("tbl_y").keys())) + + # define some helper methods to make examples more readable -def create_source_table(src_pl: dlt.Pipeline, sql: str) -> None: +def create_source_table( + src_pl: dlt.Pipeline, sql: str, table_name: str = "my_source_table" +) -> None: with src_pl.sql_client() as c: - c.create_dataset() - qual_name = c.make_qualified_table_name("my_source_table") + try: + c.create_dataset() + except dlt.destinations.exceptions.DatabaseTerminalException: + pass + qual_name = c.make_qualified_table_name(table_name) c.execute_sql(sql.format(table_name=qual_name)) -def change_source_table(src_pl: dlt.Pipeline, sql: str) -> None: +def change_source_table( + src_pl: dlt.Pipeline, sql: str, table_name: str = "my_source_table" +) -> None: with src_pl.sql_client() as c: - qual_name = 
c.make_qualified_table_name("my_source_table") + qual_name = c.make_qualified_table_name(table_name) c.execute_sql(sql.format(table_name=qual_name)) -def show_destination_table(dest_pl: dlt.Pipeline) -> None: +def show_destination_table( + dest_pl: dlt.Pipeline, + table_name: str = "my_source_table", + column_names: str = "id, val", +) -> None: with dest_pl.sql_client() as c: - dest_qual_name = c.make_qualified_table_name("my_source_table") - dest_records = c.execute_sql(f"SELECT id, val FROM {dest_qual_name};") - print(dest_records) + dest_qual_name = c.make_qualified_table_name(table_name) + dest_records = c.execute_sql(f"SELECT {column_names} FROM {dest_qual_name};") + print(table_name, ":", dest_records) if __name__ == "__main__": replicate_single_table() + # replicate_with_initial_load() + # replicate_entire_schema() + # replicate_with_column_selection() From c674f241189e5e3b658afb25c40660e8953c2004 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Mon, 18 Mar 2024 22:38:01 +0100 Subject: [PATCH 18/38] add nullability hints --- sources/pg_replication/__init__.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sources/pg_replication/__init__.py b/sources/pg_replication/__init__.py index 98159c4b3..fed685f80 100644 --- a/sources/pg_replication/__init__.py +++ b/sources/pg_replication/__init__.py @@ -75,7 +75,7 @@ def replication_resource( DltResource that yields data items for changes published in the publication. """ write_disposition: TWriteDisposition = "append" - rep_cols: TTableSchemaColumns = {"lsn": {"data_type": "bigint"}} + rep_cols: TTableSchemaColumns = {"lsn": {"data_type": "bigint", "nullable": True}} resource_name = _gen_replication_resource_name(slot_name, pub_name) pub_ops = get_pub_ops(pub_name, credentials) @@ -83,7 +83,11 @@ def replication_resource( write_disposition = "merge" rep_cols["lsn"]["dedup_sort"] = "desc" if pub_ops["delete"]: - rep_cols["deleted_ts"] = {"hard_delete": True, "data_type": "timestamp"} + rep_cols["deleted_ts"] = { + "hard_delete": True, + "data_type": "timestamp", + "nullable": True, + } return dlt.resource( replication_items, From a919c82accbf976d8f074044f422464e77fd641c Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Mon, 18 Mar 2024 22:57:53 +0100 Subject: [PATCH 19/38] add README --- sources/pg_replication/README.md | 44 ++++++++++++++++++++++++++++-- sources/pg_replication_pipeline.py | 16 +++++------ 2 files changed, 49 insertions(+), 11 deletions(-) diff --git a/sources/pg_replication/README.md b/sources/pg_replication/README.md index 275a2dc93..ee0093781 100644 --- a/sources/pg_replication/README.md +++ b/sources/pg_replication/README.md @@ -1,4 +1,21 @@ -## Prerequisites +# Postgres replication +[Postgres](https://www.postgresql.org/) is one of the most popular relational database management systems. This verified source uses Postgres' replication functionality to efficiently process changes in tables (a process often referred to as _Change Data Capture_ or CDC). It uses [logical decoding](https://www.postgresql.org/docs/current/logicaldecoding.html) and the standard built-in `pgoutput` [output plugin](https://www.postgresql.org/docs/current/logicaldecoding-output-plugin.html). 
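+
+A minimal sketch of the flow (slot, publication, schema, and table names below are placeholders; see `pg_replication_pipeline.py` for complete examples):
+
+```python
+import dlt
+
+from pg_replication import replication_resource
+from pg_replication.helpers import init_replication
+
+# create a replication slot and publication for the table(s) to replicate
+init_replication(
+    slot_name="my_slot",
+    pub_name="my_pub",
+    schema_name="my_schema",
+    table_names="my_table",
+)
+
+# resource that yields a data item for each published change
+changes = replication_resource("my_slot", "my_pub")
+
+# load the changes into any supported destination
+pipeline = dlt.pipeline(pipeline_name="pg_replication_pipeline", destination="duckdb")
+pipeline.run(changes)
+```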
+ +Resources that can be loaded using this verified source are: + +| Name | Description | +|----------------------|-------------------------------------------------| +| replication_resource | Load published messages from a replication slot | + +## Initialize the pipeline + +```bash +dlt init pg_replication duckdb +``` + +This uses `duckdb` as destination, but you can choose any of the supported [destinations](https://dlthub.com/docs/dlt-ecosystem/destinations/). + +## Set up user The Postgres user needs to have the `LOGIN` and `REPLICATION` attributes assigned: @@ -14,9 +31,30 @@ GRANT CREATE ON DATABASE dlt_data TO replication_user; ## Add credentials 1. Open `.dlt/secrets.toml`. -2. Enter the credentials +2. Enter your Postgres credentials: ```toml [sources.pg_replication] credentials="postgresql://replication_user:<>@localhost:5432/dlt_data" - ``` \ No newline at end of file + ``` +3. Enter credentials for your chosen destination as per the [docs](https://dlthub.com/docs/dlt-ecosystem/destinations/). + +## Run the pipeline + +1. Install the necessary dependencies by running the following command: + + ```bash + pip install -r requirements.txt + ``` + +1. Now the pipeline can be run by using the command: + + ```bash + python pg_replication_pipeline.py + ``` + +1. To make sure that everything is loaded as expected, use the command: + + ```bash + dlt pipeline pg_replication_pipeline show + ``` \ No newline at end of file diff --git a/sources/pg_replication_pipeline.py b/sources/pg_replication_pipeline.py index fc7343b97..f980ab135 100644 --- a/sources/pg_replication_pipeline.py +++ b/sources/pg_replication_pipeline.py @@ -15,13 +15,13 @@ def replicate_single_table() -> None: """ # create source and destination pipelines src_pl = dlt.pipeline( - pipeline_name="replicate_single_table_src_pl", + pipeline_name="source_pipeline", destination="postgres", dataset_name="replicate_single_table", full_refresh=True, ) dest_pl = dlt.pipeline( - pipeline_name="replicate_single_table_dest_pl", + pipeline_name="pg_replication_pipeline", destination="duckdb", dataset_name="replicate_single_table", full_refresh=True, @@ -72,13 +72,13 @@ def replicate_with_initial_load() -> None: """ # create source and destination pipelines src_pl = dlt.pipeline( - pipeline_name="replicate_with_initial_load_src_pl", + pipeline_name="source_pipeline", destination="postgres", dataset_name="replicate_with_initial_load", full_refresh=True, ) dest_pl = dlt.pipeline( - pipeline_name="replicate_with_initial_load_dest_pl", + pipeline_name="pg_replication_pipeline", destination="duckdb", dataset_name="replicate_with_initial_load", full_refresh=True, @@ -121,13 +121,13 @@ def replicate_entire_schema() -> None: """Demonstrates setup and usage of schema replication.""" # create source and destination pipelines src_pl = dlt.pipeline( - pipeline_name="replicate_entire_schema_src_pl", + pipeline_name="source_pipeline", destination="postgres", dataset_name="replicate_entire_schema", full_refresh=True, ) dest_pl = dlt.pipeline( - pipeline_name="replicate_entire_schema_dest_pl", + pipeline_name="pg_replication_pipeline", destination="duckdb", dataset_name="replicate_entire_schema", full_refresh=True, @@ -185,13 +185,13 @@ def replicate_with_column_selection() -> None: """ # create source and destination pipelines src_pl = dlt.pipeline( - pipeline_name="replicate_with_column_selection_src_pl", + pipeline_name="source_pipeline", destination="postgres", dataset_name="replicate_with_column_selection", full_refresh=True, ) dest_pl = dlt.pipeline( 
- pipeline_name="replicate_with_column_selection_dest_pl", + pipeline_name="pg_replication_pipeline", destination="duckdb", dataset_name="replicate_with_column_selection", full_refresh=True, From 57b5e1eedf896b32669421914ba090216a17cf6d Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Tue, 19 Mar 2024 21:06:31 +0100 Subject: [PATCH 20/38] add sql_database dependency instruction --- sources/pg_replication/README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sources/pg_replication/README.md b/sources/pg_replication/README.md index ee0093781..8975d01b1 100644 --- a/sources/pg_replication/README.md +++ b/sources/pg_replication/README.md @@ -15,6 +15,13 @@ dlt init pg_replication duckdb This uses `duckdb` as destination, but you can choose any of the supported [destinations](https://dlthub.com/docs/dlt-ecosystem/destinations/). +## Add `sql_database` source + +```bash +dlt init sql_database duckdb +``` + +This source depends on the [sql_database](../sql_database/README.md) verified source internally to perform initial loads. This step can be skipped if you don't do initial loads. ## Set up user The Postgres user needs to have the `LOGIN` and `REPLICATION` attributes assigned: From 5636e078c9f4b0085549d13cdc29bb5e17224eae Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Fri, 22 Mar 2024 22:03:43 +0100 Subject: [PATCH 21/38] batch data items per table and yield hints only once --- sources/pg_replication/__init__.py | 12 +-- sources/pg_replication/helpers.py | 100 +++++++++++++----------- sources/pg_replication/requirements.txt | 2 +- 3 files changed, 59 insertions(+), 55 deletions(-) diff --git a/sources/pg_replication/__init__.py b/sources/pg_replication/__init__.py index fed685f80..979220322 100644 --- a/sources/pg_replication/__init__.py +++ b/sources/pg_replication/__init__.py @@ -4,14 +4,9 @@ import dlt -from dlt.common.schema.typing import ( - TTableSchemaColumns, - TAnySchemaColumns, - TWriteDisposition, -) +from dlt.common.schema.typing import TTableSchemaColumns, TWriteDisposition from dlt.sources.credentials import ConnectionStringCredentials from dlt.extract.resource import DltResource -from dlt.extract.typing import TTableHintTemplate from .helpers import _gen_replication_resource_name, get_pub_ops, replication_items @@ -22,7 +17,7 @@ def replication_resource( pub_name: str, credentials: ConnectionStringCredentials = dlt.secrets.value, include_columns: Optional[Dict[str, Sequence[str]]] = None, - columns: Optional[Dict[str, TTableHintTemplate[TAnySchemaColumns]]] = None, + columns: Optional[Dict[str, TTableSchemaColumns]] = None, target_batch_size: int = 1000, flush_slot: bool = True, ) -> DltResource: @@ -49,7 +44,7 @@ def replication_resource( "table_y": ["col_x", "col_y", "col_z"], } ``` - columns (Optional[Dict[str, TTableHintTemplate[TAnySchemaColumns]]]): Maps + columns (Optional[Dict[str, TTableSchemaColumns]]): Maps table name(s) to column hints to apply on the replicated table(s). 
For example: ``` columns={ @@ -102,5 +97,4 @@ def replication_resource( columns=columns, target_batch_size=target_batch_size, flush_slot=flush_slot, - write_disposition=write_disposition, ) diff --git a/sources/pg_replication/helpers.py b/sources/pg_replication/helpers.py index f22f681a0..ac407dbf8 100644 --- a/sources/pg_replication/helpers.py +++ b/sources/pg_replication/helpers.py @@ -36,14 +36,12 @@ from dlt.common.schema.typing import ( TTableSchema, TTableSchemaColumns, - TAnySchemaColumns, TColumnNames, TWriteDisposition, ) -from dlt.common.schema.utils import get_columns_names_with_prop +from dlt.common.schema.utils import merge_column from dlt.common.data_writers.escape import escape_postgres_identifier -from dlt.common.configuration.specs import BaseConfiguration, configspec -from dlt.extract.typing import DataItemWithMeta, TTableHintTemplate +from dlt.extract.items import DataItemWithMeta from dlt.extract.resource import DltResource from dlt.sources.credentials import ConnectionStringCredentials @@ -65,7 +63,7 @@ def init_replication( publish: str = "insert, update, delete", persist_snapshots: bool = False, include_columns: Optional[Dict[str, Sequence[str]]] = None, - columns: Optional[Dict[str, TTableHintTemplate[TAnySchemaColumns]]] = None, + columns: Optional[Dict[str, TTableSchemaColumns]] = None, reset: bool = False, ) -> Optional[Union[DltResource, List[DltResource]]]: """Initializes replication for one, several, or all tables within a schema. @@ -112,7 +110,7 @@ def init_replication( } ``` Argument is only used if `persist_snapshots` is `True`. - columns (Optional[Dict[str, TTableHintTemplate[TAnySchemaColumns]]]): Maps + columns (Optional[Dict[str, TTableSchemaColumns]]): Maps table name(s) to column hints to apply on the snapshot table resource(s). For example: ``` @@ -345,7 +343,7 @@ def snapshot_table_resource( schema_name: str, table_name: str, write_disposition: TWriteDisposition, - columns: TTableHintTemplate[TAnySchemaColumns] = None, + columns: TTableSchemaColumns = None, credentials: ConnectionStringCredentials = dlt.secrets.value, ) -> DltResource: """Returns a resource for a persisted snapshot table. @@ -448,10 +446,9 @@ def replication_items( slot_name: str, pub_name: str, include_columns: Optional[Dict[str, Sequence[str]]] = None, - columns: Optional[Dict[str, TTableHintTemplate[TAnySchemaColumns]]] = None, + columns: Optional[Dict[str, TTableSchemaColumns]] = None, target_batch_size: int = 1000, flush_slot: bool = True, - write_disposition: TWriteDisposition = "append", # TODO: remove after https://github.com/dlt-hub/dlt/issues/1031 has been released ) -> Iterator[Union[TDataItem, DataItemWithMeta]]: """Yields data items from generator. 
@@ -482,7 +479,6 @@ def replication_items( target_batch_size=target_batch_size, include_columns=include_columns, columns=columns, - write_disposition=write_disposition, # TODO: remove after https://github.com/dlt-hub/dlt/issues/1031 has been released ) yield from gen if gen.generated_all: @@ -574,10 +570,9 @@ class ItemGenerator: start_lsn: int = 0 target_batch_size: int = 1000 include_columns: Optional[Dict[str, Sequence[str]]] = (None,) # type: ignore[assignment] - columns: Optional[Dict[str, TTableHintTemplate[TAnySchemaColumns]]] = (None,) # type: ignore[assignment] + columns: Optional[Dict[str, TTableSchemaColumns]] = (None,) # type: ignore[assignment] last_commit_lsn: Optional[int] = field(default=None, init=False) generated_all: bool = False - write_disposition: TWriteDisposition = "append" # TODO: remove after https://github.com/dlt-hub/dlt/issues/1031 has been released def __iter__(self) -> Iterator[Union[TDataItem, DataItemWithMeta]]: """Yields replication messages from MessageConsumer. @@ -606,13 +601,10 @@ def __iter__(self) -> Iterator[Union[TDataItem, DataItemWithMeta]]: finally: cur.connection.close() self.last_commit_lsn = consumer.last_commit_lsn - for i in consumer.data_items: - i.meta.hints[ - "write_disposition" - ] = ( - self.write_disposition - ) # TODO: remove after https://github.com/dlt-hub/dlt/issues/1031 has been released - yield i + for rel_id, data_items in consumer.data_items.items(): + table_name = consumer.last_table_schema[rel_id]["name"] + yield data_items[0] # meta item with column hints only, no data + yield dlt.mark.with_table_name(data_items[1:], table_name) self.generated_all = consumer.consumed_all @@ -629,7 +621,7 @@ def __init__( upto_lsn: int, target_batch_size: int = 1000, include_columns: Optional[Dict[str, Sequence[str]]] = None, - columns: Optional[Dict[str, TTableHintTemplate[TAnySchemaColumns]]] = None, + columns: Optional[Dict[str, TTableSchemaColumns]] = None, ) -> None: self.upto_lsn = upto_lsn self.target_batch_size = target_batch_size @@ -638,7 +630,9 @@ def __init__( self.consumed_all: bool = False # data_items attribute maintains all data items - self.data_items: List[Union[TDataItem, DataItemWithMeta]] = [] + self.data_items: Dict[ + int, List[Union[TDataItem, DataItemWithMeta]] + ] = dict() # maps relation_id to list of data items # other attributes only maintain last-seen values self.last_table_schema: Dict[ int, TTableSchema @@ -664,7 +658,10 @@ def process_msg(self, msg: ReplicationMessage) -> None: self.last_commit_lsn = msg.data_start if msg.data_start >= self.upto_lsn: self.consumed_all = True - if self.consumed_all or len(self.data_items) >= self.target_batch_size: + n_items = sum( + [len(items) for items in self.data_items.values()] + ) # combine items for all tables + if self.consumed_all or n_items >= self.target_batch_size: raise StopReplication elif op == "R": self.process_relation(Relation(msg.payload)) @@ -683,15 +680,43 @@ def process_msg(self, msg: ReplicationMessage) -> None: def process_relation(self, decoded_msg: Relation) -> None: """Processes a replication message of type Relation. - Stores table schema information from Relation message in object state. + Stores table schema in object state. + Creates meta item to emit column hints while yielding data. 
""" - # store table schema information - columns = {c.name: _to_dlt_column_schema(c) for c in decoded_msg.columns} + # get table schema information from source and store in object state + table_name = decoded_msg.relation_name + columns: TTableSchemaColumns = { + c.name: _to_dlt_column_schema(c) for c in decoded_msg.columns + } self.last_table_schema[decoded_msg.relation_id] = { - "name": decoded_msg.relation_name, + "name": table_name, "columns": columns, } + # apply user input + # 1) exclude columns + include_columns = ( + None + if self.include_columns is None + else self.include_columns.get(table_name) + ) + if include_columns is not None: + columns = {k: v for k, v in columns.items() if k in include_columns} + # 2) override source hints + column_hints: TTableSchemaColumns = ( + dict() if self.columns is None else self.columns.get(table_name, dict()) + ) + for column_name, column_val in column_hints.items(): + columns[column_name] = merge_column(columns[column_name], column_val) + + # include meta item to emit hints while yielding data + meta_item = dlt.mark.with_hints( + [], + dlt.mark.make_hints(table_name=table_name, columns=columns), + create_table_variant=True, + ) + self.data_items[decoded_msg.relation_id] = [meta_item] + def process_change( self, decoded_msg: Union[Insert, Update, Delete], msg_start_lsn: int ) -> None: @@ -705,7 +730,6 @@ def process_change( column_data = decoded_msg.old_tuple.column_data table_name = self.last_table_schema[decoded_msg.relation_id]["name"] data_item = self.gen_data_item( - table_name=self.last_table_schema[decoded_msg.relation_id]["name"], data=column_data, column_schema=self.last_table_schema[decoded_msg.relation_id]["columns"], lsn=msg_start_lsn, @@ -714,25 +738,19 @@ def process_change( include_columns=None if self.include_columns is None else self.include_columns.get(table_name), - column_hints=None if self.columns is None else self.columns.get(table_name), ) - self.data_items.append(data_item) + self.data_items[decoded_msg.relation_id].append(data_item) @staticmethod def gen_data_item( - table_name: str, data: List[ColumnData], column_schema: TTableSchemaColumns, lsn: int, commit_ts: pendulum.DateTime, for_delete: bool, include_columns: Optional[Sequence[str]] = None, - column_hints: TTableHintTemplate[TAnySchemaColumns] = None, ) -> TDataItem: """Generates data item from replication message data and corresponding metadata.""" - pairs = zip(column_schema.values(), data) - if include_columns is not None: - pairs = [(schema, data) for (schema, data) in pairs if schema["name"] in include_columns] # type: ignore[assignment] data_item = { schema["name"]: _to_dlt_val( val=data.col_data, @@ -740,18 +758,10 @@ def gen_data_item( byte1=data.col_data_category, for_delete=for_delete, ) - for (schema, data) in pairs + for (schema, data) in zip(column_schema.values(), data) + if (True if include_columns is None else schema["name"] in include_columns) } data_item["lsn"] = lsn if for_delete: data_item["deleted_ts"] = commit_ts - return dlt.mark.with_hints( - data_item, - dlt.mark.make_hints( - table_name=table_name, - primary_key=get_columns_names_with_prop( - {"columns": column_schema}, "primary_key" - ), - columns=column_hints, - ), - ) + return data_item diff --git a/sources/pg_replication/requirements.txt b/sources/pg_replication/requirements.txt index 5f4179973..34f7ec982 100644 --- a/sources/pg_replication/requirements.txt +++ b/sources/pg_replication/requirements.txt @@ -1,3 +1,3 @@ -dlt>=0.4.5 +dlt>=0.4.7 psycopg2>=2.9.9 pypgoutput==0.0.3 \ No 
newline at end of file From 271346425e917247d97d48ce1948e3cdbb80cfc8 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Fri, 22 Mar 2024 22:35:51 +0100 Subject: [PATCH 22/38] postpone replication column hints to preserve order --- sources/pg_replication/__init__.py | 9 --------- sources/pg_replication/helpers.py | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/sources/pg_replication/__init__.py b/sources/pg_replication/__init__.py index 979220322..d6ab671f4 100644 --- a/sources/pg_replication/__init__.py +++ b/sources/pg_replication/__init__.py @@ -70,25 +70,16 @@ def replication_resource( DltResource that yields data items for changes published in the publication. """ write_disposition: TWriteDisposition = "append" - rep_cols: TTableSchemaColumns = {"lsn": {"data_type": "bigint", "nullable": True}} resource_name = _gen_replication_resource_name(slot_name, pub_name) pub_ops = get_pub_ops(pub_name, credentials) if pub_ops["update"] or pub_ops["delete"]: write_disposition = "merge" - rep_cols["lsn"]["dedup_sort"] = "desc" - if pub_ops["delete"]: - rep_cols["deleted_ts"] = { - "hard_delete": True, - "data_type": "timestamp", - "nullable": True, - } return dlt.resource( replication_items, name=resource_name, write_disposition=write_disposition, - columns=rep_cols, )( credentials=credentials, slot_name=slot_name, diff --git a/sources/pg_replication/helpers.py b/sources/pg_replication/helpers.py index ac407dbf8..9879cbeda 100644 --- a/sources/pg_replication/helpers.py +++ b/sources/pg_replication/helpers.py @@ -591,6 +591,9 @@ def __iter__(self) -> Iterator[Union[TDataItem, DataItemWithMeta]]: ) consumer = MessageConsumer( upto_lsn=self.upto_lsn, + pub_ops=get_pub_ops( + self.options["publication_names"], self.credentials + ), target_batch_size=self.target_batch_size, include_columns=self.include_columns, columns=self.columns, @@ -619,11 +622,13 @@ class MessageConsumer: def __init__( self, upto_lsn: int, + pub_ops: Dict[str, bool], target_batch_size: int = 1000, include_columns: Optional[Dict[str, Sequence[str]]] = None, columns: Optional[Dict[str, TTableSchemaColumns]] = None, ) -> None: self.upto_lsn = upto_lsn + self.pub_ops = pub_ops self.target_batch_size = target_batch_size self.include_columns = include_columns self.columns = columns @@ -709,6 +714,17 @@ def process_relation(self, decoded_msg: Relation) -> None: for column_name, column_val in column_hints.items(): columns[column_name] = merge_column(columns[column_name], column_val) + # add hints for replication columns + columns["lsn"] = {"data_type": "bigint", "nullable": True} + if self.pub_ops["update"] or self.pub_ops["delete"]: + columns["lsn"]["dedup_sort"] = "desc" + if self.pub_ops["delete"]: + columns["deleted_ts"] = { + "hard_delete": True, + "data_type": "timestamp", + "nullable": True, + } + # include meta item to emit hints while yielding data meta_item = dlt.mark.with_hints( [], From eec75f02f756e063c20cfee8ea816adbe0d90af7 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Fri, 22 Mar 2024 23:37:56 +0100 Subject: [PATCH 23/38] refactor to use resource decorator --- sources/pg_replication/__init__.py | 76 ++++++++++++--------- sources/pg_replication/helpers.py | 67 +++--------------- tests/pg_replication/test_pg_replication.py | 14 +++- 3 files changed, 65 insertions(+), 92 deletions(-) diff --git a/sources/pg_replication/__init__.py b/sources/pg_replication/__init__.py index d6ab671f4..9ec0e9b7b 100644 --- a/sources/pg_replication/__init__.py +++ 
b/sources/pg_replication/__init__.py @@ -1,17 +1,21 @@ """Replicates postgres tables in batch using logical decoding.""" -from typing import Dict, Sequence, Optional +from typing import Dict, Sequence, Optional, Iterable, Union import dlt -from dlt.common.schema.typing import TTableSchemaColumns, TWriteDisposition +from dlt.common.typing import TDataItem +from dlt.common.schema.typing import TTableSchemaColumns +from dlt.extract.items import DataItemWithMeta from dlt.sources.credentials import ConnectionStringCredentials -from dlt.extract.resource import DltResource -from .helpers import _gen_replication_resource_name, get_pub_ops, replication_items +from .helpers import advance_slot, get_max_lsn, ItemGenerator -@dlt.sources.config.with_config(sections=("sources", "pg_replication")) +@dlt.resource( + name=lambda args: args["slot_name"] + "_" + args["pub_name"], + standalone=True, +) def replication_resource( slot_name: str, pub_name: str, @@ -20,15 +24,15 @@ def replication_resource( columns: Optional[Dict[str, TTableSchemaColumns]] = None, target_batch_size: int = 1000, flush_slot: bool = True, -) -> DltResource: - """Returns a dlt resource that yields data items for changes in one or more postgres tables. +) -> Iterable[Union[TDataItem, DataItemWithMeta]]: + """Resource yielding data items for changes in one or more postgres tables. - Relies on a replication slot and publication that publishes DML operations + - Relies on a replication slot and publication that publishes DML operations (i.e. `insert`, `update`, and/or `delete`). Helper `init_replication` can be used to set this up. - - Uses `append` write disposition when the publication only publishes `insert` - operations, `merge` otherwise. + - Maintains LSN of last consumed message in state to track progress. + - At start of the run, advances the slot upto last consumed message in previous run. + - Processes in batches to limit memory usage. Args: slot_name (str): Name of the replication slot to consume replication messages from. @@ -44,7 +48,7 @@ def replication_resource( "table_y": ["col_x", "col_y", "col_z"], } ``` - columns (Optional[Dict[str, TTableSchemaColumns]]): Maps + columns (Optional[Dict[str, TTableHintTemplate[TAnySchemaColumns]]]): Maps table name(s) to column hints to apply on the replicated table(s). For example: ``` columns={ @@ -66,26 +70,34 @@ def replication_resource( the server retains all the WAL segments that might be needed to stream the changes via all of the currently open replication slots. - Returns: - DltResource that yields data items for changes published in the publication. + Yields: + Data items for changes published in the publication. """ - write_disposition: TWriteDisposition = "append" - resource_name = _gen_replication_resource_name(slot_name, pub_name) + # start where we left off in previous run + start_lsn = dlt.current.resource_state().get("last_commit_lsn", 0) + if flush_slot: + advance_slot(start_lsn, slot_name, credentials) - pub_ops = get_pub_ops(pub_name, credentials) - if pub_ops["update"] or pub_ops["delete"]: - write_disposition = "merge" + # continue until last message in replication slot + options = {"publication_names": pub_name, "proto_version": "1"} + upto_lsn = get_max_lsn(slot_name, options, credentials) + if upto_lsn is None: + return "Replication slot is empty." 
- return dlt.resource( - replication_items, - name=resource_name, - write_disposition=write_disposition, - )( - credentials=credentials, - slot_name=slot_name, - pub_name=pub_name, - include_columns=include_columns, - columns=columns, - target_batch_size=target_batch_size, - flush_slot=flush_slot, - ) + # generate items in batches + while True: + gen = ItemGenerator( + credentials=credentials, + slot_name=slot_name, + options=options, + upto_lsn=upto_lsn, + start_lsn=start_lsn, + target_batch_size=target_batch_size, + include_columns=include_columns, + columns=columns, + ) + yield from gen + if gen.generated_all: + dlt.current.resource_state()["last_commit_lsn"] = gen.last_commit_lsn + break + start_lsn = gen.last_commit_lsn diff --git a/sources/pg_replication/helpers.py b/sources/pg_replication/helpers.py index 9879cbeda..3ecc32c7a 100644 --- a/sources/pg_replication/helpers.py +++ b/sources/pg_replication/helpers.py @@ -441,54 +441,6 @@ def advance_slot( cur.connection.close() -def replication_items( - credentials: ConnectionStringCredentials, - slot_name: str, - pub_name: str, - include_columns: Optional[Dict[str, Sequence[str]]] = None, - columns: Optional[Dict[str, TTableSchemaColumns]] = None, - target_batch_size: int = 1000, - flush_slot: bool = True, -) -> Iterator[Union[TDataItem, DataItemWithMeta]]: - """Yields data items from generator. - - Maintains LSN of last consumed message in state to track progress. - At start of the run, advances the slot upto last consumed message in previous run. - Processes in batches to limit memory usage. - """ - # start where we left off in previous run - resource_name = _gen_replication_resource_name(slot_name, pub_name) - start_lsn = dlt.current.resource_state(resource_name).get("last_commit_lsn", 0) - if flush_slot: - advance_slot(start_lsn, slot_name, credentials) - - # continue until last message in replication slot - options = {"publication_names": pub_name, "proto_version": "1"} - upto_lsn = get_max_lsn(slot_name, options, credentials) - if upto_lsn is None: - return "Replication slot is empty." - - # generate items in batches - while True: - gen = ItemGenerator( - credentials=credentials, - slot_name=slot_name, - options=options, - upto_lsn=upto_lsn, - start_lsn=start_lsn, - target_batch_size=target_batch_size, - include_columns=include_columns, - columns=columns, - ) - yield from gen - if gen.generated_all: - dlt.current.resource_state(resource_name)[ - "last_commit_lsn" - ] = gen.last_commit_lsn - break - start_lsn = gen.last_commit_lsn - - def _get_conn( credentials: ConnectionStringCredentials, connection_factory: Optional[Any] = None, @@ -523,14 +475,6 @@ def _make_qualified_table_name(table_name: str, schema_name: str) -> str: ) -def _gen_replication_resource_name(slot_name: str, pub_name: str) -> str: - """Generates name for a resource used for replication. - - Based on names of replication slot and publication the resource consumes from. 
- """ - return slot_name + "_" + pub_name - - def _get_pk( table_name: str, schema_name: str, @@ -725,10 +669,19 @@ def process_relation(self, decoded_msg: Relation) -> None: "nullable": True, } + # determine write disposition + write_disposition: TWriteDisposition = "append" + if self.pub_ops["update"] or self.pub_ops["delete"]: + write_disposition = "merge" + # include meta item to emit hints while yielding data meta_item = dlt.mark.with_hints( [], - dlt.mark.make_hints(table_name=table_name, columns=columns), + dlt.mark.make_hints( + table_name=table_name, + write_disposition=write_disposition, + columns=columns, + ), create_table_variant=True, ) self.data_items[decoded_msg.relation_id] = [meta_item] diff --git a/tests/pg_replication/test_pg_replication.py b/tests/pg_replication/test_pg_replication.py index a09cd0582..2d04dc245 100644 --- a/tests/pg_replication/test_pg_replication.py +++ b/tests/pg_replication/test_pg_replication.py @@ -418,12 +418,20 @@ def items(data): publish=publish, persist_snapshots=True, ) - changes = replication_resource(slot_name, pub_name) - # assert write dispositions + # assert write disposition on snapshot resource expected_write_disposition = "append" if publish == "insert" else "merge" assert snapshot.write_disposition == expected_write_disposition - assert changes.write_disposition == expected_write_disposition + + # assert write disposition on tables dispatched by changes resource + changes = replication_resource(slot_name, pub_name) + src_pl.run(items({"id": 2, "val": True})) + dest_pl = dlt.pipeline(pipeline_name="dest_pl", full_refresh=True) + dest_pl.extract(changes) + assert ( + dest_pl.default_schema.get_table("items")["write_disposition"] + == expected_write_disposition + ) @pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) From 493147d58eb707261a93cd9f1806246979708052 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Sat, 23 Mar 2024 02:30:50 +0100 Subject: [PATCH 24/38] add support for table schema changes --- sources/pg_replication/helpers.py | 34 +++++++++++----- tests/pg_replication/test_pg_replication.py | 45 +++++++++++++++++++++ 2 files changed, 70 insertions(+), 9 deletions(-) diff --git a/sources/pg_replication/helpers.py b/sources/pg_replication/helpers.py index 3ecc32c7a..71e97497e 100644 --- a/sources/pg_replication/helpers.py +++ b/sources/pg_replication/helpers.py @@ -598,20 +598,16 @@ def process_msg(self, msg: ReplicationMessage) -> None: Identifies message type and decodes accordingly. Message treatment is different for various message types. - Breaks out of stream when `upto_lsn` or `target_batch_size` is reached. + Breaks out of stream with StopReplication exception when + - `upto_lsn` is reached + - `target_batch_size` is reached + - a table's schema has changed """ op = (msg.payload[:1]).decode("utf-8") if op == "B": self.last_commit_ts = Begin(msg.payload).commit_ts elif op == "C": - self.last_commit_lsn = msg.data_start - if msg.data_start >= self.upto_lsn: - self.consumed_all = True - n_items = sum( - [len(items) for items in self.data_items.values()] - ) # combine items for all tables - if self.consumed_all or n_items >= self.target_batch_size: - raise StopReplication + self.process_commit(msg) elif op == "R": self.process_relation(Relation(msg.payload)) elif op == "I": @@ -626,12 +622,32 @@ def process_msg(self, msg: ReplicationMessage) -> None: "Truncate replication messages are ignored." 
) + def process_commit(self, msg: ReplicationMessage) -> None: + """Updates object state when Commit message is observed. + + Raises StopReplication when `upto_lsn` or `target_batch_size` is reached. + """ + self.last_commit_lsn = msg.data_start + if msg.data_start >= self.upto_lsn: + self.consumed_all = True + n_items = sum( + [len(items) for items in self.data_items.values()] + ) # combine items for all tables + if self.consumed_all or n_items >= self.target_batch_size: + raise StopReplication + def process_relation(self, decoded_msg: Relation) -> None: """Processes a replication message of type Relation. Stores table schema in object state. Creates meta item to emit column hints while yielding data. + + Raises StopReplication when a table's schema changes. """ + if ( + self.data_items.get(decoded_msg.relation_id) is not None + ): # table schema change + raise StopReplication # get table schema information from source and store in object state table_name = decoded_msg.relation_name columns: TTableSchemaColumns = { diff --git a/tests/pg_replication/test_pg_replication.py b/tests/pg_replication/test_pg_replication.py index 2d04dc245..8c75b7265 100644 --- a/tests/pg_replication/test_pg_replication.py +++ b/tests/pg_replication/test_pg_replication.py @@ -620,6 +620,51 @@ def tbl_z(data): ) +@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) +def test_table_schema_change(src_pl: dlt.Pipeline, destination_name: str) -> None: + # create postgres table + src_pl.run([{"c1": 1, "c2": 1}], table_name="items") + + # initialize replication + slot_name = "test_slot" + pub_name = "test_pub" + init_replication( + slot_name=slot_name, + pub_name=pub_name, + schema_name=src_pl.dataset_name, + table_names="items", + publish="insert", + ) + + # create resource and pipeline + changes = replication_resource(slot_name, pub_name) + dest_pl = dlt.pipeline( + pipeline_name="dest_pl", destination=destination_name, full_refresh=True + ) + + # add a column in one commit, this will create one Relation message + src_pl.run([{"c1": 2, "c2": 1}, {"c1": 3, "c2": 1, "c3": 1}], table_name="items") + info = dest_pl.run(changes) + assert_load_info(info) + assert load_table_counts(dest_pl, "items") == {"items": 2} + exp = [{"c1": 2, "c2": 1, "c3": None}, {"c1": 3, "c2": 1, "c3": 1}] + assert_loaded_data(dest_pl, "items", ["c1", "c2", "c3"], exp, "c1") + + # add a column in two commits, this will create two Relation messages + src_pl.run([{"c1": 4, "c2": 1, "c3": 1}], table_name="items") + src_pl.run([{"c1": 5, "c2": 1, "c3": 1, "c4": 1}], table_name="items") + dest_pl.run(changes) + assert_load_info(info) + assert load_table_counts(dest_pl, "items") == {"items": 4} + exp = [ + {"c1": 4, "c2": 1, "c3": 1, "c4": None}, + {"c1": 5, "c2": 1, "c3": 1, "c4": 1}, + ] + assert_loaded_data( + dest_pl, "items", ["c1", "c2", "c3", "c4"], exp, "c1", "c1 IN ('4', '5')" + ) + + def test_init_replication(src_pl: dlt.Pipeline) -> None: def get_table_names_in_pub() -> Set[str]: with src_pl.sql_client() as c: From 7bd211b7d70de759c0809a0010e3de3fe5b66d4d Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Tue, 26 Mar 2024 00:12:07 +0100 Subject: [PATCH 25/38] optimize message type detection for performance --- sources/pg_replication/helpers.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/sources/pg_replication/helpers.py b/sources/pg_replication/helpers.py index 71e97497e..27a4692cf 100644 --- a/sources/pg_replication/helpers.py +++ b/sources/pg_replication/helpers.py @@ -603,20 
+603,20 @@ def process_msg(self, msg: ReplicationMessage) -> None: - `target_batch_size` is reached - a table's schema has changed """ - op = (msg.payload[:1]).decode("utf-8") - if op == "B": - self.last_commit_ts = Begin(msg.payload).commit_ts - elif op == "C": - self.process_commit(msg) - elif op == "R": - self.process_relation(Relation(msg.payload)) - elif op == "I": + op = msg.payload[:1] + if op == b"I": self.process_change(Insert(msg.payload), msg.data_start) - elif op == "U": + elif op == b"U": self.process_change(Update(msg.payload), msg.data_start) - elif op == "D": + elif op == b"D": self.process_change(Delete(msg.payload), msg.data_start) - elif op == "T": + elif op == b"B": + self.last_commit_ts = Begin(msg.payload).commit_ts + elif op == b"C": + self.process_commit(msg) + elif op == b"R": + self.process_relation(Relation(msg.payload)) + elif op == b"T": logger.warning( "The truncate operation is currently not supported. " "Truncate replication messages are ignored." From 48442baf5dfa5bb90c608bc2e61271f9c6c24a3c Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Wed, 10 Apr 2024 00:53:24 +0400 Subject: [PATCH 26/38] upgrade dlt to 0.4.8 --- poetry.lock | 11 ++++++----- pyproject.toml | 2 +- sources/pg_replication/requirements.txt | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/poetry.lock b/poetry.lock index b65bee4db..34c66fb28 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1010,13 +1010,13 @@ files = [ [[package]] name = "dlt" -version = "0.4.7" +version = "0.4.8" description = "dlt is an open-source python-first scalable data loading library that does not require any backend to run." optional = false python-versions = "<3.13,>=3.8.1" files = [ - {file = "dlt-0.4.7-py3-none-any.whl", hash = "sha256:73ae8edabca97ceb7d4a9fb114c525646d390cd5da01c367c998703504a2a4d1"}, - {file = "dlt-0.4.7.tar.gz", hash = "sha256:692ad0f4abfce728ff59f75de4743f6c48ce843e89ef467ff8aeee260012c8a4"}, + {file = "dlt-0.4.8-py3-none-any.whl", hash = "sha256:ade57b2745986c8aada7b2e28856df20164a7738a281c8a87596af62a378be06"}, + {file = "dlt-0.4.8.tar.gz", hash = "sha256:fc46d2ee61bd8d128db84a8214081f78ff642bad5677fdac210080a1f1bcbcf8"}, ] [package.dependencies] @@ -1043,7 +1043,7 @@ pathvalidate = ">=2.5.2" pendulum = ">=2.1.2" psycopg2-binary = {version = ">=2.9.1", optional = true, markers = "extra == \"postgres\" or extra == \"redshift\""} psycopg2cffi = {version = ">=2.9.0", optional = true, markers = "platform_python_implementation == \"PyPy\" and (extra == \"postgres\" or extra == \"redshift\")"} -pyarrow = {version = ">=12.0.0", optional = true, markers = "extra == \"bigquery\" or extra == \"parquet\" or extra == \"motherduck\" or extra == \"athena\" or extra == \"synapse\""} +pyarrow = {version = ">=12.0.0", optional = true, markers = "extra == \"bigquery\" or extra == \"parquet\" or extra == \"motherduck\" or extra == \"athena\" or extra == \"synapse\" or extra == \"dremio\""} pytz = ">=2022.6" PyYAML = ">=5.4.1" requests = ">=2.26.0" @@ -1065,6 +1065,7 @@ bigquery = ["gcsfs (>=2022.4.0)", "google-cloud-bigquery (>=2.26.0)", "grpcio (> cli = ["cron-descriptor (>=1.2.32)", "pipdeptree (>=2.9.0,<2.10)"] databricks = ["databricks-sql-connector (>=2.9.3,<3.0.0)"] dbt = ["dbt-athena-community (>=1.2.0)", "dbt-bigquery (>=1.2.0)", "dbt-core (>=1.2.0)", "dbt-databricks (>=1.7.3,<2.0.0)", "dbt-duckdb (>=1.2.0)", "dbt-redshift (>=1.2.0)", "dbt-snowflake (>=1.2.0)"] +dremio = ["pyarrow (>=12.0.0)"] duckdb = ["duckdb (>=0.10.0,<0.11.0)", "duckdb (>=0.6.1,<0.10.0)"] filesystem = 
["botocore (>=1.28)", "s3fs (>=2022.4.0)"] gcp = ["gcsfs (>=2022.4.0)", "google-cloud-bigquery (>=2.26.0)", "grpcio (>=1.50.0)"] @@ -6350,4 +6351,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.13" -content-hash = "60f6c443d7051b7af207e3e14add7051b522ffdd9ea81dbe32858588fc0ae67e" +content-hash = "31b23ab2fd9f270c3196d7fcd06c0abd498ec3f1701d870228510a7023c56622" diff --git a/pyproject.toml b/pyproject.toml index abd141ac1..3f8bff9a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ packages = [{include = "sources"}] [tool.poetry.dependencies] python = ">=3.8.1,<3.13" -dlt = {version = "0.4.7", allow-prereleases = true, extras = ["redshift", "bigquery", "postgres", "duckdb", "s3", "gs"]} +dlt = {version = "0.4.8", allow-prereleases = true, extras = ["redshift", "bigquery", "postgres", "duckdb", "s3", "gs"]} graphlib-backport = {version = "*", python = "<3.9"} [tool.poetry.group.dev.dependencies] diff --git a/sources/pg_replication/requirements.txt b/sources/pg_replication/requirements.txt index 34f7ec982..95ee4eb8a 100644 --- a/sources/pg_replication/requirements.txt +++ b/sources/pg_replication/requirements.txt @@ -1,3 +1,3 @@ -dlt>=0.4.7 +dlt>=0.4.8 psycopg2>=2.9.9 pypgoutput==0.0.3 \ No newline at end of file From 524945f1a264d978d6ef0ab5f10bad5c065eaf68 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sun, 14 Apr 2024 21:40:11 +0200 Subject: [PATCH 27/38] enables to run tests in parallel --- sources/pg_replication/README.md | 12 +++ sources/pg_replication/helpers.py | 3 +- tests/pg_replication/conftest.py | 20 +++-- tests/pg_replication/test_pg_replication.py | 83 +++++++++++---------- 4 files changed, 74 insertions(+), 44 deletions(-) diff --git a/sources/pg_replication/README.md b/sources/pg_replication/README.md index 8975d01b1..f34fcd4d6 100644 --- a/sources/pg_replication/README.md +++ b/sources/pg_replication/README.md @@ -36,6 +36,18 @@ It also needs `CREATE` privilege on the database: GRANT CREATE ON DATABASE dlt_data TO replication_user; ``` +### Set up RDS +1. You must enable replication for RDS Postgres instance via **Parameter Group**: https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_PostgreSQL.Replication.ReadReplicas.html +2. `WITH LOGIN REPLICATION;` does not work on RDS, instead do: +```sql +GRANT rds_replication TO replication_user; +``` +3. Do not fallback to non SSL connection by setting connection parameters: +```toml +sources.pg_replication.credentials="postgresql://loader:password@host.rds.amazonaws.com:5432/dlt_data?sslmode=require&connect_timeout=300" +``` + + ## Add credentials 1. Open `.dlt/secrets.toml`. 2. 
Enter your Postgres credentials: diff --git a/sources/pg_replication/helpers.py b/sources/pg_replication/helpers.py index 27a4692cf..acbc00455 100644 --- a/sources/pg_replication/helpers.py +++ b/sources/pg_replication/helpers.py @@ -446,13 +446,14 @@ def _get_conn( connection_factory: Optional[Any] = None, ) -> Union[psycopg2.extensions.connection, LogicalReplicationConnection]: """Returns a psycopg2 connection to interact with postgres.""" - return psycopg2.connect( # type: ignore[no-any-return] + return psycopg2.connect( # type: ignore[call-overload,no-any-return] database=credentials.database, user=credentials.username, password=credentials.password, host=credentials.host, port=credentials.port, connection_factory=connection_factory, + **credentials.query, ) diff --git a/tests/pg_replication/conftest.py b/tests/pg_replication/conftest.py index 0878453ad..8ff9a2dcd 100644 --- a/tests/pg_replication/conftest.py +++ b/tests/pg_replication/conftest.py @@ -1,17 +1,21 @@ import pytest -from typing import Iterator +from typing import Iterator, Tuple import dlt +from dlt.common.utils import uniq_id @pytest.fixture() -def src_pl() -> Iterator[dlt.Pipeline]: +def src_config() -> Iterator[Tuple[dlt.Pipeline, str, str]]: + # random slot and pub to enable parallel runs + slot = "test_slot_" + uniq_id(4) + pub = "test_pub" + uniq_id(4) # setup src_pl = dlt.pipeline( pipeline_name="src_pl", destination="postgres", full_refresh=True ) - yield src_pl + yield src_pl, slot, pub # teardown with src_pl.sql_client() as c: # drop tables @@ -25,6 +29,12 @@ def src_pl() -> Iterator[dlt.Pipeline]: except Exception as e: print(e) # drop replication slot - c.execute_sql("SELECT pg_drop_replication_slot('test_slot');") + try: + c.execute_sql(f"SELECT pg_drop_replication_slot('{slot}');") + except Exception as e: + print(e) # drop publication - c.execute_sql("DROP PUBLICATION IF EXISTS test_pub;") + try: + c.execute_sql(f"DROP PUBLICATION IF EXISTS {pub};") + except Exception as e: + print(e) diff --git a/tests/pg_replication/test_pg_replication.py b/tests/pg_replication/test_pg_replication.py index 8c75b7265..13f13d07f 100644 --- a/tests/pg_replication/test_pg_replication.py +++ b/tests/pg_replication/test_pg_replication.py @@ -1,6 +1,6 @@ import pytest -from typing import Set +from typing import Set, Tuple from copy import deepcopy import dlt @@ -19,7 +19,9 @@ @pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) -def test_core_functionality(src_pl: dlt.Pipeline, destination_name: str) -> None: +def test_core_functionality( + src_config: Tuple[dlt.Pipeline, str, str], destination_name: str +) -> None: @dlt.resource(write_disposition="merge", primary_key="id_x") def tbl_x(data): yield data @@ -28,6 +30,8 @@ def tbl_x(data): def tbl_y(data): yield data + src_pl, slot_name, pub_name = src_config + src_pl.run( [ tbl_x({"id_x": 1, "val_x": "foo"}), @@ -37,9 +41,6 @@ def tbl_y(data): add_pk(src_pl.sql_client, "tbl_x", "id_x") add_pk(src_pl.sql_client, "tbl_y", "id_y") - slot_name = "test_slot" - pub_name = "test_pub" - snapshots = init_replication( slot_name=slot_name, pub_name=pub_name, @@ -142,7 +143,9 @@ def tbl_y(data): @pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) -def test_without_init_load(src_pl: dlt.Pipeline, destination_name: str) -> None: +def test_without_init_load( + src_config: Tuple[dlt.Pipeline, str, str], destination_name: str +) -> None: @dlt.resource(write_disposition="merge", primary_key="id_x") def tbl_x(data): yield data @@ -151,6 +154,8 @@ def tbl_x(data): def 
tbl_y(data): yield data + src_pl, slot_name, pub_name = src_config + # create postgres table # since we're skipping initial load, these records should not be in the replicated table src_pl.run( @@ -163,8 +168,6 @@ def tbl_y(data): add_pk(src_pl.sql_client, "tbl_y", "id_y") # initialize replication and create resource for changes - slot_name = "test_slot" - pub_name = "test_pub" init_replication( slot_name=slot_name, pub_name=pub_name, @@ -209,16 +212,16 @@ def tbl_y(data): assert_loaded_data(dest_pl, "tbl_y", ["id_y", "val_y"], exp_tbl_y, "id_y") -def test_insert_only(src_pl: dlt.Pipeline) -> None: +def test_insert_only(src_config: Tuple[dlt.Pipeline, str, str]) -> None: def items(data): yield data + src_pl, slot_name, pub_name = src_config + # create postgres table with single record src_pl.run(items({"id": 1, "foo": "bar"})) # initialize replication and create resource for changes - slot_name = "test_slot" - pub_name = "test_pub" init_replication( slot_name=slot_name, pub_name=pub_name, @@ -249,7 +252,7 @@ def items(data): @pytest.mark.parametrize("give_hints", [True, False]) @pytest.mark.parametrize("init_load", [True, False]) def test_mapped_data_types( - src_pl: dlt.Pipeline, + src_config: Tuple[dlt.Pipeline, str, str], destination_name: str, give_hints: bool, init_load: bool, @@ -264,13 +267,13 @@ def test_mapped_data_types( def items(data): yield data + src_pl, slot_name, pub_name = src_config + # create postgres table with single record containing all data types src_pl.run(items(data)) add_pk(src_pl.sql_client, "items", "col1") # initialize replication and create resources - slot_name = "test_slot" - pub_name = "test_pub" snapshot = init_replication( slot_name=slot_name, pub_name=pub_name, @@ -358,8 +361,11 @@ def items(data): @pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) -def test_unmapped_data_types(src_pl: dlt.Pipeline, destination_name: str) -> None: +def test_unmapped_data_types( + src_config: Tuple[dlt.Pipeline, str, str], destination_name: str +) -> None: """Assert postgres data types that aren't explicitly mapped default to "text" type.""" + src_pl, slot_name, pub_name = src_config # create postgres table with some unmapped types with src_pl.sql_client() as c: @@ -369,8 +375,6 @@ def test_unmapped_data_types(src_pl: dlt.Pipeline, destination_name: str) -> Non ) # initialize replication and create resource - slot_name = "test_slot" - pub_name = "test_pub" init_replication( slot_name=slot_name, pub_name=pub_name, @@ -399,17 +403,19 @@ def test_unmapped_data_types(src_pl: dlt.Pipeline, destination_name: str) -> Non @pytest.mark.parametrize("publish", ["insert", "insert, update, delete"]) -def test_write_disposition(src_pl: dlt.Pipeline, publish: str) -> None: +def test_write_disposition( + src_config: Tuple[dlt.Pipeline, str, str], publish: str +) -> None: @dlt.resource def items(data): yield data + src_pl, slot_name, pub_name = src_config + # create postgres table src_pl.run(items({"id": 1, "val": True})) # create resources - slot_name = "test_slot" - pub_name = "test_pub" snapshot = init_replication( slot_name=slot_name, pub_name=pub_name, @@ -437,7 +443,7 @@ def items(data): @pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) @pytest.mark.parametrize("init_load", [True, False]) def test_include_columns( - src_pl: dlt.Pipeline, destination_name: str, init_load: bool + src_config: Tuple[dlt.Pipeline, str, str], destination_name: str, init_load: bool ) -> None: def get_cols(pipeline: dlt.Pipeline, table_name: str) -> set: with 
pipeline.destination_client(pipeline.default_schema_name) as client: @@ -460,6 +466,8 @@ def tbl_y(data): def tbl_z(data): yield data + src_pl, slot_name, pub_name = src_config + # create three postgres tables src_pl.run( [ @@ -470,8 +478,6 @@ def tbl_z(data): ) # initialize replication and create resources - slot_name = "test_slot" - pub_name = "test_pub" include_columns = { "tbl_x": ["id_x", "val_x"], "tbl_y": ["id_y", "val_y"], @@ -517,7 +523,7 @@ def tbl_z(data): @pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) @pytest.mark.parametrize("init_load", [True, False]) def test_column_hints( - src_pl: dlt.Pipeline, destination_name: str, init_load: bool + src_config: Tuple[dlt.Pipeline, str, str], destination_name: str, init_load: bool ) -> None: @dlt.resource def tbl_x(data): @@ -531,6 +537,8 @@ def tbl_y(data): def tbl_z(data): yield data + src_pl, slot_name, pub_name = src_config + # create three postgres tables src_pl.run( [ @@ -541,8 +549,6 @@ def tbl_z(data): ) # initialize replication and create resources - slot_name = "test_slot" - pub_name = "test_pub" column_hints = { "tbl_x": {"another_col_x": {"data_type": "double"}}, "tbl_y": {"another_col_y": {"precision": 32}}, @@ -621,13 +627,15 @@ def tbl_z(data): @pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) -def test_table_schema_change(src_pl: dlt.Pipeline, destination_name: str) -> None: +def test_table_schema_change( + src_config: Tuple[dlt.Pipeline, str, str], destination_name: str +) -> None: + src_pl, slot_name, pub_name = src_config + # create postgres table src_pl.run([{"c1": 1, "c2": 1}], table_name="items") # initialize replication - slot_name = "test_slot" - pub_name = "test_pub" init_replication( slot_name=slot_name, pub_name=pub_name, @@ -665,7 +673,7 @@ def test_table_schema_change(src_pl: dlt.Pipeline, destination_name: str) -> Non ) -def test_init_replication(src_pl: dlt.Pipeline) -> None: +def test_init_replication(src_config: Tuple[dlt.Pipeline, str, str]) -> None: def get_table_names_in_pub() -> Set[str]: with src_pl.sql_client() as c: result = c.execute_sql( @@ -685,6 +693,8 @@ def tbl_y(data): def tbl_z(data): yield data + src_pl, slot_name, pub_name = src_config + # create three postgres tables src_pl.run( [ @@ -695,8 +705,6 @@ def tbl_z(data): ) # initialize replication with a single table - slot_name = "test_slot" - pub_name = "test_pub" snapshot = init_replication( slot_name=slot_name, pub_name=pub_name, @@ -738,7 +746,7 @@ def tbl_z(data): assert get_table_names_in_pub() >= {"tbl_x", "tbl_y", "tbl_z"} -def test_replicate_schema(src_pl: dlt.Pipeline) -> None: +def test_replicate_schema(src_config: Tuple[dlt.Pipeline, str, str]) -> None: @dlt.resource def tbl_x(data): yield data @@ -751,6 +759,8 @@ def tbl_y(data): def tbl_z(data): yield data + src_pl, slot_name, pub_name = src_config + # create two postgres tables src_pl.run( [ @@ -760,8 +770,6 @@ def tbl_z(data): ) # initialize replication and create resource - slot_name = "test_slot" - pub_name = "test_pub" init_replication( slot_name=slot_name, pub_name=pub_name, @@ -793,17 +801,16 @@ def tbl_z(data): assert set(dest_pl.default_schema.data_table_names()) == {"tbl_x", "tbl_y", "tbl_z"} -def test_batching(src_pl: dlt.Pipeline) -> None: +def test_batching(src_config: Tuple[dlt.Pipeline, str, str]) -> None: # this test asserts the number of data items yielded by the replication resource # is not affected by `target_batch_size` and the number of replication messages per transaction + src_pl, slot_name, pub_name = src_config # create 
postgres table with single record data = {"id": 1000, "val": True} src_pl.run([data], table_name="items") # initialize replication and create resource for changes - slot_name = "test_slot" - pub_name = "test_pub" init_replication( slot_name=slot_name, pub_name=pub_name, From c5961808f6590d94f30af1fc5d1a6fbc313da91f Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sun, 14 Apr 2024 21:41:49 +0200 Subject: [PATCH 28/38] fixes format --- sources/filesystem/helpers.py | 4 ++-- sources/freshdesk/freshdesk_client.py | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/sources/filesystem/helpers.py b/sources/filesystem/helpers.py index 8bd11705d..f241c6160 100644 --- a/sources/filesystem/helpers.py +++ b/sources/filesystem/helpers.py @@ -1,5 +1,5 @@ """Helpers for the filesystem resource.""" -from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Type, Union +from typing import Any, Dict, Iterable, List, Optional, Type, Union from fsspec import AbstractFileSystem # type: ignore from dlt.common.configuration import resolve_type @@ -19,7 +19,7 @@ @configspec class FilesystemConfigurationResource(FilesystemConfiguration): - credentials: Union[FileSystemCredentials, AbstractFileSystem] + credentials: Union[FileSystemCredentials, AbstractFileSystem] = None file_glob: Optional[str] = "*" files_per_page: int = DEFAULT_CHUNK_SIZE extract_content: bool = False diff --git a/sources/freshdesk/freshdesk_client.py b/sources/freshdesk/freshdesk_client.py index 61960a168..fdc6c68cb 100644 --- a/sources/freshdesk/freshdesk_client.py +++ b/sources/freshdesk/freshdesk_client.py @@ -48,7 +48,6 @@ def _request_with_rate_limit(self, url: str, **kwargs: Any) -> requests.Response return response except requests.HTTPError as e: if e.response.status_code == 429: - # Get the 'Retry-After' header to know how long to wait # Fallback to 60 seconds if header is missing seconds_to_wait = int(e.response.headers.get("Retry-After", 60)) From 34610b634f29348d6362a2f26fa946ed3b93cf37 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Mon, 15 Apr 2024 22:55:07 +0400 Subject: [PATCH 29/38] make test more specific to handle postgres version differences --- tests/pg_replication/test_pg_replication.py | 7 +++++-- tests/utils.py | 16 ++++++++++++++-- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/tests/pg_replication/test_pg_replication.py b/tests/pg_replication/test_pg_replication.py index 13f13d07f..f789be107 100644 --- a/tests/pg_replication/test_pg_replication.py +++ b/tests/pg_replication/test_pg_replication.py @@ -10,6 +10,7 @@ ALL_DESTINATIONS, assert_load_info, load_table_counts, + get_table_metrics, ) from sources.pg_replication import replication_resource from sources.pg_replication.helpers import init_replication @@ -237,7 +238,7 @@ def items(data): # extract items from resource dest_pl = dlt.pipeline(pipeline_name="dest_pl", full_refresh=True) extract_info = dest_pl.extract(changes) - assert extract_info.asdict()["job_metrics"][0]["items_count"] == 1 + assert get_table_metrics(extract_info, "items")["items_count"] == 1 # do an update and a delete—these operations should not lead to items in the resource with src_pl.sql_client() as c: @@ -245,7 +246,9 @@ def items(data): c.execute_sql(f"UPDATE {qual_name} SET foo = 'baz' WHERE id = 2;") c.execute_sql(f"DELETE FROM {qual_name} WHERE id = 2;") extract_info = dest_pl.extract(changes) - assert extract_info.asdict()["job_metrics"] == [] + assert ( + get_table_metrics(extract_info, "items") is None + ) # there should be no metrics 
for the "items" table @pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) diff --git a/tests/utils.py b/tests/utils.py index f9216dce7..92478554f 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,7 +1,7 @@ import os import platform import pytest -from typing import Any, Iterator, List, Sequence +from typing import Any, Iterator, List, Sequence, Dict, Optional from os import environ from unittest.mock import patch @@ -16,7 +16,7 @@ ConfigTomlProvider, SecretsTomlProvider, ) -from dlt.common.pipeline import LoadInfo, PipelineContext +from dlt.common.pipeline import LoadInfo, PipelineContext, ExtractInfo from dlt.common.storages import FileStorage from dlt.pipeline.exceptions import SqlClientNotAvailable @@ -231,3 +231,15 @@ def select_data( with p.sql_client(schema_name=schema_name) as c: with c.execute_query(sql) as cur: return list(cur.fetchall()) + + +def get_table_metrics( + extract_info: ExtractInfo, table_name: str +) -> Optional[Dict[str, Any]]: + """Returns table metrics from ExtractInfo object.""" + table_metrics_list = [ + d + for d in extract_info.asdict()["table_metrics"] + if d["table_name"] == table_name + ] + return None if len(table_metrics_list) == 0 else table_metrics_list[0] From 7a070453e93bed4c7863b5ee82f681dd12e909e5 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Tue, 16 Apr 2024 00:04:52 +0400 Subject: [PATCH 30/38] add postgres server version requirement for schema replication functionality --- sources/pg_replication/exceptions.py | 4 +++ sources/pg_replication/helpers.py | 18 ++++++++++++ sources/pg_replication_pipeline.py | 6 +++- tests/pg_replication/test_pg_replication.py | 31 +++++++++++++++------ 4 files changed, 49 insertions(+), 10 deletions(-) diff --git a/sources/pg_replication/exceptions.py b/sources/pg_replication/exceptions.py index 6edf03da1..2b2642777 100644 --- a/sources/pg_replication/exceptions.py +++ b/sources/pg_replication/exceptions.py @@ -1,2 +1,6 @@ class NoPrimaryKeyException(Exception): pass + + +class IncompatiblePostgresVersionException(Exception): + pass diff --git a/sources/pg_replication/helpers.py b/sources/pg_replication/helpers.py index acbc00455..0793aaa02 100644 --- a/sources/pg_replication/helpers.py +++ b/sources/pg_replication/helpers.py @@ -51,6 +51,7 @@ from sql_database import sql_table from .schema_types import _to_dlt_column_schema, _to_dlt_val +from .exceptions import IncompatiblePostgresVersionException @dlt.sources.config.with_config(sections=("sources", "pg_replication")) @@ -181,6 +182,17 @@ def init_replication( return None +@dlt.sources.config.with_config(sections=("sources", "pg_replication")) +def get_pg_version( + cur: cursor = None, + credentials: ConnectionStringCredentials = dlt.secrets.value, +) -> int: + """Returns Postgres server version as int.""" + if cur is not None: + return cur.connection.server_version + return _get_conn(credentials).server_version + + def create_publication( name: str, cur: cursor, @@ -250,6 +262,12 @@ def add_schema_to_publication( Raises error if the user is not a superuser. """ + if (version := get_pg_version(cur)) < 150000: + raise IncompatiblePostgresVersionException( + f"Cannot add schema to publication because the Postgres server version {version} is too low." + " Adding schemas to a publication is only supported for Postgres version 15 or higher." + " Upgrade your Postgres server version or set the `table_names` argument to explicitly specify table names." 
+ ) esc_schema_name = escape_postgres_identifier(schema_name) esc_pub_name = escape_postgres_identifier(pub_name) try: diff --git a/sources/pg_replication_pipeline.py b/sources/pg_replication_pipeline.py index f980ab135..6a23f195a 100644 --- a/sources/pg_replication_pipeline.py +++ b/sources/pg_replication_pipeline.py @@ -118,7 +118,11 @@ def replicate_with_initial_load() -> None: def replicate_entire_schema() -> None: - """Demonstrates setup and usage of schema replication.""" + """Demonstrates setup and usage of schema replication. + + Schema replication requires a Postgres server version of 15 or higher. An + exception is raised if that's not the case. + """ # create source and destination pipelines src_pl = dlt.pipeline( pipeline_name="source_pipeline", diff --git a/tests/pg_replication/test_pg_replication.py b/tests/pg_replication/test_pg_replication.py index f789be107..7fe269e25 100644 --- a/tests/pg_replication/test_pg_replication.py +++ b/tests/pg_replication/test_pg_replication.py @@ -13,7 +13,8 @@ get_table_metrics, ) from sources.pg_replication import replication_resource -from sources.pg_replication.helpers import init_replication +from sources.pg_replication.helpers import init_replication, get_pg_version +from sources.pg_replication.exceptions import IncompatiblePostgresVersionException from .cases import TABLE_ROW_ALL_DATA_TYPES, TABLE_UPDATE_COLUMNS_SCHEMA from .utils import add_pk, assert_loaded_data @@ -739,17 +740,29 @@ def tbl_z(data): # "tbl_y" is still in the publication assert get_table_names_in_pub() == {"tbl_x", "tbl_y"} - # switching to whole schema replication is supported by omitting `table_names` - init_replication( - slot_name=slot_name, - pub_name=pub_name, - schema_name=src_pl.dataset_name, - ) - # includes dlt system tables - assert get_table_names_in_pub() >= {"tbl_x", "tbl_y", "tbl_z"} + # switching to whole schema replication is supported by omitting `table_names`, + # but only for Postgres server versions 15 or higher + if get_pg_version() >= 150000: + init_replication( + slot_name=slot_name, + pub_name=pub_name, + schema_name=src_pl.dataset_name, + ) + # includes dlt system tables + assert get_table_names_in_pub() >= {"tbl_x", "tbl_y", "tbl_z"} + else: + with pytest.raises(IncompatiblePostgresVersionException): + init_replication( + slot_name=slot_name, + pub_name=pub_name, + schema_name=src_pl.dataset_name, + ) def test_replicate_schema(src_config: Tuple[dlt.Pipeline, str, str]) -> None: + if get_pg_version() < 150000: + pytest.skip("incompatible Postgres server version") + @dlt.resource def tbl_x(data): yield data From 61712b4e2b2288d25b0790bda94e197fb685fd5d Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Tue, 16 Apr 2024 00:16:26 +0400 Subject: [PATCH 31/38] removed whitespace --- sources/pg_replication_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sources/pg_replication_pipeline.py b/sources/pg_replication_pipeline.py index 6a23f195a..811337ddc 100644 --- a/sources/pg_replication_pipeline.py +++ b/sources/pg_replication_pipeline.py @@ -119,7 +119,7 @@ def replicate_with_initial_load() -> None: def replicate_entire_schema() -> None: """Demonstrates setup and usage of schema replication. - + Schema replication requires a Postgres server version of 15 or higher. An exception is raised if that's not the case. 
""" From fd1d9736dabc3efdfdd15ee406d9e869fbc672ac Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Tue, 23 Apr 2024 03:32:30 +0400 Subject: [PATCH 32/38] explicitly fetch credentials from pg_replication source --- tests/pg_replication/conftest.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/pg_replication/conftest.py b/tests/pg_replication/conftest.py index 8ff9a2dcd..ed74cc110 100644 --- a/tests/pg_replication/conftest.py +++ b/tests/pg_replication/conftest.py @@ -13,7 +13,10 @@ def src_config() -> Iterator[Tuple[dlt.Pipeline, str, str]]: pub = "test_pub" + uniq_id(4) # setup src_pl = dlt.pipeline( - pipeline_name="src_pl", destination="postgres", full_refresh=True + pipeline_name="src_pl", + destination="postgres", + full_refresh=True, + credentials=dlt.secrets.get("sources.pg_replication.credentials"), ) yield src_pl, slot, pub # teardown From 8bc4da36d545ff690beea6c8e45e297757e0490d Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Tue, 23 Apr 2024 03:32:56 +0400 Subject: [PATCH 33/38] add superuser check --- tests/pg_replication/test_pg_replication.py | 15 +++++++++++---- tests/pg_replication/utils.py | 13 +++++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/tests/pg_replication/test_pg_replication.py b/tests/pg_replication/test_pg_replication.py index 7fe269e25..2c77952a8 100644 --- a/tests/pg_replication/test_pg_replication.py +++ b/tests/pg_replication/test_pg_replication.py @@ -2,6 +2,7 @@ from typing import Set, Tuple from copy import deepcopy +from psycopg2.errors import InsufficientPrivilege import dlt from dlt.destinations.job_client_impl import SqlJobClientBase @@ -17,7 +18,7 @@ from sources.pg_replication.exceptions import IncompatiblePostgresVersionException from .cases import TABLE_ROW_ALL_DATA_TYPES, TABLE_UPDATE_COLUMNS_SCHEMA -from .utils import add_pk, assert_loaded_data +from .utils import add_pk, assert_loaded_data, is_super_user @pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) @@ -741,8 +742,9 @@ def tbl_z(data): assert get_table_names_in_pub() == {"tbl_x", "tbl_y"} # switching to whole schema replication is supported by omitting `table_names`, - # but only for Postgres server versions 15 or higher - if get_pg_version() >= 150000: + # but only for Postgres server versions 15 or higher and with superuser privileges + is_su = is_super_user(src_pl.sql_client) + if get_pg_version() >= 150000 and is_su: init_replication( slot_name=slot_name, pub_name=pub_name, @@ -751,7 +753,10 @@ def tbl_z(data): # includes dlt system tables assert get_table_names_in_pub() >= {"tbl_x", "tbl_y", "tbl_z"} else: - with pytest.raises(IncompatiblePostgresVersionException): + exp_err = ( + InsufficientPrivilege if not is_su else IncompatiblePostgresVersionException + ) + with pytest.raises(exp_err): init_replication( slot_name=slot_name, pub_name=pub_name, @@ -762,6 +767,8 @@ def tbl_z(data): def test_replicate_schema(src_config: Tuple[dlt.Pipeline, str, str]) -> None: if get_pg_version() < 150000: pytest.skip("incompatible Postgres server version") + if not is_super_user(src_config[0].sql_client): + pytest.skip("Postgres user needs to be superuser") @dlt.resource def tbl_x(data): diff --git a/tests/pg_replication/utils.py b/tests/pg_replication/utils.py index 75542aa91..117099fb6 100644 --- a/tests/pg_replication/utils.py +++ b/tests/pg_replication/utils.py @@ -1,7 +1,9 @@ from typing import Sequence, List, Dict, Any, Optional +import dlt from dlt import Pipeline from dlt.common.data_writers.escape import 
escape_postgres_identifier +from dlt.common.configuration.specs import ConnectionStringCredentials from tests.utils import select_data @@ -36,3 +38,14 @@ def assert_loaded_data( for row in select_data(pipeline, qry) ] assert sorted(observation, key=lambda d: d[sort_column_name]) == expectation + + +def is_super_user(sql_client) -> bool: + """Returns True if Postgres user is superuser, False otherwise.""" + username = dlt.secrets.get( + "sources.pg_replication.credentials", ConnectionStringCredentials + ).username + with sql_client() as c: + return c.execute_sql( + f"SELECT rolsuper FROM pg_roles WHERE rolname = '{username}';" + )[0][0] From 77fb1dd6b9330b524671d13781024f6a6cd0e139 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Wed, 1 May 2024 23:07:54 +0200 Subject: [PATCH 34/38] updates lock file --- poetry.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index b42a3b320..f28fa617f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -6400,4 +6400,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.13" -content-hash = "7f07a18a1d2b6ab04b964d6236676d60c0c0551c95f361bb9065c9b268207768" +content-hash = "900fd0a08a58b0b641e8ebaf683227f140ea98849e4b098b65723bbfd57253b8" From 8a1d910be390bd3ad1296b68ed4cc9f75d33d791 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Thu, 2 May 2024 15:33:35 +0400 Subject: [PATCH 35/38] use psycopg2-binary instead of psycopg2 --- poetry.lock | 2 +- pyproject.toml | 2 +- sources/pg_replication/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index f28fa617f..d5563ccaf 100644 --- a/poetry.lock +++ b/poetry.lock @@ -6400,4 +6400,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.13" -content-hash = "900fd0a08a58b0b641e8ebaf683227f140ea98849e4b098b65723bbfd57253b8" +content-hash = "91051d5637073992c5470a7ecd91bc1ae32a874be72433d6c57460adebf3ad5a" diff --git a/pyproject.toml b/pyproject.toml index 43645fbda..58ffaba87 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,7 @@ pymysql = "^1.0.3" connectorx = ">=0.3.1" [tool.poetry.group.pg_replication.dependencies] -psycopg2 = ">=2.9.9" +psycopg2-binary = ">=2.9.9" pypgoutput = "0.0.3" [tool.poetry.group.google_sheets.dependencies] diff --git a/sources/pg_replication/requirements.txt b/sources/pg_replication/requirements.txt index 95ee4eb8a..bdcb04e4d 100644 --- a/sources/pg_replication/requirements.txt +++ b/sources/pg_replication/requirements.txt @@ -1,3 +1,3 @@ dlt>=0.4.8 -psycopg2>=2.9.9 +psycopg2-binary>=2.9.9 pypgoutput==0.0.3 \ No newline at end of file From b0d2abbfbd55100959f11e11af11768e05b501c1 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Thu, 2 May 2024 15:56:21 +0400 Subject: [PATCH 36/38] use destination-specific escape identifier --- tests/pg_replication/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/pg_replication/utils.py b/tests/pg_replication/utils.py index 117099fb6..fe7695b91 100644 --- a/tests/pg_replication/utils.py +++ b/tests/pg_replication/utils.py @@ -29,7 +29,8 @@ def assert_loaded_data( ) -> None: """Asserts loaded data meets expectation.""" qual_name = pipeline.sql_client().make_qualified_table_name(table_name) - column_str = ", ".join(map(escape_postgres_identifier, column_names)) + escape_id = pipeline.destination_client().capabilities.escape_identifier + column_str = ", ".join(map(escape_id, column_names)) qry = f"SELECT {column_str} FROM 
{qual_name}" if where_clause is not None: qry += " WHERE " + where_clause From f63ceff906cff584f28472075e19f92887f5fe9f Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Thu, 2 May 2024 16:40:03 +0400 Subject: [PATCH 37/38] replace string literal with int literal --- tests/pg_replication/test_pg_replication.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pg_replication/test_pg_replication.py b/tests/pg_replication/test_pg_replication.py index 2c77952a8..6d63fa3bc 100644 --- a/tests/pg_replication/test_pg_replication.py +++ b/tests/pg_replication/test_pg_replication.py @@ -674,7 +674,7 @@ def test_table_schema_change( {"c1": 5, "c2": 1, "c3": 1, "c4": 1}, ] assert_loaded_data( - dest_pl, "items", ["c1", "c2", "c3", "c4"], exp, "c1", "c1 IN ('4', '5')" + dest_pl, "items", ["c1", "c2", "c3", "c4"], exp, "c1", "c1 IN (4, 5)" ) From 22758fe05081a8dcc77a7001bfcc41a4390306cb Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Thu, 2 May 2024 17:27:21 +0400 Subject: [PATCH 38/38] include pypgoutput decoders in library --- poetry.lock | 39 +-- pyproject.toml | 1 - sources/pg_replication/decoders.py | 427 ++++++++++++++++++++++++ sources/pg_replication/helpers.py | 20 +- sources/pg_replication/requirements.txt | 3 +- sources/pg_replication/schema_types.py | 4 +- 6 files changed, 440 insertions(+), 54 deletions(-) create mode 100644 sources/pg_replication/decoders.py diff --git a/poetry.lock b/poetry.lock index d5563ccaf..b720978fb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3762,28 +3762,6 @@ files = [ {file = "protobuf-4.24.4.tar.gz", hash = "sha256:5a70731910cd9104762161719c3d883c960151eea077134458503723b60e3667"}, ] -[[package]] -name = "psycopg2" -version = "2.9.9" -description = "psycopg2 - Python-PostgreSQL Database Adapter" -optional = false -python-versions = ">=3.7" -files = [ - {file = "psycopg2-2.9.9-cp310-cp310-win32.whl", hash = "sha256:38a8dcc6856f569068b47de286b472b7c473ac7977243593a288ebce0dc89516"}, - {file = "psycopg2-2.9.9-cp310-cp310-win_amd64.whl", hash = "sha256:426f9f29bde126913a20a96ff8ce7d73fd8a216cfb323b1f04da402d452853c3"}, - {file = "psycopg2-2.9.9-cp311-cp311-win32.whl", hash = "sha256:ade01303ccf7ae12c356a5e10911c9e1c51136003a9a1d92f7aa9d010fb98372"}, - {file = "psycopg2-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:121081ea2e76729acfb0673ff33755e8703d45e926e416cb59bae3a86c6a4981"}, - {file = "psycopg2-2.9.9-cp312-cp312-win32.whl", hash = "sha256:d735786acc7dd25815e89cc4ad529a43af779db2e25aa7c626de864127e5a024"}, - {file = "psycopg2-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:a7653d00b732afb6fc597e29c50ad28087dcb4fbfb28e86092277a559ae4e693"}, - {file = "psycopg2-2.9.9-cp37-cp37m-win32.whl", hash = "sha256:5e0d98cade4f0e0304d7d6f25bbfbc5bd186e07b38eac65379309c4ca3193efa"}, - {file = "psycopg2-2.9.9-cp37-cp37m-win_amd64.whl", hash = "sha256:7e2dacf8b009a1c1e843b5213a87f7c544b2b042476ed7755be813eaf4e8347a"}, - {file = "psycopg2-2.9.9-cp38-cp38-win32.whl", hash = "sha256:ff432630e510709564c01dafdbe996cb552e0b9f3f065eb89bdce5bd31fabf4c"}, - {file = "psycopg2-2.9.9-cp38-cp38-win_amd64.whl", hash = "sha256:bac58c024c9922c23550af2a581998624d6e02350f4ae9c5f0bc642c633a2d5e"}, - {file = "psycopg2-2.9.9-cp39-cp39-win32.whl", hash = "sha256:c92811b2d4c9b6ea0285942b2e7cac98a59e166d59c588fe5cfe1eda58e72d59"}, - {file = "psycopg2-2.9.9-cp39-cp39-win_amd64.whl", hash = "sha256:de80739447af31525feddeb8effd640782cf5998e1a4e9192ebdf829717e3913"}, - {file = "psycopg2-2.9.9.tar.gz", hash = 
"sha256:d1454bde93fb1e224166811694d600e746430c006fbb031ea06ecc2ea41bf156"}, -] - [[package]] name = "psycopg2-binary" version = "2.9.9" @@ -4348,21 +4326,6 @@ docs = ["myst_parser", "sphinx", "sphinx_rtd_theme"] full = ["Pillow", "PyCryptodome"] image = ["Pillow"] -[[package]] -name = "pypgoutput" -version = "0.0.3" -description = "PostgreSQL CDC library using pgoutput and python" -optional = false -python-versions = ">=3.8" -files = [ - {file = "pypgoutput-0.0.3-py3-none-any.whl", hash = "sha256:8790f83cc15f4e52e9df9fea6a42cfb86d9839ac5c93b16587bd0950873904e0"}, - {file = "pypgoutput-0.0.3.tar.gz", hash = "sha256:0866a11ee4938a234bdac58624646760109348614b436612978b3496aae4fddb"}, -] - -[package.dependencies] -psycopg2 = "*" -pydantic = "*" - [[package]] name = "pypydispatcher" version = "2.1.2" @@ -6400,4 +6363,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.13" -content-hash = "91051d5637073992c5470a7ecd91bc1ae32a874be72433d6c57460adebf3ad5a" +content-hash = "ae9798483262d3ecccae313723ac6b67fc416a71be8a3a7a467b78624950e0cc" diff --git a/pyproject.toml b/pyproject.toml index 58ffaba87..f6dbf3199 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,7 +44,6 @@ connectorx = ">=0.3.1" [tool.poetry.group.pg_replication.dependencies] psycopg2-binary = ">=2.9.9" -pypgoutput = "0.0.3" [tool.poetry.group.google_sheets.dependencies] google-api-python-client = "^2.78.0" diff --git a/sources/pg_replication/decoders.py b/sources/pg_replication/decoders.py new file mode 100644 index 000000000..c2707b46a --- /dev/null +++ b/sources/pg_replication/decoders.py @@ -0,0 +1,427 @@ +# flake8: noqa +# file copied from https://raw.githubusercontent.com/dgea005/pypgoutput/master/src/pypgoutput/decoders.py +# we do this instead of importing `pypgoutput` because it depends on `psycopg2`, which causes errors when installing on macOS + +import io +from abc import ABC, abstractmethod +from dataclasses import dataclass +from datetime import datetime, timedelta, timezone +from typing import List, Optional, Union + +# integer byte lengths +INT8 = 1 +INT16 = 2 +INT32 = 4 +INT64 = 8 + + +def convert_pg_ts(_ts_in_microseconds: int) -> datetime: + ts = datetime(2000, 1, 1, 0, 0, 0, 0, tzinfo=timezone.utc) + return ts + timedelta(microseconds=_ts_in_microseconds) + + +def convert_bytes_to_int(_in_bytes: bytes) -> int: + return int.from_bytes(_in_bytes, byteorder="big", signed=True) + + +def convert_bytes_to_utf8(_in_bytes: Union[bytes, bytearray]) -> str: + return (_in_bytes).decode("utf-8") + + +@dataclass(frozen=True) +class ColumnData: + # col_data_category is NOT the type. 
it means null value/toasted(not sent)/text formatted + col_data_category: Optional[str] + col_data_length: Optional[int] = None + col_data: Optional[str] = None + + def __repr__(self) -> str: + return f"[col_data_category='{self.col_data_category}', col_data_length={self.col_data_length}, col_data='{self.col_data}']" + + +@dataclass(frozen=True) +class ColumnType: + """https://www.postgresql.org/docs/12/catalog-pg-attribute.html""" + + part_of_pkey: int + name: str + type_id: int + atttypmod: int + + +@dataclass(frozen=True) +class TupleData: + n_columns: int + column_data: List[ColumnData] + + def __repr__(self) -> str: + return f"n_columns: {self.n_columns}, data: {self.column_data}" + + +class PgoutputMessage(ABC): + def __init__(self, buffer: bytes): + self.buffer: io.BytesIO = io.BytesIO(buffer) + self.byte1: str = self.read_utf8(1) + self.decode_buffer() + + @abstractmethod + def decode_buffer(self) -> None: + """Decoding is implemented for each message type""" + + @abstractmethod + def __repr__(self) -> str: + """Implemented for each message type""" + + def read_int8(self) -> int: + return convert_bytes_to_int(self.buffer.read(INT8)) + + def read_int16(self) -> int: + return convert_bytes_to_int(self.buffer.read(INT16)) + + def read_int32(self) -> int: + return convert_bytes_to_int(self.buffer.read(INT32)) + + def read_int64(self) -> int: + return convert_bytes_to_int(self.buffer.read(INT64)) + + def read_utf8(self, n: int = 1) -> str: + return convert_bytes_to_utf8(self.buffer.read(n)) + + def read_timestamp(self) -> datetime: + # 8 chars -> int64 -> timestamp + return convert_pg_ts(_ts_in_microseconds=self.read_int64()) + + def read_string(self) -> str: + output = bytearray() + while (next_char := self.buffer.read(1)) != b"\x00": + output += next_char + return convert_bytes_to_utf8(output) + + def read_tuple_data(self) -> TupleData: + """ + TupleData + Int16 Number of columns. + Next, one of the following submessages appears for each column (except generated columns): + Byte1('n') Identifies the data as NULL value. + Or + Byte1('u') Identifies unchanged TOASTed value (the actual value is not sent). + Or + Byte1('t') Identifies the data as text formatted value. + Int32 Length of the column value. + Byten The value of the column, in text format. (A future release might support additional formats.) n is the above length. + """ + # TODO: investigate what happens with the generated columns + column_data = list() + n_columns = self.read_int16() + for column in range(n_columns): + col_data_category = self.read_utf8() + if col_data_category in ("n", "u"): + # "n"=NULL, "t"=TOASTed + column_data.append(ColumnData(col_data_category=col_data_category)) + elif col_data_category == "t": + # t = tuple + col_data_length = self.read_int32() + col_data = self.read_utf8(col_data_length) + column_data.append( + ColumnData( + col_data_category=col_data_category, + col_data_length=col_data_length, + col_data=col_data, + ) + ) + return TupleData(n_columns=n_columns, column_data=column_data) + + +class Begin(PgoutputMessage): + """ + https://pgpedia.info/x/xlogrecptr.html + https://www.postgresql.org/docs/14/datatype-pg-lsn.html + + byte1 Byte1('B') Identifies the message as a begin message. + lsn Int64 The final LSN of the transaction. + commit_tx_ts Int64 Commit timestamp of the transaction. The value is in number of microseconds since PostgreSQL epoch (2000-01-01). + tx_xid Int32 Xid of the transaction. 
+ """ + + byte1: str + lsn: int + commit_ts: datetime + tx_xid: int + + def decode_buffer(self) -> None: + if self.byte1 != "B": + raise ValueError("first byte in buffer does not match Begin message") + self.lsn = self.read_int64() + self.commit_ts = self.read_timestamp() + self.tx_xid = self.read_int64() + + def __repr__(self) -> str: + return ( + f"BEGIN \n\tbyte1: '{self.byte1}', \n\tLSN: {self.lsn}, " + f"\n\tcommit_ts {self.commit_ts}, \n\ttx_xid: {self.tx_xid}" + ) + + +class Commit(PgoutputMessage): + """ + byte1: Byte1('C') Identifies the message as a commit message. + flags: Int8 Flags; currently unused (must be 0). + lsn_commit: Int64 The LSN of the commit. + lsn: Int64 The end LSN of the transaction. + Int64 Commit timestamp of the transaction. The value is in number of microseconds since PostgreSQL epoch (2000-01-01). + """ + + byte1: str + flags: int + lsn_commit: int + lsn: int + commit_ts: datetime + + def decode_buffer(self) -> None: + if self.byte1 != "C": + raise ValueError("first byte in buffer does not match Commit message") + self.flags = self.read_int8() + self.lsn_commit = self.read_int64() + self.lsn = self.read_int64() + self.commit_ts = self.read_timestamp() + + def __repr__(self) -> str: + return ( + f"COMMIT \n\tbyte1: {self.byte1}, \n\tflags {self.flags}, \n\tlsn_commit: {self.lsn_commit}" + f"\n\tLSN: {self.lsn}, \n\tcommit_ts {self.commit_ts}" + ) + + +class Origin: + """ + Byte1('O') Identifies the message as an origin message. + Int64 The LSN of the commit on the origin server. + String Name of the origin. + Note that there can be multiple Origin messages inside a single transaction. + This seems to be what origin means: https://www.postgresql.org/docs/12/replication-origins.html + """ + + pass + + +class Relation(PgoutputMessage): + """ + Byte1('R') Identifies the message as a relation message. + Int32 ID of the relation. + String Namespace (empty string for pg_catalog). + String Relation name. + Int8 Replica identity setting for the relation (same as relreplident in pg_class). + # select relreplident from pg_class where relname = 'test_table'; + # from reading the documentation and looking at the tables this is not int8 but a single character + # background: https://www.postgresql.org/docs/10/sql-altertable.html#SQL-CREATETABLE-REPLICA-IDENTITY + Int16 Number of columns. + Next, the following message part appears for each column (except generated columns): + Int8 Flags for the column. Currently can be either 0 for no flags or 1 which marks the column as part of the key. + String Name of the column. + Int32 ID of the column's data type. + Int32 Type modifier of the column (atttypmod). 
+ """ + + byte1: str + relation_id: int + namespace: str + relation_name: str + replica_identity_setting: str + n_columns: int + columns: List[ColumnType] + + def decode_buffer(self) -> None: + if self.byte1 != "R": + raise ValueError("first byte in buffer does not match Relation message") + self.relation_id = self.read_int32() + self.namespace = self.read_string() + self.relation_name = self.read_string() + self.replica_identity_setting = self.read_utf8() + self.n_columns = self.read_int16() + self.columns = list() + + for column in range(self.n_columns): + part_of_pkey = self.read_int8() + col_name = self.read_string() + data_type_id = self.read_int32() + # TODO: check on use of signed / unsigned + # check with select oid from pg_type where typname = ; timestamp == 1184, int4 = 23 + col_modifier = self.read_int32() + self.columns.append( + ColumnType( + part_of_pkey=part_of_pkey, + name=col_name, + type_id=data_type_id, + atttypmod=col_modifier, + ) + ) + + def __repr__(self) -> str: + return ( + f"RELATION \n\tbyte1: '{self.byte1}', \n\trelation_id: {self.relation_id}" + f",\n\tnamespace/schema: '{self.namespace}',\n\trelation_name: '{self.relation_name}'" + f",\n\treplica_identity_setting: '{self.replica_identity_setting}',\n\tn_columns: {self.n_columns} " + f",\n\tcolumns: {self.columns}" + ) + + +class PgType: + """ + Renamed to PgType not to collide with "type" + + Byte1('Y') Identifies the message as a type message. + Int32 ID of the data type. + String Namespace (empty string for pg_catalog). + String Name of the data type. + """ + + pass + + +class Insert(PgoutputMessage): + """ + Byte1('I') Identifies the message as an insert message. + Int32 ID of the relation corresponding to the ID in the relation message. + Byte1('N') Identifies the following TupleData message as a new tuple. + TupleData TupleData message part representing the contents of new tuple. + """ + + byte1: str + relation_id: int + new_tuple_byte: str + new_tuple: TupleData + + def decode_buffer(self) -> None: + if self.byte1 != "I": + raise ValueError( + f"first byte in buffer does not match Insert message (expected 'I', got '{self.byte1}'" + ) + self.relation_id = self.read_int32() + self.new_tuple_byte = self.read_utf8() + self.new_tuple = self.read_tuple_data() + + def __repr__(self) -> str: + return ( + f"INSERT \n\tbyte1: '{self.byte1}', \n\trelation_id: {self.relation_id} " + f"\n\tnew tuple byte: '{self.new_tuple_byte}', \n\tnew_tuple: {self.new_tuple}" + ) + + +class Update(PgoutputMessage): + """ + Byte1('U') Identifies the message as an update message. + Int32 ID of the relation corresponding to the ID in the relation message. + Byte1('K') Identifies the following TupleData submessage as a key. This field is optional and is only present if the update changed data in any of the column(s) that are part of the REPLICA IDENTITY index. + Byte1('O') Identifies the following TupleData submessage as an old tuple. This field is optional and is only present if table in which the update happened has REPLICA IDENTITY set to FULL. + TupleData TupleData message part representing the contents of the old tuple or primary key. Only present if the previous 'O' or 'K' part is present. + Byte1('N') Identifies the following TupleData message as a new tuple. + TupleData TupleData message part representing the contents of a new tuple. + + The Update message may contain either a 'K' message part or an 'O' message part or neither of them, but never both of them. 
+ """ + + byte1: str + relation_id: int + next_byte_identifier: Optional[str] + optional_tuple_identifier: Optional[str] + old_tuple: Optional[TupleData] + new_tuple_byte: str + new_tuple: TupleData + + def decode_buffer(self) -> None: + self.optional_tuple_identifier = None + self.old_tuple = None + if self.byte1 != "U": + raise ValueError( + f"first byte in buffer does not match Update message (expected 'U', got '{self.byte1}'" + ) + self.relation_id = self.read_int32() + # TODO test update to PK, test update with REPLICA IDENTITY = FULL + self.next_byte_identifier = self.read_utf8() # one of K, O or N + if self.next_byte_identifier == "K" or self.next_byte_identifier == "O": + self.optional_tuple_identifier = self.next_byte_identifier + self.old_tuple = self.read_tuple_data() + self.new_tuple_byte = self.read_utf8() + else: + self.new_tuple_byte = self.next_byte_identifier + if self.new_tuple_byte != "N": + # TODO: test exception handling + raise ValueError( + f"did not find new_tuple_byte ('N') at position: {self.buffer.tell()}, found: '{self.new_tuple_byte}'" + ) + self.new_tuple = self.read_tuple_data() + + def __repr__(self) -> str: + return ( + f"UPDATE \n\tbyte1: '{self.byte1}', \n\trelation_id: {self.relation_id}" + f"\n\toptional_tuple_identifier: '{self.optional_tuple_identifier}', \n\toptional_old_tuple_data: {self.old_tuple}" + f"\n\tnew_tuple_byte: '{self.new_tuple_byte}', \n\tnew_tuple: {self.new_tuple}" + ) + + +class Delete(PgoutputMessage): + """ + Byte1('D') Identifies the message as a delete message. + Int32 ID of the relation corresponding to the ID in the relation message. + Byte1('K') Identifies the following TupleData submessage as a key. This field is present if the table in which the delete has happened uses an index as REPLICA IDENTITY. + Byte1('O') Identifies the following TupleData message as a old tuple. This field is present if the table in which the delete has happened has REPLICA IDENTITY set to FULL. + TupleData TupleData message part representing the contents of the old tuple or primary key, depending on the previous field. + + The Delete message may contain either a 'K' message part or an 'O' message part, but never both of them. + """ + + byte1: str + relation_id: int + message_type: str + old_tuple: TupleData + + def decode_buffer(self) -> None: + if self.byte1 != "D": + raise ValueError( + f"first byte in buffer does not match Delete message (expected 'D', got '{self.byte1}'" + ) + self.relation_id = self.read_int32() + self.message_type = self.read_utf8() + # TODO: test with replica identity full + if self.message_type not in ["K", "O"]: + raise ValueError( + f"message type byte is not 'K' or 'O', got: '{self.message_type}'" + ) + self.old_tuple = self.read_tuple_data() + + def __repr__(self) -> str: + return ( + f"DELETE \n\tbyte1: {self.byte1} \n\trelation_id: {self.relation_id} " + f"\n\tmessage_type: {self.message_type} \n\told_tuple: {self.old_tuple}" + ) + + +class Truncate(PgoutputMessage): + """ + Byte1('T') Identifies the message as a truncate message. + Int32 Number of relations + Int8 Option bits for TRUNCATE: 1 for CASCADE, 2 for RESTART IDENTITY + Int32 ID of the relation corresponding to the ID in the relation message. This field is repeated for each relation. 
+ """ + + byte1: str + number_of_relations: int + option_bits: int + relation_ids: List[int] + + def decode_buffer(self) -> None: + if self.byte1 != "T": + raise ValueError( + f"first byte in buffer does not match Truncate message (expected 'T', got '{self.byte1}'" + ) + self.number_of_relations = self.read_int32() + self.option_bits = self.read_int8() + self.relation_ids = [] + for relation in range(self.number_of_relations): + self.relation_ids.append(self.read_int32()) + + def __repr__(self) -> str: + return ( + f"TRUNCATE \n\tbyte1: {self.byte1} \n\tn_relations: {self.number_of_relations} " + f"option_bits: {self.option_bits}, relation_ids: {self.relation_ids}" + ) diff --git a/sources/pg_replication/helpers.py b/sources/pg_replication/helpers.py index 0793aaa02..112c0b1c6 100644 --- a/sources/pg_replication/helpers.py +++ b/sources/pg_replication/helpers.py @@ -17,16 +17,6 @@ ReplicationMessage, StopReplication, ) -from pypgoutput.decoders import ( # type: ignore[import-untyped] - Begin, - Commit, - Relation, - Insert, - Update, - Delete, - Truncate, - ColumnData, -) import dlt @@ -52,6 +42,14 @@ from .schema_types import _to_dlt_column_schema, _to_dlt_val from .exceptions import IncompatiblePostgresVersionException +from .decoders import ( + Begin, + Relation, + Insert, + Update, + Delete, + ColumnData, +) @dlt.sources.config.with_config(sections=("sources", "pg_replication")) @@ -630,7 +628,7 @@ def process_msg(self, msg: ReplicationMessage) -> None: elif op == b"D": self.process_change(Delete(msg.payload), msg.data_start) elif op == b"B": - self.last_commit_ts = Begin(msg.payload).commit_ts + self.last_commit_ts = Begin(msg.payload).commit_ts # type: ignore[assignment] elif op == b"C": self.process_commit(msg) elif op == b"R": diff --git a/sources/pg_replication/requirements.txt b/sources/pg_replication/requirements.txt index bdcb04e4d..7a49c8ab2 100644 --- a/sources/pg_replication/requirements.txt +++ b/sources/pg_replication/requirements.txt @@ -1,3 +1,2 @@ dlt>=0.4.8 -psycopg2-binary>=2.9.9 -pypgoutput==0.0.3 \ No newline at end of file +psycopg2-binary>=2.9.9 \ No newline at end of file diff --git a/sources/pg_replication/schema_types.py b/sources/pg_replication/schema_types.py index c5f2faff3..a5758c32c 100644 --- a/sources/pg_replication/schema_types.py +++ b/sources/pg_replication/schema_types.py @@ -1,8 +1,6 @@ import json from typing import Optional, Any, Dict -from pypgoutput.decoders import ColumnType # type: ignore[import-untyped] - from dlt.common import Decimal from dlt.common.data_types.typing import TDataType from dlt.common.data_types.type_helpers import coerce_value @@ -10,6 +8,8 @@ from dlt.destinations.impl.postgres import capabilities from dlt.destinations.impl.postgres.postgres import PostgresTypeMapper +from .decoders import ColumnType + _DUMMY_VALS: Dict[TDataType, Any] = { "bigint": 0,