diff --git a/docker/Dockerfile b/docker/Dockerfile index 156006cd3..cadabad1f 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -42,6 +42,7 @@ ENV CONTAINER_PUDL_WORKSPACE=${CONTAINER_HOME}/pudl_work ENV PUDL_INPUT=${CONTAINER_PUDL_WORKSPACE}/input ENV PUDL_OUTPUT=${CONTAINER_PUDL_WORKSPACE}/output ENV DAGSTER_HOME=${CONTAINER_PUDL_WORKSPACE}/dagster_home +ENV USE_PUDL_MODELS=True RUN mkdir -p ${PUDL_INPUT} ${PUDL_OUTPUT} ${DAGSTER_HOME} ${PUDL_REPO} diff --git a/environments/conda-linux-64.lock.yml b/environments/conda-linux-64.lock.yml index a8b1c7408..6ac586607 100644 --- a/environments/conda-linux-64.lock.yml +++ b/environments/conda-linux-64.lock.yml @@ -1,6 +1,6 @@ # Generated by conda-lock. # platform: linux-64 -# input_hash: 878091495eccf0117775fe4202ca87ee48aa6c4eab88572727c5cbdfc8b568d3 +# input_hash: a40867fce368971e202d16de20a1494fbfc8f7b889e0602a82b3473598c76e11 channels: - conda-forge @@ -43,7 +43,7 @@ dependencies: - aws-c-sdkutils=0.2.2=h4e1184b_0 - aws-checksums=0.2.2=h4e1184b_4 - aws-crt-cpp=0.29.9=he0e7f3f_2 - - aws-sdk-cpp=1.11.458=h4d475cb_6 + - aws-sdk-cpp=1.11.489=h4d475cb_0 - azure-core-cpp=1.14.0=h5cfcd09_0 - azure-identity-cpp=1.10.0=h113e628_0 - azure-storage-blobs-cpp=12.13.0=h3cf044e_1 @@ -113,6 +113,7 @@ dependencies: - debugpy=1.8.12=py312h2ec8cdc_0 - decorator=5.1.1=pyhd8ed1ab_1 - defusedxml=0.7.1=pyhd8ed1ab_0 + - deltalake=0.24.0=py312h07cb367_0 - deprecated=1.2.15=pyhd8ed1ab_1 - distlib=0.3.9=pyhd8ed1ab_1 - dnspython=2.7.0=pyhff2d567_1 @@ -251,10 +252,10 @@ dependencies: - lerc=4.0.0=h27087fc_0 - libabseil=20240722.0=cxx17_hbbce691_4 - libarchive=3.7.7=h4585015_3 - - libarrow=18.1.0=h8f076bb_12_cpu - - libarrow-acero=18.1.0=hcb10f89_12_cpu - - libarrow-dataset=18.1.0=hcb10f89_12_cpu - - libarrow-substrait=18.1.0=h08228c5_12_cpu + - libarrow=18.1.0=h461ed7b_13_cpu + - libarrow-acero=18.1.0=hcb10f89_13_cpu + - libarrow-dataset=18.1.0=hcb10f89_13_cpu + - libarrow-substrait=18.1.0=h08228c5_13_cpu - libavif16=1.1.1=h1909e37_2 - 
libblas=3.9.0=26_linux64_openblas - libbrotlicommon=1.1.0=hb9d3cd8_2 @@ -292,7 +293,7 @@ dependencies: - libnsl=2.0.1=hd590300_0 - libntlm=1.8=hb9d3cd8_0 - libopenblas=0.3.28=pthreads_h94d23a6_1 - - libparquet=18.1.0=h081d1f1_12_cpu + - libparquet=18.1.0=h081d1f1_13_cpu - libpng=1.6.45=h943b412_0 - libpq=17.2=h3b95a9b_1 - libprotobuf=5.28.3=h6128344_1 @@ -334,9 +335,9 @@ dependencies: - mergedeep=1.3.4=pyhd8ed1ab_1 - minizip=4.0.7=h05a5f5f_3 - mistune=3.1.0=pyhd8ed1ab_0 - - mlflow=2.19.0=h7900ff3_0 - - mlflow-skinny=2.19.0=py312h7900ff3_0 - - mlflow-ui=2.19.0=py312h7900ff3_0 + - mlflow=2.20.0=h7900ff3_0 + - mlflow-skinny=2.20.0=py312h7900ff3_0 + - mlflow-ui=2.20.0=py312h7900ff3_0 - more-itertools=10.6.0=pyhd8ed1ab_0 - msgpack-python=1.1.0=py312h68727a3_0 - multidict=6.1.0=py312h178313f_2 @@ -409,6 +410,7 @@ dependencies: - pure_eval=0.2.3=pyhd8ed1ab_1 - pyarrow=18.1.0=py312h7900ff3_0 - pyarrow-core=18.1.0=py312h01725c0_0_cpu + - pyarrow-hotfix=0.6=pyhd8ed1ab_1 - pyasn1=0.6.1=pyhd8ed1ab_2 - pyasn1-modules=0.4.1=pyhd8ed1ab_1 - pybtex=0.24.0=pyhd8ed1ab_3 diff --git a/environments/conda-lock.yml b/environments/conda-lock.yml index eb2d1b408..e47d50b22 100644 --- a/environments/conda-lock.yml +++ b/environments/conda-lock.yml @@ -15,9 +15,9 @@ version: 1 metadata: content_hash: - linux-64: 878091495eccf0117775fe4202ca87ee48aa6c4eab88572727c5cbdfc8b568d3 - osx-64: 6f5f2716d95b3f5dfce0a2a55c80a997b5224c0ca3ea6386de439a120ac76ab9 - osx-arm64: 893f66740883ac9f298ed6c1703550c97fc74fa837aa11d4062ca8e3a1a233d8 + linux-64: a40867fce368971e202d16de20a1494fbfc8f7b889e0602a82b3473598c76e11 + osx-64: 01b55c205e542a6ec3079e568f2f105dd6e70c6461a90cd04d98ed9157f334c2 + osx-arm64: f127291e8632894e5612179a9a91ca72882a883e124bfabff789c780fdaf09fa channels: - url: conda-forge used_env_vars: [] @@ -1658,7 +1658,7 @@ package: category: main optional: false - name: aws-sdk-cpp - version: 1.11.458 + version: 1.11.489 manager: conda platform: linux-64 dependencies: @@ -1672,14 +1672,14 @@ 
package: libstdcxx: ">=13" libzlib: ">=1.3.1,<2.0a0" openssl: ">=3.4.0,<4.0a0" - url: https://conda.anaconda.org/conda-forge/linux-64/aws-sdk-cpp-1.11.458-h4d475cb_6.conda + url: https://conda.anaconda.org/conda-forge/linux-64/aws-sdk-cpp-1.11.489-h4d475cb_0.conda hash: - md5: 6139e84bbb6fdb27ca49c2981613a5fa - sha256: 2309d96d537b5c3810c6e9fbf5b6bb7e06ce907a609d172063ab504b2cc67f30 + md5: b775e9f46dfa94b228a81d8e8c6d8b1d + sha256: 08d6b7d2ed17bfcc7deb903c7751278ee434abdb27e3be0dceb561f30f030c75 category: main optional: false - name: aws-sdk-cpp - version: 1.11.458 + version: 1.11.489 manager: conda platform: osx-64 dependencies: @@ -1692,14 +1692,14 @@ package: libcxx: ">=18" libzlib: ">=1.3.1,<2.0a0" openssl: ">=3.4.0,<4.0a0" - url: https://conda.anaconda.org/conda-forge/osx-64/aws-sdk-cpp-1.11.458-h904bc55_6.conda + url: https://conda.anaconda.org/conda-forge/osx-64/aws-sdk-cpp-1.11.489-h904bc55_0.conda hash: - md5: be7fbaf4afe5fec70c9f24143028cffa - sha256: c2753de5f4a4796aaa770b9b8f1a7a3ed540331ba9dfa0d5a27a0288071ba814 + md5: b860858f5b5d146af55a3ae58574e7f6 + sha256: 06476455d8cd32c2f701ee609b6368b54a5e7bd8f5fd0c8b9a9240f68848703c category: main optional: false - name: aws-sdk-cpp - version: 1.11.458 + version: 1.11.489 manager: conda platform: osx-arm64 dependencies: @@ -1712,10 +1712,10 @@ package: libcxx: ">=18" libzlib: ">=1.3.1,<2.0a0" openssl: ">=3.4.0,<4.0a0" - url: https://conda.anaconda.org/conda-forge/osx-arm64/aws-sdk-cpp-1.11.458-h0e5014b_6.conda + url: https://conda.anaconda.org/conda-forge/osx-arm64/aws-sdk-cpp-1.11.489-h0e5014b_0.conda hash: - md5: a3b7b9a81d8c6a8f963f619d9e362c39 - sha256: 8c21ce4692d1631b96adea099d7c2b905fc5d078db63bf754d85600dcf184651 + md5: 156cfb45a1bb8cffc81e59047bb34f51 + sha256: d82451530ddf363d8bb31a8a7391bb9699f745e940ace91d78c0e6170deef03c category: main optional: false - name: azure-core-cpp @@ -4823,6 +4823,54 @@ package: sha256: 9717a059677553562a8f38ff07f3b9f61727bd614f505658b0a5ecbcf8df89be category: main 
optional: false + - name: deltalake + version: 0.24.0 + manager: conda + platform: linux-64 + dependencies: + __glibc: ">=2.17,<3.0.a0" + libgcc: ">=13" + liblzma: ">=5.6.3,<6.0a0" + pyarrow: ">=16" + pyarrow-hotfix: "" + python: ">=3.12,<3.13.0a0" + python_abi: 3.12.* + url: https://conda.anaconda.org/conda-forge/linux-64/deltalake-0.24.0-py312h07cb367_0.conda + hash: + md5: 8c0b7066e75926407bbb0307bc63e2b5 + sha256: 7ab302d46564e707ecdaf09d06239cb46dcb1dedd55247885f68f551fd62ae05 + category: main + optional: false + - name: deltalake + version: 0.24.0 + manager: conda + platform: osx-64 + dependencies: + pyarrow: ">=16" + pyarrow-hotfix: "" + python: ">=3.12,<3.13.0a0" + python_abi: 3.12.* + url: https://conda.anaconda.org/conda-forge/osx-64/deltalake-0.24.0-py312h16aec2d_0.conda + hash: + md5: 2cbeb7bb52d6f593c5e91721d052406b + sha256: 185fc067ace39a524e599619ef51ccbc1d3bb737ae4ed574059f2e911dc5524a + category: main + optional: false + - name: deltalake + version: 0.24.0 + manager: conda + platform: osx-arm64 + dependencies: + pyarrow: ">=16" + pyarrow-hotfix: "" + python: ">=3.12,<3.13.0a0" + python_abi: 3.12.* + url: https://conda.anaconda.org/conda-forge/osx-arm64/deltalake-0.24.0-py312h30a1e44_0.conda + hash: + md5: 449b7fd8a970fc53ff23249963f3949c + sha256: 38135afb692f17aaf14e026144bfaaa2ae0eee019678b66724439540d3452b8e + category: main + optional: false - name: deprecated version: 1.2.15 manager: conda @@ -8031,8 +8079,8 @@ package: platform: osx-64 dependencies: python: ">=3.9" - hyperframe: ">=6.0,<7" hpack: ">=4.0,<5" + hyperframe: ">=6.0,<7" url: https://conda.anaconda.org/conda-forge/noarch/h2-4.1.0-pyhd8ed1ab_1.conda hash: md5: 825927dc7b0f287ef8d4d0011bb113b1 @@ -8045,8 +8093,8 @@ package: platform: osx-arm64 dependencies: python: ">=3.9" - hyperframe: ">=6.0,<7" hpack: ">=4.0,<5" + hyperframe: ">=6.0,<7" url: https://conda.anaconda.org/conda-forge/noarch/h2-4.1.0-pyhd8ed1ab_1.conda hash: md5: 825927dc7b0f287ef8d4d0011bb113b1 @@ -10926,7 +10974,7 
@@ package: dependencies: __glibc: ">=2.17,<3.0.a0" aws-crt-cpp: ">=0.29.9,<0.29.10.0a0" - aws-sdk-cpp: ">=1.11.458,<1.11.459.0a0" + aws-sdk-cpp: ">=1.11.489,<1.11.490.0a0" azure-core-cpp: ">=1.14.0,<1.14.1.0a0" azure-identity-cpp: ">=1.10.0,<1.10.1.0a0" azure-storage-blobs-cpp: ">=12.13.0,<12.13.1.0a0" @@ -10949,10 +10997,10 @@ package: re2: "" snappy: ">=1.2.1,<1.3.0a0" zstd: ">=1.5.6,<1.6.0a0" - url: https://conda.anaconda.org/conda-forge/linux-64/libarrow-18.1.0-h8f076bb_12_cpu.conda + url: https://conda.anaconda.org/conda-forge/linux-64/libarrow-18.1.0-h461ed7b_13_cpu.conda hash: - md5: 0f2278356c717d5b34e2ceb3d0c3ba04 - sha256: 7292768892add7d8c0c9323ffc2b8829935b110530f27fb329d8c84ea811518c + md5: 6917c81dcf0d18135de47c18093668ef + sha256: 340137cf62ea7193148a55e7919c67cc8bca6231bcecc983fcf9faef8b555910 category: main optional: false - name: libarrow @@ -10962,7 +11010,7 @@ package: dependencies: __osx: ">=10.13" aws-crt-cpp: ">=0.29.9,<0.29.10.0a0" - aws-sdk-cpp: ">=1.11.458,<1.11.459.0a0" + aws-sdk-cpp: ">=1.11.489,<1.11.490.0a0" azure-core-cpp: ">=1.14.0,<1.14.1.0a0" azure-identity-cpp: ">=1.10.0,<1.10.1.0a0" azure-storage-blobs-cpp: ">=12.13.0,<12.13.1.0a0" @@ -10983,10 +11031,10 @@ package: re2: "" snappy: ">=1.2.1,<1.3.0a0" zstd: ">=1.5.6,<1.6.0a0" - url: https://conda.anaconda.org/conda-forge/osx-64/libarrow-18.1.0-h36d682d_12_cpu.conda + url: https://conda.anaconda.org/conda-forge/osx-64/libarrow-18.1.0-h553eae1_13_cpu.conda hash: - md5: 6cf3d16aa654b55dc9e728f2b274e155 - sha256: 070011eb1215cf2415400b4c7dc44f4b0c88c5e1dac9d2379867970fe34ea12c + md5: 0f31775de2db22bbe0a2eb98e523c257 + sha256: 606ae01da8ed31db993c9fa08d50a885f5a06ce09498c6a8be495d246c95ce07 category: main optional: false - name: libarrow @@ -10996,7 +11044,7 @@ package: dependencies: __osx: ">=11.0" aws-crt-cpp: ">=0.29.9,<0.29.10.0a0" - aws-sdk-cpp: ">=1.11.458,<1.11.459.0a0" + aws-sdk-cpp: ">=1.11.489,<1.11.490.0a0" azure-core-cpp: ">=1.14.0,<1.14.1.0a0" azure-identity-cpp: 
">=1.10.0,<1.10.1.0a0" azure-storage-blobs-cpp: ">=12.13.0,<12.13.1.0a0" @@ -11017,10 +11065,10 @@ package: re2: "" snappy: ">=1.2.1,<1.3.0a0" zstd: ">=1.5.6,<1.6.0a0" - url: https://conda.anaconda.org/conda-forge/osx-arm64/libarrow-18.1.0-hd1aa4b5_12_cpu.conda + url: https://conda.anaconda.org/conda-forge/osx-arm64/libarrow-18.1.0-h5b094fc_13_cpu.conda hash: - md5: 2289a15afb26de5f5ea2a10eb3526845 - sha256: 80c4fe4384eb7643f1ef2bb41dd47e58aba6297e9ca2b29ecc17cbe86b907c39 + md5: 884110fe0a7b64fad6ef531a1d4f03ad + sha256: db525827251ddbb811a5a8fd5a0b2a32dc07f2dbb06f4d76f6e2fbfe84c0638e category: main optional: false - name: libarrow-acero @@ -11032,10 +11080,10 @@ package: libarrow: 18.1.0 libgcc: ">=13" libstdcxx: ">=13" - url: https://conda.anaconda.org/conda-forge/linux-64/libarrow-acero-18.1.0-hcb10f89_12_cpu.conda + url: https://conda.anaconda.org/conda-forge/linux-64/libarrow-acero-18.1.0-hcb10f89_13_cpu.conda hash: - md5: 411f7123e24833c0940ab9a30275ef84 - sha256: 4c684a046a786fcb19f3b16f2c8874d736fc4e79853c332084aa1402ea4623c4 + md5: a365aca4deec9ab3368c08894a1dd96a + sha256: 253300ae754ecf8163a9d40a45f5cc531337830bbbaf9c48142adb78036916a1 category: main optional: false - name: libarrow-acero @@ -11046,10 +11094,10 @@ package: __osx: ">=10.13" libarrow: 18.1.0 libcxx: ">=18" - url: https://conda.anaconda.org/conda-forge/osx-64/libarrow-acero-18.1.0-ha6338a2_12_cpu.conda + url: https://conda.anaconda.org/conda-forge/osx-64/libarrow-acero-18.1.0-ha6338a2_13_cpu.conda hash: - md5: 24933fdb5fe94c37dd4e85fb13643414 - sha256: 4a575114629bcfeb0fe7d4fb32fda3c7a4a25d323246f6bafaa18ab2880372c4 + md5: 9b404a3220fcb906807a24de7acc6161 + sha256: 2c0242ecd0a24d6b5ad15b9a2d16d08c375359f864b74bf62ef26633c964728d category: main optional: false - name: libarrow-acero @@ -11060,10 +11108,10 @@ package: __osx: ">=11.0" libarrow: 18.1.0 libcxx: ">=18" - url: https://conda.anaconda.org/conda-forge/osx-arm64/libarrow-acero-18.1.0-hf07054f_12_cpu.conda + url: 
https://conda.anaconda.org/conda-forge/osx-arm64/libarrow-acero-18.1.0-hf07054f_13_cpu.conda hash: - md5: cfa2723548570a329064bd42bc538b02 - sha256: 6ad2f4bdb3bf9cb2d4f6878d5f20def10a1ae50e70b39dba98f339f0687ffc03 + md5: 8f5c477327d5ecf8a16a73528b733abe + sha256: 0f427898854f3b1c03d2c8ca697ea822e4489c28c2ee47b7d37f0b0d80cb5e70 category: main optional: false - name: libarrow-dataset @@ -11077,10 +11125,10 @@ package: libgcc: ">=13" libparquet: 18.1.0 libstdcxx: ">=13" - url: https://conda.anaconda.org/conda-forge/linux-64/libarrow-dataset-18.1.0-hcb10f89_12_cpu.conda + url: https://conda.anaconda.org/conda-forge/linux-64/libarrow-dataset-18.1.0-hcb10f89_13_cpu.conda hash: - md5: cb8739ea145691d46a78cbb782ef6c68 - sha256: 73a078f6140a3739d5c6edbc2d2303cbb2c149d576ecaf0a360c7ccd98b67ac6 + md5: 826d4f1d7e7e4c3cdc8f104e382c45f3 + sha256: d6cde6fbbbc11fdd6fe33cd94171e937bf111b3c42ac75f8a9d06b4b404d6761 category: main optional: false - name: libarrow-dataset @@ -11093,10 +11141,10 @@ package: libarrow-acero: 18.1.0 libcxx: ">=18" libparquet: 18.1.0 - url: https://conda.anaconda.org/conda-forge/osx-64/libarrow-dataset-18.1.0-ha6338a2_12_cpu.conda + url: https://conda.anaconda.org/conda-forge/osx-64/libarrow-dataset-18.1.0-ha6338a2_13_cpu.conda hash: - md5: 590d9ee9b61520d8b351fb90bb0dcdb1 - sha256: d6d66aaf042d4245195b79174617cf01c44abd07c654adb626cbabcb6b972b8b + md5: 6c521b7c0db6b125f7a8865e7d5b67ac + sha256: 669dd9d065235800473211dece0b30b2846ff6757331bc1f91cea97428d96181 category: main optional: false - name: libarrow-dataset @@ -11109,10 +11157,10 @@ package: libarrow-acero: 18.1.0 libcxx: ">=18" libparquet: 18.1.0 - url: https://conda.anaconda.org/conda-forge/osx-arm64/libarrow-dataset-18.1.0-hf07054f_12_cpu.conda + url: https://conda.anaconda.org/conda-forge/osx-arm64/libarrow-dataset-18.1.0-hf07054f_13_cpu.conda hash: - md5: f355430c2109e59abc10e9fe0fb45a8f - sha256: 3c2d077209ebb6feede3e4712d2bbb23ace9f2ed86c441fef2bd97db177c7fed + md5: 
eca5bc6672bcbdc1774df6a9f85e2fe8 + sha256: 71c29c18d78872d4e5352d5912e84adbcb58febe49f789a2039fa528daf6f2ca category: main optional: false - name: libarrow-substrait @@ -11128,10 +11176,10 @@ package: libgcc: ">=13" libprotobuf: ">=5.28.3,<5.28.4.0a0" libstdcxx: ">=13" - url: https://conda.anaconda.org/conda-forge/linux-64/libarrow-substrait-18.1.0-h08228c5_12_cpu.conda + url: https://conda.anaconda.org/conda-forge/linux-64/libarrow-substrait-18.1.0-h08228c5_13_cpu.conda hash: - md5: 05c44b6957d9eacee011cfa6d301c76a - sha256: 4ec741b4a43235b2ef7b420087e56bfc07fd5e0645b56553dec2afe00ce1c53a + md5: 3cf949f157f29638e806bcf36e2fa42d + sha256: 75ec009488201e75875a63bb85dc42e6367bbded893e5c8f3be64fcd912b81a4 category: main optional: false - name: libarrow-substrait @@ -11146,10 +11194,10 @@ package: libarrow-dataset: 18.1.0 libcxx: ">=18" libprotobuf: ">=5.28.3,<5.28.4.0a0" - url: https://conda.anaconda.org/conda-forge/osx-64/libarrow-substrait-18.1.0-h5c2345d_12_cpu.conda + url: https://conda.anaconda.org/conda-forge/osx-64/libarrow-substrait-18.1.0-h5c2345d_13_cpu.conda hash: - md5: 8a3a129f9b7b182ccb98e368a9d7b00a - sha256: 0ea6021dd492294506fa572a5f29013ae28953bee39ccb5f407695fc547e3f06 + md5: ed1a22449ab3af5ae21a154c20fa0659 + sha256: d28b5fe61c383a3dbe1cb29cedbbfb667d7daded7be65d724974b7e7898714a0 category: main optional: false - name: libarrow-substrait @@ -11164,10 +11212,10 @@ package: libarrow-dataset: 18.1.0 libcxx: ">=18" libprotobuf: ">=5.28.3,<5.28.4.0a0" - url: https://conda.anaconda.org/conda-forge/osx-arm64/libarrow-substrait-18.1.0-h4239455_12_cpu.conda + url: https://conda.anaconda.org/conda-forge/osx-arm64/libarrow-substrait-18.1.0-h4239455_13_cpu.conda hash: - md5: 05eff0e69d56b9828cca8de6ef696d1e - sha256: 551db05126b9f4f34bbf82f16238ad5ca36fe66d45cae68c169d3f2c74e156e8 + md5: 6dee639885a89f3e58b9cd8c1f9e930d + sha256: 01520dc1676f16e47f14cfbe4c293414b727201a6d1ba8bfcb287364c718f354 category: main optional: false - name: libavif16 @@ -12776,10 
+12824,10 @@ package: libstdcxx: ">=13" libthrift: ">=0.21.0,<0.21.1.0a0" openssl: ">=3.4.0,<4.0a0" - url: https://conda.anaconda.org/conda-forge/linux-64/libparquet-18.1.0-h081d1f1_12_cpu.conda + url: https://conda.anaconda.org/conda-forge/linux-64/libparquet-18.1.0-h081d1f1_13_cpu.conda hash: - md5: e3f7b26202278a1b5356a108884baa30 - sha256: 354d0604387de652c5773b254983e82b5fd722493dcb80bcb401b0040a12ddad + md5: 6dd4698f4bfdd7bf09945a9c6acd9020 + sha256: 04798112e195cae6bcba4b8e412b4ea62863322c5d97f319ab6dc954921efabf category: main optional: false - name: libparquet @@ -12792,10 +12840,10 @@ package: libcxx: ">=18" libthrift: ">=0.21.0,<0.21.1.0a0" openssl: ">=3.4.0,<4.0a0" - url: https://conda.anaconda.org/conda-forge/osx-64/libparquet-18.1.0-h3e22b07_12_cpu.conda + url: https://conda.anaconda.org/conda-forge/osx-64/libparquet-18.1.0-h3e22b07_13_cpu.conda hash: - md5: c1f8ab1d7896682ae575405655397195 - sha256: cca09418f93788ebe5c321266bb6dc3423de677fa42118501e25d97b3b20552b + md5: a6c86bdb1f0a3ef40f7a61a29243598d + sha256: 052ce92ea3996f33c426bf06d6c64e84359cc2274e87e94ba390f0fde2925cc0 category: main optional: false - name: libparquet @@ -12808,10 +12856,10 @@ package: libcxx: ">=18" libthrift: ">=0.21.0,<0.21.1.0a0" openssl: ">=3.4.0,<4.0a0" - url: https://conda.anaconda.org/conda-forge/osx-arm64/libparquet-18.1.0-h636d7b7_12_cpu.conda + url: https://conda.anaconda.org/conda-forge/osx-arm64/libparquet-18.1.0-h636d7b7_13_cpu.conda hash: - md5: 164db85ff7ed80631e0f1497ff51a7e6 - sha256: e5b412919a9055492c5cc5508de7a12bc6d23b5cce77d3e60e0d5e2e770e5870 + md5: 1569590114304871f15e067251bc3c94 + sha256: 392445ffa49144259154b5e01b8468b976b61db65e7278182b7da8dbb7842a49 category: main optional: false - name: libpng @@ -14534,7 +14582,7 @@ package: category: main optional: false - name: mlflow - version: 2.19.0 + version: 2.20.0 manager: conda platform: linux-64 dependencies: @@ -14546,7 +14594,7 @@ package: jinja2: <4,>=2.11 markdown: <4,>=3.3 matplotlib-base: <4 - 
mlflow-ui: 2.19.0 + mlflow-ui: 2.20.0 numpy: <3 pandas: <3 prometheus_flask_exporter: <1 @@ -14556,14 +14604,14 @@ package: scikit-learn: <2 scipy: <2 sqlalchemy: ">=1.4.0,<3" - url: https://conda.anaconda.org/conda-forge/linux-64/mlflow-2.19.0-h7900ff3_0.conda + url: https://conda.anaconda.org/conda-forge/linux-64/mlflow-2.20.0-h7900ff3_0.conda hash: - md5: d244bb63078d55f4f2392efdd30de03e - sha256: 86fb9183306f64feb4d8f02222e5f9c75fbc820112741d49f84d175ba423a58b + md5: 79629338f091387837ef76975fb178b4 + sha256: 238c393c412e63e266bcee9c525f1a40ae4ae0e40cfdac9647627ced939d8365 category: main optional: false - name: mlflow - version: 2.19.0 + version: 2.20.0 manager: conda platform: osx-64 dependencies: @@ -14575,7 +14623,7 @@ package: jinja2: <4,>=2.11 markdown: <4,>=3.3 matplotlib-base: <4 - mlflow-ui: 2.19.0 + mlflow-ui: 2.20.0 numpy: <3 pandas: <3 prometheus_flask_exporter: <1 @@ -14585,14 +14633,14 @@ package: scikit-learn: <2 scipy: <2 sqlalchemy: ">=1.4.0,<3" - url: https://conda.anaconda.org/conda-forge/osx-64/mlflow-2.19.0-hb401068_0.conda + url: https://conda.anaconda.org/conda-forge/osx-64/mlflow-2.20.0-hb401068_0.conda hash: - md5: 29e40ed4d508b0b0479991e5e25f2ae1 - sha256: b63b6f414498513977ada204e6c22be8d44259891980af44965c6a0128eca21e + md5: b3b9e64ea2c39a63fc1d678d9b89de04 + sha256: aca25dfacbd685492188f97cb29c3224b13fcc69db95d45bd54423b80282e821 category: main optional: false - name: mlflow - version: 2.19.0 + version: 2.20.0 manager: conda platform: osx-arm64 dependencies: @@ -14604,7 +14652,7 @@ package: jinja2: <4,>=2.11 markdown: <4,>=3.3 matplotlib-base: <4 - mlflow-ui: 2.19.0 + mlflow-ui: 2.20.0 numpy: <3 pandas: <3 prometheus_flask_exporter: <1 @@ -14615,14 +14663,14 @@ package: scikit-learn: <2 scipy: <2 sqlalchemy: ">=1.4.0,<3" - url: https://conda.anaconda.org/conda-forge/osx-arm64/mlflow-2.19.0-py312h81bd7bf_0.conda + url: https://conda.anaconda.org/conda-forge/osx-arm64/mlflow-2.20.0-py312h81bd7bf_0.conda hash: - md5: 
a01eb4e3c6583b42543a00a6afbcdd2c - sha256: 5e463381b2b31646f3612a04cd7ba6ec90bc0b70ca702abde880deb61d117ccb + md5: 293c9f27e936291e99c20178df922aa7 + sha256: 40f0f9d884fddf376d6eb99f0741581177d295e8842763539905b304e4a93107 category: main optional: false - name: mlflow-skinny - version: 2.19.0 + version: 2.20.0 manager: conda platform: linux-64 dependencies: @@ -14637,20 +14685,22 @@ package: opentelemetry-sdk: <3,>=1.0.0 packaging: <25 protobuf: ">=3.12.0,<6" + pydantic: <3,>=1.0 python: ">=3.12,<3.13.0a0" python_abi: 3.12.* pytz: <2025 pyyaml: ">=5.1,<7" requests: ">=2.17.3,<3" sqlparse: ">=0.4.0,<1" - url: https://conda.anaconda.org/conda-forge/linux-64/mlflow-skinny-2.19.0-py312h7900ff3_0.conda + typing-extensions: <5,>=4.0.0 + url: https://conda.anaconda.org/conda-forge/linux-64/mlflow-skinny-2.20.0-py312h7900ff3_0.conda hash: - md5: 79d84867c9f9dad9341ffe585573e128 - sha256: 079cc64dc56a42310e4ee3adb5594afa43bfe16f66367a0dcb20ebdb922365cf + md5: 99e8753d95cc3489ac8aa857abd8c611 + sha256: afd66b4194b28b4ff65229806bb5da2cad7db05bc34e99794f54df74cd90f8c0 category: main optional: false - name: mlflow-skinny - version: 2.19.0 + version: 2.20.0 manager: conda platform: osx-64 dependencies: @@ -14665,20 +14715,22 @@ package: opentelemetry-sdk: <3,>=1.0.0 packaging: <25 protobuf: ">=3.12.0,<6" + pydantic: <3,>=1.0 python: ">=3.12,<3.13.0a0" python_abi: 3.12.* pytz: <2025 pyyaml: ">=5.1,<7" requests: ">=2.17.3,<3" sqlparse: ">=0.4.0,<1" - url: https://conda.anaconda.org/conda-forge/osx-64/mlflow-skinny-2.19.0-py312hb401068_0.conda + typing-extensions: <5,>=4.0.0 + url: https://conda.anaconda.org/conda-forge/osx-64/mlflow-skinny-2.20.0-py312hb401068_0.conda hash: - md5: dfa2050b881b67f99dcfba596a600937 - sha256: da115c7eaba21de84cf1cea41baab1cef4b944dd5c5e2f48ab7c37b5f60070fd + md5: 90bf1546fafa607f8516018d7a35c433 + sha256: 375f1de03917a74d37e4e1a6023b3d6b414f3461ac8c3b590db310be04662d95 category: main optional: false - name: mlflow-skinny - version: 2.19.0 + version: 
2.20.0 manager: conda platform: osx-arm64 dependencies: @@ -14693,67 +14745,69 @@ package: opentelemetry-sdk: <3,>=1.0.0 packaging: <25 protobuf: ">=3.12.0,<6" + pydantic: <3,>=1.0 python: ">=3.12,<3.13.0a0" python_abi: 3.12.* pytz: <2025 pyyaml: ">=5.1,<7" requests: ">=2.17.3,<3" sqlparse: ">=0.4.0,<1" - url: https://conda.anaconda.org/conda-forge/osx-arm64/mlflow-skinny-2.19.0-py312h81bd7bf_0.conda + typing-extensions: <5,>=4.0.0 + url: https://conda.anaconda.org/conda-forge/osx-arm64/mlflow-skinny-2.20.0-py312h81bd7bf_0.conda hash: - md5: e915cb47fafafcefd4aca92289e9fd46 - sha256: e2db8280244f5345365f4371e8a2e802475d33aa01c1fc8a86efb47b7d2ba455 + md5: 26a4346bd01bc245ead820aa78612135 + sha256: 15088bbb3de38ec2b6c36a48f23a430073b0ab0857168741af6a72a7cdc33c46 category: main optional: false - name: mlflow-ui - version: 2.19.0 + version: 2.20.0 manager: conda platform: linux-64 dependencies: flask: <4 gunicorn: <23 - mlflow-skinny: 2.19.0 + mlflow-skinny: 2.20.0 python: ">=3.12,<3.13.0a0" python_abi: 3.12.* querystring_parser: <2 - url: https://conda.anaconda.org/conda-forge/linux-64/mlflow-ui-2.19.0-py312h7900ff3_0.conda + url: https://conda.anaconda.org/conda-forge/linux-64/mlflow-ui-2.20.0-py312h7900ff3_0.conda hash: - md5: 124c3c21d6c5c197eff0c30fead2d23c - sha256: 326c7fef3d6bc05b7a3ab0a684b30542ab92ba219f161bf1924441de9fe52603 + md5: 169c03eb5298c562151be53361898248 + sha256: f9154ba8c860ee1f8963b192300875f78cf0fba8a5c0ba1304b03db732c67c31 category: main optional: false - name: mlflow-ui - version: 2.19.0 + version: 2.20.0 manager: conda platform: osx-64 dependencies: flask: <4 gunicorn: <23 - mlflow-skinny: 2.19.0 + mlflow-skinny: 2.20.0 python: ">=3.12,<3.13.0a0" python_abi: 3.12.* querystring_parser: <2 - url: https://conda.anaconda.org/conda-forge/osx-64/mlflow-ui-2.19.0-py312hb401068_0.conda + url: https://conda.anaconda.org/conda-forge/osx-64/mlflow-ui-2.20.0-py312hb401068_0.conda hash: - md5: 5bb9114ecd2fd48522eaf4ed36c8f01b - sha256: 
d609c330277ebe708101a463257858af2054a7501adfbf6bd020bb41ef5026d0 + md5: 25d800a78195240fbb8d319c61c42134 + sha256: de289fcca182491f35180170b49ede093a7a569dd423d8d28cb6c8d79299611d category: main optional: false - name: mlflow-ui - version: 2.19.0 + version: 2.20.0 manager: conda platform: osx-arm64 dependencies: flask: <4 gunicorn: <23 - mlflow-skinny: 2.19.0 + mlflow-skinny: 2.20.0 python: ">=3.12,<3.13.0a0" python_abi: 3.12.* querystring_parser: <2 - url: https://conda.anaconda.org/conda-forge/osx-arm64/mlflow-ui-2.19.0-py312h81bd7bf_0.conda + url: https://conda.anaconda.org/conda-forge/osx-arm64/mlflow-ui-2.20.0-py312h81bd7bf_0.conda hash: - md5: 2099c3076d8ae32cb2104a84b89a8c5d - sha256: 13025f9cee274d173ad265344ea7ed939a92f75f3a62ec9afd1083ac9631ba8a + md5: f97e716d11744f5bc6cea42c878111e7 + sha256: aab524208050c9d16f1b2b049df94d3c6773f765934936215e78b98a0ff4e84c category: main optional: false - name: more-itertools @@ -17862,6 +17916,45 @@ package: sha256: 063eb168a29d4ce6d9ed865e9e1ad3b6e141712189955a79e06b24ddc0cbbc9c category: main optional: false + - name: pyarrow-hotfix + version: "0.6" + manager: conda + platform: linux-64 + dependencies: + pyarrow: ">=0.14" + python: ">=3.9" + url: https://conda.anaconda.org/conda-forge/noarch/pyarrow-hotfix-0.6-pyhd8ed1ab_1.conda + hash: + md5: 49c3b8c3b2578f35a7034f75f30d0041 + sha256: 9ff4e520cff831d34adcf8d791f735972d804572f223ad21b9652ad0886968a6 + category: main + optional: false + - name: pyarrow-hotfix + version: "0.6" + manager: conda + platform: osx-64 + dependencies: + python: ">=3.9" + pyarrow: ">=0.14" + url: https://conda.anaconda.org/conda-forge/noarch/pyarrow-hotfix-0.6-pyhd8ed1ab_1.conda + hash: + md5: 49c3b8c3b2578f35a7034f75f30d0041 + sha256: 9ff4e520cff831d34adcf8d791f735972d804572f223ad21b9652ad0886968a6 + category: main + optional: false + - name: pyarrow-hotfix + version: "0.6" + manager: conda + platform: osx-arm64 + dependencies: + python: ">=3.9" + pyarrow: ">=0.14" + url: 
https://conda.anaconda.org/conda-forge/noarch/pyarrow-hotfix-0.6-pyhd8ed1ab_1.conda + hash: + md5: 49c3b8c3b2578f35a7034f75f30d0041 + sha256: 9ff4e520cff831d34adcf8d791f735972d804572f223ad21b9652ad0886968a6 + category: main + optional: false - name: pyasn1 version: 0.6.1 manager: conda @@ -19021,7 +19114,6 @@ package: readline: ">=8.2,<9.0a0" tk: ">=8.6.13,<8.7.0a0" tzdata: "" - pip: "" url: https://conda.anaconda.org/conda-forge/linux-64/python-3.12.8-h9e4cc4f_1_cpython.conda hash: md5: 7fd2fd79436d9b473812f14e86746844 @@ -19045,7 +19137,6 @@ package: readline: ">=8.2,<9.0a0" tk: ">=8.6.13,<8.7.0a0" tzdata: "" - pip: "" url: https://conda.anaconda.org/conda-forge/osx-64/python-3.12.8-h9ccd52b_1_cpython.conda hash: md5: 68a31f9cfbdcab2a4baec79095374780 @@ -19069,7 +19160,6 @@ package: readline: ">=8.2,<9.0a0" tk: ">=8.6.13,<8.7.0a0" tzdata: "" - pip: "" url: https://conda.anaconda.org/conda-forge/osx-arm64/python-3.12.8-hc22306f_1_cpython.conda hash: md5: 54ca5b5d92ef3a3ba61e195ee882a518 diff --git a/environments/conda-osx-64.lock.yml b/environments/conda-osx-64.lock.yml index be38707d8..bd067292a 100644 --- a/environments/conda-osx-64.lock.yml +++ b/environments/conda-osx-64.lock.yml @@ -1,6 +1,6 @@ # Generated by conda-lock. 
# platform: osx-64 -# input_hash: 6f5f2716d95b3f5dfce0a2a55c80a997b5224c0ca3ea6386de439a120ac76ab9 +# input_hash: 01b55c205e542a6ec3079e568f2f105dd6e70c6461a90cd04d98ed9157f334c2 channels: - conda-forge @@ -42,7 +42,7 @@ dependencies: - aws-c-sdkutils=0.2.2=hc0df2db_0 - aws-checksums=0.2.2=hc0df2db_4 - aws-crt-cpp=0.29.9=h5c43303_2 - - aws-sdk-cpp=1.11.458=h904bc55_6 + - aws-sdk-cpp=1.11.489=h904bc55_0 - azure-core-cpp=1.14.0=h9a36307_0 - azure-identity-cpp=1.10.0=ha4e2ba9_0 - azure-storage-blobs-cpp=12.13.0=h3d2f5f1_1 @@ -111,6 +111,7 @@ dependencies: - debugpy=1.8.12=py312haafddd8_0 - decorator=5.1.1=pyhd8ed1ab_1 - defusedxml=0.7.1=pyhd8ed1ab_0 + - deltalake=0.24.0=py312h16aec2d_0 - deprecated=1.2.15=pyhd8ed1ab_1 - distlib=0.3.9=pyhd8ed1ab_1 - dnspython=2.7.0=pyhff2d567_1 @@ -245,10 +246,10 @@ dependencies: - lerc=4.0.0=hb486fe8_0 - libabseil=20240722.0=cxx17_h0e468a2_4 - libarchive=3.7.7=h1a33361_3 - - libarrow=18.1.0=h36d682d_12_cpu - - libarrow-acero=18.1.0=ha6338a2_12_cpu - - libarrow-dataset=18.1.0=ha6338a2_12_cpu - - libarrow-substrait=18.1.0=h5c2345d_12_cpu + - libarrow=18.1.0=h553eae1_13_cpu + - libarrow-acero=18.1.0=ha6338a2_13_cpu + - libarrow-dataset=18.1.0=ha6338a2_13_cpu + - libarrow-substrait=18.1.0=h5c2345d_13_cpu - libavif16=1.1.1=h71406da_2 - libblas=3.9.0=26_osx64_openblas - libbrotlicommon=1.1.0=h00291cd_2 @@ -284,7 +285,7 @@ dependencies: - libnghttp2=1.64.0=hc7306c3_0 - libntlm=1.8=h6e16a3a_0 - libopenblas=0.3.28=openmp_hbf64a52_1 - - libparquet=18.1.0=h3e22b07_12_cpu + - libparquet=18.1.0=h3e22b07_13_cpu - libpng=1.6.45=h3c4a55f_0 - libpq=17.2=h639cf83_1 - libprotobuf=5.28.3=h6401091_1 @@ -323,9 +324,9 @@ dependencies: - mergedeep=1.3.4=pyhd8ed1ab_1 - minizip=4.0.7=hfb7a1ec_3 - mistune=3.1.0=pyhd8ed1ab_0 - - mlflow=2.19.0=hb401068_0 - - mlflow-skinny=2.19.0=py312hb401068_0 - - mlflow-ui=2.19.0=py312hb401068_0 + - mlflow=2.20.0=hb401068_0 + - mlflow-skinny=2.20.0=py312hb401068_0 + - mlflow-ui=2.20.0=py312hb401068_0 - 
more-itertools=10.6.0=pyhd8ed1ab_0 - msgpack-python=1.1.0=py312hc5c4d5f_0 - multidict=6.1.0=py312h6f3313d_1 @@ -397,6 +398,7 @@ dependencies: - pure_eval=0.2.3=pyhd8ed1ab_1 - pyarrow=18.1.0=py312hb401068_0 - pyarrow-core=18.1.0=py312h5157fe3_0_cpu + - pyarrow-hotfix=0.6=pyhd8ed1ab_1 - pyasn1=0.6.1=pyhd8ed1ab_2 - pyasn1-modules=0.4.1=pyhd8ed1ab_1 - pybtex=0.24.0=pyhd8ed1ab_3 diff --git a/environments/conda-osx-arm64.lock.yml b/environments/conda-osx-arm64.lock.yml index 7b0e638a2..a60ed6222 100644 --- a/environments/conda-osx-arm64.lock.yml +++ b/environments/conda-osx-arm64.lock.yml @@ -1,6 +1,6 @@ # Generated by conda-lock. # platform: osx-arm64 -# input_hash: 893f66740883ac9f298ed6c1703550c97fc74fa837aa11d4062ca8e3a1a233d8 +# input_hash: f127291e8632894e5612179a9a91ca72882a883e124bfabff789c780fdaf09fa channels: - conda-forge @@ -42,7 +42,7 @@ dependencies: - aws-c-sdkutils=0.2.2=hc8a0bd2_0 - aws-checksums=0.2.2=hc8a0bd2_4 - aws-crt-cpp=0.29.9=ha81f72f_2 - - aws-sdk-cpp=1.11.458=h0e5014b_6 + - aws-sdk-cpp=1.11.489=h0e5014b_0 - azure-core-cpp=1.14.0=hd50102c_0 - azure-identity-cpp=1.10.0=hc602bab_0 - azure-storage-blobs-cpp=12.13.0=h7585a09_1 @@ -111,6 +111,7 @@ dependencies: - debugpy=1.8.12=py312hd8f9ff3_0 - decorator=5.1.1=pyhd8ed1ab_1 - defusedxml=0.7.1=pyhd8ed1ab_0 + - deltalake=0.24.0=py312h30a1e44_0 - deprecated=1.2.15=pyhd8ed1ab_1 - distlib=0.3.9=pyhd8ed1ab_1 - dnspython=2.7.0=pyhff2d567_1 @@ -245,10 +246,10 @@ dependencies: - lerc=4.0.0=h9a09cb3_0 - libabseil=20240722.0=cxx17_h07bc746_4 - libarchive=3.7.7=h3b16cec_3 - - libarrow=18.1.0=hd1aa4b5_12_cpu - - libarrow-acero=18.1.0=hf07054f_12_cpu - - libarrow-dataset=18.1.0=hf07054f_12_cpu - - libarrow-substrait=18.1.0=h4239455_12_cpu + - libarrow=18.1.0=h5b094fc_13_cpu + - libarrow-acero=18.1.0=hf07054f_13_cpu + - libarrow-dataset=18.1.0=hf07054f_13_cpu + - libarrow-substrait=18.1.0=h4239455_13_cpu - libavif16=1.1.1=h45b7238_2 - libblas=3.9.0=26_osxarm64_openblas - libbrotlicommon=1.1.0=hd74edd7_2 @@ -284,7 
+285,7 @@ dependencies: - libnghttp2=1.64.0=h6d7220d_0 - libntlm=1.8=h5505292_0 - libopenblas=0.3.28=openmp_hf332438_1 - - libparquet=18.1.0=h636d7b7_12_cpu + - libparquet=18.1.0=h636d7b7_13_cpu - libpng=1.6.45=h3783ad8_0 - libpq=17.2=ha9b7db8_1 - libprotobuf=5.28.3=h3bd63a1_1 @@ -323,9 +324,9 @@ dependencies: - mergedeep=1.3.4=pyhd8ed1ab_1 - minizip=4.0.7=hff1a8ea_3 - mistune=3.1.0=pyhd8ed1ab_0 - - mlflow=2.19.0=py312h81bd7bf_0 - - mlflow-skinny=2.19.0=py312h81bd7bf_0 - - mlflow-ui=2.19.0=py312h81bd7bf_0 + - mlflow=2.20.0=py312h81bd7bf_0 + - mlflow-skinny=2.20.0=py312h81bd7bf_0 + - mlflow-ui=2.20.0=py312h81bd7bf_0 - more-itertools=10.6.0=pyhd8ed1ab_0 - msgpack-python=1.1.0=py312h6142ec9_0 - multidict=6.1.0=py312hdb8e49c_1 @@ -397,6 +398,7 @@ dependencies: - pure_eval=0.2.3=pyhd8ed1ab_1 - pyarrow=18.1.0=py312h1f38498_0 - pyarrow-core=18.1.0=py312hc40f475_0_cpu + - pyarrow-hotfix=0.6=pyhd8ed1ab_1 - pyasn1=0.6.1=pyhd8ed1ab_2 - pyasn1-modules=0.4.1=pyhd8ed1ab_1 - pybtex=0.24.0=pyhd8ed1ab_3 diff --git a/pyproject.toml b/pyproject.toml index 24cf45474..e8adf4650 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ dependencies = [ "dask>=2025", "dask-expr>=2", # Required for dask[dataframe] "datasette>=0.65", + "deltalake>=0.24,<1", "doc8>=1.1", "duckdb>=1.1.3", "email-validator>=1.0.3", # pydantic[email] diff --git a/src/pudl/analysis/pudl_models.py b/src/pudl/analysis/pudl_models.py new file mode 100644 index 000000000..22d19f277 --- /dev/null +++ b/src/pudl/analysis/pudl_models.py @@ -0,0 +1,54 @@ +"""Implement utilities for working with data produced in the pudl modelling repo.""" + +import os + +import pandas as pd +import pyarrow as pa +from dagster import AssetsDefinition, asset +from deltalake import DeltaTable + + +def get_model_tables() -> list[str]: + """Return all tables produced by PUDL models or empty list if env variable not set.""" + pudl_models_tables = [] + if os.getenv("USE_PUDL_MODELS"): + pudl_models_tables = [ + 
"core_sec10k__company_information", + "core_sec10k__exhibit_21_company_ownership", + "core_sec10k__filings", + "out_sec_10k__parents_and_subsidiaries", + ] + + return pudl_models_tables + + +def _get_table_uri(table_name: str) -> str: + return f"gs://model-outputs.catalyst.coop/sec10k/{table_name}" + + +def pudl_models_asset_factory(table_name: str) -> AssetsDefinition: + """Factory function to create assets which will load pudl models tables.""" + + @asset( + name=table_name, + io_manager_key="pudl_io_manager", + ) + def _asset() -> pd.DataFrame: + return DeltaTable(_get_table_uri(table_name)).to_pandas() + + return _asset + + +def get_pudl_models_assets() -> list[AssetsDefinition]: + """Generate a collection of assets for all PUDL model tables.""" + return [pudl_models_asset_factory(table) for table in get_model_tables()] + + +def get_model_table_schemas() -> list[tuple[str, str, pa.Schema]]: + """Return pyarrow schemas for all PUDL models tables.""" + dts = [DeltaTable(_get_table_uri(table_name)) for table_name in get_model_tables()] + + return [ + (dt.metadata().name, dt.metadata().description, dt.schema().to_pyarrow()) + for dt in dts + ] diff --git a/src/pudl/etl/__init__.py b/src/pudl/etl/__init__.py index 016110a95..4b230b77d 100644 --- a/src/pudl/etl/__init__.py +++ b/src/pudl/etl/__init__.py @@ -21,6 +21,7 @@ from dagster._core.definitions.cacheable_assets import CacheableAssetsDefinition import pudl +from pudl.analysis.pudl_models import get_pudl_models_assets from pudl.io_managers import ( epacems_io_manager, ferc1_dbf_sqlite_io_manager, @@ -107,14 +108,17 @@ } all_asset_modules = raw_module_groups | core_module_groups | out_module_groups -default_assets = list( - itertools.chain.from_iterable( - load_assets_from_modules( - modules, - group_name=group_name, +default_assets = ( + list( + itertools.chain.from_iterable( + load_assets_from_modules( + modules, + group_name=group_name, + ) + for group_name, modules in all_asset_modules.items() ) - for group_name, 
modules in all_asset_modules.items() ) + + get_pudl_models_assets() ) default_asset_checks = list( diff --git a/src/pudl/io_managers.py b/src/pudl/io_managers.py index a2e3eac59..266ff747b 100644 --- a/src/pudl/io_managers.py +++ b/src/pudl/io_managers.py @@ -25,8 +25,8 @@ from upath import UPath import pudl -from pudl.metadata import PUDL_PACKAGE -from pudl.metadata.classes import Package, Resource +from pudl.analysis.pudl_models import get_model_tables +from pudl.metadata.classes import PUDL_PACKAGE, Package, Resource from pudl.workspace.setup import PudlPaths logger = pudl.logging_helpers.get_logger(__name__) @@ -322,13 +322,22 @@ def load_input(self, context: InputContext) -> pd.DataFrame: class PudlParquetIOManager(IOManager): """IOManager that writes pudl tables to pyarrow parquet files.""" + def _get_table_resource(self, table_name: str) -> Resource: + """Return resource class for table.""" + if table_name not in get_model_tables(): + res = Resource.from_id(table_name) + else: + # For tables coming from PUDL modelling repo just use already parsed resource metadata + [res] = [r for r in PUDL_PACKAGE.resources if r.name == table_name] + return res + def handle_output(self, context: OutputContext, df: Any) -> None: """Writes pudl dataframe to parquet file.""" assert isinstance(df, pd.DataFrame), "Only panda dataframes are supported." 
table_name = get_table_name_from_context(context) parquet_path = PudlPaths().parquet_path(table_name) parquet_path.parent.mkdir(parents=True, exist_ok=True) - res = Resource.from_id(table_name) + res = self._get_table_resource(table_name) df = res.enforce_schema(df) schema = res.to_pyarrow() @@ -346,7 +355,7 @@ def load_input(self, context: InputContext) -> pd.DataFrame: """Loads pudl table from parquet file.""" table_name = get_table_name_from_context(context) parquet_path = PudlPaths().parquet_path(table_name) - res = Resource.from_id(table_name) + res = self._get_table_resource(table_name) df = pq.read_table(source=parquet_path, schema=res.to_pyarrow()).to_pandas() return res.enforce_schema(df) diff --git a/src/pudl/metadata/classes.py b/src/pudl/metadata/classes.py index bb73ca298..c1cb33b47 100644 --- a/src/pudl/metadata/classes.py +++ b/src/pudl/metadata/classes.py @@ -36,6 +36,7 @@ ) import pudl.logging_helpers +from pudl.analysis.pudl_models import get_model_table_schemas from pudl.metadata.codes import CODE_METADATA from pudl.metadata.constants import ( CONSTRAINT_DTYPES, @@ -572,6 +573,24 @@ class Field(PudlMeta): harvest: FieldHarvest = FieldHarvest() encoder: Encoder | None = None + @classmethod + def from_pyarrow_field(cls, field: pa.Field) -> "Field": + """Construct from pyarrow field.""" + # Reverse map from frictionless -> pyarrow to pyarrow -> frictionless + type_map = { + value: key for key, value in FIELD_DTYPES_PYARROW.items() if key != "year" + } | { + pa.bool8(): "boolean", + pa.int32(): "integer", + pa.int64(): "integer", + pa.date32(): "date", + } + return cls( + name=field.name, + type=type_map[field.type], + description=field.metadata[b"description"].decode(), + ) + @field_validator("constraints") @classmethod def _check_constraints(cls, value, info: ValidationInfo): # noqa: C901 @@ -793,6 +812,15 @@ class Schema(PudlMeta): "missing_values", "primary_key", "foreign_keys", fn=_check_unique ) + @classmethod + def from_pyarrow_schema(cls, 
schema: pa.Schema) -> "Schema": + """Construct from a pyarrow schema.""" + return cls( + fields=[ + Field.from_pyarrow_field(schema.field(name)) for name in schema.names + ] + ) + @field_validator("fields") @classmethod def _check_field_names_unique(cls, fields: list[Field]): @@ -1449,6 +1477,17 @@ def from_id(cls, x: str) -> "Resource": """Construct from PUDL identifier (`resource.name`).""" return cls(**cls.dict_from_id(x)) + @classmethod + def from_pyarrow_schema( + cls, name: str, description: str, schema: pa.Schema + ) -> "Resource": + """Construct from a pyarrow schema.""" + return cls( + name=name, + description=description, + schema=Schema.from_pyarrow_schema(schema), + ) + def get_field(self, name: str) -> Field: """Return field with the given name if it's part of the Resources.""" names = [field.name for field in self.schema.fields] @@ -1975,6 +2014,12 @@ def from_resource_ids( if len(names) > i: resources += [Resource.dict_from_id(x) for x in names[i:]] + resources += [ + Resource.from_pyarrow_schema(name, description, schema).model_dump( + by_alias=True + ) + for name, description, schema in get_model_table_schemas() + ] if excluded_etl_groups: resources = [ resource diff --git a/terraform/.terraform.lock.hcl b/terraform/.terraform.lock.hcl index 037e2d0b3..09aba2c35 100644 --- a/terraform/.terraform.lock.hcl +++ b/terraform/.terraform.lock.hcl @@ -2,48 +2,50 @@ # Manual edits may be lost in future updates. 
provider "registry.terraform.io/hashicorp/google" { - version = "6.10.0" - constraints = ">= 3.64.0, 6.10.0, < 7.0.0" + version = "6.14.1" + constraints = ">= 3.64.0, 6.14.1, < 7.0.0" hashes = [ - "h1:OjdstqHpDb3mzZoA/WiuGXndoLF8DfT6XdMaUHDgBvI=", - "zh:016ef442d70497f34d209ccba94afa5b5e8027b6a60516452549a04c5f4b1e95", - "zh:0e521ae9ab51dea6f9c310291c9e288a482bf37e149bc3e5920547d2a73a6d23", - "zh:1ad1cc5e8f7c8f0b42cc6d37c5e0a3c77557bb18d91070930d361c3d6866bf23", - "zh:64580f23f5e87d4f843a617dab9a96093671f5826c2de8bc60fb3c619f00810d", - "zh:7d29aed1a73b99e50909fe7ce2fea92ba28cd4b4943d185d9187295f991bf35f", - "zh:80ccce9ad3c64528f05b9432d6bf8278d6555ffcb1c80f563b6f24a88d269979", - "zh:af49d0083c2a46bfd022d35f7a06a0626b71d67f6b3c75b04b5723e8977d1096", - "zh:ce767ca2ed4aaf63d0fcb48f0c2756b26096cff7fd33d513ed65a4e5758371f9", - "zh:d4515ffcf5a804c4f1da750f9a4a5edaef6a4cb95e49040bb18a422eb6b4832e", + "h1:AT+PPSH18DlJGqRS3I1tfSdJ6DwY0meBTYEo7NO0U/c=", + "h1:zhyWKVVqORklT7c28f6FzZ0z/g6pGa6FFtv/wp1MKDc=", + "zh:0a0cab3291bdac20fe31511b7aa9f3258b14add16d13110d4ebac18761277361", + "zh:178594db6fbff9974a7c65c65195a64c93d16f652a1a4136015b192faaa1ce2d", + "zh:379bbd6bd5b8add55ffd46c99a8081664e9004188f6df91f8f044e4268b86e42", + "zh:4899b6174a4492dbff3d94f56a901692a3f8d86a6db9de6a92b83d43b7ad4507", + "zh:6240820c3aeeaa8b9830fb4514d3ecb6e3fed8724340dfedaf89b4bb2265102f", + "zh:9a214e052c5c7b4e7bc409086832d4bd8e404b652b66344c26c314747c49744f", + "zh:a6cb9da102d371a52b750de5628a2b6b7cc7c20481d7fa3fb25a957b58b05777", + "zh:ab69d69f9c16461105a585f8a9c780eb06238827db269c5ad3f7c114922e7f20", + "zh:b716e219779295e9af2b632b2c0d534c90cca5c87352fc0c6dc8ebae671dd4d2", + "zh:d96e8d6c3109882d5527956acad12015a024fc24a6110e244007209b4f3069a0", + "zh:f49274564725189932a65cf1a4e8f40f46fca2537da7a673da9fea6ff7e3f195", "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", - "zh:f701206edcfff3e7ee8ca159cde65264e55ec59e2d455facb57cca782a197bbc", - 
"zh:f984b7b6700f6c075329d43e8a0be25f20f1b124e90e1e1e13bd90a8e468743d", ] } provider "registry.terraform.io/hashicorp/google-beta" { - version = "6.10.0" + version = "6.16.0" constraints = ">= 3.64.0, < 7.0.0" hashes = [ - "h1:W4ps9gOsSXRLKVbUbmeCFiDmn9Be+d1j5DbFhcmMIHU=", - "zh:180bbb1bc216378d82106dc4371f01fb0409ccc29c1513a1dd59aff7033f1fbb", - "zh:1ee7fbfbd5f71db275ed0e56d2264fca6e6e0e1d6e2ab0495da5c95bee87204d", - "zh:2b12896a4489152c1bf01217dd886eef0784f18f922f133e828e1a687128aaca", - "zh:5d1885e63b4bdb711a1bec19b699626fbd676a88087f00107cdb807c8a0213ca", - "zh:7a094a659d5fe5032dab6c79ccbdc6e86d16cd4da122b6f350d84f12db624e99", - "zh:8941627a145d5787bbdfad65e01f10200f98e160059214472a0287a5d67e45d9", - "zh:9626157fbd7e06ace9f3e0491213fa33849a54f21a3fe35fe11a0f0362fb1721", - "zh:bcb41673a90b757ac2b6e8c30e358ef101509434c006f08b788028390843267e", + "h1:Z+sQyUt2iYkELNpRTEv6pZoBr9EP1PxYZzhRthiK9DU=", + "zh:0ef35e34ffa21e11c85593b48d1c879fe9b74c961b4dd8dada6017776112feac", + "zh:234517614495c99c756cc8ffe9d79f2a07e161b711e4a496f2b72fdf846509e4", + "zh:5501f1ecaa9a2c18c01cec9847e2381466dc84f075c30fff58e93b4ba433ee1c", + "zh:7081f7d2bc5d4f12652befe2f9f452f170825d1570d20b06e3b877e981bf1b5d", + "zh:731297507f15e0e86a7ac5ab6fe31d826b03087bb4099b0cf7622ffc4cfa2d90", + "zh:78ce6629f59f733b773ad1378b0f948010cdf30593b56252ec0a5e879a12b128", + "zh:a6de47ef0c0bda420d07e6b135e999ef0ea271549e56d8a91dac9f43874882bb", + "zh:c637ec5a3a5bc5b661ef6637af23768e3af0d5a3d2b75f1a1de04d198c13e2b0", + "zh:d34dc7bf412f50259d97f07eff412f432b37503f4c2771660beeec0e703cd9c0", "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", - "zh:fce9ef4741bf14472e7216952364f471370f8147f8fadfee89dfcee1562a3a83", - "zh:fd667034bb71eb67e50f871286db039139ad08313575e633884feb5b4e359b13", - "zh:ffd2089f326da9682f7669c2d4bc9349a968bf83ff05043d202be171237f86fb", + "zh:f67079d47b9d83696c90c23d40609de683a7ca89c06ede26cb61fb97bcb4287b", + 
"zh:fa597498bbcbe3871312bbf7dbea27aea01d09b5712b06d5d5ec6b5276ca94eb", ] } provider "registry.terraform.io/hashicorp/random" { version = "3.6.3" hashes = [ + "h1:Fnaec9vA8sZ8BXVlN3Xn9Jz3zghSETIKg7ch8oXhxno=", "h1:zG9uFP8l9u+yGZZvi5Te7PV62j50azpgwPunq2vTm1E=", "zh:04ceb65210251339f07cd4611885d242cd4d0c7306e86dda9785396807c00451", "zh:448f56199f3e99ff75d5c0afacae867ee795e4dfda6cb5f8e3b2a72ec3583dd8", diff --git a/terraform/main.tf b/terraform/main.tf index 6a4701972..33553ffe2 100644 --- a/terraform/main.tf +++ b/terraform/main.tf @@ -107,12 +107,75 @@ resource "google_storage_bucket_iam_binding" "binding" { ] } - resource "google_artifact_registry_repository" "pudl-superset-repo" { - location = "us-central1" - repository_id = "pudl-superset" - description = "Docker image of PUDL superset deployment." - format = "docker" +# Generate a random password for the mlflow db user +resource "random_password" "mlflow_postgresql_password" { + length = 16 # Adjust the password length as needed + special = true # Include special characters + upper = true # Include uppercase letters + lower = true # Include lowercase letters + numeric = true # Include numbers +} + +# Create secret to store mlflow db password +resource "google_secret_manager_secret" "mlflow_postgresql_password_secret" { + secret_id = "mlflow-postgresql-password" + replication { + auto {} + } +} + +# Create version of secret with mlflow password set +resource "google_secret_manager_secret_version" "mlflow_postgresql_password_version" { + secret = google_secret_manager_secret.mlflow_postgresql_password_secret.id + secret_data = random_password.mlflow_postgresql_password.result +} + +# Create mlflow postgresql instance for backend storage +resource "google_sql_database_instance" "mlflow_backend_store" { + name = "mlflow-backend-store" + region = "us-central1" + database_version = "POSTGRES_14" + settings { + tier = "db-f1-micro" + password_validation_policy { + min_length = 6 + reuse_interval = 2 + complexity = 
"COMPLEXITY_DEFAULT" + disallow_username_substring = true + password_change_interval = "30s" + enable_password_policy = true + } + } + # set `deletion_protection` to true, will ensure that one cannot accidentally delete this instance by + # use of Terraform whereas `deletion_protection_enabled` flag protects this instance at the GCP level. + deletion_protection = true +} + +resource "google_storage_bucket" "pudl_models_outputs" { + name = "model-outputs.catalyst.coop" + location = "US" + storage_class = "STANDARD" +} + +resource "google_sql_user" "mlflow_postgresql_user" { + name = "postgres" + instance = google_sql_database_instance.mlflow_backend_store.name + password = random_password.mlflow_postgresql_password.result +} + +# Optional: Create a database in the PostgreSQL instance +resource "google_sql_database" "mlflow_postgresql_database" { + name = "mlflow" + instance = google_sql_database_instance.mlflow_backend_store.name +} + +resource "google_artifact_registry_repository" "pudl-superset-repo" { + location = "us-central1" + repository_id = "pudl-superset" + description = "Docker image of PUDL superset deployment." 
+ format = "docker" +} resource "google_cloud_run_v2_service" "pudl-superset" { name = "pudl-superset" @@ -124,7 +187,7 @@ resource "google_cloud_run_v2_service" "pudl-superset" { template { execution_environment = "EXECUTION_ENVIRONMENT_GEN2" containers { - name = "pudl-superset-1" + name = "pudl-superset-1" image = "us-central1-docker.pkg.dev/catalyst-cooperative-pudl/pudl-superset/pudl-superset:latest" volume_mounts { @@ -136,14 +199,14 @@ resource "google_cloud_run_v2_service" "pudl-superset" { mount_path = "/cloudsql" } env { - name = "IS_CLOUD_RUN" + name = "IS_CLOUD_RUN" value = "True" } env { name = "SUPERSET_DB_USER" value_source { secret_key_ref { - secret = "superset-database-username" + secret = "superset-database-username" version = "1" } } @@ -152,7 +215,7 @@ resource "google_cloud_run_v2_service" "pudl-superset" { name = "SUPERSET_DB_NAME" value_source { secret_key_ref { - secret = "superset-database-database" + secret = "superset-database-database" version = "1" } } @@ -161,7 +224,7 @@ resource "google_cloud_run_v2_service" "pudl-superset" { name = "SUPERSET_DB_PASS" value_source { secret_key_ref { - secret = "superset-database-password" + secret = "superset-database-password" version = "1" } } @@ -170,7 +233,7 @@ resource "google_cloud_run_v2_service" "pudl-superset" { name = "SUPERSET_SECRET_KEY" value_source { secret_key_ref { - secret = "superset-secret-key" + secret = "superset-secret-key" version = "1" } } @@ -179,7 +242,7 @@ resource "google_cloud_run_v2_service" "pudl-superset" { name = "CLOUD_SQL_CONNECTION_NAME" value_source { secret_key_ref { - secret = "superset-database-connection-name" + secret = "superset-database-connection-name" version = "1" } } @@ -188,7 +251,7 @@ resource "google_cloud_run_v2_service" "pudl-superset" { name = "AUTH0_CLIENT_ID" value_source { secret_key_ref { - secret = "superset-auth0-client-id" + secret = "superset-auth0-client-id" version = "1" } } @@ -197,7 +260,7 @@ resource "google_cloud_run_v2_service" 
"pudl-superset" { name = "AUTH0_CLIENT_SECRET" value_source { secret_key_ref { - secret = "superset-auth0-client-secret" + secret = "superset-auth0-client-secret" version = "2" } } @@ -206,7 +269,7 @@ resource "google_cloud_run_v2_service" "pudl-superset" { name = "AUTH0_DOMAIN" value_source { secret_key_ref { - secret = "superset-auth0-domain" + secret = "superset-auth0-domain" version = "1" } } @@ -215,7 +278,7 @@ resource "google_cloud_run_v2_service" "pudl-superset" { name = "MAPBOX_API_KEY" value_source { secret_key_ref { - secret = "superset-mapbox-api-key" + secret = "superset-mapbox-api-key" version = "1" } } @@ -405,7 +468,7 @@ resource "google_storage_bucket" "superset_storage" { resource "google_storage_bucket_iam_member" "superset_storage_compute_iam" { bucket = google_storage_bucket.superset_storage.name - role = "roles/storage.objectViewer" + role = "roles/storage.objectViewer" member = "serviceAccount:345950277072-compute@developer.gserviceaccount.com" } @@ -449,7 +512,7 @@ resource "google_storage_bucket_iam_member" "usage_metrics_archiver_gcs_iam" { for_each = toset(["roles/storage.objectCreator", "roles/storage.objectViewer", "roles/storage.insightsCollectorService"]) bucket = google_storage_bucket.pudl_usage_metrics_archive_bucket.name - role = each.key + role = each.key member = "serviceAccount:${google_service_account.usage_metrics_archiver.email}" } @@ -457,7 +520,7 @@ resource "google_storage_bucket_iam_member" "usage_metrics_etl_gcs_iam" { for_each = toset(["roles/storage.legacyBucketReader", "roles/storage.objectViewer"]) bucket = google_storage_bucket.pudl_usage_metrics_archive_bucket.name - role = each.key + role = each.key member = "serviceAccount:pudl-usage-metrics-etl@catalyst-cooperative-pudl.iam.gserviceaccount.com" } @@ -465,7 +528,7 @@ resource "google_storage_bucket_iam_member" "usage_metrics_etl_s3_logs_gcs_iam" for_each = toset(["roles/storage.legacyBucketReader", "roles/storage.objectViewer"]) bucket = 
"pudl-s3-logs.catalyst.coop" - role = each.key + role = each.key member = "serviceAccount:pudl-usage-metrics-etl@catalyst-cooperative-pudl.iam.gserviceaccount.com" } @@ -497,6 +560,202 @@ resource "google_storage_bucket_iam_member" "nrel_finito_inputs_archiver_gcs_iam ]) bucket = google_storage_bucket.pudl_archive_bucket.name - role = each.key + role = each.key member = "serviceAccount:${google_service_account.nrel_finito_inputs_gha.email}" } + +// PUDL Viewer config + +locals { + pudl_viewer_secret_versions = { + pudl_viewer_secret_key = 1 + pudl_viewer_db_username = 1 + pudl_viewer_db_password = 1 + pudl_viewer_db_name = 1 + pudl_viewer_auth0_domain = 1 + pudl_viewer_auth0_client_id = 1 + pudl_viewer_auth0_client_secret = 1 + } +} + +resource "google_service_account" "pudl_viewer_sa" { + account_id = "pudl-viewer-cloud-run" + display_name = "PUDL Viewer Service Account" +} + +resource "google_artifact_registry_repository" "pudl_viewer" { + location = "us-east1" + repository_id = "pudl-viewer" + description = "Docker repository for PUDL viewer" + format = "DOCKER" +} + +resource "google_sql_database_instance" "pudl_viewer_database" { + name = "pudl-viewer-database" + region = "us-central1" + database_version = "POSTGRES_17" + settings { + tier = "db-custom-1-3840" + edition = "ENTERPRISE" + disk_size = 10 + } + deletion_protection = true +} + +resource "google_sql_database" "pudl_viewer_database" { + name = "pudl_viewer" + instance = google_sql_database_instance.pudl_viewer_database.name +} + +data "google_secret_manager_secret_version" "pudl_viewer_db_password" { + secret = "pudl_viewer_db_password" + version = "1" +} + +data "google_secret_manager_secret_version" "pudl_viewer_db_username" { + secret = "pudl_viewer_db_username" + version = "1" +} + +resource "google_sql_user" "user" { + name = data.google_secret_manager_secret_version.pudl_viewer_db_username.secret_data + password = data.google_secret_manager_secret_version.pudl_viewer_db_password.secret_data + 
instance = google_sql_database_instance.pudl_viewer_database.name +} + + +resource "google_cloud_run_v2_service" "pudl_viewer" { + name = "pudl-viewer" + location = "us-east1" + deletion_protection = false + + template { + annotations = { + "client.knative.dev/user-image" = "us-east1-docker.pkg.dev/${var.project_id}/${google_artifact_registry_repository.pudl_viewer.name}/pudl-viewer:latest" + "run.googleapis.com/client-name" = "terraform" + "run.googleapis.com/client-version" = timestamp() + } + + service_account = google_service_account.pudl_viewer_sa.email + volumes { + name = "cloudsql" + cloud_sql_instance { + instances = [google_sql_database_instance.pudl_viewer_database.connection_name] + } + } + + containers { + image = "us-east1-docker.pkg.dev/${var.project_id}/${google_artifact_registry_repository.pudl_viewer.name}/pudl-viewer:latest" + + volume_mounts { + name = "cloudsql" + mount_path = "/cloudsql" + } + + env { + name = "IS_CLOUD_RUN" + value = "True" + } + + env { + name = "CLOUD_SQL_CONNECTION_NAME" + value = google_sql_database_instance.pudl_viewer_database.connection_name + } + + dynamic "env" { + for_each = local.pudl_viewer_secret_versions + content { + name = upper(env.key) + value_source { + secret_key_ref { + secret = env.key + version = tostring(env.value) + } + } + } + } + } + } +} + +resource "google_cloud_run_v2_job" "pudl_viewer_db_migration" { + name = "pudl-viewer-db-migration" + location = "us-east1" + deletion_protection = false + + template { + task_count = 1 + template { + service_account = google_service_account.pudl_viewer_sa.email + + volumes { + name = "cloudsql" + cloud_sql_instance { + instances = [google_sql_database_instance.pudl_viewer_database.connection_name] + } + } + + containers { + image = "us-east1-docker.pkg.dev/${var.project_id}/${google_artifact_registry_repository.pudl_viewer.name}/pudl-viewer:latest" + command = ["uv", "run", "flask", "--app", "parquet_fe_prototype", "db", "upgrade"] + + volume_mounts { + name = 
"cloudsql" + mount_path = "/cloudsql" + } + + env { + name = "IS_CLOUD_RUN" + value = "True" + } + + env { + name = "CLOUD_SQL_CONNECTION_NAME" + value = google_sql_database_instance.pudl_viewer_database.connection_name + } + + dynamic "env" { + for_each = local.pudl_viewer_secret_versions + content { + name = upper(env.key) + value_source { + secret_key_ref { + secret = env.key + version = tostring(env.value) + } + } + } + } + } + } + } +} + + +resource "google_cloud_run_v2_service_iam_member" "pudl_viewer_public" { + location = google_cloud_run_v2_service.pudl_viewer.location + name = google_cloud_run_v2_service.pudl_viewer.name + role = "roles/run.invoker" + member = "allUsers" +} + +resource "google_secret_manager_secret" "pudl_viewer_secrets" { + for_each = local.pudl_viewer_secret_versions + secret_id = each.key + replication { + auto {} + } +} + +resource "google_secret_manager_secret_iam_member" "pudl_viewer_secret_accessor" { + for_each = google_secret_manager_secret.pudl_viewer_secrets + secret_id = each.value.secret_id + role = "roles/secretmanager.secretAccessor" + member = google_service_account.pudl_viewer_sa.member +} + +resource "google_project_iam_member" "pudl_viewer_cloud_sql" { + project = var.project_id + role = "roles/cloudsql.client" + member = google_service_account.pudl_viewer_sa.member +}