diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000000..83c5135e9d --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + commit-message: + prefix: "chore: " + - package-ecosystem: "nuget" + directory: "/csharp/" + schedule: + interval: "weekly" + commit-message: + prefix: "chore(csharp): " + ignore: + - dependency-name: "Microsoft.Extensions.*" + update-types: + - "version-update:semver-major" + - dependency-name: "Microsoft.Bcl.*" + update-types: + - "version-update:semver-major" + - dependency-name: "System.*" + update-types: + - "version-update:semver-major" diff --git a/.github/workflows/csharp.yml b/.github/workflows/csharp.yml index 9e701df55e..2cd9ad1db2 100644 --- a/.github/workflows/csharp.yml +++ b/.github/workflows/csharp.yml @@ -49,7 +49,7 @@ jobs: os: [ubuntu-latest, windows-2019, macos-latest] steps: - name: Install C# - uses: actions/setup-dotnet@v3 + uses: actions/setup-dotnet@v4 with: dotnet-version: ${{ matrix.dotnet }} - name: Checkout ADBC diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 3acb30f6ba..e4d57943f9 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -33,15 +33,15 @@ jobs: name: "pre-commit" runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 persist-credentials: false - - uses: actions/setup-go@v3 + - uses: actions/setup-go@v5 with: go-version-file: 'go/adbc/go.mod' check-latest: true - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 - name: install golangci-lint run: | go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.49.0 diff --git a/.github/workflows/dev_pr.yml b/.github/workflows/dev_pr.yml index ed9852b8e0..b79307a159 100644 --- a/.github/workflows/dev_pr.yml +++ b/.github/workflows/dev_pr.yml @@ -33,7 +33,7 @@ jobs: name: Process runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: persist-credentials: false @@ -42,7 +42,7 @@ jobs: github.event_name == 'pull_request_target' && (github.event.action == 'opened' || github.event.action == 'edited') - uses: actions/github-script@v6 + uses: actions/github-script@v7 with: github-token: ${{ secrets.GITHUB_TOKEN }} script: | diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index d0f403e6f0..bd6d79d16f 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -53,7 +53,7 @@ jobs: name: "DuckDB Integration Tests" runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 
persist-credentials: false @@ -67,7 +67,7 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -102,7 +102,7 @@ jobs: name: "FlightSQL Integration Tests (Dremio and SQLite)" runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 persist-credentials: false @@ -116,7 +116,7 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -129,7 +129,7 @@ jobs: --file ci/conda_env_cpp.txt \ --file ci/conda_env_python.txt pip install pytest-error-for-skips - - uses: actions/setup-go@v3 + - uses: actions/setup-go@v5 with: go-version: 1.19.13 check-latest: true @@ -189,7 +189,7 @@ jobs: name: "PostgreSQL Integration Tests" runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 persist-credentials: false @@ -203,7 +203,7 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -322,7 +322,7 @@ jobs: name: "Snowflake Integration Tests" runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 persist-credentials: false @@ -336,7 +336,7 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -348,7 +348,7 @@ jobs: mamba install -c conda-forge \ --file ci/conda_env_cpp.txt \ --file ci/conda_env_python.txt - - uses: actions/setup-go@v3 + - uses: actions/setup-go@v5 with: go-version: 1.19.13 check-latest: true diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml index 9e11038948..656bc5e9a8 100644 --- a/.github/workflows/java.yml +++ b/.github/workflows/java.yml @@ -44,11 +44,11 @@ jobs: matrix: java: ['8', '11', '17', '21'] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 persist-credentials: false - - uses: actions/setup-java@v3 + - uses: actions/setup-java@v4 with: cache: "maven" distribution: "temurin" @@ -77,11 +77,11 @@ jobs: matrix: java: ['11', '17', '21'] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 persist-credentials: false - - uses: actions/setup-java@v3 + - uses: actions/setup-java@v4 with: cache: "maven" distribution: "temurin" diff --git a/.github/workflows/native-unix.yml b/.github/workflows/native-unix.yml index 87e13682e4..b1e87b0811 100644 --- a/.github/workflows/native-unix.yml +++ b/.github/workflows/native-unix.yml @@ -69,7 +69,7 @@ jobs: # https://conda-forge.org/docs/maintainer/knowledge_base.html#newer-c-features-with-old-sdk CXXFLAGS: "-D_LIBCPP_DISABLE_AVAILABILITY" steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: 
fetch-depth: 0 persist-credentials: false @@ -83,7 +83,7 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -94,7 +94,7 @@ jobs: run: | mamba install -c conda-forge \ --file ci/conda_env_cpp.txt - - uses: actions/setup-go@v3 + - uses: actions/setup-go@v5 with: go-version: 1.19.13 check-latest: true @@ -120,7 +120,7 @@ jobs: export PATH=$RUNNER_TOOL_CACHE/go/1.19.13/x64/bin:$PATH ./ci/scripts/go_build.sh "$(pwd)" "$(pwd)/build" "$HOME/local" - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: driver-manager-${{ matrix.os }} retention-days: 3 @@ -141,7 +141,7 @@ jobs: # https://conda-forge.org/docs/maintainer/knowledge_base.html#newer-c-features-with-old-sdk CXXFLAGS: "-D_LIBCPP_DISABLE_AVAILABILITY" steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 persist-credentials: false @@ -155,7 +155,7 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -192,7 +192,7 @@ jobs: name: "clang-tidy" runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 persist-credentials: false @@ -206,7 +206,7 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -240,7 +240,7 @@ jobs: # https://conda-forge.org/docs/maintainer/knowledge_base.html#newer-c-features-with-old-sdk CXXFLAGS: "-D_LIBCPP_DISABLE_AVAILABILITY" steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 persist-credentials: false @@ -254,7 +254,7 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -268,7 +268,7 @@ jobs: --file ci/conda_env_cpp.txt \ --file ci/conda_env_glib.txt - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: driver-manager-${{ matrix.os }} path: ~/local @@ -294,11 +294,11 @@ jobs: matrix: os: ["macos-latest", "ubuntu-latest", "windows-latest"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 persist-credentials: false - - uses: actions/setup-go@v3 + - uses: actions/setup-go@v5 with: go-version-file: 'go/adbc/go.mod' check-latest: true @@ -332,7 +332,7 @@ jobs: env: CGO_ENABLED: "1" steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 persist-credentials: false @@ -346,7 +346,7 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge 
miniforge-version: latest @@ -357,7 +357,7 @@ jobs: run: | mamba install -c conda-forge \ --file ci/conda_env_cpp.txt - - uses: actions/setup-go@v3 + - uses: actions/setup-go@v5 with: go-version: 1.19.13 check-latest: true @@ -368,7 +368,7 @@ jobs: if: ${{ !contains('macos-latest', matrix.os) }} run: go install honnef.co/go/tools/cmd/staticcheck@v0.3.3 - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: driver-manager-${{ matrix.os }} path: ~/local @@ -410,7 +410,7 @@ jobs: # https://conda-forge.org/docs/maintainer/knowledge_base.html#newer-c-features-with-old-sdk CXXFLAGS: "-D_LIBCPP_DISABLE_AVAILABILITY" steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 persist-credentials: false @@ -424,7 +424,7 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -437,14 +437,14 @@ jobs: python=${{ matrix.python }} \ --file ci/conda_env_cpp.txt \ --file ci/conda_env_python.txt - - uses: actions/setup-go@v3 + - uses: actions/setup-go@v5 with: go-version: 1.19.13 check-latest: true cache: true cache-dependency-path: go/adbc/go.sum - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: driver-manager-${{ matrix.os }} path: ~/local @@ -530,7 +530,7 @@ jobs: os: ["ubuntu-latest"] python: ["3.11"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 persist-credentials: false @@ -544,7 +544,7 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -557,7 +557,7 @@ jobs: python=${{ matrix.python }} \ --file ci/conda_env_docs.txt \ --file ci/conda_env_python.txt - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: driver-manager-${{ matrix.os }} path: ~/local @@ -571,6 +571,13 @@ jobs: shell: bash -l {0} run: | ./ci/scripts/docs_build.sh "$(pwd)" + - name: Archive docs + uses: actions/upload-artifact@v4 + with: + name: docs + retention-days: 2 + path: | + docs/build/html - name: Test Recipes (C++) shell: bash -l {0} run: | diff --git a/.github/workflows/native-windows.yml b/.github/workflows/native-windows.yml index 94b80dc563..fc9760edc6 100644 --- a/.github/workflows/native-windows.yml +++ b/.github/workflows/native-windows.yml @@ -59,7 +59,7 @@ jobs: matrix: os: ["windows-latest"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 persist-credentials: false @@ -76,7 +76,7 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -99,7 +99,7 @@ jobs: run: | .\ci\scripts\cpp_build.ps1 $pwd ${{ github.workspace }}\build - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: driver-manager-${{ matrix.os }} retention-days: 3 @@ -116,7 +116,7 @@ jobs: matrix: os: ["windows-latest"] steps: - - uses: actions/checkout@v3 + - uses: 
actions/checkout@v4 with: fetch-depth: 0 persist-credentials: false @@ -133,7 +133,7 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -194,7 +194,7 @@ jobs: matrix: os: ["windows-latest"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 persist-credentials: false @@ -211,7 +211,7 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -221,14 +221,14 @@ jobs: run: | mamba install -c conda-forge \ --file ci/conda_env_cpp.txt - - uses: actions/setup-go@v3 + - uses: actions/setup-go@v5 with: go-version: 1.20.8 check-latest: true cache: true cache-dependency-path: go/adbc/go.sum - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: driver-manager-${{ matrix.os }} path: ${{ github.workspace }}/build @@ -262,7 +262,7 @@ jobs: os: ["windows-latest"] python: ["3.9", "3.11"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 persist-credentials: false @@ -279,7 +279,7 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -292,7 +292,7 @@ jobs: --file ci/conda_env_cpp.txt \ --file ci/conda_env_python.txt - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: driver-manager-${{ matrix.os }} path: ${{ github.workspace }}/build diff --git a/.github/workflows/nightly-verify.yml b/.github/workflows/nightly-verify.yml index 45279e5d6c..658b2efb0c 100644 --- a/.github/workflows/nightly-verify.yml +++ b/.github/workflows/nightly-verify.yml @@ -31,7 +31,7 @@ jobs: if: github.event_name != 'schedule' || github.repository == 'apache/arrow-adbc' runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 @@ -82,7 +82,7 @@ jobs: shasum --algorithm 512 \ apache-arrow-adbc-${VERSION}.tar.gz > apache-arrow-adbc-${VERSION}.tar.gz.sha512 - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: source retention-days: 7 @@ -102,7 +102,7 @@ jobs: matrix: os: ["macos-latest", "ubuntu-latest", "windows-latest"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 path: arrow-adbc @@ -115,7 +115,7 @@ jobs: grep -E -o '[0-9]+\.[0-9]+\.[0-9]+') echo "VERSION=${VERSION}" >> $GITHUB_ENV - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: source path: ${{ github.workspace }}/apache-arrow-adbc-${{ env.VERSION }}-rc0/ @@ -125,7 +125,7 @@ jobs: run: | mv apache-arrow-adbc-${{ env.VERSION }}-rc0/KEYS . 
- - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 # The Unix script will set up conda itself if: matrix.os == 'windows-latest' with: diff --git a/.github/workflows/nightly-website.yml b/.github/workflows/nightly-website.yml index 38083c8b7e..192a02275d 100644 --- a/.github/workflows/nightly-website.yml +++ b/.github/workflows/nightly-website.yml @@ -36,7 +36,7 @@ jobs: name: "Build Website" runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 persist-credentials: false @@ -45,7 +45,7 @@ jobs: run: | docker-compose run docs - name: Archive docs - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: docs retention-days: 2 @@ -57,20 +57,20 @@ jobs: runs-on: ubuntu-latest needs: [build] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 path: site # NOTE: needed to push at the end persist-credentials: true ref: asf-site - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 path: scripts persist-credentials: false - name: Download docs - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: docs path: temp diff --git a/.github/workflows/packaging.yml b/.github/workflows/packaging.yml index f227947d70..ca7ae623ff 100644 --- a/.github/workflows/packaging.yml +++ b/.github/workflows/packaging.yml @@ -61,7 +61,7 @@ jobs: # For cron: only run on the main repo, not forks if: github.event_name != 'schedule' || github.repository == 'apache/arrow-adbc' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 @@ -110,7 +110,7 @@ jobs: apache-arrow-adbc-${{ steps.version.outputs.VERSION }} \ $(git log -n 1 --format=%h) - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: source retention-days: 7 @@ -123,7 +123,7 @@ jobs: needs: - source steps: - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: source @@ -160,7 +160,7 @@ jobs: popd - name: Archive docs - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: docs retention-days: 2 @@ -173,7 +173,7 @@ jobs: needs: - source steps: - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: source @@ -201,7 +201,7 @@ jobs: cp -a adbc/dist/ ./ - name: Archive JARs - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: java retention-days: 7 @@ -224,11 +224,11 @@ jobs: - debian-bullseye - ubuntu-jammy steps: - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: source - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: repository: apache/arrow path: arrow @@ -275,7 +275,7 @@ jobs: restore-keys: linux-${{ env.TASK_NAMESPACE }}-ccache-${{ matrix.target }}- - name: Login to GitHub Container registry - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.actor }} @@ -299,7 +299,7 @@ jobs: tar czf ${{ matrix.target }}.tar.gz ${DISTRIBUTION} - name: Upload artifacts - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: ${{ matrix.target }} retention-days: 7 @@ -353,7 +353,7 @@ jobs: # TODO: "linux_aarch64_" arch: ["linux_64_"] steps: - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: source @@ -386,7 +386,7 @@ jobs: popd - name: Archive Conda packages - uses: actions/upload-artifact@v3 + uses: 
actions/upload-artifact@v4 with: name: python-${{ matrix.arch }}-conda retention-days: 7 @@ -420,7 +420,7 @@ jobs: run: shell: bash -l {0} steps: - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: source @@ -442,7 +442,7 @@ jobs: echo "schedule: ${{ github.event.schedule }}" >> $GITHUB_STEP_SUMMARY echo "ref: ${{ github.ref }}" >> $GITHUB_STEP_SUMMARY - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -462,7 +462,7 @@ jobs: ./adbc/ci/scripts/python_conda_build.sh $(pwd)/adbc ${ARCH_CONDA_FORGE}.yaml $(pwd)/adbc/build - name: Archive Conda packages - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: python-${{ matrix.arch }}-conda retention-days: 7 @@ -493,7 +493,7 @@ jobs: - arch: arm64v8 is_pr: true steps: - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: source @@ -516,7 +516,7 @@ jobs: echo "ref: ${{ github.ref }}" >> $GITHUB_STEP_SUMMARY - name: Set up QEMU - uses: docker/setup-qemu-action@v2 + uses: docker/setup-qemu-action@v3 - name: Build wheel env: @@ -530,7 +530,7 @@ jobs: popd - name: Archive wheels - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: python-${{ matrix.arch }}-manylinux${{ matrix.manylinux_version }} retention-days: 7 @@ -566,7 +566,7 @@ jobs: # Where to install vcpkg VCPKG_ROOT: "${{ github.workspace }}/vcpkg" steps: - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: source @@ -605,7 +605,7 @@ jobs: ci/scripts/install_vcpkg.sh $VCPKG_ROOT ${{ steps.vcpkg_version.outputs.VCPKG_VERSION }} popd - - uses: actions/setup-go@v3 + - uses: actions/setup-go@v5 with: go-version: 1.19.13 check-latest: true @@ -631,7 +631,7 @@ jobs: popd - name: Archive wheels - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: python-${{ matrix.arch }}-macos retention-days: 7 @@ -681,7 +681,7 @@ jobs: # Where to install vcpkg VCPKG_ROOT: "${{ github.workspace }}\\vcpkg" steps: - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: source @@ -729,7 +729,7 @@ jobs: # Windows needs newer Go than 1.19 # https://github.com/golang/go/issues/51007 - - uses: actions/setup-go@v3 + - uses: actions/setup-go@v5 with: go-version: 1.20.8 check-latest: true @@ -737,7 +737,7 @@ jobs: cache-dependency-path: adbc/go/adbc/go.sum - name: Install Python ${{ matrix.python_version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python_version }} @@ -752,7 +752,7 @@ jobs: popd - name: Archive wheels - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: python${{ matrix.python_version }}-windows retention-days: 7 @@ -779,7 +779,7 @@ jobs: needs: - source steps: - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: source @@ -810,7 +810,7 @@ jobs: popd - name: Archive sdist - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: python-sdist retention-days: 7 @@ -842,7 +842,7 @@ jobs: - python-sdist steps: - name: Get All Artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: path: release-artifacts - name: Release @@ -890,15 +890,15 @@ jobs: run: shell: bash -l {0} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 persist-credentials: true - name: Get All Artifacts 
- uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: path: conda-packages - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -933,12 +933,12 @@ jobs: - python-windows - python-sdist steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 persist-credentials: true - name: Get All Artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: path: nightly-artifacts - name: Upload diff --git a/.github/workflows/r-check.yml b/.github/workflows/r-check.yml index a6e6983097..e29f7f3ed7 100644 --- a/.github/workflows/r-check.yml +++ b/.github/workflows/r-check.yml @@ -41,7 +41,7 @@ jobs: steps: - uses: actions/checkout@v4 - - uses: actions/setup-go@v4 + - uses: actions/setup-go@v5 with: go-version: '1.20' - uses: r-lib/actions/setup-r@v2 diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 248a9ee494..c2cf1edcea 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -48,7 +48,7 @@ jobs: name: "Rust ${{ matrix.os }}" runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 persist-credentials: false diff --git a/.github/workflows/verify.yml b/.github/workflows/verify.yml index fbde892bd0..966167bccf 100644 --- a/.github/workflows/verify.yml +++ b/.github/workflows/verify.yml @@ -50,7 +50,7 @@ jobs: matrix: os: ["macos-latest", "ubuntu-latest"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 persist-credentials: false @@ -72,11 +72,11 @@ jobs: matrix: os: ["macos-latest", "ubuntu-latest", "windows-latest"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 persist-credentials: false - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 # The Unix script will set up conda itself if: matrix.os == 'windows-latest' with: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 01e624caa0..194924ae0f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -32,11 +32,11 @@ repos: - id: mixed-line-ending name: "Mixed line endings (LF)" args: [--fix=lf] - exclude: '\.(bat|sln|csproj)$' + exclude: '\.(bat|sln)$' - id: mixed-line-ending name: "Mixed line endings (CRLF)" args: [--fix=crlf] - files: '\.(bat|sln|csproj)$' + files: '\.(bat|sln)$' - id: trailing-whitespace exclude: "^r/.*?/_snaps/.*?.md$" - repo: https://github.com/pocc/pre-commit-hooks diff --git a/c/driver/postgresql/connection.cc b/c/driver/postgresql/connection.cc index d389a66c2b..deae3171c5 100644 --- a/c/driver/postgresql/connection.cc +++ b/c/driver/postgresql/connection.cc @@ -1147,38 +1147,23 @@ AdbcStatusCode PostgresConnection::GetTableSchema(const char* catalog, struct ArrowSchema* schema, struct AdbcError* error) { AdbcStatusCode final_status = ADBC_STATUS_OK; - struct StringBuilder query; - std::memset(&query, 0, sizeof(query)); - std::vector params; - if (StringBuilderInit(&query, /*initial_size=*/256) != 0) return ADBC_STATUS_INTERNAL; - if (StringBuilderAppend( - &query, "%s", - "SELECT attname, atttypid " - "FROM pg_catalog.pg_class AS cls " - "INNER JOIN pg_catalog.pg_attribute AS attr ON cls.oid = attr.attrelid " - "INNER JOIN pg_catalog.pg_type AS typ ON attr.atttypid = typ.oid " - "WHERE attr.attnum >= 0 AND cls.oid = ") != 0) - return ADBC_STATUS_INTERNAL; + std::string query = + "SELECT attname, 
atttypid " + "FROM pg_catalog.pg_class AS cls " + "INNER JOIN pg_catalog.pg_attribute AS attr ON cls.oid = attr.attrelid " + "INNER JOIN pg_catalog.pg_type AS typ ON attr.atttypid = typ.oid " + "WHERE attr.attnum >= 0 AND cls.oid = $1::regclass::oid"; + std::vector params; if (db_schema != nullptr) { - if (StringBuilderAppend(&query, "%s", "$1.")) { - StringBuilderReset(&query); - return ADBC_STATUS_INTERNAL; - } - params.push_back(db_schema); - } - - if (StringBuilderAppend(&query, "%s%" PRIu64 "%s", "$", - static_cast(params.size() + 1), "::regclass::oid")) { - StringBuilderReset(&query); - return ADBC_STATUS_INTERNAL; + params.push_back(std::string(db_schema) + "." + table_name); + } else { + params.push_back(table_name); } - params.push_back(table_name); PqResultHelper result_helper = - PqResultHelper{conn_, std::string(query.buffer), params, error}; - StringBuilderReset(&query); + PqResultHelper{conn_, std::string(query.c_str()), params, error}; RAISE_ADBC(result_helper.Prepare()); auto result = result_helper.Execute(); diff --git a/c/driver/postgresql/postgres_copy_reader.h b/c/driver/postgresql/postgres_copy_reader.h index 99ddaec8c0..8a9192c329 100644 --- a/c/driver/postgresql/postgres_copy_reader.h +++ b/c/driver/postgresql/postgres_copy_reader.h @@ -17,10 +17,14 @@ #pragma once +// Windows +#define NOMINMAX + #include #include #include #include +#include #include #include #include @@ -68,6 +72,9 @@ constexpr int64_t kMaxSafeMicrosToNanos = 9223372036854775L; // without overflow constexpr int64_t kMinSafeMicrosToNanos = -9223372036854775L; +// 2000-01-01 00:00:00.000000 in microseconds +constexpr int64_t kPostgresTimestampEpoch = 946684800000000L; + // Read a value from the buffer without checking the buffer size. Advances // the cursor of data and reduces its size by sizeof(T). 
// Read a value from the buffer without checking the buffer size. Advances // the cursor of data and reduces its size by sizeof(T). template <typename T> @@ -1479,14 +1486,20 @@ class PostgresCopyTimestampFieldWriter : public PostgresCopyFieldWriter { if (!overflow_safe) { ArrowErrorSet(error, - "Row %" PRId64 " timestamp value %" PRId64 + "[libpq] Row %" PRId64 " timestamp value %" PRId64 " with unit %d would overflow", index, raw_value, TU); return ADBC_STATUS_INVALID_ARGUMENT; } - // 2000-01-01 00:00:00.000000 in microseconds - constexpr int64_t kPostgresTimestampEpoch = 946684800000000; + if (value < std::numeric_limits<int64_t>::min() + kPostgresTimestampEpoch) { + ArrowErrorSet(error, + "[libpq] Row %" PRId64 " timestamp value %" PRId64 + " with unit %d would underflow", + index, raw_value, TU); + return ADBC_STATUS_INVALID_ARGUMENT; + } + const int64_t scaled = value - kPostgresTimestampEpoch; NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, scaled, error)); @@ -1607,16 +1620,20 @@ static inline ArrowErrorCode MakeCopyFieldWriter(struct ArrowSchema* schema, class PostgresCopyStreamWriter { public: - ArrowErrorCode Init(struct ArrowSchema* schema, struct ArrowArray* array) { + ArrowErrorCode Init(struct ArrowSchema* schema) { schema_ = schema; NANOARROW_RETURN_NOT_OK( ArrowArrayViewInitFromSchema(&array_view_.value, schema, nullptr)); - NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArray(&array_view_.value, array, nullptr)); root_writer_.Init(&array_view_.value); ArrowBufferInit(&buffer_.value); return NANOARROW_OK; } + ArrowErrorCode SetArray(struct ArrowArray* array) { + NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArray(&array_view_.value, array, nullptr)); + return NANOARROW_OK; + } + ArrowErrorCode WriteHeader(ArrowError* error) { NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(&buffer_.value, kPgCopyBinarySignature, sizeof(kPgCopyBinarySignature))); @@ -1655,6 +1672,11 @@ class PostgresCopyStreamWriter { const struct ArrowBuffer& WriteBuffer() const { return buffer_.value; } + void Rewind() { + records_written_ = 0; + buffer_->size_bytes = 0; + } + private: PostgresCopyFieldTupleWriter root_writer_; struct ArrowSchema* schema_; diff --git a/c/driver/postgresql/postgres_copy_reader_test.cc b/c/driver/postgresql/postgres_copy_reader_test.cc index 6dc67d874f..201aa223a2 100644 --- a/c/driver/postgresql/postgres_copy_reader_test.cc +++ b/c/driver/postgresql/postgres_copy_reader_test.cc @@ -60,13 +60,14 @@ class PostgresCopyStreamTester { class PostgresCopyStreamWriteTester { public: ArrowErrorCode Init(struct ArrowSchema* schema, struct ArrowArray* array, - ArrowError* error = nullptr) { - NANOARROW_RETURN_NOT_OK(writer_.Init(schema, array)); + struct ArrowError* error = nullptr) { + NANOARROW_RETURN_NOT_OK(writer_.Init(schema)); NANOARROW_RETURN_NOT_OK(writer_.InitFieldWriters(error)); + NANOARROW_RETURN_NOT_OK(writer_.SetArray(array)); return NANOARROW_OK; } - ArrowErrorCode WriteAll(ArrowError* error = nullptr) { + ArrowErrorCode WriteAll(struct ArrowError* error) { NANOARROW_RETURN_NOT_OK(writer_.WriteHeader(error)); int result; @@ -77,8 +78,20 @@ class PostgresCopyStreamWriteTester { return result; } + ArrowErrorCode WriteArray(struct ArrowArray* array, struct ArrowError* error) { + writer_.SetArray(array); + int result; + do { + result = writer_.WriteRecord(error); + } while (result == NANOARROW_OK); + + return result; + } + const struct ArrowBuffer& WriteBuffer() const { return writer_.WriteBuffer(); } + void Rewind() { writer_.Rewind(); } + private: PostgresCopyStreamWriter writer_; }; @@ -1327,4 +1340,40 @@ TEST(PostgresCopyUtilsTest, PostgresCopyReadCustomRecord) { ASSERT_DOUBLE_EQ(data_buffer2[2], 0); }
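For reference when reading the byte offsets in the regression test below: a COPY BINARY stream opens with an 11-byte signature, a 4-byte flags field, and a 4-byte header-extension length (19 bytes in total), and closes with a 2-byte end-of-data marker (a 16-bit tuple count of -1). These sizes are assumed from the PostgreSQL COPY documentation, not taken from this patch; the constants are named here only for illustration:

#include <cstddef>

// COPY BINARY framing: signature + flags + header-extension length.
constexpr size_t kPgCopyHeaderSize = 11 + 4 + 4;  // 19 bytes
// End-of-data marker: a 16-bit tuple count of -1.
constexpr size_t kPgCopyTrailerSize = 2;

// Hence the test's "- 2" (drop the trailer), "- 21" (drop header and
// trailer), and "i + 19" (skip the header) offsets.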
+TEST(PostgresCopyUtilsTest, PostgresCopyWriteMultiBatch) { + // Regression test for https://github.com/apache/arrow-adbc/issues/1310 + adbc_validation::Handle<struct ArrowSchema> schema; + adbc_validation::Handle<struct ArrowArray> array; + struct ArrowError na_error; + ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_INT32}}), + NANOARROW_OK); + ASSERT_EQ(adbc_validation::MakeBatch<int32_t>(&schema.value, &array.value, &na_error, + {-123, -1, 1, 123, std::nullopt}), + NANOARROW_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + size_t buf_size = sizeof(kTestPgCopyInteger) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyInteger[i]); + } + + tester.Rewind(); + ASSERT_EQ(tester.WriteArray(&array.value, nullptr), ENODATA); + + buf = tester.WriteBuffer(); + // Ignore the header and footer + buf_size = sizeof(kTestPgCopyInteger) - 21; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyInteger[i + 19]); + } +} + } // namespace adbcpq diff --git a/c/driver/postgresql/postgres_util.h b/c/driver/postgresql/postgres_util.h index 1009d70b55..95e2619f10 100644 --- a/c/driver/postgresql/postgres_util.h +++ b/c/driver/postgresql/postgres_util.h @@ -166,9 +166,11 @@ struct Handle { Handle() { std::memset(&value, 0, sizeof(value)); } - ~Handle() { Releaser::Release(&value); } + ~Handle() { reset(); } Resource* operator->() { return &value; } + + void reset() { Releaser::Release(&value); } }; } // namespace adbcpq diff --git a/c/driver/postgresql/postgresql_test.cc b/c/driver/postgresql/postgresql_test.cc index eb5d24c384..8ac841d8a0 100644 --- a/c/driver/postgresql/postgresql_test.cc +++ b/c/driver/postgresql/postgresql_test.cc @@ -62,6 +62,18 @@ class PostgresQuirks : public adbc_validation::DriverQuirks { return AdbcStatementRelease(&statement.value, error); } + AdbcStatusCode DropTable(struct AdbcConnection* connection, const std::string& name, + const std::string& db_schema, + struct AdbcError* error) const override { + Handle<struct AdbcStatement> statement; + RAISE_ADBC(AdbcStatementNew(connection, &statement.value, error)); + + std::string query = "DROP TABLE IF EXISTS \"" + db_schema + "\".\"" + name + "\""; + RAISE_ADBC(AdbcStatementSetSqlQuery(&statement.value, query.c_str(), error)); + RAISE_ADBC(AdbcStatementExecuteQuery(&statement.value, nullptr, nullptr, error)); + return AdbcStatementRelease(&statement.value, error); + } + AdbcStatusCode DropTempTable(struct AdbcConnection* connection, const std::string& name, struct AdbcError* error) const override { Handle<struct AdbcStatement> statement; @@ -84,6 +96,18 @@ class PostgresQuirks : public adbc_validation::DriverQuirks { return AdbcStatementRelease(&statement.value, error); } + AdbcStatusCode EnsureDbSchema(struct AdbcConnection* connection, + const std::string& name, + struct AdbcError* error) const override { + Handle<struct AdbcStatement> statement; + RAISE_ADBC(AdbcStatementNew(connection, &statement.value, error)); + + std::string query = "CREATE SCHEMA IF NOT EXISTS \"" + name + "\""; + RAISE_ADBC(AdbcStatementSetSqlQuery(&statement.value, query.c_str(), error)); + RAISE_ADBC(AdbcStatementExecuteQuery(&statement.value, nullptr, nullptr, error)); + return AdbcStatementRelease(&statement.value, error); + } + std::string
BindParameter(int index) const override { return "$" + std::to_string(index + 1); } @@ -347,7 +371,7 @@ TEST_F(PostgresConnectionTest, GetObjectsGetAllFindsPrimaryKey) { ASSERT_THAT(AdbcStatementExecuteQuery(&statement, &reader.stream.value, &reader.rows_affected, &error), IsOkStatus(&error)); - ASSERT_EQ(reader.rows_affected, 0); + ASSERT_EQ(reader.rows_affected, -1); ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); ASSERT_NO_FATAL_FAILURE(reader.Next()); ASSERT_EQ(reader.array->release, nullptr); @@ -420,7 +444,7 @@ TEST_F(PostgresConnectionTest, GetObjectsGetAllFindsForeignKey) { ASSERT_THAT(AdbcStatementExecuteQuery(&statement, &reader.stream.value, &reader.rows_affected, &error), IsOkStatus(&error)); - ASSERT_EQ(reader.rows_affected, 0); + ASSERT_EQ(reader.rows_affected, -1); ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); ASSERT_NO_FATAL_FAILURE(reader.Next()); ASSERT_EQ(reader.array->release, nullptr); @@ -439,7 +463,7 @@ TEST_F(PostgresConnectionTest, GetObjectsGetAllFindsForeignKey) { ASSERT_THAT(AdbcStatementExecuteQuery(&statement, &reader.stream.value, &reader.rows_affected, &error), IsOkStatus(&error)); - ASSERT_EQ(reader.rows_affected, 0); + ASSERT_EQ(reader.rows_affected, -1); ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); ASSERT_NO_FATAL_FAILURE(reader.Next()); ASSERT_EQ(reader.array->release, nullptr); @@ -828,12 +852,6 @@ class PostgresStatementTest : public ::testing::Test, void TestSqlPrepareErrorParamCountMismatch() { GTEST_SKIP() << "Not yet implemented"; } void TestSqlPrepareGetParameterSchema() { GTEST_SKIP() << "Not yet implemented"; } void TestSqlPrepareSelectParams() { GTEST_SKIP() << "Not yet implemented"; } - void TestSqlQueryRowsAffectedDelete() { - GTEST_SKIP() << "Cannot query rows affected in delete (not implemented)"; - } - void TestSqlQueryRowsAffectedDeleteStream() { - GTEST_SKIP() << "Cannot query rows affected in delete stream (not implemented)"; - } void TestConcurrentStatements() { // TODO: refactor driver so that we read all the data as soon as @@ -1172,7 +1190,7 @@ TEST_F(PostgresStatementTest, UpdateInExecuteQuery) { ASSERT_THAT(AdbcStatementExecuteQuery(&statement, &reader.stream.value, &reader.rows_affected, &error), IsOkStatus(&error)); - ASSERT_EQ(reader.rows_affected, 0); + ASSERT_EQ(reader.rows_affected, -1); ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); ASSERT_NO_FATAL_FAILURE(reader.Next()); ASSERT_EQ(reader.array->release, nullptr); @@ -1187,7 +1205,7 @@ TEST_F(PostgresStatementTest, UpdateInExecuteQuery) { ASSERT_THAT(AdbcStatementExecuteQuery(&statement, &reader.stream.value, &reader.rows_affected, &error), IsOkStatus(&error)); - ASSERT_EQ(reader.rows_affected, 0); + ASSERT_EQ(reader.rows_affected, -1); ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); ASSERT_NO_FATAL_FAILURE(reader.Next()); ASSERT_EQ(reader.array->release, nullptr); diff --git a/c/driver/postgresql/statement.cc b/c/driver/postgresql/statement.cc index 5206d8dc58..68fd45a944 100644 --- a/c/driver/postgresql/statement.cc +++ b/c/driver/postgresql/statement.cc @@ -15,6 +15,9 @@ // specific language governing permissions and limitations // under the License. 
+// Windows +#define NOMINMAX + #include "statement.h" #include @@ -23,6 +26,7 @@ #include #include #include +#include <limits> #include #include #include @@ -437,8 +441,6 @@ struct BindStream { case ArrowType::NANOARROW_TYPE_TIMESTAMP: { int64_t val = array_view->children[col]->buffer_views[1].data.as_int64[row]; - // 2000-01-01 00:00:00.000000 in microseconds - constexpr int64_t kPostgresTimestampEpoch = 946684800000000; bool overflow_safe = true; auto unit = bind_schema_fields[col].time_unit; @@ -474,6 +476,15 @@ struct BindStream { return ADBC_STATUS_INVALID_ARGUMENT; } + if (val < std::numeric_limits<int64_t>::min() + kPostgresTimestampEpoch) { + SetError(error, + "[libpq] Field #%" PRId64 " ('%s') Row #%" PRId64 + " has value '%" PRIi64 "' which would underflow", + col + 1, bind_schema->children[col]->name, row + 1, + array_view->children[col]->buffer_views[1].data.as_int64[row]); + return ADBC_STATUS_INVALID_ARGUMENT; + } + if (bind_schema_fields[col].type == ArrowType::NANOARROW_TYPE_TIMESTAMP) { const uint64_t value = ToNetworkInt64(val - kPostgresTimestampEpoch); std::memcpy(param_values[col], &value, sizeof(int64_t)); @@ -559,7 +570,12 @@ struct BindStream { AdbcStatusCode ExecuteCopy(PGconn* conn, int64_t* rows_affected, struct AdbcError* error) { if (rows_affected) *rows_affected = 0; - PGresult* result = nullptr; + + PostgresCopyStreamWriter writer; + CHECK_NA(INTERNAL, writer.Init(&bind_schema.value), error); + CHECK_NA(INTERNAL, writer.InitFieldWriters(nullptr), error); + + CHECK_NA(INTERNAL, writer.WriteHeader(nullptr), error); while (true) { Handle<struct ArrowArray> array; @@ -573,20 +589,9 @@ struct BindStream { } if (!array->release) break; - Handle<struct ArrowArrayView> array_view; - CHECK_NA( - INTERNAL, - ArrowArrayViewInitFromSchema(&array_view.value, &bind_schema.value, nullptr), - error); - CHECK_NA(INTERNAL, ArrowArrayViewSetArray(&array_view.value, &array.value, nullptr), - error); - - PostgresCopyStreamWriter writer; - CHECK_NA(INTERNAL, writer.Init(&bind_schema.value, &array.value), error); - CHECK_NA(INTERNAL, writer.InitFieldWriters(nullptr), error); + CHECK_NA(INTERNAL, writer.SetArray(&array.value), error); // build writer buffer - CHECK_NA(INTERNAL, writer.WriteHeader(nullptr), error); int write_result; do { write_result = writer.WriteRecord(nullptr); @@ -605,25 +610,26 @@ struct BindStream { return ADBC_STATUS_IO; } - if (PQputCopyEnd(conn, NULL) <= 0) { - SetError(error, "Error message returned by PQputCopyEnd: %s", - PQerrorMessage(conn)); - return ADBC_STATUS_IO; - } + if (rows_affected) *rows_affected += array->length; + writer.Rewind(); + } - result = PQgetResult(conn); - ExecStatusType pg_status = PQresultStatus(result); - if (pg_status != PGRES_COMMAND_OK) { - AdbcStatusCode code = - SetError(error, result, "[libpq] Failed to execute COPY statement: %s %s", - PQresStatus(pg_status), PQerrorMessage(conn)); - PQclear(result); - return code; - } + if (PQputCopyEnd(conn, NULL) <= 0) { + SetError(error, "Error message returned by PQputCopyEnd: %s", PQerrorMessage(conn)); + return ADBC_STATUS_IO; + } + PGresult* result = PQgetResult(conn); + ExecStatusType pg_status = PQresultStatus(result); + if (pg_status != PGRES_COMMAND_OK) { + AdbcStatusCode code = - SetError(error, result, "[libpq] Failed to execute COPY statement: %s %s", - PQresStatus(pg_status), PQerrorMessage(conn)); PQclear(result); - if (rows_affected) *rows_affected += array->length; + return code; } + + PQclear(result); return ADBC_STATUS_OK; } };
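The restructuring above hoists the writer out of the per-batch loop: the COPY header is written once, each batch is appended after SetArray()/Rewind(), and PQputCopyEnd plus the single result check now run once after the loop. A condensed, self-contained sketch of that libpq sequence (assuming a COPY ... FROM STDIN BINARY statement has already been issued on conn; CopyAllChunks is a hypothetical helper, not driver code):

#include <libpq-fe.h>

// Send every buffered chunk, then terminate the COPY exactly once and
// inspect the single PGresult, as the rewritten ExecuteCopy does.
bool CopyAllChunks(PGconn* conn, const char* const* chunks, const int* sizes,
                   int n_chunks) {
  for (int i = 0; i < n_chunks; i++) {
    if (PQputCopyData(conn, chunks[i], sizes[i]) <= 0) return false;
  }
  if (PQputCopyEnd(conn, nullptr) <= 0) return false;
  PGresult* result = PQgetResult(conn);
  const bool ok = (PQresultStatus(result) == PGRES_COMMAND_OK);
  PQclear(result);
  return ok;
}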
@@ -1315,7 +1321,18 @@ AdbcStatusCode PostgresStatement::ExecuteUpdateQuery(int64_t* rows_affected, PQclear(result); return code; } - if (rows_affected) *rows_affected = PQntuples(reader_.result_); + if (rows_affected) { + if (status == PGRES_TUPLES_OK) { + *rows_affected = PQntuples(reader_.result_); + } else { + // In theory, PQcmdTuples would work here, but experimentally it gives + // an empty string even for a DELETE. (Also, why does it return a + // string...) Possibly, it doesn't work because we use PQexecPrepared + // but the docstring is careful to specify it works on an EXECUTE of a + // prepared statement. + *rows_affected = -1; + } + } PQclear(result); return ADBC_STATUS_OK; }
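Following up on the comment in the hunk above: if PQcmdTuples ever does report a count on this code path, mapping its string result onto the convention used here (-1 for unknown) would look roughly like this. A hedged sketch only; RowsAffected is a hypothetical helper, not what the driver currently ships:

#include <libpq-fe.h>

#include <cstdint>
#include <cstdlib>

// PQcmdTuples returns the affected-row count as a string and may be
// empty, which maps to -1 ("unknown") under the convention above.
int64_t RowsAffected(PGresult* result) {
  if (PQresultStatus(result) == PGRES_TUPLES_OK) {
    return PQntuples(result);  // a result set: count the returned tuples
  }
  const char* tuples = PQcmdTuples(result);
  if (tuples == nullptr || tuples[0] == '\0') return -1;
  return static_cast<int64_t>(std::strtoll(tuples, nullptr, 10));
}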
diff --git a/c/validation/adbc_validation.cc b/c/validation/adbc_validation.cc index d30aa0a979..97d12be169 100644 --- a/c/validation/adbc_validation.cc +++ b/c/validation/adbc_validation.cc @@ -70,7 +70,9 @@ bool iequals(std::string_view s1, std::string_view s2) { // DriverQuirks AdbcStatusCode DoIngestSampleTable(struct AdbcConnection* connection, - const std::string& name, struct AdbcError* error) { + const std::string& name, + std::optional<std::string> db_schema, + struct AdbcError* error) { Handle<struct ArrowSchema> schema; Handle<struct ArrowArray> array; struct ArrowError na_error; @@ -84,6 +86,10 @@ AdbcStatusCode DoIngestSampleTable(struct AdbcConnection* connection, CHECK_OK(AdbcStatementNew(connection, &statement.value, error)); CHECK_OK(AdbcStatementSetOption(&statement.value, ADBC_INGEST_OPTION_TARGET_TABLE, name.c_str(), error)); + if (db_schema.has_value()) { + CHECK_OK(AdbcStatementSetOption(&statement.value, ADBC_INGEST_OPTION_TARGET_DB_SCHEMA, + db_schema->c_str(), error)); + } CHECK_OK(AdbcStatementBind(&statement.value, &array.value, &schema.value, error)); CHECK_OK(AdbcStatementExecuteQuery(&statement.value, nullptr, nullptr, error)); CHECK_OK(AdbcStatementRelease(&statement.value, error)); @@ -91,7 +97,8 @@ AdbcStatusCode DoIngestSampleTable(struct AdbcConnection* connection, } void IngestSampleTable(struct AdbcConnection* connection, struct AdbcError* error) { - ASSERT_THAT(DoIngestSampleTable(connection, "bulk_ingest", error), IsOkStatus(error)); + ASSERT_THAT(DoIngestSampleTable(connection, "bulk_ingest", std::nullopt, error), + IsOkStatus(error)); } AdbcStatusCode DriverQuirks::EnsureSampleTable(struct AdbcConnection* connection, @@ -107,7 +114,17 @@ AdbcStatusCode DriverQuirks::CreateSampleTable(struct AdbcConnection* connection, if (!supports_bulk_ingest(ADBC_INGEST_OPTION_MODE_CREATE)) { return ADBC_STATUS_NOT_IMPLEMENTED; } - return DoIngestSampleTable(connection, name, error); + return DoIngestSampleTable(connection, name, std::nullopt, error); +} + +AdbcStatusCode DriverQuirks::CreateSampleTable(struct AdbcConnection* connection, + const std::string& name, + const std::string& schema, + struct AdbcError* error) const { + if (!supports_bulk_ingest(ADBC_INGEST_OPTION_MODE_CREATE)) { + return ADBC_STATUS_NOT_IMPLEMENTED; + } + return DoIngestSampleTable(connection, name, schema, error); } //------------------------------------------------------------ @@ -431,6 +448,34 @@ void ConnectionTest::TestMetadataGetTableSchema() { {"strings", NANOARROW_TYPE_STRING, NULLABLE}})); } +void ConnectionTest::TestMetadataGetTableSchemaDbSchema() { + ASSERT_THAT(AdbcConnectionNew(&connection, &error), IsOkStatus(&error)); + ASSERT_THAT(AdbcConnectionInit(&connection, &database, &error), IsOkStatus(&error)); + + auto status = quirks()->EnsureDbSchema(&connection, "otherschema", &error); + if (status == ADBC_STATUS_NOT_IMPLEMENTED) { + GTEST_SKIP() << "Schema not supported"; + return; + } + ASSERT_THAT(status, IsOkStatus(&error)); + + ASSERT_THAT(quirks()->DropTable(&connection, "bulk_ingest", "otherschema", &error), + IsOkStatus(&error)); + ASSERT_THAT( + quirks()->CreateSampleTable(&connection, "bulk_ingest", "otherschema", &error), + IsOkStatus(&error)); + + Handle<struct ArrowSchema> schema; + ASSERT_THAT(AdbcConnectionGetTableSchema(&connection, /*catalog=*/nullptr, + /*db_schema=*/"otherschema", "bulk_ingest", + &schema.value, &error), + IsOkStatus(&error)); + + ASSERT_NO_FATAL_FAILURE( + CompareSchema(&schema.value, {{"int64s", NANOARROW_TYPE_INT64, NULLABLE}, + {"strings", NANOARROW_TYPE_STRING, NULLABLE}})); +} + void ConnectionTest::TestMetadataGetTableSchemaEscaping() { if (!quirks()->supports_bulk_ingest(ADBC_INGEST_OPTION_MODE_CREATE)) { GTEST_SKIP(); diff --git a/c/validation/adbc_validation.h b/c/validation/adbc_validation.h index 874d9a0584..30a20491eb 100644 --- a/c/validation/adbc_validation.h +++ b/c/validation/adbc_validation.h @@ -50,6 +50,13 @@ class DriverQuirks { return ADBC_STATUS_OK; } + virtual AdbcStatusCode DropTable(struct AdbcConnection* connection, + const std::string& name, + const std::string& db_schema, + struct AdbcError* error) const { + return ADBC_STATUS_NOT_IMPLEMENTED; + } + /// \brief Drop the given temporary table. Used by tests to reset state. virtual AdbcStatusCode DropTempTable(struct AdbcConnection* connection, const std::string& name, @@ -68,13 +75,33 @@ class DriverQuirks { const std::string& name, struct AdbcError* error) const; + /// \brief Create a database schema for testing. + virtual AdbcStatusCode EnsureDbSchema(struct AdbcConnection* connection, + const std::string& name, + struct AdbcError* error) const { + return ADBC_STATUS_NOT_IMPLEMENTED; + } + + /// \brief Create a table of sample data with a fixed schema for testing. + /// + /// The table should have two columns: + /// - "int64s" with Arrow type int64. + /// - "strings" with Arrow type utf8. + virtual AdbcStatusCode CreateSampleTable(struct AdbcConnection* connection, + const std::string& name, + struct AdbcError* error) const; + /// \brief Create a table of sample data with a fixed schema for testing. /// + /// Create it in the given schema. Specify "" for the default schema. + /// Return NOT_IMPLEMENTED if not supported by this backend. + /// /// The table should have two columns: /// - "int64s" with Arrow type int64. /// - "strings" with Arrow type utf8.
virtual AdbcStatusCode CreateSampleTable(struct AdbcConnection* connection, const std::string& name, + const std::string& schema, struct AdbcError* error) const; /// \brief Get the statement to create a table with a primary key, or nullopt if not @@ -197,7 +224,7 @@ class DriverQuirks { /// \brief Default catalog to use for tests virtual std::string catalog() const { return ""; } - /// \brief Default Schema to use for tests + /// \brief Default database schema to use for tests virtual std::string db_schema() const { return ""; } }; @@ -243,6 +270,7 @@ class ConnectionTest { void TestMetadataGetInfo(); void TestMetadataGetTableSchema(); + void TestMetadataGetTableSchemaDbSchema(); void TestMetadataGetTableSchemaEscaping(); void TestMetadataGetTableSchemaNotFound(); void TestMetadataGetTableTypes(); @@ -277,6 +305,9 @@ class ConnectionTest { TEST_F(FIXTURE, MetadataCurrentDbSchema) { TestMetadataCurrentDbSchema(); } \ TEST_F(FIXTURE, MetadataGetInfo) { TestMetadataGetInfo(); } \ TEST_F(FIXTURE, MetadataGetTableSchema) { TestMetadataGetTableSchema(); } \ + TEST_F(FIXTURE, MetadataGetTableSchemaDbSchema) { \ + TestMetadataGetTableSchemaDbSchema(); \ + } \ TEST_F(FIXTURE, MetadataGetTableSchemaEscaping) { \ TestMetadataGetTableSchemaEscaping(); \ } \ diff --git a/ci/conda_env_cpp_lint.txt b/ci/conda_env_cpp_lint.txt index 471ef0de75..7cc81c1e1a 100644 --- a/ci/conda_env_cpp_lint.txt +++ b/ci/conda_env_cpp_lint.txt @@ -15,5 +15,5 @@ # specific language governing permissions and limitations # under the License. -clang=14 -clang-tools=14 +clang=14.* +clang-tools=14.* diff --git a/ci/conda_env_docs.txt b/ci/conda_env_docs.txt index ff2b6df33f..42151b8d29 100644 --- a/ci/conda_env_docs.txt +++ b/ci/conda_env_docs.txt @@ -18,7 +18,7 @@ breathe doxygen # XXX(https://github.com/apache/arrow-adbc/issues/987) -furo=2023.07.26 +furo>=2023.09.10 make # Needed to install mermaid nodejs @@ -28,4 +28,5 @@ sphinx>=5.0 sphinx-autobuild sphinx-copybutton sphinx-design +sphinxext-opengraph r-pkgdown diff --git a/ci/conda_env_java.txt b/ci/conda_env_java.txt new file mode 100644 index 0000000000..2b06fdddd8 --- /dev/null +++ b/ci/conda_env_java.txt @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +maven +# maven javadocs build appears to fail under Java 21 +# maven javadocs build appears to emit the wrong stylesheet under Java 8 +openjdk=17.* diff --git a/ci/scripts/csharp_build.ps1 b/ci/scripts/csharp_build.ps1 new file mode 100644 index 0000000000..01b75bf8a0 --- /dev/null +++ b/ci/scripts/csharp_build.ps1 @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cd ..\..\csharp + +dotnet build diff --git a/ci/scripts/csharp_pack.ps1 b/ci/scripts/csharp_pack.ps1 new file mode 100644 index 0000000000..b875b60dd3 --- /dev/null +++ b/ci/scripts/csharp_pack.ps1 @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +param ( + [string]$destination=$null +) + +$loc = Get-Location + +if ($loc.ToString().ToLower().EndsWith("csharp") -eq $False) { + cd ..\..\csharp +} + +if ($destination) { + dotnet pack -c Release -o $destination +} +else { + dotnet pack -c Release +} diff --git a/ci/scripts/csharp_smoketest.ps1 b/ci/scripts/csharp_smoketest.ps1 new file mode 100644 index 0000000000..da5edd7743 --- /dev/null +++ b/ci/scripts/csharp_smoketest.ps1 @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +param ( + [string]$destination=".\packages" +) + +$ErrorActionPreference = "Stop" + +Write-Host "This script performs the following steps:" +Write-Host " - Runs unit tests against all projects" +Write-Host " - Packages everything to NuGet packages in $destination" +Write-Host " - Runs smoke tests using the NuGet packages" + +Write-Host "" + +cd $PSScriptRoot +cd ..\..\csharp + +Write-Host "Running dotnet test" + +dotnet test + +Write-Host "Running dotnet pack" + +$loc = Get-Location + +Write-Host $loc + +Invoke-Expression "powershell -executionpolicy bypass -File $PSScriptRoot\csharp_pack.ps1 -destination $destination" + +Write-Host "Running smoke tests" + +cd test\SmokeTests + +dotnet test diff --git a/ci/scripts/docs_build.sh b/ci/scripts/docs_build.sh index 261786b99a..271dec459f 100755 --- a/ci/scripts/docs_build.sh +++ b/ci/scripts/docs_build.sh @@ -25,8 +25,25 @@ main() { doxygen popd + pushd "$source_dir/java" + mvn site + popd + pushd "$source_dir/docs" + # The project name/version don't really matter here. + python "$source_dir/docs/source/ext/javadoc_inventory.py" \ + "ADBC" \ + "version" \ + "$source_dir/java/target/site/apidocs" \ + "java/api" + + # We need to determine the base URL without knowing it... + # Inject a dummy URL here, and fix it up in website_build.sh + export ADBC_INTERSPHINX_MAPPING_java_adbc="http://javadocs.home.arpa/;$source_dir/java/target/site/apidocs/objects.inv" + make html + rm -rf "$source_dir/docs/build/html/java/api" + cp -r "$source_dir/java/target/site/apidocs" "$source_dir/docs/build/html/java/api" make doctest popd diff --git a/ci/scripts/website_build.sh b/ci/scripts/website_build.sh index 480c57c908..1f7f146e1c 100755 --- a/ci/scripts/website_build.sh +++ b/ci/scripts/website_build.sh @@ -47,16 +47,24 @@ main() { fi local -r regex='^([0-9]+\.[0-9]+\.[0-9]+)$' + local directory="main" if [[ "${new_version}" =~ $regex ]]; then cp -r "${docs}" "${site}/${new_version}" git -C "${site}" add --force "${new_version}" + directory="${new_version}" else # Assume this is dev docs rm -rf "${site}/main" cp -r "${docs}" "${site}/main" git -C "${site}" add --force "main" + directory="main" fi + # Fix up lazy Intersphinx links (see docs_build.sh) + # Assumes GNU sed + sed -i "s|http://javadocs.home.arpa/|https://arrow.apache.org/adbc/${directory}/|g" $(grep -Rl javadocs.home.arpa "${site}/${directory}/") + git -C "${site}" add --force "${directory}" + # Copy the version script and regenerate the version list # The versions get embedded into the JavaScript file to save a roundtrip rm -f "${site}/version.js" diff --git a/csharp/src/Apache.Arrow.Adbc/Apache.Arrow.Adbc.csproj b/csharp/src/Apache.Arrow.Adbc/Apache.Arrow.Adbc.csproj index b64b32d778..358eca01ae 100644 --- a/csharp/src/Apache.Arrow.Adbc/Apache.Arrow.Adbc.csproj +++ b/csharp/src/Apache.Arrow.Adbc/Apache.Arrow.Adbc.csproj @@ -1,21 +1,21 @@ - - - - netstandard2.0;net6.0 - true - readme.md - - - - - - - - - - true - \ - PreserveNewest - - - + + + + netstandard2.0;net6.0 + true + readme.md + + + + + + + + + + true + \ + PreserveNewest + + + diff --git a/csharp/src/Client/Apache.Arrow.Adbc.Client.csproj b/csharp/src/Client/Apache.Arrow.Adbc.Client.csproj index f44be6f229..ced800c665 100644 --- a/csharp/src/Client/Apache.Arrow.Adbc.Client.csproj +++ b/csharp/src/Client/Apache.Arrow.Adbc.Client.csproj @@ -1,16 +1,16 @@ - - - netstandard2.0;net6.0 - readme.md - - - - - - - true - \ - PreserveNewest - - - + + + netstandard2.0;net6.0 + readme.md + + + + + + + true + \ + PreserveNewest + + + diff --git 
a/csharp/src/Drivers/BigQuery/Apache.Arrow.Adbc.Drivers.BigQuery.csproj b/csharp/src/Drivers/BigQuery/Apache.Arrow.Adbc.Drivers.BigQuery.csproj index a7451c7bd1..6d50c3efc8 100644 --- a/csharp/src/Drivers/BigQuery/Apache.Arrow.Adbc.Drivers.BigQuery.csproj +++ b/csharp/src/Drivers/BigQuery/Apache.Arrow.Adbc.Drivers.BigQuery.csproj @@ -1,22 +1,22 @@ - - - netstandard2.0;net6.0 - readme.md - - - - - - - - - - - - - true - \ - PreserveNewest - - - + + + netstandard2.0;net6.0 + readme.md + + + + + + + + + + + + + true + \ + PreserveNewest + + + diff --git a/csharp/src/Drivers/FlightSql/Apache.Arrow.Adbc.Drivers.FlightSql.csproj b/csharp/src/Drivers/FlightSql/Apache.Arrow.Adbc.Drivers.FlightSql.csproj index b6a7fb4c68..61a38ffb2b 100644 --- a/csharp/src/Drivers/FlightSql/Apache.Arrow.Adbc.Drivers.FlightSql.csproj +++ b/csharp/src/Drivers/FlightSql/Apache.Arrow.Adbc.Drivers.FlightSql.csproj @@ -1,12 +1,12 @@ - - - netstandard2.0;net6.0 - - - - - - - - - + + + netstandard2.0;net6.0 + + + + + + + + + diff --git a/csharp/src/Drivers/Interop/Snowflake/Apache.Arrow.Adbc.Drivers.Interop.Snowflake.csproj b/csharp/src/Drivers/Interop/Snowflake/Apache.Arrow.Adbc.Drivers.Interop.Snowflake.csproj index ece287a7c9..815418f5a3 100644 --- a/csharp/src/Drivers/Interop/Snowflake/Apache.Arrow.Adbc.Drivers.Interop.Snowflake.csproj +++ b/csharp/src/Drivers/Interop/Snowflake/Apache.Arrow.Adbc.Drivers.Interop.Snowflake.csproj @@ -1,39 +1,39 @@ - - - netstandard2.0;net472;net6.0 - readme.md - - - - - - - - - - true - lib\netstandard2.0 - PreserveNewest - - - true - lib\net472 - PreserveNewest - - - true - lib\net6.0 - PreserveNewest - - - - - true - \ - PreserveNewest - - - - - - + + + netstandard2.0;net472;net6.0 + readme.md + + + + + + + + + + true + lib\netstandard2.0 + PreserveNewest + + + true + lib\net472 + PreserveNewest + + + true + lib\net6.0 + PreserveNewest + + + + + true + \ + PreserveNewest + + + + + + diff --git a/csharp/test/Apache.Arrow.Adbc.Tests/Apache.Arrow.Adbc.Tests.csproj b/csharp/test/Apache.Arrow.Adbc.Tests/Apache.Arrow.Adbc.Tests.csproj index eb53f350ac..7deb5dc0b5 100644 --- a/csharp/test/Apache.Arrow.Adbc.Tests/Apache.Arrow.Adbc.Tests.csproj +++ b/csharp/test/Apache.Arrow.Adbc.Tests/Apache.Arrow.Adbc.Tests.csproj @@ -1,23 +1,23 @@ - - - - net472;net6.0 - disable - false - true - True - - - - - - - - - - - - - - - + + + + net472;net6.0 + disable + false + true + True + + + + + + + + + + + + + + + diff --git a/csharp/test/Drivers/BigQuery/Apache.Arrow.Adbc.Tests.Drivers.BigQuery.csproj b/csharp/test/Drivers/BigQuery/Apache.Arrow.Adbc.Tests.Drivers.BigQuery.csproj index d4c2fb541b..d4aba6faf6 100644 --- a/csharp/test/Drivers/BigQuery/Apache.Arrow.Adbc.Tests.Drivers.BigQuery.csproj +++ b/csharp/test/Drivers/BigQuery/Apache.Arrow.Adbc.Tests.Drivers.BigQuery.csproj @@ -1,29 +1,29 @@ - - - net472;net6.0 - - - - - - all - runtime; build; native; contentfiles; analyzers; buildtransitive - - - - - - - - - - - - - PreserveNewest - - - Never - - - + + + net472;net6.0 + + + + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + + + + + + + + + + + PreserveNewest + + + Never + + + diff --git a/csharp/test/Drivers/FlightSql/Apache.Arrow.Adbc.Tests.Drivers.FlightSql.csproj b/csharp/test/Drivers/FlightSql/Apache.Arrow.Adbc.Tests.Drivers.FlightSql.csproj index 7647af2d76..4f72495c4b 100644 --- a/csharp/test/Drivers/FlightSql/Apache.Arrow.Adbc.Tests.Drivers.FlightSql.csproj +++ b/csharp/test/Drivers/FlightSql/Apache.Arrow.Adbc.Tests.Drivers.FlightSql.csproj @@ -1,28 +1,28 @@ - - - 
net472;net6.0 - disable - False - - - - - - all - runtime; build; native; contentfiles; analyzers; buildtransitive - - - - - - - - - - - - - Never - - - + + + net472;net6.0 + disable + False + + + + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + + + + + + + + + + + Never + + + diff --git a/csharp/test/Drivers/Interop/Snowflake/Apache.Arrow.Adbc.Tests.Drivers.Interop.Snowflake.csproj b/csharp/test/Drivers/Interop/Snowflake/Apache.Arrow.Adbc.Tests.Drivers.Interop.Snowflake.csproj index a7dba73d0e..99f13c3d85 100644 --- a/csharp/test/Drivers/Interop/Snowflake/Apache.Arrow.Adbc.Tests.Drivers.Interop.Snowflake.csproj +++ b/csharp/test/Drivers/Interop/Snowflake/Apache.Arrow.Adbc.Tests.Drivers.Interop.Snowflake.csproj @@ -1,27 +1,27 @@ - - - net472;net6.0 - - - - - - all - runtime; build; native; contentfiles; analyzers; buildtransitive - - - - - - - - - - - PreserveNewest - - - Never - - - + + + net472;net6.0 + + + + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + + + + + + + + + PreserveNewest + + + Never + + + diff --git a/csharp/test/SmokeTests/Apache.Arrow.Adbc.SmokeTests/Apache.Arrow.Adbc.SmokeTests.csproj b/csharp/test/SmokeTests/Apache.Arrow.Adbc.SmokeTests/Apache.Arrow.Adbc.SmokeTests.csproj index 08ad0ea09d..a7a7a21303 100644 --- a/csharp/test/SmokeTests/Apache.Arrow.Adbc.SmokeTests/Apache.Arrow.Adbc.SmokeTests.csproj +++ b/csharp/test/SmokeTests/Apache.Arrow.Adbc.SmokeTests/Apache.Arrow.Adbc.SmokeTests.csproj @@ -1,44 +1,44 @@ - - - - - net472;net6.0 - disable - false - true - True - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + net472;net6.0 + disable + false + true + True + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/csharp/test/SmokeTests/BigQuery/Apache.Arrow.Adbc.SmokeTests.Drivers.BigQuery.csproj b/csharp/test/SmokeTests/BigQuery/Apache.Arrow.Adbc.SmokeTests.Drivers.BigQuery.csproj index d032501e43..6a323fd590 100644 --- a/csharp/test/SmokeTests/BigQuery/Apache.Arrow.Adbc.SmokeTests.Drivers.BigQuery.csproj +++ b/csharp/test/SmokeTests/BigQuery/Apache.Arrow.Adbc.SmokeTests.Drivers.BigQuery.csproj @@ -1,33 +1,33 @@ - - - - net472;net6.0 - - - - - - - - - - - - - all - runtime; build; native; contentfiles; analyzers; buildtransitive - - - - - - - - - PreserveNewest - - - - - - + + + + net472;net6.0 + + + + + + + + + + + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + + + + + + + PreserveNewest + + + + + + diff --git a/csharp/test/SmokeTests/FlightSql/Apache.Arrow.Adbc.SmokeTests.Drivers.FlightSql.csproj b/csharp/test/SmokeTests/FlightSql/Apache.Arrow.Adbc.SmokeTests.Drivers.FlightSql.csproj index 7647af2d76..e8dfd14445 100644 --- a/csharp/test/SmokeTests/FlightSql/Apache.Arrow.Adbc.SmokeTests.Drivers.FlightSql.csproj +++ b/csharp/test/SmokeTests/FlightSql/Apache.Arrow.Adbc.SmokeTests.Drivers.FlightSql.csproj @@ -1,28 +1,28 @@ - - - net472;net6.0 - disable - False - - - - - - all - runtime; build; native; contentfiles; analyzers; buildtransitive - - - - - - - - - - - - - Never - - - + + + net472;net6.0 + disable + False + + + + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + + + + + + + + + + + Never + + + diff --git a/csharp/test/SmokeTests/Interop/Snowflake/Apache.Arrow.Adbc.SmokeTests.Drivers.Interop.Snowflake.csproj b/csharp/test/SmokeTests/Interop/Snowflake/Apache.Arrow.Adbc.SmokeTests.Drivers.Interop.Snowflake.csproj index fa722ea644..ced24e6426 100644 --- 
a/csharp/test/SmokeTests/Interop/Snowflake/Apache.Arrow.Adbc.SmokeTests.Drivers.Interop.Snowflake.csproj +++ b/csharp/test/SmokeTests/Interop/Snowflake/Apache.Arrow.Adbc.SmokeTests.Drivers.Interop.Snowflake.csproj @@ -1,33 +1,33 @@ - - - - net472;net6.0 - - - - - - - - - - - - - all - runtime; build; native; contentfiles; analyzers; buildtransitive - - - - - - - - - PreserveNewest - - - - - - + + + + net472;net6.0 + + + + + + + + + + + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + + + + + + + PreserveNewest + + + + + + diff --git a/docker-compose.yml b/docker-compose.yml index a25fb6653b..89394e598f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -28,7 +28,7 @@ services: volumes: - .:/adbc:delegated command: | - /bin/bash -c 'git config --global --add safe.directory /adbc && source /opt/conda/etc/profile.d/conda.sh && mamba create -y -n adbc -c conda-forge go --file /adbc/ci/conda_env_cpp.txt --file /adbc/ci/conda_env_docs.txt --file /adbc/ci/conda_env_python.txt && conda activate adbc && env ADBC_USE_ASAN=0 ADBC_USE_UBSAN=0 /adbc/ci/scripts/cpp_build.sh /adbc /adbc/build && env CGO_ENABLED=1 /adbc/ci/scripts/go_build.sh /adbc /adbc/build && /adbc/ci/scripts/python_build.sh /adbc /adbc/build && /adbc/ci/scripts/r_build.sh /adbc && /adbc/ci/scripts/docs_build.sh /adbc' + /bin/bash -c 'git config --global --add safe.directory /adbc && source /opt/conda/etc/profile.d/conda.sh && mamba create -y -n adbc -c conda-forge go --file /adbc/ci/conda_env_cpp.txt --file /adbc/ci/conda_env_docs.txt --file /adbc/ci/conda_env_java.txt --file /adbc/ci/conda_env_python.txt && conda activate adbc && env ADBC_USE_ASAN=0 ADBC_USE_UBSAN=0 /adbc/ci/scripts/cpp_build.sh /adbc /adbc/build && env CGO_ENABLED=1 /adbc/ci/scripts/go_build.sh /adbc /adbc/build && /adbc/ci/scripts/python_build.sh /adbc /adbc/build && /adbc/ci/scripts/r_build.sh /adbc && /adbc/ci/scripts/docs_build.sh /adbc' ############################ Java JARs ###################################### diff --git a/docs/source/_static/banner.png b/docs/source/_static/banner.png new file mode 100644 index 0000000000..2f58c223e2 Binary files /dev/null and b/docs/source/_static/banner.png differ diff --git a/docs/source/conf.py b/docs/source/conf.py index b5e45a09c9..c1b715e2d0 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. +import os import sys from pathlib import Path @@ -24,7 +25,10 @@ # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information project = "ADBC" -copyright = "2022, Apache Arrow Developers" +copyright = """2022–2024 The Apache Software Foundation. 
Apache Arrow, Arrow, +Apache, the Apache feather logo, and the Apache Arrow project logo are either +registered trademarks or trademarks of The Apache Software Foundation in the +United States and other countries.""" author = "the Apache Arrow Developers" release = "0.9.0 (dev)" # Needed to generate version switcher @@ -35,7 +39,10 @@ exclude_patterns = [] extensions = [ + # recipe directive "adbc_cookbook", + # generic directives to enable intersphinx for java + "adbc_java_domain", "breathe", "numpydoc", "sphinx.ext.autodoc", @@ -43,6 +50,7 @@ "sphinx.ext.intersphinx", "sphinx_copybutton", "sphinx_design", + "sphinxext.opengraph", ] templates_path = ["_templates"] @@ -90,7 +98,10 @@ # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output -html_css_files = ["css/custom.css"] +html_css_files = [ + "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.1.1/css/all.min.css", + "css/custom.css", +] html_static_path = ["_static"] html_theme = "furo" html_theme_options = { @@ -107,6 +118,35 @@ "arrow": ("https://arrow.apache.org/docs/", None), } +# Add env vars like ADBC_INTERSPHINX_MAPPING_adbc_java = url;path +# to inject more mappings + + +def _find_intersphinx_mappings(): + prefix = "ADBC_INTERSPHINX_MAPPING_" + for key, val in os.environ.items(): + if key.startswith(prefix): + name = key[len(prefix) :] + url, _, path = val.partition(";") + print("[ADBC] Found Intersphinx mapping", name) + intersphinx_mapping[name] = (url, path) + # "adbc_java": ( + # "http://localhost:8000/", + # "/home/lidavidm/Code/arrow-adbc/java/target/site/apidocs/objects.inv", + # ), + + +_find_intersphinx_mappings() + + # -- Options for numpydoc ---------------------------------------------------- numpydoc_class_members_toctree = False + +# -- Options for sphinxext.opengraph ----------------------------------------- + +if "dev" in release: + ogp_site_url = "https://arrow.apache.org/adbc/main/" +else: + ogp_site_url = f"https://arrow.apache.org/adbc/{release}/" +ogp_image = "_static/banner.png" diff --git a/docs/source/csharp/index.rst b/docs/source/csharp/index.rst new file mode 100644 index 0000000000..05681563c4 --- /dev/null +++ b/docs/source/csharp/index.rst @@ -0,0 +1,26 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. 
+ +======= +C#/.NET +======= + +The ADBC C# libraries are under development, supporting: + +- A native driver to Google BigQuery +- A way to import/export native (C/C++) drivers +- Bindings to the Snowflake driver diff --git a/docs/source/driver/duckdb.rst b/docs/source/driver/duckdb.rst index ab109b81de..410331c39f 100644 --- a/docs/source/driver/duckdb.rst +++ b/docs/source/driver/duckdb.rst @@ -19,7 +19,7 @@ DuckDB Support ============== -**Available for:** C/C++, GLib/Ruby, Go, Python +**Available for:** C/C++, GLib/Ruby, Go, Python, R `DuckDB`_ provides ADBC support since `version 0.8.0 `_. @@ -87,6 +87,18 @@ ADBC support in DuckDB requires the driver manager. See the `DuckDB Python documentation`_. + .. tab-item:: R + :sync: r + + You must have DuckDB 0.9.1 or higher. + + .. code-block:: r + + # install.packages("duckdb") + library(adbcdrivermanager) + db <- adbc_database_init(duckdb::duckdb_adbc(), ...) + + .. _DuckDB C++ documentation: https://duckdb.org/docs/api/adbc.html#c .. _DuckDB Python documentation: https://duckdb.org/docs/api/adbc.html#python diff --git a/docs/source/driver/flight_sql.rst b/docs/source/driver/flight_sql.rst index db650620a2..aca95d86cd 100644 --- a/docs/source/driver/flight_sql.rst +++ b/docs/source/driver/flight_sql.rst @@ -19,7 +19,7 @@ Flight SQL Driver ================= -**Available for:** C/C++, GLib/Ruby, Go, Java, Python +**Available for:** C/C++, GLib/Ruby, Go, Java, Python, R The Flight SQL Driver provides access to any database implementing a :doc:`arrow:format/FlightSql` compatible endpoint. diff --git a/docs/source/driver/installation.rst b/docs/source/driver/installation.rst index 44f3235a18..bdd1f13a03 100644 --- a/docs/source/driver/installation.rst +++ b/docs/source/driver/installation.rst @@ -145,18 +145,21 @@ From conda-forge_: R = -Install the appropriate driver package from GitHub: +Install the appropriate driver package from CRAN: + +.. code-block:: r + + install.packages("adbcsqlite") + install.packages("adbcpostgresql") + install.packages("duckdb") + +Drivers not yet available on CRAN can be installed from GitHub: .. code-block:: r # install.packages("pak") pak::pak("apache/arrow-adbc/r/adbcflightsql") - pak::pak("apache/arrow-adbc/r/adbcpostgresql") pak::pak("apache/arrow-adbc/r/adbcsnowflake") - pak::pak("apache/arrow-adbc/r/adbcsqlite") - -Installation of stable releases from CRAN is anticipated following the -release of ADBC Libraries 0.6.0. Ruby ==== diff --git a/docs/source/driver/postgresql.rst b/docs/source/driver/postgresql.rst index 7ec5b7f589..ddf9115d76 100644 --- a/docs/source/driver/postgresql.rst +++ b/docs/source/driver/postgresql.rst @@ -31,7 +31,7 @@ overall approach. .. _libpq: https://www.postgresql.org/docs/current/libpq.html .. _pgeon: https://github.com/0x0L/pgeon -.. note:: The PostgreSQL driver is experimental. +.. note:: The PostgreSQL driver is in beta. Performance/optimization and support for complex types and different ADBC features is still ongoing. @@ -75,8 +75,7 @@ Installation .. code-block:: r - # install.packages("pak") - pak::pak("apache/arrow-adbc/r/adbcpostgresql") + install.packages("adbcpostgresql") Usage ===== @@ -188,6 +187,134 @@ PostgreSQL allows defining new types at runtime, so the driver must build a mapping of available types. This is currently done once at startup. -Type support is currently limited. Parameter binding and bulk -ingestion support int16, int32, int64, and string. Reading result -sets is limited to int32, int64, float, double, and string. 
+Type support is currently limited depending on the type and whether it is +being read or written. + +.. list-table:: Arrow type to PostgreSQL type mapping + :header-rows: 1 + + * - Arrow Type + - As Bind Parameter + - In Bulk Ingestion [#bulk-ingestion]_ + + * - binary + - BYTEA + - BYTEA + + * - bool + - BOOLEAN + - BOOLEAN + + * - date32 + - DATE + - DATE + + * - date64 + - ❌ + - ❌ + + * - dictionary + - (as unpacked type) + - (as unpacked type, only for binary/string) + + * - duration + - INTERVAL + - INTERVAL + + * - float32 + - REAL + - REAL + + * - float64 + - DOUBLE PRECISION + - DOUBLE PRECISION + + * - int8 + - SMALLINT + - SMALLINT + + * - int16 + - SMALLINT + - SMALLINT + + * - int32 + - INTEGER + - INTEGER + + * - int64 + - BIGINT + - BIGINT + + * - large_binary + - ❌ + - ❌ + + * - large_string + - TEXT + - TEXT + + * - month_day_nano_interval + - INTERVAL + - INTERVAL + + * - string + - TEXT + - TEXT + + * - timestamp + - TIMESTAMP [#timestamp]_ + - TIMESTAMP/TIMESTAMP WITH TIMEZONE + +.. list-table:: PostgreSQL type to Arrow type mapping + :header-rows: 1 + + * - PostgreSQL Type + - In Result Set + + * - ARRAY + - list + * - BIGINT + - int64 + * - BINARY + - binary + * - BOOLEAN + - bool + * - CHAR + - utf8 + * - DATE + - date32 + * - DOUBLE PRECISION + - float64 + * - INTEGER + - int32 + * - INTERVAL + - month_day_nano_interval + * - NUMERIC + - utf8 [#numeric-utf8]_ + * - REAL + - float32 + * - SMALLINT + - int16 + * - TEXT + - utf8 + * - TIME + - time64 + * - TIMESTAMP WITH TIME ZONE + - timestamp[unit, UTC] + * - TIMESTAMP WITHOUT TIME ZONE + - timestamp[unit] + * - VARCHAR + - utf8 + +.. [#bulk-ingestion] This is the data type used when creating/appending to a + table from Arrow data via the bulk ingestion feature. + +.. [#numeric-utf8] NUMERIC types are read as the string representation of the + value, because the PostgreSQL NUMERIC type cannot be + losslessly converted to the Arrow decimal types. + +.. [#timestamp] When binding a timestamp value, the time zone (if present) is + ignored. The value will be converted to microseconds and + adjusted to the PostgreSQL epoch (2000-01-01) and so may + overflow/underflow; an error will be returned if this would be + the case. diff --git a/docs/source/driver/sqlite.rst b/docs/source/driver/sqlite.rst index 6ec4a02b42..30e7d32b67 100644 --- a/docs/source/driver/sqlite.rst +++ b/docs/source/driver/sqlite.rst @@ -65,10 +65,9 @@ Installation .. tab-item:: R :sync: r - .. code-block:: shell + .. code-block:: r - # install.packages("pak") - pak::pak("apache/arrow-adbc/r/adbcsqlite") + install.packages("adbcsqlite") Usage ===== @@ -211,7 +210,7 @@ Example .. code-block:: go - # TODO + // TODO .. tab-item:: Python :sync: python diff --git a/docs/source/driver/status.rst b/docs/source/driver/status.rst index 64c50532b3..d295bc3f7c 100644 --- a/docs/source/driver/status.rst +++ b/docs/source/driver/status.rst @@ -34,6 +34,11 @@ Implementation Status - Implementation Language - Status + * - BigQuery + - C# + - C# + - Experimental + * - Flight SQL (Go) - C, Go - Go @@ -64,11 +69,6 @@ Implementation Status - Go - Experimental - * - BigQuery - - C# - - C# - - Experimental - .. [#supported-languages] C drivers are usable from Go, Python, and Ruby as well. 
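As a concrete illustration of the PostgreSQL mapping tables above, here is a
minimal round-trip sketch. It assumes a reachable server via
``ADBC_POSTGRESQL_TEST_URI`` and uses a hypothetical ``example_types`` table;
it is not taken from the driver's test suite.

.. code-block:: python

   import os

   import pyarrow

   import adbc_driver_postgresql.dbapi

   uri = os.environ["ADBC_POSTGRESQL_TEST_URI"]
   with adbc_driver_postgresql.dbapi.connect(uri) as conn:
       with conn.cursor() as cur:
           cur.execute("DROP TABLE IF EXISTS example_types")

           # Per the mapping above: int16 -> SMALLINT,
           # float64 -> DOUBLE PRECISION, string -> TEXT.
           data = pyarrow.table(
               [[1, 2], [1.5, 2.5], ["a", "b"]],
               schema=pyarrow.schema(
                   [("small", "int16"), ("dbl", "float64"), ("txt", "string")]
               ),
           )
           cur.adbc_ingest("example_types", data, mode="create")

           # In the other direction, NUMERIC is read back as a string
           # (utf8), since it has no lossless Arrow decimal mapping.
           cur.execute("SELECT CAST(1.25 AS NUMERIC) AS n")
           print(cur.fetchone())  # expected: ('1.25',)
       conn.commit()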
Feature Support @@ -177,10 +177,10 @@ Update Queries - Y * - PostgreSQL + - N/A - Y - Y - Y - - N/A - Y * - SQLite diff --git a/docs/source/ext/adbc_java_domain.py b/docs/source/ext/adbc_java_domain.py new file mode 100644 index 0000000000..186141fc84 --- /dev/null +++ b/docs/source/ext/adbc_java_domain.py @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""A basic Java domain for Sphinx.""" + +import typing + +from sphinx.application import Sphinx + + +def setup(app: Sphinx) -> dict[str, typing.Any]: + # XXX: despite documentation, this is added to 'std' domain not 'rst' + # domain (look at the source) + app.add_object_type( + "javatype", + "jtype", + objname="Java Type", + ) + app.add_object_type( + "javamember", + "jmember", + objname="Java Member", + ) + app.add_object_type( + "javapackage", + "jpackage", + objname="Java Package", + ) + return { + "version": "0.1", + "parallel_read_safe": True, + "parallel_write_safe": True, + } diff --git a/docs/source/ext/javadoc_inventory.py b/docs/source/ext/javadoc_inventory.py new file mode 100644 index 0000000000..7c2fbcf255 --- /dev/null +++ b/docs/source/ext/javadoc_inventory.py @@ -0,0 +1,173 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Generate a Sphinx inventory for a Javadoc site.""" + +from __future__ import annotations + +import argparse +import json +import typing +import urllib.parse +from pathlib import Path + +import sphinx.util.inventory + +# XXX: we're taking advantage of duck typing to do stupid things here. 
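+# (Concretely: InventoryFile.dump only reads env.config.project,
+# env.config.version, the objects yielded by each domain's get_objects(),
+# and builder.get_target_uri(), so the Fake* classes below implement
+# exactly that surface and nothing more.)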
+ + +class FakeEnv(typing.NamedTuple): + project: str + version: str + + +class FakeObject(typing.NamedTuple): + # Looks like this + # name domainname:typ prio uri dispname + name: str + # written as '-' if equal to name + dispname: str + # member, doc, etc + typ: str + # passed through builder.get_target_uri + docname: str + # not including the # + anchor: str + # written, but never used + prio: str + + +class FakeDomain(typing.NamedTuple): + objects: list[FakeObject] + + def get_objects(self): + return self.objects + + +class FakeBuildEnvironment(typing.NamedTuple): + config: FakeEnv + domains: dict[str, FakeDomain] + + +class FakeBuilder: + def get_target_uri(self, docname: str) -> str: + return docname + + +def extract_index(data: str, prelude: str) -> list: + epilogue = ";updateSearchResults();" + if not data.startswith(prelude): + raise ValueError( + f"Cannot parse search index; expected {prelude!r} but found {data[:50]!r}" + ) + if data.endswith(epilogue): + data = data[len(prelude) : -len(epilogue)] + else: + # Some JDK versions appear to generate without the epilogue + data = data[len(prelude) :] + return json.loads(data) + + +def make_fake_domains(root: Path, base_url: str) -> dict[str, FakeDomain]: + if not base_url.endswith("/"): + base_url += "/" + + # Scrape the search index generated by Javadoc + # https://github.com/openjdk/jdk17/blob/4afbcaf55383ec2f5da53282a1547bac3d099e9d/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/toolkit/util/IndexItem.java#L515 + # "p" is containing package + # "m" is containing module + # "c" is containing class + # "l" is label + # "u" is the URL anchor + + with open(root / "type-search-index.js") as f: + data = extract_index(f.read(), "typeSearchIndex = ") + with open(root / "member-search-index.js") as f: + data.extend(extract_index(f.read(), "memberSearchIndex = ")) + with open(root / "package-search-index.js") as f: + data.extend(extract_index(f.read(), "packageSearchIndex = ")) + + domains = { + "std": FakeDomain(objects=[]), + } + + for item in data: + if "p" not in item: + # Non-code item (package, or index) + if "All " in item["l"]: + # Ignore indices ("All Packages") + continue + # This is a package + name = item["l"] + url = f"{item['l'].replace('.', '/')}/package-summary.html" + anchor = "" + typ = "javapackage" + domain = "std" + elif "c" in item: + # This is a class member + name = f"{item['p']}.{item['c']}#{item['l']}" + url = f"{item['p'].replace('.', '/')}/{item['c']}.html" + anchor = item["u"] if "u" in item else item["l"] + typ = "javamember" + domain = "std" + else: + # This is a class/interface + name = f"{item['p']}.{item['l']}" + url = f"{item['p'].replace('.', '/')}/{item['l']}.html" + anchor = "" + typ = "javatype" + domain = "std" + + url = urllib.parse.urljoin(base_url, url) + domains[domain].objects.append( + FakeObject( + name=name, + dispname=name, + typ=typ, + docname=url, + anchor=anchor, + prio=1, + ) + ) + + return domains + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("project", help="Project name.") + parser.add_argument("version", help="Project version.") + parser.add_argument("path", type=Path, help="Path to the generated Javadocs.") + parser.add_argument("url", help="Eventual base URL of the Javadocs.") + + args = parser.parse_args() + + domains = make_fake_domains(args.path, args.url) + config = FakeEnv(project=args.project, version=args.version) + env = FakeBuildEnvironment(config=config, domains=domains) + + output = args.path / "objects.inv" + 
sphinx.util.inventory.InventoryFile.dump( + str(output), + env, + FakeBuilder(), + ) + print("Wrote", output) + + +if __name__ == "__main__": + main() diff --git a/docs/source/faq.rst b/docs/source/faq.rst index 803b54f56b..43bc889b96 100644 --- a/docs/source/faq.rst +++ b/docs/source/faq.rst @@ -30,7 +30,7 @@ ADBC is: For example, result sets of queries in ADBC are all returned as streams of Arrow data, not row-by-row. - A set of implementations of that API in different languages (C/C++, - Go, Java, Python, and Ruby) that target different databases + C#/.NET, Go, Java, Python, and Ruby) that target different databases (e.g. PostgreSQL, SQLite, any database supporting Flight SQL). Why not just use JDBC/ODBC? diff --git a/docs/source/format/specification.rst b/docs/source/format/specification.rst index 19b73010cb..d1c91ea120 100644 --- a/docs/source/format/specification.rst +++ b/docs/source/format/specification.rst @@ -23,7 +23,9 @@ This document summarizes the general featureset. - For C/C++ details, see :doc:`adbc.h <../../cpp/api/adbc>`. - For Go details, see the `source `__. -- For Java details, see the `source `__. +- For Java details, see the `source + `__, particularly + the package :jpackage:`org.apache.arrow.adbc.core`. Databases ========= @@ -34,7 +36,7 @@ provides a place to hold ownership of the in-memory database. - C/C++: :cpp:class:`AdbcDatabase` - Go: ``Driver`` -- Java: ``org.apache.arrow.adbc.core.AdbcDatabase`` +- Java: :jtype:`org.apache.arrow.adbc.core.AdbcDatabase` Connections =========== @@ -43,7 +45,7 @@ A connection is a single, logical connection to a database. - C/C++: :cpp:class:`AdbcConnection` - Go: ``Connection`` -- Java: ``org.apache.arrow.adbc.core.AdbcConnection`` +- Java: :jtype:`org.apache.arrow.adbc.core.AdbcConnection` Autocommit ---------- @@ -55,7 +57,7 @@ implementations will support this. - C/C++: :c:macro:`ADBC_CONNECTION_OPTION_AUTOCOMMIT` - Go: ``OptionKeyAutoCommit`` -- Java: ``org.apache.arrow.adbc.core.AdbcConnection#setAutoCommit(boolean)`` +- Java: :jmember:`org.apache.arrow.adbc.core.AdbcConnection#setAutoCommit(boolean)` Metadata -------- diff --git a/docs/source/glossary.rst b/docs/source/glossary.rst new file mode 100644 index 0000000000..f54c63b290 --- /dev/null +++ b/docs/source/glossary.rst @@ -0,0 +1,55 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +======== +Glossary +======== + +.. glossary:: + + client API + The API that an application uses to interact with a database. May + abstract over the underlying :term:`wire protocol` and other details. + For example, ADBC, JDBC, ODBC. + + driver + A library that implements a :term:`client API` using a :term:`wire + protocol`. 
For example, the ADBC PostgreSQL driver exposes the ADBC + client API and interacts with a PostgreSQL database via the PostgreSQL + wire protocol. The JDBC PostgreSQL driver uses the same wire protocol, + but exposes the JDBC client API instead. + + driver manager + A library that helps manage multiple drivers for a given client API. + For example, the JDBC driver manager can find a appropriate driver + implementation for a database URI. + + The ADBC driver manager in each language is similar. In C/C++, it can + dynamically load drivers so that applications do not have to directly + link to them. (Since all drivers expose the same API, their symbols + would collide otherwise.) In Python, it loads drivers and provides + Python bindings on top. + + wire protocol + The protocol that a database driver uses to interact with a database. + For example, :external:doc:`format/FlightSql`, the `PostgreSQL wire + protocol`_, or `Tabular Data Stream`_ (Microsoft SQL Server). Generally + not directly used by an application. + +.. _Arrow Flight SQL: https://arrow.apache.org/docs/format/FlightSql.html +.. _PostgreSQL wire protocol: https://www.postgresql.org/docs/current/protocol.html +.. _Tabular Data Stream: https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-tds/b46a581a-39de-4745-b076-ec4dbb7d13ec diff --git a/docs/source/index.rst b/docs/source/index.rst index 69551b0b7e..494ccfc711 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -15,25 +15,208 @@ .. specific language governing permissions and limitations .. under the License. +:sd_hide_title: + ================= Apache Arrow ADBC ================= -To get started, choose a language and follow the Quickstart page. +.. div:: + :style: border-bottom: 1px solid var(--color-foreground-border); + + .. grid:: + :margin: 4 3 0 0 + + .. grid-item:: + :columns: 12 12 12 12 + :class: sd-fs-2 + + ADBC: Arrow Database Connectivity + + .. grid-item:: + :columns: 12 12 12 12 + :class: sd-fs-4 + + **Cross-language**, **Arrow-native** database access. + +ADBC is a set of APIs and libraries for Arrow-native access to databases. +Execute SQL and Substrait_ queries, query database catalogs, and more, all +using Arrow data to eliminate unnecessary data copies, speed up access, and +make it more convenient to build analytical applications. + +.. _Substrait: https://substrait.io/ + +.. div:: + + .. grid:: + :margin: 4 4 0 0 + :gutter: 1 + + .. grid-item-card:: + :columns: 12 12 4 4 + + Quickstart + ^^^ + + Get started with simple examples in your language of choice. + + +++ + + .. button-ref:: cpp/quickstart + :ref-type: doc + :color: secondary + :expand: + + C/C++ + + .. button-link:: https://pkg.go.dev/github.com/apache/arrow-adbc/go/adbc + :color: secondary + :expand: + + Go + + .. button-ref:: java/quickstart + :ref-type: doc + :color: secondary + :expand: + + Java + + .. button-ref:: python/quickstart + :ref-type: doc + :color: secondary + :expand: + + Python + + .. button-ref:: r/index + :ref-type: doc + :color: secondary + :expand: + + R + + .. grid-item-card:: + :columns: 12 4 4 4 + + Specification + ^^^ + + Learn about the underlying API specification. + + +++ + + .. button-link:: https://arrow.apache.org/blog/2023/01/05/introducing-arrow-adbc/ + :color: secondary + :expand: + + Introducing ADBC :octicon:`cross-reference` -To learn more about ADBC, see the `introductory blog post -`_. + .. button-ref:: format/specification + :ref-type: doc + :color: secondary + :expand: + + Specification + + .. 
button-ref:: faq + :ref-type: doc + :color: secondary + :expand: + + FAQ + + .. button-ref:: glossary + :ref-type: doc + :color: secondary + :expand: + + Glossary + + .. grid-item-card:: + :columns: 12 4 4 4 + + Development + ^^^ + + Report bugs, ask questions, and contribute to Apache Arrow. + + +++ + + .. button-link:: https://github.com/apache/arrow-adbc/issues + :color: secondary + :expand: + + :fab:`github` Issues/Questions + + .. button-link:: https://arrow.apache.org/community/ + :color: secondary + :expand: + + Mailing List :octicon:`cross-reference` + + .. button-link:: https://github.com/apache/arrow-adbc/blob/main/CONTRIBUTING.md + :color: secondary + :expand: + + Contributing :octicon:`cross-reference` + +Why ADBC? +========= + +.. grid:: 1 2 2 2 + :margin: 4 4 0 0 + :gutter: 1 + + .. grid-item-card:: Arrow-native + :link: https://arrow.apache.org/ + + Execute queries and get back results in Arrow format, eliminating extra + data copies for Arrow-native backends. + + +++ + Learn about Apache Arrow + + .. grid-item-card:: Backend-agnostic + :link: driver/status + :link-type: doc + + Connect to all kinds of databases, even ones that aren't Arrow-native. + ADBC drivers optimize conversion to/from Arrow where required, saving + work for developers. + + +++ + See Supported Drivers + + .. grid-item-card:: Cross-language + + Work in C/C++, C#, Go, Java, Python, R, Ruby, and more. + + .. grid-item-card:: Full-featured + + Execute SQL and Substrait, query database catalogs, inspect table + schemas, and more. ADBC handles common tasks without having to pull in + another database client. + + .. grid-item-card:: Language-native + + Use language-native APIs that you're already familiar with, like DBAPI + in Python, ``database/sql`` in Go, or DBI in R. .. toctree:: :maxdepth: 1 + :hidden: faq + glossary .. toctree:: :maxdepth: 1 :caption: Supported Environments + :hidden: C/C++ + C#/.NET Go Java Python @@ -42,6 +225,7 @@ To learn more about ADBC, see the `introductory blog post .. toctree:: :maxdepth: 1 :caption: Drivers + :hidden: driver/installation driver/status @@ -55,6 +239,7 @@ To learn more about ADBC, see the `introductory blog post .. toctree:: :maxdepth: 1 :caption: Specification + :hidden: format/specification format/versioning @@ -63,28 +248,8 @@ To learn more about ADBC, see the `introductory blog post .. toctree:: :maxdepth: 1 :caption: Development + :hidden: development/contributing development/nightly development/releasing - -ADBC (Arrow Database Connectivity) is an API specification for -Arrow-based database access. It provides a set of APIs in C, Go, and -Java that define how to interact with databases, including executing -queries and fetching metadata, that use Arrow data for result sets and -query parameters. These APIs are then implemented by drivers (or a -driver manager) that use some underlying protocol to work with -specific databases. - -ADBC aims to provide applications with a single, Arrow-based API to -work with multiple databases, whether Arrow-native or not. -Application code should not need to juggle conversions from -non-Arrow-native datasources alongside bindings for multiple -Arrow-native database protocols. - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/docs/source/java/api/adbc_driver_manager.rst b/docs/source/java/api/adbc_driver_manager.rst deleted file mode 100644 index 6c4ec2d10f..0000000000 --- a/docs/source/java/api/adbc_driver_manager.rst +++ /dev/null @@ -1,79 +0,0 @@ -.. 
Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, -.. software distributed under the License is distributed on an -.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -.. KIND, either express or implied. See the License for the -.. specific language governing permissions and limitations -.. under the License. - -============================ -``Java ADBC Driver Manager`` -============================ - -Java ADBC Wrapper for JDBC -========================== - -The Java ADBC Driver Manager provides a means to manage ADBC drivers and facilitate connections to databases using the ADBC API. This particular implementation wraps around the JDBC API. - -Constants ---------- - -.. data:: org.apache.arrow.adbc.driver.jdbc.JdbcDriver.PARAM_DATASOURCE - - A parameter for creating an ``AdbcDatabase`` from a ``DataSource``. - -.. data:: org.apache.arrow.adbc.driver.jdbc.JdbcDriver.PARAM_JDBC_QUIRKS - - A parameter for specifying backend-specific configuration. - -.. data:: org.apache.arrow.adbc.driver.jdbc.JdbcDriver.PARAM_URI - - A parameter for specifying a URI to connect to, aligning with the C/Go implementations. - -Classes -------- - -.. class:: org.apache.arrow.adbc.driver.jdbc.JdbcDriver - - An ADBC driver implementation that wraps around the JDBC API. - - .. method:: open(Map parameters) - - Opens a new database connection using the specified parameters. - -.. class:: org.apache.arrow.adbc.driver.jdbc.JdbcDataSourceDatabase - - Represents an ADBC database backed by a JDBC ``DataSource``. - -Utilities ---------- - -The ``JdbcDriver`` class provides utility methods to fetch and validate parameters from the provided options map. - -.. method:: org.apache.arrow.adbc.driver.jdbc.JdbcDriver.getParam(Class klass, Map parameters, String... choices) - - Retrieves a parameter from the provided map, validating its type and ensuring no duplicates. - -Usage -===== - -The ``JdbcDriver`` class is registered with the ``AdbcDriverManager`` upon class loading. To utilize this driver: - -1. Ensure the necessary dependencies are in place. -2. Create a ``Map`` containing the connection parameters. -3. Use the ``AdbcDriverManager`` to obtain an instance of the ``JdbcDriver``. -4. Open a new database connection using the driver's ``open`` method. - -Exceptions -========== - -Any errors during the driver operations throw the ``AdbcException``. This exception provides detailed messages indicating the nature of the problem. diff --git a/docs/source/java/api/index.rst b/docs/source/java/api/index.rst index e93406f3c4..a316867429 100644 --- a/docs/source/java/api/index.rst +++ b/docs/source/java/api/index.rst @@ -15,11 +15,9 @@ .. specific language governing permissions and limitations .. under the License. -==================== +================== Java API Reference -==================== +================== -.. toctree:: - :maxdepth: 1 - - adbc_driver_manager +This is a stub page for the Javadocs. If you're seeing this page, it means +that the actual Javadocs were not generated. 
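As an aside for anyone touching ``javadoc_inventory.py``: the generated
``objects.inv`` can be sanity-checked by loading it back the same way
intersphinx does. A minimal sketch, assuming Sphinx is installed and the
Javadocs were built at the path used by ``docs_build.sh``:

.. code-block:: python

   import os

   from sphinx.util.inventory import InventoryFile

   # The path matches the docs_build.sh invocation above; the base URL is
   # the same dummy injected there (later rewritten by website_build.sh).
   with open("java/target/site/apidocs/objects.inv", "rb") as f:
       inv = InventoryFile.load(f, "http://javadocs.home.arpa/", os.path.join)

   # Keys look like "std:javatype"; each maps object names to
   # (project, version, uri, display name) tuples.
   for key, entries in inv.items():
       print(key, len(entries))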
diff --git a/docs/source/java/driver_manager.rst b/docs/source/java/driver_manager.rst index 78215a0e90..3f31421e41 100644 --- a/docs/source/java/driver_manager.rst +++ b/docs/source/java/driver_manager.rst @@ -35,4 +35,4 @@ To include the ADBC Driver Manager in your Maven project, add the following depe API Reference ============= -See the API reference: :doc:`./api/adbc_driver_manager`. +See the `API reference <./api/org/apache/arrow/adbc/drivermanager/package-summary.html>`_. diff --git a/docs/source/python/recipe/postgresql.rst b/docs/source/python/recipe/postgresql.rst index dbf28adba8..7d578b3633 100644 --- a/docs/source/python/recipe/postgresql.rst +++ b/docs/source/python/recipe/postgresql.rst @@ -24,11 +24,23 @@ Authenticate with a username and password .. recipe:: postgresql_authenticate.py +.. _recipe-postgresql-create-append: + +Create/append to a table from an Arrow dataset +============================================== + +.. recipe:: postgresql_create_dataset_table.py + Create/append to a table from an Arrow table ============================================ .. recipe:: postgresql_create_append_table.py +Create/append to a temporary table +================================== + +.. recipe:: postgresql_create_temp_table.py + Execute a statement with bind parameters ======================================== @@ -39,6 +51,11 @@ Get the Arrow schema of a table .. recipe:: postgresql_get_table_schema.py +Get the Arrow schema of a query +=============================== + +.. recipe:: postgresql_get_query_schema.py + List catalogs, schemas, and tables ================================== diff --git a/docs/source/python/recipe/postgresql_create_append_table.py b/docs/source/python/recipe/postgresql_create_append_table.py index 54331ba097..36e29b9386 100644 --- a/docs/source/python/recipe/postgresql_create_append_table.py +++ b/docs/source/python/recipe/postgresql_create_append_table.py @@ -28,10 +28,11 @@ uri = os.environ["ADBC_POSTGRESQL_TEST_URI"] conn = adbc_driver_postgresql.dbapi.connect(uri) -#: For the purposes of testing, we'll first make sure the table -#: doesn't exist. +#: For the purposes of testing, we'll first make sure the tables we're about +#: to use don't exist. with conn.cursor() as cur: cur.execute("DROP TABLE IF EXISTS example") + cur.execute("DROP TABLE IF EXISTS example2") #: Now we can create the table. with conn.cursor() as cur: @@ -77,4 +78,26 @@ cur.execute("SELECT COUNT(*) FROM example") assert cur.fetchone() == (8,) +#: We can also choose to create the table if it doesn't exist, and otherwise +#: append. + +with conn.cursor() as cur: + cur.adbc_ingest("example2", data, mode="create_append") + + cur.execute("SELECT COUNT(*) FROM example2") + assert cur.fetchone() == (4,) + + cur.adbc_ingest("example2", data, mode="create_append") + + cur.execute("SELECT COUNT(*) FROM example2") + assert cur.fetchone() == (8,) + +#: Finally, we can replace the table. + +with conn.cursor() as cur: + cur.adbc_ingest("example", data.slice(0, 2), mode="replace") + + cur.execute("SELECT COUNT(*) FROM example") + assert cur.fetchone() == (2,) + conn.close() diff --git a/docs/source/python/recipe/postgresql_create_dataset_table.py b/docs/source/python/recipe/postgresql_create_dataset_table.py new file mode 100644 index 0000000000..e26093a308 --- /dev/null +++ b/docs/source/python/recipe/postgresql_create_dataset_table.py @@ -0,0 +1,184 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# RECIPE STARTS HERE + +#: ADBC makes it easy to load PyArrow datasets into your datastore. + +import os +import tempfile +from pathlib import Path + +import pyarrow +import pyarrow.csv +import pyarrow.dataset +import pyarrow.feather +import pyarrow.parquet + +import adbc_driver_postgresql.dbapi + +uri = os.environ["ADBC_POSTGRESQL_TEST_URI"] +conn = adbc_driver_postgresql.dbapi.connect(uri) + +#: For the purposes of testing, we'll first make sure the tables we're about +#: to use don't exist. +with conn.cursor() as cur: + cur.execute("DROP TABLE IF EXISTS csvtable") + cur.execute("DROP TABLE IF EXISTS ipctable") + cur.execute("DROP TABLE IF EXISTS pqtable") + cur.execute("DROP TABLE IF EXISTS csvdataset") + cur.execute("DROP TABLE IF EXISTS ipcdataset") + cur.execute("DROP TABLE IF EXISTS pqdataset") + +conn.commit() + +#: Generating sample data +#: ~~~~~~~~~~~~~~~~~~~~~~ + +tempdir = tempfile.TemporaryDirectory( + prefix="adbc-docs-", + ignore_cleanup_errors=True, +) +root = Path(tempdir.name) +table = pyarrow.table( + [ + [1, 1, 2], + ["foo", "bar", "baz"], + ], + names=["ints", "strs"], +) + +#: First we'll write single files. + +csv_file = root / "example.csv" +pyarrow.csv.write_csv(table, csv_file) + +ipc_file = root / "example.arrow" +pyarrow.feather.write_feather(table, ipc_file) + +parquet_file = root / "example.parquet" +pyarrow.parquet.write_table(table, parquet_file) + +#: We'll also generate some partitioned datasets. + +csv_dataset = root / "csv_dataset" +pyarrow.dataset.write_dataset( + table, + csv_dataset, + format="csv", + partitioning=["ints"], +) + +ipc_dataset = root / "ipc_dataset" +pyarrow.dataset.write_dataset( + table, + ipc_dataset, + format="feather", + partitioning=["ints"], +) + +parquet_dataset = root / "parquet_dataset" +pyarrow.dataset.write_dataset( + table, + parquet_dataset, + format="parquet", + partitioning=["ints"], +) + +#: Loading CSV Files into PostgreSQL +#: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +#: We can directly pass a :py:class:`pyarrow.RecordBatchReader` (from +#: ``open_csv``) to ``adbc_ingest``. We can also pass a +#: :py:class:`pyarrow.dataset.Dataset`, or a +#: :py:class:`pyarrow.dataset.Scanner`. 
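+#: (Under the hood, the DBAPI layer turns a ``Dataset`` or ``Scanner`` into
+#: a :py:class:`pyarrow.RecordBatchReader` via ``Scanner.to_reader()``, so
+#: all three forms stream into PostgreSQL the same way.)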
+ +with conn.cursor() as cur: + reader = pyarrow.csv.open_csv(csv_file) + cur.adbc_ingest("csvtable", reader, mode="create") + + reader = pyarrow.dataset.dataset( + csv_dataset, + format="csv", + partitioning=["ints"], + ) + cur.adbc_ingest("csvdataset", reader, mode="create") + +conn.commit() + +with conn.cursor() as cur: + cur.execute("SELECT ints, strs FROM csvtable ORDER BY ints, strs ASC") + assert cur.fetchall() == [(1, "bar"), (1, "foo"), (2, "baz")] + + cur.execute("SELECT ints, strs FROM csvdataset ORDER BY ints, strs ASC") + assert cur.fetchall() == [(1, "bar"), (1, "foo"), (2, "baz")] + +#: Loading Arrow IPC (Feather) Files into PostgreSQL +#: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +with conn.cursor() as cur: + reader = pyarrow.ipc.RecordBatchFileReader(ipc_file) + #: Because of quirks in the PyArrow API, we have to read the file into + #: memory. + cur.adbc_ingest("ipctable", reader.read_all(), mode="create") + + #: The Dataset API will stream the data into memory and then into + #: PostgreSQL, though. + reader = pyarrow.dataset.dataset( + ipc_dataset, + format="feather", + partitioning=["ints"], + ) + cur.adbc_ingest("ipcdataset", reader, mode="create") + +conn.commit() + +with conn.cursor() as cur: + cur.execute("SELECT ints, strs FROM ipctable ORDER BY ints, strs ASC") + assert cur.fetchall() == [(1, "bar"), (1, "foo"), (2, "baz")] + + cur.execute("SELECT ints, strs FROM ipcdataset ORDER BY ints, strs ASC") + assert cur.fetchall() == [(1, "bar"), (1, "foo"), (2, "baz")] + +#: Loading Parquet Files into PostgreSQL +#: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +with conn.cursor() as cur: + reader = pyarrow.parquet.ParquetFile(parquet_file) + cur.adbc_ingest("pqtable", reader.iter_batches(), mode="create") + + reader = pyarrow.dataset.dataset( + parquet_dataset, + format="parquet", + partitioning=["ints"], + ) + cur.adbc_ingest("pqdataset", reader, mode="create") + +conn.commit() + +with conn.cursor() as cur: + cur.execute("SELECT ints, strs FROM pqtable ORDER BY ints, strs ASC") + assert cur.fetchall() == [(1, "bar"), (1, "foo"), (2, "baz")] + + cur.execute("SELECT ints, strs FROM pqdataset ORDER BY ints, strs ASC") + assert cur.fetchall() == [(1, "bar"), (1, "foo"), (2, "baz")] + +#: Cleanup +#: ~~~~~~~ + +conn.close() +tempdir.cleanup() diff --git a/docs/source/python/recipe/postgresql_create_temp_table.py b/docs/source/python/recipe/postgresql_create_temp_table.py new file mode 100644 index 0000000000..2d762b9a49 --- /dev/null +++ b/docs/source/python/recipe/postgresql_create_temp_table.py @@ -0,0 +1,89 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# RECIPE STARTS HERE +#: ADBC allows creating and appending to temporary tables as well. 
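+#: (In PostgreSQL, temporary tables live in a per-session schema such as
+#: ``pg_temp``, which is searched before ``public``; that is why, later in
+#: this recipe, an unqualified name resolves to the temporary table even
+#: once a regular table with the same name exists.)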
+ +import os + +import pyarrow + +import adbc_driver_postgresql.dbapi + +uri = os.environ["ADBC_POSTGRESQL_TEST_URI"] +conn = adbc_driver_postgresql.dbapi.connect(uri) + +#: For the purposes of testing, we'll first make sure the tables we're about +#: to use don't exist. +with conn.cursor() as cur: + cur.execute("DROP TABLE IF EXISTS example") + +#: To create a temporary table, just specify the option "temporary". +data = pyarrow.table( + [ + [1, 2, None, 4], + ], + schema=pyarrow.schema( + [ + ("ints", "int32"), + ] + ), +) + +with conn.cursor() as cur: + cur.adbc_ingest("example", data, mode="create", temporary=True) + +conn.commit() + +#: After ingestion, we can fetch the result. +with conn.cursor() as cur: + cur.execute("SELECT * FROM example") + assert cur.fetchone() == (1,) + assert cur.fetchone() == (2,) + + cur.execute("SELECT COUNT(*) FROM example") + assert cur.fetchone() == (4,) + +#: Temporary tables are separate from regular tables, even if they have the +#: same name. + +with conn.cursor() as cur: + cur.adbc_ingest("example", data.slice(0, 2), mode="create", temporary=False) + +conn.commit() + +with conn.cursor() as cur: + #: Because we have two tables with the same name, we have to explicitly + #: reference the normal temporary table here. + cur.execute("SELECT COUNT(*) FROM public.example") + assert cur.fetchone() == (2,) + + cur.execute("SELECT COUNT(*) FROM example") + assert cur.fetchone() == (4,) + +conn.close() + +#: After closing the connection, the temporary table is implicitly dropped. +#: If we reconnect, the table won't exist; we'll see only the 'normal' table. + +with adbc_driver_postgresql.dbapi.connect(uri) as conn: + with conn.cursor() as cur: + cur.execute("SELECT COUNT(*) FROM example") + assert cur.fetchone() == (2,) + +#: All the regular ingestion options apply to temporary tables, too. See +#: :ref:`recipe-postgresql-create-append` for more examples. diff --git a/docs/source/python/recipe/postgresql_get_query_schema.py b/docs/source/python/recipe/postgresql_get_query_schema.py new file mode 100644 index 0000000000..2568453924 --- /dev/null +++ b/docs/source/python/recipe/postgresql_get_query_schema.py @@ -0,0 +1,55 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# RECIPE STARTS HERE + +#: ADBC lets you get the schema of a result set, without executing the query. + +import os + +import pyarrow + +import adbc_driver_postgresql.dbapi + +uri = os.environ["ADBC_POSTGRESQL_TEST_URI"] +conn = adbc_driver_postgresql.dbapi.connect(uri) + +#: We'll create an example table to test. 
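+#: (``adbc_execute_schema`` corresponds to ADBC's ``ExecuteSchema``; the
+#: driver answers it without executing the query or transferring rows;
+#: for PostgreSQL, presumably by having the server describe a prepared
+#: statement.)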
+with conn.cursor() as cur: + cur.execute("DROP TABLE IF EXISTS example") + cur.execute("CREATE TABLE example (ints INT, bigints BIGINT)") + +conn.commit() + +expected = pyarrow.schema( + [ + ("ints", "int32"), + ("bigints", "int64"), + ] +) + +with conn.cursor() as cur: + assert cur.adbc_execute_schema("SELECT * FROM example") == expected + + #: PostgreSQL doesn't know the type here, so it just returns a guess. + assert cur.adbc_execute_schema("SELECT $1 AS res") == pyarrow.schema( + [ + ("res", "string"), + ] + ) + +conn.close() diff --git a/docs/source/python/recipe/postgresql_get_table_schema.py b/docs/source/python/recipe/postgresql_get_table_schema.py index 3f1bae7264..aacbc1c254 100644 --- a/docs/source/python/recipe/postgresql_get_table_schema.py +++ b/docs/source/python/recipe/postgresql_get_table_schema.py @@ -28,13 +28,18 @@ uri = os.environ["ADBC_POSTGRESQL_TEST_URI"] conn = adbc_driver_postgresql.dbapi.connect(uri) -#: We'll create an example table to test. +#: We'll create some example tables to test. with conn.cursor() as cur: cur.execute("DROP TABLE IF EXISTS example") cur.execute("CREATE TABLE example (ints INT, bigints BIGINT)") + cur.execute("CREATE SCHEMA IF NOT EXISTS other_schema") + cur.execute("DROP TABLE IF EXISTS other_schema.example") + cur.execute("CREATE TABLE other_schema.example (strings TEXT, values NUMERIC)") + conn.commit() +#: By default the "active" catalog/schema are assumed. assert conn.adbc_get_table_schema("example") == pyarrow.schema( [ ("ints", "int32"), @@ -42,4 +47,23 @@ ] ) +#: We can explicitly specify the PostgreSQL schema to get the Arrow schema of +#: a table in a different namespace. +#: +#: .. note:: In PostgreSQL, you can only query the database (catalog) that you +#: are connected to. So we cannot specify the catalog here (or +#: rather, there is no point in doing so). +#: +#: Note that the NUMERIC column is read as a string, because PostgreSQL +#: decimals do not map onto Arrow decimals. 
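+#: (If exact numeric semantics are needed downstream, the returned strings
+#: can be parsed with Python's ``decimal.Decimal``.)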
+assert conn.adbc_get_table_schema( + "example", + db_schema_filter="other_schema", +) == pyarrow.schema( + [ + ("strings", "string"), + ("values", "string"), + ] +) + conn.close() diff --git a/go/adbc/driver/snowflake/driver_test.go b/go/adbc/driver/snowflake/driver_test.go index 6f8707a15c..3230fed540 100644 --- a/go/adbc/driver/snowflake/driver_test.go +++ b/go/adbc/driver/snowflake/driver_test.go @@ -871,6 +871,8 @@ func ConnectWithJwt(uri, keyValue, passcode string) { } func (suite *SnowflakeTests) TestJwtPrivateKey() { + suite.T().Skipf("apache/arrow-adbc#1364") + // grab the username from the DSN cfg, err := gosnowflake.ParseDSN(suite.Quirks.dsn) suite.NoError(err) diff --git a/go/adbc/go.mod b/go/adbc/go.mod index 45f2ee2daf..69e9eaf591 100644 --- a/go/adbc/go.mod +++ b/go/adbc/go.mod @@ -60,7 +60,7 @@ require ( github.com/danieljoos/wincred v1.2.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/dustin/go-humanize v1.0.1 // indirect - github.com/dvsekhvalnov/jose2go v1.5.0 // indirect + github.com/dvsekhvalnov/jose2go v1.6.0 // indirect github.com/form3tech-oss/jwt-go v3.2.5+incompatible // indirect github.com/gabriel-vasile/mimetype v1.4.2 // indirect github.com/goccy/go-json v0.10.2 // indirect @@ -83,12 +83,12 @@ require ( github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/sirupsen/logrus v1.9.3 // indirect github.com/zeebo/xxh3 v1.0.2 // indirect - golang.org/x/crypto v0.14.0 // indirect + golang.org/x/crypto v0.17.0 // indirect golang.org/x/mod v0.13.0 // indirect golang.org/x/net v0.17.0 // indirect - golang.org/x/sys v0.13.0 // indirect - golang.org/x/term v0.13.0 // indirect - golang.org/x/text v0.13.0 // indirect + golang.org/x/sys v0.15.0 // indirect + golang.org/x/term v0.15.0 // indirect + golang.org/x/text v0.14.0 // indirect golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20231002182017-d307bd883b97 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go/adbc/go.sum b/go/adbc/go.sum index acc7fae809..d8e09e3ffb 100644 --- a/go/adbc/go.sum +++ b/go/adbc/go.sum @@ -69,8 +69,8 @@ github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/dnaeon/go-vcr v1.2.0 h1:zHCHvJYTMh1N7xnV7zf1m1GPBF9Ad0Jk/whtQ1663qI= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= -github.com/dvsekhvalnov/jose2go v1.5.0 h1:3j8ya4Z4kMCwT5nXIKFSV84YS+HdqSSO0VsTQxaLAeM= -github.com/dvsekhvalnov/jose2go v1.5.0/go.mod h1:QsHjhyTlD/lAVqn/NSbVZmSCGeDehTB/mPZadG+mhXU= +github.com/dvsekhvalnov/jose2go v1.6.0 h1:Y9gnSnP4qEI0+/uQkHvFXeD2PLPJeXEL+ySMEA2EjTY= +github.com/dvsekhvalnov/jose2go v1.6.0/go.mod h1:QsHjhyTlD/lAVqn/NSbVZmSCGeDehTB/mPZadG+mhXU= github.com/form3tech-oss/jwt-go v3.2.5+incompatible h1:/l4kBbb4/vGSsdtB5nUe8L7B9mImVMaBPw9L/0TBHU8= github.com/form3tech-oss/jwt-go v3.2.5+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k= github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU= @@ -148,8 +148,8 @@ github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073/go.mod 
h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc= -golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4= +golang.org/x/crypto v0.17.0 h1:r8bRNjWL3GshPW3gkd+RpvzWrZAwPS49OmTGZ/uhM4k= +golang.org/x/crypto v0.17.0/go.mod h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4= golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI= golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo= golang.org/x/mod v0.13.0 h1:I/DsJXRlw/8l/0c24sM9yb0T4z9liZTduXvdAWYiysY= @@ -165,13 +165,13 @@ golang.org/x/sys v0.0.0-20210616045830-e2b7044e8c71/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= -golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek= -golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U= +golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc= +golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.15.0 h1:y/Oo/a/q3IXu26lQgl04j/gjuBDOBlx7X6Om1j2CPW4= +golang.org/x/term v0.15.0/go.mod h1:BDl952bC7+uMoWR75FIrCDx79TPU9oHkTZ9yRbYOrX0= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= -golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/tools v0.14.0 h1:jvNa2pY0M4r62jkRQ6RwEZZyPcymeL9XZMLBbV7U2nc= golang.org/x/tools v0.14.0/go.mod h1:uYBEerGOWcJyEORxN+Ek8+TT266gXkNlHdJBwexUsBg= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/java/pom.xml b/java/pom.xml index 972e55b00b..8e166b59e7 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -219,17 +219,20 @@ + org.apache.maven.plugins maven-javadoc-plugin - 3.4.1 + 3.6.3 - - -package all,-missing https://arrow.apache.org/docs/java/reference/ + + 8 + public 1.8 diff --git a/python/adbc_driver_manager/adbc_driver_manager/dbapi.py b/python/adbc_driver_manager/adbc_driver_manager/dbapi.py index 4c36ad5cbd..1e86144c12 100644 --- a/python/adbc_driver_manager/adbc_driver_manager/dbapi.py +++ b/python/adbc_driver_manager/adbc_driver_manager/dbapi.py @@ -43,6 +43,15 @@ except ImportError as e: raise ImportError("PyArrow is required for the DBAPI-compatible interface") from e +try: + import pyarrow.dataset +except ImportError: + _pya_dataset = () + _pya_scanner = () +else: + _pya_dataset = (pyarrow.dataset.Dataset,) + _pya_scanner = (pyarrow.dataset.Scanner,) + import adbc_driver_manager from . import _lib, _reader @@ -807,7 +816,7 @@ def adbc_ingest( The Arrow data to insert. This can be a pyarrow RecordBatch, Table or RecordBatchReader, or any Arrow-compatible data that implements the Arrow PyCapsule Protocol (i.e. has an ``__arrow_c_array__`` - or ``__arrow_c_stream__ ``method). 
+ or ``__arrow_c_stream__`` method). mode How to deal with existing data: @@ -891,6 +900,13 @@ def adbc_ingest( else: if isinstance(data, pyarrow.Table): data = data.to_reader() + elif isinstance(data, _pya_dataset): + data = data.scanner().to_reader() + elif isinstance(data, _pya_scanner): + data = data.to_reader() + elif not hasattr(data, "_export_to_c"): + data = pyarrow.Table.from_batches(data) + data = data.to_reader() handle = _lib.ArrowArrayStreamHandle() data._export_to_c(handle.address) self._stmt.bind_stream(handle) diff --git a/python/adbc_driver_postgresql/tests/test_dbapi.py b/python/adbc_driver_postgresql/tests/test_dbapi.py index 2a132bd4a7..283e3fe687 100644 --- a/python/adbc_driver_postgresql/tests/test_dbapi.py +++ b/python/adbc_driver_postgresql/tests/test_dbapi.py @@ -15,9 +15,11 @@ # specific language governing permissions and limitations # under the License. +from pathlib import Path from typing import Generator import pyarrow +import pyarrow.dataset import pytest from adbc_driver_postgresql import StatementOptions, dbapi @@ -213,6 +215,60 @@ def test_stmt_ingest(postgres: dbapi.Connection) -> None: assert cur.fetch_arrow_table() == table +def test_stmt_ingest_dataset(postgres: dbapi.Connection, tmp_path: Path) -> None: + # Regression test for https://github.com/apache/arrow-adbc/issues/1310 + table = pyarrow.table( + [ + [1, 1, 2, 2, 3, 3], + ["a", "a", None, None, "b", "b"], + ], + schema=pyarrow.schema([("ints", "int32"), ("strs", "string")]), + ) + pyarrow.dataset.write_dataset( + table, tmp_path, format="parquet", partitioning=["ints"] + ) + ds = pyarrow.dataset.dataset(tmp_path, format="parquet", partitioning=["ints"]) + + with postgres.cursor() as cur: + for item in ( + lambda: ds, + lambda: ds.scanner(), + lambda: ds.scanner().to_reader(), + lambda: ds.scanner().to_table(), + ): + cur.execute("DROP TABLE IF EXISTS test_ingest") + + cur.adbc_ingest( + "test_ingest", + item(), + mode="create_append", + ) + cur.execute("SELECT ints, strs FROM test_ingest ORDER BY ints") + assert cur.fetch_arrow_table() == table + + +def test_stmt_ingest_multi(postgres: dbapi.Connection) -> None: + # Regression test for https://github.com/apache/arrow-adbc/issues/1310 + table = pyarrow.table( + [ + [1, 1, 2, 2, 3, 3], + ["a", "a", None, None, "b", "b"], + ], + names=["ints", "strs"], + ) + + with postgres.cursor() as cur: + cur.execute("DROP TABLE IF EXISTS test_ingest") + + cur.adbc_ingest( + "test_ingest", + table.to_batches(max_chunksize=2), + mode="create_append", + ) + cur.execute("SELECT * FROM test_ingest ORDER BY ints") + assert cur.fetch_arrow_table() == table + + def test_ddl(postgres: dbapi.Connection): with postgres.cursor() as cur: cur.execute("DROP TABLE IF EXISTS test_ddl") diff --git a/python/adbc_driver_postgresql/tests/test_polars.py b/python/adbc_driver_postgresql/tests/test_polars.py index a0295211f7..218a6d8b21 100644 --- a/python/adbc_driver_postgresql/tests/test_polars.py +++ b/python/adbc_driver_postgresql/tests/test_polars.py @@ -73,7 +73,7 @@ def test_polars_write_database(postgres_uri: str, df: "polars.DataFrame") -> Non connection=postgres_uri, # TODO(apache/arrow-adbc#541): polars doesn't map the semantics # properly here, and one of their modes isn't supported - if_exists="replace", + if_table_exists="replace", engine="adbc", ) finally: diff --git a/r/adbcdrivermanager/DESCRIPTION b/r/adbcdrivermanager/DESCRIPTION index 17c70324d3..a5b69b81fb 100644 --- a/r/adbcdrivermanager/DESCRIPTION +++ b/r/adbcdrivermanager/DESCRIPTION @@
-24,4 +24,4 @@ Config/build/bootstrap: TRUE URL: https://github.com/apache/arrow-adbc BugReports: https://github.com/apache/arrow-adbc/issues Imports: - nanoarrow + nanoarrow (>= 0.3.0) diff --git a/r/adbcdrivermanager/NAMESPACE b/r/adbcdrivermanager/NAMESPACE index 894851a0e6..5c671a2b3d 100644 --- a/r/adbcdrivermanager/NAMESPACE +++ b/r/adbcdrivermanager/NAMESPACE @@ -83,7 +83,6 @@ export(adbc_statement_release) export(adbc_statement_set_options) export(adbc_statement_set_sql_query) export(adbc_statement_set_substrait_plan) -export(adbc_stream_join) export(adbc_xptr_is_valid) export(adbc_xptr_move) export(execute_adbc) diff --git a/r/adbcdrivermanager/NEWS.md b/r/adbcdrivermanager/NEWS.md index 0f69dca35f..dadc9aa48a 100644 --- a/r/adbcdrivermanager/NEWS.md +++ b/r/adbcdrivermanager/NEWS.md @@ -1,3 +1,11 @@ +# adbcdrivermanager 0.8.0 + +- Update upstream ADBC libraries to version 0.8.0. + +# adbcdrivermanager 0.7.0 + +- Update upstream ADBC libraries to version 0.7.0. + # adbcdrivermanager 0.6.0 - **r**: Ensure that info_codes are coerced to integer (#986) diff --git a/r/adbcdrivermanager/R/adbc.R b/r/adbcdrivermanager/R/adbc.R index 60e271d9ce..65a4e6ebb0 100644 --- a/r/adbcdrivermanager/R/adbc.R +++ b/r/adbcdrivermanager/R/adbc.R @@ -70,6 +70,8 @@ adbc_database_init_default <- function(driver, options = NULL, subclass = charac #' @rdname adbc_database_init #' @export adbc_database_release <- function(database) { + stop_for_nonzero_child_count(database) + error <- adbc_allocate_error() status <- .Call(RAdbcDatabaseRelease, database, error) stop_for_error(status, error) @@ -119,6 +121,8 @@ adbc_connection_init_default <- function(database, options = NULL, subclass = ch #' @rdname adbc_connection_init #' @export adbc_connection_release <- function(connection) { + stop_for_nonzero_child_count(connection) + if (isTRUE(connection$.release_database)) { database <- connection$database on.exit(adbc_database_release(database)) @@ -188,7 +192,7 @@ adbc_connection_get_info <- function(connection, info_codes = NULL) { ) stop_for_error(status, error) - out_stream + adbc_child_stream(connection, out_stream) } #' @rdname adbc_connection_get_info @@ -211,7 +215,7 @@ adbc_connection_get_objects <- function(connection, depth = 0L, catalog = NULL, ) stop_for_error(status, error) - out_stream + adbc_child_stream(connection, out_stream) } #' @rdname adbc_connection_get_info @@ -241,7 +245,7 @@ adbc_connection_get_table_types <- function(connection) { status <- .Call(RAdbcConnectionGetTableTypes, connection, out_stream, error) stop_for_error(status, error) - out_stream + adbc_child_stream(connection, out_stream) } #' @rdname adbc_connection_get_info @@ -258,7 +262,7 @@ adbc_connection_read_partition <- function(connection, serialized_partition) { ) stop_for_error(status, error) - out_stream + adbc_child_stream(connection, out_stream) } #' @rdname adbc_connection_get_info @@ -297,7 +301,7 @@ adbc_connection_get_statistic_names <- function(connection) { status <- .Call(RAdbcConnectionGetStatisticNames, connection, out_stream, error) stop_for_error(status, error) - out_stream + adbc_child_stream(connection, out_stream) } #' @rdname adbc_connection_get_info @@ -319,7 +323,7 @@ adbc_connection_get_statistics <- function(connection, catalog, db_schema, ) stop_for_error(status, error) - out_stream + adbc_child_stream(connection, out_stream) } #' @rdname adbc_connection_get_info @@ -384,6 +388,8 @@ adbc_statement_init_default <- function(connection, options = NULL, subclass = c #' @rdname adbc_statement_init #' 
@export adbc_statement_release <- function(statement) { + stop_for_nonzero_child_count(statement) + if (isTRUE(statement$.release_connection)) { connection <- statement$connection on.exit(adbc_connection_release(connection)) @@ -407,6 +413,8 @@ adbc_statement_release <- function(statement) { #' or object that can be coerced to one. #' @param schema A [nanoarrow_schema][nanoarrow::as_nanoarrow_schema] or object #' that can be coerced to one. +#' @param stream_join_parent Use `TRUE` to invalidate `statement` and tie its +#' lifecycle to `stream`. #' #' @return #' - `adbc_statement_set_sql_query()`, `adbc_statement_set_substrait_plan()`, @@ -483,9 +491,25 @@ adbc_statement_bind_stream <- function(statement, stream, schema = NULL) { #' @rdname adbc_statement_set_sql_query #' @export -adbc_statement_execute_query <- function(statement, stream = NULL) { +adbc_statement_execute_query <- function(statement, stream = NULL, + stream_join_parent = FALSE) { error <- adbc_allocate_error() - result <- .Call(RAdbcStatementExecuteQuery, statement, stream, error) + + if (is.null(stream)) { + result <- .Call(RAdbcStatementExecuteQuery, statement, NULL, error) + } else { + stream_tmp <- nanoarrow::nanoarrow_allocate_array_stream() + result <- .Call(RAdbcStatementExecuteQuery, statement, stream_tmp, error) + if (identical(result$status, 0L)) { + stream_tmp <- adbc_child_stream( + statement, + stream_tmp, + release_parent = stream_join_parent + ) + nanoarrow::nanoarrow_pointer_export(stream_tmp, stream) + } + } + stop_for_error(result$status, error) result$rows_affected } diff --git a/r/adbcdrivermanager/R/helpers.R b/r/adbcdrivermanager/R/helpers.R index 26aadd1e93..eea3a3ee8c 100644 --- a/r/adbcdrivermanager/R/helpers.R +++ b/r/adbcdrivermanager/R/helpers.R @@ -104,11 +104,7 @@ execute_adbc.default <- function(db_or_con, query, ..., bind = NULL, stream = NU adbc_statement_prepare(stmt) } - adbc_statement_execute_query(stmt, stream) - - if (!is.null(stream)) { - adbc_stream_join(stream, stmt) - } + adbc_statement_execute_query(stmt, stream, stream_join_parent = TRUE) invisible(db_or_con) } @@ -150,12 +146,13 @@ write_adbc.default <- function(tbl, db_or_con, target_table, ..., #' it is good practice to explicitly clean up these objects. These helpers #' are designed to make explicit and predictable cleanup easy to accomplish. #' -#' Note that you can use [adbc_connection_join()], -#' [adbc_statement_join()], and [adbc_stream_join()] +#' Note that you can use [adbc_connection_join()] and [adbc_statement_join()] #' to tie the lifecycle of the parent object to that of the child object. #' These functions mark any previous references to the parent object as #' released so you can still use local and with helpers to manage the parent -#' object before it is joined. +#' object before it is joined. Use `stream_join_parent = TRUE` in +#' [adbc_statement_execute_query()] to tie the lifecycle of a statement to +#' the output stream. #' #' @param x An ADBC database, ADBC connection, ADBC statement, or #' nanoarrow_array_stream returned from calls to an ADBC function. @@ -217,8 +214,6 @@ local_adbc <- function(x, .local_envir = parent.frame()) { #' @param database A database created with [adbc_database_init()] #' @param connection A connection created with [adbc_connection_init()] #' @param statement A statement created with [adbc_statement_init()] -#' @param stream A [nanoarrow_array_stream][nanoarrow::as_nanoarrow_array_stream] -#' @inheritParams with_adbc #' #' @return The input, invisibly. 
#' @export @@ -244,10 +239,15 @@ local_adbc <- function(x, .local_envir = parent.frame()) { #' adbc_connection_join <- function(connection, database) { assert_adbc(connection, "adbc_connection") - assert_adbc(database, "adbc_database") + + stopifnot( + identical(database, connection$database), + identical(database$.child_count, 1L) + ) connection$.release_database <- TRUE - connection$database <- adbc_xptr_move(database) + connection$database <- adbc_xptr_move(database, check_child_count = FALSE) + xptr_set_protected(connection, connection$database) invisible(connection) } @@ -255,42 +255,53 @@ adbc_connection_join <- function(connection, database) { #' @export adbc_statement_join <- function(statement, connection) { assert_adbc(statement, "adbc_statement") - assert_adbc(connection, "adbc_connection") + + stopifnot( + identical(connection, statement$connection), + identical(connection$.child_count, 1L) + ) statement$.release_connection <- TRUE - statement$connection <- adbc_xptr_move(connection) + statement$connection <- adbc_xptr_move(connection, check_child_count = FALSE) + xptr_set_protected(statement, statement$connection) invisible(statement) } -#' @rdname adbc_connection_join -#' @export -adbc_stream_join <- function(stream, x) { - if (utils::packageVersion("nanoarrow") < "0.1.0.9000") { - stop("adbc_stream_join_statement() requires nanoarrow >= 0.2.0") - } - - assert_adbc(stream, "nanoarrow_array_stream") - assert_adbc(x) +adbc_child_stream <- function(parent, stream, release_parent = FALSE) { + assert_adbc(parent) + # This finalizer will run immediately on release (if released explicitly + # on the main R thread) or on garbage collection otherwise. self_contained_finalizer <- function() { - try(adbc_release_non_null(x)) + try({ + parent$.child_count <- parent$.child_count - 1L + if (release_parent) { + adbc_release_non_null(parent) + } + }) } # Make sure we don't keep any variables around that aren't needed - # for the finalizer and make sure we invalidate the original statement + # for the finalizer and make sure we do keep around a strong reference + # to parent. self_contained_finalizer_env <- as.environment( - list(x = adbc_xptr_move(x)) + list( + parent = if (release_parent) adbc_xptr_move(parent) else parent, + release_parent = release_parent + ) ) parent.env(self_contained_finalizer_env) <- asNamespace("adbcdrivermanager") environment(self_contained_finalizer) <- self_contained_finalizer_env - # This finalizer will run immediately on release (if released explicitly - # on the main R thread) or on garbage collection otherwise. - - # Until the release version of nanoarrow contains this we will get a check - # warning for nanoarrow::array_stream_set_finalizer() - set_finalizer <- asNamespace("nanoarrow")[["array_stream_set_finalizer"]] - set_finalizer(stream, self_contained_finalizer) + # Set the finalizer using nanoarrow's method for this + stream_out <- nanoarrow::array_stream_set_finalizer( + stream, + self_contained_finalizer + ) - invisible(stream) + # Once we're sure this will succeed, increment the parent child count + # Use whatever version is in the finalizer env (we might have moved parent) + self_contained_finalizer_env$parent$.child_count <- + self_contained_finalizer_env$parent$.child_count + 1L + stream_out } diff --git a/r/adbcdrivermanager/R/utils.R b/r/adbcdrivermanager/R/utils.R index 1d7a01be1d..c747a612ed 100644 --- a/r/adbcdrivermanager/R/utils.R +++ b/r/adbcdrivermanager/R/utils.R @@ -16,13 +16,23 @@ # under the License. 
new_env <- function() { - new.env(parent = emptyenv()) + env <- new.env(parent = emptyenv()) + # A previous version of this just did env$.child_count <- 0L, + # which, perhaps because of compilation, results in env$.child_count + # referring to the exact same SEXP for every ADBC object! Use vector() + # to ensure a fresh allocation. + env$.child_count <- vector("integer", length = 1L) + env } xptr_env <- function(xptr) { .Call(RAdbcXptrEnv, xptr) } +xptr_set_protected <- function(xptr, prot) { + .Call(RAdbcXptrSetProtected, xptr, prot) +} + #' @export length.adbc_xptr <- function(x) { length(xptr_env(x)) @@ -80,6 +90,20 @@ str.adbc_xptr <- function(object, ...) { invisible(object) } +stop_for_nonzero_child_count <- function(obj) { + child_count <- obj$.child_count + if (!identical(child_count, 0L)) { + msg <- sprintf( + "<%s> has %d unreleased child object%s", + paste(class(obj), collapse = "/"), + child_count, + if (child_count != 1) "s" else "" + ) + cnd <- simpleError(msg, call = sys.call(-1)) + class(cnd) <- union("adbc_error_child_count_not_zero", class(cnd)) + stop(cnd) + } +} #' Low-level pointer details #' @@ -93,6 +117,8 @@ str.adbc_xptr <- function(object, ...) { #' #' @param x An 'adbc_database', 'adbc_connection', 'adbc_statement', or #' 'nanoarrow_array_stream' +#' @param check_child_count Ensures that `x` has a zero child count before +#' performing the move. This should almost always be `TRUE`. #' #' @return #' - `adbc_xptr_move()`: A freshly-allocated R object identical to `x` @@ -107,7 +133,11 @@ str.adbc_xptr <- function(object, ...) { #' adbc_xptr_is_valid(db) #' adbc_xptr_is_valid(db_new) #' -adbc_xptr_move <- function(x) { +adbc_xptr_move <- function(x, check_child_count = TRUE) { + if (check_child_count && (".child_count" %in% names(x))) { + stop_for_nonzero_child_count(x) + } + if (inherits(x, "adbc_database")) { .Call(RAdbcMoveDatabase, x) } else if (inherits(x, "adbc_connection")) { diff --git a/r/adbcdrivermanager/man/adbc_connection_join.Rd b/r/adbcdrivermanager/man/adbc_connection_join.Rd index 823a33b7be..418f9b1ddc 100644 --- a/r/adbcdrivermanager/man/adbc_connection_join.Rd +++ b/r/adbcdrivermanager/man/adbc_connection_join.Rd @@ -3,14 +3,11 @@ \name{adbc_connection_join} \alias{adbc_connection_join} \alias{adbc_statement_join} -\alias{adbc_stream_join} \title{Join the lifecycle of a unique parent to its child} \usage{ adbc_connection_join(connection, database) adbc_statement_join(statement, connection) - -adbc_stream_join(stream, x) } \arguments{ \item{connection}{A connection created with \code{\link[=adbc_connection_init]{adbc_connection_init()}}} @@ -18,11 +15,6 @@ adbc_stream_join(stream, x) \item{database}{A database created with \code{\link[=adbc_database_init]{adbc_database_init()}}} \item{statement}{A statement created with \code{\link[=adbc_statement_init]{adbc_statement_init()}}} - -\item{stream}{A \link[nanoarrow:as_nanoarrow_array_stream]{nanoarrow_array_stream}} - -\item{x}{An ADBC database, ADBC connection, ADBC statement, or -nanoarrow_array_stream returned from calls to an ADBC function.} } \value{ The input, invisibly. 
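Taken together, the changes above give every ADBC handle an explicit `.child_count` in its external-pointer environment: creating a connection or statement increments the parent's count, releasing (or finalizing) the child decrements it, and the release functions plus `adbc_xptr_move()` now refuse to act on an object that still has unreleased children. A minimal sketch of the resulting behaviour, assuming the void driver accepts connections the way it does in this package's own tests:

library(adbcdrivermanager)

db <- adbc_database_init(adbc_driver_void())
con <- adbc_connection_init(db)

# db now has .child_count == 1, so releasing it (or moving it while
# check_child_count = TRUE) signals an error of class
# "adbc_error_child_count_not_zero"
try(adbc_database_release(db))

# Releasing the child first brings the count back to zero
adbc_connection_release(con)
adbc_database_release(db)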
diff --git a/r/adbcdrivermanager/man/adbc_statement_set_sql_query.Rd b/r/adbcdrivermanager/man/adbc_statement_set_sql_query.Rd index f83f61c7c2..71a85751bf 100644 --- a/r/adbcdrivermanager/man/adbc_statement_set_sql_query.Rd +++ b/r/adbcdrivermanager/man/adbc_statement_set_sql_query.Rd @@ -24,7 +24,11 @@ adbc_statement_bind(statement, values, schema = NULL) adbc_statement_bind_stream(statement, stream, schema = NULL) -adbc_statement_execute_query(statement, stream = NULL) +adbc_statement_execute_query( + statement, + stream = NULL, + stream_join_parent = FALSE +) adbc_statement_execute_schema(statement) @@ -45,6 +49,9 @@ that can be coerced to one.} \item{stream}{A \link[nanoarrow:as_nanoarrow_array_stream]{nanoarrow_array_stream} or object that can be coerced to one.} + +\item{stream_join_parent}{Use \code{TRUE} to invalidate \code{statement} and tie its +lifecycle to \code{stream}.} } \value{ \itemize{ diff --git a/r/adbcdrivermanager/man/adbc_xptr_move.Rd b/r/adbcdrivermanager/man/adbc_xptr_move.Rd index 2dc2ceab24..bc7a92e7f3 100644 --- a/r/adbcdrivermanager/man/adbc_xptr_move.Rd +++ b/r/adbcdrivermanager/man/adbc_xptr_move.Rd @@ -5,13 +5,16 @@ \alias{adbc_xptr_is_valid} \title{Low-level pointer details} \usage{ -adbc_xptr_move(x) +adbc_xptr_move(x, check_child_count = TRUE) adbc_xptr_is_valid(x) } \arguments{ \item{x}{An 'adbc_database', 'adbc_connection', 'adbc_statement', or 'nanoarrow_array_stream'} + +\item{check_child_count}{Ensures that \code{x} has a zero child count before +performing the move. This should almost always be \code{TRUE}.} } \value{ \itemize{ diff --git a/r/adbcdrivermanager/man/with_adbc.Rd b/r/adbcdrivermanager/man/with_adbc.Rd index 5e16a9cad5..ba05dbef84 100644 --- a/r/adbcdrivermanager/man/with_adbc.Rd +++ b/r/adbcdrivermanager/man/with_adbc.Rd @@ -33,12 +33,13 @@ it is good practice to explicitly clean up these objects. These helpers are designed to make explicit and predictable cleanup easy to accomplish. } \details{ -Note that you can use \code{\link[=adbc_connection_join]{adbc_connection_join()}}, -\code{\link[=adbc_statement_join]{adbc_statement_join()}}, and \code{\link[=adbc_stream_join]{adbc_stream_join()}} +Note that you can use \code{\link[=adbc_connection_join]{adbc_connection_join()}} and \code{\link[=adbc_statement_join]{adbc_statement_join()}} to tie the lifecycle of the parent object to that of the child object. These functions mark any previous references to the parent object as released so you can still use local and with helpers to manage the parent -object before it is joined. +object before it is joined. Use \code{stream_join_parent = TRUE} in +\code{\link[=adbc_statement_execute_query]{adbc_statement_execute_query()}} to tie the lifecycle of a statement to +the output stream. 
} \examples{ # Using with_adbc(): diff --git a/r/adbcdrivermanager/src/init.c b/r/adbcdrivermanager/src/init.c index 77c097b877..ad7ff6dcb9 100644 --- a/r/adbcdrivermanager/src/init.c +++ b/r/adbcdrivermanager/src/init.c @@ -99,6 +99,7 @@ SEXP RAdbcStatementExecutePartitions(SEXP statement_xptr, SEXP out_schema_xptr, SEXP partitions_xptr, SEXP error_xptr); SEXP RAdbcStatementCancel(SEXP statement_xptr, SEXP error_xptr); SEXP RAdbcXptrEnv(SEXP xptr); +SEXP RAdbcXptrSetProtected(SEXP xptr, SEXP prot); static const R_CallMethodDef CallEntries[] = { {"RAdbcVoidDriverInitFunc", (DL_FUNC)&RAdbcVoidDriverInitFunc, 0}, @@ -160,6 +161,7 @@ static const R_CallMethodDef CallEntries[] = { {"RAdbcStatementExecutePartitions", (DL_FUNC)&RAdbcStatementExecutePartitions, 4}, {"RAdbcStatementCancel", (DL_FUNC)&RAdbcStatementCancel, 2}, {"RAdbcXptrEnv", (DL_FUNC)&RAdbcXptrEnv, 1}, + {"RAdbcXptrSetProtected", (DL_FUNC)&RAdbcXptrSetProtected, 2}, {NULL, NULL, 0}}; /* end generated by tools/make-callentries.R */ diff --git a/r/adbcdrivermanager/src/radbc.cc b/r/adbcdrivermanager/src/radbc.cc index fe09de13d0..da27e4d04b 100644 --- a/r/adbcdrivermanager/src/radbc.cc +++ b/r/adbcdrivermanager/src/radbc.cc @@ -41,6 +41,24 @@ static void adbc_error_warn(int code, AdbcError* error, const char* context) { } } +static int adbc_update_parent_child_count(SEXP xptr, int delta) { + SEXP parent_xptr = R_ExternalPtrProtected(xptr); + if (parent_xptr == R_NilValue) { + return NA_INTEGER; + } + + SEXP parent_env = R_ExternalPtrTag(parent_xptr); + if (parent_env == R_NilValue) { + return NA_INTEGER; + } + + SEXP child_count_sexp = Rf_findVarInFrame(parent_env, Rf_install(".child_count")); + int* child_count = INTEGER(child_count_sexp); + int old_value = child_count[0]; + child_count[0] = child_count[0] + delta; + return old_value; +} + static void finalize_driver_xptr(SEXP driver_xptr) { auto driver = reinterpret_cast<AdbcDriver*>(R_ExternalPtrAddr(driver_xptr)); if (driver == nullptr) { @@ -186,6 +204,9 @@ static void finalize_connection_xptr(SEXP connection_xptr) { AdbcError error = ADBC_ERROR_INIT; int status = AdbcConnectionRelease(connection, &error); adbc_error_warn(status, &error, "finalize_connection_xptr()"); + if (status == ADBC_STATUS_OK) { + adbc_update_parent_child_count(connection_xptr, -1); + } } adbc_xptr_default_finalize(connection_xptr); @@ -236,6 +257,7 @@ extern "C" SEXP RAdbcConnectionInit(SEXP connection_xptr, SEXP database_xptr, // Keep the database pointer alive for as long as the connection pointer // is alive R_SetExternalPtrProtected(connection_xptr, database_xptr); + adbc_update_parent_child_count(connection_xptr, 1); } return adbc_wrap_status(result); @@ -245,6 +267,10 @@ extern "C" SEXP RAdbcConnectionRelease(SEXP connection_xptr, SEXP error_xptr) { auto connection = adbc_from_xptr<AdbcConnection>(connection_xptr); auto error = adbc_from_xptr<AdbcError>(error_xptr); int status = AdbcConnectionRelease(connection, error); + if (status == ADBC_STATUS_OK) { + adbc_update_parent_child_count(connection_xptr, -1); + } + return adbc_wrap_status(status); } @@ -384,6 +410,9 @@ static void finalize_statement_xptr(SEXP statement_xptr) { AdbcError error = ADBC_ERROR_INIT; int status = AdbcStatementRelease(statement, &error); adbc_error_warn(status, &error, "finalize_statement_xptr()"); + if (status == ADBC_STATUS_OK) { + adbc_update_parent_child_count(statement_xptr, -1); + } } adbc_xptr_default_finalize(statement_xptr); @@ -401,6 +430,7 @@ extern "C" SEXP RAdbcStatementNew(SEXP connection_xptr) { adbc_error_stop(status, &error);
R_SetExternalPtrProtected(statement_xptr, connection_xptr); + adbc_update_parent_child_count(statement_xptr, 1); UNPROTECT(1); return statement_xptr; @@ -430,6 +460,10 @@ extern "C" SEXP RAdbcStatementRelease(SEXP statement_xptr, SEXP error_xptr) { auto statement = adbc_from_xptr<AdbcStatement>(statement_xptr); auto error = adbc_from_xptr<AdbcError>(error_xptr); int status = AdbcStatementRelease(statement, error); + if (status == ADBC_STATUS_OK) { + adbc_update_parent_child_count(statement_xptr, -1); + } + return adbc_wrap_status(status); } diff --git a/r/adbcdrivermanager/src/radbc.h b/r/adbcdrivermanager/src/radbc.h index 27772802ad..4f1ec28317 100644 --- a/r/adbcdrivermanager/src/radbc.h +++ b/r/adbcdrivermanager/src/radbc.h @@ -22,6 +22,8 @@ #include +#include + template <typename T> static inline const char* adbc_xptr_class(); diff --git a/r/adbcdrivermanager/src/utils.c b/r/adbcdrivermanager/src/utils.c index 71258fc06a..74e5c90c7e 100644 --- a/r/adbcdrivermanager/src/utils.c +++ b/r/adbcdrivermanager/src/utils.c @@ -26,3 +26,14 @@ SEXP RAdbcXptrEnv(SEXP xptr) { return R_ExternalPtrTag(xptr); } + +SEXP RAdbcXptrSetProtected(SEXP xptr, SEXP prot) { + if (TYPEOF(xptr) != EXTPTRSXP) { + Rf_error("object is not an external pointer"); + } + + SEXP old_prot = PROTECT(R_ExternalPtrProtected(xptr)); + R_SetExternalPtrProtected(xptr, prot); + UNPROTECT(1); + return old_prot; +} diff --git a/r/adbcdrivermanager/tests/testthat/test-driver_monkey.R b/r/adbcdrivermanager/tests/testthat/test-driver_monkey.R index d3ed532a89..5275b408f8 100644 --- a/r/adbcdrivermanager/tests/testthat/test-driver_monkey.R +++ b/r/adbcdrivermanager/tests/testthat/test-driver_monkey.R @@ -27,6 +27,11 @@ test_that("the monkey driver sees, and the monkey driver does", { stream <- nanoarrow::nanoarrow_allocate_array_stream() expect_identical(adbc_statement_execute_query(stmt, stream), -1) expect_identical(as.data.frame(stream$get_next()), input) + expect_error( + adbc_statement_release(stmt), + class = "adbc_error_child_count_not_zero" + ) + stream$release() adbc_statement_release(stmt) stmt <- adbc_statement_init(con, input) diff --git a/r/adbcdrivermanager/tests/testthat/test-helpers.R b/r/adbcdrivermanager/tests/testthat/test-helpers.R index 3e65053e54..4444835962 100644 --- a/r/adbcdrivermanager/tests/testthat/test-helpers.R +++ b/r/adbcdrivermanager/tests/testthat/test-helpers.R @@ -129,8 +129,6 @@ test_that("joiners work for databases, connections, and statements", { }) test_that("joiners work with streams", { - skip_if_not(packageVersion("nanoarrow") >= "0.1.0.9000") - stream <- local({ db <- local_adbc(adbc_database_init(adbc_driver_monkey())) expect_false(adbc_xptr_is_valid(con)) stream <- local_adbc(nanoarrow::nanoarrow_allocate_array_stream()) - adbc_statement_execute_query(stmt, stream) - adbc_stream_join(stream, stmt) + adbc_statement_execute_query(stmt, stream, stream_join_parent = TRUE) expect_false(adbc_xptr_is_valid(stmt)) adbc_xptr_move(stream) diff --git a/r/adbcdrivermanager/tests/testthat/test-utils.R b/r/adbcdrivermanager/tests/testthat/test-utils.R index 0a9c7a49d1..e99209c929 100644 --- a/r/adbcdrivermanager/tests/testthat/test-utils.R +++ b/r/adbcdrivermanager/tests/testthat/test-utils.R @@ -17,8 +17,8 @@ test_that("external pointer embedded environment works", { db <- adbc_database_init(adbc_driver_void()) - expect_identical(names(db), "driver") - expect_identical(length(db), 1L) + expect_setequal(names(db), c("driver", ".child_count")) + expect_identical(db$.child_count,
0L) db$key <- "value" expect_identical(db$key, "value") diff --git a/r/adbcflightsql/DESCRIPTION b/r/adbcflightsql/DESCRIPTION index c9ea48995c..43707deb0a 100644 --- a/r/adbcflightsql/DESCRIPTION +++ b/r/adbcflightsql/DESCRIPTION @@ -19,6 +19,7 @@ RoxygenNote: 7.2.3 Suggests: nanoarrow, testthat (>= 3.0.0) +SystemRequirements: GNU make, Go (>= 1.19.13) Config/testthat/edition: 3 Config/build/bootstrap: TRUE URL: https://github.com/apache/arrow-adbc diff --git a/r/adbcflightsql/cleanup b/r/adbcflightsql/cleanup index 01e970b324..2261ddb19a 100755 --- a/r/adbcflightsql/cleanup +++ b/r/adbcflightsql/cleanup @@ -16,3 +16,5 @@ # under the License. rm src/*.o src/go/*.a +rm -rf src/.go-cache +rm -rf src/.go-path diff --git a/r/adbcflightsql/configure b/r/adbcflightsql/configure index ef44e54df5..3ba0a59462 100755 --- a/r/adbcflightsql/configure +++ b/r/adbcflightsql/configure @@ -18,6 +18,22 @@ R_BIN="$R_HOME/bin/R" RSCRIPT_BIN="$R_HOME/bin/Rscript" +# Set to non-false for CRAN releases, which require this approach to comply with +# guidance regarding large dependency sources. When non-false, this script +# will fail if the download fails or if checksum verification fails on +# the downloaded file. +DOWNLOAD_DEPENDENCY_ARCHIVE="false" + +# CRAN checks CPU time to enforce ~2 cores for building. Set GOMAXPROCS to 1 +# when building on CRAN to be safe. +if [ ! "$DOWNLOAD_DEPENDENCY_ARCHIVE" = "false" ] && [ -z "$GOMAXPROCS" ]; then + echo "Using GOMAXPROCS=1 for go build" + echo "Set GOMAXPROCS to a higher value in ~/.Renviron for a faster build" + GOMAXPROCS=1 +elif [ ! -z "$GOMAXPROCS" ]; then + echo "Using GOMAXPROCS=$GOMAXPROCS for go build" +fi + # Run bootstrap.R. This will have already run if we are installing a source # package built with pkgbuild::build() with pkgbuild >1.4.0; however, we # run it again in case this is R CMD INSTALL on a directory or @@ -29,26 +45,45 @@ if [ -f bootstrap.R ]; then fi # Find the go binary so that we can go build! +# If we've downloaded a specific version of go to src/go/tmp, use that +# one (helpful for testing different versions of Go locally) +PREVIOUSLY_DOWNLOADED_GO="`pwd`/src/go/tmp/go/bin/go" +if [ -z "$GO_BIN" ] && [ -f "$PREVIOUSLY_DOWNLOADED_GO" ]; then + GO_BIN="$PREVIOUSLY_DOWNLOADED_GO" +fi + +# Check go on PATH if [ -z "$GO_BIN" ]; then GO_BIN=`which go` fi +# Try some default install locations that might not be on PATH +DEFAULT_GO_WIN="C:/Program Files/Go/bin/go.exe" +if [ -z "$GO_BIN" ] && [ -f "$DEFAULT_GO_WIN" ]; then + GO_BIN="$DEFAULT_GO_WIN" +fi + +DEFAULT_GO_MACOS="/usr/local/go/bin/go" +if [ -z "$GO_BIN" ] && [ -f "$DEFAULT_GO_MACOS" ]; then + GO_BIN="$DEFAULT_GO_MACOS" +fi + +DEFAULT_GO_HOMEBREW_M1="/opt/homebrew/bin/go" +if [ -z "$GO_BIN" ] && [ -f "$DEFAULT_GO_HOMEBREW_M1" ]; then + GO_BIN="$DEFAULT_GO_HOMEBREW_M1" +fi + if [ -z "$GO_BIN" ]; then - if [ ! -f src/go/tmp/go/bin/go ]; then - echo "" - echo "Downloading and extracting Go into the package source directory:" - echo "This may take a few minutes. To eliminate this step, install Go" - echo "from your faviourite package manager or set the GO_BIN environment variable:" - echo "- apt-get install golang" - echo "- brew install golang" - echo "- dnf install golang" - echo "- apk add go" - echo "- pacman -S go" - - "$RSCRIPT_BIN" tools/download-go.R - fi - - GO_BIN="`pwd`/src/go/tmp/go/bin/go" + echo "" + echo "The Go compiler is required to install this package.
You can install go" + echo "from your faviourite package manager or set the GO_BIN environment variable:" + echo "- apt-get install golang" + echo "- brew install golang" + echo "- dnf install golang" + echo "- apk add go" + echo "- pacman -S go" + echo "...or from the official installers available at https://go.dev/dl/" + exit 1 fi echo "Trying 'go version' with GO_BIN at '$GO_BIN'" @@ -62,12 +97,8 @@ fi CC=`"$R_BIN" CMD config CC` CXX=`"$R_BIN" CMD config CXX` -# clang and gcc use different symbol-hiding syntax and we need to -# make sure to hide any Adbc* symbols that might conflict with another -# driver. -if "$R_BIN" CMD config CC | grep -e "clang" ; then - SYMBOL_ARGS="-Wl,-exported_symbol,_adbcflightsql_c_flightsql -Wl,-exported_symbol,_R_init_adbcflightsql" -elif "$R_BIN" CMD config CC | grep -e "gcc" ; then +# Attempt to hide symbols where possible +if "$R_BIN" CMD config CC | grep -e "gcc" >/dev/null ; then SYMBOL_ARGS="-Wl,--version-script=go/symbols.map" fi @@ -83,6 +114,7 @@ sed \ -e "s|@libs@|$PKG_LIBS|" \ -e "s|@cc@|$CC|" \ -e "s|@cxx@|$CXX|" \ + -e "s|@nproc@|$GOMAXPROCS|" \ src/Makevars.in > src/Makevars if [ -f "src/go/adbc/pkg/flightsql/driver.go" ]; then diff --git a/r/adbcflightsql/src/.gitignore b/r/adbcflightsql/src/.gitignore index fb883613f9..e040acd74e 100644 --- a/r/adbcflightsql/src/.gitignore +++ b/r/adbcflightsql/src/.gitignore @@ -17,3 +17,5 @@ adbc.h Makevars +.go-cache +.go-path diff --git a/r/adbcflightsql/src/Makevars.in b/r/adbcflightsql/src/Makevars.in index eb74ef3356..86101d5054 100644 --- a/r/adbcflightsql/src/Makevars.in +++ b/r/adbcflightsql/src/Makevars.in @@ -21,10 +21,11 @@ PKG_LIBS=-L$(CURDIR)/go -ladbc_driver_flightsql -lresolv @libs@ CGO_CC = @cc@ CGO_CXX = @cxx@ CGO_CFLAGS = $(ALL_CPPFLAGS) +GOMAXPROCS = @nproc@ .PHONY: all gostatic all: $(SHLIB) $(SHLIB): gostatic gostatic: - (cd "$(CURDIR)/go/adbc"; CC="$(CGO_CC)" CXX="$(CGO_CXX)" CGO_CFLAGS="$(CGO_CFLAGS)" CGO_LDFLAGS="$(PKG_LIBS)" "@gobin@" build -v -tags driverlib -o $(CURDIR)/go/libadbc_driver_flightsql.a -buildmode=c-archive "./pkg/flightsql") + (cd "$(CURDIR)/go/adbc"; GOMAXPROCS=$(GOMAXPROCS) GOPATH="$(CURDIR)/.go-path" GOCACHE="$(CURDIR)/.go-cache" CC="$(CGO_CC)" CXX="$(CGO_CXX)" CGO_CFLAGS="$(CGO_CFLAGS)" CGO_LDFLAGS="$(PKG_LIBS)" "@gobin@" build -v -tags driverlib -o $(CURDIR)/go/libadbc_driver_flightsql.a -buildmode=c-archive "./pkg/flightsql") diff --git a/r/adbcpostgresql/NEWS.md b/r/adbcpostgresql/NEWS.md index 842387e43e..700150c221 100644 --- a/r/adbcpostgresql/NEWS.md +++ b/r/adbcpostgresql/NEWS.md @@ -1,3 +1,11 @@ +# adbcdrivermanager 0.8.0 + +- Update upstream ADBC libraries to version 0.8.0. + +# adbcdrivermanager 0.7.0 + +- Update upstream ADBC libraries to version 0.7.0. + # adbcpostgresql 0.6.0 * Initial CRAN submission. diff --git a/r/adbcpostgresql/cran-comments.md b/r/adbcpostgresql/cran-comments.md index 2d2bbbdc12..906efef731 100644 --- a/r/adbcpostgresql/cran-comments.md +++ b/r/adbcpostgresql/cran-comments.md @@ -4,6 +4,4 @@ Apache Arrow ADBC libraries version. ## R CMD check results -0 errors | 0 warnings | 1 note - -* This is a new release. 
+0 errors | 0 warnings | 0 notes diff --git a/r/adbcsnowflake/DESCRIPTION b/r/adbcsnowflake/DESCRIPTION index 5d7af67367..f22f18ba4e 100644 --- a/r/adbcsnowflake/DESCRIPTION +++ b/r/adbcsnowflake/DESCRIPTION @@ -18,8 +18,9 @@ Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.3 Suggests: nanoarrow, + openssl, testthat (>= 3.0.0) -SystemRequirements: GNU make +SystemRequirements: GNU make, Go (>= 1.19.13) Config/testthat/edition: 3 Config/build/bootstrap: TRUE URL: https://github.com/apache/arrow-adbc diff --git a/r/adbcsnowflake/cleanup b/r/adbcsnowflake/cleanup index 01e970b324..2261ddb19a 100755 --- a/r/adbcsnowflake/cleanup +++ b/r/adbcsnowflake/cleanup @@ -16,3 +16,5 @@ # under the License. rm src/*.o src/go/*.a +rm -rf src/.go-cache +rm -rf src/.go-path diff --git a/r/adbcsnowflake/configure b/r/adbcsnowflake/configure index 9157426196..e5a101435a 100755 --- a/r/adbcsnowflake/configure +++ b/r/adbcsnowflake/configure @@ -24,6 +24,16 @@ RSCRIPT_BIN="$R_HOME/bin/Rscript" # the downloaded file. DOWNLOAD_DEPENDENCY_ARCHIVE="false" +# CRAN checks CPU time to enforce ~2 cores for building. Set GOMAXPROCS to 1 +# when building on CRAN to be safe. +if [ ! "$DOWNLOAD_DEPENDENCY_ARCHIVE" = "false" ] && [ -z "$GOMAXPROCS" ]; then + echo "Using GOMAXPROCS=1 for go build" + echo "Set GOMAXPROCS to a higher value in ~/.Renviron for a faster build" + GOMAXPROCS=1 +elif [ ! -z "$GOMAXPROCS" ]; then + echo "Using GOMAXPROCS=$GOMAXPROCS for go build" +fi + # Run bootstrap.R. This will have already run if we are installing a source # package built with pkgbuild::build() with pkgbuild >1.4.0; however, we # run it again in case this is R CMD INSTALL on a directory or @@ -62,8 +72,13 @@ if [ -f tools/src-go-adbc-vendor.zip ]; then cd .. "$RSCRIPT_BIN" tools/extract-go-vendor-archive.R else - echo "Checksum verification failed for vendored dependency archive" - exit 1 + cd .. + if "$RSCRIPT_BIN" tools/verify-go-vendor-archive.R ; then + "$RSCRIPT_BIN" tools/extract-go-vendor-archive.R + else + echo "Checksum verification failed for vendored dependency archive" + exit 1 + fi fi fi @@ -120,12 +135,8 @@ fi CC=`"$R_BIN" CMD config CC` CXX=`"$R_BIN" CMD config CXX` -# clang and gcc use different symbol-hiding syntax and we need to -# make sure to hide any Adbc* symbols that might conflict with another -# driver. 
-if "$R_BIN" CMD config CC | grep -e "clang" >/dev/null ; then - SYMBOL_ARGS="-Wl,-exported_symbol,_adbcsnowflake_c_snowflake -Wl,-exported_symbol,_R_init_adbcsnowflake" -elif "$R_BIN" CMD config CC | grep -e "gcc" >/dev/null ; then +# Attempt to hide symbols where possible +if "$R_BIN" CMD config CC | grep -e "gcc" >/dev/null ; then SYMBOL_ARGS="-Wl,--version-script=go/symbols.map" fi @@ -148,6 +159,7 @@ sed \ -e "s|@cflags@|$PKG_CPPFLAGS|" \ -e "s|@cc@|$CC|" \ -e "s|@cxx@|$CXX|" \ + -e "s|@nproc@|$GOMAXPROCS|" \ src/Makevars.in > src/Makevars if [ -f "src/go/adbc/pkg/snowflake/driver.go" ]; then diff --git a/r/adbcsnowflake/src/.gitignore b/r/adbcsnowflake/src/.gitignore index fb883613f9..e040acd74e 100644 --- a/r/adbcsnowflake/src/.gitignore +++ b/r/adbcsnowflake/src/.gitignore @@ -17,3 +17,5 @@ adbc.h Makevars +.go-cache +.go-path diff --git a/r/adbcsnowflake/src/Makevars.in b/r/adbcsnowflake/src/Makevars.in index 58dc4a36c9..fcc1c0ee13 100644 --- a/r/adbcsnowflake/src/Makevars.in +++ b/r/adbcsnowflake/src/Makevars.in @@ -21,10 +21,11 @@ PKG_LIBS=-L$(CURDIR)/go -ladbc_driver_snowflake @libs@ CGO_CC = @cc@ CGO_CXX = @cxx@ CGO_CFLAGS = $(ALL_CPPFLAGS) +GOMAXPROCS = @nproc@ .PHONY: all gostatic all: $(SHLIB) $(SHLIB): gostatic gostatic: - (cd "$(CURDIR)/go/adbc"; CC="$(CGO_CC)" CXX="$(CGO_CXX)" CGO_CFLAGS="$(CGO_CFLAGS)" CGO_LDFLAGS="$(PKG_LIBS)" "@gobin@" build -v -tags driverlib -o $(CURDIR)/go/libadbc_driver_snowflake.a -buildmode=c-archive "./pkg/snowflake") + (cd "$(CURDIR)/go/adbc"; GOMAXPROCS=$(GOMAXPROCS) GOPATH="$(CURDIR)/.go-path" GOCACHE="$(CURDIR)/.go-cache" CC="$(CGO_CC)" CXX="$(CGO_CXX)" CGO_CFLAGS="$(CGO_CFLAGS)" CGO_LDFLAGS="$(PKG_LIBS)" "@gobin@" build -v -tags driverlib -o $(CURDIR)/go/libadbc_driver_snowflake.a -buildmode=c-archive "./pkg/snowflake") diff --git a/r/adbcsnowflake/tools/.gitignore b/r/adbcsnowflake/tools/.gitignore index eb6f76fe0f..c8439b9b68 100644 --- a/r/adbcsnowflake/tools/.gitignore +++ b/r/adbcsnowflake/tools/.gitignore @@ -15,4 +15,4 @@ # specific language governing permissions and limitations # under the License. 
-src-go-adbc-vendor.zip* +src-go-adbc-vendor.zip diff --git a/r/adbcsnowflake/tools/create-go-vendor-archive.R b/r/adbcsnowflake/tools/create-go-vendor-archive.R index 04c7ed33d6..a9045b0598 100644 --- a/r/adbcsnowflake/tools/create-go-vendor-archive.R +++ b/r/adbcsnowflake/tools/create-go-vendor-archive.R @@ -20,7 +20,7 @@ go_bin <- Sys.getenv("GO_BIN", unname(Sys.which("go"))) withr::with_dir("src/go/adbc", { system(paste(shQuote(go_bin), "mod vendor -v")) - # go mod vendor for arrow/v13 doesn't include some files needed for go build + # go mod vendor for arrow/v14 doesn't include some files needed for go build tmp_zip <- tempfile() tmp_extract <- tempfile() local({ @@ -30,7 +30,7 @@ withr::with_dir("src/go/adbc", { }) curl::curl_download( - "https://github.com/apache/arrow/archive/refs/tags/go/v13.0.0.zip", + "https://github.com/apache/arrow/archive/refs/tags/go/v14.0.2.zip", tmp_zip ) @@ -38,10 +38,10 @@ withr::with_dir("src/go/adbc", { src_go_arrow_cdata_arrow_dir <- file.path( tmp_extract, - "arrow-go-v13.0.0/go/arrow/cdata/arrow" + "arrow-go-v14.0.2/go/arrow/cdata/arrow" ) - dst_go_arrow_cdata_dir <- "vendor/github.com/apache/arrow/go/v13/arrow/cdata/" + dst_go_arrow_cdata_dir <- "vendor/github.com/apache/arrow/go/v14/arrow/cdata/" stopifnot(file.copy(src_go_arrow_cdata_arrow_dir, dst_go_arrow_cdata_dir, recursive = TRUE)) }) diff --git a/r/adbcsnowflake/tools/download-go-vendor-archive.R b/r/adbcsnowflake/tools/download-go-vendor-archive.R index 5c992c40d7..89902efd79 100644 --- a/r/adbcsnowflake/tools/download-go-vendor-archive.R +++ b/r/adbcsnowflake/tools/download-go-vendor-archive.R @@ -25,4 +25,8 @@ if (identical(uri, "")) { cat(sprintf("Downloading vendored dependency archive from %s\n", uri)) unlink("tools/src-go-adbc-vendor.zip") -download.file(uri, "tools/src-go-adbc-vendor.zip") +local({ + opts <- options(timeout = max(300, getOption("timeout"))) + on.exit(options(opts)) + download.file(uri, "tools/src-go-adbc-vendor.zip", mode = "wb") +}) diff --git a/r/adbcsnowflake/tools/verify-go-vendor-archive.R b/r/adbcsnowflake/tools/verify-go-vendor-archive.R new file mode 100644 index 0000000000..6e1a01008e --- /dev/null +++ b/r/adbcsnowflake/tools/verify-go-vendor-archive.R @@ -0,0 +1,64 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +digest_openssl <- function(f) { + con <- file(f, "rb") + on.exit(close(con)) + as.character(openssl::sha512(con)) +} + +digest_digest <- function(f) { + digest::digest(f, algo = "sha512", file = TRUE) +} + +read_check <- function(f) { + con <- file(f, "rb") + on.exit(close(con)) + scan(con, character(1), n = 1, quiet = TRUE) +} + +verify <- function() { + if (requireNamespace("digest", quietly = TRUE)) { + cat("Using digest::digest() to verify digest\n") + digest <- digest_digest("tools/src-go-adbc-vendor.zip") + } else if (requireNamespace("openssl", quietly = TRUE)) { + cat("Using openssl::sha512() to verify digest\n") + digest <- digest_openssl("tools/src-go-adbc-vendor.zip") + } else { + cat("Neither the openssl nor the digest package is installed to verify the digest\n") + return(FALSE) + } + + digest_check <- read_check("tools/src-go-adbc-vendor.zip.sha512") + result <- identical(digest_check, digest) + + if (isTRUE(result)) { + result + } else { + cat(sprintf("Digest: %s\n", digest)) + cat(sprintf("Check : %s\n", digest_check)) + FALSE + } +} + +result <- try(verify()) + +if (!isTRUE(result) && !interactive()) { + q(status = 1) +} else if (!interactive()) { + q(status = 0) +} diff --git a/r/adbcsqlite/NEWS.md b/r/adbcsqlite/NEWS.md index f77c65de3c..59b9be569b 100644 --- a/r/adbcsqlite/NEWS.md +++ b/r/adbcsqlite/NEWS.md @@ -1,3 +1,11 @@ +# adbcsqlite 0.8.0 + +- Update upstream ADBC libraries to version 0.8.0. + +# adbcsqlite 0.7.0 + +- Update upstream ADBC libraries to version 0.7.0. + # adbcsqlite 0.6.0 * Initial CRAN submission. diff --git a/r/adbcsqlite/cran-comments.md b/r/adbcsqlite/cran-comments.md index 2d2bbbdc12..906efef731 100644 --- a/r/adbcsqlite/cran-comments.md +++ b/r/adbcsqlite/cran-comments.md @@ -4,6 +4,4 @@ Apache Arrow ADBC libraries version. ## R CMD check results -0 errors | 0 warnings | 1 note - -* This is a new release. +0 errors | 0 warnings | 0 notes
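A closing note on the removal of `adbc_stream_join()` from the R driver manager: its replacement is the `stream_join_parent` argument to `adbc_statement_execute_query()`, which hands ownership of the statement to the output stream. A short sketch modelled on the updated test-helpers.R above, using the "monkey" test driver that the package's own tests use:

library(adbcdrivermanager)

db <- local_adbc(adbc_database_init(adbc_driver_monkey()))
con <- local_adbc(adbc_connection_init(db))
stmt <- local_adbc(adbc_statement_init(con, mtcars))

stream <- local_adbc(nanoarrow::nanoarrow_allocate_array_stream())
adbc_statement_execute_query(stmt, stream, stream_join_parent = TRUE)

# The statement handle is now invalid; its release is tied to the
# stream's finalizer, so releasing the stream releases the statement
adbc_xptr_is_valid(stmt) # FALSE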