Skip to content

Commit

Permalink
Merge branch 'master' into add-rag-ingestion-enrichment
Browse files Browse the repository at this point in the history
  • Loading branch information
Claude committed Jan 9, 2025
2 parents 232461b + d50cc15 commit b1a8f89
Show file tree
Hide file tree
Showing 125 changed files with 2,694 additions and 701 deletions.
3 changes: 3 additions & 0 deletions .github/REVIEWERS.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ labels:
reviewers:
- igorbernstein2
- mutianf
- djyau
- andre-sampaio
- meeral-k
exclusionList: []
- name: healthcare
reviewers:
Expand Down
28 changes: 27 additions & 1 deletion .github/actions/setup-environment-action/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,41 @@ inputs:
required: false
description: 'Whether to disable the gradle cache'
default: false
# Optional switch for pip caching in the "Install Python" step; enabled by default.
python-cache:
required: false
description: 'Whether to enable Python pip caching'
default: true
# Optional switch for the "Cache tox environments" step, which runs only when
# this input equals the string 'true'; enabled by default.
tox-cache:
required: false
description: 'Whether to enable tox environment caching'
default: true

runs:
using: "composite"
steps:
- name: Install Python
if: ${{ inputs.python-version != '' }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
  # 'default' is mapped to Python 3.9; any other value is passed through as-is.
  python-version: ${{ inputs.python-version == 'default' && '3.9' || inputs.python-version }}
  # Action inputs arrive as strings, so the flag must be compared with 'true':
  # a bare non-empty string (even 'false') is truthy and would always pick 'pip'.
  # When caching is disabled an empty value is passed, because setup-python only
  # accepts pip, pipenv or poetry for `cache`.
  cache: ${{ inputs.python-cache == 'true' && 'pip' || '' }}
  # Files whose contents determine the pip cache key.
  cache-dependency-path: |
    sdks/python/setup.py
    sdks/python/tox.ini
# Caches the .tox directory under sdks/python/target (minus its log folders and
# .package_cache). Runs only when a Python version was requested and the
# tox-cache input equals 'true'.
# The cache key combines the runner OS, the Python version ('default' maps to
# '39') and the hashes of tox.ini and setup.py; the restore keys fall back first
# to any setup.py hash and then to any tox.ini hash for the same OS and version.
- name: Cache tox environments
if: ${{ inputs.python-version != '' && inputs.tox-cache == 'true' }}
uses: actions/cache@v3
with:
path: |
sdks/python/target/.tox
!sdks/python/target/.tox/**/log
!sdks/python/target/.tox/.package_cache
key: tox-${{ runner.os }}-py${{ inputs.python-version == 'default' && '39' || inputs.python-version }}-${{ hashFiles('sdks/python/tox.ini') }}-${{ hashFiles('sdks/python/setup.py') }}
restore-keys: |
tox-${{ runner.os }}-py${{ inputs.python-version == 'default' && '39' || inputs.python-version }}-${{ hashFiles('sdks/python/tox.ini') }}-
tox-${{ runner.os }}-py${{ inputs.python-version == 'default' && '39' || inputs.python-version }}-
- name: Install Java
if: ${{ inputs.java-version != '' }}
uses: actions/setup-java@v3
Expand Down
4 changes: 2 additions & 2 deletions .github/trigger_files/IO_Iceberg_Integration_Tests.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"modification": 2
"comment": "Modify this file in a trivial way to cause this test suite to run.",
"modification": 1
}
2 changes: 1 addition & 1 deletion .github/trigger_files/beam_PostCommit_Java_DataflowV2.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"modification": 2
"modification": 3
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"modification": 2
"modification": 3
}
95 changes: 95 additions & 0 deletions .github/workflows/beam_Publish_Java_SDK_Distroless_Snapshots.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: Publish Beam Java SDK Distroless Snapshots

# Triggers: a cron schedule (minute 45 of every 8th hour) and manual dispatch.
on:
schedule:
- cron: '45 */8 * * *'
workflow_dispatch:

# Set explicit permissions for this workflow to avoid the default permissions, which are `write-all` in case of a pull_request_target event.
# Only `actions` is granted write access; `id-token` is disabled and everything else is read-only.
permissions:
actions: write
pull-requests: read
checks: read
contents: read
deployments: read
id-token: none
issues: read
discussions: read
packages: read
pages: read
repository-projects: read
security-events: read
statuses: read

# This allows a subsequently queued workflow run to interrupt previous runs
concurrency:
group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.sender.login }}'
cancel-in-progress: true

# The first three values are read from repository secrets; docker_registry is the
# registry host passed to `gcloud auth configure-docker` in the job below.
env:
DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
docker_registry: gcr.io

jobs:
# Builds and pushes one distroless Java SDK image per java_version in the matrix.
Java_SDK_Distroless_Snapshots:
# Manual dispatches always run; scheduled runs are limited to the apache/beam repository.
if: |
github.event_name == 'workflow_dispatch' ||
(github.event_name == 'schedule' && github.repository == 'apache/beam')
runs-on: [self-hosted, ubuntu-20.04, main]
timeout-minutes: 160
name: ${{ matrix.job_name }} (${{ matrix.java_version }})
strategy:
# A failure for one Java version does not cancel the other matrix entries.
fail-fast: false
matrix:
job_name: ["Java_SDK_Distroless_Snapshots"]
job_phrase: ["N/A"]
java_version:
- "java17"
- "java21"
steps:
- uses: actions/checkout@v4
- name: Setup repository
uses: ./.github/actions/setup-action
with:
comment_phrase: ${{ matrix.job_phrase }}
github_token: ${{ secrets.GITHUB_TOKEN }}
github_job: ${{ matrix.job_name }} (${{ matrix.java_version }})
- name: Find Beam Version
# We extract the Beam version here and tag the containers with it. Version will be in the form "2.xx.y.dev".
# This is needed to run pipelines that use the default environment at HEAD, for example, when a
# pipeline uses an expansion service built from HEAD.
# The value is exported as BEAM_VERSION through GITHUB_ENV for the later steps.
run: |
BEAM_VERSION_LINE=$(cat gradle.properties | grep "sdk_version")
echo "BEAM_VERSION=${BEAM_VERSION_LINE#*sdk_version=}" >> $GITHUB_ENV
# NOTE(review): v1 is an old major version of setup-buildx-action; confirm whether a newer major should be used.
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
- name: GCloud Docker credential helper
run: |
gcloud auth configure-docker ${{ env.docker_registry }}
# Pushes the image under three tags (commit SHA, Beam version, latest). The build receives
# the regular Beam Java SDK image of the same version as BEAM_BASE and the matching
# gcr.io/distroless Java image as DISTROLESS_BASE.
- name: Build and push Java distroless image
run: |
docker buildx build --push \
-t gcr.io/apache-beam-testing/beam-sdk/beam_${{ matrix.java_version }}_sdk_distroless:${{ github.sha }} \
-t gcr.io/apache-beam-testing/beam-sdk/beam_${{ matrix.java_version }}_sdk_distroless:${BEAM_VERSION} \
-t gcr.io/apache-beam-testing/beam-sdk/beam_${{ matrix.java_version }}_sdk_distroless:latest \
-f sdks/java/container/Dockerfile-distroless \
--build-arg=BEAM_BASE=gcr.io/apache-beam-testing/beam-sdk/beam_${{ matrix.java_version }}_sdk:${BEAM_VERSION} \
--build-arg=DISTROLESS_BASE=gcr.io/distroless/${{ matrix.java_version }}-debian12 \
.
92 changes: 92 additions & 0 deletions .github/workflows/beam_Publish_Python_SDK_Distroless_Snapshots.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: Publish Beam Python SDK Distroless Snapshots

# Triggers: a cron schedule (minute 45 of every 8th hour) and manual dispatch.
on:
schedule:
- cron: '45 */8 * * *'
workflow_dispatch:

# Set explicit permissions for this workflow to avoid the default permissions, which are `write-all` in case of a pull_request_target event.
# Only `actions` is granted write access; `id-token` is disabled and everything else is read-only.
permissions:
actions: write
pull-requests: read
checks: read
contents: read
deployments: read
id-token: none
issues: read
discussions: read
packages: read
pages: read
repository-projects: read
security-events: read
statuses: read

# This allows a subsequently queued workflow run to interrupt previous runs
concurrency:
group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.sender.login }}'
cancel-in-progress: true

# The first three values are read from repository secrets; docker_registry is the
# registry host passed to `gcloud auth configure-docker` in the job below.
env:
DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
docker_registry: gcr.io

jobs:
# Prepares the environment for publishing distroless Python SDK images, one matrix
# entry per python_version. No image is built or pushed yet; see the TODO at the end.
Python_SDK_Distroless_Snapshots:
# Manual dispatches always run; scheduled runs are limited to the apache/beam repository.
if: |
github.event_name == 'workflow_dispatch' ||
(github.event_name == 'schedule' && github.repository == 'apache/beam')
runs-on: [self-hosted, ubuntu-20.04, main]
timeout-minutes: 160
name: ${{ matrix.job_name }} (${{ matrix.python_version }})
strategy:
# A failure for one Python version does not cancel the other matrix entries.
fail-fast: false
matrix:
job_name: ["Python_SDK_Distroless_Snapshots"]
job_phrase: ["N/A"]
python_version:
- "python3.9"
- "python3.10"
- "python3.11"
- "python3.12"
steps:
- uses: actions/checkout@v4
- name: Setup repository
uses: ./.github/actions/setup-action
with:
comment_phrase: ${{ matrix.job_phrase }}
github_token: ${{ secrets.GITHUB_TOKEN }}
github_job: ${{ matrix.job_name }} (${{ matrix.python_version }})
- name: Find Beam Version
# We extract the Beam version here and tag the containers with it. Version will be in the form "2.xx.y.dev".
# This is needed to run pipelines that use the default environment at HEAD, for example, when a
# pipeline uses an expansion service built from HEAD.
# The value is exported as BEAM_VERSION through GITHUB_ENV for the later steps.
run: |
BEAM_VERSION_LINE=$(cat gradle.properties | grep "sdk_version")
echo "BEAM_VERSION=${BEAM_VERSION_LINE#*sdk_version=}" >> $GITHUB_ENV
# NOTE(review): v1 is an old major version of setup-buildx-action; confirm whether a newer major should be used.
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
- name: GCloud Docker credential helper
run: |
gcloud auth configure-docker ${{ env.docker_registry }}
# TODO(https://github.com/apache/beam/issues/32914): create after merging into main branch
# - name: Build and push Python distroless image



3 changes: 2 additions & 1 deletion .github/workflows/build_wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -219,14 +219,15 @@ jobs:
runs-on: ${{ matrix.os_python.runner }}
timeout-minutes: 480
strategy:
fail-fast: false
matrix:
os_python: [
{"os": "ubuntu-20.04", "runner": [self-hosted, ubuntu-20.04, main], "python": "${{ needs.check_env_variables.outputs.py-versions-full }}", arch: "auto" },
# Temporarily pin to macos-13 because macos-latest breaks this build
# TODO(https://github.com/apache/beam/issues/31114)
{"os": "macos-13", "runner": "macos-13", "python": "${{ needs.check_env_variables.outputs.py-versions-test }}", arch: "auto" },
{"os": "windows-latest", "runner": "windows-latest", "python": "${{ needs.check_env_variables.outputs.py-versions-test }}", arch: "auto" },
{"os": "ubuntu-20.04", "runner": [self-hosted, ubuntu-20.04, main], "python": "${{ needs.check_env_variables.outputs.py-versions-test }}", arch: "aarch64" }
{"os": "ubuntu-20.04", "runner": "ubuntu-latest", "python": "${{ needs.check_env_variables.outputs.py-versions-test }}", arch: "aarch64" }
]
# Keep in sync (remove asterisks) with PY_VERSIONS_FULL env var above - if changed, change that as well.
py_version: ["cp39-", "cp310-", "cp311-", "cp312-"]
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/python_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,8 @@ jobs:
matrix:
os: [macos-latest, windows-latest]
params: [
{"py_ver": "3.9", "tox_env": "py39"},
{"py_ver": "3.10", "tox_env": "py310" },
{ "py_ver": "3.9", "tox_env": "py39" },
{ "py_ver": "3.10", "tox_env": "py310" },
{ "py_ver": "3.11", "tox_env": "py311" },
{ "py_ver": "3.12", "tox_env": "py312" },
]
Expand Down
20 changes: 13 additions & 7 deletions .github/workflows/republish_released_docker_containers.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,16 @@ on:
inputs:
RELEASE:
description: Beam version of current release (e.g. 2.XX.0)
required: true
default: ''
required: false
RC:
description: Integer RC version for the release (e.g. 3 for RC3)
required: true
default: ''
required: false
schedule:
- cron: "0 6 * * 1"
env:
docker_registry: gcr.io
release: ${{ github.event.inputs.RELEASE || "2.61.0" }}
rc: ${{ github.event.inputs.RC || "3" }}
release: "${{ github.event.inputs.RELEASE || '2.61.0' }}"
rc: "${{ github.event.inputs.RC || '3' }}"

jobs:

Expand Down Expand Up @@ -69,5 +67,13 @@ jobs:
run: |
gcloud auth configure-docker ${{ env.docker_registry }}
- name: Push docker images
run: ./gradlew :pushAllDockerImages -PisRelease -Pdocker-pull-licenses -Pprune-images -Pdocker-repository-root=gcr.io/apache-beam-testing/updated_released_container_images -Pdocker-tag=${{ env.release }}rc${{ env.rc }} --no-daemon --no-parallel
run: |
./gradlew :pushAllDockerImages \
-PisRelease \
-Pdocker-pull-licenses \
-Pprune-images \
-Pdocker-repository-root=gcr.io/apache-beam-testing/updated_released_container_images \
-Pdocker-tag-list=${{ env.release }},${{ github.sha }},$(date +'%Y-%m-%d') \
--no-daemon \
--no-parallel
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ sdks/**/vendor/**/*
runners/**/vendor/**/*
**/.gradletasknamecache
**/generated/*
/go.mod
/go.sum

# Ignore sources generated into the main tree
**/src/main/generated/**
Expand Down
4 changes: 2 additions & 2 deletions .test-infra/jenkins/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,11 @@ task generateMetricsReport {
doLast {
exec {
executable 'sh'
args '-c', ". ${envdir}/bin/activate && tox -e py38-test -c ${toxConfigFilePath}"
args '-c', ". ${envdir}/bin/activate && tox -e py39-test -c ${toxConfigFilePath}"
}
exec {
executable 'sh'
args '-c', ". ${envdir}/bin/activate && tox -e py38-generate-report -c ${toxConfigFilePath} -- --influx-db=${influxDb} --influx-host=${influxHost} --influx-port=${influxPort} --output-file=${generateMetricsReportPath}"
args '-c', ". ${envdir}/bin/activate && tox -e py39-generate-report -c ${toxConfigFilePath} -- --influx-db=${influxDb} --influx-host=${influxHost} --influx-port=${influxPort} --output-file=${generateMetricsReportPath}"
}
logger.info('Create metrics report file {}', generateMetricsReportPath)
}
Expand Down
4 changes: 2 additions & 2 deletions .test-infra/jenkins/metrics_report/tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@ commands_pre =
pip --version
pip check

[testenv:py38-test]
[testenv:py39-test]
deps = -r requirements.txt
passenv = WORKSPACE,INFLUXDB_USER,INFLUXDB_USER_PASSWORD
commands = python -m unittest dashboards_parser.py

[testenv:py38-generate-report]
[testenv:py39-generate-report]
deps = -r requirements.txt
passenv = WORKSPACE,INFLUXDB_USER,INFLUXDB_USER_PASSWORD,GITHUB_WORKSPACE
commands = python report_generator.py {posargs}
5 changes: 4 additions & 1 deletion .test-infra/tools/stale_k8s_workload_cleaner.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,10 @@ function should_teardown() {
gcloud container clusters get-credentials io-datastores --zone us-central1-a --project apache-beam-testing

# Walk every namespace reported by kubectl and delete the stale Beam test namespaces.
while read NAME STATUS AGE; do
  # Temporary workaround: skip beam-performancetests-singlestoreio-* namespaces so that
  # deleting them cannot leave them stuck in a terminal state.
  # See https://github.com/apache/beam/pull/33545 for context.
  # This may be safe to remove if https://cloud.google.com/knowledge/kb/deleted-namespace-remains-in-terminating-status-000004867 has been resolved, just try it before checking in :)
  # The exclusion is a separate glob test because bash's =~ operator uses POSIX extended
  # regular expressions, which have no lookahead syntax such as (?!...).
  if [[ $NAME =~ ^beam-.+(test|-it) ]] && [[ $NAME != beam-performancetests-singlestoreio-* ]] && should_teardown $AGE; then
    kubectl delete namespace $NAME
  fi
done < <( kubectl get namespaces --context=gke_${PROJECT}_${LOCATION}_${CLUSTER} )
Loading

0 comments on commit b1a8f89

Please sign in to comment.