Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scan new versions for existing libraries #552

Merged
merged 23 commits into from
Nov 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
da38f44
Scan new versions for existing libraries
dnestoro Oct 16, 2024
92e4e15
Remove redudant changes left from testing
dnestoro Oct 25, 2024
3e538b2
Fix function call after refactoring
dnestoro Oct 28, 2024
f1f45fb
Fix task registration after refactoring
dnestoro Oct 28, 2024
289954e
Reduce number of jobs to pass github limitations
dnestoro Oct 28, 2024
43df0e0
Use simple matrix
dnestoro Oct 28, 2024
38b848c
Use env variable to specify library version
dnestoro Oct 29, 2024
00006aa
Find update entry based on the latest supported version
dnestoro Oct 29, 2024
ba22a17
Use jackson to parse index file when fetching latest version
dnestoro Oct 30, 2024
2a338c9
Remove unused import
dnestoro Oct 30, 2024
0841c01
Improve workflow steps titles
dnestoro Oct 30, 2024
2f8cac8
Remove suppress warnings after refactoring
dnestoro Oct 30, 2024
39b9898
Extract github limitations as parameter of the gradle task
dnestoro Nov 5, 2024
e9f0783
Remove unused suppress
dnestoro Nov 6, 2024
fd73700
Use different PR branch name
dnestoro Nov 7, 2024
678eff6
Use bash instead of sh to invoke push script
dnestoro Nov 7, 2024
a133fc4
Add comments into the tryPush script
dnestoro Nov 7, 2024
fbba242
Always extract coordinates part in the same way
dnestoro Nov 7, 2024
d76a418
Extract gradle task for fetching newer versions
dnestoro Nov 8, 2024
7a7824d
Properly add Input and Output anotations to the updater task
dnestoro Nov 8, 2024
055b61a
Rename function that extracts information from provided coordinates
dnestoro Nov 8, 2024
2c2ad05
Use abstract getters for properties in the fetching task
dnestoro Nov 8, 2024
2d614b8
Add a doc file that explains how the scan works
dnestoro Nov 12, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 144 additions & 0 deletions .github/workflows/check-new-library-versions.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
name: "Check new library versions"

# The workflow runs bi-weekly alternating with the scheduled release workflow. This way we have enough time to provide metadata for failing tests.
# In case we need more scans, there is a possibility to trigger the workflow manually.
on:
schedule:
- cron: "0 0 8 * *"
dnestoro marked this conversation as resolved.
Show resolved Hide resolved
- cron: "0 0 22 * *"
workflow_dispatch:

permissions:
contents: write
actions: write

concurrency:
group: "workflow = ${{ github.workflow }}, ref = ${{ github.event.ref }}, pr = ${{ github.event.pull_request.id }}"
cancel-in-progress: true

jobs:
# Finds existing libraries that have newer versions published and prepares the shared
# branch and issue that the per-library test jobs report their results to.
get-all-libraries:
  # Only run in the oracle/graalvm-reachability-metadata repository.
  if: github.repository == 'oracle/graalvm-reachability-metadata'
  name: "📋 Get list of all supported libraries with newer versions"
  permissions: write-all
  runs-on: "ubuntu-20.04"
  timeout-minutes: 5
  env:
    # Token used by the `gh` CLI in the "Create issue" step.
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  outputs:
    matrix: ${{ steps.set-matrix.outputs.matrix }}
    issue: ${{ steps.set-issue.outputs.issue }}
  steps:
    - name: "☁️ Checkout repository"
      uses: actions/checkout@v4
    - name: "🔧 Prepare environment"
      uses: graalvm/setup-graalvm@v1
      with:
        java-version: '21'
        distribution: 'graalvm'
        github-token: ${{ secrets.GITHUB_TOKEN }}
    # NOTE(review): this step has no explicit output command, so the gradle task presumably
    # writes the `matrix` step output itself - confirm in the task implementation.
    - name: "🕸️ Populate matrix"
      id: set-matrix
      run: |
        ./gradlew fetchExistingLibrariesWithNewerVersions --matrixLimit=200
    - name: "🔨 Create branch"
      run: |
        git config --local user.email "[email protected]"
        git config --local user.name "Github Actions"
        # Compute the dated branch name once so both commands are guaranteed to agree on it.
        BRANCH="check-new-library-versions/$(date '+%Y-%m-%d')"
        git switch -C "$BRANCH"
        git push origin "$BRANCH"
    - name: "🔨 Create issue"
      id: set-issue
      run: |
        git config --local user.email "[email protected]"
        git config --local user.name "Github Actions"

        issue_url=$(gh issue create --title "List unsupported library versions" --body "This issue lists unsupported versions of the existing libraries in the repo")
        # The `::set-output` workflow command is deprecated; step outputs go to the $GITHUB_OUTPUT file.
        echo "issue=$issue_url" >> "$GITHUB_OUTPUT"

# Runs the existing tests of a library against one newer version of it.
# One job is spawned per entry of the matrix published by the get-all-libraries job.
test-all-metadata:
  name: "🧪 ${{ matrix.coordinates }} (GraalVM for JDK ${{ matrix.version }} @ ${{ matrix.os }})"
  needs: get-all-libraries
  runs-on: ${{ matrix.os }}
  timeout-minutes: 20
  permissions: write-all
  strategy:
    # A failing library must not cancel the jobs of the other libraries.
    fail-fast: false
    matrix: ${{ fromJson(needs.get-all-libraries.outputs.matrix) }}
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  steps:
    - name: "☁️ Checkout repository"
      uses: actions/checkout@v4
    - name: "🔧 Setup java"
      uses: actions/setup-java@v4
      with:
        java-version: '21'
        distribution: 'oracle'
    - name: "🔧 Prepare environment"
      uses: graalvm/setup-graalvm@v1
      with:
        distribution: 'graalvm'
        java-version: ${{ matrix.version }}
        set-java-home: 'false'
        native-image-job-reports: 'true'
        github-token: ${{ secrets.GITHUB_TOKEN }}
    # LIBRARY_PATH is <group>/<artifact> taken from the first two coordinate fields.
    # LATEST_VERSION is the highest (version-sorted) directory under tests/src/<group>/<artifact>.
    # The derived values are exported through GITHUB_ENV so that later steps can read them.
    - name: "Extract test path and library version"
      run: |
        LIBRARY_PATH=$(echo ${{ matrix.coordinates }} | cut -d ':' -f1-2 | sed 's/:/\//g')
        LATEST_VERSION=$(find tests/src/$LIBRARY_PATH/* -maxdepth 1 -type d | sort -V | tail -1 | cut -d '/' -f5)
        TEST_PATH="$LIBRARY_PATH/$LATEST_VERSION"
        TEST_COORDINATES=$(echo "$TEST_PATH" | tr / :)

        echo "LATEST_VERSION=$LATEST_VERSION" >> ${GITHUB_ENV}
        echo "TEST_PATH=$TEST_PATH" >> ${GITHUB_ENV}
        echo "TEST_COORDINATES=$TEST_COORDINATES" >> ${GITHUB_ENV}
    - name: "Pull allowed docker images"
      run: |
        ./gradlew pullAllowedDockerImages --coordinates=${{ env.TEST_COORDINATES }}
    # NOTE(review): presumably points docker at a discard port so that no further images can be
    # pulled after this step - confirm against discard-port.conf and dockerd.service.
    - name: "Disable docker"
      run: |
        sudo apt-get install openbsd-inetd
        sudo bash -c "cat ./.github/workflows/discard-port.conf >> /etc/inetd.conf"
        sudo systemctl start inetd
        sudo mkdir /etc/systemd/system/docker.service.d
        sudo bash -c "cat ./.github/workflows/dockerd.service > /etc/systemd/system/docker.service.d/http-proxy.conf"
        sudo systemctl daemon-reload
        sudo systemctl restart docker
    # GVM_TCK_LV carries the version under test (third coordinate field) into the gradle run,
    # while the tests themselves are the ones written for TEST_COORDINATES.
    - name: "🧪 Run '${{ env.TEST_COORDINATES }}' tests"
      run: |
        TESTING_VERSION=$(echo ${{ matrix.coordinates }} | cut -d ":" -f3)
        export GVM_TCK_LV=$TESTING_VERSION

        ./gradlew test -Pcoordinates=${{ env.TEST_COORDINATES }}
    # Tests passed: hand the coordinates to the push script that updates the dated branch.
    - name: "✔️ New library is supported"
      if: success()
      run: |
        bash ./.github/workflows/tryPushVersionsUpdate.sh ${{ matrix.coordinates }} ${{ env.LATEST_VERSION }}
    # A previous step failed: report the coordinates as a comment on the issue created earlier.
    - name: "❗ New library is not supported"
      if: failure()
      run: |
        git config --local user.email "[email protected]"
        git config --local user.name "Github Actions"
        gh issue comment "${{ needs.get-all-libraries.outputs.issue }}" --body "${{ matrix.coordinates }}"

# Opens a pull request from the dated branch that the test jobs pushed their updates to.
process-results:
  name: "🧪 Process results"
  needs: [get-all-libraries, test-all-metadata]
  # Run regardless of the outcome of the jobs listed in `needs`.
  if: always()
  runs-on: "ubuntu-20.04"
  permissions: write-all
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  steps:
    - name: "☁️ Checkout repository"
      uses: actions/checkout@v4
    - name: "✏️ PR for supported versions"
      run: |
        git config --local user.email "[email protected]"
        git config --local user.name "Github Actions"
        git fetch origin check-new-library-versions/$(date '+%Y-%m-%d')
        git checkout check-new-library-versions/$(date '+%Y-%m-%d')
        gh pr create --title "Update supported library versions" --body "This pull request updates supported versions of the existing libraries in the repo"
46 changes: 46 additions & 0 deletions .github/workflows/tryPushVersionsUpdate.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/bin/bash
# Runs the addTestedVersion gradle task, which adds a new version to the tested-versions list
# of the proper index.json file, then commits and pushes the change to the dated branch.
#
# Usage: tryPushVersionsUpdate.sh <coordinates> <lastSupportedVersion>
#
# Since the script could be executed from multiple parallel jobs, we want to avoid two things
# here: overwriting of previous changes and merge conflicts.
# To prevent overwriting of changes that some job already created, we only push changes from
# the current job if we are 0 commits behind the origin branch.
# Once that is achieved, we can try to push changes.
# If the push was rejected because of a merge conflict, we remove the changes of the current
# job, rebase, and repeat the process until it succeeds.
#
# (The header used to be written as `:' ... '`; without a space after the colon bash treats
# the whole text as a command name and reports "command not found", so plain comments are used.)

set -x

if [ "$#" -lt 2 ]
then
  echo "usage: $0 <coordinates> <lastSupportedVersion>" >&2
  exit 1
fi

git config --local user.email "[email protected]"
git config --local user.name "Github Actions"

BRANCH="check-new-library-versions/$(date '+%Y-%m-%d')"
git fetch origin "$BRANCH"
git checkout "$BRANCH"

while true
do
  # update the list of tested versions
  ./gradlew addTestedVersion --coordinates="$1" --lastSupportedVersion="$2"

  # commit changes
  git add -u
  git commit -m "$1"

  # only push changes if we are not behind the remote branch
  if [ "$(git rev-list --count "origin/$BRANCH" --not "$BRANCH")" -eq 0 ]
  then
    # try to push changes; if the push was successful, we can exit the loop
    if git push origin "$BRANCH"
    then
      break
    fi
  fi

  # we are either behind the remote branch or we have a merge conflict => remove changes and rebase accepting incoming changes
  git reset --hard HEAD~1
  git fetch origin "$BRANCH"
  git rebase -X theirs "origin/$BRANCH"
done


51 changes: 51 additions & 0 deletions docs/Infrastructure/check-new-versions-of-libraries.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Check new versions of existing libraries in the repository

As the number of libraries in the repository grows quickly, it is hard to track new versions of every library manually.
Instead of doing this process manually, we provided a mechanism (through [a GitHub workflow](https://github.com/oracle/graalvm-reachability-metadata/blob/master/.github/workflows/check-new-library-versions.yml))
that automatically scans MavenCentral repository for new versions of the libraries that we currently have.

## How it works

The workflow is triggered automatically every two weeks (alternating with the automatic release weeks). Besides that, it can be triggered manually from GitHub Actions.
The whole process consists of the following parts:
* Scanning of the MavenCentral
* Running existing tests with newer versions of the library
* Creating a pull-request that updates `tested-versions` field of the `index.json` file for libraries that passed tests with a new version
* Creating an issue that lists all versions of libraries that failed their existing tests.

As a preparation for the whole process, we are creating a branch for all successful tests, and a single issue for all failed tests.

### Scanning the MavenCentral

At first, the workflow runs gradle task called `fetchExistingLibrariesWithNewerVersions`.
The task itself does the following:
1. Gets the list of all existing libraries in the repository
2. For each library, it searches for the latest tested version in the corresponding library `index.json` file
3. For the given library name, it fetches `maven-metadata.xml` file from the MavenCentral repository
4. In the fetched `maven-metadata.xml` file, it finds the position of the latest tested version (gathered in step 2) and returns all the versions after it
5. As a last step, the task returns a list of Maven coordinates of libraries with newer versions (alongside the Java version and OS version required for testing)

### Running existing tests with newer versions

Now that we have the list of coordinates, the GitHub workflow spawns a new job for each coordinate in the list.
Each of the spawned jobs:
1. Extracts the following parts from the given maven coordinates:
1. Latest version that we have tests written for
2. Path to the latest tests we have
3. Maven coordinates of the latest tests
2. Sets `GVM_TCK_LV` env variable to the version we want to test. This way the executed tests will use library version specified in the env variable.
3. Runs the latest tests with `./gradlew test -Pcoordinates=<testCoordinates>` (with `testCoordinates` calculated in step 1)

### Aggregating results of the tests

Based on the outcome of the test we:
* Update the list of `tested-versions` in the proper library `index.json` file and commit changes to the previously created branch, if the test passed
* Add a comment that explains which library version cannot pass the tests, in the issue we previously created

Note: since the spawned jobs run tests in parallel, we need some form of synchronization to avoid merge conflicts when two jobs update the same `index.json` file.
The whole process of synchronization is driven by the [tryPushVersionsUpdate](https://github.com/oracle/graalvm-reachability-metadata/blob/master/.github/workflows/tryPushVersionsUpdate.sh) script.

At the end, when all jobs have finished their executions, the workflow just creates a pull-request based on a branch the jobs committed to.
As a final result, we have:
* a pull-request with updates of all new tested versions
* an issue with a list of all versions that don't work with the existing metadata
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ import org.graalvm.internal.tck.DockerTask
import org.graalvm.internal.tck.ConfigFilesChecker
import org.graalvm.internal.tck.ScaffoldTask
import org.graalvm.internal.tck.GrypeTask
import org.graalvm.internal.tck.TestedVersionUpdaterTask
import org.graalvm.internal.tck.harness.tasks.CheckstyleInvocationTask
import org.graalvm.internal.tck.harness.tasks.FetchExistingLibrariesWithNewerVersionsTask
import org.graalvm.internal.tck.harness.tasks.TestInvocationTask


Expand Down Expand Up @@ -161,6 +163,14 @@ Provider<Task> generateMatrixDiffCoordinates = tasks.register("generateMatrixDif
}
}

// groovy tasks
// Registers the task that reports which already-supported libraries have newer versions available.
// The task receives the coordinates matched by the current coordinate filter as its input.
tasks.register("fetchExistingLibrariesWithNewerVersions", FetchExistingLibrariesWithNewerVersionsTask.class) { task ->
    task.setGroup(METADATA_GROUP)
    // The previous description claimed the task returns all library coordinates, which is not what it reports.
    task.setDescription("Returns list of coordinates of existing libraries that have newer versions")
    task.setAllLibraryCoordinates(matchingCoordinates)
}

// java tasks
tasks.register("checkAllowedDockerImages", GrypeTask.class) { task ->
task.setDescription("Returns list of allowed docker images")
task.setGroup(METADATA_GROUP)
Expand All @@ -182,3 +192,8 @@ tasks.register("checkConfigFiles", ConfigFilesChecker.class) { task ->
task.setDescription("Checks content of config files for a new library.")
task.setGroup(METADATA_GROUP)
}

// Registers the task that updates a library's list of tested versions.
tasks.register("addTestedVersion", TestedVersionUpdaterTask.class) { updater ->
    updater.group = METADATA_GROUP
    updater.description = "Updates list of tested versions."
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,31 +6,33 @@
*/
package org.graalvm.internal.tck.harness;

import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import org.graalvm.internal.tck.model.MetadataVersionsIndexEntry;
import org.gradle.api.Project;
import org.gradle.api.file.Directory;
import org.gradle.api.file.DirectoryProperty;
import org.gradle.api.file.FileSystemLocation;
import org.gradle.api.provider.Property;
import org.gradle.api.provider.Provider;
import org.gradle.process.ExecOperations;
import org.gradle.util.internal.VersionNumber;

import javax.inject.Inject;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.*;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

Expand Down Expand Up @@ -299,9 +301,7 @@ List<String> getMatchingCoordinates(String coordinateFilter) {
String artifactId = strings.get(1);
String version = strings.get(2);


Set<String> matchingCoordinates = new HashSet<>();

for (String directory : getMatchingMetadataDirs(groupId, artifactId)) {
Path index = metadataRoot().resolve(directory).resolve("index.json");
List<Map<String, ?>> metadataIndex = (List<Map<String, ?>>) extractJsonFile(index);
Expand All @@ -322,7 +322,8 @@ List<String> getMatchingCoordinates(String coordinateFilter) {
}
}
}
return matchingCoordinates.stream().collect(Collectors.toList());

return new ArrayList<>(matchingCoordinates);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,10 @@ abstract class AbstractSubprojectTask extends DefaultTask {
// Environment variables for setting up TCK
env.put("GVM_TCK_LC", coordinates)
env.put("GVM_TCK_EXCLUDE", override.toString())
env.put("GVM_TCK_LV", version)
if (System.getenv("GVM_TCK_LV") == null) {
// we only set this env variable if user didn't specify it manually
env.put("GVM_TCK_LV", version)
}
env.put("GVM_TCK_MD", metadataDir.toAbsolutePath().toString())
env.put("GVM_TCK_TCKDIR", tckExtension.getTckRoot().get().getAsFile().toPath().toAbsolutePath().toString())
spec.environment(env)
Expand Down
Loading
Loading