diff --git a/.github/workflows/pmml.yml b/.github/workflows/pmml.yml
index 68eb0ef..27313f7 100644
--- a/.github/workflows/pmml.yml
+++ b/.github/workflows/pmml.yml
@@ -41,3 +41,39 @@ jobs:
       - name: Run All Steps (1-3)
         run: |
           python run_all_steps.py
+
+      - name: Run tests
+        run: |
+          python utils/metrics_test.py
+
+      - name: Compute metrics
+        id: compute_metrics
+        shell: bash
+        run: |
+          accuracy=$(grep -Po 'accuracy_score: \K[0-9.]+' Metrics_output.txt)
+          mae=$(grep -Po 'MAE: \K[0-9.]+' Metrics_output.txt)
+          echo "Accuracy: $accuracy"
+          echo "MAE: $mae"
+          echo "accuracy=$accuracy" >> "$GITHUB_OUTPUT"
+          echo "mae=$mae" >> "$GITHUB_OUTPUT"
+
+      - name: Post results to PR
+        if: github.event_name == 'pull_request'
+        uses: actions/github-script@v6
+        with:
+          script: |
+            const accuracy = "${{ steps.compute_metrics.outputs.accuracy }}";
+            const mae = "${{ steps.compute_metrics.outputs.mae }}";
+            const body = `
+            ### Metrics from latest run:
+            - **Accuracy**: ${accuracy}
+            - **MAE**: ${mae}
+            `;
+
+            github.rest.issues.createComment({
+              issue_number: context.issue.number,
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              body: body
+            });
+
diff --git a/pmml/Metrics_output.txt b/pmml/Metrics_output.txt
new file mode 100644
index 0000000..219a18f
--- /dev/null
+++ b/pmml/Metrics_output.txt
@@ -0,0 +1,2 @@
+accuracy_score: 0.75
+MAE: 0.3333333333333333
diff --git a/pmml/utils/Metrics_output.txt b/pmml/utils/Metrics_output.txt
new file mode 100644
index 0000000..219a18f
--- /dev/null
+++ b/pmml/utils/Metrics_output.txt
@@ -0,0 +1,2 @@
+accuracy_score: 0.75
+MAE: 0.3333333333333333
diff --git a/pmml/utils/metrics_test.py b/pmml/utils/metrics_test.py
new file mode 100644
index 0000000..752cc74
--- /dev/null
+++ b/pmml/utils/metrics_test.py
@@ -0,0 +1,22 @@
+from sklearn.metrics import accuracy_score, mean_absolute_error
+import pandas
+from os.path import basename
+
+# Load the preprocessed test data CSV into a DataFrame
+storybooks_csv_path = '../step1_prepare/step1_3_storybooks_test.csv'
+storybooks_dataframe = pandas.read_csv(storybooks_csv_path)
+val_y = storybooks_dataframe['reading_level']
+
+# Load predicted values as a Series (squeeze the single CSV column)
+val_predictions = pandas.read_csv('../step3_predict/step3_2_predictions.csv').squeeze('columns')
+
+accuracy = accuracy_score(val_y, val_predictions)
+print(basename(__file__), f'accuracy_score: {accuracy}')
+
+mae = mean_absolute_error(val_y, val_predictions)
+print(basename(__file__), f'MAE: {mae}')
+
+# Save the results to a file for the GitHub workflow to read
+with open('Metrics_output.txt', 'w') as f:
+    f.write(f'accuracy_score: {accuracy}\n')
+    f.write(f'MAE: {mae}\n')
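
Note (outside the diff): the "Compute metrics" step above parses Metrics_output.txt with grep. For a local sanity check of that same key: value format, a minimal Python sketch follows; the parse_metrics helper name and default path are illustrative assumptions, not code from this PR.

    import re

    # Mirror the workflow's grep patterns:
    #   grep -Po 'accuracy_score: \K[0-9.]+'  and  grep -Po 'MAE: \K[0-9.]+'
    def parse_metrics(path='Metrics_output.txt'):
        with open(path) as f:
            text = f.read()
        accuracy = float(re.search(r'accuracy_score: ([0-9.]+)', text).group(1))
        mae = float(re.search(r'MAE: ([0-9.]+)', text).group(1))
        return accuracy, mae

    if __name__ == '__main__':
        accuracy, mae = parse_metrics()
        print(f'Accuracy: {accuracy}')
        print(f'MAE: {mae}')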