Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ci: update elasticsearch index with merged PR data #67332

Merged
merged 1 commit into from
Jan 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions .github/workflows/stats_merged_prs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
name: Merged PR stats

on:
  pull_request_target:
    branches:
      - main
      - v*-branch
    types: [closed]

jobs:
  record_merged:
    # Only run for PRs actually merged (not merely closed) into the upstream
    # repository, so forks and abandoned PRs never reach the secrets below.
    if: github.event.pull_request.merged == true && github.repository == 'zephyrproject-rtos/zephyr'
    runs-on: ubuntu-22.04
    steps:
      - name: checkout
        # Intentionally checks out the base branch rather than the PR head:
        # with pull_request_target the workflow runs with repository secrets,
        # so the script must come from the trusted tree, not the PR.
        uses: actions/checkout@v3

      - name: PR event
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          ELASTICSEARCH_KEY: ${{ secrets.ELASTICSEARCH_KEY }}
          ELASTICSEARCH_SERVER: "https://elasticsearch.zephyrproject.io:443"
          PR_STAT_INDEX: "pr-test-7"
        run: |
          pip3 install pygithub elasticsearch
          python3 ./scripts/ci/stats/merged_prs.py --pull-request ${{ github.event.pull_request.number }} --repo ${{ github.repository }}
157 changes: 157 additions & 0 deletions scripts/ci/stats/merged_prs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
#!/usr/bin/env python3
# Copyright (c) 2024 Intel Corp.
# SPDX-License-Identifier: Apache-2.0

# Script that operates on a merged PR and sends data to elasticsearch for
# further inspections using the PR dashboard at
# https://kibana.zephyrproject.io/

import sys
import os
from github import Github
import argparse
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
from datetime import timedelta


# strftime format used for all timestamps sent to the Elasticsearch index.
date_format = '%Y-%m-%d %H:%M:%S'

def parse_args() -> argparse.Namespace:
    """Parse the command line and return the populated namespace."""
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        allow_abbrev=False,
    )
    cli.add_argument('--pull-request', type=int, required=True,
                     help='pull request number')
    cli.add_argument('--repo', required=True, help='github repo')
    return cli.parse_args()

def gendata(data, index):
    """Lazily wrap each record in *data* as an Elasticsearch bulk action.

    Every yielded action targets *index* and carries the record unchanged
    as its source document.
    """
    for record in data:
        yield {"_index": index, "_source": record}

def main():
    """Gather stats for one merged PR and upload them to Elasticsearch.

    Requires the GITHUB_TOKEN environment variable; the upload step also
    reads ELASTICSEARCH_SERVER, ELASTICSEARCH_KEY and PR_STAT_INDEX.
    """
    args = parse_args()
    token = os.environ.get('GITHUB_TOKEN')
    if not token:
        sys.exit('Github token not set in environment, please set the '
                 'GITHUB_TOKEN environment variable and retry.')

    gh = Github(token)
    json_list = []
    gh_repo = gh.get_repo(args.repo)

    if args.pull_request:
        pr = gh_repo.get_pull(args.pull_request)
        json_list.append(_pr_record(pr))

    _upload(json_list)


def _pr_record(pr):
    """Build the flat stats dict for one merged pull request."""
    reviews = pr.get_reviews()
    print(f'#{pr.number}: {pr.title} - {pr.comments} Comments, reviews: {reviews.totalCount}, {len(pr.assignees)} Assignees (Updated {pr.updated_at})')

    assignee_reviews = 0
    reviewers = set()
    # Default to "no"; overwritten with "yes" when an assignee reviewed, or
    # "na" when the PR has no assignee at all. (Previously the key was left
    # unset when assignees existed but none of them reviewed.)
    prj = {'reviewed_by_assignee': "no"}

    for review in reviews:
        # Collect logins of everyone who approved the PR.
        if review.user and review.state == 'APPROVED':
            reviewers.add(review.user.login)
        # Count reviews (of any state) submitted by an assignee.
        if pr.assignees and review.user:
            for assignee in pr.assignees:
                if review.user.login == assignee.login:
                    assignee_reviews += 1
                    # Reviewed by at least one assignee.
                    prj['reviewed_by_assignee'] = "yes"

    # This is all data we get easily through the GitHub API and serves as
    # the basis for displaying trends and metrics. It can be extended in
    # the future if more useful information becomes available via the API.
    prj['nr'] = pr.number
    prj['url'] = pr.url
    prj['title'] = pr.title
    prj['comments'] = pr.comments
    prj['reviews'] = reviews.totalCount
    prj['assignees'] = len(pr.assignees)
    prj['updated'] = pr.updated_at.strftime(date_format)
    prj['created'] = pr.created_at.strftime(date_format)
    prj['closed'] = pr.closed_at.strftime(date_format)
    prj['merged_by'] = pr.merged_by.login
    prj['submitted_by'] = pr.user.login
    prj['changed_files'] = pr.changed_files
    prj['additions'] = pr.additions
    prj['deletions'] = pr.deletions
    prj['commits'] = pr.commits
    # The branch we are targeting: main vs release branches.
    prj['base'] = pr.base.ref

    labels = [label.name for label in pr.labels]
    prj['labels'] = labels

    # Take the first assignee; with no assignees the assignee-review rule
    # is not applicable.
    if pr.assignee:
        prj['assignee'] = pr.assignee.login
    else:
        prj['assignee'] = "none"
        prj['reviewed_by_assignee'] = "na"
        prj['review_rule'] = "na"

    # Go through all assignees: report the one who approved, and treat a
    # self-assigned PR as reviewed by an assignee (an author cannot approve
    # their own PR).
    for assignee in pr.assignees:
        if assignee.login in reviewers:
            prj['assignee'] = assignee.login
        elif assignee.login == pr.user.login:
            prj['reviewed_by_assignee'] = "yes"

    # List of assignee logins for the checks below.
    assignees = [a.login for a in pr.assignees]

    # Review rule: assignee approval is not needed for Trivial/Hotfix PRs,
    # or when the person who merged is themselves an assignee.
    if 'Trivial' in labels or 'Hotfix' in labels:
        prj['review_rule'] = "yes"
    elif pr.merged_by.login in assignees:
        prj['review_rule'] = "yes"
    else:
        prj['review_rule'] = "no"

    prj['assignee_reviews'] = assignee_reviews

    delta = pr.closed_at - pr.created_at
    hours_open = delta.total_seconds() / 3600
    prj['hours_open'] = hours_open

    # Time rule per the guidelines: at least 2 business days open, except
    # Trivial/Hotfix PRs; Trivial PRs must still stay open at least 4 hours.
    days_open = (pr.created_at + timedelta(idx + 1) for idx in range(delta.days))
    business_days = sum(1 for day in days_open if day.weekday() < 5)

    if business_days < 2 and not ('Trivial' in labels or 'Hotfix' in labels):
        prj['time_rule'] = False
    elif hours_open < 4 and 'Trivial' in labels:
        prj['time_rule'] = False
    else:
        prj['time_rule'] = True

    prj['reviewers'] = list(reviewers)
    return prj


def _upload(json_list):
    """Bulk-upload the collected PR records to the configured ES index."""
    es = Elasticsearch(
        [os.environ['ELASTICSEARCH_SERVER']],
        api_key=os.environ['ELASTICSEARCH_KEY'],
        # NOTE(review): TLS certificate verification is disabled here;
        # consider shipping the CA bundle and enabling verify_certs.
        verify_certs=False
        )

    try:
        index = os.environ['PR_STAT_INDEX']
        bulk(es, gendata(json_list, index))
    except KeyError as error:
        # Missing environment variable: dump the data locally so the run
        # leaves a trace instead of failing silently.
        print(f"Error: {error} not set.")
        print(json_list)


if __name__ == "__main__":
    main()
Loading