ci: update elasticsearch index with merged PR data
A simple workflow that runs when a PR is merged and updates the
elasticsearch index with merged PR info.
The dashboard for displaying the information can be found here:

https://kibana.zephyrproject.io/

Signed-off-by: Anas Nashif <[email protected]>
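Once records land in the index, they can be spot-checked directly against Elasticsearch. A minimal sketch, assuming the elasticsearch-py 8.x client and the server/index names used by the workflow below; the API key and PR number are placeholders, not part of this commit:

    from elasticsearch import Elasticsearch

    es = Elasticsearch(["https://elasticsearch.zephyrproject.io:443"],
                       api_key="<api key>", verify_certs=False)
    # 12345 is a placeholder for any merged PR number.
    resp = es.search(index="pr-test-7", query={"term": {"nr": 12345}})
    for hit in resp["hits"]["hits"]:
        print(hit["_source"]["title"], hit["_source"]["hours_open"])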
nashif committed Jan 10, 2024
1 parent c1a1e45 commit 29a4e04
Showing 2 changed files with 181 additions and 0 deletions.
24 changes: 24 additions & 0 deletions .github/workflows/stats_merged_prs.yml
@@ -0,0 +1,24 @@
name: Merged PR stats

on:
  pull_request_target:
    branches:
      - main
      - v*-branch
    types: [closed]
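# Note: pull_request_target runs in the context of the base repository, so the
# secrets referenced below are available even when the PR comes from a fork.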
jobs:
  record_merged:
    if: github.event.pull_request.merged == true && github.repository == 'zephyrproject-rtos/zephyr'
    runs-on: ubuntu-22.04
    steps:
      - name: checkout
        uses: actions/checkout@v3
      - name: PR event
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          ELASTICSEARCH_KEY: ${{ secrets.ELASTICSEARCH_KEY }}
          ELASTICSEARCH_SERVER: "https://elasticsearch.zephyrproject.io:443"
          PR_STAT_INDEX: "pr-test-7"
        run: |
          pip3 install pygithub elasticsearch
          python3 ./scripts/ci/stats/merged_prs.py --pull-request ${{ github.event.pull_request.number }} --repo ${{ github.repository }}
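The script is driven entirely by environment variables and CLI flags, so it can also be exercised outside CI. A minimal local sketch; every concrete value below is a placeholder, not part of this commit:

    import os
    import subprocess

    env = dict(os.environ,
               GITHUB_TOKEN="<personal access token>",
               ELASTICSEARCH_SERVER="https://elasticsearch.zephyrproject.io:443",
               ELASTICSEARCH_KEY="<api key>",
               PR_STAT_INDEX="pr-test-7")
    # 12345 is a placeholder for any merged PR number.
    subprocess.run(["python3", "scripts/ci/stats/merged_prs.py",
                    "--pull-request", "12345",
                    "--repo", "zephyrproject-rtos/zephyr"],
                   env=env, check=True)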
157 changes: 157 additions & 0 deletions scripts/ci/stats/merged_prs.py
@@ -0,0 +1,157 @@
#!/usr/bin/env python3
# Copyright (c) 2024 Intel Corp.
# SPDX-License-Identifier: Apache-2.0

# Script that operates on a merged PR and sends data to elasticsearch for
# further inspection using the PR dashboard at
# https://kibana.zephyrproject.io/

import sys
import os
from github import Github
import argparse
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
from datetime import timedelta


date_format = '%Y-%m-%d %H:%M:%S'

def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, allow_abbrev=False)

    parser.add_argument('--pull-request', required=True, help='pull request number', type=int)
    parser.add_argument('--repo', required=True, help='github repo')

    return parser.parse_args()

def gendata(data, index):
    for t in data:
        yield {
            "_index": index,
            "_source": t
        }
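# For illustration: list(gendata([{'nr': 1}], 'pr-test-7')) yields
# [{'_index': 'pr-test-7', '_source': {'nr': 1}}], i.e. one bulk action per
# PR record, which is the shape elasticsearch.helpers.bulk expects.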

def main():
    args = parse_args()
    token = os.environ.get('GITHUB_TOKEN')
    if not token:
        sys.exit('Github token not set in environment, please set the '
                 'GITHUB_TOKEN environment variable and retry.')

    gh = Github(token)
    json_list = []
    gh_repo = gh.get_repo(args.repo)

    if args.pull_request:
        pr = gh_repo.get_pull(args.pull_request)

        reviews = pr.get_reviews()
        print(f'#{pr.number}: {pr.title} - {pr.comments} Comments, reviews: {reviews.totalCount}, {len(pr.assignees)} Assignees (Updated {pr.updated_at})')
        assignee_reviews = 0
        reviewers = set()
        prj = {}
        for r in reviews:
            if r.user and r.state == 'APPROVED':
                reviewers.add(r.user.login)
                if pr.assignees and r.user:
                    for assignee in pr.assignees:
                        if r.user.login == assignee.login:
                            assignee_reviews = assignee_reviews + 1
                            # was reviewed at least by one assignee
                            prj['reviewed_by_assignee'] = "yes"

        # This is all data we get easily through the Github API and serves as
        # the basis for displaying some trends and metrics.
        # Data can be extended in the future if we find more information that
        # is useful through the API.

        prj['nr'] = pr.number
        prj['url'] = pr.url
        prj['title'] = pr.title
        prj['comments'] = pr.comments
        prj['reviews'] = reviews.totalCount
        prj['assignees'] = len(pr.assignees)
        prj['updated'] = pr.updated_at.strftime(date_format)
        prj['created'] = pr.created_at.strftime(date_format)
        prj['closed'] = pr.closed_at.strftime(date_format)
        prj['merged_by'] = pr.merged_by.login
        prj['submitted_by'] = pr.user.login
        prj['changed_files'] = pr.changed_files
        prj['additions'] = pr.additions
        prj['deletions'] = pr.deletions
        prj['commits'] = pr.commits
        # The branch we are targeting: main vs. release branches.
        prj['base'] = pr.base.ref

        ll = []
        for l in pr.labels:
            ll.append(l.name)
        prj['labels'] = ll
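        # A resulting record looks roughly like this (values illustrative):
        # {'nr': 12345, 'title': '...', 'comments': 3, 'reviews': 5,
        #  'assignees': 1, 'base': 'main', 'labels': ['bug'], ...}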

        # take first assignee, otherwise we have no assignees and this rule is not applicable
        if pr.assignee:
            prj['assignee'] = pr.assignee.login
        else:
            prj['assignee'] = "none"
            prj['reviewed_by_assignee'] = "na"
            prj['review_rule'] = "na"

        # go through all assignees and check if anyone has approved and reset assignee to the one who approved
        for assignee in pr.assignees:
            if assignee.login in reviewers:
                prj['assignee'] = assignee.login
            elif assignee.login == pr.user.login:
                prj['reviewed_by_assignee'] = "yes"


        # list assignees for later checks
        assignees = [a.login for a in pr.assignees]

        # Deal with exceptions when assignee approval is not needed.
        if 'Trivial' in ll or 'Hotfix' in ll:
            prj['review_rule'] = "yes"
        elif pr.merged_by.login in assignees:
            prj['review_rule'] = "yes"
        else:
            prj['review_rule'] = "no"

        prj['assignee_reviews'] = assignee_reviews

        delta = pr.closed_at - pr.created_at
        deltah = delta.total_seconds() / 3600
        prj['hours_open'] = deltah

        dates = (pr.created_at + timedelta(idx + 1)
                 for idx in range((pr.closed_at - pr.created_at).days))

        # Get the number of business days per the guidelines; we need at least 2.
        res = sum(1 for day in dates if day.weekday() < 5)

        if res < 2 and not ('Trivial' in ll or 'Hotfix' in ll):
            prj['time_rule'] = False
        elif deltah < 4 and 'Trivial' in ll:
            prj['time_rule'] = False
        else:
            prj['time_rule'] = True
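        # Worked example (hypothetical dates): a PR created Friday 10:00 and
        # merged Monday 10:00 is open 72 hours; the generator above yields
        # Sat/Sun/Mon, of which only Monday is a business day (res == 1), so
        # time_rule is False unless the PR carries a Trivial or Hotfix label.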
        prj['reviewers'] = list(reviewers)

        json_list.append(prj)


    # Send data over to elasticsearch.
    es = Elasticsearch(
        [os.environ['ELASTICSEARCH_SERVER']],
        api_key=os.environ['ELASTICSEARCH_KEY'],
        verify_certs=False
    )

    try:
        index = os.environ['PR_STAT_INDEX']
        bulk(es, gendata(json_list, index))
    except KeyError as e:
        print(f"Error: {e} not set.")
        print(json_list)

if __name__ == "__main__":
    main()
