ci: update elasticsearch index with merged PR data
A simple workflow that runs when a PR is merged and updates the
Elasticsearch index with the merged PR's information.
The dashboard for displaying the information can be found here:

https://kibana.zephyrproject.io/

Signed-off-by: Anas Nashif <[email protected]>
nashif committed Jan 9, 2024
1 parent fe75bb9 commit 07f1a3d
Showing 2 changed files with 161 additions and 0 deletions.
24 changes: 24 additions & 0 deletions .github/workflows/stats_merged_prs.yml
@@ -0,0 +1,24 @@
name: Merged PR stats

on:
  pull_request_target:
    branches:
      - main
      - v*-branch
    types: [closed]
jobs:
  record_merged:
    if: github.event.pull_request.merged == true && github.repository == 'zephyrproject-rtos/zephyr'
    runs-on: ubuntu-22.04
    steps:
      - name: checkout
        uses: actions/checkout@v3
      - name: PR event
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          ELASTICSEARCH_KEY: ${{ secrets.ELASTICSEARCH_KEY }}
          ELASTICSEARCH_SERVER: "https://elasticsearch.zephyrproject.io:443"
          PR_STAT_INDEX: "pr-test-7"
        run: |
          pip3 install pygithub elasticsearch
          python3 ./scripts/ci/stats/merged_prs.py --pull-request ${{ github.event.pull_request.number }} --repo ${{ github.repository }}
137 changes: 137 additions & 0 deletions scripts/ci/stats/merged_prs.py
@@ -0,0 +1,137 @@
#!/usr/bin/env python3
# Copyright (c) 2024 Intel Corp.
# SPDX-License-Identifier: Apache-2.0

# Script that operates on a merged PR and sends data to Elasticsearch for
# further inspection using the PR dashboard at
# https://kibana.zephyrproject.io/

import argparse
import os
import sys
from datetime import timedelta

from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
from github import Github


date_format = '%Y-%m-%d %H:%M:%S'

def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, allow_abbrev=False)

    parser.add_argument('--pull-request', required=True, help='pull request number', type=int)
    parser.add_argument('--repo', required=True, help='github repo')

    return parser.parse_args()

def gendata(data, index):
    for t in data:
        yield {
            "_index": index,
            "_source": t
        }
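# Each dict yielded by gendata() is one bulk "action":
# elasticsearch.helpers.bulk() consumes an iterable of such actions and
# indexes every document into `index` via batched _bulk requests, rather
# than issuing one HTTP call per PR.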

def main():
    args = parse_args()
    token = os.environ.get('GITHUB_TOKEN')
    if not token:
        sys.exit('GitHub token not set in environment, please set the '
                 'GITHUB_TOKEN environment variable and retry.')

    gh = Github(token)
    json_list = []
    gh_repo = gh.get_repo(args.repo)

    if args.pull_request:
        pr = gh_repo.get_pull(args.pull_request)

        reviews = pr.get_reviews()
        print(f'#{pr.number}: {pr.title} - {pr.comments} Comments, '
              f'reviews: {reviews.totalCount}, {len(pr.assignees)} Assignees '
              f'(Updated {pr.updated_at})')
        assignee_reviews = 0
        reviewers = set()
        for r in reviews:
            if r.user and r.state == 'APPROVED':
                reviewers.add(r.user.login)
            if pr.assignee and r.user:
                if r.user.login == pr.assignee.login:
                    assignee_reviews = assignee_reviews + 1

        prj = {}
        # This is all data we can get easily through the GitHub API; it
        # serves as the basis for displaying trends and metrics, and can be
        # extended in the future if the API exposes more useful information.

        prj['nr'] = pr.number
        prj['url'] = pr.url
        prj['title'] = pr.title
        prj['comments'] = pr.comments
        prj['reviews'] = reviews.totalCount
        prj['assignees'] = len(pr.assignees)
        prj['updated'] = pr.updated_at.strftime(date_format)
        prj['created'] = pr.created_at.strftime(date_format)
        prj['closed'] = pr.closed_at.strftime(date_format)
        prj['merged_by'] = pr.merged_by.login
        prj['submitted_by'] = pr.user.login
        prj['changed_files'] = pr.changed_files
        prj['additions'] = pr.additions
        prj['deletions'] = pr.deletions
        prj['commits'] = pr.commits
        # The branch the PR targets: main vs. release branches.
        prj['base'] = pr.base.ref

        # pr.labels yields Label objects; collect the names so they can be
        # compared against plain strings like 'Hotfix' and 'Trivial'.
        labels = [label.name for label in pr.labels]

        if pr.assignee:
            prj['assignee'] = pr.assignee.login
            prj['reviewed_by_assignee'] = "yes" if pr.assignee.login in reviewers else "no"
            # Deal with exceptions where assignee approval is not needed.
            if pr.assignee.login in reviewers or 'Hotfix' in labels or pr.assignee.login == pr.user.login:
                prj['review_rule'] = "yes"
            else:
                prj['review_rule'] = "no"
        else:
            prj['assignee'] = "none"
            prj['reviewed_by_assignee'] = "na"
            prj['review_rule'] = "na"

        prj['assignee_reviews'] = assignee_reviews
        prj['labels'] = labels

        delta = pr.closed_at - pr.created_at
        deltah = delta.total_seconds() / 3600
        prj['hours_open'] = deltah

        # Count the business days the PR was open; per the review guidelines
        # we need at least 2, with the exceptions handled below.
        dates = (pr.created_at + timedelta(days=idx + 1)
                 for idx in range((pr.closed_at - pr.created_at).days))
        res = sum(1 for day in dates if day.weekday() < 5)

        if res < 2 and not ('Trivial' in labels or 'Hotfix' in labels):
            prj['time_rule'] = False
        elif deltah < 4 and 'Trivial' in labels:
            prj['time_rule'] = False
        else:
            prj['time_rule'] = True
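        # Hypothetical example: a PR created Friday 18:00 and closed the
        # following Monday 10:00 spans two whole calendar days (Saturday and
        # Sunday), neither a weekday, so res == 0 and time_rule is False
        # unless the PR carries the Trivial or Hotfix label.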
        prj['reviewers'] = list(reviewers)
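        # prj is now one JSON-serializable document; with hypothetical values
        # it looks like: {"nr": 66000, "title": "...", "reviews": 4,
        # "base": "main", "hours_open": 49.5, "time_rule": True,
        # "reviewers": ["alice", "bob"], ...}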

        json_list.append(prj)


    # Send the data over to Elasticsearch.
    es = Elasticsearch(
        [os.environ['ELASTICSEARCH_SERVER']],
        api_key=os.environ['ELASTICSEARCH_KEY'],
        verify_certs=False
    )

    try:
        index = os.environ['PR_STAT_INDEX']
        bulk(es, gendata(json_list, index))
    except KeyError as e:
        print(f"Error: {e} not set.")

if __name__ == "__main__":
    main()
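
Assuming the 8.x elasticsearch Python client and the same environment variables the workflow exports, a minimal sketch (hypothetical, not part of this commit) to sanity-check that PR documents actually landed in the index:

#!/usr/bin/env python3
# Hypothetical verification snippet: fetch a few documents from the stats
# index populated by merged_prs.py. Assumes ELASTICSEARCH_SERVER,
# ELASTICSEARCH_KEY and PR_STAT_INDEX are set as in the workflow above.
import os

from elasticsearch import Elasticsearch

es = Elasticsearch([os.environ['ELASTICSEARCH_SERVER']],
                   api_key=os.environ['ELASTICSEARCH_KEY'],
                   verify_certs=False)

resp = es.search(index=os.environ['PR_STAT_INDEX'],
                 query={"match_all": {}}, size=5)

for hit in resp['hits']['hits']:
    doc = hit['_source']
    print(f"#{doc['nr']}: {doc['title']} (merged by {doc['merged_by']})")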
