def github_retry_on_rate_limit(func):
    """Wrap ``func`` so it is retried whenever GitHub reports a rate limit.

    The wrapper calls ``func`` with the given arguments and returns its
    result. If ``RateLimitExceededException`` is raised, it asks the
    module-level ``github`` client for the current rate limit, sleeps until
    the reported core reset time (plus one second), and calls ``func`` again.
    Any other exception propagates to the caller unchanged.

    Args:
        func: The callable to protect.

    Returns:
        A wrapper with the same name/docstring as ``func`` (via ``wraps``).
    """

    @wraps(func)
    def inner(*args, **kwargs):
        while True:
            try:
                return func(*args, **kwargs)
            except RateLimitExceededException:
                print("Rate Limit hit.")
                rl = github.get_rate_limit()
                remaining = (
                    rl.core.reset - datetime.datetime.utcnow()
                ).total_seconds()
                # The reset time may already be in the past by the time we
                # read it; time.sleep() raises ValueError on a negative
                # value, so never sleep for less than zero seconds.
                time_to_sleep = max(0, int(remaining + 1))
                print("Sleeping for %s seconds" % time_to_sleep)
                time.sleep(time_to_sleep)

    return inner
class RepositoryExtended(Repository.Repository):
    """Repository whose API-calling methods retry on rate limiting."""

    @classmethod
    def cast(cls, repo: Repository.Repository):
        """Re-class ``repo`` in place and wrap its API methods with retries.

        The bound methods are looked up after the class swap and stored back
        on the instance, each wrapped by ``github_retry_on_rate_limit``.
        Returns the same (mutated) ``repo`` object.
        """
        repo.__class__ = RepositoryExtended

        retried = (
            "create_issue",
            "get_contents",
            "get_issue",
            "get_labels",
            "create_label",
        )
        for method_name in retried:
            bound = getattr(repo, method_name)
            setattr(repo, method_name, github_retry_on_rate_limit(bound))
        return repo


class ContentFileExtended(ContentFile.ContentFile):
    """Content file whose lazy-completion hook retries on rate limiting."""

    @classmethod
    def cast(cls, content_file: ContentFile):
        """Re-class ``content_file`` in place and wrap ``_completeIfNotSet``.

        Returns the same (mutated) ``content_file`` object.
        """
        content_file.__class__ = ContentFileExtended

        retried = ("_completeIfNotSet",)
        for method_name in retried:
            bound = getattr(content_file, method_name)
            setattr(content_file, method_name, github_retry_on_rate_limit(bound))
        return content_file


class GithubExtended(Github):
    """GitHub client whose ``get_repo`` retries on rate limiting."""

    @classmethod
    def cast(cls, github: Github):
        """Re-class ``github`` in place and wrap ``get_repo`` with retries.

        Returns the same (mutated) ``github`` object.
        """
        github.__class__ = GithubExtended

        retried = ("get_repo",)
        for method_name in retried:
            bound = getattr(github, method_name)
            setattr(github, method_name, github_retry_on_rate_limit(bound))
        return github
# Directory names that carry a severity, e.g. "H-1", "Medium-12" or "3-GH".
_SEVERITY_DIRECTORY_PATTERNS = (
    re.compile(
        r"^(H|M|High|Medium|GH|General-Health|GeneralHealth)-\d+$", re.IGNORECASE
    ),
    re.compile(
        r"^\d+-(H|M|High|Medium|GH|General-Health|GeneralHealth)$", re.IGNORECASE
    ),
)

# Upper-cased directory spelling -> canonical severity code.
_SEVERITY_CODES = {
    "H": "H",
    "HIGH": "H",
    "M": "M",
    "MEDIUM": "M",
    "GH": "GH",
    "GENERAL-HEALTH": "GH",
    "GENERALHEALTH": "GH",
}


def _directory_severity(path):
    """Return the canonical severity code ("H", "M", "GH") for ``path``, or None."""
    for pattern in _SEVERITY_DIRECTORY_PATTERNS:
        found = pattern.match(path)
        if found:
            return _SEVERITY_CODES[found.group(1).upper()]
    return None


def process_directory(repo, path):
    """Read every issue file under ``path`` into the module-level ``issues`` dict.

    Files are expected to be named ``<id>.md`` or ``<id>-best.md``; the first
    line of a file is taken as the auditor and the fifth line as the title
    heading. Entries listed in ``exception_filenames`` are ignored and
    sub-directories are processed recursively.

    Issues in the root directory, or in a path containing "low", "false" or
    "invalid", are marked closed with severity "Invalid". Otherwise the
    severity is derived from the directory name and stored as one of the
    canonical codes "H", "M" or "GH". The original stored the upper-cased
    directory spelling (e.g. "HIGH"), which ``main`` does not recognise when
    choosing severity labels, so long-form directory names lost their label.

    Args:
        repo: Object exposing ``get_contents(path)``; returned items must
            provide ``name``, ``type``, ``path`` and ``decoded_content``.
        path: Repository-relative directory path ("" for the root).

    Raises:
        Exception: If an issue id appears more than once, or a valid family
            has several files but none is marked ``-best.md``.
    """
    print(f"[+] Processing directory /{path}")

    path_items = [
        x for x in repo.get_contents(path) if x.name not in exception_filenames
    ]
    dirs = [x for x in path_items if x.type == "dir"]
    files = [x for x in path_items if x.type != "dir"]

    # Root issues are closed by default
    closed = path == "" or any(
        x in path.lower() for x in ["low", "false", "invalid"]
    )
    severity = "Invalid"
    if not closed:
        # An unrecognised directory name keeps the "Invalid" severity.
        severity = _directory_severity(path) or "Invalid"

    dir_issues_ids = []
    parent = None
    for index, file in enumerate(files):
        print(f"[-] Reading file {file.name}")
        last_file = index == len(files) - 1

        file = ContentFileExtended.cast(file)
        if "best" in file.name:
            issue_id = int(file.name.replace("-best.md", ""))
            parent = issue_id
        else:
            issue_id = int(file.name.replace(".md", ""))

        # We automatically set the parent in the following cases:
        # 1. The family has only one issue and no report has been selected.
        #    We select the only issue available as the report.
        # 2. The family is an invalid family (deduplicated inside the invalid
        #    folder) and no report is selected. We select the last processed
        #    issue in that family as the report.
        if not parent and (
            len(files) == 1
            or (
                severity == "Invalid"
                and path not in ["low", "false", "invalid"]
                and last_file
            )
        ):
            print(
                f"[!] Setting issue {issue_id} as the default parent of the current family /{path}"
            )
            parent = issue_id

        body = file.decoded_content.decode("utf-8")
        body_lines = body.split("\n")
        auditor = body_lines[0]
        # The title is the heading on the fifth line, without its "#" prefix.
        issue_title = re.match(r"^(?:[#\s]+)(.*)$", body_lines[4]).group(1)
        title = f"{auditor} - {issue_title}"

        # Stop the script if an issue is found multiple times in the filesystem
        if issue_id in issues:
            raise Exception("Issue %s found multiple times." % issue_id)

        issues[issue_id] = {
            "id": issue_id,
            "parent": None,
            "severity": severity,
            "body": body,
            "closed": closed,
            "auditor": auditor,
            "title": title,
            "has_duplicates": False,
        }
        dir_issues_ids.append(issue_id)

    # A valid family must have a primary report selected
    if parent is None and severity != "Invalid":
        raise Exception("Family %s does not have a primary file (-best.md)." % path)

    # Set the parent field for all duplicates in this directory
    if parent:
        for issue_id in dir_issues_ids:
            if issue_id != parent:
                issues[parent]["has_duplicates"] = True
                issues[issue_id]["parent"] = parent
                issues[issue_id]["closed"] = True

    # Process any directories inside
    for directory in dirs:
        process_directory(repo, directory.path)
Missing %s" + % ( + expected_issue_ids, + actual_issue_ids, + missing_issue_ids, + ) + ) + + # Sync issues + for issue_id, issue in issues.items(): + print("Issue #%s" % issue_id) + + issue_labels = [] + if issue["has_duplicates"]: + issue_labels.append("Has Duplicates") + elif issue["parent"]: + issue_labels.append("Duplicate") + + if not issue["closed"] or issue["parent"]: + if issue["severity"] in ["H", "High"]: + issue_labels.append("High") + elif issue["severity"] in ["M", "Medium"]: + issue_labels.append("Medium") + elif issue["severity"] in ["GH", "General-Health", "GeneralHealth"]: + issue_labels.append("General Health") + + if issue["closed"] and not issue["parent"]: + issue_labels.append("Excluded") + + # Try creating/updating the issue until a success path is hit + must_sleep = False + while True: + try: + # Fetch existing issue + gh_issue = get_github_issue(repo, issue_id) + + # We persist all labels except High/Medium/Has Duplicates/Duplicate + existing_labels = [x.name for x in gh_issue.labels] + new_labels = existing_labels.copy() + if "High" in existing_labels: + new_labels.remove("High") + if "Medium" in existing_labels: + new_labels.remove("Medium") + if "General Health" in existing_labels: + new_labels.remove("General Health") + if "Has Duplicates" in existing_labels: + new_labels.remove("Has Duplicates") + if "Duplicate" in existing_labels: + new_labels.remove("Duplicate") + if "Excluded" in existing_labels: + new_labels.remove("Excluded") + new_labels = issue_labels + new_labels + + must_update = False + if sorted(existing_labels) != sorted(new_labels): + must_update = True + print( + "\tLabels differ. 
Old: %s New: %s" + % (existing_labels, new_labels) + ) + + if gh_issue.title != issue["title"]: + must_update = True + print( + "\tTitles differ: Old: %s New: %s" + % (gh_issue.title, issue["title"]) + ) + + expected_body = ( + issue["body"] + if not issue["parent"] + else issue["body"] + f"\n\nDuplicate of #{issue['parent']}\n" + ) + if expected_body != gh_issue.body: + must_update = True + print("\tBodies differ. See the issue edit history for the diff.") + + if must_update: + print("\tIssue needs to be updated.") + gh_issue.edit( + title=issue["title"], + body=issue["body"], + state="closed" if issue["closed"] else "open", + labels=new_labels, + ) + # Exit the inifite loop and sleep + must_sleep = True + break + else: + print("\tIssue does not need to be updated.") + # Exit the infinite loop and don't sleep + # since we did not make any edits + break + except UnknownObjectException: + print("\tCreating issue") + # Create issue - 1 API call + gh_issue = repo.create_issue( + issue["title"], body=issue["body"], labels=issue_labels + ) + if issue["closed"]: + gh_issue.edit(state="closed") + + # Exit the infinite loop and sleep + must_sleep = True + break + + # Sleep between issues if any edits/creations have been made + if must_sleep: + print("\tSleeping for 1 second...") + time.sleep(1) + + print("Referencing parent issue from duplicate issues") + duplicate_issues = {k: v for k, v in issues.items() if v["parent"]} + # Set duplicate label + for issue_id, issue in duplicate_issues.items(): + # Try updating the issue until a success path is hit + must_sleep = False + while True: + try: + print( + "\tReferencing parent issue %s from duplicate issue %s." 
exception_filenames = [".data", ".git", ".github", "README.md", "Audit_Report.pdf", "comments.csv", ".gitignore"]


def main():
    """Fail the run when any tracked issue file has had its contents modified.

    Reads the space-delimited ``MODIFIED_FILES`` environment variable, drops
    every path that contains one of ``exception_filenames`` as a substring,
    prints the remaining paths and exits with status 1 if any are left.

    The workflow also exports ``ADDED_FILES``, ``RENAMED_FILES`` and
    ``REMOVED_FILES``, but those changes are allowed and are not inspected.
    An unset or empty variable is treated as "no files".
    """
    import sys

    # `or ""` covers an unset variable (None); split() without an argument
    # ignores repeated/leading/trailing whitespace so no empty "file" entry
    # is produced.
    raw_modified = os.environ.get("MODIFIED_FILES") or ""
    modified_files = [
        name
        for name in raw_modified.split()
        if not any(excluded in name for excluded in exception_filenames)
    ]

    print("MODIFIED FILES")
    print(modified_files)

    if modified_files:
        print("❌ File contents should not be altered.")
        sys.exit(1)

    print("✅ File contents have not be altered.")
print("- Item %s" % item) + is_dir = os.path.isdir(os.path.join(path, item)) + + if is_dir: + matches = [ + r"^(H|M|High|Medium|GH|General-Health|GeneralHealth)-\d+$", + r"^\d+-(H|M|High|Medium|GH|General-Health|GeneralHealth)$", + r"^false$", + r"^invalid$", + ] + correctly_formatted = any( + re.match(pattern, item, re.IGNORECASE) for pattern in matches + ) + if ( + not any([x in path for x in ["invalid", "false"]]) + and not correctly_formatted + ): + errors.append("Directory %s is not formatted properly." % item) + else: + process_directory(os.path.join(path, item)) + else: + if not re.match(r"^\d+(-best)?.md$", item): + errors.append("File %s is not formatted properly." % item) + continue + + # Check if the file is the best report + if "-best" in item: + if not directory_has_report: + directory_has_report = True + else: + errors.append( + "Directory %s has multiple best reports marked." % path + ) + + # Extract issue number from the file name + issue_number = int(re.match(r"(\d+)", item).group(0)) + + # Check if the issue was already found + if issue_number in issues: + errors.append("Issue %s exists multiple times." % issue_number) + else: + issues.append(issue_number) + + if ( + path != "." + and not any(x in path for x in ["invalid", "false"]) + and not directory_has_report + and len(items) > 1 + ): + errors.append("Directory %s does not have a best report selected." % path) + + # Start processing from the root + process_directory(".") + + expected_issues = [x + 1 for x in range(total_issues)] + # Check if all issues are found in the repo + for x in expected_issues: + if x not in issues: + errors.append("Issue %s not found in the repo." % x) + # Check if there are no additional issues added + for x in issues: + if x not in expected_issues: + errors.append("Issue %s should not be in the repo." 
% x) + + if os.path.exists(comment_filename): + errors.extend(consume_comment_file()) + + if len(errors) > 0: + for error in errors: + print("❌ %s" % error) + exit(1) + + print("✅ Repo structure is valid.") + + +if __name__ == "__main__": + main() diff --git a/.github/workflows/sync-issues.yml b/.github/workflows/sync-issues.yml new file mode 100644 index 0000000..842a266 --- /dev/null +++ b/.github/workflows/sync-issues.yml @@ -0,0 +1,19 @@ +name: sync-files-to-issues +run-name: sync-files-to-issues +on: + workflow_dispatch: +permissions: write-all +jobs: + sync-issues: + runs-on: ubuntu-latest + steps: + - name: Checkout the repository + uses: actions/checkout@v3 + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: "3.8" + - name: Setup Python dependencies + run: pip install PyGithub==1.55 + - name: Sync the issues + run: GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }} GITHUB_REPOSITORY=$GITHUB_REPOSITORY GITHUB_RUN_NUMBER=$GITHUB_RUN_NUMBER python .data/sync_issues.py diff --git a/.github/workflows/validate-judging-repo.yml b/.github/workflows/validate-judging-repo.yml new file mode 100644 index 0000000..c798384 --- /dev/null +++ b/.github/workflows/validate-judging-repo.yml @@ -0,0 +1,43 @@ +name: validate-judging-repo +run-name: validate-judging-repo +on: + push: + branches: + - "main" + workflow_dispatch: +jobs: + validate-changes: + if: "!contains(github.event.commits[0].message, 'Initial commit')" + runs-on: ubuntu-latest + steps: + - name: Checkout the repository + uses: actions/checkout@v3 + - uses: jitterbit/get-changed-files@v1 + id: changes + with: + format: space-delimited + token: ${{ secrets.GITHUB_TOKEN }} + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: "3.8" + - name: Validate changes + run: > + ADDED_FILES="${{ steps.changes.outputs.added }}" + MODIFIED_FILES="${{ steps.changes.outputs.modified }}" + RENAMED_FILES="${{ steps.changes.outputs.renamed }}" + REMOVED_FILES="${{ 
steps.changes.outputs.removed }}" + python .data/validate_changes.py + validate-filesystem: + runs-on: ubuntu-latest + steps: + - name: Checkout the repository + uses: actions/checkout@v3 + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: "3.8" + - name: Validate filesystem structure + run: > + TOTAL_ISSUES="${{ secrets.TOTAL_ISSUES }}" + python .data/validate_filesystem.py \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3fbffbb --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +* +!*/ +!/.data +!/.github +!/.gitignore +!/README.md +!/comments.csv +!*.md +!**/*.md +!/Audit_Report.pdf diff --git a/comments.csv b/comments.csv new file mode 100644 index 0000000..6f5675e --- /dev/null +++ b/comments.csv @@ -0,0 +1 @@ +issue_number,comment \ No newline at end of file