From b46f90f421d42eec828489654e581377ae8bc771 Mon Sep 17 00:00:00 2001 From: Tim Kelly <1355145+austimkelly@users.noreply.github.com> Date: Tue, 21 Nov 2023 12:23:49 -0600 Subject: [PATCH] Develop (#6) * Bump requests from 2.26.0 to 2.31.0 Bumps [requests](https://github.com/psf/requests) from 2.26.0 to 2.31.0. - [Release notes](https://github.com/psf/requests/releases) - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md) - [Commits](https://github.com/psf/requests/compare/v2.26.0...v2.31.0) --- updated-dependencies: - dependency-name: requests dependency-type: direct:production ... Signed-off-by: dependabot[bot] * Create dependency-review.yml * make sure code scanning tools reported are unique * code cleanup * add in code alert counts. parse codeowners content if found * provide some output example. print out status and metrics at the end * update requirements.txt --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- README.md | 26 +++++++++++++++++++++++--- example/example_output.csv | 17 +++++++++++++++++ ghas-scan.py | 34 +++++++++++++++++++++++++++++++++- requirements.txt | 3 ++- 4 files changed, 75 insertions(+), 5 deletions(-) create mode 100644 example/example_output.csv diff --git a/README.md b/README.md index 6f34e6a..bc1efea 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,7 @@ This is a Python script that interacts with the GitHub API to fetch repository details and code scanning analysis information. Make sure the repository exists and your GitHub token has the necessary permissions to access it. + ## Prerequisites - Python 3.6 or higher @@ -37,14 +38,33 @@ Make sure the repository exists and your GitHub token has the necessary permissi 2. Open `ghas-scan.py` in your favorite text editor. 3. Replace `owner_type` variabe value with `user` or `org`. 4. Replace `owner_name` variable value with the corresponding user or org name. -5. Run the script: +5. 
Set `skip_forks` to `True` if you want to omit forked repos from the results. +6. Run the script: ```bash python3 ghas-scan.py ``` -### Output +### Output and Example + +Output is written to `github_data.csv` at the repository root. The console output looks something like this: + +``` +Getting list of repositories... +Fetching repo security configs... +CSV file 'github_data.csv' written successfully. +Total repositories: 16 +Total public repositories: 16 +Percent of repositories that are forked: 0.0% +Percent of repositories with Codeowners: 6.25% +Percent of repositories with Secrets Scanning Enabled: 12.5% +Percent of repositories with Secrets Push Protection Enabled: 12.5% +Total number of open critical and high code scanning alerts: 0 +Total number of open critical dependabot alerts: 0 +Done. +``` + -Output is written to `github_data.csv` at the repository root. +You can see an example in [./example/example_output.csv](./example/example_output.csv). This is just a basic example to give you an idea of the schema. 
# References diff --git a/example/example_output.csv b/example/example_output.csv new file mode 100644 index 0000000..aae4764 --- /dev/null +++ b/example/example_output.csv @@ -0,0 +1,17 @@ +repo_name,codeowners,last_commit_date,first_commit_date,is_fork,is_private,is_archived,security_and_analysis_enabled,secret_scanning_enabled,secret_scanning_push_protection_enabled,code_scanners_enabled,code_scanning_critical_alert_count,code_scanning_high_alert_count,code_scanning_medium_alert_count,code_scanning_low_alert_count,code_scanning_warning_alert_count,code_scanning_note_alert_count,code_scanning_error_alert_count,dependabot_enabled,dependabot_open_alerts_count,num_critical_dep_alerts,num_high_dep_alerts,num_medium_dep_alerts,num_low_dep_alerts +bv-ios-sdk,Not found,2015-10-12T19:09:08Z,2015-09-25T18:58:29Z,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,None,0,0,0,0,0,0,0,FALSE,0,0,0,0,0 +DVWA,Not found,2023-11-08T15:53:38Z,2023-11-08T02:30:31Z,TRUE,FALSE,FALSE,TRUE,TRUE,TRUE,"PHP Security Analysis, Security Audit for Infrastructure, PHP Security Audit, CodeQL",0,0,0,0,0,2,28,FALSE,0,0,0,0,0 +flutter_hello_world,Not found,2022-10-11T02:10:31Z,2022-10-11T02:04:07Z,FALSE,FALSE,TRUE,FALSE,FALSE,FALSE,None,0,0,0,0,0,0,0,FALSE,0,0,0,0,0 +ghas-utils," + +* @austimkelly",2023-11-21T02:11:49Z,2023-11-18T16:48:53Z,FALSE,FALSE,FALSE,TRUE,TRUE,TRUE,CodeQL,0,0,0,0,0,0,0,TRUE,0,0,0,0,0 +jQuery-Autocomplete,Not found,2015-06-24T21:23:47Z,2015-06-24T21:15:43Z,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,None,0,0,0,0,0,0,0,FALSE,0,0,0,0,0 +MKNetworkKit,Not found,2014-05-26T19:38:46Z,2014-05-26T19:13:38Z,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,None,0,0,0,0,0,0,0,FALSE,0,0,0,0,0 +NodeGoat,Not found,2023-03-18T12:37:32Z,2023-04-12T14:45:28Z,TRUE,FALSE,FALSE,FALSE,TRUE,FALSE,None,0,0,0,0,0,0,0,FALSE,0,0,0,0,0 +pp14parser,Not found,2014-07-14T14:05:07Z,2014-07-11T17:19:22Z,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,None,0,0,0,0,0,0,0,FALSE,0,0,0,0,0 +pr-practice,Not 
found,2022-12-02T19:23:47Z,2022-11-11T18:47:06Z,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,None,0,0,0,0,0,0,0,FALSE,0,0,0,0,0 +rekon,Not found,2023-11-18T17:30:43Z,2023-09-22T15:02:20Z,FALSE,FALSE,FALSE,TRUE,TRUE,TRUE,CodeQL,0,0,0,0,0,0,0,TRUE,1,0,0,1,0 +riverdata,Not found,2023-09-22T17:00:25Z,2022-09-20T01:31:34Z,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,None,0,0,0,0,0,0,0,FALSE,0,0,0,0,0 +riverdata-alrudiny,Not found,2023-11-06T19:03:50Z,2023-09-30T21:08:12Z,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,None,0,0,0,0,0,0,0,FALSE,0,0,0,0,0 +snyk-automatic-ticketing,Not found,2023-08-11T13:23:59Z,2023-08-10T15:15:54Z,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,None,0,0,0,0,0,0,0,FALSE,0,0,0,0,0 +terragoat,Not found,2021-12-08T14:18:34Z,2021-12-08T14:17:19Z,TRUE,FALSE,FALSE,TRUE,FALSE,FALSE,tfsec,0,0,0,0,0,0,0,FALSE,0,0,0,0,0 \ No newline at end of file diff --git a/ghas-scan.py b/ghas-scan.py index feef941..7cc87bb 100644 --- a/ghas-scan.py +++ b/ghas-scan.py @@ -2,7 +2,7 @@ import requests import os import base64 -import re +import pandas as pd # Set the GitHub owner type, owner name, and personal access token owner_type = 'user' # Change to 'org' if needed @@ -16,6 +16,27 @@ # Set up headers with the access token headers = {'Authorization': f'token {access_token}'} +def print_aggregated_metrics_from_csv(csv_file): + df = pd.read_csv(csv_file) + + total_repos = len(df) + public_repos = len(df[df['is_private'] == False]) + forked_repos = len(df[df['is_fork'] == True]) + repos_with_codeowners = len(df[df['codeowners'].notna() & (df['codeowners'] != 'Not found') & (df['codeowners'] != '')]) + repos_with_secrets_scanning = len(df[df['secret_scanning_enabled'] == True]) + repos_with_secrets_push_protection = len(df[df['secret_scanning_push_protection_enabled'] == True]) + open_critical_high_alerts = df['code_scanning_critical_alert_count'].sum() + df['code_scanning_high_alert_count'].sum() + open_critical_dependabot_alerts = df['num_critical_dep_alerts'].sum() + + print(f"Total repositories: 
{total_repos}") + print(f"Total public repositories: {public_repos}") + print(f"Percent of repositories that are forked: {forked_repos / total_repos * 100}%") + print(f"Percent of repositories with Codeowners: {repos_with_codeowners / total_repos * 100}%") + print(f"Percent of repositories with Secrets Scanning Enabled: {repos_with_secrets_scanning / total_repos * 100}%") + print(f"Percent of repositories with Secrets Push Protection Enabled: {repos_with_secrets_push_protection / total_repos * 100}%") + print(f"Total number of open critical and high code scanning alerts: {open_critical_high_alerts}") + print(f"Total number of open critical dependabot alerts: {open_critical_dependabot_alerts}") + def get_dependabot_alerts(owner, repo_name, headers): dependabot_url = f'https://api.github.com/repos/{owner}/{repo_name}/dependabot/alerts' dependabot_alerts = requests.get(dependabot_url, headers=headers) @@ -263,6 +284,7 @@ def get_repos(owner, headers, owner_type): raise Exception(f"Failed to fetch repositories. Status code: {response.status_code}, Response: {response.text}") # Get list of repositories for the user or organization +print("Getting list of repositories...") repos = get_repos(owner_name, headers, owner_type) # Write data to CSV @@ -297,6 +319,16 @@ def get_repos(owner, headers, owner_type): writer = csv.DictWriter(csvfile, fieldnames=fieldnames) writer.writeheader() + print("Fetching repo security configs... 
(this may take a while))") for repo in repos: repo_details = get_repo_details(owner_name, repo['name'], headers) writer.writerow(repo_details) + + csvfile.close() + print(f"CSV file '{csv_filename}' written successfully.") + + with open(csv_filename, 'r') as csvfile: + print_aggregated_metrics_from_csv(csvfile) + csvfile.close() + + print("Done.") diff --git a/requirements.txt b/requirements.txt index 077c95d..1ef835a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ -requests==2.31.0 \ No newline at end of file +requests==2.31.0 +pandas==1.3.3 \ No newline at end of file