From 89b1a2b65694b440435b476e56365e15feec77e1 Mon Sep 17 00:00:00 2001 From: Pierre-Yves Lapersonne Date: Wed, 9 Mar 2022 18:22:04 +0100 Subject: [PATCH 1/3] feat: #32 - backup of all projects for GitLab organisation (#51) Signed-off-by: Pierre-Yves Lapersonne --- .gitignore | 1 + CHANGELOG.md | 6 + README.md | 38 +++- toolbox/dry-run.sh | 26 +++ toolbox/github/configuration.rb | 4 +- toolbox/gitlab/GitLabWizard.sh | 140 +++++++++++++++ toolbox/gitlab/configuration.rb | 31 ++++ .../dump-git-repositories-from-gitlab.sh | 170 ++++++++++++++++++ 8 files changed, 413 insertions(+), 3 deletions(-) create mode 100755 toolbox/gitlab/GitLabWizard.sh create mode 100644 toolbox/gitlab/configuration.rb create mode 100755 toolbox/gitlab/utils/dump-git-repositories-from-gitlab.sh diff --git a/.gitignore b/.gitignore index 7407c3c..d76e3b0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ toolbox/github/data toolbox/diver/data +toolbox/gitlab/data diff --git a/CHANGELOG.md b/CHANGELOG.md index d88a346..0cc3961 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # CHANGELOG +## Version 2.5.0 + +### Features + +- [#32](https://github.com/Orange-OpenSource/floss-toolbox/issues/32) GitLab Auto Backup + ## Version 2.4.0 ### Features diff --git a/README.md b/README.md index cafd33d..c1d8bf3 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# floss-toolbox (version 2.4.0) +# floss-toolbox (version 2.5.0) Toolbox to help developers and open source referents to have cleaner projects in GitHub organizations. @@ -428,3 +428,39 @@ brew install gitleaks You need to define in the _configuration.rb_ files the Github organisation at **GITHUB_ORGANIZATION_NAME** and also your GitHub personal token at ** GITHUB_PERSONAL_ACCESS_TOKEN**. **You should also have your _git_ environment ready i.e. add your SSH private key if you clone by SSH for example. _gh_ must be installed, and _python3_ be ready. 
Obvisously _gitleaks_ must be installed** + +# Play with GitLab web API + +## Prerequisites + +- Ruby Gem: `git 1.8.1` +- Python3 + +- Create a [GitLab personal token](https://gitlab.com/-/profile/personal_access_tokens) and define it in the _configuration.rb_ file for the `GILAB_PERSONAL_ACCESS_TOKEN` variable. +- Define the GitLab organization id in the _configuration.rb_ file for the `GITLAB_ORGANIZATION_ID` variable. It allows sending requests to query and modify your organization. + +## Prepare project + +```ruby +gem install git +``` + +## Features + +### Make a backup of organization repositories + +_Keywords: #organisation #GitLab #repositories #clones #dump_ + +This feature clones all repositories of the defined GitLab organization (groups and subgroups included) and saves them in a specific folder. + +Run the following command: +```shell +bash GitLabWizard.sh backup-all-repositories-from-org +``` + +This script gets configuration details from the Ruby configuration file and triggers another Shell script, which makes a cURL request to the GitLab endpoint. A Python script is then called to extract the repositories' URLs before the cloning operation. + +You need to define in the _configuration.rb_ files the GitLab organisation ID at **GITLAB_ORGANIZATION_ID**. +You have to also define the location to store clones at **REPOSITORIES_CLONE_LOCATION_PATH** and the access token at **GILAB_PERSONAL_ACCESS_TOKEN**. + +**You should also have your _git_ environment ready, i.e. 
add your SSH private key if you clone by SSH for example.** diff --git a/toolbox/dry-run.sh b/toolbox/dry-run.sh index 067e427..33d087d 100755 --- a/toolbox/dry-run.sh +++ b/toolbox/dry-run.sh @@ -9,6 +9,7 @@ # Since...............: 08/03/2021 # Description.........: Make a dry-run of the project to check if everything is ready to use +# Version.............: 1.1.0 # Couts # ----- @@ -128,6 +129,23 @@ CheckIfFileExists "github/utils/GitHubWrapper.rb" CheckIfFileExists "github/utils/GitWrapper.rb" CheckIfFileExists "github/utils/IO.rb" +# GitLab features +# --------------- + +echo -e "\n----------------------------------" +echo "Assertions for the GITLAB features" +echo "----------------------------------" + +echo -e "\nCheck if main folder exists..." +CheckIfDirectoryExists "gitlab" +CheckIfDirectoryExists "gitlab/data" + +echo -e "\nCheck files..." +CheckIfFileExists "gitlab/configuration.rb" +CheckIfFileExists "gitlab/GitLabWizard.sh" +CheckIfFileExists "gitlab/utils/dump-git-repositories-from-gitlab.sh" +CheckIfFileExists "github/utils/extract-repos-field-from-json.py" # Stored in github folder but used by ump-git-repositories-from-gitlab.sh + # Runtimes and tools # ------------------ @@ -164,6 +182,7 @@ echo "Assertions for configuration file" echo "---------------------------------" echo -e "\nCheck for entries in configuration file..." 
+ CheckIfConfigurationKeyDefined "github/configuration.rb" "GITHUB_PERSONAL_ACCESS_TOKEN" CheckIfConfigurationKeyDefined "github/configuration.rb" "GITHUB_ORGANIZATION_NAME" CheckIfConfigurationKeyDefined "github/configuration.rb" "GITHUB_ORGANIZATION_ADMINS" @@ -183,6 +202,13 @@ CheckIfConfigurationKeyDefined "github/configuration.rb" "FILENAME_PROJECTS_WITH CheckIfConfigurationKeyDefined "github/configuration.rb" "FILENAME_EMPTY_PROJECTS" CheckIfConfigurationKeyDefined "github/configuration.rb" "REPOSITORIES_CLONE_LOCATION_PATH" CheckIfConfigurationKeyDefined "github/configuration.rb" "REPOSITORIES_CLONE_URL_JSON_KEY" + +CheckIfConfigurationKeyDefined "gitlab/configuration.rb" "GILAB_PERSONAL_ACCESS_TOKEN" +CheckIfConfigurationKeyDefined "gitlab/configuration.rb" "GITLAB_ORGANIZATION_ID" +CheckIfConfigurationKeyDefined "gitlab/configuration.rb" "RESULTS_PER_PAGE" +CheckIfConfigurationKeyDefined "gitlab/configuration.rb" "REPOSITORIES_CLONE_LOCATION_PATH" +CheckIfConfigurationKeyDefined "gitlab/configuration.rb" "REPOSITORIES_CLONE_URL_JSON_KEY" + echo -e "🔎 I hope configuration entries are - well - defined, be sure of that" # Conclusion diff --git a/toolbox/github/configuration.rb b/toolbox/github/configuration.rb index 92086ea..491fedd 100644 --- a/toolbox/github/configuration.rb +++ b/toolbox/github/configuration.rb @@ -9,8 +9,8 @@ # Allow debug message or not $LOG_DEBUG = false -# API, tokens, GitHub organization -# -------------------------------- +# GitHub organization +# ------------------- # You have to create a personal access token on: https://github.com/settings/tokens $GITHUB_PERSONAL_ACCESS_TOKEN = "" diff --git a/toolbox/gitlab/GitLabWizard.sh b/toolbox/gitlab/GitLabWizard.sh new file mode 100755 index 0000000..83817b9 --- /dev/null +++ b/toolbox/gitlab/GitLabWizard.sh @@ -0,0 +1,140 @@ +#!/bin/bash +# Software Name: floss-toolbox +# SPDX-FileCopyrightText: Copyright (c) 2021 Orange +# SPDX-License-Identifier: Apache-2.0 +# +# This software is 
distributed under the Apache 2.0 license. +# +# Author: Pierre-Yves LAPERSONNE et al. + +# Since...............: 09/03/2022 +# Description.........: Received from arguments a feature to launch. + +#set -euxo pipefail +VERSION="1.0.0" + +# Common files +# ------------ + +RUBY_CONFIGURATION_FILE="./configuration.rb" +SHELL_REPOSITORIES_DUMPER="./utils/dump-git-repositories-from-gitlab.sh" + +# Exit codes +# ---------- + +EXIT_OK=0 +EXIT_BAD_ARGUMENTS=1 +EXIT_NO_FEATURE=2 +EXIT_UNKNOWN_FEATURE=3 +EXIT_BAD_SETUP=100 + +# Functions +# --------- + +UsageAndExit(){ + echo "GitLabWizard.sh - Version $VERSION" + echo "USAGE:" + echo "bash GitLabWizard.sh feature-to-launch" + echo "with feature-to-launch:" + echo -e "\t backup-all-repositories-from-org...............: Dump all repositories in GitHub to a specific location in the disk" + echo "About exit codes:" + echo -e "\t 0................: Normal exit" + echo -e "\t 1................: Bad arguments given to the script" + echo -e "\t 2................: No defined feature in argument" + echo -e "\t 3................: Feature not recognized" + echo -e "\t 100..............: Bad prerequisites to run this script" + exit $EXIT_OK +} + +# Check arguments +# --------------- + +if [ "$#" -eq 0 ]; then + UsageAndExit + exit $EXIT_OK +fi + +if [ "$#" -ne 1 ]; then + echo "ERROR: Bad arguments. Exit now" + UsageAndExit + exit $EXIT_BAD_ARGUMENTS +fi + +feature_to_run=$1 + +if [ -z "$feature_to_run" ]; then + echo "ERROR: No feature to run. Exit now." + UsageAndExit + exit $EXIT_NO_FEATURE +fi + +if [ $feature_to_run != "backup-all-repositories-from-org" ]; then + echo "ERROR: '$feature_to_run' is unknown feature. Exit now" + UsageAndExit + exit $EXIT_UNKNOWN_FEATURE +fi + +# Run toolbox for features +# ------------------------ + +echo "----------------------------------" +echo "GitLabWizard.sh - Version $VERSION" +echo "----------------------------------" + +# Common prerequisites + +if [ ! 
-f "$RUBY_CONFIGURATION_FILE" ]; then + echo "ERROR: RUBY_CONFIGURATION_FILE does not exist. Exits now." + exit $EXIT_BAD_SETUP +fi + +if [ $feature_to_run == "backup-all-repositories-from-org" ]; then + + if [ ! -f "$SHELL_REPOSITORIES_DUMPER" ]; then + echo "ERROR: SHELL_REPOSITORIES_DUMPER does not exist. Exits now." + exit $EXIT_BAD_SETUP + fi + + GITLAB_ORGANIZATION_ID=`cat $RUBY_CONFIGURATION_FILE | grep GITLAB_ORGANIZATION_ID | cut -d= -f2 | tr -d '"'` + if [ -z "$GITLAB_ORGANIZATION_ID" ]; then + echo "ERROR: Cannot define value for GITLAB_ORGANIZATION_ID from RUBY_CONFIGURATION_FILE. Exits now." + exit $EXIT_BAD_SETUP + fi + + GILAB_PERSONAL_ACCESS_TOKEN=`cat $RUBY_CONFIGURATION_FILE | grep GILAB_PERSONAL_ACCESS_TOKEN | cut -d= -f2 | tr -d '"'` + if [ -z "$GILAB_PERSONAL_ACCESS_TOKEN" ]; then + echo "ERROR: Cannot define value for GILAB_PERSONAL_ACCESS_TOKEN from RUBY_CONFIGURATION_FILE. Exits now." + exit $EXIT_BAD_SETUP + fi + + RESULTS_PER_PAGE=`cat $RUBY_CONFIGURATION_FILE | grep RESULTS_PER_PAGE | cut -d= -f2 | tr -d '"'` + if [ -z "$RESULTS_PER_PAGE" ]; then + echo "ERROR: Cannot define value for RESULTS_PER_PAGE from RUBY_CONFIGURATION_FILE. Exits now." + exit $EXIT_BAD_SETUP + fi + + CLONING_URL_JSON_KEY=`cat $RUBY_CONFIGURATION_FILE | grep REPOSITORIES_CLONE_URL_JSON_KEY | cut -d= -f2 | tr -d '"'` + if [ -z "$CLONING_URL_JSON_KEY" ]; then + echo "ERROR: Cannot define value for CLONING_URL_JSON_KEY from RUBY_CONFIGURATION_FILE. Exits now." + exit $EXIT_BAD_SETUP + fi + + REPOSITORIES_CLONE_LOCATION_PATH=`cat $RUBY_CONFIGURATION_FILE | grep REPOSITORIES_CLONE_LOCATION_PATH | cut -d= -f2 | tr -d '"'` + if [ -z "$REPOSITORIES_CLONE_LOCATION_PATH" ]; then + echo "ERROR: Cannot define value for REPOSITORIES_CLONE_LOCATION_PATH from RUBY_CONFIGURATION_FILE. Exits now." 
+ exit $EXIT_BAD_SETUP + fi + + echo "Start Shell script ($SHELL_REPOSITORIES_DUMPER) for feature to dump repositories of '$GITLAB_ORGANIZATION_ID' to '$REPOSITORIES_CLONE_LOCATION_PATH'" + start_time_seconds=`date +%s` + ./$SHELL_REPOSITORIES_DUMPER $CLONING_URL_JSON_KEY $GITLAB_ORGANIZATION_ID $RESULTS_PER_PAGE $REPOSITORIES_CLONE_LOCATION_PATH $GILAB_PERSONAL_ACCESS_TOKEN +fi + +# Stats & bye + +return_status=$? +end_time_seconds=`date +%s` +elapsed_time_seconds=`expr $end_time_seconds - $start_time_seconds` +echo "Elapsed time: $elapsed_time_seconds seconds" + +exit $return_status \ No newline at end of file diff --git a/toolbox/gitlab/configuration.rb b/toolbox/gitlab/configuration.rb new file mode 100644 index 0000000..f1b4ddf --- /dev/null +++ b/toolbox/gitlab/configuration.rb @@ -0,0 +1,31 @@ +# Software Name: floss-toolbox +# SPDX-FileCopyrightText: Copyright (c) 2021 Orange +# SPDX-License-Identifier: Apache-2.0 +# +# This software is distributed under the Apache 2.0 license. +# +# Author: Pierre-Yves LAPERSONNE et al. + +# GitLab organization +# ------------------- + +# You have to create a personal access token on: https://gitlab.com/-/profile/personal_access_tokens +$GILAB_PERSONAL_ACCESS_TOKEN = "" + +# ID of the GitLab group, i.e. 
the organisation ID you want to deal with so as to request the GitLab API +$GITLAB_ORGANIZATION_ID = "" + +# Requests +# -------- + +# Results returned in one page (GitLab pagination, max 100) +$RESULTS_PER_PAGE = 100 + +# Repositories +# ------------ + +# Location to store all clones of repositories (absolute path; shortcuts such as ~ are not interpreted) +$REPOSITORIES_CLONE_LOCATION_PATH = "" + +# Field for URL to use for repositories cloning (either http_url_to_repo for HTTP or ssh_url_to_repo for SSH) +$REPOSITORIES_CLONE_URL_JSON_KEY = "ssh_url_to_repo" \ No newline at end of file diff --git a/toolbox/gitlab/utils/dump-git-repositories-from-gitlab.sh b/toolbox/gitlab/utils/dump-git-repositories-from-gitlab.sh new file mode 100755 index 0000000..5c03226 --- /dev/null +++ b/toolbox/gitlab/utils/dump-git-repositories-from-gitlab.sh @@ -0,0 +1,170 @@ +#!/bin/bash +# Software Name: floss-toolbox +# SPDX-FileCopyrightText: Copyright (c) 2021 Orange +# SPDX-License-Identifier: Apache-2.0 +# +# This software is distributed under the Apache 2.0 license. +# +# Author: Pierre-Yves LAPERSONNE et al. + +# Since...............: 09/03/2022 +# Description.........: Receives as arguments a GitLab organisation ID and a location path, and dumps all repositories of this GitLab organisation to that path. 
+ +#set -euxo pipefail +VERSION="1.0.0" + +# Config +# ------ + +EXIT_OK=0 +EXIT_BAD_ARGUMENTS=1 +EXIT_BAD_SETUP=2 + +URL_EXTRACTER_FILE="./../github/utils/extract-repos-field-from-json.py" # TODO: Extract this Python sript to common files + +# Functions +# --------- + +UsageAndExit(){ + echo "dump-git-repositories-from-gitlab.sh - Version $VERSION" + echo "USAGE:" + echo "bash dump-git-repositories-from-gitlab.sh KEY ORGANISATION DESTINATION" + echo "with KEY: JSON key to ge cloning URL" + echo "with ORGANISATION: GitLab organisation ID" + echo "with PAGINATE: Number of items by page for requests" + echo "with DESTINATION: Destination to download the clones of the ORGANISATION repositories" + echo "with TOKEN: GitLab access token to make the request" + echo "About exit codes:" + echo -e "\t 0................: Normal exit" + echo -e "\t 1................: Bad arguments given to the script" + echo -e "\t 2................: File URL_EXTRACTER_FILE is not defined. Impossible to extract URL from API results." + exit $EXIT_OK +} + +# Check setup +# ----------- + +if [ "$#" -eq 0 ]; then + UsageAndExit + exit $EXIT_OK +fi + +if [ "$#" -ne 5 ]; then + echo "ERROR: Bad arguments number. Exits now" + UsageAndExit + exit $EXIT_BAD_ARGUMENTS +fi + +if [ ! -f "$URL_EXTRACTER_FILE" ]; then + echo "ERROR: Bad set up fr URL extracter. Exits now" + UsageAndExit + exit $EXIT_BAD_SETUP +fi + +cloning_url_key=$1 +if [ -z "$cloning_url_key" ]; then + echo "ERROR: No JSON key for URL. Exits now." + UsageAndExit + exit $EXIT_BAD_ARGUMENTS +fi + +organisation_id=$2 +if [ -z "$organisation_id" ]; then + echo "ERROR: No organisation ID defined. Exits now." + UsageAndExit + exit $EXIT_BAD_ARGUMENTS +fi + +pagination=$3 +if [ -z "$pagination" ]; then + echo "ERROR: No pagination defined. Exits now." + UsageAndExit + exit $EXIT_BAD_ARGUMENTS +fi + +repositories_location=$4 +if [ -z "$repositories_location" ]; then + echo "ERROR: No location for clones is defined. Exits now." 
+ UsageAndExit + exit $EXIT_BAD_ARGUMENTS +fi + +access_token=$5 +if [ -z "$access_token" ]; then + echo "ERROR: No access token is defined. Exits now." + UsageAndExit + exit $EXIT_BAD_ARGUMENTS +fi + +# Run +# --- + +echo "-------------------------------------------------------" +echo "dump-git-repositories-from-gitlab.sh - Version $VERSION" +echo "-------------------------------------------------------" + +# Step 1 - Get all groups and subgroups projects + +max_number_of_pages=10 # TODO: Remove magic number for max number of pages +echo "Get all projects of groups and subgroups with $pagination items per page and arbitrary $max_number_of_pages pages max..." + +gitlab_projects_dump_file_raw="./data/.gitlab-projects-dump.raw.json" +gitlab_projects_dump_file_clean="./data/.gitlab-projects-dump.clean.json" +if [ -f "$gitlab_projects_dump_file_raw" ]; then + rm $gitlab_projects_dump_file_raw +fi + +for page in `seq 1 $max_number_of_pages` +do + curl --header "Authorization: Bearer $access_token" --location --request GET "https://gitlab.com/api/v4/groups/$organisation_id/projects?include_subgroups=true&per_page=$pagination&page=$page" >> $gitlab_projects_dump_file_raw +done + +# Step 2 - Extract repositories URL + +# Because of pagination (max 100 items per page, arbitrary 10 pages here), raw pages are concatenated in one file. +# So we have several JSON arrays pasted one after another in one file. +# Consecutive arrays show up with the pattern ][. Merge them into a single array by replacing ][ with , +# But empty pages are empty arrays, so their ][ boundaries become runs of repeated , which we squeeze into a single one. +# What remains is the final array with a useless trailing , so the pattern },] is replaced by }] +cat $gitlab_projects_dump_file_raw | sed -e "s/\]\[/,/g" | tr -s ',' | sed -e "s/\}\,\]/\}\]/g" > $gitlab_projects_dump_file_clean + +url_for_cloning="./data/.url-for-cloning.txt" +echo "Extract cloning from results (using '$cloning_url_key' as JSON key)..." 
+python3 "$URL_EXTRACTER_FILE" --field $cloning_url_key --source $gitlab_projects_dump_file_clean > $url_for_cloning +repo_count=`cat $url_for_cloning | wc -l | sed 's/ //g'` +echo "Extraction done. Found '$repo_count' items." + +# Step 3 - Clone repositories + +dir_before_dump=`pwd` +echo "Creating dump directory..." +directory_name=$(date '+%Y-%m-%d') +cd "$repositories_location" +if [ -d "$directory_name" ]; then + echo "Removing old directory with the same name" + rm -rf $directory_name +fi +mkdir $directory_name +cd $directory_name +echo "Dump directory created with name '$directory_name' at location `pwd`." + +# Step 4 - For each repository, clone it + +number_of_url=`cat "$dir_before_dump/$url_for_cloning" | wc | awk {'print $1 '}` +cpt=1 +echo "Dumping of $number_of_url repositories..." +while read url_line; do + echo "Cloning ($cpt / $number_of_url) '$url_line'..." + git clone "$url_line" + cpt=$((cpt+1)) +done < "$dir_before_dump/$url_for_cloning" + +echo "Dump done!" + +# Step 5 - Clean up + +cd $dir_before_dump + +rm $gitlab_projects_dump_file_raw +rm $gitlab_projects_dump_file_clean +rm $url_for_cloning \ No newline at end of file From 2d20ed67ab8ab86bb167f15c5ce6b2d655965a7b Mon Sep 17 00:00:00 2001 From: Pierre-Yves Lapersonne Date: Wed, 9 Mar 2022 18:36:27 +0100 Subject: [PATCH 2/3] fix: #52 - manage failure of git log command if no commits in repo (#53) Signed-off-by: Pierre-Yves Lapersonne --- CHANGELOG.md | 4 ++++ toolbox/diver/extract-emails-from-history.sh | 10 ++++++++-- toolbox/diver/find-contributors-in-git-logs.sh | 9 +++++++-- .../diver/find-missing-developers-in-git-commits.sh | 9 +++++++-- toolbox/diver/list-contributors-in-history.sh | 9 +++++++-- 5 files changed, 33 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0cc3961..7c6eb34 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ - [#32](https://github.com/Orange-OpenSource/floss-toolbox/issues/32) GitLab Auto Backup +### Bugs + +- 
[#52](https://github.com/Orange-OpenSource/floss-toolbox/issues/52) Failure of git log if no commits + ## Version 2.4.0 ### Features diff --git a/toolbox/diver/extract-emails-from-history.sh b/toolbox/diver/extract-emails-from-history.sh index ee28674..5c085ac 100644 --- a/toolbox/diver/extract-emails-from-history.sh +++ b/toolbox/diver/extract-emails-from-history.sh @@ -7,7 +7,7 @@ # # Author: Pierre-Yves LAPERSONNE et al. -# Version.............: 1.0.1 +# Version.............: 1.0.2 # Since...............: 06/10/2021 # Description.........: Using the Git history, provide a list of contributors # @@ -22,7 +22,7 @@ set -euo pipefail -VERSION="1.0.1" +VERSION="1.0.2" SCRIPT_NAME="extract-emails-from-history" # ------------- @@ -216,6 +216,12 @@ fi touch "$git_log_file" +if [ "$( git log --oneline -5 2>/dev/null | wc -l )" -eq 0 ]; then + echo "Warning: Project '$git_based_project' is a git repository without any commit, that's weird" + CleanFiles + NormalExit +fi + git log --since=$git_log_limit > $git_log_file if [ ! -s "$git_log_file" ]; then diff --git a/toolbox/diver/find-contributors-in-git-logs.sh b/toolbox/diver/find-contributors-in-git-logs.sh index f4ef503..678ef8c 100755 --- a/toolbox/diver/find-contributors-in-git-logs.sh +++ b/toolbox/diver/find-contributors-in-git-logs.sh @@ -7,7 +7,7 @@ # # Author: Pierre-Yves LAPERSONNE et al. -# Version.............: 1.2.0 +# Version.............: 1.2.1 # Since...............: 11/05/2020 # Description.........: Looks for words (defined in dedicated file) in git logs # @@ -21,7 +21,7 @@ # -VERSION="1.2.0" +VERSION="1.2.1" SCRIPT_NAME="find-contributors-in-git-logs" # ------------- @@ -234,6 +234,11 @@ if [ -f $git_log_file ]; then rm $git_log_file fi +if [ "$( git log --oneline -5 2>/dev/null | wc -l )" -eq 0 ]; then + echo "Warning: Project '$git_based_project' is a git repository without any commit, that's weird" + NormalExit +fi + git log --since=$git_log_limit > $git_log_file if [ ! 
-s "$git_log_file" ]; then diff --git a/toolbox/diver/find-missing-developers-in-git-commits.sh b/toolbox/diver/find-missing-developers-in-git-commits.sh index e34a486..b1cdb1a 100755 --- a/toolbox/diver/find-missing-developers-in-git-commits.sh +++ b/toolbox/diver/find-missing-developers-in-git-commits.sh @@ -7,7 +7,7 @@ # # Author: Pierre-Yves LAPERSONNE et al. -# Version.............: 1.1.0 +# Version.............: 1.1.1 # Since...............: 12/05/2020 # Description.........: Looks in git commits in the DCO has been used, i.e. if commits have been signed off. # Checks also if commits authors are defined. @@ -21,7 +21,7 @@ # 3 - problem with a command # -VERSION="1.1.0" +VERSION="1.1.1" SCRIPT_NAME="find-missing-developers-in-git-commits" # ------------- @@ -199,6 +199,11 @@ if [ -f $git_log_file ]; then rm $git_log_file fi +if [ "$( git log --oneline -5 2>/dev/null | wc -l )" -eq 0 ]; then + echo "Warning: Project '$git_based_project' is a git repository without any commit, that's weird" + NormalExit +fi + git log --since=$git_log_limit > $git_log_file if [ ! -s "$git_log_file" ]; then diff --git a/toolbox/diver/list-contributors-in-history.sh b/toolbox/diver/list-contributors-in-history.sh index 47ff169..c6a420c 100755 --- a/toolbox/diver/list-contributors-in-history.sh +++ b/toolbox/diver/list-contributors-in-history.sh @@ -7,7 +7,7 @@ # # Author: Pierre-Yves LAPERSONNE et al. 
-# Version.............: 1.0.0 +# Version.............: 1.0.2 # Since...............: 24/02/2022 # Description.........: Using the Git history, provide a list of contributors' email addresses # @@ -22,7 +22,7 @@ set -euo pipefail -VERSION="1.0.1" +VERSION="1.0.2" SCRIPT_NAME="list-contributors-in-history" # ------------- @@ -197,6 +197,11 @@ fi touch "$git_log_file" +if [ "$( git log --oneline -5 2>/dev/null | wc -l )" -eq 0 ]; then + echo "Warning: Project '$git_based_project' is a git repository without any commit, that's weird" + NormalExit +fi + git log --since=$git_log_limit > $git_log_file if [ ! -s "$git_log_file" ]; then From 3acc76022c070d8c04aa068174b38f79381c2981 Mon Sep 17 00:00:00 2001 From: Pierre-Yves Lapersonne Date: Wed, 9 Mar 2022 19:55:58 +0100 Subject: [PATCH 3/3] [#49] [Feature] Use of GitLeaks for GitLab projects (#56) * refactor: GitHub - preconditions Signed-off-by: Pierre-Yves Lapersonne * feat: #49 - look for leaks with GitLeaks in GitLab projects Signed-off-by: Pierre-Yves Lapersonne --- CHANGELOG.md | 3 +- README.md | 29 ++- toolbox/dry-run.sh | 3 +- .../github/utils/check-leaks-from-github.sh | 6 +- toolbox/gitlab/GitLabWizard.sh | 19 +- .../gitlab/utils/check-leaks-from-gitlab.sh | 217 ++++++++++++++++++ 6 files changed, 267 insertions(+), 10 deletions(-) create mode 100755 toolbox/gitlab/utils/check-leaks-from-gitlab.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 7c6eb34..c7590cc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ### Features - [#32](https://github.com/Orange-OpenSource/floss-toolbox/issues/32) GitLab Auto Backup +- [#49](https://github.com/Orange-OpenSource/floss-toolbox/issues/49) Look for leaks (GitLab) ### Bugs @@ -14,7 +15,7 @@ ### Features -- [#44](https://github.com/Orange-OpenSource/floss-toolbox/issues/44) Look for leaks +- [#44](https://github.com/Orange-OpenSource/floss-toolbox/issues/44) Look for leaks (GitHub) - [#29](https://github.com/Orange-OpenSource/floss-toolbox/issues/29) Dry run ### 
Refactoring diff --git a/README.md b/README.md index c1d8bf3..6f44ae4 100644 --- a/README.md +++ b/README.md @@ -427,7 +427,7 @@ brew install gitleaks You need to define in the _configuration.rb_ files the Github organisation at **GITHUB_ORGANIZATION_NAME** and also your GitHub personal token at ** GITHUB_PERSONAL_ACCESS_TOKEN**. -**You should also have your _git_ environment ready i.e. add your SSH private key if you clone by SSH for example. _gh_ must be installed, and _python3_ be ready. Obvisously _gitleaks_ must be installed** +**You should also have your _git_ environment ready i.e. add your SSH private key if you clone by SSH for example. _gh_ must be installed, and _python3_ be ready. Obviously _gitleaks_ must be installed** # Play with GitLab web API @@ -464,3 +464,30 @@ You need to define in the _configuration.rb_ files the GitLab organisation ID at You have to also define the location to store clones at **REPOSITORIES_CLONE_LOCATION_PATH** and the access token at **GILAB_PERSONAL_ACCESS_TOKEN**. **You should also have your _git_ environment ready, i.e. add your SSH private key if you clone by SSH for example.** + +### Check if there are leaks in organisation repositories (using gitleaks) + +_Keywords: #organisation #GitLab #repositories #leaks #gitleaks_ + +**Warning: This operation can take a long time because both Git histories and file trees are parsed** + +This feature checks all repositories of the GitLab organisation for leaks using the _gitleaks_ tool. + +Run the following command: +```shell +bash GitLabWizard.sh look-for-leaks +``` + +This script needs a GitLab personal access token to make requests to the GitLab API, and also the GitLab group ID used to get the projects under it. +The wizard Shell script picks configuration details from the Ruby configuration file and triggers another Shell script to process the data. Python code is also called to process the JSON sent by the GitLab API. 
+ +The [gitleaks](https://github.com/zricethezav/gitleaks) tool will be used to look inside the repository. To install it: + +```shell +brew install gitleaks +``` + +You need to define in the _configuration.rb_ files the GitLab organisation ID at **GITLAB_ORGANIZATION_ID**. +You have to also define the location to store clones at **REPOSITORIES_CLONE_LOCATION_PATH** and the access token at **GILAB_PERSONAL_ACCESS_TOKEN**. + +**You should also have your _git_ environment ready i.e. add your SSH private key if you clone by SSH for example. _gh_ must be installed, and _python3_ be ready. Obviously _gitleaks_ must be installed** \ No newline at end of file diff --git a/toolbox/dry-run.sh b/toolbox/dry-run.sh index 33d087d..851218f 100755 --- a/toolbox/dry-run.sh +++ b/toolbox/dry-run.sh @@ -144,7 +144,8 @@ echo -e "\nCheck files..." CheckIfFileExists "gitlab/configuration.rb" CheckIfFileExists "gitlab/GitLabWizard.sh" CheckIfFileExists "gitlab/utils/dump-git-repositories-from-gitlab.sh" -CheckIfFileExists "github/utils/extract-repos-field-from-json.py" # Stored in github folder but used by ump-git-repositories-from-gitlab.sh +CheckIfFileExists "github/utils/extract-repos-field-from-json.py" # Stored in github folder but used by dump-git-repositories-from-gitlab.sh +CheckIfFileExists "github/utils/count-leaks-nodes.py" # Stored in github folder but used by check-leaks-from-gitlab.sh # Runtimes and tools # ------------------ diff --git a/toolbox/github/utils/check-leaks-from-github.sh b/toolbox/github/utils/check-leaks-from-github.sh index 70d0569..9b6ad20 100755 --- a/toolbox/github/utils/check-leaks-from-github.sh +++ b/toolbox/github/utils/check-leaks-from-github.sh @@ -75,14 +75,14 @@ if [ -z "$organisation_name" -o "$organisation_name" == "" ]; then fi cloning_url_key=$2 -if [ -z "$cloning_url_key" -o "$organisation_name" == "" ]; then +if [ -z "$cloning_url_key" -o "$cloning_url_key" == "" ]; then echo "ERROR: No JSON key for URL. Exits now." 
UsageAndExit exit $EXIT_BAD_ARGUMENTS fi dump_folder_name=$3 -if [ -z "$dump_folder_name" -o "$organisation_name" == "" ]; then +if [ -z "$dump_folder_name" -o "$dump_folder_name" == "" ]; then echo "ERROR: No dump folder name defined. Exits now." UsageAndExit exit $EXIT_BAD_ARGUMENTS @@ -195,7 +195,7 @@ while read url_line; do done < "$dir_before_dump/$url_for_cloning" -echo "Looking done!" +echo "Scanning done!" # Step 6 - Clean up diff --git a/toolbox/gitlab/GitLabWizard.sh b/toolbox/gitlab/GitLabWizard.sh index 83817b9..4acbdb1 100755 --- a/toolbox/gitlab/GitLabWizard.sh +++ b/toolbox/gitlab/GitLabWizard.sh @@ -18,6 +18,7 @@ VERSION="1.0.0" RUBY_CONFIGURATION_FILE="./configuration.rb" SHELL_REPOSITORIES_DUMPER="./utils/dump-git-repositories-from-gitlab.sh" +SHELL_REPOSITORIES_LEAKS_SCANNER="./utils/check-leaks-from-gitlab.sh" # Exit codes # ---------- @@ -37,6 +38,7 @@ UsageAndExit(){ echo "bash GitLabWizard.sh feature-to-launch" echo "with feature-to-launch:" echo -e "\t backup-all-repositories-from-org...............: Dump all repositories in GitHub to a specific location in the disk" + echo -e "\t look-for-leaks.................................: Checks with gitleaks if there are leaks in all repositories" echo "About exit codes:" echo -e "\t 0................: Normal exit" echo -e "\t 1................: Bad arguments given to the script" @@ -68,7 +70,7 @@ if [ -z "$feature_to_run" ]; then exit $EXIT_NO_FEATURE fi -if [ $feature_to_run != "backup-all-repositories-from-org" ]; then +if [ $feature_to_run != "backup-all-repositories-from-org" -a $feature_to_run != "look-for-leaks" ]; then echo "ERROR: '$feature_to_run' is unknown feature. Exit now" UsageAndExit exit $EXIT_UNKNOWN_FEATURE @@ -88,7 +90,8 @@ if [ ! 
-f "$RUBY_CONFIGURATION_FILE" ]; then exit $EXIT_BAD_SETUP fi -if [ $feature_to_run == "backup-all-repositories-from-org" ]; then +# Features: backup-all-repositories-from-org, look-for-leaks +if [ $feature_to_run == "backup-all-repositories-from-org" -o $feature_to_run == "look-for-leaks" ]; then if [ ! -f "$SHELL_REPOSITORIES_DUMPER" ]; then echo "ERROR: SHELL_REPOSITORIES_DUMPER does not exist. Exits now." @@ -125,9 +128,17 @@ if [ $feature_to_run == "backup-all-repositories-from-org" ]; then exit $EXIT_BAD_SETUP fi - echo "Start Shell script ($SHELL_REPOSITORIES_DUMPER) for feature to dump repositories of '$GITLAB_ORGANIZATION_ID' to '$REPOSITORIES_CLONE_LOCATION_PATH'" start_time_seconds=`date +%s` - ./$SHELL_REPOSITORIES_DUMPER $CLONING_URL_JSON_KEY $GITLAB_ORGANIZATION_ID $RESULTS_PER_PAGE $REPOSITORIES_CLONE_LOCATION_PATH $GILAB_PERSONAL_ACCESS_TOKEN + + if [ $feature_to_run == "backup-all-repositories-from-org" ]; then + echo "Start Shell script ($SHELL_REPOSITORIES_DUMPER) for feature to dump repositories of '$GITLAB_ORGANIZATION_ID' to '$REPOSITORIES_CLONE_LOCATION_PATH'" + ./$SHELL_REPOSITORIES_DUMPER $CLONING_URL_JSON_KEY $GITLAB_ORGANIZATION_ID $RESULTS_PER_PAGE $REPOSITORIES_CLONE_LOCATION_PATH $GILAB_PERSONAL_ACCESS_TOKEN + fi + + if [ $feature_to_run == "look-for-leaks" ]; then + echo "Start Shell script ($SHELL_REPOSITORIES_LEAKS_SCANNER) to look for leaks in repositories of '$GITLAB_ORGANIZATION_ID'" + ./$SHELL_REPOSITORIES_LEAKS_SCANNER $GITLAB_ORGANIZATION_ID $CLONING_URL_JSON_KEY $RESULTS_PER_PAGE $GILAB_PERSONAL_ACCESS_TOKEN + fi fi # Stats & bye diff --git a/toolbox/gitlab/utils/check-leaks-from-gitlab.sh b/toolbox/gitlab/utils/check-leaks-from-gitlab.sh new file mode 100755 index 0000000..1c92c70 --- /dev/null +++ b/toolbox/gitlab/utils/check-leaks-from-gitlab.sh @@ -0,0 +1,217 @@ +#!/bin/bash +# Software Name: floss-toolbox +# SPDX-FileCopyrightText: Copyright (c) 2021-2022 Orange +# SPDX-License-Identifier: Apache-2.0 +# +# This software 
is distributed under the Apache 2.0 license.
+#
+# Author: Pierre-Yves LAPERSONNE et al.
+
+# Since...............: 09/03/2022
+# Description.........: Check if there are leaks thanks to gitleaks in GitLab projects
+
+#set -euxo pipefail
+VERSION="1.0.0"
+
+# Config
+# ------
+
+EXIT_OK=0
+EXIT_BAD_ARGUMENTS=1
+EXIT_BAD_SETUP=2
+
+URL_EXTRACTER_FILE="./../github/utils/extract-repos-field-from-json.py" # TODO: Extract this Python script to common files
+LEAKS_PARSER="./../github/utils/count-leaks-nodes.py" # TODO: Extract this Python script to common files
+GITLEAKS_FINAL_REPORT="$$_gitleaks-final_report-count.csv"
+
+# Functions
+# ---------
+# UsageAndExit [STATUS]: prints the usage then exits with STATUS (EXIT_OK if omitted), so error paths no longer exit with 0.
+UsageAndExit(){
+    echo "check-leaks-from-gitlab.sh - Version $VERSION"
+    echo "USAGE:"
+    echo "bash check-leaks-from-gitlab.sh ORGANISATION_ID KEY PAGINATION TOKEN"
+    echo "with ORGANISATION_ID: GitLab organisation ID"
+    echo "with KEY: JSON key to use for cloning URL"
+    echo "with PAGINATION: number if items per page"
+    echo "with TOKEN: GitLab access token"
+    echo "About exit codes:"
+    echo -e "\t 0................: Normal exit"
+    echo -e "\t 1................: Bad arguments given to the script"
+    echo -e "\t 2................: Bad setup for the script or undefined LEAKS_PARSER file"
+    exit "${1:-$EXIT_OK}"
+}
+
+# Check setup
+# -----------
+
+if [ "$#" -eq 0 ]; then
+    UsageAndExit $EXIT_OK
+    exit $EXIT_OK # Not reached (UsageAndExit exits), kept as a safety net like the other explicit exits below
+fi
+
+if [ "$#" -ne 4 ]; then
+    echo "ERROR: Bad arguments number. Exits now"
+    UsageAndExit $EXIT_BAD_ARGUMENTS
+    exit $EXIT_BAD_ARGUMENTS
+fi
+
+if [ ! -f "$URL_EXTRACTER_FILE" ]; then
+    echo "ERROR: Bad set up for URL extracter. Exits now"
+    UsageAndExit $EXIT_BAD_SETUP
+    exit $EXIT_BAD_SETUP
+fi
+
+if [ ! -f "$LEAKS_PARSER" ]; then
+    echo "ERROR: Bad set up for leaks parser. Exits now"
+    UsageAndExit $EXIT_BAD_SETUP
+    exit $EXIT_BAD_SETUP
+fi
+
+organisation_id=$1
+if [ -z "$organisation_id" ]; then
+    echo "ERROR: No organisation ID defined. Exits now."
+    UsageAndExit $EXIT_BAD_ARGUMENTS
+    exit $EXIT_BAD_ARGUMENTS
+fi
+
+cloning_url_key=$2
+if [ -z "$cloning_url_key" ]; then
+    echo "ERROR: No JSON key for URL. Exits now."
+    UsageAndExit $EXIT_BAD_ARGUMENTS
+    exit $EXIT_BAD_ARGUMENTS
+fi
+
+pagination=$3
+if [ -z "$pagination" ]; then
+    echo "ERROR: No pagination defined. Exits now."
+    UsageAndExit $EXIT_BAD_ARGUMENTS
+    exit $EXIT_BAD_ARGUMENTS
+fi
+
+access_token=$4
+if [ -z "$access_token" ]; then
+    echo "ERROR: No access token is defined. Exits now."
+    UsageAndExit $EXIT_BAD_ARGUMENTS
+    exit $EXIT_BAD_ARGUMENTS
+fi
+
+# Run
+# ---
+
+echo "---------------------------------------------"
+echo "check-leaks-from-gitlab.sh - Version $VERSION"
+echo "---------------------------------------------"
+
+# Step 1 - Get all groups and subgroups projects
+
+max_number_of_pages=10 # TODO: Remove magic number for max number of pages
+echo "Get all projects of groups and subgroups with $pagination items per page and arbitrary $max_number_of_pages pages max..."
+
+gitlab_projects_dump_file_raw="./data/.gitlab-projects-dump.raw.json"
+gitlab_projects_dump_file_clean="./data/.gitlab-projects-dump.clean.json"
+if [ -f "$gitlab_projects_dump_file_raw" ]; then
+    rm "$gitlab_projects_dump_file_raw"
+fi
+
+for page in $(seq 1 $max_number_of_pages)
+do
+    curl --silent --header "Authorization: Bearer $access_token" --location --request GET "https://gitlab.com/api/v4/groups/$organisation_id/projects?include_subgroups=true&per_page=$pagination&page=$page" >> "$gitlab_projects_dump_file_raw"
+done
+
+# Step 2 - Extract repositories URL
+
+# Because of pagination (max 100 items per page, arbitrary 10 pages here), raw pages are concatenated in one file.
+# So we have several JSON arrays pasted one after the other in one file.
+# Consecutive arrays show the pattern ][. Merge all arrays into one by replacing each ][ by ,
+# But for empty pages the empty arrays leave runs of consecutive , so we squeeze them into a single one.
+# Then the final array may end with a useless , so the pattern },] is replaced by }]
+sed -e "s/\]\[/,/g" "$gitlab_projects_dump_file_raw" | tr -s ',' | sed -e "s/\}\,\]/\}\]/g" > "$gitlab_projects_dump_file_clean"
+
+url_for_cloning="./data/.url-for-cloning.txt"
+echo "Extract cloning from results (using '$cloning_url_key' as JSON key)..."
+python3 "$URL_EXTRACTER_FILE" --field "$cloning_url_key" --source "$gitlab_projects_dump_file_clean" > "$url_for_cloning"
+repo_count=$(wc -l < "$url_for_cloning" | sed 's/ //g')
+echo "Extraction done. Found '$repo_count' items."
+
+# Step 3 - Clone repositories
+
+dir_before_dump=$(pwd)
+echo "Creating dump directory..."
+directory_name=$(date '+%Y-%m-%d')
+# The dump directory is created in the current directory: this script receives no clone location argument.
+if [ -d "$directory_name" ]; then
+    echo "Removing old directory with the same name"
+    rm -rf "$directory_name"
+fi
+mkdir "$directory_name"
+cd "$directory_name" || exit $EXIT_BAD_SETUP # Never clone nor remove folders from an unexpected location
+echo "Dump directory created with name '$directory_name' at location $(pwd)."
+
+# Step 4 - For each repository, clone it and make a scan
+
+number_of_url=$(wc -l < "$dir_before_dump/$url_for_cloning" | awk '{print $1}')
+cpt=1; cpt_clean_repo=0; cpt_dirty_repo=0 # Counters start at 0 so the final summary never prints empty values
+echo "Dumping of $number_of_url repositories..."
+while IFS= read -r url_line; do
+
+    # Step 4.1 - Clone
+    # WARNING: gitleaks looks inside files and git histories, so for old and big projects it will take too much time!
+
+    echo "Cloning ($cpt / $number_of_url) '$url_line'..."
+    git clone "$url_line"
+
+    # Step 4.2 - Extract new folder name
+
+    target_folder_name=$(basename -s .git "$url_line")
+    echo "Cloned in folder '$target_folder_name'"
+
+    # Step 4.3 - Look for leaks
+
+    gitleaks_file_name="$target_folder_name".gitleaks.json
+    gitleaks detect --report-format json --report-path "$gitleaks_file_name" --source "$target_folder_name" || true # gitleaks returns 1 if leaks found
+
+    # In JSON report, a project has no leak if the result file contains an empty JSON array, i.e. only the line
+    # []
+    if [ -f "$gitleaks_file_name" ]; then
+        pwd
+        count=$(python3 "../$LEAKS_PARSER" --file "$gitleaks_file_name")
+
+        if [ "$count" -eq "0" ]; then
+            echo "✅ ;$target_folder_name;$count" >> "$GITLEAKS_FINAL_REPORT"
+            echo "✅ Gitleaks did not find leaks for '$target_folder_name'"
+            cpt_clean_repo=$((cpt_clean_repo+1))
+        else
+            echo "🚨;$target_folder_name;$count" >> "$GITLEAKS_FINAL_REPORT"
+            echo "🚨 WARNING! gitleaks may have found '$count' leaks for '$target_folder_name'"
+            cpt_dirty_repo=$((cpt_dirty_repo+1))
+        fi
+    else
+        echo "💥 ERROR: The file '$gitleaks_file_name' does not exist, something has failed with gitleaks!"
+    fi
+
+    rm -rf "$target_folder_name"
+
+    cpt=$((cpt+1))
+
+done < "$dir_before_dump/$url_for_cloning"
+
+echo "Scanning done!"
+
+# Step 5 - Clean up
+
+# No git configuration to restore here: this script never changes diff.renameLimit.
+
+mv "$GITLEAKS_FINAL_REPORT" "$dir_before_dump"
+echo "GitLab organisation ID...............: '$organisation_id'"
+echo "Total number of projects.............: '$number_of_url'"
+echo "Number of projects with alerts.......: '$cpt_dirty_repo'"
+echo "Number of projects without alerts....: '$cpt_clean_repo'"
+echo "Final report is......................: '$GITLEAKS_FINAL_REPORT'"
+
+rm -rf "$target_folder_name"
+rm -rf "$dir_before_dump/$url_for_cloning"
+cd "$dir_before_dump"
+rm -f "$url_for_cloning"
+
+echo "Check done!"