Add script to do bulk queries of ES logs #274

Merged
merged 4 commits on Oct 3, 2023
2 changes: 1 addition & 1 deletion get-pipeline-git-resources-without-verification.sh
@@ -41,7 +41,7 @@ if ! fly --target "${FLY_TARGET}" workers > /dev/null; then
fi

function find_git_resources_without_commit_verification {
-fly -t ci get-pipeline --pipeline "$1" --json \
+fly -t "${FLY_TARGET}" get-pipeline --pipeline "$1" --json \
| jq '.resources[] |
select(.type=="git") |
select(.source.uri | test("github.com.*(cloud-gov|18[Ff])")) |
2 changes: 1 addition & 1 deletion get-pipeline-git-resources.sh
@@ -75,7 +75,7 @@ if ! fly --target "${FLY_TARGET}" workers > /dev/null; then
fi

function find_git_resources_for_branch {
-fly -t ci get-pipeline --pipeline "$1" --json \
+fly -t "${FLY_TARGET}" get-pipeline --pipeline "$1" --json \
| jq --arg repo "$REPO" --arg branch "$BRANCH" '.resources[] |
select(.type=="git") |
select(.source.uri | test("github.com.*(cloud-gov|18[Ff])")) |
49 changes: 49 additions & 0 deletions jumpbox-utils/download-file.sh
@@ -0,0 +1,49 @@
#!/bin/bash

function usage {
echo -e "
./$( basename "$0" ) <environment> <output-directory> <filename>

Downloads a file from a jumpbox

Optional environment variable \$CI_URL matching your Concourse URL.
example: CI_URL=https://ci.fr.cloud.gov ./$( basename "$0" )

\$CI_URL defaults to https://ci.fr.cloud.gov

Examples:
./$( basename "$0" ) production ~/Downloads test.csv
"
exit
}

if [ -z "$1" ]; then
echo "Environment (e.g. development, staging, production) required as first argument"
usage
fi

if [ -z "$2" ]; then
echo "Output directory for file required as second argument"
usage
fi

if [ -z "$3" ]; then
echo "Filename to download required as third argument"
usage
fi

ENVIRONMENT=$1
OUTPUT_DIR=$2
FILE=$3

CI_URL="${CI_URL:-"https://ci.fr.cloud.gov"}"
FLY_TARGET=$(fly targets | grep "${CI_URL}" | head -n 1 | awk '{print $1}')

JUMPBOX_CONTAINER="container-bosh-${ENVIRONMENT}"
BUILD_NUMBER=$(fly -t "$FLY_TARGET" containers | grep jumpbox | grep "$JUMPBOX_CONTAINER" | awk '{print $5}' | sort -nr | head -n 1)

echo "Attempting to download files from $JUMPBOX_CONTAINER, build ${BUILD_NUMBER}"

fly -t "$FLY_TARGET" i -j "jumpbox/$JUMPBOX_CONTAINER" -s jumpbox -b "$BUILD_NUMBER" -- cat "$FILE" > "$OUTPUT_DIR/$FILE"

echo "successfully downloaded $FILE"
78 changes: 78 additions & 0 deletions logs/get-csv-logs.sh
@@ -0,0 +1,78 @@
#!/bin/bash

function usage {
echo -e "
./$( basename "$0" ) <IP_OR_HOSTNAME> <SEARCH_JSON_FILE> <'field1,field2'>

Use the Elasticsearch/Opensearch scroll API to query logs for bulk export

Examples:
./$( basename "$0" ) 127.0.0.1 search.json '."@timestamp",.rtr.path,.rtr.status'
"
exit
}

if [ -z "$1" ]; then
echo "IP/hostname for Elasticsearch/Opensearch cluster is required as first argument"
usage
fi

if [ -z "$2" ]; then
echo "Filename containing search query JSON is required as second argument"
usage
fi

if [ -z "$3" ]; then
echo "String containing fields to extract into CSV is required as third argument"
usage
fi

# Use port 9200
es_url="$1:9200"
search_json_file="$2"
csv_fields="$3"

get_hits_count() {
  echo "$1" | jq -r '.hits.hits | length'
}

get_scroll_id() {
  echo "$1" | jq -r '._scroll_id'
}

write_csv_output() {
  echo "$1" | jq -r ".hits.hits[]._source | [$csv_fields] | @csv" > "results-$2.csv"
}

response=$(curl -s -X POST "$es_url/_search?scroll=1m" \
  -H 'content-type: application/json' \
  -d "@$search_json_file")

hits_count=$(get_hits_count "$response")
hits_total=$(echo "$response" | jq -r '.hits.total.value')
hits_so_far=$hits_count
scroll_id=$(get_scroll_id "$response")
counter=1

echo "Got initial response with $hits_count hits out of $hits_total"

write_csv_output "$response" $counter

while [ "$hits_count" != "0" ]; do
counter=$((counter + 1))

response=$(curl -X POST -s -H 'content-type: application/json' "$es_url/_search/scroll" -d "{ \"scroll\": \"1m\", \"scroll_id\": \"$scroll_id\" }")

scroll_id=$(get_scroll_id "$response")
hits_count=$(get_hits_count "$response")
hits_so_far=$((hits_so_far + hits_count))

echo "Got response with $hits_count hits ($hits_so_far total hits so far out of $hits_total)"

if [ "$hits_count" != "0" ]; then
write_csv_output "$response" $counter
fi
done

echo Done!
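
The body of <SEARCH_JSON_FILE> is posted as-is to the _search endpoint, so it controls both the query and the per-page batch size. A minimal sketch of what it might contain is below; the size, field names, and time range are illustrative assumptions, not part of this change:

cat > search.json <<'EOF'
{
  "size": 1000,
  "query": {
    "bool": {
      "filter": [
        { "range": { "@timestamp": { "gte": "now-24h" } } }
      ]
    }
  }
}
EOF

# then export @timestamp, request path, and status for every matching log line:
./logs/get-csv-logs.sh 127.0.0.1 search.json '."@timestamp",.rtr.path,.rtr.status'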
