From e51baf46f224dedcda8a1c4426d62557c55ea84d Mon Sep 17 00:00:00 2001 From: tuntoja Date: Thu, 31 Oct 2024 11:12:04 +0100 Subject: [PATCH 01/13] enh(ci): update pull and push jobs in workflows --- .github/workflows/centreon-collect.yml | 12 ++++++------ .github/workflows/docker-builder.yml | 4 ++-- .github/workflows/gorgone.yml | 8 ++++---- .github/workflows/libzmq.yml | 8 ++++---- .github/workflows/lua-curl.yml | 4 ++-- .github/workflows/package-collect.yml | 4 ++-- 6 files changed, 20 insertions(+), 20 deletions(-) diff --git a/.github/workflows/centreon-collect.yml b/.github/workflows/centreon-collect.yml index d59282d0fe9..2d671bb3b52 100644 --- a/.github/workflows/centreon-collect.yml +++ b/.github/workflows/centreon-collect.yml @@ -121,8 +121,8 @@ jobs: veracode_api_id: ${{ secrets.VERACODE_API_ID_COLL }} veracode_api_key: ${{ secrets.VERACODE_API_KEY_COLL }} veracode_srcclr_token: ${{ secrets.VERACODE_SRCCLR_TOKEN }} - docker_registry_id: ${{ secrets.DOCKER_REGISTRY_ID }} - docker_registry_passwd: ${{ secrets.DOCKER_REGISTRY_PASSWD }} + docker_registry_id: ${{ secrets.HARBOR_CENTREON_PULL_USERNAME }} + docker_registry_passwd: ${{ secrets.HARBOR_CENTREON_PULL_TOKEN }} unit-test: needs: [get-version] @@ -146,8 +146,8 @@ jobs: container: image: ${{ vars.DOCKER_INTERNAL_REGISTRY_URL }}/centreon-collect-${{ matrix.distrib }}:${{ needs.get-version.outputs.img_version }} credentials: - username: ${{ secrets.DOCKER_REGISTRY_ID }} - password: ${{ secrets.DOCKER_REGISTRY_PASSWD }} + username: ${{ secrets.HARBOR_CENTREON_PULL_USERNAME }} + password: ${{ secrets.HARBOR_CENTREON_PULL_TOKEN }} name: unit test ${{ matrix.distrib }} @@ -352,8 +352,8 @@ jobs: tests_params: ${{matrix.tests_params}} test_group_name: ${{matrix.test_group_name}} secrets: - registry_username: ${{ secrets.DOCKER_REGISTRY_ID }} - registry_password: ${{ secrets.DOCKER_REGISTRY_PASSWD }} + registry_username: ${{ secrets.HARBOR_CENTREON_PULL_USERNAME }} + registry_password: ${{ 
secrets.HARBOR_CENTREON_PULL_TOKEN }} collect_s3_access_key: ${{ secrets.COLLECT_S3_ACCESS_KEY }} collect_s3_secret_key: ${{ secrets.COLLECT_S3_SECRET_KEY }} xray_client_id: ${{ secrets.XRAY_CLIENT_ID }} diff --git a/.github/workflows/docker-builder.yml b/.github/workflows/docker-builder.yml index 469b29e404d..2469f108858 100644 --- a/.github/workflows/docker-builder.yml +++ b/.github/workflows/docker-builder.yml @@ -105,8 +105,8 @@ jobs: uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0 with: registry: ${{ vars.DOCKER_PROXY_REGISTRY_URL }} - username: ${{ secrets.DOCKER_REGISTRY_ID }} - password: ${{ secrets.DOCKER_REGISTRY_PASSWD }} + username: ${{ secrets.HARBOR_CENTREON_PULL_USERNAME }} + password: ${{ secrets.HARBOR_CENTREON_PULL_TOKEN }} - uses: docker/setup-buildx-action@988b5a0280414f521da01fcc63a27aeeb4b104db # v3.6.1 diff --git a/.github/workflows/gorgone.yml b/.github/workflows/gorgone.yml index b029788ce1a..c90adbebbc8 100644 --- a/.github/workflows/gorgone.yml +++ b/.github/workflows/gorgone.yml @@ -51,8 +51,8 @@ jobs: veracode_api_id: ${{ secrets.VERACODE_API_ID_GORG }} veracode_api_key: ${{ secrets.VERACODE_API_KEY_GORG }} veracode_srcclr_token: ${{ secrets.VERACODE_SRCCLR_TOKEN }} - docker_registry_id: ${{ secrets.DOCKER_REGISTRY_ID }} - docker_registry_passwd: ${{ secrets.DOCKER_REGISTRY_PASSWD }} + docker_registry_id: ${{ secrets.HARBOR_CENTREON_PULL_USERNAME }} + docker_registry_passwd: ${{ secrets.HARBOR_CENTREON_PULL_TOKEN }} package: needs: [get-version] @@ -81,8 +81,8 @@ jobs: container: image: ${{ vars.DOCKER_INTERNAL_REGISTRY_URL }}/${{ matrix.image }}:${{ needs.get-version.outputs.major_version }} credentials: - username: ${{ secrets.DOCKER_REGISTRY_ID }} - password: ${{ secrets.DOCKER_REGISTRY_PASSWD }} + username: ${{ secrets.HARBOR_CENTREON_PULL_USERNAME }} + password: ${{ secrets.HARBOR_CENTREON_PULL_TOKEN }} name: package ${{ matrix.distrib }} diff --git a/.github/workflows/libzmq.yml 
b/.github/workflows/libzmq.yml index bc342cc8d09..1f385411bde 100644 --- a/.github/workflows/libzmq.yml +++ b/.github/workflows/libzmq.yml @@ -41,8 +41,8 @@ jobs: container: image: ${{ vars.DOCKER_INTERNAL_REGISTRY_URL }}/${{ matrix.image }}:${{ needs.get-version.outputs.major_version }} credentials: - username: ${{ secrets.DOCKER_REGISTRY_ID }} - password: ${{ secrets.DOCKER_REGISTRY_PASSWD }} + username: ${{ secrets.HARBOR_CENTREON_PULL_USERNAME }} + password: ${{ secrets.HARBOR_CENTREON_PULL_TOKEN }} name: package ${{ matrix.distrib }} @@ -93,8 +93,8 @@ jobs: container: image: ${{ vars.DOCKER_INTERNAL_REGISTRY_URL }}/${{ matrix.image }}:${{ needs.get-version.outputs.major_version }} credentials: - username: ${{ secrets.DOCKER_REGISTRY_ID }} - password: ${{ secrets.DOCKER_REGISTRY_PASSWD }} + username: ${{ secrets.HARBOR_CENTREON_PULL_USERNAME }} + password: ${{ secrets.HARBOR_CENTREON_PULL_TOKEN }} name: package ${{ matrix.distrib }} ${{ matrix.arch }} diff --git a/.github/workflows/lua-curl.yml b/.github/workflows/lua-curl.yml index a0051595337..60e7cda2a0f 100644 --- a/.github/workflows/lua-curl.yml +++ b/.github/workflows/lua-curl.yml @@ -71,8 +71,8 @@ jobs: container: image: ${{ vars.DOCKER_INTERNAL_REGISTRY_URL }}/${{ matrix.image }}:${{ needs.get-version.outputs.img_version }} credentials: - username: ${{ secrets.DOCKER_REGISTRY_ID }} - password: ${{ secrets.DOCKER_REGISTRY_PASSWD }} + username: ${{ secrets.HARBOR_CENTREON_PULL_USERNAME }} + password: ${{ secrets.HARBOR_CENTREON_PULL_TOKEN }} name: package ${{ matrix.distrib }} ${{ matrix.arch }} diff --git a/.github/workflows/package-collect.yml b/.github/workflows/package-collect.yml index 4f101ca71fb..a8dbdfd6e05 100644 --- a/.github/workflows/package-collect.yml +++ b/.github/workflows/package-collect.yml @@ -58,8 +58,8 @@ jobs: container: image: ${{ vars.DOCKER_INTERNAL_REGISTRY_URL }}/${{ inputs.image }}:${{ inputs.img_version }} credentials: - username: ${{ secrets.DOCKER_REGISTRY_ID }} - password: 
${{ secrets.DOCKER_REGISTRY_PASSWD }} + username: ${{ secrets.HARBOR_CENTREON_PULL_USERNAME }} + password: ${{ secrets.HARBOR_CENTREON_PULL_TOKEN }} name: package ${{ inputs.distrib }} ${{ inputs.arch }} From fb10c668115e39c51a8885a9ce02aa1fefbeae97 Mon Sep 17 00:00:00 2001 From: Kevin Duret Date: Mon, 4 Nov 2024 14:56:12 +0100 Subject: [PATCH 02/13] enh(ci): refactor get-version workflow (#1777) (#1781) --- .github/actions/deb-delivery/action.yml | 6 +- .github/actions/delivery/action.yml | 20 +- .github/actions/promote-to-stable/action.yml | 12 +- .github/actions/rpm-delivery/action.yml | 16 +- .../docker/Dockerfile.gorgone-testing-alma8 | 4 +- .../docker/Dockerfile.gorgone-testing-alma9 | 4 +- .../Dockerfile.gorgone-testing-bookworm | 4 +- .../Dockerfile.gorgone-testing-bullseye | 4 +- .../docker/Dockerfile.gorgone-testing-jammy | 4 +- .github/workflows/centreon-collect.yml | 84 +++-- .github/workflows/check-status.yml | 6 +- .github/workflows/docker-builder.yml | 34 +- .github/workflows/docker-gorgone-testing.yml | 12 +- .github/workflows/get-environment.yml | 294 ++++++++++++++++++ .github/workflows/get-version.yml | 233 -------------- .github/workflows/gorgone.yml | 83 ++--- .github/workflows/libzmq.yml | 56 ++-- .github/workflows/lua-curl.yml | 52 ++-- .github/workflows/rebase-master.yml | 2 +- .github/workflows/rebase-version.yml | 2 +- .github/workflows/release-trigger-builds.yml | 2 +- .github/workflows/release.yml | 2 +- .github/workflows/robot-test.yml | 8 +- .github/workflows/veracode-analysis.yml | 4 +- .../workflows/windows-agent-robot-test.yml | 16 +- .github/workflows/windows-agent.yml | 34 +- 26 files changed, 535 insertions(+), 463 deletions(-) create mode 100644 .github/workflows/get-environment.yml delete mode 100644 .github/workflows/get-version.yml diff --git a/.github/actions/deb-delivery/action.yml b/.github/actions/deb-delivery/action.yml index 46b6c5ec189..1c6a3850ba0 100644 --- a/.github/actions/deb-delivery/action.yml +++ 
b/.github/actions/deb-delivery/action.yml @@ -22,7 +22,7 @@ inputs: release_type: description: "Type of release (hotfix, release)" required: true - release_cloud: + is_cloud: description: "Release context (cloud or not cloud)" required: true @@ -49,12 +49,12 @@ runs: echo "[DEBUG] - Version: ${{ inputs.version }}" echo "[DEBUG] - Distrib: ${{ inputs.distrib }}" echo "[DEBUG] - module_name: ${{ inputs.module_name }}" - echo "[DEBUG] - release_cloud: ${{ inputs.release_cloud }}" + echo "[DEBUG] - is_cloud: ${{ inputs.is_cloud }}" echo "[DEBUG] - release_type: ${{ inputs.release_type }}" echo "[DEBUG] - stability: ${{ inputs.stability }}" # Make sure all required inputs are NOT empty - if [[ -z "${{ inputs.module_name }}" || -z "${{ inputs.distrib }}" || -z ${{ inputs.stability }} || -z ${{ inputs.version }} || -z ${{ inputs.release_cloud }} || -z ${{ inputs.release_type }} ]]; then + if [[ -z "${{ inputs.module_name }}" || -z "${{ inputs.distrib }}" || -z ${{ inputs.stability }} || -z ${{ inputs.version }} || -z "${{ inputs.is_cloud }}" ]]; then echo "Some mandatory inputs are empty, please check the logs." 
exit 1 fi diff --git a/.github/actions/delivery/action.yml b/.github/actions/delivery/action.yml index 8cbca5c8073..9b6328723e9 100644 --- a/.github/actions/delivery/action.yml +++ b/.github/actions/delivery/action.yml @@ -22,7 +22,7 @@ inputs: release_type: description: "Type of release (hotfix, release)" required: true - release_cloud: + is_cloud: description: "Release context (cloud or not cloud)" required: true @@ -63,12 +63,12 @@ runs: echo "[DEBUG] - Major version: ${{ inputs.major_version }}" echo "[DEBUG] - Distrib: ${{ inputs.distrib }}" echo "[DEBUG] - module_name: ${{ inputs.module_name }}" - echo "[DEBUG] - release_cloud: ${{ inputs.release_cloud }}" + echo "[DEBUG] - is_cloud: ${{ inputs.is_cloud }}" echo "[DEBUG] - release_type: ${{ inputs.release_type }}" echo "[DEBUG] - stability: ${{ inputs.stability }}" # Make sure all required inputs are NOT empty - if [[ -z "${{ inputs.module_name }}" || -z "${{ inputs.distrib }}" || -z ${{ inputs.stability }} || -z ${{ inputs.major_version }} || -z ${{ inputs.release_cloud }} || -z ${{ inputs.release_type }} ]]; then + if [[ -z "${{ inputs.module_name }}" || -z "${{ inputs.distrib }}" || -z ${{ inputs.stability }} || -z ${{ inputs.major_version }} || -z "${{ inputs.is_cloud }}" ]]; then echo "Some mandatory inputs are empty, please check the logs." 
exit 1 fi @@ -87,32 +87,32 @@ runs: mv "$FILE" "$ARCH" done - # Build upload target path based on release_cloud and release_type values + # Build upload target path based on is_cloud and release_type values # if cloud + hotfix or cloud + release, deliver to internal testing- # if cloud + develop, delivery to internal unstable # if non-cloud, delivery to onprem testing or unstable # CLOUD + HOTFIX + REPO STANDARD INTERNAL OR CLOUD + RELEASE + REPO STANDARD INTERNAL - if [[ ${{ inputs.release_cloud }} -eq 1 && ( ${{ inputs.release_type }} == "hotfix" || ${{ inputs.release_type }} == "release" ) ]]; then + if [[ "${{ inputs.is_cloud }}" == "true" && ( "${{ inputs.release_type }}" == "hotfix" || "${{ inputs.release_type }}" == "release" ) ]]; then echo "[DEBUG] : Release cloud + ${{ inputs.release_type }}, using rpm-standard-internal." ROOT_REPO_PATHS="rpm-standard-internal" UPLOAD_REPO_PATH="${{ inputs.major_version }}/${{ inputs.distrib }}/${{ inputs.stability }}-${{ inputs.release_type }}/$ARCH/${{ inputs.module_name }}/" # CLOUD + NOT HOTFIX OR CLOUD + NOT RELEASE + REPO STANDARD INTERNAL - elif [[ ${{ inputs.release_cloud }} -eq 1 && ( ${{ inputs.release_type }} != "hotfix" && ${{ inputs.release_type }} != "release" ) ]]; then + elif [[ "${{ inputs.is_cloud }}" == "true" && ( "${{ inputs.release_type }}" != "hotfix" && "${{ inputs.release_type }}" != "release" ) ]]; then echo "[DEBUG] : Release cloud + NOT ${{ inputs.release_type }}, using rpm-standard-internal." ROOT_REPO_PATHS="rpm-standard-internal" UPLOAD_REPO_PATH="${{ inputs.major_version }}/${{ inputs.distrib }}/${{ inputs.stability }}-${{ inputs.release_type }}/$ARCH/${{ inputs.module_name }}/" # NON-CLOUD + (HOTFIX OR RELEASE) + REPO STANDARD - elif [[ ${{ inputs.release_cloud }} -eq 0 ]]; then + elif [[ "${{ inputs.is_cloud }}" == "false" ]]; then echo "[DEBUG] : NOT Release cloud + ${{ inputs.release_type }}, using rpm-standard." 
ROOT_REPO_PATHS="rpm-standard" UPLOAD_REPO_PATH="${{ inputs.major_version }}/${{ inputs.distrib }}/${{ inputs.stability }}/$ARCH/${{ inputs.module_name }}/" # NOT VALID, DO NOT DELIVER else - echo "::error:: Invalid combination of release_type [${{ inputs.release_type }}] and release_cloud [${{ inputs.release_cloud }}]" + echo "::error:: Invalid combination of release_type [${{ inputs.release_type }}] and is_cloud [${{ inputs.is_cloud }}]" exit 1 fi @@ -141,12 +141,12 @@ runs: echo "[DEBUG] - Major version: ${{ inputs.major_version }}" echo "[DEBUG] - Distrib: ${{ inputs.distrib }}" echo "[DEBUG] - module_name: ${{ inputs.module_name }}" - echo "[DEBUG] - release_cloud: ${{ inputs.release_cloud }}" + echo "[DEBUG] - is_cloud: ${{ inputs.is_cloud }}" echo "[DEBUG] - release_type: ${{ inputs.release_type }}" echo "[DEBUG] - stability: ${{ inputs.stability }}" # Make sure all required inputs are NOT empty - if [[ -z "${{ inputs.module_name }}" || -z "${{ inputs.distrib }}" || -z ${{ inputs.stability }} || -z ${{ inputs.major_version }} || -z ${{ inputs.release_cloud }} || -z ${{ inputs.release_type }} ]]; then + if [[ -z "${{ inputs.module_name }}" || -z "${{ inputs.distrib }}" || -z ${{ inputs.stability }} || -z ${{ inputs.major_version }} || -z "${{ inputs.is_cloud }}" || -z "${{ inputs.release_type }}" ]]; then echo "Some mandatory inputs are empty, please check the logs." 
exit 1 fi diff --git a/.github/actions/promote-to-stable/action.yml b/.github/actions/promote-to-stable/action.yml index e450b3c4b90..32523490e65 100644 --- a/.github/actions/promote-to-stable/action.yml +++ b/.github/actions/promote-to-stable/action.yml @@ -22,7 +22,7 @@ inputs: release_type: description: "Type of release (hotfix, release)" required: true - release_cloud: + is_cloud: description: "Release context (cloud or not cloud)" required: true @@ -48,7 +48,7 @@ runs: # DEBUG echo "[DEBUG] - Major version: ${{ inputs.major_version }}" echo "[DEBUG] - Distrib: ${{ inputs.distrib }}" - echo "[DEBUG] - release_cloud: ${{ inputs.release_cloud }}" + echo "[DEBUG] - is_cloud: ${{ inputs.is_cloud }}" echo "[DEBUG] - release_type: ${{ inputs.release_type }}" # Cloud specific promote @@ -62,15 +62,15 @@ runs: # Search for testing packages candidate for promote for ARCH in "noarch" "x86_64"; do - # Build search path based on release_cloud and release_type values + # Build search path based on is_cloud and release_type values # if cloud, search in testing- path # if non-cloud, search in the testing usual path - if [[ ${{ inputs.release_cloud }} -eq 1 && ${{ inputs.release_type }} == "hotfix" ]] || [[ ${{ inputs.release_cloud }} -eq 1 && ${{ inputs.release_type }} == "release" ]]; then + if [[ "${{ inputs.is_cloud }}" == "true" && "${{ inputs.release_type }}" == "hotfix" ]] || [[ "${{ inputs.is_cloud }}" == "true" && "${{ inputs.release_type }}" == "release" ]]; then SEARCH_REPO_PATH="${{ inputs.major_version }}/${{ inputs.distrib }}/testing-${{ inputs.release_type }}/$ARCH/${{ inputs.module_name }}" - elif [[ ${{ inputs.release_cloud }} -eq 0 ]]; then + elif [[ "${{ inputs.is_cloud }}" == "false" ]]; then SEARCH_REPO_PATH="${{ inputs.major_version }}/${{ inputs.distrib }}/testing/$ARCH/${{ inputs.module_name }}" else - echo "Invalid combination of release_type and release_cloud" + echo "Invalid combination of release_type and is_cloud" fi echo "[DEBUG] - Get path of 
$ARCH testing artifacts to promote to stable." diff --git a/.github/actions/rpm-delivery/action.yml b/.github/actions/rpm-delivery/action.yml index 3174c753300..b1fbc79e2d7 100644 --- a/.github/actions/rpm-delivery/action.yml +++ b/.github/actions/rpm-delivery/action.yml @@ -22,7 +22,7 @@ inputs: release_type: description: "Type of release (hotfix, release)" required: true - release_cloud: + is_cloud: description: "Release context (cloud or not cloud)" required: true @@ -61,12 +61,12 @@ runs: echo "[DEBUG] - Version: ${{ inputs.version }}" echo "[DEBUG] - Distrib: ${{ inputs.distrib }}" echo "[DEBUG] - module_name: ${{ inputs.module_name }}" - echo "[DEBUG] - release_cloud: ${{ inputs.release_cloud }}" + echo "[DEBUG] - is_cloud: ${{ inputs.is_cloud }}" echo "[DEBUG] - release_type: ${{ inputs.release_type }}" echo "[DEBUG] - stability: ${{ inputs.stability }}" # Make sure all required inputs are NOT empty - if [[ -z "${{ inputs.module_name }}" || -z "${{ inputs.distrib }}" || -z ${{ inputs.stability }} || -z ${{ inputs.version }} || -z ${{ inputs.release_cloud }} || -z ${{ inputs.release_type }} ]]; then + if [[ -z "${{ inputs.module_name }}" || -z "${{ inputs.distrib }}" || -z ${{ inputs.stability }} || -z ${{ inputs.version }} || -z "${{ inputs.is_cloud }}" ]]; then echo "Some mandatory inputs are empty, please check the logs." 
exit 1 fi @@ -85,32 +85,32 @@ runs: mv "$FILE" "$ARCH" done - # Build upload target path based on release_cloud and release_type values + # Build upload target path based on is_cloud and release_type values # if cloud + hotfix or cloud + release, deliver to internal testing- # if cloud + develop, delivery to internal unstable # if non-cloud, delivery to onprem testing or unstable # CLOUD + HOTFIX + REPO STANDARD INTERNAL OR CLOUD + RELEASE + REPO STANDARD INTERNAL - if [[ ${{ inputs.release_cloud }} -eq 1 ]] && ([[ ${{ inputs.release_type }} == "hotfix" ]] || [[ ${{ inputs.release_type }} == "release" ]]); then + if [[ "${{ inputs.is_cloud }}" == "true" ]] && ([[ "${{ inputs.release_type }}" == "hotfix" ]] || [[ "${{ inputs.release_type }}" == "release" ]]); then echo "[DEBUG] : Release cloud + ${{ inputs.release_type }}, using rpm-standard-internal." ROOT_REPO_PATHS="rpm-standard-internal" UPLOAD_REPO_PATH="${{ inputs.version }}/${{ inputs.distrib }}/${{ inputs.stability }}-${{ inputs.release_type }}/$ARCH/${{ inputs.module_name }}/" # CLOUD + NOT HOTFIX OR CLOUD + NOT RELEASE + REPO STANDARD INTERNAL - elif [[ ${{ inputs.release_cloud }} -eq 1 ]] && ([[ ${{ inputs.release_type }} != "hotfix" ]] || [[ ${{ inputs.release_type }} != "release" ]]); then + elif [[ "${{ inputs.is_cloud }}" == "true" ]] && ([[ "${{ inputs.release_type }}" != "hotfix" ]] || [[ "${{ inputs.release_type }}" != "release" ]]); then echo "[DEBUG] : Release cloud + NOT ${{ inputs.release_type }}, using rpm-standard-internal." ROOT_REPO_PATHS="rpm-standard-internal" UPLOAD_REPO_PATH="${{ inputs.version }}/${{ inputs.distrib }}/${{ inputs.stability }}-${{ inputs.release_type }}/$ARCH/${{ inputs.module_name }}/" # NON-CLOUD + (HOTFIX OR RELEASE) + REPO STANDARD - elif [[ ${{ inputs.release_cloud }} -eq 0 ]]; then + elif [[ "${{ inputs.is_cloud }}" == "false" ]]; then echo "[DEBUG] : NOT Release cloud + ${{ inputs.release_type }}, using rpm-standard." 
ROOT_REPO_PATHS="rpm-standard" UPLOAD_REPO_PATH="${{ inputs.version }}/${{ inputs.distrib }}/${{ inputs.stability }}/$ARCH/${{ inputs.module_name }}/" # ANYTHING ELSE else - echo "::error:: Invalid combination of release_type [${{ inputs.release_type }}] and release_cloud [${{ inputs.release_cloud }}]" + echo "::error:: Invalid combination of release_type [${{ inputs.release_type }}] and is_cloud [${{ inputs.is_cloud }}]" exit 1 fi diff --git a/.github/docker/Dockerfile.gorgone-testing-alma8 b/.github/docker/Dockerfile.gorgone-testing-alma8 index 7fe2db43131..95cd2c6f71b 100644 --- a/.github/docker/Dockerfile.gorgone-testing-alma8 +++ b/.github/docker/Dockerfile.gorgone-testing-alma8 @@ -1,11 +1,13 @@ FROM almalinux:8 +ARG VERSION + RUN bash -e < /dev/null 2>&1 diff --git a/.github/docker/Dockerfile.gorgone-testing-bullseye b/.github/docker/Dockerfile.gorgone-testing-bullseye index 0c3cc92a2a8..6643621c66b 100644 --- a/.github/docker/Dockerfile.gorgone-testing-bullseye +++ b/.github/docker/Dockerfile.gorgone-testing-bullseye @@ -1,5 +1,7 @@ FROM debian:bullseye +ARG VERSION + ENV DEBIAN_FRONTEND noninteractive # fix locale ENV LANG en_US.utf8 @@ -18,7 +20,7 @@ localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 pip3 install robotframework robotframework-examples robotframework-databaselibrary \ pymysql robotframework-requests robotframework-jsonlibrary -lsb_release -sc | xargs -I % sh -c 'echo deb https://packages.centreon.com/apt-standard-24.05-stable/ % main' | tee /etc/apt/sources.list.d/centreon.list +lsb_release -sc | xargs -I % sh -c 'echo deb https://packages.centreon.com/apt-standard-${VERSION}-stable/ % main' | tee /etc/apt/sources.list.d/centreon.list lsb_release -sc | xargs -I % sh -c 'echo deb https://packages.centreon.com/apt-plugins-stable/ % main' | tee /etc/apt/sources.list.d/centreon-plugins.list wget -O- https://apt-key.centreon.com | gpg --dearmor | tee /etc/apt/trusted.gpg.d/centreon.gpg > /dev/null 2>&1 diff --git 
a/.github/docker/Dockerfile.gorgone-testing-jammy b/.github/docker/Dockerfile.gorgone-testing-jammy index 6338489114d..a85639603ad 100644 --- a/.github/docker/Dockerfile.gorgone-testing-jammy +++ b/.github/docker/Dockerfile.gorgone-testing-jammy @@ -1,5 +1,7 @@ FROM ubuntu:jammy +ARG VERSION + ENV DEBIAN_FRONTEND=noninteractive # Set locale @@ -11,7 +13,7 @@ RUN apt-get update && \ ENV LANG=en_US.UTF-8 # Add Centreon repositories and their public key -RUN echo "deb https://packages.centreon.com/ubuntu-standard-24.05-testing/ jammy main" | tee -a /etc/apt/sources.list.d/centreon-testing.list && \ +RUN echo "deb https://packages.centreon.com/ubuntu-standard-${VERSION}-testing/ jammy main" | tee -a /etc/apt/sources.list.d/centreon-testing.list && \ echo "deb https://packages.centreon.com/ubuntu-plugins-testing/ jammy main" | tee -a /etc/apt/sources.list.d/centreon-plugins-testing.list && \ wget -O- https://apt-key.centreon.com | gpg --dearmor | tee /etc/apt/trusted.gpg.d/centreon.gpg > /dev/null 2>&1 && \ apt-get update diff --git a/.github/workflows/centreon-collect.yml b/.github/workflows/centreon-collect.yml index 2d671bb3b52..f5472bc9966 100644 --- a/.github/workflows/centreon-collect.yml +++ b/.github/workflows/centreon-collect.yml @@ -103,20 +103,20 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - get-version: - uses: ./.github/workflows/get-version.yml + get-environment: + uses: ./.github/workflows/get-environment.yml with: version_file: CMakeLists.txt veracode-analysis: - needs: [get-version] + needs: [get-environment] if: ${{ github.event_name == 'schedule' && github.ref_name == 'develop' }} uses: ./.github/workflows/veracode-analysis.yml with: module_name: centreon-collect - major_version: ${{ needs.get-version.outputs.major_version }} - minor_version: ${{ needs.get-version.outputs.minor_version }} - img_version: ${{ needs.get-version.outputs.img_version }} + major_version: ${{ needs.get-environment.outputs.major_version }} + minor_version: ${{ 
needs.get-environment.outputs.minor_version }} + img_version: ${{ needs.get-environment.outputs.img_version }} secrets: veracode_api_id: ${{ secrets.VERACODE_API_ID_COLL }} veracode_api_key: ${{ secrets.VERACODE_API_KEY_COLL }} @@ -125,8 +125,8 @@ jobs: docker_registry_passwd: ${{ secrets.HARBOR_CENTREON_PULL_TOKEN }} unit-test: - needs: [get-version] - if: ${{ github.event.inputs.unit_tests == 'true' && ! contains(fromJson('["stable"]'), needs.get-version.outputs.stability) }} + needs: [get-environment] + if: ${{ github.event.inputs.unit_tests == 'true' && ! contains(fromJson('["stable"]'), needs.get-environment.outputs.stability) }} strategy: fail-fast: false @@ -144,7 +144,7 @@ jobs: LEGACY_ENGINE: ${{ github.event.inputs.legacy_engine != 'false' && 'ON' || 'OFF' }} container: - image: ${{ vars.DOCKER_INTERNAL_REGISTRY_URL }}/centreon-collect-${{ matrix.distrib }}:${{ needs.get-version.outputs.img_version }} + image: ${{ vars.DOCKER_INTERNAL_REGISTRY_URL }}/centreon-collect-${{ matrix.distrib }}:${{ needs.get-environment.outputs.img_version }} credentials: username: ${{ secrets.HARBOR_CENTREON_PULL_USERNAME }} password: ${{ secrets.HARBOR_CENTREON_PULL_TOKEN }} @@ -239,8 +239,8 @@ jobs: shell: bash package: - needs: [get-version] - if: ${{ ! contains(fromJson('["stable"]'), needs.get-version.outputs.stability) }} + needs: [get-environment] + if: ${{ ! 
contains(fromJson('["stable"]'), needs.get-environment.outputs.stability) }} strategy: fail-fast: false matrix: @@ -274,12 +274,12 @@ jobs: uses: ./.github/workflows/package-collect.yml with: - major_version: ${{ needs.get-version.outputs.major_version }} - minor_version: ${{ needs.get-version.outputs.minor_version }} - img_version: ${{ needs.get-version.outputs.img_version }} - release: ${{ needs.get-version.outputs.release }} + major_version: ${{ needs.get-environment.outputs.major_version }} + minor_version: ${{ needs.get-environment.outputs.minor_version }} + img_version: ${{ needs.get-environment.outputs.img_version }} + release: ${{ needs.get-environment.outputs.release }} commit_hash: ${{ github.sha }} - stability: ${{ needs.get-version.outputs.stability }} + stability: ${{ needs.get-environment.outputs.stability }} legacy_engine: ${{ github.event.inputs.legacy_engine != 'false' && 'ON' || 'OFF' }} packages_in_artifact: ${{ github.event.inputs.packages_in_artifact == 'true' }} image: ${{ matrix.image }} @@ -290,7 +290,7 @@ jobs: secrets: inherit robot-test: - needs: [get-version, package] + needs: [get-environment, package] if: | (github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.is_nightly == 'true')) && ! 
cancelled() && @@ -344,8 +344,8 @@ jobs: distrib: ${{ matrix.distrib }} arch: ${{ matrix.arch }} image: ${{ matrix.image }} - image_test: ${{ matrix.image }}:${{ needs.get-version.outputs.test_img_version }} - image_version: ${{ needs.get-version.outputs.img_version }} + image_test: ${{ matrix.image }}:${{ needs.get-environment.outputs.test_img_version }} + image_version: ${{ needs.get-environment.outputs.img_version }} package_cache_key: ${{ github.run_id }}-${{ github.sha }}-${{ matrix.package_extension }}-centreon-collect-${{ matrix.distrib }}-${{ matrix.arch }}-${{ github.head_ref || github.ref_name }} package_cache_path: ./*.${{ matrix.package_extension}} database_type: ${{ matrix.database_type }} @@ -361,10 +361,10 @@ jobs: deliver-sources: runs-on: [self-hosted, common] - needs: [get-version, package] + needs: [get-environment, package] if: | github.event_name != 'workflow_dispatch' && - contains(fromJson('["stable"]'), needs.get-version.outputs.stability) && + contains(fromJson('["stable"]'), needs.get-environment.outputs.stability) && ! cancelled() && ! contains(needs.*.result, 'failure') && ! contains(needs.*.result, 'cancelled') @@ -381,18 +381,17 @@ jobs: bucket_directory: centreon-collect module_directory: centreon-collect module_name: centreon-collect - major_version: ${{ needs.get-version.outputs.major_version }} - minor_version: ${{ needs.get-version.outputs.minor_version }} + major_version: ${{ needs.get-environment.outputs.major_version }} + minor_version: ${{ needs.get-environment.outputs.minor_version }} token_download_centreon_com: ${{ secrets.TOKEN_DOWNLOAD_CENTREON_COM }} deliver-rpm: if: | - contains(fromJson('["unstable", "testing"]'), needs.get-version.outputs.stability) && + contains(fromJson('["unstable", "testing"]'), needs.get-environment.outputs.stability) && ! cancelled() && ! contains(needs.*.result, 'failure') && ! 
contains(needs.*.result, 'cancelled') - needs: [get-version, robot-test] - environment: ${{ needs.get-version.outputs.environment }} + needs: [get-environment, robot-test] runs-on: [self-hosted, common] strategy: matrix: @@ -413,21 +412,20 @@ jobs: with: module_name: collect distrib: ${{ matrix.distrib }} - major_version: ${{ needs.get-version.outputs.major_version }} + major_version: ${{ needs.get-environment.outputs.major_version }} artifactory_token: ${{ secrets.ARTIFACTORY_ACCESS_TOKEN }} cache_key: ${{ github.run_id }}-${{ github.sha }}-rpm-centreon-collect-${{ matrix.distrib }}-${{ matrix.arch }}-${{ github.head_ref || github.ref_name }} - stability: ${{ needs.get-version.outputs.stability }} - release_type: ${{ needs.get-version.outputs.release_type }} - release_cloud: ${{ needs.get-version.outputs.release_cloud }} + stability: ${{ needs.get-environment.outputs.stability }} + release_type: ${{ needs.get-environment.outputs.release_type }} + is_cloud: ${{ needs.get-environment.outputs.is_cloud }} deliver-deb: if: | - contains(fromJson('["unstable", "testing"]'), needs.get-version.outputs.stability) && + contains(fromJson('["unstable", "testing"]'), needs.get-environment.outputs.stability) && ! cancelled() && ! contains(needs.*.result, 'failure') && ! 
contains(needs.*.result, 'cancelled') - needs: [get-version, robot-test] - environment: ${{ needs.get-version.outputs.environment }} + needs: [get-environment, robot-test] runs-on: [self-hosted, common] strategy: matrix: @@ -446,17 +444,17 @@ jobs: with: module_name: collect distrib: ${{ matrix.distrib }} - major_version: ${{ needs.get-version.outputs.major_version }} + major_version: ${{ needs.get-environment.outputs.major_version }} artifactory_token: ${{ secrets.ARTIFACTORY_ACCESS_TOKEN }} cache_key: ${{ github.run_id }}-${{ github.sha }}-deb-centreon-collect-${{ matrix.distrib }}-${{ matrix.arch }}-${{ github.head_ref || github.ref_name }} - stability: ${{ needs.get-version.outputs.stability }} - release_type: ${{ needs.get-version.outputs.release_type }} - release_cloud: ${{ needs.get-version.outputs.release_cloud }} + stability: ${{ needs.get-environment.outputs.stability }} + release_type: ${{ needs.get-environment.outputs.release_type }} + is_cloud: ${{ needs.get-environment.outputs.is_cloud }} promote: - needs: [get-version, deliver-rpm, deliver-deb] + needs: [get-environment, deliver-rpm, deliver-deb] if: | - (contains(fromJson('["stable", "testing"]'), needs.get-version.outputs.stability) && github.event_name != 'workflow_dispatch') && + (contains(fromJson('["stable", "testing"]'), needs.get-environment.outputs.stability) && github.event_name != 'workflow_dispatch') && ! cancelled() && ! contains(needs.*.result, 'failure') && ! 
contains(needs.*.result, 'cancelled') @@ -475,8 +473,8 @@ jobs: artifactory_token: ${{ secrets.ARTIFACTORY_ACCESS_TOKEN }} module_name: collect distrib: ${{ matrix.distrib }} - major_version: ${{ needs.get-version.outputs.major_version }} - stability: ${{ needs.get-version.outputs.stability }} + major_version: ${{ needs.get-environment.outputs.major_version }} + stability: ${{ needs.get-environment.outputs.stability }} github_ref_name: ${{ github.ref_name }} - release_type: ${{ needs.get-version.outputs.release_type }} - release_cloud: ${{ needs.get-version.outputs.release_cloud }} + release_type: ${{ needs.get-environment.outputs.release_type }} + is_cloud: ${{ needs.get-environment.outputs.is_cloud }} diff --git a/.github/workflows/check-status.yml b/.github/workflows/check-status.yml index cd7a383daef..b56f2253b55 100644 --- a/.github/workflows/check-status.yml +++ b/.github/workflows/check-status.yml @@ -86,8 +86,10 @@ jobs: core.summary.addList(failedCheckRuns); core.summary.write() - core.setFailed(`${failure.length} workflow(s) failed`); - return; + if (failedCheckRuns.length > 0) { + core.setFailed(`${failedCheckRuns.length} job(s) failed`); + return; + } } if (pending.length === 1) { diff --git a/.github/workflows/docker-builder.yml b/.github/workflows/docker-builder.yml index 2469f108858..5944ace9e7f 100644 --- a/.github/workflows/docker-builder.yml +++ b/.github/workflows/docker-builder.yml @@ -17,13 +17,13 @@ on: - '.github/docker/Dockerfile.centreon-collect-*' jobs: - get-version: - uses: ./.github/workflows/get-version.yml + get-environment: + uses: ./.github/workflows/get-environment.yml with: version_file: CMakeLists.txt create-and-push-docker: - needs: [get-version] + needs: [get-environment] strategy: fail-fast: false @@ -32,59 +32,59 @@ jobs: - runner: collect dockerfile: centreon-collect-alma8 image: centreon-collect-alma8 - tag: ${{ needs.get-version.outputs.img_version }} + tag: ${{ needs.get-environment.outputs.img_version }} - runner: 
collect dockerfile: centreon-collect-alma9 image: centreon-collect-alma9 - tag: ${{ needs.get-version.outputs.img_version }} + tag: ${{ needs.get-environment.outputs.img_version }} - runner: collect dockerfile: centreon-collect-alma9-test image: centreon-collect-alma9-test - tag: ${{ needs.get-version.outputs.test_img_version }} + tag: ${{ needs.get-environment.outputs.test_img_version }} - runner: collect dockerfile: centreon-collect-mysql-alma9 image: centreon-collect-mysql-alma9 - tag: ${{ needs.get-version.outputs.img_version }} + tag: ${{ needs.get-environment.outputs.img_version }} - runner: collect dockerfile: centreon-collect-mysql-alma9-test image: centreon-collect-mysql-alma9-test - tag: ${{ needs.get-version.outputs.test_img_version }} + tag: ${{ needs.get-environment.outputs.test_img_version }} - runner: collect dockerfile: centreon-collect-debian-bullseye image: centreon-collect-debian-bullseye - tag: ${{ needs.get-version.outputs.img_version }} + tag: ${{ needs.get-environment.outputs.img_version }} - runner: collect dockerfile: centreon-collect-debian-bullseye-test image: centreon-collect-debian-bullseye-test - tag: ${{ needs.get-version.outputs.test_img_version }} + tag: ${{ needs.get-environment.outputs.test_img_version }} - runner: collect dockerfile: centreon-collect-debian-bookworm image: centreon-collect-debian-bookworm - tag: ${{ needs.get-version.outputs.img_version }} + tag: ${{ needs.get-environment.outputs.img_version }} - runner: collect dockerfile: centreon-collect-debian-bookworm-test image: centreon-collect-debian-bookworm-test - tag: ${{ needs.get-version.outputs.test_img_version }} + tag: ${{ needs.get-environment.outputs.test_img_version }} - runner: collect dockerfile: centreon-collect-ubuntu-jammy image: centreon-collect-ubuntu-jammy - tag: ${{ needs.get-version.outputs.img_version }} + tag: ${{ needs.get-environment.outputs.img_version }} - runner: collect-arm64 dockerfile: centreon-collect-debian-bullseye image: 
centreon-collect-debian-bullseye-arm64 - tag: ${{ needs.get-version.outputs.img_version }} + tag: ${{ needs.get-environment.outputs.img_version }} - runner: collect-arm64 dockerfile: centreon-collect-debian-bullseye-test image: centreon-collect-debian-bullseye-arm64-test - tag: ${{ needs.get-version.outputs.test_img_version }} + tag: ${{ needs.get-environment.outputs.test_img_version }} - runner: collect-arm64 dockerfile: centreon-collect-debian-bookworm image: centreon-collect-debian-bookworm-arm64 - tag: ${{ needs.get-version.outputs.img_version }} + tag: ${{ needs.get-environment.outputs.img_version }} - runner: collect-arm64 dockerfile: centreon-collect-debian-bookworm-test image: centreon-collect-debian-bookworm-arm64-test - tag: ${{ needs.get-version.outputs.test_img_version }} + tag: ${{ needs.get-environment.outputs.test_img_version }} runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.runner)) }} diff --git a/.github/workflows/docker-gorgone-testing.yml b/.github/workflows/docker-gorgone-testing.yml index 26cc8149505..bbdaeb4859e 100644 --- a/.github/workflows/docker-gorgone-testing.yml +++ b/.github/workflows/docker-gorgone-testing.yml @@ -19,12 +19,12 @@ on: - ".github/workflows/docker-gorgone-testing.yml" jobs: - get-version: - uses: ./.github/workflows/get-version.yml + get-environment: + uses: ./.github/workflows/get-environment.yml dockerize: - needs: [get-version] - runs-on: ubuntu-22.04 + needs: [get-environment] + runs-on: ubuntu-24.04 strategy: matrix: @@ -46,6 +46,8 @@ jobs: with: file: .github/docker/Dockerfile.gorgone-testing-${{ matrix.distrib }} context: . 
+ build-args: | + "VERSION=${{ needs.get-environment.outputs.major_version }}" pull: true push: true - tags: ${{ vars.DOCKER_INTERNAL_REGISTRY_URL }}/gorgone-testing-${{ matrix.distrib }}:${{ needs.get-version.outputs.gorgone_docker_version }} + tags: ${{ vars.DOCKER_INTERNAL_REGISTRY_URL }}/gorgone-testing-${{ matrix.distrib }}:${{ needs.get-environment.outputs.gorgone_docker_version }} diff --git a/.github/workflows/get-environment.yml b/.github/workflows/get-environment.yml new file mode 100644 index 00000000000..c33fad5fe5f --- /dev/null +++ b/.github/workflows/get-environment.yml @@ -0,0 +1,294 @@ +on: + workflow_call: + inputs: + version_file: + required: false + type: string + default: CMakeLists.txt + outputs: + latest_major_version: + description: "latest major version" + value: ${{ jobs.get-environment.outputs.latest_major_version }} + is_cloud: + description: "context of release (cloud or not cloud)" + value: ${{ jobs.get-environment.outputs.is_cloud }} + major_version: + description: "major version" + value: ${{ jobs.get-environment.outputs.major_version }} + minor_version: + description: "minor version" + value: ${{ jobs.get-environment.outputs.minor_version }} + release: + description: "release" + value: ${{ jobs.get-environment.outputs.release }} + stability: + description: "branch stability (stable, testing, unstable, canary)" + value: ${{ jobs.get-environment.outputs.stability }} + target_stability: + description: "Final target branch stability (stable, testing, unstable, canary or not defined if not a pull request)" + value: ${{ jobs.get-environment.outputs.target_stability }} + release_type: + description: "type of release (hotfix, release or not defined if not a release)" + value: ${{ jobs.get-environment.outputs.release_type }} + is_targeting_feature_branch: + description: "if it is a PR, check if targeting a feature branch" + value: ${{ jobs.get-environment.outputs.is_targeting_feature_branch }} + img_version: + description: "docker image 
version (vcpkg checksum)" + value: ${{ jobs.get-environment.outputs.img_version }} + test_img_version: + description: "test docker image version (checksum of database sql, script and dockerfiles)" + value: ${{ jobs.get-environment.outputs.test_img_version }} + gorgone_docker_version: + description: "md5 of gorgone dockerfile" + value: ${{ jobs.get-environment.outputs.gorgone_docker_version }} + +jobs: + get-environment: + runs-on: ubuntu-24.04 + outputs: + latest_major_version: ${{ steps.latest_major_version.outputs.latest_major_version }} + is_cloud: ${{ steps.detect_cloud_version.outputs.result }} + major_version: ${{ steps.get_version.outputs.major_version }} + minor_version: ${{ steps.get_version.outputs.minor_version }} + release: ${{ steps.get_release.outputs.release }} + stability: ${{ steps.get_stability.outputs.stability }} + target_stability: ${{ steps.get_stability.outputs.target_stability }} + release_type: ${{ steps.get_release_type.outputs.release_type }} + is_targeting_feature_branch: ${{ steps.get_stability.outputs.is_targeting_feature_branch }} + img_version: ${{ steps.get_docker_images_version.outputs.img_version }} + test_img_version: ${{ steps.get_docker_images_version.outputs.test_img_version }} + gorgone_docker_version: ${{ steps.get_docker_images_version.outputs.gorgone_docker_version }} + + steps: + - name: Checkout sources (current branch) + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + + # get latest major version to detect cloud / on-prem versions + - name: Checkout sources (develop branch) + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + with: + ref: develop + path: centreon-develop + sparse-checkout: .version + + - name: Store latest major version + id: latest_major_version + run: | + . 
centreon-develop/.version + echo "latest_major_version=$MAJOR" >> $GITHUB_OUTPUT + shell: bash + + - if: ${{ github.event_name == 'pull_request' }} + name: Get nested pull request path + id: pr_path + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + with: + script: | + const prPath = [process.env.GITHUB_HEAD_REF, process.env.GITHUB_BASE_REF]; // refs come from runner env vars so a crafted branch name cannot inject code + + const result = await github.rest.pulls.list({ + owner: context.repo.owner, + repo: context.repo.repo, + per_page: 100, + state: 'open' + }); + + let found = true; + while (found) { + found = false; + result.data.forEach(({ head: { ref: headRef }, base: { ref: baseRef} }) => { + if (headRef === prPath[prPath.length - 1] && ! prPath.includes(baseRef)) { + found = true; + prPath.push(baseRef); + } + }); + } + + return prPath; + + - name: Get stability + id: get_stability + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + with: + script: | + const getStability = (branchName) => { + switch (true) { + case /(^develop$)|(^dev-\d{2}\.\d{2}\.x$)|(^prepare-release-cloud.*)/.test(branchName): + return 'unstable'; + case /(^release.+)|(^hotfix.+)/.test(branchName): + return 'testing'; + case /(^master$)|(^\d{2}\.\d{2}\.x$)/.test(branchName): + return 'stable'; + default: + return 'canary'; + } + }; + + core.setOutput('stability', getStability(process.env.GITHUB_HEAD_REF || process.env.GITHUB_REF_NAME)); + + let isTargetingFeatureBranch = false; + if ("${{ github.event_name }}" === "pull_request") { + let targetStability = 'canary'; + const prPath = ${{ steps.pr_path.outputs.result || '[]' }}; + prPath.shift(); // remove current branch + + if (prPath.length && getStability(prPath[0]) === 'canary') { + isTargetingFeatureBranch = true; + } + + prPath.every((branchName) => { + console.log(`checking stability of ${branchName}`) + targetStability = getStability(branchName); + + if (targetStability !== 'canary') { + return false; + } + + return true; + }); + + core.setOutput('target_stability',
targetStability); + } + + core.setOutput('is_targeting_feature_branch', isTargetingFeatureBranch); + + - name: Get version from ${{ inputs.version_file }} + id: get_version + run: | + if [[ "${{ inputs.version_file }}" == */.version ]]; then + . .version + . ${{ inputs.version_file }} + VERSION="$MAJOR.$MINOR" + elif [[ "${{ inputs.version_file }}" == CMakeLists.txt ]]; then + MAJOR=$(awk '$1 ~ "COLLECT_MAJOR" {maj=substr($2, 1, length($2)-1)} $1 ~ "COLLECT_MINOR" {min=substr($2, 1, length($2)-1) ; print maj "." min}' CMakeLists.txt) + MINOR=$(awk '$1 ~ "COLLECT_PATCH" {print substr($2, 1, length($2) - 1)}' CMakeLists.txt) + VERSION="$MAJOR.$MINOR" + else + echo "Unable to parse version file ${{ inputs.version_file }}" + exit 1 + fi + + if grep -E '^[2-9][0-9]\.[0-9][0-9]\.[0-9]+' <<<"$VERSION" >/dev/null 2>&1 ; then + n=${VERSION//[!0-9]/ } + a=(${n//\./ }) + echo "major_version=${a[0]}.${a[1]}" >> $GITHUB_OUTPUT + MAJOR=${a[0]}.${a[1]} + echo "minor_version=${a[2]}" >> $GITHUB_OUTPUT + else + echo "Cannot parse version number from ${{ inputs.version_file }}" + exit 1 + fi + shell: bash + + - name: "Get release: 1 for testing / stable, . 
for others" + id: get_release + run: | + if [[ "${{ steps.get_stability.outputs.stability }}" == "testing" || "${{ steps.get_stability.outputs.stability }}" == "stable" ]]; then + RELEASE="1" + else + RELEASE="$(date +%s).$(echo ${{ github.sha }} | cut -c -7)" + fi + + echo "release=$RELEASE" >> $GITHUB_OUTPUT + shell: bash + + - name: "Get release type: hotfix, release or not defined if not a release" + id: get_release_type + run: | + RELEASE_TYPE=$(echo "${GITHUB_HEAD_REF:-$GITHUB_REF_NAME}" | cut -d '-' -f 1) # runner env vars: the branch name is never spliced into the shell script + if [[ "$RELEASE_TYPE" == "hotfix" || "$RELEASE_TYPE" == "release" ]]; then + echo "release_type=$RELEASE_TYPE" >> $GITHUB_OUTPUT + fi + shell: bash + + - name: "Detect cloud version" + id: detect_cloud_version + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + with: + script: | + if (process.env.GITHUB_REF_NAME === "master") { + return true; + } + + if ("${{ github.event_name }}" === "pull_request") { + const prPath = ${{ steps.pr_path.outputs.result || '[]' }}; + const finalTargetBranch = prPath.pop(); + if (['develop', 'master'].includes(finalTargetBranch)) { + return true; + } else if (/\d{2}\.\d{2}\.x$/.test(finalTargetBranch)) { + return false; + } + } + + const developMajorVersion = "${{ steps.latest_major_version.outputs.latest_major_version }}"; + const currentMajorVersion = "${{ steps.get_version.outputs.major_version }}"; + + if (Number(currentMajorVersion) >= Number(developMajorVersion)) { + return true; + } + + return false; + + - name: Get docker images version + id: get_docker_images_version + run: | + IMG_VERSION=$( cat `ls .github/docker/Dockerfile.centreon-collect-* | grep -v test` vcpkg.json | md5sum | awk '{print substr($1, 0, 8)}') + echo "img_version=$IMG_VERSION" >> $GITHUB_OUTPUT + + TEST_IMG_VERSION=$(cat .github/docker/Dockerfile.centreon-collect-*-test .github/scripts/collect-prepare-test-robot.sh resources/*.sql | md5sum | cut -c1-8) + echo "test_img_version=$TEST_IMG_VERSION" >> $GITHUB_OUTPUT +
+ GORGONE_DOCKER_VERSION=$(cat .github/docker/Dockerfile.gorgone-testing-* | md5sum | cut -c1-8) + echo "gorgone_docker_version=$GORGONE_DOCKER_VERSION" >> $GITHUB_OUTPUT + + - name: Display info in job summary + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + env: + VERSION_FILE: ${{ inputs.version_file }} + with: + script: | + const outputTable = [ + [{data: 'Name', header: true}, {data: 'Value', header: true}], + ['latest_major_version', '${{ steps.latest_major_version.outputs.latest_major_version }}'], + ['is_cloud', '${{ steps.detect_cloud_version.outputs.result }}'], + ['major_version', '${{ steps.get_version.outputs.major_version }}'], + ['minor_version', '${{ steps.get_version.outputs.minor_version }}'], + ['release', '${{ steps.get_release.outputs.release }}'], + ['stability', '${{ steps.get_stability.outputs.stability }}'], + ['release_type', '${{ steps.get_release_type.outputs.release_type || 'not defined because this is not a release' }}'], + ['is_targeting_feature_branch', '${{ steps.get_stability.outputs.is_targeting_feature_branch }}'], + ['img_version', '${{ steps.get_docker_images_version.outputs.img_version }}'], + ['test_img_version', '${{ steps.get_docker_images_version.outputs.test_img_version }}'], + ['gorgone_docker_version', '${{ steps.get_docker_images_version.outputs.gorgone_docker_version }}'], + ]; + + outputTable.push(['target_stability', '${{ steps.get_stability.outputs.target_stability || 'not defined because current run is not triggered by pull request event' }}']); + + core.summary + .addHeading(`${context.workflow} environment outputs`) + .addTable(outputTable); + + if ("${{ github.event_name }}" === "pull_request") { + const prPath = ${{ steps.pr_path.outputs.result || '[]' }}; + const mainBranchName = prPath.pop(); + let codeBlock = ` + %%{ init: { 'gitGraph': { 'mainBranchName': '${mainBranchName}', 'showCommitLabel': false } } }%% + gitGraph + commit`; + prPath.reverse().forEach((branchName) => { 
+ codeBlock = `${codeBlock} + branch ${branchName} + checkout ${branchName} + commit`; + }); + + core.summary + .addHeading('Git workflow') + .addCodeBlock( + codeBlock, + "mermaid" + ); + } + + core.summary.write(); diff --git a/.github/workflows/get-version.yml b/.github/workflows/get-version.yml deleted file mode 100644 index bc24ae629e5..00000000000 --- a/.github/workflows/get-version.yml +++ /dev/null @@ -1,233 +0,0 @@ -on: - workflow_call: - inputs: - version_file: - required: false - type: string - default: CMakeLists.txt - outputs: - major_version: - description: "major version" - value: ${{ jobs.get-version.outputs.major_version }} - minor_version: - description: "minor version" - value: ${{ jobs.get-version.outputs.minor_version }} - img_version: - description: "docker image version (vcpkg checksum)" - value: ${{ jobs.get-version.outputs.img_version }} - test_img_version: - description: "test docker image version (checksum of database sql, script and dockerfiles)" - value: ${{ jobs.get-version.outputs.test_img_version }} - version: - description: "major version" - value: ${{ jobs.get-version.outputs.version }} - release: - description: "release" - value: ${{ jobs.get-version.outputs.release }} - stability: - description: "branch stability (stable, testing, unstable, canary)" - value: ${{ jobs.get-version.outputs.stability }} - environment: - description: "branch stability (stable, testing, unstable, canary)" - value: ${{ jobs.get-version.outputs.environment }} - release_type: - description: "type of release (hotfix, release)" - value: ${{ jobs.get-version.outputs.release_type }} - release_cloud: - description: "context of release (cloud or not cloud)" - value: ${{ jobs.get-version.outputs.release_cloud }} - gorgone_docker_version: - description: "md5 of gorgone dockerfile" - value: ${{ jobs.get-version.outputs.gorgone_docker_version }} - -jobs: - get-version: - runs-on: ubuntu-24.04 - outputs: - major_version: ${{ steps.get_version.outputs.major_version 
}} - minor_version: ${{ steps.get_version.outputs.minor_version }} - img_version: ${{ steps.get_version.outputs.img_version }} - test_img_version: ${{ steps.get_version.outputs.test_img_version }} - version: ${{ steps.get_version.outputs.version }} - release: ${{ steps.get_version.outputs.release }} - stability: ${{ steps.get_version.outputs.stability }} - environment: ${{ steps.get_version.outputs.env }} - release_type: ${{ steps.get_version.outputs.release_type }} - release_cloud: ${{ steps.get_version.outputs.release_cloud}} - gorgone_docker_version: ${{ steps.get_version.outputs.gorgone_docker_version }} - - steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - with: - ref: develop - path: centreon-collect-develop - sparse-checkout: .version - - - name: store latest major version - id: latest_major_version - run: | - . centreon-collect-develop/.version - echo "latest_major_version=$MAJOR" >> $GITHUB_OUTPUT - shell: bash - - - name: install gh cli on self-hosted runner - run: | - if ! command -v gh &> /dev/null; then - echo "Installing GH CLI." - type -p curl >/dev/null || (sudo apt update && sudo apt install curl -y) - curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | sudo dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg - sudo chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg - echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null - sudo apt update - sudo apt install gh -y - else - echo "GH CLI is already installed." - fi - shell: bash - - - id: get_version - run: | - set -x - - if [[ "${{ inputs.version_file }}" == */.version ]]; then - . .version - . 
${{ inputs.version_file }} - VERSION="$MAJOR.$MINOR" - elif [[ "${{ inputs.version_file }}" == CMakeLists.txt ]]; then - MAJOR=$(awk '$1 ~ "COLLECT_MAJOR" {maj=substr($2, 1, length($2)-1)} $1 ~ "COLLECT_MINOR" {min=substr($2, 1, length($2)-1) ; print maj "." min}' CMakeLists.txt) - MINOR=$(awk '$1 ~ "COLLECT_PATCH" {print substr($2, 1, length($2) - 1)}' CMakeLists.txt) - VERSION="$MAJOR.$MINOR" - else - echo "Unable to parse ${{ inputs.version_file }}" - exit 1 - fi - - echo "VERSION=$VERSION" - - if egrep '^[2-9][0-9]\.[0-9][0-9]\.[0-9]+' <<<"$VERSION" >/dev/null 2>&1 ; then - n=${VERSION//[!0-9]/ } - a=(${n//\./ }) - echo "major_version=${a[0]}.${a[1]}" >> $GITHUB_OUTPUT - MAJOR=${a[0]}.${a[1]} - echo "minor_version=${a[2]}" >> $GITHUB_OUTPUT - else - echo "Cannot parse version number from ${{ inputs.version_file }}" - exit 1 - fi - - GORGONE_DOCKER_VERSION=$(cat .github/docker/Dockerfile.gorgone-testing-* | md5sum | cut -c1-8) - echo "gorgone_docker_version=$GORGONE_DOCKER_VERSION" >> $GITHUB_OUTPUT - - IMG_VERSION=$( cat `ls .github/docker/Dockerfile.centreon-collect-* | grep -v test` vcpkg.json | md5sum | awk '{print substr($1, 0, 8)}') - TEST_IMG_VERSION=$(cat .github/docker/Dockerfile.centreon-collect-*-test .github/scripts/collect-prepare-test-robot.sh resources/*.sql | md5sum | cut -c1-8) - echo "img_version=$IMG_VERSION" >> $GITHUB_OUTPUT - echo "test_img_version=$TEST_IMG_VERSION" >> $GITHUB_OUTPUT - echo "version=$VERSION" >> $GITHUB_OUTPUT - - if [[ -z "$GITHUB_HEAD_REF" ]]; then - BRANCHNAME="$GITHUB_REF_NAME" - else - BRANCHNAME="$GITHUB_HEAD_REF" - fi - - echo "BRANCHNAME is: $BRANCHNAME" - - # Set default release values - GITHUB_RELEASE_CLOUD=0 - GITHUB_RELEASE_TYPE=$(echo $BRANCHNAME |cut -d '-' -f 1) - - # if current branch major version has a matching dev-$MAJOR branch ==> onprem version - if git ls-remote -q | grep -E "refs/heads/dev-$MAJOR.x$" >/dev/null 2>&1; then - GITHUB_RELEASE_CLOUD=0 - # if current branch major version is greater or 
equal than the develop branch major version ==> cloud version - elif [[ "$(printf '%s\n' "${{ steps.latest_major_version.outputs.latest_major_version }}" "$MAJOR" | sort -V | head -n1)" == "${{ steps.latest_major_version.outputs.latest_major_version }}" ]]; then - GITHUB_RELEASE_CLOUD=1 - fi - - case "$BRANCHNAME" in - master) - echo "release=1" >> $GITHUB_OUTPUT - GITHUB_RELEASE_CLOUD=1 - echo "release_type=$GITHUB_RELEASE_TYPE" >> $GITHUB_OUTPUT - ;; - [2-9][0-9].[0-9][0-9].x) - echo "release=1" >> $GITHUB_OUTPUT - echo "release_type=$GITHUB_RELEASE_TYPE" >> $GITHUB_OUTPUT - ;; - develop) - echo "release=`date +%s`.`echo ${{ github.sha }} | cut -c -7`" >> $GITHUB_OUTPUT - GITHUB_RELEASE_CLOUD=1 - echo "release_type=$GITHUB_RELEASE_TYPE" >> $GITHUB_OUTPUT - ;; - dev-[2-9][0-9].[0-9][0-9].x) - echo "release=`date +%s`.`echo ${{ github.sha }} | cut -c -7`" >> $GITHUB_OUTPUT - echo "release_type=$GITHUB_RELEASE_TYPE" >> $GITHUB_OUTPUT - ;; - release* | hotfix*) - # Handle workflow_dispatch run triggers and run a dispatch ONLY for cloud release - GITHUB_RELEASE_BRANCH_BASE_REF_NAME="$(gh pr view $BRANCHNAME -q .baseRefName --json headRefName,baseRefName,state)" - echo "GITHUB_RELEASE_BRANCH_BASE_REF_NAME is: $GITHUB_RELEASE_BRANCH_BASE_REF_NAME" - GITHUB_RELEASE_BRANCH_PR_STATE="$(gh pr view $BRANCHNAME -q .state --json headRefName,baseRefName,state)" - echo "GITHUB_RELEASE_BRANCH_PR_STATE is: $GITHUB_RELEASE_BRANCH_PR_STATE" - - # Check if the release context (cloud and hotfix or cloud and release) - if [[ "$GITHUB_RELEASE_BRANCH_BASE_REF_NAME" == "master" ]] && [[ "$GITHUB_RELEASE_BRANCH_PR_STATE" == "OPEN" ]]; then - # Get release pull request ID - GITHUB_RELEASE_BRANCH_PR_NUMBER="$(gh pr view $BRANCHNAME -q .[] --json number)" - # Set release cloud to 1 (0=not-cloud, 1=cloud) - GITHUB_RELEASE_CLOUD=1 - # Debug - echo "GITHUB_RELEASE_TYPE is: $GITHUB_RELEASE_TYPE" - echo "GITHUB_RELEASE_BRANCH_PR_NUMBER is: $GITHUB_RELEASE_BRANCH_PR_NUMBER" # We do leave this here 
as debug help. - echo "GITHUB_RELEASE_CLOUD is: $GITHUB_RELEASE_CLOUD" - # Github ouputs - echo "release=`date +%s`.`echo ${{ github.sha }} | cut -c -7`" >> $GITHUB_OUTPUT - echo "release_type=$GITHUB_RELEASE_TYPE" >> $GITHUB_OUTPUT - else - echo "release=1" >> $GITHUB_OUTPUT - echo "release_type=$GITHUB_RELEASE_TYPE" >> $GITHUB_OUTPUT - fi - ;; - prepare-release-cloud*) - # Set release cloud to 1 (0=not-cloud, 1=cloud) - GITHUB_RELEASE_CLOUD=1 - # Debug - echo "GITHUB_RELEASE_TYPE is: $GITHUB_RELEASE_TYPE" - echo "GITHUB_RELEASE_CLOUD is: $GITHUB_RELEASE_CLOUD" - # Github ouputs - echo "release=`date +%s`.`echo ${{ github.sha }} | cut -c -7`" >> $GITHUB_OUTPUT - echo "release_type=$GITHUB_RELEASE_TYPE" >> $GITHUB_OUTPUT - ;; - *) - echo "release=`date +%s`.`echo ${{ github.sha }} | cut -c -7`" >> $GITHUB_OUTPUT - echo "release_type=$GITHUB_RELEASE_TYPE" >> $GITHUB_OUTPUT - ;; - esac - - echo "release_cloud=$GITHUB_RELEASE_CLOUD" >> $GITHUB_OUTPUT - - case "$BRANCHNAME" in - develop | dev-[2-9][0-9].[0-9][0-9].x | prepare-release-cloud*) - STABILITY="unstable" - ENV="development" - ;; - release* | hotfix*) - STABILITY="testing" - ENV="testing" - ;; - master | [2-9][0-9].[0-9][0-9].x) - STABILITY="stable" - ENV="production" - ;; - *) - STABILITY="canary" - ;; - esac - echo "stability=$STABILITY" >> $GITHUB_OUTPUT - echo "env=$VERSION-$ENV" >> $GITHUB_OUTPUT - echo "GH_ENV: $VERSION-$ENV" - shell: bash - env: - GH_TOKEN: ${{ github.token }} diff --git a/.github/workflows/gorgone.yml b/.github/workflows/gorgone.yml index c90adbebbc8..a39dde94490 100644 --- a/.github/workflows/gorgone.yml +++ b/.github/workflows/gorgone.yml @@ -33,20 +33,21 @@ env: base_directory: gorgone jobs: - get-version: - uses: ./.github/workflows/get-version.yml + get-environment: + uses: ./.github/workflows/get-environment.yml with: version_file: gorgone/.version veracode-analysis: - needs: [get-version] + needs: [get-environment] + if: ${{ 
needs.get-environment.outputs.is_targeting_feature_branch != 'true' && github.event.pull_request.draft != true }} # draft is a boolean: comparing it with the string 'true' coerces to NaN and never matches uses: ./.github/workflows/veracode-analysis.yml with: module_directory: gorgone module_name: centreon-gorgone - major_version: ${{ needs.get-version.outputs.major_version }} - minor_version: ${{ needs.get-version.outputs.minor_version }} - img_version: ${{ needs.get-version.outputs.img_version }} + major_version: ${{ needs.get-environment.outputs.major_version }} + minor_version: ${{ needs.get-environment.outputs.minor_version }} + img_version: ${{ needs.get-environment.outputs.img_version }} secrets: veracode_api_id: ${{ secrets.VERACODE_API_ID_GORG }} veracode_api_key: ${{ secrets.VERACODE_API_KEY_GORG }} @@ -55,8 +56,8 @@ jobs: docker_registry_passwd: ${{ secrets.HARBOR_CENTREON_PULL_TOKEN }} package: - needs: [get-version] - if: ${{ needs.get-version.outputs.stability != 'stable' }} + needs: [get-environment] + if: ${{ needs.get-environment.outputs.stability != 'stable' }} strategy: fail-fast: false @@ -76,10 +77,10 @@ jobs: image: packaging-nfpm-jammy distrib: jammy - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 container: - image: ${{ vars.DOCKER_INTERNAL_REGISTRY_URL }}/${{ matrix.image }}:${{ needs.get-version.outputs.major_version }} + image: ${{ vars.DOCKER_INTERNAL_REGISTRY_URL }}/${{ matrix.image }}:${{ needs.get-environment.outputs.major_version }} credentials: username: ${{ secrets.HARBOR_CENTREON_PULL_USERNAME }} password: ${{ secrets.HARBOR_CENTREON_PULL_TOKEN }} @@ -104,7 +105,7 @@ jobs: if: ${{ matrix.package_extension == 'rpm' }} run: | cd gorgone/selinux - sed -i "s/@VERSION@/${{ needs.get-version.outputs.major_version }}.${{ needs.get-version.outputs.minor_version }}/g" centreon-gorgoned.te + sed -i "s/@VERSION@/${{ needs.get-environment.outputs.major_version }}.${{ needs.get-environment.outputs.minor_version }}/g" centreon-gorgoned.te make -f /usr/share/selinux/devel/Makefile shell: bash @@ -119,19 +120,19 @@ jobs:
nfpm_file_pattern: "gorgone/packaging/*.yaml" distrib: ${{ matrix.distrib }} package_extension: ${{ matrix.package_extension }} - major_version: ${{ needs.get-version.outputs.major_version }} - minor_version: ${{ needs.get-version.outputs.minor_version }} - release: ${{ needs.get-version.outputs.release }} + major_version: ${{ needs.get-environment.outputs.major_version }} + minor_version: ${{ needs.get-environment.outputs.minor_version }} + release: ${{ needs.get-environment.outputs.release }} arch: all commit_hash: ${{ github.sha }} cache_key: ${{ github.sha }}-${{ github.run_id }}-${{ matrix.package_extension }}-${{ matrix.distrib }} rpm_gpg_key: ${{ secrets.RPM_GPG_SIGNING_KEY }} rpm_gpg_signing_key_id: ${{ secrets.RPM_GPG_SIGNING_KEY_ID }} rpm_gpg_signing_passphrase: ${{ secrets.RPM_GPG_SIGNING_PASSPHRASE }} - stability: ${{ needs.get-version.outputs.stability }} + stability: ${{ needs.get-environment.outputs.stability }} test-gorgone: - needs: [get-version, package] + needs: [get-environment, package] strategy: fail-fast: false @@ -151,9 +152,9 @@ jobs: image: gorgone-testing-bookworm distrib: bookworm - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 container: - image: ${{ vars.DOCKER_INTERNAL_REGISTRY_URL }}/${{ matrix.image }}:${{ needs.get-version.outputs.gorgone_docker_version }} + image: ${{ vars.DOCKER_INTERNAL_REGISTRY_URL }}/${{ matrix.image }}:${{ needs.get-environment.outputs.gorgone_docker_version }} credentials: username: ${{ secrets.DOCKER_REGISTRY_ID }} password: ${{ secrets.DOCKER_REGISTRY_PASSWD }} @@ -172,7 +173,7 @@ jobs: - name: Get linked branch of centreon repository id: centreon_repo_linked_branch run: | - CENTREON_REPO_LINKED_BRANCH=$(git ls-remote -h https://github.com/centreon/centreon.git | grep -E "refs/heads/dev-${{ needs.get-version.outputs.major_version }}\.x$" >/dev/null 2>&1 && echo "dev-${{ needs.get-version.outputs.major_version }}.x" || echo develop) + CENTREON_REPO_LINKED_BRANCH=$(git ls-remote -h 
https://github.com/centreon/centreon.git | grep -E "refs/heads/dev-${{ needs.get-environment.outputs.major_version }}\.x$" >/dev/null 2>&1 && echo "dev-${{ needs.get-environment.outputs.major_version }}.x" || echo develop) GIT_BRANCH_EXISTS=$(git ls-remote -h https://github.com/centreon/centreon.git | grep -E "refs/heads/${{ github.head_ref || github.ref_name }}$" >/dev/null 2>&1 && echo yes || echo no) if [[ "$GIT_BRANCH_EXISTS" == "yes" ]]; then @@ -245,8 +246,8 @@ jobs: deliver-sources: runs-on: [self-hosted, common] - needs: [get-version, package] - if: ${{ contains(fromJson('["stable"]'), needs.get-version.outputs.stability) && github.event_name != 'workflow_dispatch' }} + needs: [get-environment, package] + if: ${{ contains(fromJson('["stable"]'), needs.get-environment.outputs.stability) && github.event_name != 'workflow_dispatch' }} steps: - name: Checkout sources @@ -258,14 +259,14 @@ jobs: bucket_directory: centreon-gorgone module_directory: gorgone module_name: centreon-gorgone - major_version: ${{ needs.get-version.outputs.major_version }} - minor_version: ${{ needs.get-version.outputs.minor_version }} + major_version: ${{ needs.get-environment.outputs.major_version }} + minor_version: ${{ needs.get-environment.outputs.minor_version }} token_download_centreon_com: ${{ secrets.TOKEN_DOWNLOAD_CENTREON_COM }} deliver-rpm: runs-on: [self-hosted, common] - needs: [get-version, package] - if: ${{ contains(fromJson('["testing", "unstable"]'), needs.get-version.outputs.stability) }} + needs: [get-environment, package] + if: ${{ contains(fromJson('["testing", "unstable"]'), needs.get-environment.outputs.stability) }} strategy: matrix: @@ -280,17 +281,17 @@ jobs: with: module_name: gorgone distrib: ${{ matrix.distrib }} - version: ${{ needs.get-version.outputs.major_version }} + version: ${{ needs.get-environment.outputs.major_version }} artifactory_token: ${{ secrets.ARTIFACTORY_ACCESS_TOKEN }} cache_key: ${{ github.sha }}-${{ github.run_id }}-rpm-${{ 
matrix.distrib }} - stability: ${{ needs.get-version.outputs.stability }} - release_type: ${{ needs.get-version.outputs.release_type }} - release_cloud: ${{ needs.get-version.outputs.release_cloud }} + stability: ${{ needs.get-environment.outputs.stability }} + release_type: ${{ needs.get-environment.outputs.release_type }} + is_cloud: ${{ needs.get-environment.outputs.is_cloud }} deliver-deb: runs-on: [self-hosted, common] - needs: [get-version, package] - if: ${{ contains(fromJson('["testing", "unstable"]'), needs.get-version.outputs.stability) }} + needs: [get-environment, package] + if: ${{ contains(fromJson('["testing", "unstable"]'), needs.get-environment.outputs.stability) }} strategy: matrix: @@ -305,17 +306,17 @@ jobs: with: module_name: gorgone distrib: ${{ matrix.distrib }} - version: ${{ needs.get-version.outputs.major_version }} + version: ${{ needs.get-environment.outputs.major_version }} artifactory_token: ${{ secrets.ARTIFACTORY_ACCESS_TOKEN }} cache_key: ${{ github.sha }}-${{ github.run_id }}-deb-${{ matrix.distrib }} - stability: ${{ needs.get-version.outputs.stability }} - release_type: ${{ needs.get-version.outputs.release_type }} - release_cloud: ${{ needs.get-version.outputs.release_cloud }} + stability: ${{ needs.get-environment.outputs.stability }} + release_type: ${{ needs.get-environment.outputs.release_type }} + is_cloud: ${{ needs.get-environment.outputs.is_cloud }} promote: - needs: [get-version, deliver-rpm, deliver-deb] + needs: [get-environment, deliver-rpm, deliver-deb] if: | - (contains(fromJson('["stable", "testing"]'), needs.get-version.outputs.stability) && github.event_name != 'workflow_dispatch') && + (contains(fromJson('["stable", "testing"]'), needs.get-environment.outputs.stability) && github.event_name != 'workflow_dispatch') && ! cancelled() && ! contains(needs.*.result, 'failure') && ! 
contains(needs.*.result, 'cancelled') @@ -334,8 +335,8 @@ jobs: artifactory_token: ${{ secrets.ARTIFACTORY_ACCESS_TOKEN }} module_name: gorgone distrib: ${{ matrix.distrib }} - major_version: ${{ needs.get-version.outputs.major_version }} - stability: ${{ needs.get-version.outputs.stability }} + major_version: ${{ needs.get-environment.outputs.major_version }} + stability: ${{ needs.get-environment.outputs.stability }} github_ref_name: ${{ github.ref_name }} - release_type: ${{ needs.get-version.outputs.release_type }} - release_cloud: ${{ needs.get-version.outputs.release_cloud }} + release_type: ${{ needs.get-environment.outputs.release_type }} + is_cloud: ${{ needs.get-environment.outputs.is_cloud }} diff --git a/.github/workflows/libzmq.yml b/.github/workflows/libzmq.yml index 1f385411bde..8a2edc89659 100644 --- a/.github/workflows/libzmq.yml +++ b/.github/workflows/libzmq.yml @@ -19,11 +19,11 @@ on: - '.github/workflows/libzmq.yml' jobs: - get-version: - uses: ./.github/workflows/get-version.yml + get-environment: + uses: ./.github/workflows/get-environment.yml package-rpm: - needs: [get-version] + needs: [get-environment] strategy: fail-fast: false @@ -36,10 +36,10 @@ jobs: distrib: el9 arch: amd64 - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 container: - image: ${{ vars.DOCKER_INTERNAL_REGISTRY_URL }}/${{ matrix.image }}:${{ needs.get-version.outputs.major_version }} + image: ${{ vars.DOCKER_INTERNAL_REGISTRY_URL }}/${{ matrix.image }}:${{ needs.get-environment.outputs.major_version }} credentials: username: ${{ secrets.HARBOR_CENTREON_PULL_USERNAME }} password: ${{ secrets.HARBOR_CENTREON_PULL_TOKEN }} @@ -73,7 +73,7 @@ jobs: key: ${{ github.run_id }}-${{ github.sha }}-rpm-libzmq-${{ matrix.distrib }}-${{ matrix.arch }} package-deb: - needs: [get-version] + needs: [get-environment] strategy: fail-fast: false @@ -81,17 +81,17 @@ jobs: include: - image: packaging-nfpm-bookworm distrib: bookworm - runner: ubuntu-22.04 + runner: ubuntu-24.04 arch: amd64 - 
image: packaging-nfpm-jammy distrib: jammy - runner: ubuntu-22.04 + runner: ubuntu-24.04 arch: amd64 runs-on: ${{ matrix.runner }} container: - image: ${{ vars.DOCKER_INTERNAL_REGISTRY_URL }}/${{ matrix.image }}:${{ needs.get-version.outputs.major_version }} + image: ${{ vars.DOCKER_INTERNAL_REGISTRY_URL }}/${{ matrix.image }}:${{ needs.get-environment.outputs.major_version }} credentials: username: ${{ secrets.HARBOR_CENTREON_PULL_USERNAME }} password: ${{ secrets.HARBOR_CENTREON_PULL_TOKEN }} @@ -138,9 +138,8 @@ jobs: key: ${{ github.run_id }}-${{ github.sha }}-deb-libzmq-${{ matrix.distrib }}-${{ matrix.arch }} deliver-rpm: - if: ${{ contains(fromJson('["testing", "unstable"]'), needs.get-version.outputs.stability) }} - needs: [get-version, package-rpm] - environment: ${{ needs.get-version.outputs.environment }} + if: ${{ contains(fromJson('["testing", "unstable"]'), needs.get-environment.outputs.stability) }} + needs: [get-environment, package-rpm] runs-on: [self-hosted, common] strategy: matrix: @@ -161,17 +160,16 @@ jobs: with: module_name: libzmq distrib: ${{ matrix.distrib }} - major_version: ${{ needs.get-version.outputs.major_version }} + major_version: ${{ needs.get-environment.outputs.major_version }} artifactory_token: ${{ secrets.ARTIFACTORY_ACCESS_TOKEN }} cache_key: ${{ github.run_id }}-${{ github.sha }}-rpm-libzmq-${{ matrix.distrib }}-${{ matrix.arch }} - stability: ${{ needs.get-version.outputs.stability }} - release_type: ${{ needs.get-version.outputs.release_type }} - release_cloud: ${{ needs.get-version.outputs.release_cloud }} + stability: ${{ needs.get-environment.outputs.stability }} + release_type: ${{ needs.get-environment.outputs.release_type }} + is_cloud: ${{ needs.get-environment.outputs.is_cloud }} deliver-deb: - if: ${{ contains(fromJson('["testing", "unstable"]'), needs.get-version.outputs.stability) }} - needs: [get-version, package-deb] - environment: ${{ needs.get-version.outputs.environment }} + if: ${{ 
contains(fromJson('["testing", "unstable"]'), needs.get-environment.outputs.stability) }} + needs: [get-environment, package-deb] runs-on: [self-hosted, common] strategy: matrix: @@ -190,17 +188,17 @@ jobs: with: module_name: libzmq distrib: ${{ matrix.distrib }} - major_version: ${{ needs.get-version.outputs.major_version }} + major_version: ${{ needs.get-environment.outputs.major_version }} artifactory_token: ${{ secrets.ARTIFACTORY_ACCESS_TOKEN }} cache_key: ${{ github.run_id }}-${{ github.sha }}-deb-libzmq-${{ matrix.distrib }}-${{ matrix.arch }} - stability: ${{ needs.get-version.outputs.stability }} - release_type: ${{ needs.get-version.outputs.release_type }} - release_cloud: ${{ needs.get-version.outputs.release_cloud }} + stability: ${{ needs.get-environment.outputs.stability }} + release_type: ${{ needs.get-environment.outputs.release_type }} + is_cloud: ${{ needs.get-environment.outputs.is_cloud }} promote: - needs: [get-version, deliver-rpm, deliver-deb] + needs: [get-environment, deliver-rpm, deliver-deb] if: | - (contains(fromJson('["stable", "testing"]'), needs.get-version.outputs.stability) && github.event_name != 'workflow_dispatch') && + (contains(fromJson('["stable", "testing"]'), needs.get-environment.outputs.stability) && github.event_name != 'workflow_dispatch') && ! cancelled() && ! contains(needs.*.result, 'failure') && ! 
contains(needs.*.result, 'cancelled') @@ -219,8 +217,8 @@ jobs: artifactory_token: ${{ secrets.ARTIFACTORY_ACCESS_TOKEN }} module_name: libzmq distrib: ${{ matrix.distrib }} - major_version: ${{ needs.get-version.outputs.major_version }} - stability: ${{ needs.get-version.outputs.stability }} + major_version: ${{ needs.get-environment.outputs.major_version }} + stability: ${{ needs.get-environment.outputs.stability }} github_ref_name: ${{ github.ref_name }} - release_type: ${{ needs.get-version.outputs.release_type }} - release_cloud: ${{ needs.get-version.outputs.release_cloud }} + release_type: ${{ needs.get-environment.outputs.release_type }} + is_cloud: ${{ needs.get-environment.outputs.is_cloud }} diff --git a/.github/workflows/lua-curl.yml b/.github/workflows/lua-curl.yml index 60e7cda2a0f..90d98e32487 100644 --- a/.github/workflows/lua-curl.yml +++ b/.github/workflows/lua-curl.yml @@ -24,12 +24,12 @@ env: release: 21 # 10 for openssl 1.1.1 / 20 for openssl system / 21 for openssl system and possible issue with ~ jobs: - get-version: - uses: ./.github/workflows/get-version.yml + get-environment: + uses: ./.github/workflows/get-environment.yml package: - needs: [get-version] - if: ${{ needs.get-version.outputs.stability != 'stable' }} + needs: [get-environment] + if: ${{ needs.get-environment.outputs.stability != 'stable' }} strategy: fail-fast: false @@ -69,7 +69,7 @@ jobs: runs-on: ${{ matrix.runner }} container: - image: ${{ vars.DOCKER_INTERNAL_REGISTRY_URL }}/${{ matrix.image }}:${{ needs.get-version.outputs.img_version }} + image: ${{ vars.DOCKER_INTERNAL_REGISTRY_URL }}/${{ matrix.image }}:${{ needs.get-environment.outputs.img_version }} credentials: username: ${{ secrets.HARBOR_CENTREON_PULL_USERNAME }} password: ${{ secrets.HARBOR_CENTREON_PULL_TOKEN }} @@ -126,12 +126,12 @@ jobs: rpm_gpg_key: ${{ secrets.RPM_GPG_SIGNING_KEY }} rpm_gpg_signing_key_id: ${{ secrets.RPM_GPG_SIGNING_KEY_ID }} rpm_gpg_signing_passphrase: ${{ 
secrets.RPM_GPG_SIGNING_PASSPHRASE }} - stability: ${{ needs.get-version.outputs.stability }} + stability: ${{ needs.get-environment.outputs.stability }} deliver-rpm: - if: ${{ contains(fromJson('["testing", "unstable"]'), needs.get-version.outputs.stability) }} - needs: [get-version, package] - runs-on: ubuntu-22.04 + if: ${{ contains(fromJson('["testing", "unstable"]'), needs.get-environment.outputs.stability) }} + needs: [get-environment, package] + runs-on: ubuntu-24.04 strategy: matrix: include: @@ -150,17 +150,17 @@ jobs: with: module_name: lua-curl distrib: ${{ matrix.distrib }} - version: ${{ needs.get-version.outputs.major_version }} + version: ${{ needs.get-environment.outputs.major_version }} artifactory_token: ${{ secrets.ARTIFACTORY_ACCESS_TOKEN }} cache_key: ${{ github.sha }}-${{ github.run_id }}-rpm-lua-curl-${{ matrix.distrib }}-${{ matrix.arch }} - stability: ${{ needs.get-version.outputs.stability }} - release_type: ${{ needs.get-version.outputs.release_type }} - release_cloud: ${{ needs.get-version.outputs.release_cloud }} + stability: ${{ needs.get-environment.outputs.stability }} + release_type: ${{ needs.get-environment.outputs.release_type }} + is_cloud: ${{ needs.get-environment.outputs.is_cloud }} deliver-deb: - if: ${{ contains(fromJson('["testing", "unstable"]'), needs.get-version.outputs.stability) }} - needs: [get-version, package] - runs-on: ubuntu-22.04 + if: ${{ contains(fromJson('["testing", "unstable"]'), needs.get-environment.outputs.stability) }} + needs: [get-environment, package] + runs-on: ubuntu-24.04 strategy: matrix: include: @@ -178,17 +178,17 @@ jobs: with: module_name: lua-curl distrib: ${{ matrix.distrib }} - version: ${{ needs.get-version.outputs.major_version }} + version: ${{ needs.get-environment.outputs.major_version }} artifactory_token: ${{ secrets.ARTIFACTORY_ACCESS_TOKEN }} cache_key: ${{ github.sha }}-${{ github.run_id }}-deb-lua-curl-${{ matrix.distrib }}-${{ matrix.arch }} - stability: ${{ 
needs.get-version.outputs.stability }} - release_type: ${{ needs.get-version.outputs.release_type }} - release_cloud: ${{ needs.get-version.outputs.release_cloud }} + stability: ${{ needs.get-environment.outputs.stability }} + release_type: ${{ needs.get-environment.outputs.release_type }} + is_cloud: ${{ needs.get-environment.outputs.is_cloud }} promote: - needs: [get-version, deliver-rpm, deliver-deb] + needs: [get-environment, deliver-rpm, deliver-deb] if: | - (contains(fromJson('["stable", "testing"]'), needs.get-version.outputs.stability) && github.event_name != 'workflow_dispatch') && + (contains(fromJson('["stable", "testing"]'), needs.get-environment.outputs.stability) && github.event_name != 'workflow_dispatch') && ! cancelled() && ! contains(needs.*.result, 'failure') && ! contains(needs.*.result, 'cancelled') @@ -207,8 +207,8 @@ jobs: artifactory_token: ${{ secrets.ARTIFACTORY_ACCESS_TOKEN }} module_name: lua-curl distrib: ${{ matrix.distrib }} - major_version: ${{ needs.get-version.outputs.major_version }} - stability: ${{ needs.get-version.outputs.stability }} + major_version: ${{ needs.get-environment.outputs.major_version }} + stability: ${{ needs.get-environment.outputs.stability }} github_ref_name: ${{ github.ref_name }} - release_type: ${{ needs.get-version.outputs.release_type }} - release_cloud: ${{ needs.get-version.outputs.release_cloud }} + release_type: ${{ needs.get-environment.outputs.release_type }} + is_cloud: ${{ needs.get-environment.outputs.is_cloud }} diff --git a/.github/workflows/rebase-master.yml b/.github/workflows/rebase-master.yml index 03520557266..16a8191744b 100644 --- a/.github/workflows/rebase-master.yml +++ b/.github/workflows/rebase-master.yml @@ -12,7 +12,7 @@ on: jobs: main: name: Sync Stable Branches - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 if: github.event.pull_request.merged == true steps: - name: git checkout diff --git a/.github/workflows/rebase-version.yml b/.github/workflows/rebase-version.yml index 
c89b3fe98b5..a261d5a96fd 100644 --- a/.github/workflows/rebase-version.yml +++ b/.github/workflows/rebase-version.yml @@ -12,7 +12,7 @@ on: jobs: main: name: Sync Stable Branches - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 if: github.event.pull_request.merged == true steps: - name: git checkout diff --git a/.github/workflows/release-trigger-builds.yml b/.github/workflows/release-trigger-builds.yml index 3769c527747..34eb52be98e 100644 --- a/.github/workflows/release-trigger-builds.yml +++ b/.github/workflows/release-trigger-builds.yml @@ -17,7 +17,7 @@ on: jobs: release-trigger-builds: - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 steps: - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 594d0392f0a..d165240e9e6 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -21,7 +21,7 @@ on: jobs: release: if: ${{ github.event.pull_request.merged == true }} - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 steps: - name: Check base_ref run: | diff --git a/.github/workflows/robot-test.yml b/.github/workflows/robot-test.yml index d6f8bc622e3..e9b0ea330a6 100644 --- a/.github/workflows/robot-test.yml +++ b/.github/workflows/robot-test.yml @@ -47,7 +47,7 @@ on: jobs: test-image-to-cache: - runs-on: ${{ contains(inputs.image, 'arm') && fromJson('["self-hosted", "collect-arm64"]') || 'ubuntu-22.04' }} + runs-on: ${{ contains(inputs.image, 'arm') && fromJson('["self-hosted", "collect-arm64"]') || 'ubuntu-24.04' }} steps: - name: Checkout sources uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 @@ -84,7 +84,7 @@ jobs: robot-test-list: needs: [test-image-to-cache] - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 outputs: features: ${{ steps.list-features.outputs.features }} @@ -100,7 +100,7 @@ jobs: robot-test: needs: [robot-test-list] - runs-on: ${{ contains(inputs.image, 'arm') && fromJson('["self-hosted", 
"collect-arm64"]') || 'ubuntu-22.04' }} + runs-on: ${{ contains(inputs.image, 'arm') && fromJson('["self-hosted", "collect-arm64"]') || 'ubuntu-24.04' }} strategy: fail-fast: false @@ -191,7 +191,7 @@ jobs: robot-test-report: needs: [robot-test] if: ${{ failure() }} - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 steps: - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/veracode-analysis.yml b/.github/workflows/veracode-analysis.yml index 23361521e81..99c81a3ca20 100644 --- a/.github/workflows/veracode-analysis.yml +++ b/.github/workflows/veracode-analysis.yml @@ -32,7 +32,7 @@ on: jobs: routing: name: Check before analysis - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 outputs: development_stage: ${{ steps.routing-mode.outputs.development_stage }} skip_analysis: ${{ steps.routing-mode.outputs.skip_analysis }} @@ -169,7 +169,7 @@ jobs: name: Sandbox scan needs: [routing, build] if: needs.routing.outputs.development_stage != 'Development' - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 steps: - name: Promote latest scan diff --git a/.github/workflows/windows-agent-robot-test.yml b/.github/workflows/windows-agent-robot-test.yml index 30abb02db7b..20224adcc89 100644 --- a/.github/workflows/windows-agent-robot-test.yml +++ b/.github/workflows/windows-agent-robot-test.yml @@ -10,21 +10,21 @@ on: - cron: '30 0 * * *' jobs: - get-version: - uses: ./.github/workflows/get-version.yml + get-environment: + uses: ./.github/workflows/get-environment.yml with: version_file: CMakeLists.txt build-collect: - needs: [get-version] + needs: [get-environment] uses: ./.github/workflows/package-collect.yml with: - major_version: ${{ needs.get-version.outputs.major_version }} - minor_version: ${{ needs.get-version.outputs.minor_version }} - img_version: ${{ needs.get-version.outputs.img_version }} - release: ${{ needs.get-version.outputs.release }} + major_version: ${{ needs.get-environment.outputs.major_version }} + minor_version: 
${{ needs.get-environment.outputs.minor_version }} + img_version: ${{ needs.get-environment.outputs.img_version }} + release: ${{ needs.get-environment.outputs.release }} commit_hash: ${{ github.sha }} - stability: ${{ needs.get-version.outputs.stability }} + stability: ${{ needs.get-environment.outputs.stability }} legacy_engine: 'ON' packages_in_artifact: false image: centreon-collect-debian-bullseye diff --git a/.github/workflows/windows-agent.yml b/.github/workflows/windows-agent.yml index ffd3033a623..766da891cba 100644 --- a/.github/workflows/windows-agent.yml +++ b/.github/workflows/windows-agent.yml @@ -34,13 +34,13 @@ on: - vcpkg.json jobs: - get-version: - uses: ./.github/workflows/get-version.yml + get-environment: + uses: ./.github/workflows/get-environment.yml with: version_file: CMakeLists.txt build-and-test-agent: - needs: [get-version] + needs: [get-environment] runs-on: windows-latest env: AWS_ACCESS_KEY_ID: ${{ secrets.COLLECT_S3_ACCESS_KEY }} @@ -87,19 +87,19 @@ jobs: - name: Deliver if: | - contains(fromJson('["unstable", "testing"]'), needs.get-version.outputs.stability) && + contains(fromJson('["unstable", "testing"]'), needs.get-environment.outputs.stability) && ! cancelled() && ! contains(needs.*.result, 'failure') && ! 
contains(needs.*.result, 'cancelled') run: | - Write-Host "[DEBUG] deliver to testing - Major version: ${{ needs.get-version.outputs.major_version }}" - Write-Host "[DEBUG] deliver to testing - Minor version: ${{ needs.get-version.outputs.minor_version }}" + Write-Host "[DEBUG] deliver to testing - Major version: ${{ needs.get-environment.outputs.major_version }}" + Write-Host "[DEBUG] deliver to testing - Minor version: ${{ needs.get-environment.outputs.minor_version }}" - $VERSION = "${{ needs.get-version.outputs.version }}" + $VERSION = "${{ needs.get-environment.outputs.major_version }}.${{ needs.get-environment.outputs.minor_version }}" $MODULE_NAME = "monitoring-agent-$VERSION" - $STABILITY = "${{ needs.get-version.outputs.stability }}" + $STABILITY = "${{ needs.get-environment.outputs.stability }}" - $TARGET_PATH = "installers/monitoring-agent/${{ needs.get-version.outputs.major_version }}/$STABILITY/$MODULE_NAME/" + $TARGET_PATH = "installers/monitoring-agent/${{ needs.get-environment.outputs.major_version }}/$STABILITY/$MODULE_NAME/" $VERSION_EXE = "centreon-monitoring-agent-${VERSION}.exe" @@ -110,17 +110,17 @@ jobs: - name: Promote testing to stable if: | - needs.get-version.outputs.stability == 'stable' && github.event_name != 'workflow_dispatch' && ! cancelled() + needs.get-environment.outputs.stability == 'stable' && github.event_name != 'workflow_dispatch' && ! 
cancelled() run: | - Write-Host "[DEBUG] promote to stable - Major version: ${{ needs.get-version.outputs.major_version }}" - Write-Host "[DEBUG] promote to stable - Minor version: ${{ needs.get-version.outputs.minor_version }}" + Write-Host "[DEBUG] promote to stable - Major version: ${{ needs.get-environment.outputs.major_version }}" + Write-Host "[DEBUG] promote to stable - Minor version: ${{ needs.get-environment.outputs.minor_version }}" - $VERSION= "${{ needs.get-version.outputs.version }}" - $MODULE_NAME= "monitoring-agent-${{ needs.get-version.outputs.version }}" - $STABILITY= "${{ needs.get-version.outputs.stability }}" + $VERSION= "${{ needs.get-environment.outputs.major_version }}.${{ needs.get-environment.outputs.minor_version }}" + $MODULE_NAME= "monitoring-agent-${{ needs.get-environment.outputs.major_version }}.${{ needs.get-environment.outputs.minor_version }}" + $STABILITY= "${{ needs.get-environment.outputs.stability }}" - $SRC_PATH = "installers/monitoring-agent/${{ needs.get-version.outputs.major_version }}/testing/$MODULE_NAME/" - $TARGET_PATH = "installers/monitoring-agent/${{ needs.get-version.outputs.major_version }}/$STABILITY/$MODULE_NAME/" + $SRC_PATH = "installers/monitoring-agent/${{ needs.get-environment.outputs.major_version }}/testing/$MODULE_NAME/" + $TARGET_PATH = "installers/monitoring-agent/${{ needs.get-environment.outputs.major_version }}/$STABILITY/$MODULE_NAME/" $VERSION_EXE = "centreon-monitoring-agent-${VERSION}.exe" From aa9cd53729535a2051198b7fc851534f030009bc Mon Sep 17 00:00:00 2001 From: jean-christophe81 <98889244+jean-christophe81@users.noreply.github.com> Date: Tue, 5 Nov 2024 09:00:05 +0100 Subject: [PATCH 03/13] MON-150893 sign agent installer and modifier backport (#1756) (#1770) * build agent and then build installer (#1756) * fix compile and robot tests --- .github/scripts/agent_robot_test.ps1 | 2 +- .github/workflows/robot-test.yml | 16 ++-- .../workflows/windows-agent-robot-test.yml | 5 +- 
.github/workflows/windows-agent.yml | 90 +++++++++++++++++-- CMakeListsWindows.txt | 3 + agent/CMakeLists.txt | 4 +- agent/installer/CMakeLists.txt | 7 +- agent/native_linux/src/check_cpu.cc | 23 +++++ 8 files changed, 130 insertions(+), 20 deletions(-) diff --git a/.github/scripts/agent_robot_test.ps1 b/.github/scripts/agent_robot_test.ps1 index 37345457f0a..ba6fd13381c 100644 --- a/.github/scripts/agent_robot_test.ps1 +++ b/.github/scripts/agent_robot_test.ps1 @@ -82,7 +82,7 @@ Start-Sleep -Seconds 1 Set-ItemProperty -Path HKLM:\SOFTWARE\Centreon\CentreonMonitoringAgent -Name ca_certificate -Value "" Set-ItemProperty -Path HKLM:\SOFTWARE\Centreon\CentreonMonitoringAgent -Name encryption -Value 0 Set-ItemProperty -Path HKLM:\SOFTWARE\Centreon\CentreonMonitoringAgent -Name endpoint -Value 0.0.0.0:4320 -Set-ItemProperty -Path HKLM:\SOFTWARE\Centreon\CentreonMonitoringAgent -Name reverse_connection -Value 1 +Set-ItemProperty -Path HKLM:\SOFTWARE\Centreon\CentreonMonitoringAgent -Name reversed_grpc_streaming -Value 1 $agent_log_path = $current_dir + "\reports\reverse_centagent.log" Set-ItemProperty -Path HKLM:\SOFTWARE\Centreon\CentreonMonitoringAgent -Name log_file -Value $agent_log_path diff --git a/.github/workflows/robot-test.yml b/.github/workflows/robot-test.yml index e9b0ea330a6..2b34168ae21 100644 --- a/.github/workflows/robot-test.yml +++ b/.github/workflows/robot-test.yml @@ -50,7 +50,7 @@ jobs: runs-on: ${{ contains(inputs.image, 'arm') && fromJson('["self-hosted", "collect-arm64"]') || 'ubuntu-24.04' }} steps: - name: Checkout sources - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Login to Registry uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0 @@ -90,7 +90,7 @@ jobs: steps: - name: Checkout sources - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + uses: 
actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: List features id: list-features @@ -111,7 +111,7 @@ jobs: steps: - name: Checkout sources - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 with: fetch-depth: 0 @@ -182,7 +182,7 @@ jobs: - name: Upload Test Results if: ${{ failure() }} - uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 with: name: reports-${{inputs.test_group_name}}-${{ steps.feature-path.outputs.feature_name_with_dash }} path: reports @@ -194,7 +194,7 @@ jobs: runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Download Artifacts uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 @@ -204,7 +204,7 @@ jobs: merge-multiple: true - name: Upload the regrouped artifact - uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 with: name: reports-${{inputs.test_group_name}} path: reports/ @@ -243,12 +243,12 @@ jobs: shell: bash # setup-python v5.0.0 relies on node20 which is not supported by el7 distributions - - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 if: ${{ inputs.distrib == 'el7'}} with: python-version: "3.10" - - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 if: ${{ inputs.distrib != 'el7' }} with: python-version: "3.10" diff --git a/.github/workflows/windows-agent-robot-test.yml 
b/.github/workflows/windows-agent-robot-test.yml index 20224adcc89..31cd5c7d5ff 100644 --- a/.github/workflows/windows-agent-robot-test.yml +++ b/.github/workflows/windows-agent-robot-test.yml @@ -48,8 +48,7 @@ jobs: run: git config --system core.autocrlf false - name: Checkout sources - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: distrib availables run: wsl --list --online @@ -123,7 +122,7 @@ jobs: - name: Upload Test Results if: ${{ failure() }} - uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 with: name: reports-cma-windows path: reports diff --git a/.github/workflows/windows-agent.yml b/.github/workflows/windows-agent.yml index 766da891cba..89edf270ec3 100644 --- a/.github/workflows/windows-agent.yml +++ b/.github/workflows/windows-agent.yml @@ -16,6 +16,7 @@ on: pull_request: paths: - agent/** + - common/** - custom-triplets/** - CMakeLists.txt - CMakeListsWindows.txt @@ -28,6 +29,7 @@ on: - "[2-9][0-9].[0-9][0-9].x" paths: - agent/** + - common/** - custom-triplets/** - CMakeLists.txt - CMakeListsWindows.txt @@ -57,9 +59,9 @@ jobs: run: git config --system core.autocrlf false - name: Checkout sources - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - - name: Compile Agent + - name: Compile Agent only run: .github/scripts/windows-agent-compile.ps1 shell: powershell @@ -73,17 +75,95 @@ jobs: cd build_windows tests/ut_agent + - name: Sign agent + if: | + contains(fromJson('["unstable", "testing"]'), needs.get-environment.outputs.stability) && + ! cancelled() && + ! contains(needs.*.result, 'failure') && + ! 
contains(needs.*.result, 'cancelled') + uses: azure/trusted-signing-action@v0.4.0 + with: + azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }} + azure-client-id: ${{ secrets.AZURE_CLIENT_ID }} + azure-client-secret: ${{ secrets.AZURE_CLIENT_SECRET }} + endpoint: https://weu.codesigning.azure.net/ + trusted-signing-account-name: Centreon-signature-RD + certificate-profile-name: Cert-Signature-RD + files-folder: build_windows\agent\Release + files-folder-filter: centagent.exe + files-folder-recurse: false + file-digest: SHA256 + timestamp-rfc3161: http://timestamp.acs.microsoft.com + timestamp-digest: SHA256 + + - name: Build modifier + run: | + cmake -DCMAKE_BUILD_TYPE=Release -DWITH_TESTING=On -DWINDOWS=On -DBUILD_FROM_CACHE=On -S. -DVCPKG_CRT_LINKAGE=dynamic -DBUILD_SHARED_LIBS=OFF -DWITH_BUILD_AGENT_MODIFIER=On -Bbuild_windows + cmake --build build_windows --config Release + + - name: Sign modifier + if: | + contains(fromJson('["unstable", "testing"]'), needs.get-environment.outputs.stability) && + ! cancelled() && + ! contains(needs.*.result, 'failure') && + ! contains(needs.*.result, 'cancelled') + uses: azure/trusted-signing-action@v0.4.0 + with: + azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }} + azure-client-id: ${{ secrets.AZURE_CLIENT_ID }} + azure-client-secret: ${{ secrets.AZURE_CLIENT_SECRET }} + endpoint: https://weu.codesigning.azure.net/ + trusted-signing-account-name: Centreon-signature-RD + certificate-profile-name: Cert-Signature-RD + files-folder: agent\installer + files-folder-filter: centreon-monitoring-agent-modify.exe + file-digest: SHA256 + timestamp-rfc3161: http://timestamp.acs.microsoft.com + timestamp-digest: SHA256 + + - name: Build installer + run: | + cmake -DCMAKE_BUILD_TYPE=Release -DWITH_TESTING=On -DWINDOWS=On -DBUILD_FROM_CACHE=On -S. 
-DVCPKG_CRT_LINKAGE=dynamic -DBUILD_SHARED_LIBS=OFF -DWITH_BUILD_AGENT_INSTALLER=On -Bbuild_windows + cmake --build build_windows --config Release + + - name: Sign installer + if: | + contains(fromJson('["unstable", "testing"]'), needs.get-environment.outputs.stability) && + ! cancelled() && + ! contains(needs.*.result, 'failure') && + ! contains(needs.*.result, 'cancelled') + uses: azure/trusted-signing-action@v0.4.0 + with: + azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }} + azure-client-id: ${{ secrets.AZURE_CLIENT_ID }} + azure-client-secret: ${{ secrets.AZURE_CLIENT_SECRET }} + endpoint: https://weu.codesigning.azure.net/ + trusted-signing-account-name: Centreon-signature-RD + certificate-profile-name: Cert-Signature-RD + files-folder: agent\installer + files-folder-filter: centreon-monitoring-agent.exe + file-digest: SHA256 + timestamp-rfc3161: http://timestamp.acs.microsoft.com + timestamp-digest: SHA256 + - name: Installer test run: .github/scripts/agent_installer_test.ps1 shell: powershell - name: Upload package artifacts if: | - inputs.installer_in_artifact == true - uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 + inputs.installer_in_artifact == true || + (github.event_name != 'workflow_dispatch' && + contains(fromJson('["stable"]'), needs.get-environment.outputs.stability) && + ! cancelled() && + ! contains(needs.*.result, 'failure') && + ! 
contains(needs.*.result, 'cancelled')) + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 with: name: packages-centreon-monitoring-agent-windows - path: agent\installer\centreon-monitoring-agent.exe + path: | + agent\installer\centreon-monitoring-agent.exe + build_windows\agent\Release\centagent.exe - name: Deliver if: | diff --git a/CMakeListsWindows.txt b/CMakeListsWindows.txt index f3d9d8de57a..c693c450963 100644 --- a/CMakeListsWindows.txt +++ b/CMakeListsWindows.txt @@ -59,6 +59,9 @@ set(VCPKG_INCLUDE_DIR ${OPENSSL_INCLUDE_DIR}) include(GNUInstallDirs) option(WITH_TESTING "Build unit tests." OFF) +option(WITH_BUILD_AGENT_INSTALLER "Build agent windows installer." OFF) +option(WITH_BUILD_AGENT_MODIFIER "Build agent windows config update program (not needed if WITH_BUILD_INSTALLER=ON)." OFF) + set(protobuf_MODULE_COMPATIBLE True) diff --git a/agent/CMakeLists.txt b/agent/CMakeLists.txt index d2af2dfaae8..37f3f8a1a77 100644 --- a/agent/CMakeLists.txt +++ b/agent/CMakeLists.txt @@ -191,7 +191,9 @@ else() Boost::program_options fmt::fmt) - add_subdirectory(installer) + if(WITH_BUILD_AGENT_INSTALLER OR WITH_BUILD_AGENT_MODIFIER) + add_subdirectory(installer) + endif() endif() diff --git a/agent/installer/CMakeLists.txt b/agent/installer/CMakeLists.txt index 7f9cd769439..a91f4eab724 100644 --- a/agent/installer/CMakeLists.txt +++ b/agent/installer/CMakeLists.txt @@ -51,14 +51,17 @@ add_custom_command( WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}" COMMAND ${MKNSIS} "${PROJECT_SOURCE_DIR}/centreon-monitoring-agent-modify.nsi") +add_custom_target("centreon-monitoring-agent-modifier" ALL DEPENDS "${PROJECT_SOURCE_DIR}/centreon-monitoring-agent-modify.exe") #final installer add_custom_command( - DEPENDS "${PROJECT_SOURCE_DIR}/centreon-monitoring-agent.nsi" "${PROJECT_SOURCE_DIR}/centreon-monitoring-agent-modify.exe" ${COMMON_INSTALLERS_FILES} + DEPENDS "${PROJECT_SOURCE_DIR}/centreon-monitoring-agent.nsi" "centreon-monitoring-agent-modifier" 
${COMMON_INSTALLERS_FILES} COMMENT "--------- Generating cma configuration installer --------" OUTPUT "${PROJECT_SOURCE_DIR}/centreon-monitoring-agent.exe" WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}" COMMAND ${MKNSIS} "${PROJECT_SOURCE_DIR}/centreon-monitoring-agent.nsi") -add_custom_target("centreon-monitoring-agent-installer" ALL DEPENDS "${PROJECT_SOURCE_DIR}/centreon-monitoring-agent.exe") +if (WITH_BUILD_AGENT_INSTALLER) + add_custom_target("centreon-monitoring-agent-installer" ALL DEPENDS "${PROJECT_SOURCE_DIR}/centreon-monitoring-agent.exe") +endif() diff --git a/agent/native_linux/src/check_cpu.cc b/agent/native_linux/src/check_cpu.cc index e69de29bb2d..85fba903e50 100644 --- a/agent/native_linux/src/check_cpu.cc +++ b/agent/native_linux/src/check_cpu.cc @@ -0,0 +1,23 @@ +/** + * Copyright 2024 Centreon + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * For more information : contact@centreon.com + */ + +/** + * @brief in order to compile waiting for nex implementation + * + */ +class dummy {}; \ No newline at end of file From eb347f1a1d15e4ee031c66f141559a5d8c92fcb7 Mon Sep 17 00:00:00 2001 From: jean-christophe81 <98889244+jean-christophe81@users.noreply.github.com> Date: Tue, 5 Nov 2024 11:35:16 +0100 Subject: [PATCH 04/13] retention.dat is not referenced in centreon-collect.yaml, so it's not deleted on uninstall or upgrade (#1772) (#1823) REFS:MON-152188 --- packaging/centreon-collect.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/packaging/centreon-collect.yaml b/packaging/centreon-collect.yaml index 264f42f8640..f1f925d8655 100644 --- a/packaging/centreon-collect.yaml +++ b/packaging/centreon-collect.yaml @@ -30,9 +30,6 @@ contents: owner: centreon-engine group: centreon-engine - - dst: "/var/log/centreon-engine/retention.dat" - type: ghost - - src: "files/empty_file" dst: "/var/log/centreon-engine/status.dat" file_info: From 25c7dea04eb819d46eaeed768aae2c1c2ff9c238 Mon Sep 17 00:00:00 2001 From: jean-christophe81 <98889244+jean-christophe81@users.noreply.github.com> Date: Tue, 5 Nov 2024 16:05:56 +0100 Subject: [PATCH 05/13] MON-152186 cma improve scheduler and agent native linux check cpu backport 24.10 (#1827) MON-152186 * implement linux check_cpu (#1763) * agent scheduler modified to spread checks over period and fix a crash in opentelemetry module (#1775) process execution is killed on timeout some reverse_connection were forgotten, reverse_connection => reversed_grpc_streaming --- agent/doc/agent-doc.md | 42 +- agent/inc/com/centreon/agent/check.hh | 86 ++- agent/inc/com/centreon/agent/check_exec.hh | 11 +- agent/inc/com/centreon/agent/scheduler.hh | 11 +- agent/installer/silent.nsi | 2 +- .../inc/com/centreon/agent/check_cpu.hh | 190 ++++- agent/native_linux/src/check_cpu.cc | 545 ++++++++++++++- agent/precomp_inc/precomp.hh | 8 + agent/src/check.cc | 37 +- agent/src/check_exec.cc | 
48 +- agent/src/config.cc | 4 +- agent/src/config_win.cc | 2 +- agent/src/main_win.cc | 4 + agent/src/scheduler.cc | 91 ++- agent/test/CMakeLists.txt | 2 +- agent/test/check_exec_test.cc | 28 +- agent/test/check_linux_cpu_test.cc | 652 ++++++++++++++++++ agent/test/check_test.cc | 5 +- agent/test/scheduler_test.cc | 77 ++- common/inc/com/centreon/common/defer.hh | 23 + common/inc/com/centreon/common/perfdata.hh | 2 +- .../com/centreon/common/process/process.hh | 2 + common/process/src/process.cc | 15 + common/tests/process_test.cc | 28 + common/tests/scripts/sleep.bat | 2 + .../centreon_agent/agent_config.hh | 2 + .../src/centreon_agent/agent_config.cc | 28 +- .../modules/opentelemetry/src/otl_config.cc | 4 + tests/broker-engine/cma.robot | 66 ++ tests/resources/Agent.py | 2 +- tests/resources/Broker.py | 40 ++ 31 files changed, 1921 insertions(+), 138 deletions(-) create mode 100644 agent/test/check_linux_cpu_test.cc create mode 100644 common/tests/scripts/sleep.bat diff --git a/agent/doc/agent-doc.md b/agent/doc/agent-doc.md index 7f279210860..52a0de209ee 100644 --- a/agent/doc/agent-doc.md +++ b/agent/doc/agent-doc.md @@ -22,8 +22,10 @@ We don't care about the duration of tests, we work with time points. In the previous example, the second check for the first service will be scheduled at 12:00:10 even if all other checks has not been yet started. In case of check duration is too long, we might exceed maximum of concurrent checks. In that case checks will be executed as soon one will be ended. -This means that the second check may start later than the scheduled time point (12:00:10) if the other first checks are too long. The order of checks is always respected even in case of a bottleneck. -For example, a check lambda has a start_expected to 12:00, because of bottleneck, it starts at 12:15. Next start_expected of check lambda will then be 12:15 + check_period. 
+ +This means that the second check may start later than the scheduled time point (12:00:10) if the first checks take too long. + +When a check completes, it is inserted into _waiting_check_queue, and its start will be scheduled as soon as a slot in the queue is available (the queue is a set indexed by expected_start) minus old_start plus check_period. ## native checks @@ -33,13 +35,12 @@ Then you have to override constructor and start_check method. All is asynchronous. When start_check is called, it must not block caller for a long time. At the end of measure, it must call check::on_completion(). That method need 4 arguments: -* start_check_index: For long asynchronous operation, at the beginning, asynchronous job must store running_index and use it when he have to call check::on_completion(). It is useful for scheduler to check is the result is the result of the last asynchronous job start. The new class can get running index with check::_get_running_check_index() +* start_check_index: For a long asynchronous operation, the asynchronous job must store running_index at the beginning and use it when it has to call check::on_completion(). It lets the scheduler check whether the result comes from the last asynchronous job started. The new class can get the running index with check::_get_running_check_index() + An example, checks starts a first measure, the timeout expires, a second measure starts, the first measure ends,we don't take into account his result and we wait for the end off second one. * status: plugins status equivalent. 
Values are 0:Ok, 1: warning, 2: critical, 3: unknown (https://nagios-plugins.org/doc/guidelines.html#AEN41) * perfdata: a list of com::centreon::common::perfdata objects * outputs: equivalent of plugins output as "CPU 54% OK" -BEWARE, in some cases, we can have recursion, check::on_completion can call start_check A little example: ```c++ class dummy_check : public check { @@ -48,7 +49,9 @@ class dummy_check : public check { public: void start_check(const duration& timeout) override { - check::start_check(timeout); + if (!check::_start_check(timeout)) { + return; + } _command_timer.expires_from_now(_command_duration); _command_timer.async_wait([me = shared_from_this(), this, running_index = _get_running_check_index()]( @@ -71,6 +74,7 @@ class dummy_check : public check { : check(g_io_context, spdlog::default_logger(), std::chrono::system_clock::now(), + std::chrono::seconds(1), serv, command_name, command_line, @@ -79,4 +83,28 @@ class dummy_check : public check { _command_duration(command_duration), _command_timer(*g_io_context) {} }; -``` \ No newline at end of file +``` + +### native_check_cpu (linux version) +It uses /proc/stat to measure cpu statistics. When start_check is called, a first snapshot of /proc/stat is taken. Then a timer is started; it expires 1 second before the earlier of the check timeout and the end of the check interval. When this timer expires, we take a second snapshot and create plugin output and perfdata from the difference between the two snapshots. 
+The arguments accepted by this check (in json format) are: +* cpu-detailed: + * if false, produces only average cpu usage perfdata per processor and one for the average + * if true, produces per processor and average one perfdata for user, nice, system, idle, iowait, irq, soft_irq, steal, guest, guest_nice and total used counters + +Output is inspired from centreon local cpu and cpu-detailed plugins +Examples of output: +* OK: CPU(s) average usage is 24.08% +* CRITICAL: CPU'0' Usage: 24.66%, User 17.58%, Nice 0.00%, System 5.77%, Idle 75.34%, IOWait 0.39%, Interrupt 0.00%, Soft Irq 0.91%, Steal 0.00%, Guest 0.00%, Guest Nice 0.00% WARNING: CPU'2' Usage: 24.18%, User 17.69%, Nice 0.00%, System 5.99%, Idle 75.82%, IOWait 0.38%, Interrupt 0.00%, Soft Irq 0.12%, Steal 0.00%, Guest 0.00%, Guest Nice 0.00% CRITICAL: CPU(s) average Usage: 24.08%, User 17.65%, Nice 0.00%, System 5.80%, Idle 75.92%, IOWait 0.36%, Interrupt 0.00%, Soft Irq 0.27%, Steal 0.00%, Guest 0.00%, Guest Nice 0.00% + +Example of perfdatas in not cpu-detailed mode: +* cpu.utilization.percentage +* 0#core.cpu.utilization.percentage +* 1#core.cpu.utilization.percentage + +Example of perfdatas in cpu-detailed mode: +* 0~user#core.cpu.utilization.percentage +* 0~system#core.cpu.utilization.percentage +* 1~interrupt#core.cpu.utilization.percentage +* iowait#cpu.utilization.percentage +* used#cpu.utilization.percentage \ No newline at end of file diff --git a/agent/inc/com/centreon/agent/check.hh b/agent/inc/com/centreon/agent/check.hh index c2808293e0e..132009e161a 100644 --- a/agent/inc/com/centreon/agent/check.hh +++ b/agent/inc/com/centreon/agent/check.hh @@ -30,6 +30,68 @@ using engine_to_agent_request_ptr = using time_point = std::chrono::system_clock::time_point; using duration = std::chrono::system_clock::duration; +/** + * @brief nagios status values + * + */ +enum e_status : unsigned { ok = 0, warning = 1, critical = 2, unknown = 3 }; + +/** + * @brief in order to have a non derive scheduling, we 
use this class to iterate + * time to time in case of we want to schedule an event every 30s for example + * + */ +class time_step { + time_point _start_point; + duration _step; + uint64_t _step_index = 0; + + public: + /** + * @brief Construct a new time step object + * + * @param start_point this time_point is the first time_point of the sequence + * @param step value() will return start_point + step * step_index + */ + time_step(time_point start_point, duration step) + : _start_point(start_point), _step(step) {} + + time_step() : _start_point(), _step() {} + + /** + * @brief increment time of one duration (one step) + * + * @return time_step& + */ + time_step& operator++() { + ++_step_index; + return *this; + } + + /** + * @brief set _step_index to the first step after or equal to now + * + */ + void increment_to_after_now() { + time_point now = std::chrono::system_clock::now(); + _step_index = + (now - _start_point + _step - std::chrono::microseconds(1)) / _step; + } + + /** + * @brief set _step_index to the first step after or equal to min_tp + * + */ + void increment_to_after_min(time_point min_tp) { + _step_index = + (min_tp - _start_point + _step - std::chrono::microseconds(1)) / _step; + } + + time_point value() const { return _start_point + _step_index * _step; } + + uint64_t get_step_index() const { return _step_index; } +}; + /** * @brief base class for check * start_expected is set by scheduler and increased by check_period on each @@ -46,8 +108,9 @@ class check : public std::enable_shared_from_this { private: //_start_expected is set on construction on config receive - // it's updated on check_start and added of check_period on check completion - time_point _start_expected; + // it's updated on check_start and added of multiple of check_interval + // (check_period / nb_check) on check completion + time_step _start_expected; const std::string& _service; const std::string& _command_name; const std::string& _command_line; @@ -79,12 +142,17 @@ class check 
: public std::enable_shared_from_this { virtual void _timeout_timer_handler(const boost::system::error_code& err, unsigned start_check_index); + bool _start_check(const duration& timeout); + + virtual void _on_timeout(){}; + public: using pointer = std::shared_ptr; check(const std::shared_ptr& io_context, const std::shared_ptr& logger, - time_point exp, + time_point first_start_expected, + duration check_interval, const std::string& serv, const std::string& command_name, const std::string& cmd_line, @@ -96,13 +164,17 @@ class check : public std::enable_shared_from_this { struct pointer_start_compare { bool operator()(const check::pointer& left, const check::pointer& right) const { - return left->_start_expected < right->_start_expected; + return left->_start_expected.value() < right->_start_expected.value(); } }; - void add_duration_to_start_expected(const duration& to_add); + void increment_start_expected_to_after_min_timepoint(time_point min_tp) { + _start_expected.increment_to_after_min(min_tp); + } + + void add_check_interval_to_start_expected() { ++_start_expected; } - time_point get_start_expected() const { return _start_expected; } + time_point get_start_expected() const { return _start_expected.value(); } const std::string& get_service() const { return _service; } @@ -117,7 +189,7 @@ class check : public std::enable_shared_from_this { const std::list& perfdata, const std::list& outputs); - virtual void start_check(const duration& timeout); + virtual void start_check(const duration& timeout) = 0; }; } // namespace com::centreon::agent diff --git a/agent/inc/com/centreon/agent/check_exec.hh b/agent/inc/com/centreon/agent/check_exec.hh index c458194bb18..49cdc2c04d2 100644 --- a/agent/inc/com/centreon/agent/check_exec.hh +++ b/agent/inc/com/centreon/agent/check_exec.hh @@ -84,15 +84,15 @@ class check_exec : public check { protected: using check::completion_handler; - void _timeout_timer_handler(const boost::system::error_code& err, - unsigned 
start_check_index) override; + void _on_timeout() override; void _init(); public: check_exec(const std::shared_ptr& io_context, const std::shared_ptr& logger, - time_point exp, + time_point first_start_expected, + duration check_interval, const std::string& serv, const std::string& cmd_name, const std::string& cmd_line, @@ -102,7 +102,8 @@ class check_exec : public check { static std::shared_ptr load( const std::shared_ptr& io_context, const std::shared_ptr& logger, - time_point exp, + time_point first_start_expected, + duration check_interval, const std::string& serv, const std::string& cmd_name, const std::string& cmd_line, @@ -111,6 +112,8 @@ class check_exec : public check { void start_check(const duration& timeout) override; + int get_pid() const; + void on_completion(unsigned running_index); }; diff --git a/agent/inc/com/centreon/agent/scheduler.hh b/agent/inc/com/centreon/agent/scheduler.hh index bc96f39477b..6e18e473581 100644 --- a/agent/inc/com/centreon/agent/scheduler.hh +++ b/agent/inc/com/centreon/agent/scheduler.hh @@ -37,6 +37,7 @@ class scheduler : public std::enable_shared_from_this { const std::shared_ptr&, const std::shared_ptr& /*logger*/, time_point /* start expected*/, + duration /* check interval */, const std::string& /*service*/, const std::string& /*cmd_name*/, const std::string& /*cmd_line*/, @@ -44,9 +45,10 @@ class scheduler : public std::enable_shared_from_this { check::completion_handler&&)>; private: - using check_queue = std::set; + using check_queue = + absl::btree_set; - check_queue _check_queue; + check_queue _waiting_check_queue; // running check counter that must not exceed max_concurrent_check unsigned _active_check = 0; bool _alive = true; @@ -72,6 +74,8 @@ class scheduler : public std::enable_shared_from_this { metric_sender _metric_sender; asio::system_timer _send_timer; asio::system_timer _check_timer; + time_step + _check_time_step; // time point used when too many checks are running check_builder _check_builder; // in 
order to send check_results at regular intervals, we work with absolute // time points that we increment @@ -154,7 +158,8 @@ class scheduler : public std::enable_shared_from_this { static std::shared_ptr default_check_builder( const std::shared_ptr& io_context, const std::shared_ptr& logger, - time_point start_expected, + time_point first_start_expected, + duration check_interval, const std::string& service, const std::string& cmd_name, const std::string& cmd_line, diff --git a/agent/installer/silent.nsi b/agent/installer/silent.nsi index f7e0c9477dd..1ac6a4e43f1 100644 --- a/agent/installer/silent.nsi +++ b/agent/installer/silent.nsi @@ -47,7 +47,7 @@ Function show_help FileWrite $0 "usage: centreon-monitoring-agent.exe args$\n" FileWrite $0 "This installer works into mode:$\n" FileWrite $0 " - Without argument: interactive windows UI$\n" - FileWrite $0 " - Silent mode with the /S flag$\n" + FileWrite $0 " - Silent mode with the /S flag in first position, before others arguments$\n" FileWrite $0 "Silent mode arguments:$\n" ${If} $1 != "" FileWrite $0 "$1$\n" diff --git a/agent/native_linux/inc/com/centreon/agent/check_cpu.hh b/agent/native_linux/inc/com/centreon/agent/check_cpu.hh index f11f02b039e..e328055dae1 100644 --- a/agent/native_linux/inc/com/centreon/agent/check_cpu.hh +++ b/agent/native_linux/inc/com/centreon/agent/check_cpu.hh @@ -19,6 +19,194 @@ #ifndef CENTREON_AGENT_CHECK_CPU_HH #define CENTREON_AGENT_CHECK_CPU_HH -namespace com::centreon::agent {} +#include "check.hh" +namespace com::centreon::agent { + +namespace check_cpu_detail { + +// all data is indexed by processor number, this fake index points to cpus +// average value +constexpr unsigned average_cpu_index = std::numeric_limits::max(); + +enum e_proc_stat_index { + user = 0, + nice, + system, + idle, + iowait, + irq, + soft_irq, + steal, + guest, + guest_nice, + used, // used = user + nice + system + iowait+ irq + soft_irq + steal + + // guest+ guest_nice + nb_field +}; + +/** + * @brief 
this class is the result of /proc/stat one line parsing like + * cpu0 2930565 15541 1250726 10453908 54490 0 27068 0 0 0 + * if _cpu_index == std::numeric_limits::max(), it represents the sum + * of all cpus + * + */ +class per_cpu_time { + static constexpr size_t nb_field = e_proc_stat_index::nb_field; + unsigned _data[nb_field]; + unsigned _cpu_index = 0; + unsigned _total = 0; + + public: + per_cpu_time(const std::string_view& line); + per_cpu_time() {} + unsigned get_cpu_index() const { return _cpu_index; } + unsigned get_user() const { return _data[e_proc_stat_index::user]; } + unsigned get_nice() const { return _data[e_proc_stat_index::nice]; } + unsigned get_idle() const { return _data[e_proc_stat_index::idle]; } + unsigned get_iowait() const { return _data[e_proc_stat_index::iowait]; } + unsigned get_irq() const { return _data[e_proc_stat_index::irq]; } + unsigned get_soft_irq() const { return _data[e_proc_stat_index::soft_irq]; } + unsigned get_steal() const { return _data[e_proc_stat_index::steal]; } + unsigned get_guest() const { return _data[e_proc_stat_index::guest]; } + unsigned get_guest_nice() const { + return _data[e_proc_stat_index::guest_nice]; + } + + unsigned get_value(e_proc_stat_index data_index) const { + return _data[data_index]; + } + + double get_proportional_value(unsigned data_index) const { + return (static_cast(_data[data_index])) / _total; + } + + unsigned get_total() const { return _total; } + + per_cpu_time& operator-=(const per_cpu_time& to_add); + + void dump(std::string* output) const; +}; + +/** + * @brief cpu statistics index by cpu number (cpu0,1...) 
+ * a special index average_cpu_index is the cpus average given by first line of + * /proc/stat + * + */ +using index_to_cpu = boost::container::flat_map; + +void dump(const index_to_cpu& cpus, std::string* output); + +/** + * @brief data read from /proc/stat + * + */ +class proc_stat_file { + index_to_cpu _values; + + public: + proc_stat_file(size_t nb_to_reserve) + : proc_stat_file("/proc/stat", nb_to_reserve) {} + + proc_stat_file(const char* proc_file, size_t nb_to_reserve); + + const index_to_cpu& get_values() const { return _values; } + + index_to_cpu operator-(const proc_stat_file& right) const; + + void dump(std::string* output) const; +}; + +/** + * @brief this little class compares cpu usage values to a threshold and sets + * plugin status + * + */ +class cpu_to_status { + e_status _status; + e_proc_stat_index _data_index; + bool _average; + double _threshold; + + public: + cpu_to_status(e_status status, + e_proc_stat_index data_index, + bool average, + double threshold) + : _status(status), + _data_index(data_index), + _average(average), + _threshold(threshold) {} + + e_proc_stat_index get_proc_stat_index() const { return _data_index; } + bool is_critical() const { return _status == e_status::critical; } + bool is_average() const { return _average; } + double get_threshold() const { return _threshold; } + e_status get_status() const { return _status; } + + void compute_status( + const index_to_cpu& to_test, + boost::container::flat_map* per_cpu_status) const; +}; + +}; // namespace check_cpu_detail + +/** + * @brief native linux check_cpu + * when a check starts, we take a first snapshot of /proc/stat and arm a timer + * when this timer expires, we take a second snapshot, compute cpu usage from + * the difference and pass the result to the completion handler + */ +class check_cpu : public check { + unsigned _nb_core; + + bool _cpu_detailed; + + /** + * @brief key used to store cpu_to_status + * @tparam 1 index (user, system, iowait.... 
and idle for all except idle) + * @tparam 2 true if average, false if per core + * @tparam 3 e_status warning or critical + * + */ + using cpu_to_status_key = std::tuple; + + boost::container::flat_map + _cpu_to_status; + + asio::system_timer _measure_timer; + + void _measure_timer_handler( + const boost::system::error_code& err, + unsigned start_check_index, + std::unique_ptr&& first_measure); + + public: + check_cpu(const std::shared_ptr& io_context, + const std::shared_ptr& logger, + time_point first_start_expected, + duration check_interval, + const std::string& serv, + const std::string& cmd_name, + const std::string& cmd_line, + const rapidjson::Value& args, + const engine_to_agent_request_ptr& cnf, + check::completion_handler&& handler); + + static void help(std::ostream& help_stream); + + void start_check(const duration& timeout) override; + + std::shared_ptr shared_from_this() { + return std::static_pointer_cast(check::shared_from_this()); + } + + e_status compute(const check_cpu_detail::proc_stat_file& first_measure, + const check_cpu_detail::proc_stat_file& second_measure, + std::string* output, + std::list* perfs); +}; +} // namespace com::centreon::agent #endif diff --git a/agent/native_linux/src/check_cpu.cc b/agent/native_linux/src/check_cpu.cc index 85fba903e50..153f9c66d95 100644 --- a/agent/native_linux/src/check_cpu.cc +++ b/agent/native_linux/src/check_cpu.cc @@ -16,8 +16,549 @@ * For more information : contact@centreon.com */ +#include "check_cpu.hh" +#include "com/centreon/common/rapidjson_helper.hh" + +using namespace com::centreon::agent; +using namespace com::centreon::agent::check_cpu_detail; + +/** + * @brief Construct a new per cpu time::per cpu time object + * it parses a line like cpu0 2930565 15541 1250726 10453908 54490 0 27068 0 0 0 + * + * @param line + */ +per_cpu_time::per_cpu_time(const std::string_view& line) { + using namespace std::literals; + auto split_res = absl::StrSplit(line, ' ', absl::SkipEmpty()); + auto field_iter = 
split_res.begin(); + + if ((*field_iter).substr(0, 3) != "cpu"sv) { + throw std::invalid_argument("no cpu"); + } + if (!absl::SimpleAtoi(field_iter->substr(3), &_cpu_index)) { + _cpu_index = check_cpu_detail::average_cpu_index; + } + + unsigned* to_fill = _data; + unsigned* end = _data + used; // used will be calculated after + + for (++field_iter; field_iter != split_res.end(); ++field_iter, ++to_fill) { + unsigned counter; + if (!absl::SimpleAtoi(*field_iter, &counter)) { + throw std::invalid_argument("not a number"); + } + // On some OS we may have more fields than user to guest_nice, we have to + // take them into account only for total compute + if (to_fill < end) { + *to_fill = counter; + } + _total += counter; + } + + // On some OS, we might have fewer fields than expected, so we initialize + // the remaining fields + for (; to_fill < end; ++to_fill) + *to_fill = 0; + + // Calculate the 'used' CPU time by subtracting idle time from total time + _data[e_proc_stat_index::used] = _total - _data[e_proc_stat_index::idle]; +} + +/** + * @brief subtract all fields and _total + * + * @param to_substract value to subtract from this + * @return per_cpu_time& (this) + */ +per_cpu_time& per_cpu_time::operator-=(const per_cpu_time& to_substract) { + unsigned* res = _data; + unsigned* end = _data + nb_field; + const unsigned* val_to_substract = to_substract._data; + for (; res < end; ++res, ++val_to_substract) { + if (*res > *val_to_substract) { + *res -= *val_to_substract; + } else { + *res = 0; + } + } + if (_total > to_substract._total) { + _total -= to_substract._total; + } else { + _total = 1; // no 0 divide + } + return *this; +} + +constexpr std::array + _sz_stat_index = {", User ", ", Nice ", ", System ", ", Idle ", + ", IOWait ", ", Interrupt ", ", Soft Irq ", ", Steal ", + ", Guest ", ", Guest Nice ", ", Usage"}; + +/** + * @brief print values summary to plugin output + * + * @param output plugin out + */ +void per_cpu_time::dump(std::string* output) const { + using namespace std::literals; + 
if (_cpu_index == check_cpu_detail::average_cpu_index) { + *output += + fmt::format("CPU(s) average Usage: {:.2f}%", + get_proportional_value(e_proc_stat_index::used) * 100); + } else { + *output += + fmt::format("CPU'{}' Usage: {:.2f}%", _cpu_index, + get_proportional_value(e_proc_stat_index::used) * 100); + } + + for (unsigned field_index = 0; field_index < e_proc_stat_index::used; + ++field_index) { + *output += _sz_stat_index[field_index]; + *output += + fmt::format("{:.2f}%", get_proportional_value(field_index) * 100); + } +} + +void com::centreon::agent::check_cpu_detail::dump(const index_to_cpu& cpus, + std::string* output) { + output->reserve(output->length() + cpus.size() * 256); + for (const auto& cpu : cpus) { + cpu.second.dump(output); + output->push_back('\n'); + } +} + +/** + * @brief Construct a new proc stat file::proc stat file object + * + * @param proc_file path of the proc file usually: /proc/stat, other for unit + * tests + * @param nb_to_reserve nb host cores + */ +proc_stat_file::proc_stat_file(const char* proc_file, size_t nb_to_reserve) { + _values.reserve(nb_to_reserve); + std::ifstream proc_stat(proc_file); + char line_buff[1024]; + while (1) { + try { + proc_stat.getline(line_buff, sizeof(line_buff)); + line_buff[1023] = 0; + per_cpu_time to_ins(line_buff); + _values.emplace(to_ins.get_cpu_index(), to_ins); + } catch (const std::exception&) { + return; + } + } +} + +/** + * @brief computes difference between two snapshots of /proc/stat + * + * @param right (older snapshot) + * @return index_to_cpu by cpu difference + */ +index_to_cpu proc_stat_file::operator-(const proc_stat_file& right) const { + index_to_cpu ret; + const auto& latest_values = _values; + const auto& older_values = right.get_values(); + for (const auto& latest_cpu : latest_values) { + auto search = older_values.find(latest_cpu.first); + if (search != older_values.end()) { + per_cpu_time to_ins(latest_cpu.second); + to_ins -= search->second; + 
ret.emplace(latest_cpu.first, to_ins); + } + } + return ret; +} + +/** + * @brief dump + * + * @param output + */ +void proc_stat_file::dump(std::string* output) const { + for (const auto& cpu : _values) { + cpu.second.dump(output); + output->push_back('\n'); + } +} + +/** + * @brief compare cpu values to a threshold and update cpu status if field value + * > threshold + * + * @param to_test cpus usage to compare + * @param per_cpu_status out parameter that contains per cpu worst status + */ +void cpu_to_status::compute_status( + const index_to_cpu& to_test, + boost::container::flat_map* per_cpu_status) const { + auto check_threshold = [&, this](const index_to_cpu::value_type& values) { + double val = values.second.get_proportional_value(_data_index); + if (val > _threshold) { + auto& to_update = (*per_cpu_status)[values.first]; + // if ok (=0) and _status is warning (=1) or critical(=2), we update + if (_status > to_update) { + to_update = _status; + } + } + }; + + if (_average) { + index_to_cpu::const_iterator avg = + to_test.find(check_cpu_detail::average_cpu_index); + if (avg == to_test.end()) { + return; + } + check_threshold(*avg); + } else { + for (const auto& by_cpu : to_test) { + if (by_cpu.first == check_cpu_detail::average_cpu_index) { + continue; + } + check_threshold(by_cpu); + } + } +} + +using cpu_to_status_constructor = + std::function; + +#define BY_TYPE_CPU_TO_STATUS(TYPE_METRIC) \ + {"warning-core-" #TYPE_METRIC, \ + [](double threshold) { \ + return cpu_to_status(e_status::warning, e_proc_stat_index::TYPE_METRIC, \ + false, threshold); \ + }}, \ + {"critical-core-" #TYPE_METRIC, \ + [](double threshold) { \ + return cpu_to_status(e_status::critical, \ + e_proc_stat_index::TYPE_METRIC, false, \ + threshold); \ + }}, \ + {"warning-average-" #TYPE_METRIC, \ + [](double threshold) { \ + return cpu_to_status(e_status::warning, \ + e_proc_stat_index::TYPE_METRIC, true, \ + threshold); \ + }}, \ + { \ + "critical-average-" #TYPE_METRIC, [](double 
threshold) { \ + return cpu_to_status(e_status::critical, e_proc_stat_index::TYPE_METRIC, \ + true, threshold); \ + } \ + } + +/** + * @brief this map is used to generate cpus values comparator from check + * configuration fields + * + */ +static const absl::flat_hash_map + _label_to_cpu_to_status = { + {"warning-core", + [](double threshold) { + return cpu_to_status(e_status::warning, e_proc_stat_index::used, + false, threshold); + }}, + {"critical-core", + [](double threshold) { + return cpu_to_status(e_status::critical, e_proc_stat_index::used, + false, threshold); + }}, + {"warning-average", + [](double threshold) { + return cpu_to_status(e_status::warning, e_proc_stat_index::used, + true, threshold); + }}, + {"critical-average", + [](double threshold) { + return cpu_to_status(e_status::critical, e_proc_stat_index::used, + true, threshold); + }}, + BY_TYPE_CPU_TO_STATUS(user), + BY_TYPE_CPU_TO_STATUS(nice), + BY_TYPE_CPU_TO_STATUS(system), + BY_TYPE_CPU_TO_STATUS(iowait), + BY_TYPE_CPU_TO_STATUS(guest)}; + +/** + * @brief Construct a new check cpu::check cpu object + * + * @param io_context + * @param logger + * @param first_start_expected start expected + * @param check_interval check interval between two checks (not only this but + * also others) + * @param serv service + * @param cmd_name + * @param cmd_line + * @param args native plugin arguments + * @param cnf engine configuration received object + * @param handler called at measure completion + */ +check_cpu::check_cpu(const std::shared_ptr& io_context, + const std::shared_ptr& logger, + time_point first_start_expected, + duration check_interval, + const std::string& serv, + const std::string& cmd_name, + const std::string& cmd_line, + const rapidjson::Value& args, + const engine_to_agent_request_ptr& cnf, + check::completion_handler&& handler) + : check(io_context, + logger, + first_start_expected, + check_interval, + serv, + cmd_name, + cmd_line, + cnf, + std::move(handler)), + + 
_nb_core(std::thread::hardware_concurrency()), + _cpu_detailed(false), + _measure_timer(*io_context) { + com::centreon::common::rapidjson_helper arg(args); + if (args.IsObject()) { + for (auto member_iter = args.MemberBegin(); member_iter != args.MemberEnd(); + ++member_iter) { + auto cpu_to_status_search = _label_to_cpu_to_status.find( + absl::AsciiStrToLower(member_iter->name.GetString())); + if (cpu_to_status_search != _label_to_cpu_to_status.end()) { + const rapidjson::Value& val = member_iter->value; + if (val.IsFloat() || val.IsInt() || val.IsUint() || val.IsInt64() || + val.IsUint64()) { + check_cpu_detail::cpu_to_status cpu_checker = + cpu_to_status_search->second(member_iter->value.GetDouble() / + 100); + _cpu_to_status.emplace( + std::make_tuple(cpu_checker.get_proc_stat_index(), + cpu_checker.is_average(), + cpu_checker.get_status()), + cpu_checker); + } else if (val.IsString()) { + auto to_conv = val.GetString(); + double dval; + if (absl::SimpleAtod(to_conv, &dval)) { + check_cpu_detail::cpu_to_status cpu_checker = + cpu_to_status_search->second(dval / 100); + _cpu_to_status.emplace( + std::make_tuple(cpu_checker.get_proc_stat_index(), + cpu_checker.is_average(), + cpu_checker.get_status()), + cpu_checker); + } else { + SPDLOG_LOGGER_ERROR( + logger, + "command: {}, value is not a number for parameter {}: {}", + cmd_name, member_iter->name, val); + } + + } else { + SPDLOG_LOGGER_ERROR(logger, + "command: {}, bad value for parameter {}: {}", + cmd_name, member_iter->name, val); + } + } else if (member_iter->name == "cpu-detailed") { + _cpu_detailed = arg.get_bool("cpu-detailed", false); + } else { + SPDLOG_LOGGER_ERROR(logger, "command: {}, unknown parameter: {}", + cmd_name, member_iter->name); + } + } + } +} + /** - * @brief in order to compile waiting for nex implementation + * @brief start a measure + * measure duration is the min of timeout - 1s, check_interval - 1s * + * @param timeout */ -class dummy {}; \ No newline at end of file +void 
check_cpu::start_check(const duration& timeout) { + if (!check::_start_check(timeout)) { + return; + } + + std::unique_ptr begin = + std::make_unique(_nb_core); + + time_point end_measure = std::chrono::system_clock::now() + timeout; + time_point end_measure_period = + get_start_expected() + + std::chrono::seconds(get_conf()->config().check_interval()); + + if (end_measure > end_measure_period) { + end_measure = end_measure_period; + } + + end_measure -= std::chrono::seconds(1); + + _measure_timer.expires_at(end_measure); + _measure_timer.async_wait([me = shared_from_this(), + first_measure = std::move(begin), + start_check_index = _get_running_check_index()]( + const boost::system::error_code& err) mutable { + me->_measure_timer_handler(err, start_check_index, + std::move(first_measure)); + }); +} + +constexpr std::array _sz_status = { + "OK: ", "WARNING: ", "CRITICAL: ", "UNKNOWN: "}; + +constexpr std::array + _sz_measure_name = {"user", "nice", "system", "idle", + "iowait", "interrupt", "softirq", "steal", + "guest", "guestnice", "used"}; + +/** + * @brief called at measure timer expiration + * Then we take a new snapshot of /proc/stat, compute difference with + * first_measure and generate output and perfdatas + * + * @param err asio error + * @param start_check_index passed to on_completion to validate result + * @param first_measure first snapshot to compare + */ +void check_cpu::_measure_timer_handler( + const boost::system::error_code& err, + unsigned start_check_index, + std::unique_ptr&& first_measure) { + if (err) { + return; + } + + std::string output; + std::list perfs; + + proc_stat_file new_measure(_nb_core); + + e_status worst = compute(*first_measure, new_measure, &output, &perfs); + + on_completion(start_check_index, worst, perfs, {output}); +} + +/** + * @brief compute the difference between second_measure and first_measure and + * generate status, output and perfdatas + * + * @param first_measure first snapshot of /proc/stat + * @param 
second_measure second snapshot of /proc/stat + * @param output out plugin output + * @param perfs perfdatas + * @return e_status plugin out status + */ +e_status check_cpu::compute( + const check_cpu_detail::proc_stat_file& first_measure, + const check_cpu_detail::proc_stat_file& second_measure, + std::string* output, + std::list* perfs) { + index_to_cpu delta = second_measure - first_measure; + + // we need to know per cpu status to provide no ok cpu details + boost::container::flat_map by_proc_status; + + for (const auto& checker : _cpu_to_status) { + checker.second.compute_status(delta, &by_proc_status); + } + + e_status worst = e_status::ok; + for (const auto& to_cmp : by_proc_status.sequence()) { + if (to_cmp.second > worst) { + worst = to_cmp.second; + } + } + + if (worst == e_status::ok) { // all is ok + auto average_data = delta.find(check_cpu_detail::average_cpu_index); + if (average_data != delta.end()) { + *output = fmt::format( + "OK: CPU(s) average usage is {:.2f}%", + average_data->second.get_proportional_value(e_proc_stat_index::used) * + 100); + } else { + *output = "OK: CPUs usages are ok."; + } + } else { + bool first = true; + // not all cpus ok => display detail per cpu nok + for (const auto& cpu_status : by_proc_status) { + if (cpu_status.second != e_status::ok) { + if (first) { + first = false; + } else { + output->push_back(' '); + } + *output += _sz_status[cpu_status.second]; + delta[cpu_status.first].dump(output); + } + } + } + + auto fill_perfdata = [&, this](const std::string_view& label, unsigned index, + unsigned cpu_index, + const per_cpu_time& per_cpu_data) { + double val = per_cpu_data.get_proportional_value(index); + bool is_average = cpu_index == check_cpu_detail::average_cpu_index; + common::perfdata to_add; + to_add.name(label); + to_add.unit("%"); + to_add.min(0); + to_add.max(100); + to_add.value(val * 100); + // we search cpu_to_status to get warning and critical thresholds + // warning + auto cpu_to_status_search = 
_cpu_to_status.find( + std::make_tuple(index, is_average, e_status::warning)); + if (cpu_to_status_search != _cpu_to_status.end()) { + to_add.warning_low(0); + to_add.warning(100 * cpu_to_status_search->second.get_threshold()); + } + // critical + cpu_to_status_search = _cpu_to_status.find( + std::make_tuple(index, is_average, e_status::critical)); + if (cpu_to_status_search != _cpu_to_status.end()) { + to_add.critical_low(0); + to_add.critical(100 * cpu_to_status_search->second.get_threshold()); + } + perfs->emplace_back(std::move(to_add)); + }; + + if (_cpu_detailed) { + for (const auto& by_core : delta) { + std::string cpu_name; + const char* suffix; + if (by_core.first != check_cpu_detail::average_cpu_index) { + absl::StrAppend(&cpu_name, by_core.first, "~"); + suffix = "#core.cpu.utilization.percentage"; + } else { + suffix = "#cpu.utilization.percentage"; + } + for (unsigned stat_ind = e_proc_stat_index::user; + stat_ind < e_proc_stat_index::nb_field; ++stat_ind) { + fill_perfdata((cpu_name + _sz_measure_name[stat_ind]) + suffix, + stat_ind, by_core.first, by_core.second); + } + } + + } else { + for (const auto& by_core : delta) { + std::string cpu_name; + if (by_core.first != check_cpu_detail::average_cpu_index) { + absl::StrAppend(&cpu_name, by_core.first, + "#core.cpu.utilization.percentage"); + } else { + cpu_name = "cpu.utilization.percentage"; + } + + fill_perfdata(cpu_name, e_proc_stat_index::used, by_core.first, + by_core.second); + } + } + return worst; +} diff --git a/agent/precomp_inc/precomp.hh b/agent/precomp_inc/precomp.hh index cffb6f5b781..095a128a609 100644 --- a/agent/precomp_inc/precomp.hh +++ b/agent/precomp_inc/precomp.hh @@ -21,9 +21,11 @@ #include #include +#include #include #include #include +#include #include #include @@ -31,6 +33,11 @@ #include #include +#include +#include +#include +#include +#include #include #include @@ -39,6 +46,7 @@ namespace asio = boost::asio; #include +#include #include #include diff --git 
a/agent/src/check.cc b/agent/src/check.cc index 27c29701f16..9da93141e85 100644 --- a/agent/src/check.cc +++ b/agent/src/check.cc @@ -25,7 +25,9 @@ using namespace com::centreon::agent; * * @param io_context * @param logger - * @param exp + * @param first_start_expected start expected + * @param check_interval check interval between two checks (not only this but + * also others) * @param serv * @param command_name * @param cmd_line @@ -34,13 +36,14 @@ using namespace com::centreon::agent; */ check::check(const std::shared_ptr& io_context, const std::shared_ptr& logger, - time_point exp, + time_point first_start_expected, + duration check_interval, const std::string& serv, const std::string& command_name, const std::string& cmd_line, const engine_to_agent_request_ptr& cnf, completion_handler&& handler) - : _start_expected(exp), + : _start_expected(first_start_expected, check_interval), _service(serv), _command_name(command_name), _command_line(cmd_line), @@ -51,20 +54,21 @@ check::check(const std::shared_ptr& io_context, _completion_handler(handler) {} /** - * @brief scheduler uses this method to increase start_expected + * @brief start timeout timer and init some flags used by timeout and completion + * must be called first by daughter check class + * @code {.c++} + * void my_check::start_check(const duration & timeout) { + * if (!_start_check(timeout)) + * return; + * ....do job.... 
+ * } + * @endcode * - * @param to_add - */ -void check::add_duration_to_start_expected(const duration& to_add) { - _start_expected += to_add; -} - -/** - * @brief start a asynchronous check * * @param timeout + * @return true if check can be done, false otherwise */ -void check::start_check(const duration& timeout) { +bool check::_start_check(const duration& timeout) { if (_running_check) { SPDLOG_LOGGER_ERROR(_logger, "check for service {} is already running", _service); @@ -73,14 +77,12 @@ void check::start_check(const duration& timeout) { to_call(me, 3, std::list(), {"a check is already running"}); }); - return; + return false; } - // we refresh start expected in order that next call will occur at now + check - // period - _start_expected = std::chrono::system_clock::now(); _running_check = true; _start_timeout_timer(timeout); SPDLOG_LOGGER_TRACE(_logger, "start check for service {}", _service); + return true; } /** @@ -111,6 +113,7 @@ void check::_timeout_timer_handler(const boost::system::error_code& err, if (start_check_index == _running_check_index) { SPDLOG_LOGGER_ERROR(_logger, "check timeout for service {} cmd: {}", _service, _command_name); + this->_on_timeout(); on_completion(start_check_index, 3 /*unknown*/, std::list(), {"Timeout at execution of " + _command_line}); diff --git a/agent/src/check_exec.cc b/agent/src/check_exec.cc index bd475ef5d08..281a4eaf9e0 100644 --- a/agent/src/check_exec.cc +++ b/agent/src/check_exec.cc @@ -116,7 +116,8 @@ void detail::process::_on_completion() { check_exec::check_exec(const std::shared_ptr& io_context, const std::shared_ptr& logger, - time_point exp, + time_point first_start_expected, + duration check_interval, const std::string& serv, const std::string& cmd_name, const std::string& cmd_line, @@ -124,7 +125,8 @@ check_exec::check_exec(const std::shared_ptr& io_context, check::completion_handler&& handler) : check(io_context, logger, - exp, + first_start_expected, + check_interval, serv, cmd_name, cmd_line, @@ 
-137,7 +139,9 @@ check_exec::check_exec(const std::shared_ptr& io_context, * @tparam handler_type * @param io_context * @param logger - * @param exp start expected + * @param first_start_expected start expected + * @param check_interval check interval between two checks (not only this but + * also others) * @param serv * @param cmd_name * @param cmd_line @@ -148,15 +152,16 @@ check_exec::check_exec(const std::shared_ptr& io_context, std::shared_ptr check_exec::load( const std::shared_ptr& io_context, const std::shared_ptr& logger, - time_point exp, + time_point first_start_expected, + duration check_interval, const std::string& serv, const std::string& cmd_name, const std::string& cmd_line, const engine_to_agent_request_ptr& cnf, check::completion_handler&& handler) { - std::shared_ptr ret = - std::make_shared(io_context, logger, exp, serv, cmd_name, - cmd_line, cnf, std::move(handler)); + std::shared_ptr ret = std::make_shared( + io_context, logger, first_start_expected, check_interval, serv, cmd_name, + cmd_line, cnf, std::move(handler)); ret->_init(); return ret; } @@ -185,7 +190,9 @@ void check_exec::_init() { * @param timeout */ void check_exec::start_check(const duration& timeout) { - check::start_check(timeout); + if (!check::_start_check(timeout)) { + return; + } if (!_process) { _io_context->post([me = check::shared_from_this(), start_check_index = _get_running_check_index()]() { @@ -220,24 +227,25 @@ void check_exec::start_check(const duration& timeout) { } } +/** + * @brief get process id of the check (only used by tests) + * + * @return int + */ +int check_exec::get_pid() const { + if (!_process) { + return 0; + } + return _process->get_pid(); +} /** * @brief process is killed in case of timeout and handler is called * * @param err * @param start_check_index */ -void check_exec::_timeout_timer_handler(const boost::system::error_code& err, - unsigned start_check_index) { - if (err) { - return; - } - if (start_check_index == _get_running_check_index()) { 
- _process->kill(); - check::_timeout_timer_handler(err, start_check_index); - } else { - SPDLOG_LOGGER_ERROR(_logger, "start_check_index={}, running_index={}", - start_check_index, _get_running_check_index()); - } +void check_exec::_on_timeout() { + _process->kill(); } /** diff --git a/agent/src/config.cc b/agent/src/config.cc index d15de69aead..334eaf4f616 100644 --- a/agent/src/config.cc +++ b/agent/src/config.cc @@ -61,7 +61,7 @@ const std::string_view config::config_schema(R"( "description": "Name of the SSL certification authority", "type": "string" }, - "reverse_connection": { + "reversed_grpc_streaming": { "description": "Set to true to make Engine connect to the agent. Requires the agent to be configured as a server. Default: false", "type": "boolean" }, @@ -144,5 +144,5 @@ config::config(const std::string& path) { if (_host.empty()) { _host = boost::asio::ip::host_name(); } - _reverse_connection = json_config.get_bool("reverse_connection", false); + _reverse_connection = json_config.get_bool("reversed_grpc_streaming", false); } diff --git a/agent/src/config_win.cc b/agent/src/config_win.cc index 9fe35068904..a1315c3697d 100644 --- a/agent/src/config_win.cc +++ b/agent/src/config_win.cc @@ -103,7 +103,7 @@ config::config(const std::string& registry_key) { if (_host.empty()) { _host = boost::asio::ip::host_name(); } - _reverse_connection = get_bool("reverse_connection"); + _reverse_connection = get_bool("reversed_grpc_streaming"); RegCloseKey(h_key); } diff --git a/agent/src/main_win.cc b/agent/src/main_win.cc index e551c5164fa..0fb1d67b1ee 100644 --- a/agent/src/main_win.cc +++ b/agent/src/main_win.cc @@ -227,6 +227,10 @@ int main(int argc, char* argv[]) { return _main(false); } + SPDLOG_INFO( + "centagent.exe will start in service mode, if you launch it from command " + "line, use --standalone flag"); + SERVICE_TABLE_ENTRY DispatchTable[] = { {SERVICE_NAME, (LPSERVICE_MAIN_FUNCTION)SvcMain}, {NULL, NULL}}; diff --git a/agent/src/scheduler.cc 
b/agent/src/scheduler.cc index 42b30224b8c..02ac1e903ad 100644 --- a/agent/src/scheduler.cc +++ b/agent/src/scheduler.cc @@ -17,10 +17,10 @@ */ #include "scheduler.hh" +#include "check_cpu.hh" #include "check_exec.hh" #include "com/centreon/common/rapidjson_helper.hh" #include "com/centreon/common/utf8.hh" -#include "com/centreon/exceptions/msg_fmt.hh" using namespace com::centreon::agent; @@ -31,6 +31,8 @@ using namespace com::centreon::agent; void scheduler::_start() { _init_export_request(); _next_send_time_point = std::chrono::system_clock::now(); + _check_time_step = + time_step(_next_send_time_point, std::chrono::milliseconds(100)); update(_conf); _start_send_timer(); _start_check_timer(); @@ -97,11 +99,13 @@ scheduler::default_config() { * */ void scheduler::_start_check_timer() { - if (_check_queue.empty() || + if (_waiting_check_queue.empty() || _active_check >= _conf->config().max_concurrent_checks()) { - _check_timer.expires_from_now(std::chrono::milliseconds(100)); + _check_time_step.increment_to_after_now(); + _check_timer.expires_at(_check_time_step.value()); } else { - _check_timer.expires_at((*_check_queue.begin())->get_start_expected()); + _check_timer.expires_at( + (*_waiting_check_queue.begin())->get_start_expected()); } _check_timer.async_wait( [me = shared_from_this()](const boost::system::error_code& err) { @@ -129,13 +133,14 @@ void scheduler::_check_timer_handler(const boost::system::error_code& err) { */ void scheduler::_start_waiting_check() { time_point now = std::chrono::system_clock::now(); - if (!_check_queue.empty()) { - for (check_queue::iterator to_check = _check_queue.begin(); - !_check_queue.empty() && to_check != _check_queue.end() && + if (!_waiting_check_queue.empty()) { + for (check_queue::iterator to_check = _waiting_check_queue.begin(); + !_waiting_check_queue.empty() && + to_check != _waiting_check_queue.end() && (*to_check)->get_start_expected() <= now && _active_check < _conf->config().max_concurrent_checks();) { 
_start_check(*to_check); - to_check = _check_queue.erase(to_check); + to_check = _waiting_check_queue.erase(to_check); } } } @@ -149,7 +154,7 @@ void scheduler::_start_waiting_check() { * @param conf */ void scheduler::update(const engine_to_agent_request_ptr& conf) { - _check_queue.clear(); + _waiting_check_queue.clear(); _active_check = 0; size_t nb_check = conf->config().services().size(); @@ -163,11 +168,13 @@ void scheduler::update(const engine_to_agent_request_ptr& conf) { conf->config().check_interval()); if (nb_check > 0) { - duration check_interval = + duration time_between_check = std::chrono::microseconds(conf->config().check_interval() * 1000000) / nb_check; time_point next = std::chrono::system_clock::now(); + _check_time_step = time_step(next, time_between_check); + auto last_inserted_iter = _waiting_check_queue.end(); for (const auto& serv : conf->config().services()) { if (_logger->level() == spdlog::level::trace) { SPDLOG_LOGGER_TRACE( @@ -180,16 +187,18 @@ void scheduler::update(const engine_to_agent_request_ptr& conf) { } try { auto check_to_schedule = _check_builder( - _io_context, _logger, next, serv.service_description(), - serv.command_name(), serv.command_line(), conf, + _io_context, _logger, next, time_between_check, + serv.service_description(), serv.command_name(), + serv.command_line(), conf, [me = shared_from_this()]( const std::shared_ptr& check, unsigned status, const std::list& perfdata, const std::list& outputs) { me->_check_handler(check, status, perfdata, outputs); }); - _check_queue.emplace(check_to_schedule); - next += check_interval; + last_inserted_iter = _waiting_check_queue.emplace_hint( + last_inserted_iter, check_to_schedule); + next += time_between_check; } catch (const std::exception& e) { SPDLOG_LOGGER_ERROR( _logger, "service: {} command:{} won't be scheduled cause: {}", @@ -199,6 +208,8 @@ void scheduler::update(const engine_to_agent_request_ptr& conf) { } _conf = conf; + + _start_waiting_check(); } /** @@ -249,13 
+260,19 @@ void scheduler::_check_handler( --_active_check; if (_alive) { - // repush for next check - check->add_duration_to_start_expected( - std::chrono::seconds(_conf->config().check_interval())); - - _check_queue.insert(check); - // we have decreased _active_check, so we can launch another check - _start_waiting_check(); + time_point min_next_start = + check->get_start_expected() + + std::chrono::seconds(_conf->config().check_interval()); + time_point now = std::chrono::system_clock::now(); + if (min_next_start < now) + min_next_start = now; + + // repush for next check and search a free start slot in queue + check->increment_start_expected_to_after_min_timepoint(min_next_start); + while (!_waiting_check_queue.insert(check).second) { + // slot yet reserved => try next + check->add_check_interval_to_start_expected(); + } } } @@ -435,6 +452,11 @@ void scheduler::_add_metric_to_scope( attrib_type->set_key("auto"); break; } + case com::centreon::common::perfdata::gauge: { + auto attrib_type = data_point->add_attributes(); + attrib_type->set_key("gauge"); + break; + } } if (perf.critical() <= std::numeric_limits::max()) { _add_exemplar(perf.critical_mode() ? 
"crit_ge" : "crit_gt", perf.critical(), @@ -510,7 +532,8 @@ void scheduler::_add_exemplar( std::shared_ptr scheduler::default_check_builder( const std::shared_ptr& io_context, const std::shared_ptr& logger, - time_point start_expected, + time_point first_start_expected, + duration check_interval, const std::string& service, const std::string& cmd_name, const std::string& cmd_line, @@ -523,16 +546,30 @@ std::shared_ptr scheduler::default_check_builder( common::rapidjson_helper::read_from_string(cmd_line); common::rapidjson_helper native_params(native_check_info); std::string_view check_type = native_params.get_string("check"); - const rapidjson::Value& args = native_params.get_member("args"); - - if (check_type == "cpu"sv) { + const rapidjson::Value* args; + if (native_params.has_member("args")) { + args = &native_params.get_member("args"); + } else { + static const rapidjson::Value no_arg; + args = &no_arg; + } +#ifdef _WINDOWS + throw exceptions::msg_fmt("command {}, unknown native check:{}", cmd_name, + cmd_line); +#else + if (check_type == "cpu_percentage"sv) { + return std::make_shared( + io_context, logger, first_start_expected, check_interval, service, + cmd_name, cmd_line, *args, conf, std::move(handler)); } else { throw exceptions::msg_fmt("command {}, unknown native check:{}", cmd_name, cmd_line); } +#endif } catch (const std::exception&) { - return check_exec::load(io_context, logger, start_expected, service, - cmd_name, cmd_line, conf, std::move(handler)); + return check_exec::load(io_context, logger, first_start_expected, + check_interval, service, cmd_name, cmd_line, conf, + std::move(handler)); } } diff --git a/agent/test/CMakeLists.txt b/agent/test/CMakeLists.txt index 897aea3b643..b038ccfaf0a 100644 --- a/agent/test/CMakeLists.txt +++ b/agent/test/CMakeLists.txt @@ -24,7 +24,7 @@ set( SRC_COMMON ) if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - set(SRC ${SRC_COMMON} config_test.cc) + set(SRC ${SRC_COMMON} config_test.cc check_linux_cpu_test.cc) else() 
set(SRC ${SRC_COMMON}) endif() diff --git a/agent/test/check_exec_test.cc b/agent/test/check_exec_test.cc index b3b547cfd13..b115d4db9c1 100644 --- a/agent/test/check_exec_test.cc +++ b/agent/test/check_exec_test.cc @@ -45,7 +45,7 @@ TEST(check_exec_test, echo) { std::mutex mut; std::condition_variable cond; std::shared_ptr check = check_exec::load( - g_io_context, spdlog::default_logger(), time_point(), serv, cmd_name, + g_io_context, spdlog::default_logger(), {}, {}, serv, cmd_name, command_line, engine_to_agent_request_ptr(), [&](const std::shared_ptr& caller, int statuss, @@ -68,12 +68,12 @@ TEST(check_exec_test, echo) { } TEST(check_exec_test, timeout) { - command_line = SLEEP_PATH " 5"; + command_line = SLEEP_PATH " 120"; int status; std::list outputs; std::condition_variable cond; std::shared_ptr check = check_exec::load( - g_io_context, spdlog::default_logger(), time_point(), serv, cmd_name, + g_io_context, spdlog::default_logger(), {}, {}, serv, cmd_name, command_line, engine_to_agent_request_ptr(), [&](const std::shared_ptr& caller, int statuss, @@ -85,13 +85,29 @@ TEST(check_exec_test, timeout) { }); check->start_check(std::chrono::seconds(1)); + int pid = check->get_pid(); + std::mutex mut; std::unique_lock l(mut); cond.wait(l); ASSERT_NE(status, 0); ASSERT_EQ(outputs.size(), 1); - ASSERT_EQ(*outputs.begin(), "Timeout at execution of " SLEEP_PATH " 5"); + ASSERT_EQ(*outputs.begin(), "Timeout at execution of " SLEEP_PATH " 120"); + ASSERT_GT(pid, 0); + std::this_thread::sleep_for(std::chrono::seconds(1)); + +#ifdef _WINDOWS + auto process_handle = + OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, FALSE, pid); + ASSERT_NE(process_handle, nullptr); + DWORD exit_code; + ASSERT_EQ(GetExitCodeProcess(process_handle, &exit_code), TRUE); + ASSERT_NE(exit_code, STILL_ACTIVE); + CloseHandle(process_handle); +#else + ASSERT_EQ(kill(pid, 0), -1); +#endif } TEST(check_exec_test, bad_command) { @@ -101,7 +117,7 @@ TEST(check_exec_test, bad_command) { 
std::condition_variable cond; std::mutex mut; std::shared_ptr check = check_exec::load( - g_io_context, spdlog::default_logger(), time_point(), serv, cmd_name, + g_io_context, spdlog::default_logger(), {}, {}, serv, cmd_name, command_line, engine_to_agent_request_ptr(), [&](const std::shared_ptr& caller, int statuss, @@ -137,7 +153,7 @@ TEST(check_exec_test, recurse_not_lock) { std::condition_variable cond; unsigned cpt = 0; std::shared_ptr check = check_exec::load( - g_io_context, spdlog::default_logger(), time_point(), serv, cmd_name, + g_io_context, spdlog::default_logger(), {}, {}, serv, cmd_name, command_line, engine_to_agent_request_ptr(), [&](const std::shared_ptr& caller, int, const std::list& perfdata, diff --git a/agent/test/check_linux_cpu_test.cc b/agent/test/check_linux_cpu_test.cc new file mode 100644 index 00000000000..aedeffd32a1 --- /dev/null +++ b/agent/test/check_linux_cpu_test.cc @@ -0,0 +1,652 @@ +/** + * Copyright 2024 Centreon + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * For more information : contact@centreon.com + */ + +#include +#include +#include + +#include "check.hh" +#include "check_cpu.hh" +#include "com/centreon/common/rapidjson_helper.hh" + +extern std::shared_ptr g_io_context; + +using namespace com::centreon::agent; + +const char* proc_sample = + R"(cpu 4360186 24538 1560174 17996659 64169 0 93611 0 0 0 +cpu0 1089609 6082 396906 4497394 15269 0 11914 0 0 0 +cpu1 1082032 5818 391692 4456828 16624 0 72471 0 0 0 +cpu2 1095585 6334 386205 4524762 16543 0 1774 0 0 0 +cpu3 1092959 6304 385370 4517673 15732 0 7451 0 0 0 +intr 213853764 0 35 0 0 0 0 0 0 0 56927 0 0 134 0 0 0 48 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 29851994 30 0 408 411 0 0 0 0 0 0 0 0 0 0 0 0 0 0 43 26 529900 571944 554845 556829 19615758 7070 8 0 0 0 0 2 15 3220 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +ctxt 529237135 +btime 1728880818 +processes 274444 +procs_running 2 +procs_blocked 0 +softirq 160085949 64462978 14075755 1523012 4364896 33 0 17578206 28638313 73392 29369364 +)"; + +TEST(proc_stat_file_test, read_sample) { + constexpr const char* test_file_path = "/tmp/proc_stat_test"; + + ::remove(test_file_path); + std::ofstream f(test_file_path); + f.write(proc_sample, strlen(proc_sample)); + + check_cpu_detail::proc_stat_file to_compare(test_file_path, 4); + + for (const auto& by_cpu : to_compare.get_values()) { + switch (by_cpu.first) { + case 0: + ASSERT_EQ(by_cpu.second.get_total(), 6017174); + ASSERT_DOUBLE_EQ(by_cpu.second.get_proportional_value( + check_cpu_detail::e_proc_stat_index::used), + (6017174.0 - 4497394.0) / 6017174); + ASSERT_DOUBLE_EQ(by_cpu.second.get_proportional_value( + check_cpu_detail::e_proc_stat_index::user), + 1089609.0 / 6017174); + ASSERT_DOUBLE_EQ(by_cpu.second.get_proportional_value( + check_cpu_detail::e_proc_stat_index::nice), + 6082.0 / 6017174); + ASSERT_DOUBLE_EQ(by_cpu.second.get_proportional_value( + check_cpu_detail::e_proc_stat_index::system), + 396906.0 / 6017174); + ASSERT_DOUBLE_EQ(by_cpu.second.get_proportional_value( + check_cpu_detail::e_proc_stat_index::idle), + 4497394.0 / 6017174); + 
ASSERT_DOUBLE_EQ(by_cpu.second.get_proportional_value( + check_cpu_detail::e_proc_stat_index::iowait), + 15269.0 / 6017174); + ASSERT_DOUBLE_EQ(by_cpu.second.get_proportional_value( + check_cpu_detail::e_proc_stat_index::irq), + 0); + ASSERT_DOUBLE_EQ(by_cpu.second.get_proportional_value( + check_cpu_detail::e_proc_stat_index::soft_irq), + 11914.0 / 6017174); + ASSERT_DOUBLE_EQ(by_cpu.second.get_proportional_value( + check_cpu_detail::e_proc_stat_index::steal), + 0); + ASSERT_DOUBLE_EQ(by_cpu.second.get_proportional_value( + check_cpu_detail::e_proc_stat_index::guest), + 0); + ASSERT_DOUBLE_EQ(by_cpu.second.get_proportional_value( + check_cpu_detail::e_proc_stat_index::guest_nice), + 0); + break; + case 1: + ASSERT_EQ(by_cpu.second.get_total(), 6025465); + break; + case 2: + ASSERT_EQ(by_cpu.second.get_total(), 6031203); + break; + case 3: + ASSERT_EQ(by_cpu.second.get_total(), 6025489); + break; + case check_cpu_detail::average_cpu_index: + ASSERT_EQ(by_cpu.second.get_total(), 24099337); + ASSERT_DOUBLE_EQ(by_cpu.second.get_proportional_value( + check_cpu_detail::e_proc_stat_index::system), + 1560174.0 / 24099337); + break; + default: + FAIL() << "unexpected cpu:" << by_cpu.first; + break; + } + } +} + +const char* proc_sample_2 = + R"(cpu 4574560 24547 1630654 18918908 68531 0 96832 0 0 0 +cpu0 1143030 6086 414440 4726292 16461 0 14668 0 0 0 +cpu1 1135947 5820 409352 4687911 17696 0 72516 0 0 0 +cpu2 1149227 6335 404370 4754742 17697 0 2149 0 0 0 +cpu3 1146355 6305 402491 4749962 16675 0 7498 0 0 0 +intr 224918652 0 35 0 0 0 0 0 0 0 57636 0 0 134 0 0 0 48 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 30628697 30 0 408 411 0 0 0 0 0 0 0 0 0 0 0 0 0 0 43 26 564911 598184 598096 594403 20270994 8610 8 0 0 0 0 2 15 3220 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +ctxt 558464714 +btime 1728880818 +processes 289981 +procs_running 1 +procs_blocked 0 +softirq 166407220 66442046 14763247 1577070 4447556 33 0 18081353 30219191 75659 30801065 +)"; + +TEST(proc_stat_file_test, no_threshold) { + constexpr const char* test_file_path = "/tmp/proc_stat_test"; + { + ::remove(test_file_path); + std::ofstream 
f(test_file_path); + f.write(proc_sample, strlen(proc_sample)); + } + constexpr const char* test_file_path2 = "/tmp/proc_stat_test2"; + { + ::remove(test_file_path2); + std::ofstream f(test_file_path2); + f.write(proc_sample_2, strlen(proc_sample_2)); + } + + check_cpu_detail::proc_stat_file first_measure(test_file_path, 4); + + check_cpu_detail::proc_stat_file second_measure(test_file_path2, 4); + + auto delta = second_measure - first_measure; + + std::string output; + std::list perfs; + + rapidjson::Document check_args; + + check_cpu checker( + g_io_context, spdlog::default_logger(), {}, {}, "serv", "cmd_name", + "cmd_line", check_args, nullptr, + [](const std::shared_ptr& caller, int status, + const std::list& perfdata, + const std::list& outputs) {}); + + e_status status = + checker.compute(first_measure, second_measure, &output, &perfs); + ASSERT_EQ(status, e_status::ok); + ASSERT_EQ(output, "OK: CPU(s) average usage is 24.08%"); + + ASSERT_EQ(perfs.size(), 5); + + constexpr float nan_to_cmp = NAN; + + for (const auto& perf : perfs) { + ASSERT_TRUE(std::isnan(perf.critical_low())); + ASSERT_TRUE(std::isnan(perf.critical())); + ASSERT_FALSE(perf.critical_mode()); + ASSERT_TRUE(std::isnan(perf.warning_low())); + ASSERT_TRUE(std::isnan(perf.warning())); + ASSERT_FALSE(perf.warning_mode()); + ASSERT_EQ(perf.min(), 0); + ASSERT_EQ(perf.max(), 100); + ASSERT_EQ(perf.unit(), "%"); + ASSERT_EQ(perf.value_type(), com::centreon::common::perfdata::gauge); + if (perf.name() == "0#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), + delta[0].get_proportional_value( + check_cpu_detail::e_proc_stat_index::used) * + 100, + 0.01); + } else if (perf.name() == "1#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), + delta[1].get_proportional_value( + check_cpu_detail::e_proc_stat_index::used) * + 100, + 0.01); + } else if (perf.name() == "2#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), + delta[2].get_proportional_value( + 
check_cpu_detail::e_proc_stat_index::used) * + 100, + 0.01); + } else if (perf.name() == "3#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), + delta[3].get_proportional_value( + check_cpu_detail::e_proc_stat_index::used) * + 100, + 0.01); + } else if (perf.name() == "cpu.utilization.percentage") { + ASSERT_NEAR( + perf.value(), + delta[check_cpu_detail::average_cpu_index].get_proportional_value( + check_cpu_detail::e_proc_stat_index::used) * + 100, + 0.01); + } else { + FAIL() << "unexpected perfdata name:" << perf.name(); + } + } +} + +constexpr std::array proc_index = { + 0, 1, 2, 3, check_cpu_detail::average_cpu_index}; + +TEST(proc_stat_file_test, no_threshold_detailed) { + constexpr const char* test_file_path = "/tmp/proc_stat_test"; + { + ::remove(test_file_path); + std::ofstream f(test_file_path); + f.write(proc_sample, strlen(proc_sample)); + } + constexpr const char* test_file_path2 = "/tmp/proc_stat_test2"; + { + ::remove(test_file_path2); + std::ofstream f(test_file_path2); + f.write(proc_sample_2, strlen(proc_sample_2)); + } + + check_cpu_detail::proc_stat_file first_measure(test_file_path, 4); + + check_cpu_detail::proc_stat_file second_measure(test_file_path2, 4); + + auto delta = second_measure - first_measure; + + std::string output; + std::list perfs; + + static const char* conf_doc = R"({"cpu-detailed":true})"; + + using namespace com::centreon::common::literals; + rapidjson::Document check_args = R"({"cpu-detailed":true})"_json; + + check_cpu checker( + g_io_context, spdlog::default_logger(), {}, {}, "serv", "cmd_name", + "cmd_line", check_args, nullptr, + [](const std::shared_ptr& caller, int status, + const std::list& perfdata, + const std::list& outputs) {}); + + e_status status = + checker.compute(first_measure, second_measure, &output, &perfs); + ASSERT_EQ(status, e_status::ok); + ASSERT_EQ(output, "OK: CPU(s) average usage is 24.08%"); + + ASSERT_EQ(perfs.size(), 55); + + for (const auto& perf : perfs) { + 
ASSERT_TRUE(std::isnan(perf.critical_low())); + ASSERT_TRUE(std::isnan(perf.critical())); + ASSERT_FALSE(perf.critical_mode()); + ASSERT_TRUE(std::isnan(perf.warning_low())); + ASSERT_TRUE(std::isnan(perf.warning())); + ASSERT_FALSE(perf.warning_mode()); + ASSERT_EQ(perf.min(), 0); + ASSERT_EQ(perf.max(), 100); + ASSERT_EQ(perf.unit(), "%"); + ASSERT_EQ(perf.value_type(), com::centreon::common::perfdata::gauge); + + unsigned cpu_index = check_cpu_detail::average_cpu_index; + std::string counter_type; + if (std::isdigit(perf.name()[0])) { + cpu_index = perf.name()[0] - '0'; + counter_type = perf.name().substr(2, perf.name().find('#') - 2); + } else { + counter_type = perf.name().substr(0, perf.name().find('#')); + } + const auto& cpu_data = delta[cpu_index]; + if (counter_type == "user") { + ASSERT_NEAR(perf.value(), + (cpu_data.get_proportional_value( + check_cpu_detail::e_proc_stat_index::user) * + 100), + 0.01); + } else if (counter_type == "nice") { + ASSERT_NEAR(perf.value(), + cpu_data.get_proportional_value( + check_cpu_detail::e_proc_stat_index::nice) * + 100, + 0.01); + } else if (counter_type == "system") { + ASSERT_NEAR(perf.value(), + cpu_data.get_proportional_value( + check_cpu_detail::e_proc_stat_index::system) * + 100, + 0.01); + } else if (counter_type == "idle") { + ASSERT_NEAR(perf.value(), + cpu_data.get_proportional_value( + check_cpu_detail::e_proc_stat_index::idle) * + 100, + 0.01); + } else if (counter_type == "iowait") { + ASSERT_NEAR(perf.value(), + cpu_data.get_proportional_value( + check_cpu_detail::e_proc_stat_index::iowait) * + 100, + 0.01); + } else if (counter_type == "interrupt") { + ASSERT_NEAR(perf.value(), + cpu_data.get_proportional_value( + check_cpu_detail::e_proc_stat_index::irq) * + 100, + 0.01); + } else if (counter_type == "softirq") { + ASSERT_NEAR(perf.value(), + cpu_data.get_proportional_value( + check_cpu_detail::e_proc_stat_index::soft_irq) * + 100, + 0.01); + } else if (counter_type == "steal") { + 
ASSERT_NEAR(perf.value(), + cpu_data.get_proportional_value( + check_cpu_detail::e_proc_stat_index::steal) * + 100, + 0.01); + } else if (counter_type == "guest") { + ASSERT_NEAR(perf.value(), + cpu_data.get_proportional_value( + check_cpu_detail::e_proc_stat_index::guest) * + 100, + 0.01); + } else if (counter_type == "guestnice") { + ASSERT_NEAR(perf.value(), + cpu_data.get_proportional_value( + check_cpu_detail::e_proc_stat_index::guest_nice) * + 100, + 0.01); + } else if (counter_type == "used") { + ASSERT_NEAR(perf.value(), + cpu_data.get_proportional_value( + check_cpu_detail::e_proc_stat_index::used) * + 100, + 0.01); + } else { + FAIL() << "unexpected perfdata name:" << perf.name(); + } + } +} + +TEST(proc_stat_file_test, threshold_nodetailed) { + constexpr const char* test_file_path = "/tmp/proc_stat_test"; + { + ::remove(test_file_path); + std::ofstream f(test_file_path); + f.write(proc_sample, strlen(proc_sample)); + } + constexpr const char* test_file_path2 = "/tmp/proc_stat_test2"; + { + ::remove(test_file_path2); + std::ofstream f(test_file_path2); + f.write(proc_sample_2, strlen(proc_sample_2)); + } + + check_cpu_detail::proc_stat_file first_measure(test_file_path, 4); + + check_cpu_detail::proc_stat_file second_measure(test_file_path2, 4); + + auto delta = second_measure - first_measure; + + std::string output; + std::list perfs; + + using namespace com::centreon::common::literals; + rapidjson::Document check_args = + R"({"warning-core" : "24.1", "critical-core" : "24.4", "warning-average" : "10", "critical-average" : "20"})"_json; + + check_cpu checker( + g_io_context, spdlog::default_logger(), {}, {}, "serv", "cmd_name", + "cmd_line", check_args, nullptr, + [](const std::shared_ptr& caller, int status, + const std::list& perfdata, + const std::list& outputs) {}); + + e_status status = + checker.compute(first_measure, second_measure, &output, &perfs); + ASSERT_EQ(status, e_status::critical); + ASSERT_EQ( + output, + R"(CRITICAL: CPU'0' Usage: 
24.66%, User 17.58%, Nice 0.00%, System 5.77%, Idle 75.34%, IOWait 0.39%, Interrupt 0.00%, Soft Irq 0.91%, Steal 0.00%, Guest 0.00%, Guest Nice 0.00% WARNING: CPU'2' Usage: 24.18%, User 17.69%, Nice 0.00%, System 5.99%, Idle 75.82%, IOWait 0.38%, Interrupt 0.00%, Soft Irq 0.12%, Steal 0.00%, Guest 0.00%, Guest Nice 0.00% CRITICAL: CPU(s) average Usage: 24.08%, User 17.65%, Nice 0.00%, System 5.80%, Idle 75.92%, IOWait 0.36%, Interrupt 0.00%, Soft Irq 0.27%, Steal 0.00%, Guest 0.00%, Guest Nice 0.00%)"); + + ASSERT_EQ(perfs.size(), 5); + + for (const auto& perf : perfs) { + ASSERT_EQ(perf.critical_low(), 0); + ASSERT_FALSE(perf.critical_mode()); + ASSERT_EQ(perf.warning_low(), 0); + if (perf.name() == "cpu.utilization.percentage") { + ASSERT_NEAR(perf.warning(), 10, 0.01); + ASSERT_NEAR(perf.critical(), 20, 0.01); + } else { + ASSERT_NEAR(perf.warning(), 24.1, 0.01); + ASSERT_NEAR(perf.critical(), 24.4, 0.01); + } + ASSERT_FALSE(perf.warning_mode()); + ASSERT_EQ(perf.min(), 0); + ASSERT_EQ(perf.max(), 100); + ASSERT_EQ(perf.unit(), "%"); + ASSERT_EQ(perf.value_type(), com::centreon::common::perfdata::gauge); + if (perf.name() == "0#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), + delta[0].get_proportional_value( + check_cpu_detail::e_proc_stat_index::used) * + 100, + 0.01); + } else if (perf.name() == "1#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), + delta[1].get_proportional_value( + check_cpu_detail::e_proc_stat_index::used) * + 100, + 0.01); + } else if (perf.name() == "2#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), + delta[2].get_proportional_value( + check_cpu_detail::e_proc_stat_index::used) * + 100, + 0.01); + } else if (perf.name() == "3#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), + delta[3].get_proportional_value( + check_cpu_detail::e_proc_stat_index::used) * + 100, + 0.01); + } else if (perf.name() == "cpu.utilization.percentage") { + ASSERT_NEAR( + perf.value(), + 
delta[check_cpu_detail::average_cpu_index].get_proportional_value( + check_cpu_detail::e_proc_stat_index::used) * + 100, + 0.01); + } else { + FAIL() << "unexpected perfdata name:" << perf.name(); + } + } +} + +TEST(proc_stat_file_test, threshold_nodetailed2) { + constexpr const char* test_file_path = "/tmp/proc_stat_test"; + { + ::remove(test_file_path); + std::ofstream f(test_file_path); + f.write(proc_sample, strlen(proc_sample)); + } + constexpr const char* test_file_path2 = "/tmp/proc_stat_test2"; + { + ::remove(test_file_path2); + std::ofstream f(test_file_path2); + f.write(proc_sample_2, strlen(proc_sample_2)); + } + + check_cpu_detail::proc_stat_file first_measure(test_file_path, 4); + + check_cpu_detail::proc_stat_file second_measure(test_file_path2, 4); + + auto delta = second_measure - first_measure; + + std::string output; + std::list perfs; + + using namespace com::centreon::common::literals; + rapidjson::Document check_args = + R"({"warning-core-iowait" : "0.36", "critical-core-iowait" : "0.39", "warning-average-iowait" : "0.3", "critical-average-iowait" : "0.4"})"_json; + + check_cpu checker( + g_io_context, spdlog::default_logger(), {}, {}, "serv", "cmd_name", + "cmd_line", check_args, nullptr, + [](const std::shared_ptr& caller, int status, + const std::list& perfdata, + const std::list& outputs) {}); + + e_status status = + checker.compute(first_measure, second_measure, &output, &perfs); + ASSERT_EQ(status, e_status::critical); + ASSERT_EQ( + output, + R"(CRITICAL: CPU'0' Usage: 24.66%, User 17.58%, Nice 0.00%, System 5.77%, Idle 75.34%, IOWait 0.39%, Interrupt 0.00%, Soft Irq 0.91%, Steal 0.00%, Guest 0.00%, Guest Nice 0.00% WARNING: CPU'2' Usage: 24.18%, User 17.69%, Nice 0.00%, System 5.99%, Idle 75.82%, IOWait 0.38%, Interrupt 0.00%, Soft Irq 0.12%, Steal 0.00%, Guest 0.00%, Guest Nice 0.00% WARNING: CPU(s) average Usage: 24.08%, User 17.65%, Nice 0.00%, System 5.80%, Idle 75.92%, IOWait 0.36%, Interrupt 0.00%, Soft Irq 0.27%, Steal 0.00%, 
Guest 0.00%, Guest Nice 0.00%)"); + + ASSERT_EQ(perfs.size(), 5); + + for (const auto& perf : perfs) { + ASSERT_TRUE(std::isnan(perf.critical_low())); + ASSERT_TRUE(std::isnan(perf.critical())); + ASSERT_FALSE(perf.critical_mode()); + ASSERT_TRUE(std::isnan(perf.warning_low())); + ASSERT_TRUE(std::isnan(perf.warning())); + ASSERT_FALSE(perf.warning_mode()); + ASSERT_EQ(perf.min(), 0); + ASSERT_EQ(perf.max(), 100); + ASSERT_EQ(perf.unit(), "%"); + ASSERT_EQ(perf.value_type(), com::centreon::common::perfdata::gauge); + } +} + +TEST(proc_stat_file_test, threshold_detailed) { + constexpr const char* test_file_path = "/tmp/proc_stat_test"; + { + ::remove(test_file_path); + std::ofstream f(test_file_path); + f.write(proc_sample, strlen(proc_sample)); + } + constexpr const char* test_file_path2 = "/tmp/proc_stat_test2"; + { + ::remove(test_file_path2); + std::ofstream f(test_file_path2); + f.write(proc_sample_2, strlen(proc_sample_2)); + } + + check_cpu_detail::proc_stat_file first_measure(test_file_path, 4); + + check_cpu_detail::proc_stat_file second_measure(test_file_path2, 4); + + auto delta = second_measure - first_measure; + + std::string output; + std::list perfs; + + using namespace com::centreon::common::literals; + rapidjson::Document check_args = + R"({"cpu-detailed":true, "warning-core" : "24.1", "critical-core" : "24.4", "warning-average" : "10", "critical-average" : "20"})"_json; + + check_cpu checker( + g_io_context, spdlog::default_logger(), {}, {}, "serv", "cmd_name", + "cmd_line", check_args, nullptr, + [](const std::shared_ptr& caller, int status, + const std::list& perfdata, + const std::list& outputs) {}); + + e_status status = + checker.compute(first_measure, second_measure, &output, &perfs); + ASSERT_EQ(status, e_status::critical); + ASSERT_EQ( + output, + R"(CRITICAL: CPU'0' Usage: 24.66%, User 17.58%, Nice 0.00%, System 5.77%, Idle 75.34%, IOWait 0.39%, Interrupt 0.00%, Soft Irq 0.91%, Steal 0.00%, Guest 0.00%, Guest Nice 0.00% WARNING: CPU'2' 
Usage: 24.18%, User 17.69%, Nice 0.00%, System 5.99%, Idle 75.82%, IOWait 0.38%, Interrupt 0.00%, Soft Irq 0.12%, Steal 0.00%, Guest 0.00%, Guest Nice 0.00% CRITICAL: CPU(s) average Usage: 24.08%, User 17.65%, Nice 0.00%, System 5.80%, Idle 75.92%, IOWait 0.36%, Interrupt 0.00%, Soft Irq 0.27%, Steal 0.00%, Guest 0.00%, Guest Nice 0.00%)"); + + ASSERT_EQ(perfs.size(), 55); + + for (const auto& perf : perfs) { + ASSERT_FALSE(perf.critical_mode()); + if (perf.name().find("used#core.cpu.utilization.percentage") != + std::string::npos || + perf.name().find("used#cpu.utilization.percentage") != + std::string::npos) { + ASSERT_EQ(perf.critical_low(), 0); + ASSERT_EQ(perf.warning_low(), 0); + if (!std::isdigit(perf.name()[0])) { + ASSERT_NEAR(perf.warning(), 10, 0.01); + ASSERT_NEAR(perf.critical(), 20, 0.01); + } else { + ASSERT_NEAR(perf.warning(), 24.1, 0.01); + ASSERT_NEAR(perf.critical(), 24.4, 0.01); + } + } else { + ASSERT_TRUE(std::isnan(perf.warning())); + ASSERT_TRUE(std::isnan(perf.critical())); + ASSERT_TRUE(std::isnan(perf.warning_low())); + ASSERT_TRUE(std::isnan(perf.critical_low())); + } + ASSERT_FALSE(perf.warning_mode()); + ASSERT_EQ(perf.min(), 0); + ASSERT_EQ(perf.max(), 100); + ASSERT_EQ(perf.unit(), "%"); + ASSERT_EQ(perf.value_type(), com::centreon::common::perfdata::gauge); + } +} + +TEST(proc_stat_file_test, threshold_detailed2) { + constexpr const char* test_file_path = "/tmp/proc_stat_test"; + { + ::remove(test_file_path); + std::ofstream f(test_file_path); + f.write(proc_sample, strlen(proc_sample)); + } + constexpr const char* test_file_path2 = "/tmp/proc_stat_test2"; + { + ::remove(test_file_path2); + std::ofstream f(test_file_path2); + f.write(proc_sample_2, strlen(proc_sample_2)); + } + + check_cpu_detail::proc_stat_file first_measure(test_file_path, 4); + + check_cpu_detail::proc_stat_file second_measure(test_file_path2, 4); + + auto delta = second_measure - first_measure; + + std::string output; + std::list perfs; + + using namespace 
com::centreon::common::literals; + rapidjson::Document check_args = + R"({"cpu-detailed":"true", "warning-core-iowait" : "0.36", "critical-core-iowait" : "0.39", "warning-average-iowait" : "0.3", "critical-average-iowait" : "0.4"})"_json; + + check_cpu checker( + g_io_context, spdlog::default_logger(), {}, {}, "serv", "cmd_name", + "cmd_line", check_args, nullptr, + [](const std::shared_ptr& caller, int status, + const std::list& perfdata, + const std::list& outputs) {}); + + e_status status = + checker.compute(first_measure, second_measure, &output, &perfs); + ASSERT_EQ(status, e_status::critical); + ASSERT_EQ( + output, + R"(CRITICAL: CPU'0' Usage: 24.66%, User 17.58%, Nice 0.00%, System 5.77%, Idle 75.34%, IOWait 0.39%, Interrupt 0.00%, Soft Irq 0.91%, Steal 0.00%, Guest 0.00%, Guest Nice 0.00% WARNING: CPU'2' Usage: 24.18%, User 17.69%, Nice 0.00%, System 5.99%, Idle 75.82%, IOWait 0.38%, Interrupt 0.00%, Soft Irq 0.12%, Steal 0.00%, Guest 0.00%, Guest Nice 0.00% WARNING: CPU(s) average Usage: 24.08%, User 17.65%, Nice 0.00%, System 5.80%, Idle 75.92%, IOWait 0.36%, Interrupt 0.00%, Soft Irq 0.27%, Steal 0.00%, Guest 0.00%, Guest Nice 0.00%)"); + + ASSERT_EQ(perfs.size(), 55); + + for (const auto& perf : perfs) { + ASSERT_FALSE(perf.critical_mode()); + if (perf.name().find("iowait#core.cpu.utilization.percentage") != + std::string::npos || + perf.name().find("iowait#cpu.utilization.percentage") != + std::string::npos) { + ASSERT_EQ(perf.critical_low(), 0); + ASSERT_EQ(perf.warning_low(), 0); + if (!std::isdigit(perf.name()[0])) { + ASSERT_NEAR(perf.warning(), 0.3, 0.01); + ASSERT_NEAR(perf.critical(), 0.4, 0.01); + } else { + ASSERT_NEAR(perf.warning(), 0.36, 0.01); + ASSERT_NEAR(perf.critical(), 0.39, 0.01); + } + } else { + ASSERT_TRUE(std::isnan(perf.warning())); + ASSERT_TRUE(std::isnan(perf.critical())); + ASSERT_TRUE(std::isnan(perf.warning_low())); + ASSERT_TRUE(std::isnan(perf.critical_low())); + } + ASSERT_FALSE(perf.warning_mode()); + 
ASSERT_EQ(perf.min(), 0); + ASSERT_EQ(perf.max(), 100); + ASSERT_EQ(perf.unit(), "%"); + ASSERT_EQ(perf.value_type(), com::centreon::common::perfdata::gauge); + } +} diff --git a/agent/test/check_test.cc b/agent/test/check_test.cc index 1a09b0761cf..71ec5999f0e 100644 --- a/agent/test/check_test.cc +++ b/agent/test/check_test.cc @@ -30,7 +30,9 @@ class dummy_check : public check { public: void start_check(const duration& timeout) override { - check::start_check(timeout); + if (!_start_check(timeout)) { + return; + } _command_timer.expires_from_now(_command_duration); _command_timer.async_wait([me = shared_from_this(), this, running_index = _get_running_check_index()]( @@ -53,6 +55,7 @@ class dummy_check : public check { : check(g_io_context, spdlog::default_logger(), std::chrono::system_clock::now(), + std::chrono::seconds(1), serv, command_name, command_line, diff --git a/agent/test/scheduler_test.cc b/agent/test/scheduler_test.cc index 5af1a86f4dd..63354e85246 100644 --- a/agent/test/scheduler_test.cc +++ b/agent/test/scheduler_test.cc @@ -17,6 +17,7 @@ */ #include +#include "check.hh" #include "scheduler.hh" @@ -37,6 +38,7 @@ class tempo_check : public check { tempo_check(const std::shared_ptr& io_context, const std::shared_ptr& logger, time_point exp, + duration check_interval, const std::string& serv, const std::string& cmd_name, const std::string& cmd_line, @@ -47,6 +49,7 @@ class tempo_check : public check { : check(io_context, logger, exp, + check_interval, serv, cmd_name, cmd_line, @@ -62,7 +65,9 @@ class tempo_check : public check { SPDLOG_INFO("start tempo check"); check_starts.emplace_back(this, std::chrono::system_clock::now()); } - check::start_check(timeout); + if (!_start_check(timeout)) { + return; + } _completion_timer.expires_from_now(_completion_delay); _completion_timer.async_wait([me = shared_from_this(), this, check_running_index = @@ -137,8 +142,8 @@ TEST_F(scheduler_test, no_config) { [](const std::shared_ptr&) {}, [](const 
std::shared_ptr&, const std::shared_ptr&, time_point /* start expected*/, - const std::string& /*service*/, const std::string& /*cmd_name*/, - const std::string& /*cmd_line*/, + duration /* check interval */, const std::string& /*service*/, + const std::string& /*cmd_name*/, const std::string& /*cmd_line*/, const engine_to_agent_request_ptr& /*engine to agent request*/, check::completion_handler&&) { return std::shared_ptr(); }); @@ -168,32 +173,30 @@ static bool tempo_check_assert_pred(const time_point& after, } TEST_F(scheduler_test, correct_schedule) { + { + std::lock_guard l(tempo_check::check_starts_m); + tempo_check::check_starts.clear(); + } + std::shared_ptr sched = scheduler::load( g_io_context, spdlog::default_logger(), "my_host", create_conf(20, 10, 1, 50, 1), [](const std::shared_ptr&) {}, [](const std::shared_ptr& io_context, const std::shared_ptr& logger, - time_point start_expected, const std::string& service, - const std::string& cmd_name, const std::string& cmd_line, + time_point start_expected, duration check_interval, + const std::string& service, const std::string& cmd_name, + const std::string& cmd_line, const engine_to_agent_request_ptr& engine_to_agent_request, check::completion_handler&& handler) { return std::make_shared( - io_context, logger, start_expected, service, cmd_name, cmd_line, - engine_to_agent_request, 0, std::chrono::milliseconds(50), - std::move(handler)); + io_context, logger, start_expected, check_interval, service, + cmd_name, cmd_line, engine_to_agent_request, 0, + std::chrono::milliseconds(50), std::move(handler)); }); - { - std::lock_guard l(tempo_check::check_starts_m); - tempo_check::check_starts.clear(); - } - std::this_thread::sleep_for(std::chrono::milliseconds(10100)); - // we have 2 * 10 = 20 checks spread over 10 second - duration expected_interval = std::chrono::milliseconds(1000); - { std::lock_guard l(tempo_check::check_starts_m); ASSERT_GE(tempo_check::check_starts.size(), 20); @@ -204,6 +207,7 @@ 
TEST_F(scheduler_test, correct_schedule) { first = false; } else { ASSERT_NE(previous.first, check_time.first); + // check if we have a delay of 500ms between two checks ASSERT_PRED2(tempo_check_assert_pred, check_time.second, previous.second); } @@ -253,14 +257,15 @@ TEST_F(scheduler_test, time_out) { }, [](const std::shared_ptr& io_context, const std::shared_ptr& logger, - time_point start_expected, const std::string& service, - const std::string& cmd_name, const std::string& cmd_line, + time_point start_expected, duration check_interval, + const std::string& service, const std::string& cmd_name, + const std::string& cmd_line, const engine_to_agent_request_ptr& engine_to_agent_request, check::completion_handler&& handler) { return std::make_shared( - io_context, logger, start_expected, service, cmd_name, cmd_line, - engine_to_agent_request, 0, std::chrono::milliseconds(1500), - std::move(handler)); + io_context, logger, start_expected, check_interval, service, + cmd_name, cmd_line, engine_to_agent_request, 0, + std::chrono::milliseconds(1500), std::move(handler)); }); std::unique_lock l(m); export_cond.wait(l); @@ -306,14 +311,15 @@ TEST_F(scheduler_test, correct_output_examplar) { }, [](const std::shared_ptr& io_context, const std::shared_ptr& logger, - time_point start_expected, const std::string& service, - const std::string& cmd_name, const std::string& cmd_line, + time_point start_expected, duration check_interval, + const std::string& service, const std::string& cmd_name, + const std::string& cmd_line, const engine_to_agent_request_ptr& engine_to_agent_request, check::completion_handler&& handler) { return std::make_shared( - io_context, logger, start_expected, service, cmd_name, cmd_line, - engine_to_agent_request, 0, std::chrono::milliseconds(10), - std::move(handler)); + io_context, logger, start_expected, check_interval, service, + cmd_name, cmd_line, engine_to_agent_request, 0, + std::chrono::milliseconds(10), std::move(handler)); }); std::mutex m; 
std::unique_lock l(m); @@ -385,6 +391,7 @@ class concurent_check : public check { concurent_check(const std::shared_ptr& io_context, const std::shared_ptr& logger, time_point exp, + duration check_interval, const std::string& serv, const std::string& cmd_name, const std::string& cmd_line, @@ -395,6 +402,7 @@ class concurent_check : public check { : check(io_context, logger, exp, + check_interval, serv, cmd_name, cmd_line, @@ -405,7 +413,9 @@ class concurent_check : public check { _completion_delay(completion_delay) {} void start_check(const duration& timeout) override { - check::start_check(timeout); + if (!_start_check(timeout)) { + return; + } active_checks.insert(this); if (active_checks.size() > max_active_check) { max_active_check = active_checks.size(); @@ -441,13 +451,18 @@ TEST_F(scheduler_test, max_concurent) { [&](const std::shared_ptr& req) {}, [](const std::shared_ptr& io_context, const std::shared_ptr& logger, - time_point start_expected, const std::string& service, - const std::string& cmd_name, const std::string& cmd_line, + time_point start_expected, duration check_interval, + const std::string& service, const std::string& cmd_name, + const std::string& cmd_line, const engine_to_agent_request_ptr& engine_to_agent_request, check::completion_handler&& handler) { return std::make_shared( - io_context, logger, start_expected, service, cmd_name, cmd_line, - engine_to_agent_request, 0, std::chrono::milliseconds(750), + io_context, logger, start_expected, check_interval, service, + cmd_name, cmd_line, engine_to_agent_request, 0, + std::chrono::milliseconds(750 - + 10) /*the - 10 is for some delay in test + execution from start expected*/ + , std::move(handler)); }); diff --git a/common/inc/com/centreon/common/defer.hh b/common/inc/com/centreon/common/defer.hh index 788a708de91..862ce7d0860 100644 --- a/common/inc/com/centreon/common/defer.hh +++ b/common/inc/com/centreon/common/defer.hh @@ -44,6 +44,29 @@ void defer(const std::shared_ptr& io_context, }); 
}; +/** + * @brief this function executes the handler action in delay + * + * @tparam handler_type + * @param io_context + * @param tp the time point when to execute handler + * @param handler job to do + */ +template +void defer(const std::shared_ptr& io_context, + const std::chrono::system_clock::time_point& tp, + handler_type&& handler) { + std::shared_ptr timer( + std::make_shared(*io_context)); + timer->expires_at(tp); + timer->async_wait([io_context, timer, m_handler = std::move(handler)]( + const boost::system::error_code& err) { + if (!err) { + m_handler(); + } + }); +}; + } // namespace com::centreon::common #endif diff --git a/common/inc/com/centreon/common/perfdata.hh b/common/inc/com/centreon/common/perfdata.hh index cc863df3d21..8ad6e3905a7 100644 --- a/common/inc/com/centreon/common/perfdata.hh +++ b/common/inc/com/centreon/common/perfdata.hh @@ -59,7 +59,7 @@ class perfdata { float min() const { return _min; } void min(float val) { _min = val; } const std::string& name() const { return _name; } - void name(const std::string&& val) { _name = val; } + void name(std::string_view val) { _name = val; } void resize_name(size_t new_size); const std::string& unit() const { return _unit; } void resize_unit(size_t new_size); diff --git a/common/process/inc/com/centreon/common/process/process.hh b/common/process/inc/com/centreon/common/process/process.hh index 06a6799bd3b..830ad975488 100644 --- a/common/process/inc/com/centreon/common/process/process.hh +++ b/common/process/inc/com/centreon/common/process/process.hh @@ -129,6 +129,8 @@ class process : public std::enable_shared_from_this> { virtual ~process() = default; + int get_pid(); + template void write_to_stdin(const string_class& content); diff --git a/common/process/src/process.cc b/common/process/src/process.cc index 6036a0fca19..cd6a78c0bae 100644 --- a/common/process/src/process.cc +++ b/common/process/src/process.cc @@ -167,6 +167,21 @@ process::process( } } +/** + * @brief returns pid of process, 
-1 otherwise + * + * @tparam use_mutex + * @return int + */ +template +int process::get_pid() { + detail::lock l(&_protect); + if (_proc) { + return _proc->proc.id(); + } + return -1; +} + /** * @brief start a new process, if a previous one is running, it's killed * In this function, we start child process and stdout, stderr asynchronous read diff --git a/common/tests/process_test.cc b/common/tests/process_test.cc index 325524a406a..bdf0ed82420 100644 --- a/common/tests/process_test.cc +++ b/common/tests/process_test.cc @@ -18,6 +18,8 @@ #include #include +#include +#include #include "com/centreon/common/process/process.hh" @@ -25,9 +27,11 @@ using namespace com::centreon::common; #ifdef _WINDOWS #define ECHO_PATH "tests\\echo.bat" +#define SLEEP_PATH "tests\\sleep.bat" #define END_OF_LINE "\r\n" #else #define ECHO_PATH "/bin/echo" +#define SLEEP_PATH "/bin/sleep" #define END_OF_LINE "\n" #endif @@ -233,3 +237,27 @@ TEST_F(process_test, shell_stdin_to_stdout) { } #endif + +TEST_F(process_test, kill_process) { + std::shared_ptr to_wait( + new process_wait(g_io_context, _logger, SLEEP_PATH, {"10"})); + to_wait->start_process(true); + + // wait process starts + std::this_thread::sleep_for(std::chrono::seconds(1)); + int pid = to_wait->get_pid(); + // kill process + to_wait->kill(); + std::this_thread::sleep_for(std::chrono::seconds(1)); +#ifdef _WINDOWS + auto process_handle = + OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, FALSE, pid); + ASSERT_NE(process_handle, nullptr); + DWORD exit_code; + ASSERT_EQ(GetExitCodeProcess(process_handle, &exit_code), TRUE); + ASSERT_NE(exit_code, STILL_ACTIVE); + CloseHandle(process_handle); +#else + ASSERT_EQ(kill(pid, 0), -1); +#endif +} diff --git a/common/tests/scripts/sleep.bat b/common/tests/scripts/sleep.bat new file mode 100644 index 00000000000..0866e1576ff --- /dev/null +++ b/common/tests/scripts/sleep.bat @@ -0,0 +1,2 @@ +@echo off +ping 127.0.0.1 -n1 %~1 diff --git 
a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/agent_config.hh b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/agent_config.hh index f65940cbf92..41eaa24e675 100644 --- a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/agent_config.hh +++ b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/agent_config.hh @@ -46,6 +46,8 @@ class agent_config { public: agent_config(const rapidjson::Value& json_config_v); + agent_config(); + // used for tests agent_config(uint32_t check_interval, uint32_t max_concurrent_checks, diff --git a/engine/modules/opentelemetry/src/centreon_agent/agent_config.cc b/engine/modules/opentelemetry/src/centreon_agent/agent_config.cc index 0d49927f5c7..cbea64c98f4 100644 --- a/engine/modules/opentelemetry/src/centreon_agent/agent_config.cc +++ b/engine/modules/opentelemetry/src/centreon_agent/agent_config.cc @@ -62,6 +62,11 @@ static constexpr std::string_view _config_schema(R"( } )"); +constexpr unsigned default_check_interval = 60; +constexpr unsigned default_max_concurrent_checks = 100; +constexpr unsigned default_export_period = 60; +constexpr unsigned default_check_timeout = 30; + /** * @brief Construct a new agent config::agent from json data * @@ -74,11 +79,14 @@ agent_config::agent_config(const rapidjson::Value& json_config_v) { file_content.validate(validator); - _check_interval = file_content.get_unsigned("check_interval", 60); - _max_concurrent_checks = - file_content.get_unsigned("max_concurrent_checks", 100); - _export_period = file_content.get_unsigned("export_period", 60); - _check_timeout = file_content.get_unsigned("check_timeout", 30); + _check_interval = + file_content.get_unsigned("check_interval", default_check_interval); + _max_concurrent_checks = file_content.get_unsigned( + "max_concurrent_checks", default_max_concurrent_checks); + _export_period = + 
file_content.get_unsigned("export_period", default_export_period); + _check_timeout = + file_content.get_unsigned("check_timeout", default_check_timeout); if (file_content.has_member("reverse_connections")) { const auto& reverse_array = file_content.get_member("reverse_connections"); @@ -90,6 +98,16 @@ agent_config::agent_config(const rapidjson::Value& json_config_v) { } } +/** + * @brief default constructor with the same values as default json values + * + */ +agent_config::agent_config() + : _check_interval(default_check_interval), + _max_concurrent_checks(default_max_concurrent_checks), + _export_period(default_export_period), + _check_timeout(default_check_timeout) {} + /** * @brief Constructor used by tests * diff --git a/engine/modules/opentelemetry/src/otl_config.cc b/engine/modules/opentelemetry/src/otl_config.cc index 386615aaf19..e93d5210fbd 100644 --- a/engine/modules/opentelemetry/src/otl_config.cc +++ b/engine/modules/opentelemetry/src/otl_config.cc @@ -119,6 +119,10 @@ otl_config::otl_config(const std::string_view& file_path, "nor an grpc server, nor a reverse client configured"); } + if (!_centreon_agent_config) { + _centreon_agent_config = std::make_shared(); + } + if (file_content.has_member("telegraf_conf_server")) { try { _telegraf_conf_server_config = diff --git a/tests/broker-engine/cma.robot b/tests/broker-engine/cma.robot index 624c52bcfc9..f8c8e5ba5f7 100644 --- a/tests/broker-engine/cma.robot +++ b/tests/broker-engine/cma.robot @@ -346,6 +346,72 @@ BEOTEL_CENTREON_AGENT_CHECK_HOST_CRYPTED ${result} Ctn Check Host Output Resource Status With Timeout host_1 120 ${start_int} 0 HARD OK - 127.0.0.1 Should Be True ${result} resources table not updated +BEOTEL_CENTREON_AGENT_CHECK_NATIVE_CPU + [Documentation] agent check service with native check cpu and we expect to get it in check result + [Tags] broker engine opentelemetry MON-149536 + Ctn Config Engine ${1} ${2} ${2} + Ctn Add Otl ServerModule + ... 0 + ... 
{"otel_server":{"host": "0.0.0.0","port": 4317},"max_length_grpc_log":0,"centreon_agent":{"check_interval":10, "export_period":15}} + Ctn Config Add Otl Connector + ... 0 + ... OTEL connector + ... opentelemetry --processor=centreon_agent --extractor=attributes --host_path=resource_metrics.resource.attributes.host.name --service_path=resource_metrics.resource.attributes.service.name + Ctn Engine Config Replace Value In Services ${0} service_1 check_command otel_check + Ctn Set Services Passive 0 service_1 + + Ctn Engine Config Add Command ${0} otel_check {"check": "cpu_percentage"} OTEL connector + + Ctn Engine Config Set Value 0 log_level_checks trace + + Ctn Clear Db metrics + + Ctn Config Broker central + Ctn Config Broker module + Ctn Config Broker rrd + Ctn Config Centreon Agent + Ctn Broker Config Log central sql trace + + Ctn Config BBDO3 1 + Ctn Clear Retention + + ${start} Ctn Get Round Current Date + Ctn Start Broker + Ctn Start Engine + Ctn Start Agent + + # Let's wait for the otel server start + ${content} Create List unencrypted server listening on 0.0.0.0:4317 + ${result} Ctn Find In Log With Timeout ${engineLog0} ${start} ${content} 10 + Should Be True ${result} "unencrypted server listening on 0.0.0.0:4317" should be available. 
+ + ${result} Ctn Check Service Resource Status With Timeout host_1 service_1 0 60 HARD + Should Be True ${result} resources table not updated + + ${metrics_list} Create List cpu.utilization.percentage 0#core.cpu.utilization.percentage + ${result} Ctn Compare Metrics Of Service 1 ${metrics_list} 10 + Should Be True ${result} metrics not updated + + + #a small threshold to make service_1 warning + Ctn Engine Config Replace Value In Services ${0} service_1 check_command otel_check2 + + Ctn Engine Config Add Command ${0} otel_check2 {"check": "cpu_percentage", "args": {"warning-average" : "0.1"}} OTEL connector + + Ctn Reload Engine + ${result} Ctn Check Service Resource Status With Timeout host_1 service_1 1 60 SOFT + Should Be True ${result} resources table not updated + + #a small threshold to make service_1 critical + Ctn Engine Config Replace Value In Services ${0} service_1 check_command otel_check3 + + Ctn Engine Config Add Command ${0} otel_check3 {"check": "cpu_percentage", "args": {"critical-average" : "0.2", "warning-average" : "0.1"}} OTEL connector + + Ctn Reload Engine + ${result} Ctn Check Service Resource Status With Timeout host_1 service_1 2 60 SOFT + Should Be True ${result} resources table not updated + + *** Keywords *** Ctn Create Cert And Init [Documentation] create key and certificates used by agent and engine on linux side diff --git a/tests/resources/Agent.py b/tests/resources/Agent.py index 118351afe29..219e9a8078d 100644 --- a/tests/resources/Agent.py +++ b/tests/resources/Agent.py @@ -129,7 +129,7 @@ def ctn_config_reverse_centreon_agent(key_path:str = None, cert_path:str = None, ff.write(reversed_agent_encrypted_config) else: ff.write(reversed_agent_config) - ff.write(",\n \"reverse_connection\":true") + ff.write(",\n \"reversed_grpc_streaming\":true") if key_path is not None or cert_path is not None or ca_path is not None: ff.write(",\n \"encryption\":true") if key_path is not None: diff --git a/tests/resources/Broker.py 
b/tests/resources/Broker.py index e5cb6e829ca..e23c0fd98a7 100755 --- a/tests/resources/Broker.py +++ b/tests/resources/Broker.py @@ -1741,6 +1741,46 @@ def ctn_get_metrics_for_service(service_id: int, metric_name: str = "%", timeout return None +def ctn_compare_metrics_of_service(service_id: int, metrics: list, timeout: int = 60): + """ + Check whether the metrics stored for a service contain all the metric names passed in param. + + Warning: + A service is identified by a host ID and a service ID. This function should be used with caution. + + Args: + service_id (int): The ID of the service. + metrics (list): expected metric names. + timeout (int, optional): Maximum time in seconds to wait for the metrics. Defaults to 60. + + Returns: + True if all the expected metrics are found before the timeout, False otherwise. + """ + + limit = time.time() + timeout + + select_request = f"SELECT metric_name FROM metrics JOIN index_data ON index_id=id WHERE service_id={service_id}" + while time.time() < limit: + # Connect to the database + connection = pymysql.connect(host=DB_HOST, + user=DB_USER, + password=DB_PASS, + database=DB_NAME_STORAGE, + charset='utf8mb4', + cursorclass=pymysql.cursors.DictCursor) + with connection: + with connection.cursor() as cursor: + cursor.execute(select_request) + result = cursor.fetchall() + metric_in_db = [r['metric_name'] for r in result] + if set(metrics).issubset(set(metric_in_db)): + return True + time.sleep(10) + logger.console(f"no metric found for service_id={service_id}") + return False + + + def ctn_get_not_existing_metrics(count: int): """ Return a list of metrics that does not exist. 
From 52c86c45ee69cab84a749992c6ec09c182feee9a Mon Sep 17 00:00:00 2001 From: Kevin Duret Date: Wed, 6 Nov 2024 09:21:13 +0100 Subject: [PATCH 06/13] fix(gorgone): move centreon-common dependency (#1792) (#1822) --- .github/CODEOWNERS | 15 ++++++++------- .github/workflows/gorgone.yml | 6 ++---- .../centreon-gorgone-centreon-config.yaml | 2 ++ gorgone/packaging/centreon-gorgone.yaml | 4 +--- 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 8d774885215..fa12f2f70e1 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,14 +1,15 @@ * @centreon/owners-cpp -.github/** @centreon/owners-pipelines -packaging/** @centreon/owners-pipelines -selinux/** @centreon/owners-pipelines - -tests/** @centreon/owners-robot-e2e +*.pm @centreon/owners-perl +*.pl @centreon/owners-perl gorgone/ @centreon/owners-perl gorgone/docs/ @centreon/owners-doc +.github/** @centreon/owners-pipelines +**/packaging/** @centreon/owners-pipelines +**/selinux/** @centreon/owners-pipelines + +tests/** @centreon/owners-robot-e2e + gorgone/tests/robot/config/ @centreon/owners-perl -*.pm @centreon/owners-perl -*.pl @centreon/owners-perl diff --git a/.github/workflows/gorgone.yml b/.github/workflows/gorgone.yml index a39dde94490..b4725d17df7 100644 --- a/.github/workflows/gorgone.yml +++ b/.github/workflows/gorgone.yml @@ -214,11 +214,9 @@ jobs: run: | if [[ "${{ matrix.package_extension }}" == "deb" ]]; then apt update - apt install -y ./centreon-gorgone*${{ steps.parse-distrib.outputs.package_distrib_name }}* + apt install -y ./centreon-gorgone*.deb else - dnf install -y ./centreon-gorgone*${{ steps.parse-distrib.outputs.package_distrib_name }}* ./centreon-gorgone-centreon-config*${{ steps.parse-distrib.outputs.package_distrib_name }}* - # in el8 at least, there is a package for the configuration and a package for the actual code. - # this is not the case for debian, and for now I don't know why it was made any different between the 2 Os. 
+ dnf install -y ./centreon-gorgone*.rpm fi - name: Create databases diff --git a/gorgone/packaging/centreon-gorgone-centreon-config.yaml b/gorgone/packaging/centreon-gorgone-centreon-config.yaml index b6485a84883..85b0872b544 100644 --- a/gorgone/packaging/centreon-gorgone-centreon-config.yaml +++ b/gorgone/packaging/centreon-gorgone-centreon-config.yaml @@ -53,9 +53,11 @@ overrides: rpm: depends: - centreon-gorgone = ${VERSION}-${RELEASE}${DIST} + - centreon-common deb: depends: - centreon-gorgone (= ${VERSION}-${RELEASE}${DIST}) + - centreon-common replaces: - centreon-gorgone (<< 24.10.0) diff --git a/gorgone/packaging/centreon-gorgone.yaml b/gorgone/packaging/centreon-gorgone.yaml index 16e86d297b1..1c11036142a 100644 --- a/gorgone/packaging/centreon-gorgone.yaml +++ b/gorgone/packaging/centreon-gorgone.yaml @@ -156,7 +156,6 @@ scripts: overrides: rpm: depends: - - centreon-common - bzip2 - perl-Libssh-Session >= 0.8 - perl-CryptX @@ -194,8 +193,7 @@ overrides: - perl(lib) - perl(Safe) deb: - depends: # those dependencies are taken from centreon-gorgone/packaging/debian/control - - centreon-common + depends: - libdatetime-perl - libtime-parsedate-perl - libtry-tiny-perl From 68f870978111c4052c48064f7c7b38653658145a Mon Sep 17 00:00:00 2001 From: Kevin Duret Date: Wed, 6 Nov 2024 11:41:31 +0100 Subject: [PATCH 07/13] fix(ci): remove remaining release type condition (#1839) --- .github/actions/delivery/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/delivery/action.yml b/.github/actions/delivery/action.yml index 9b6328723e9..663b1f35549 100644 --- a/.github/actions/delivery/action.yml +++ b/.github/actions/delivery/action.yml @@ -146,7 +146,7 @@ runs: echo "[DEBUG] - stability: ${{ inputs.stability }}" # Make sure all required inputs are NOT empty - if [[ -z "${{ inputs.module_name }}" || -z "${{ inputs.distrib }}" || -z ${{ inputs.stability }} || -z ${{ inputs.major_version }} || -z "${{ inputs.is_cloud }}" || -z 
"${{ inputs.release_type }}" ]]; then + if [[ -z "${{ inputs.module_name }}" || -z "${{ inputs.distrib }}" || -z ${{ inputs.stability }} || -z ${{ inputs.major_version }} || -z "${{ inputs.is_cloud }}" ]]; then echo "Some mandatory inputs are empty, please check the logs." exit 1 fi From c42030814b1df34c28edab8b74a0fad13eeec1f6 Mon Sep 17 00:00:00 2001 From: jean-christophe81 <98889244+jean-christophe81@users.noreply.github.com> Date: Tue, 12 Nov 2024 11:18:43 +0100 Subject: [PATCH 08/13] Mon 152485 windows cpu native check backport 24.10 (#1854) * native cpu check code moved to a template (#1779) * MON-149537 windows check cpu (#1778) * implement windows native cpu add second_max_reconnect_backoff agent parameter a few things * fix compile and second_max_reconnect_backoff issues * fix clang compile issue --- agent/doc/agent-doc.md | 9 +- agent/inc/com/centreon/agent/config.hh | 4 + .../centreon/agent/native_check_cpu_base.hh | 246 +++++++++ .../inc/com/centreon/agent/check_cpu.hh | 135 +---- agent/native_linux/src/check_cpu.cc | 415 +++----------- .../inc/com/centreon/agent/check_cpu.hh | 137 ++++- agent/native_windows/src/check_cpu.cc | 520 ++++++++++++++++++ agent/src/config.cc | 7 + agent/src/config_win.cc | 7 +- agent/src/main.cc | 2 +- agent/src/main_win.cc | 2 +- agent/src/native_check_cpu_base.cc | 452 +++++++++++++++ agent/src/scheduler.cc | 6 - agent/test/CMakeLists.txt | 2 +- agent/test/check_linux_cpu_test.cc | 78 +-- agent/test/check_windows_cpu_test.cc | 501 +++++++++++++++++ bbdo/CMakeLists.txt | 2 +- .../com/centreon/common/grpc/grpc_config.hh | 50 +- common/grpc/src/grpc_client.cc | 5 + .../modules/opentelemetry/src/grpc_config.cc | 11 +- tests/broker-engine/cma.robot | 6 +- tests/resources/Common.py | 4 +- 22 files changed, 2053 insertions(+), 548 deletions(-) create mode 100644 agent/inc/com/centreon/agent/native_check_cpu_base.hh create mode 100644 agent/src/native_check_cpu_base.cc create mode 100644 agent/test/check_windows_cpu_test.cc diff 
--git a/agent/doc/agent-doc.md b/agent/doc/agent-doc.md index 52a0de209ee..cc6ea0a4eb6 100644 --- a/agent/doc/agent-doc.md +++ b/agent/doc/agent-doc.md @@ -107,4 +107,11 @@ Example of perfdatas in cpu-detailed mode: * 0~system#core.cpu.utilization.percentage * 1~interrupt#core.cpu.utilization.percentage * iowait#cpu.utilization.percentage -* used#cpu.utilization.percentage \ No newline at end of file +* used#cpu.utilization.percentage + +### native_check_cpu (Windows version) +The metrics are not the same as in the Linux version. We collect user, idle, kernel, interrupt and DPC times. + +There are two methods. The first one uses the internal Microsoft function NtQuerySystemInformation. Microsoft warns that its signature or data format may change at any moment, but it has been stable for many years. Note that idle time is included in kernel time, so we subtract the former from the latter. DPC time is already included in interrupt time, so we don't add it when calculating the total time. +The second one relies on performance data counters (PDH API). It gives us percentages, although their sum is not exactly 100%. That's why the default method is the first one. +The choice between the two methods is made with the 'use-nt-query-system-information' boolean parameter. 
\ No newline at end of file diff --git a/agent/inc/com/centreon/agent/config.hh b/agent/inc/com/centreon/agent/config.hh index 0cd7b9d4821..6808041f316 100644 --- a/agent/inc/com/centreon/agent/config.hh +++ b/agent/inc/com/centreon/agent/config.hh @@ -43,6 +43,7 @@ class config { std::string _ca_name; std::string _host; bool _reverse_connection; + unsigned _second_max_reconnect_backoff; public: config(const std::string& path); @@ -63,6 +64,9 @@ class config { const std::string& get_ca_name() const { return _ca_name; } const std::string& get_host() const { return _host; } bool use_reverse_connection() const { return _reverse_connection; } + unsigned get_second_max_reconnect_backoff() const { + return _second_max_reconnect_backoff; + } }; }; // namespace com::centreon::agent diff --git a/agent/inc/com/centreon/agent/native_check_cpu_base.hh b/agent/inc/com/centreon/agent/native_check_cpu_base.hh new file mode 100644 index 00000000000..32131bb30d8 --- /dev/null +++ b/agent/inc/com/centreon/agent/native_check_cpu_base.hh @@ -0,0 +1,246 @@ +/** + * Copyright 2024 Centreon + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * For more information : contact@centreon.com + */ + +#ifndef CENTREON_AGENT_NATIVE_CHECK_CPU_BASE_HH +#define CENTREON_AGENT_NATIVE_CHECK_CPU_BASE_HH + +#include "check.hh" + +namespace com::centreon::agent { + +namespace check_cpu_detail { +// all data is indexed by processor number, this fake index points to cpus +// average +constexpr unsigned average_cpu_index = std::numeric_limits::max(); + +/** + * @brief this class contains all counter for one core + * + * @tparam nb_metric number of metrics given by the kernel + */ +template +class per_cpu_time_base { + protected: + std::array _metrics; + uint64_t _total_used = 0; + uint64_t _total = 0; + + public: + per_cpu_time_base(); + + double get_proportional_value(unsigned data_index) const { + if (!_total || data_index >= nb_metric) { + return 0.0; + } + return (static_cast(_metrics[data_index])) / _total; + } + + double get_proportional_used() const { + if (!_total) { + return 0.0; + } + return (static_cast(_total_used)) / _total; + } + + /** + * @brief Set the metric object + * + * @param index index of the metric like user or cpu + * @param value + */ + void set_metric(unsigned index, uint64_t value) { + if (index < nb_metric) { + _metrics[index] = value; + } + } + + /** + * @brief Set the metric object and add value to the total + * + * @param index index of the metric like user or cpu + * @param value + */ + void set_metric_total(unsigned index, uint64_t value) { + if (index < nb_metric) { + _metrics[index] = value; + _total += value; + } + } + + /** + * @brief Set the metric object and add value to the total and total_used + * + * @param index index of the metric like user or cpu + * @param value + */ + void set_metric_total_used(unsigned index, uint64_t value) { + if (index < nb_metric) { + _metrics[index] = value; + _total_used += value; + _total += value; + } + } + + void set_total(uint64_t total) { _total = total; } + + void set_total_used(uint64_t total_used) { _total_used = total_used; } + + 
uint64_t get_total() const { return _total; } + + void dump(const unsigned& cpu_index, + const std::string_view metric_label[], + std::string* output) const; + + void dump_values(std::string* output) const; + + void subtract(const per_cpu_time_base& to_subtract); + + void add(const per_cpu_time_base& to_add); +}; + +template +using index_to_cpu = + boost::container::flat_map>; + +/** + * @brief contains one per_cpu_time_base per core and a total one + * + * @tparam nb_metric number of metrics given by the kernel + */ +template +class cpu_time_snapshot { + protected: + index_to_cpu _data; + + public: + index_to_cpu subtract(const cpu_time_snapshot& to_subtract) const; + + const index_to_cpu& get_values() const { return _data; } + + void dump(std::string* output) const; +}; + +/** + * @brief this little class compare cpu usages values to threshold and set + * plugin status + * + */ +template +class cpu_to_status { + e_status _status; + unsigned _data_index; + bool _average; + double _threshold; + + public: + cpu_to_status(e_status status, + unsigned data_index, + bool average, + double threshold) + : _status(status), + _data_index(data_index), + _average(average), + _threshold(threshold) {} + + unsigned get_proc_stat_index() const { return _data_index; } + bool is_critical() const { return _status == e_status::critical; } + bool is_average() const { return _average; } + double get_threshold() const { return _threshold; } + e_status get_status() const { return _status; } + + void compute_status( + const index_to_cpu& to_test, + boost::container::flat_map* per_cpu_status) const; +}; + +} // namespace check_cpu_detail + +/** + * @brief native cpu check base class + * + * @tparam nb_metric + */ +template +class native_check_cpu : public check { + protected: + unsigned _nb_core; + + /** + * @brief key used to store cpu_to_status + * @tparam 1 index (user, system, iowait.... 
and idle for all except idle) + * @tparam 2 true if average, false if per core + * @tparam 3 e_status warning or critical + * + */ + using cpu_to_status_key = std::tuple; + + boost::container::flat_map> + _cpu_to_status; + + bool _cpu_detailed; + + asio::system_timer _measure_timer; + + void _measure_timer_handler( + const boost::system::error_code& err, + unsigned start_check_index, + std::unique_ptr>&& + first_measure); + + e_status _compute( + const check_cpu_detail::cpu_time_snapshot& first_measure, + const check_cpu_detail::cpu_time_snapshot& second_measure, + const std::string_view summary_labels[], + const std::string_view perfdata_labels[], + std::string* output, + std::list* perfs); + + public: + native_check_cpu(const std::shared_ptr& io_context, + const std::shared_ptr& logger, + time_point first_start_expected, + duration check_interval, + const std::string& serv, + const std::string& cmd_name, + const std::string& cmd_line, + const rapidjson::Value& args, + const engine_to_agent_request_ptr& cnf, + check::completion_handler&& handler); + + virtual ~native_check_cpu() = default; + + std::shared_ptr> shared_from_this() { + return std::static_pointer_cast>( + check::shared_from_this()); + } + + virtual std::unique_ptr> + get_cpu_time_snapshot(bool first_measure) = 0; + + void start_check(const duration& timeout) override; + + virtual e_status compute( + const check_cpu_detail::cpu_time_snapshot& first_measure, + const check_cpu_detail::cpu_time_snapshot& second_measure, + std::string* output, + std::list* perfs) = 0; +}; +} // namespace com::centreon::agent + +#endif diff --git a/agent/native_linux/inc/com/centreon/agent/check_cpu.hh b/agent/native_linux/inc/com/centreon/agent/check_cpu.hh index e328055dae1..34750e2edad 100644 --- a/agent/native_linux/inc/com/centreon/agent/check_cpu.hh +++ b/agent/native_linux/inc/com/centreon/agent/check_cpu.hh @@ -19,15 +19,11 @@ #ifndef CENTREON_AGENT_CHECK_CPU_HH #define CENTREON_AGENT_CHECK_CPU_HH -#include 
"check.hh" +#include "native_check_cpu_base.hh" namespace com::centreon::agent { namespace check_cpu_detail { -// all data is indexed by processor number, this fake index points to cpus -// average value -constexpr unsigned average_cpu_index = std::numeric_limits::max(); - enum e_proc_stat_index { user = 0, nice, @@ -39,8 +35,6 @@ enum e_proc_stat_index { steal, guest, guest_nice, - used, // used = user + nice + system + iowait+ irq + soft_irq + steal + - // guest+ guest_nice nb_field }; @@ -51,103 +45,25 @@ enum e_proc_stat_index { * of all cpus * */ -class per_cpu_time { - static constexpr size_t nb_field = e_proc_stat_index::nb_field; - unsigned _data[nb_field]; +class per_cpu_time : public per_cpu_time_base { unsigned _cpu_index = 0; - unsigned _total = 0; public: + per_cpu_time() = default; per_cpu_time(const std::string_view& line); - per_cpu_time() {} unsigned get_cpu_index() const { return _cpu_index; } - unsigned get_user() const { return _data[e_proc_stat_index::user]; } - unsigned get_nice() const { return _data[e_proc_stat_index::nice]; } - unsigned get_idle() const { return _data[e_proc_stat_index::idle]; } - unsigned get_iowait() const { return _data[e_proc_stat_index::iowait]; } - unsigned get_irq() const { return _data[e_proc_stat_index::irq]; } - unsigned get_soft_irq() const { return _data[e_proc_stat_index::soft_irq]; } - unsigned get_steal() const { return _data[e_proc_stat_index::steal]; } - unsigned get_guest() const { return _data[e_proc_stat_index::guest]; } - unsigned get_guest_nice() const { - return _data[e_proc_stat_index::guest_nice]; - } - - unsigned get_value(e_proc_stat_index data_index) const { - return _data[data_index]; - } - - double get_proportional_value(unsigned data_index) const { - return (static_cast(_data[data_index])) / _total; - } - - unsigned get_total() const { return _total; } - - per_cpu_time& operator-=(const per_cpu_time& to_add); - - void dump(std::string* output) const; }; -/** - * @brief cpu statistics index by 
cpu number (cpu0,1...) - * a special index average_cpu_index is the cpus average given by first line of - * /proc/stat - * - */ -using index_to_cpu = boost::container::flat_map; - -void dump(const index_to_cpu& cpus, std::string* output); - /** * @brief datas of /proc/stat * */ -class proc_stat_file { - index_to_cpu _values; - +class proc_stat_file : public cpu_time_snapshot { public: proc_stat_file(size_t nb_to_reserve) : proc_stat_file("/proc/stat", nb_to_reserve) {} proc_stat_file(const char* proc_file, size_t nb_to_reserve); - - const index_to_cpu& get_values() const { return _values; } - - index_to_cpu operator-(const proc_stat_file& right) const; - - void dump(std::string* output) const; -}; - -/** - * @brief this little class compare cpu usages values to threshold and set - * plugin status - * - */ -class cpu_to_status { - e_status _status; - e_proc_stat_index _data_index; - bool _average; - double _threshold; - - public: - cpu_to_status(e_status status, - e_proc_stat_index data_index, - bool average, - double threshold) - : _status(status), - _data_index(data_index), - _average(average), - _threshold(threshold) {} - - e_proc_stat_index get_proc_stat_index() const { return _data_index; } - bool is_critical() const { return _status == e_status::critical; } - bool is_average() const { return _average; } - double get_threshold() const { return _threshold; } - e_status get_status() const { return _status; } - - void compute_status( - const index_to_cpu& to_test, - boost::container::flat_map* per_cpu_status) const; }; }; // namespace check_cpu_detail @@ -158,30 +74,8 @@ class cpu_to_status { * when a check starts, we read last measure and passed it to completion_handler * If we not have yet done a measure, we wait to timeout to calculate cpu usage */ -class check_cpu : public check { - unsigned _nb_core; - - bool _cpu_detailed; - - /** - * @brief key used to store cpu_to_status - * @tparam 1 index (user, system, iowait.... 
and idle for all except idle) - * @tparam 2 true if average, false if per core - * @tparam 3 e_status warning or critical - * - */ - using cpu_to_status_key = std::tuple; - - boost::container::flat_map - _cpu_to_status; - - asio::system_timer _measure_timer; - - void _measure_timer_handler( - const boost::system::error_code& err, - unsigned start_check_index, - std::unique_ptr&& first_measure); - +class check_cpu + : public native_check_cpu { public: check_cpu(const std::shared_ptr& io_context, const std::shared_ptr& logger, @@ -196,16 +90,21 @@ class check_cpu : public check { static void help(std::ostream& help_stream); - void start_check(const duration& timeout) override; - std::shared_ptr shared_from_this() { return std::static_pointer_cast(check::shared_from_this()); } - e_status compute(const check_cpu_detail::proc_stat_file& first_measure, - const check_cpu_detail::proc_stat_file& second_measure, - std::string* output, - std::list* perfs); + std::unique_ptr> + get_cpu_time_snapshot(bool first_measure) override; + + e_status compute( + const check_cpu_detail::cpu_time_snapshot< + check_cpu_detail::e_proc_stat_index::nb_field>& first_measure, + const check_cpu_detail::cpu_time_snapshot< + check_cpu_detail::e_proc_stat_index::nb_field>& second_measure, + std::string* output, + std::list* perfs) override; }; } // namespace com::centreon::agent diff --git a/agent/native_linux/src/check_cpu.cc b/agent/native_linux/src/check_cpu.cc index 153f9c66d95..89995fda77b 100644 --- a/agent/native_linux/src/check_cpu.cc +++ b/agent/native_linux/src/check_cpu.cc @@ -17,11 +17,16 @@ */ #include "check_cpu.hh" -#include "com/centreon/common/rapidjson_helper.hh" + +#include "native_check_cpu_base.cc" using namespace com::centreon::agent; using namespace com::centreon::agent::check_cpu_detail; +namespace com::centreon::agent::check_cpu_detail { +template class per_cpu_time_base; +} + /** * @brief Construct a new per cpu time::per cpu time object * it parses a line like cpu0 
2930565 15541 1250726 10453908 54490 0 27068 0 0 0 @@ -40,9 +45,8 @@ per_cpu_time::per_cpu_time(const std::string_view& line) { _cpu_index = check_cpu_detail::average_cpu_index; } - unsigned* to_fill = _data; - unsigned* end = _data + used; // used will be calculated after - + auto to_fill = _metrics.begin(); + auto end = _metrics.end(); for (++field_iter; field_iter != split_res.end(); ++field_iter, ++to_fill) { unsigned counter; if (!absl::SimpleAtoi(*field_iter, &counter)) { @@ -62,71 +66,7 @@ per_cpu_time::per_cpu_time(const std::string_view& line) { *to_fill = 0; // Calculate the 'used' CPU time by subtracting idle time from total time - _data[e_proc_stat_index::used] = _total - _data[e_proc_stat_index::idle]; -} - -/** - * @brief substract all fields and _total - * - * @param to_add - * @return per_cpu_time& (this) - */ -per_cpu_time& per_cpu_time::operator-=(const per_cpu_time& to_substract) { - unsigned* res = _data; - unsigned* end = _data + nb_field; - const unsigned* val_to_substract = to_substract._data; - for (; res < end; ++res, ++val_to_substract) { - if (*res > *val_to_substract) { - *res -= *val_to_substract; - } else { - *res = 0; - } - } - if (_total > to_substract._total) { - _total -= to_substract._total; - } else { - _total = 1; // no 0 divide - } - return *this; -} - -constexpr std::array - _sz_stat_index = {", User ", ", Nice ", ", System ", ", Idle ", - ", IOWait ", ", Interrupt ", ", Soft Irq ", ", Steal ", - ", Guest ", ", Guest Nice ", ", Usage"}; - -/** - * @brief print values summary to plugin output - * - * @param output plugin out - */ -void per_cpu_time::dump(std::string* output) const { - using namespace std::literals; - if (_cpu_index == check_cpu_detail::average_cpu_index) { - *output += - fmt::format("CPU(s) average Usage: {:.2f}%", - get_proportional_value(e_proc_stat_index::used) * 100); - } else { - *output += - fmt::format("CPU'{}' Usage: {:.2f}%", _cpu_index, - get_proportional_value(e_proc_stat_index::used) * 100); - } - - 
for (unsigned field_index = 0; field_index < e_proc_stat_index::used; - ++field_index) { - *output += _sz_stat_index[field_index]; - *output += - fmt::format("{:.2f}%", get_proportional_value(field_index) * 100); - } -} - -void com::centreon::agent::check_cpu_detail::dump(const index_to_cpu& cpus, - std::string* output) { - output->reserve(output->length() + cpus.size() * 256); - for (const auto& cpu : cpus) { - cpu.second.dump(output); - output->push_back('\n'); - } + _total_used = _total - _metrics[e_proc_stat_index::idle]; } /** @@ -137,7 +77,7 @@ void com::centreon::agent::check_cpu_detail::dump(const index_to_cpu& cpus, * @param nb_to_reserve nb host cores */ proc_stat_file::proc_stat_file(const char* proc_file, size_t nb_to_reserve) { - _values.reserve(nb_to_reserve); + _data.reserve(nb_to_reserve + 1); std::ifstream proc_stat(proc_file); char line_buff[1024]; while (1) { @@ -145,109 +85,41 @@ proc_stat_file::proc_stat_file(const char* proc_file, size_t nb_to_reserve) { proc_stat.getline(line_buff, sizeof(line_buff)); line_buff[1023] = 0; per_cpu_time to_ins(line_buff); - _values.emplace(to_ins.get_cpu_index(), to_ins); + _data.emplace(to_ins.get_cpu_index(), to_ins); } catch (const std::exception&) { return; } } } -/** - * @brief computes difference between two snapshots of /proc/stat - * - * @param right (older snapshot) - * @return index_to_cpu by cpu difference - */ -index_to_cpu proc_stat_file::operator-(const proc_stat_file& right) const { - index_to_cpu ret; - const auto& latest_values = _values; - const auto& older_values = right.get_values(); - for (const auto& latest_cpu : latest_values) { - auto search = older_values.find(latest_cpu.first); - if (search != older_values.end()) { - per_cpu_time to_ins(latest_cpu.second); - to_ins -= search->second; - ret.emplace(latest_cpu.first, to_ins); - } - } - return ret; -} - -/** - * @brief dump - * - * @param output - */ -void proc_stat_file::dump(std::string* output) const { - for (const auto& cpu : _values) 
{ - cpu.second.dump(output); - output->push_back('\n'); - } -} - -/** - * @brief compare cpu values to a threshold and update cpu status if field value - * > threshold - * - * @param to_test cpus usage to compare - * @param per_cpu_status out parameter that contains per cpu worst status - */ -void cpu_to_status::compute_status( - const index_to_cpu& to_test, - boost::container::flat_map* per_cpu_status) const { - auto check_threshold = [&, this](const index_to_cpu::value_type& values) { - double val = values.second.get_proportional_value(_data_index); - if (val > _threshold) { - auto& to_update = (*per_cpu_status)[values.first]; - // if ok (=0) and _status is warning (=1) or critical(=2), we update - if (_status > to_update) { - to_update = _status; - } - } - }; - - if (_average) { - index_to_cpu::const_iterator avg = - to_test.find(check_cpu_detail::average_cpu_index); - if (avg == to_test.end()) { - return; - } - check_threshold(*avg); - } else { - for (const auto& by_cpu : to_test) { - if (by_cpu.first == check_cpu_detail::average_cpu_index) { - continue; - } - check_threshold(by_cpu); - } - } -} +using linux_cpu_to_status = cpu_to_status; using cpu_to_status_constructor = - std::function; + std::function; #define BY_TYPE_CPU_TO_STATUS(TYPE_METRIC) \ {"warning-core-" #TYPE_METRIC, \ [](double threshold) { \ - return cpu_to_status(e_status::warning, e_proc_stat_index::TYPE_METRIC, \ - false, threshold); \ + return linux_cpu_to_status( \ + e_status::warning, e_proc_stat_index::TYPE_METRIC, false, threshold); \ }}, \ {"critical-core-" #TYPE_METRIC, \ [](double threshold) { \ - return cpu_to_status(e_status::critical, \ - e_proc_stat_index::TYPE_METRIC, false, \ - threshold); \ + return linux_cpu_to_status(e_status::critical, \ + e_proc_stat_index::TYPE_METRIC, false, \ + threshold); \ }}, \ {"warning-average-" #TYPE_METRIC, \ [](double threshold) { \ - return cpu_to_status(e_status::warning, \ - e_proc_stat_index::TYPE_METRIC, true, \ - threshold); \ + return 
linux_cpu_to_status(e_status::warning, \ + e_proc_stat_index::TYPE_METRIC, true, \ + threshold); \ }}, \ { \ "critical-average-" #TYPE_METRIC, [](double threshold) { \ - return cpu_to_status(e_status::critical, e_proc_stat_index::TYPE_METRIC, \ - true, threshold); \ + return linux_cpu_to_status(e_status::critical, \ + e_proc_stat_index::TYPE_METRIC, true, \ + threshold); \ } \ } @@ -260,23 +132,26 @@ static const absl::flat_hash_map _label_to_cpu_to_status = { {"warning-core", [](double threshold) { - return cpu_to_status(e_status::warning, e_proc_stat_index::used, - false, threshold); + return linux_cpu_to_status(e_status::warning, + e_proc_stat_index::nb_field, false, + threshold); }}, {"critical-core", [](double threshold) { - return cpu_to_status(e_status::critical, e_proc_stat_index::used, - false, threshold); + return linux_cpu_to_status(e_status::critical, + e_proc_stat_index::nb_field, false, + threshold); }}, {"warning-average", [](double threshold) { - return cpu_to_status(e_status::warning, e_proc_stat_index::used, - true, threshold); + return linux_cpu_to_status( + e_status::warning, e_proc_stat_index::nb_field, true, threshold); }}, {"critical-average", [](double threshold) { - return cpu_to_status(e_status::critical, e_proc_stat_index::used, - true, threshold); + return linux_cpu_to_status(e_status::critical, + e_proc_stat_index::nb_field, true, + threshold); }}, BY_TYPE_CPU_TO_STATUS(user), BY_TYPE_CPU_TO_STATUS(nice), @@ -309,19 +184,19 @@ check_cpu::check_cpu(const std::shared_ptr& io_context, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, check::completion_handler&& handler) - : check(io_context, - logger, - first_start_expected, - check_interval, - serv, - cmd_name, - cmd_line, - cnf, - std::move(handler)), - - _nb_core(std::thread::hardware_concurrency()), - _cpu_detailed(false), - _measure_timer(*io_context) { + : native_check_cpu( + io_context, + logger, + first_start_expected, + check_interval, + serv, + cmd_name, + 
cmd_line, + args, + cnf, + std::move(handler)) + +{ com::centreon::common::rapidjson_helper arg(args); if (args.IsObject()) { for (auto member_iter = args.MemberBegin(); member_iter != args.MemberEnd(); @@ -363,9 +238,7 @@ check_cpu::check_cpu(const std::shared_ptr& io_context, "command: {}, bad value for parameter {}: {}", cmd_name, member_iter->name, val); } - } else if (member_iter->name == "cpu-detailed") { - _cpu_detailed = arg.get_bool("cpu-detailed", false); - } else { + } else if (member_iter->name != "cpu-detailed") { SPDLOG_LOGGER_ERROR(logger, "command: {}, unknown parameter: {}", cmd_name, member_iter->name); } @@ -373,75 +246,22 @@ check_cpu::check_cpu(const std::shared_ptr& io_context, } } -/** - * @brief start a measure - * measure duration is the min of timeout - 1s, check_interval - 1s - * - * @param timeout - */ -void check_cpu::start_check(const duration& timeout) { - if (!check::_start_check(timeout)) { - return; - } - - std::unique_ptr begin = - std::make_unique(_nb_core); - - time_point end_measure = std::chrono::system_clock::now() + timeout; - time_point end_measure_period = - get_start_expected() + - std::chrono::seconds(get_conf()->config().check_interval()); - - if (end_measure > end_measure_period) { - end_measure = end_measure_period; - } - - end_measure -= std::chrono::seconds(1); - - _measure_timer.expires_at(end_measure); - _measure_timer.async_wait([me = shared_from_this(), - first_measure = std::move(begin), - start_check_index = _get_running_check_index()]( - const boost::system::error_code& err) mutable { - me->_measure_timer_handler(err, start_check_index, - std::move(first_measure)); - }); +std::unique_ptr< + check_cpu_detail::cpu_time_snapshot> +check_cpu::get_cpu_time_snapshot([[maybe_unused]] bool first_measure) { + return std::make_unique(_nb_core); } -constexpr std::array _sz_status = { - "OK: ", "WARNING: ", "CRITICAL: ", "UNKNOWN: "}; - -constexpr std::array - _sz_measure_name = {"user", "nice", "system", "idle", - 
"iowait", "interrupt", "softirq", "steal", - "guest", "guestnice", "used"}; - -/** - * @brief called at measure timer expiration - * Then we take a new snapshot of /proc/stat, compute difference with - * first_measure and generate output and perfdatas - * - * @param err asio error - * @param start_check_index passed to on_completion to validate result - * @param first_measure first snapshot to compare - */ -void check_cpu::_measure_timer_handler( - const boost::system::error_code& err, - unsigned start_check_index, - std::unique_ptr&& first_measure) { - if (err) { - return; - } - - std::string output; - std::list perfs; - - proc_stat_file new_measure(_nb_core); - - e_status worst = compute(*first_measure, new_measure, &output, &perfs); +constexpr std::array + _sz_summary_labels = {", User ", ", Nice ", ", System ", + ", Idle ", ", IOWait ", ", Interrupt ", + ", Soft Irq ", ", Steal ", ", Guest ", + ", Guest Nice "}; - on_completion(start_check_index, worst, perfs, {output}); -} +constexpr std::array + _sz_perfdata_name = {"user", "nice", "system", "idle", + "iowait", "interrupt", "softirq", "steal", + "guest", "guestnice"}; /** * @brief compute the difference between second_measure and first_measure and @@ -454,111 +274,14 @@ void check_cpu::_measure_timer_handler( * @return e_status plugin out status */ e_status check_cpu::compute( - const check_cpu_detail::proc_stat_file& first_measure, - const check_cpu_detail::proc_stat_file& second_measure, + const check_cpu_detail::cpu_time_snapshot< + check_cpu_detail::e_proc_stat_index::nb_field>& first_measure, + const check_cpu_detail::cpu_time_snapshot< + check_cpu_detail::e_proc_stat_index::nb_field>& second_measure, std::string* output, std::list* perfs) { - index_to_cpu delta = second_measure - first_measure; + output->reserve(256 * _nb_core); - // we need to know per cpu status to provide no ok cpu details - boost::container::flat_map by_proc_status; - - for (const auto& checker : _cpu_to_status) { - 
checker.second.compute_status(delta, &by_proc_status); - } - - e_status worst = e_status::ok; - for (const auto& to_cmp : by_proc_status.sequence()) { - if (to_cmp.second > worst) { - worst = to_cmp.second; - } - } - - if (worst == e_status::ok) { // all is ok - auto average_data = delta.find(check_cpu_detail::average_cpu_index); - if (average_data != delta.end()) { - *output = fmt::format( - "OK: CPU(s) average usage is {:.2f}%", - average_data->second.get_proportional_value(e_proc_stat_index::used) * - 100); - } else { - *output = "OK: CPUs usages are ok."; - } - } else { - bool first = true; - // not all cpus ok => display detail per cpu nok - for (const auto& cpu_status : by_proc_status) { - if (cpu_status.second != e_status::ok) { - if (first) { - first = false; - } else { - output->push_back(' '); - } - *output += _sz_status[cpu_status.second]; - delta[cpu_status.first].dump(output); - } - } - } - - auto fill_perfdata = [&, this](const std::string_view& label, unsigned index, - unsigned cpu_index, - const per_cpu_time& per_cpu_data) { - double val = per_cpu_data.get_proportional_value(index); - bool is_average = cpu_index == check_cpu_detail::average_cpu_index; - common::perfdata to_add; - to_add.name(label); - to_add.unit("%"); - to_add.min(0); - to_add.max(100); - to_add.value(val * 100); - // we search cpu_to_status to get warning and critical thresholds - // warning - auto cpu_to_status_search = _cpu_to_status.find( - std::make_tuple(index, is_average, e_status::warning)); - if (cpu_to_status_search != _cpu_to_status.end()) { - to_add.warning_low(0); - to_add.warning(100 * cpu_to_status_search->second.get_threshold()); - } - // critical - cpu_to_status_search = _cpu_to_status.find( - std::make_tuple(index, is_average, e_status::critical)); - if (cpu_to_status_search != _cpu_to_status.end()) { - to_add.critical_low(0); - to_add.critical(100 * cpu_to_status_search->second.get_threshold()); - } - perfs->emplace_back(std::move(to_add)); - }; - - if 
(_cpu_detailed) { - for (const auto& by_core : delta) { - std::string cpu_name; - const char* suffix; - if (by_core.first != check_cpu_detail::average_cpu_index) { - absl::StrAppend(&cpu_name, by_core.first, "~"); - suffix = "#core.cpu.utilization.percentage"; - } else { - suffix = "#cpu.utilization.percentage"; - } - for (unsigned stat_ind = e_proc_stat_index::user; - stat_ind < e_proc_stat_index::nb_field; ++stat_ind) { - fill_perfdata((cpu_name + _sz_measure_name[stat_ind]) + suffix, - stat_ind, by_core.first, by_core.second); - } - } - - } else { - for (const auto& by_core : delta) { - std::string cpu_name; - if (by_core.first != check_cpu_detail::average_cpu_index) { - absl::StrAppend(&cpu_name, by_core.first, - "#core.cpu.utilization.percentage"); - } else { - cpu_name = "cpu.utilization.percentage"; - } - - fill_perfdata(cpu_name, e_proc_stat_index::used, by_core.first, - by_core.second); - } - } - return worst; + return _compute(first_measure, second_measure, _sz_summary_labels.data(), + _sz_perfdata_name.data(), output, perfs); } diff --git a/agent/native_windows/inc/com/centreon/agent/check_cpu.hh b/agent/native_windows/inc/com/centreon/agent/check_cpu.hh index f11f02b039e..806a6cfca7b 100644 --- a/agent/native_windows/inc/com/centreon/agent/check_cpu.hh +++ b/agent/native_windows/inc/com/centreon/agent/check_cpu.hh @@ -19,6 +19,141 @@ #ifndef CENTREON_AGENT_CHECK_CPU_HH #define CENTREON_AGENT_CHECK_CPU_HH -namespace com::centreon::agent {} +#include "native_check_cpu_base.hh" +namespace com::centreon::agent { + +namespace check_cpu_detail { +enum e_proc_stat_index { user = 0, system, idle, interrupt, dpc, nb_field }; + +/**As winternl.h may be included, we define our own + * SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION */ +struct M_SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION { + LARGE_INTEGER IdleTime; + LARGE_INTEGER KernelTime; + LARGE_INTEGER UserTime; + LARGE_INTEGER DpcTime; + LARGE_INTEGER InterruptTime; + ULONG InterruptCount; +}; + +/** + * @brief this 
class contains all counter for one core contained in a + * M_SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION structure + */ +class kernel_per_cpu_time + : public per_cpu_time_base { + public: + kernel_per_cpu_time() = default; + + kernel_per_cpu_time(const M_SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION& info); +}; + +/** + * we can collect cpu metrics in two manners, the first one is to use +microsoft + * internal NtQuerySystemInformation, the second one is to use the official + * Performance Data Helper + * So we have two classes to collect metrics +** / + +/** + * @brief metrics collected by NtQuerySystemInformation + * + */ +class kernel_cpu_time_snapshot + : public cpu_time_snapshot { + public: + kernel_cpu_time_snapshot(unsigned nb_core); + + // used by TU + template + kernel_cpu_time_snapshot(processor_performance_info_iter begin, + processor_performance_info_iter end); + + void dump(std::string* output) const; +}; + +template +kernel_cpu_time_snapshot::kernel_cpu_time_snapshot( + processor_performance_info_iter begin, + processor_performance_info_iter end) { + unsigned cpu_index = 0; + for (processor_performance_info_iter it = begin; it != end; + ++it, ++cpu_index) { + _data[cpu_index] = kernel_per_cpu_time(*it); + } + + per_cpu_time_base& total = + _data[average_cpu_index]; + for (auto to_add_iter = _data.begin(); + to_add_iter != _data.end() && to_add_iter->first != average_cpu_index; + ++to_add_iter) { + total.add(to_add_iter->second); + } +} + +struct pdh_counters; + +/** + * @brief metrics collected by Performance Data Helper + * + */ +class pdh_cpu_time_snapshot + : public cpu_time_snapshot { + public: + pdh_cpu_time_snapshot(unsigned nb_core, + const pdh_counters& counters, + bool first_measure); +}; + +} // namespace check_cpu_detail + +/** + * @brief native windows check cpu + * + */ +class check_cpu + : public native_check_cpu { + // this check can collect metrics in two manners, the first one is to use the + // unofficial NtQuerySystemInformation, the second 
one is to use the official + // Performance Data Helper + bool _use_nt_query_system_information = true; + + std::unique_ptr _pdh_counters; + + public: + check_cpu(const std::shared_ptr& io_context, + const std::shared_ptr& logger, + time_point first_start_expected, + duration check_interval, + const std::string& serv, + const std::string& cmd_name, + const std::string& cmd_line, + const rapidjson::Value& args, + const engine_to_agent_request_ptr& cnf, + check::completion_handler&& handler); + + ~check_cpu(); + + static void help(std::ostream& help_stream); + + std::shared_ptr shared_from_this() { + return std::static_pointer_cast(check::shared_from_this()); + } + + std::unique_ptr> + check_cpu::get_cpu_time_snapshot(bool first_measure) override; + + e_status compute( + const check_cpu_detail::cpu_time_snapshot< + check_cpu_detail::e_proc_stat_index::nb_field>& first_measure, + const check_cpu_detail::cpu_time_snapshot< + check_cpu_detail::e_proc_stat_index::nb_field>& second_measure, + std::string* output, + std::list* perfs) override; +}; + +} // namespace com::centreon::agent #endif diff --git a/agent/native_windows/src/check_cpu.cc b/agent/native_windows/src/check_cpu.cc index e69de29bb2d..37c47bfe8d9 100644 --- a/agent/native_windows/src/check_cpu.cc +++ b/agent/native_windows/src/check_cpu.cc @@ -0,0 +1,520 @@ +/** + * Copyright 2024 Centreon + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * For more information : contact@centreon.com + */ + +#include + +#include +#include + +#include "check_cpu.hh" +#include "native_check_cpu_base.cc" + +#include "com/centreon/common/rapidjson_helper.hh" + +#pragma comment(lib, "pdh.lib") + +using namespace com::centreon::agent; +using namespace com::centreon::agent::check_cpu_detail; + +/************************************************************************** + Kernel measure method +***************************************************************************/ + +namespace com::centreon::agent::check_cpu_detail { + +// ntdll.dll handle +static HMODULE _ntdll = nullptr; + +typedef LONG(WINAPI* NtQuerySystemInformationPtr)(ULONG SystemInformationClass, + PVOID SystemInformation, + ULONG SystemInformationLength, + PULONG ReturnLength); + +// NtQuerySystemInformation function address +static NtQuerySystemInformationPtr _nt_query_system_information = nullptr; + +constexpr ULONG SystemProcessorPerformanceInformationClass = 8; + +} // namespace com::centreon::agent::check_cpu_detail + +/** + * @brief Construct a kernel_per_cpu_time object from a + * SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION + * + * @param info SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION collected by + * NtQuerySystemInformation + */ +kernel_per_cpu_time::kernel_per_cpu_time( + const M_SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION& info) { + _metrics[e_proc_stat_index::user] = info.UserTime.QuadPart; + _metrics[e_proc_stat_index::system] = + info.KernelTime.QuadPart - info.IdleTime.QuadPart; + _metrics[e_proc_stat_index::idle] = info.IdleTime.QuadPart; + _metrics[e_proc_stat_index::interrupt] = info.InterruptTime.QuadPart; + _metrics[e_proc_stat_index::dpc] = info.DpcTime.QuadPart; + _total = _metrics[e_proc_stat_index::user] + + _metrics[e_proc_stat_index::system] + + _metrics[e_proc_stat_index::idle] + + _metrics[e_proc_stat_index::interrupt]; + _total_used = _total - _metrics[e_proc_stat_index::idle]; +} + +/** + * @brief load ntdll.dll and get 
NtQuerySystemInformation address + * + */ +static void _ntdll_init() { + if (!_ntdll) { + _ntdll = LoadLibraryA("ntdll.dll"); + if (!_ntdll) { + throw std::runtime_error("Failed to load ntdll.dll"); + } + } + + if (!_nt_query_system_information) + // Obtenir le pointeur de fonction NtQuerySystemInformation + _nt_query_system_information = (NtQuerySystemInformationPtr)GetProcAddress( + _ntdll, "NtQuerySystemInformation"); + if (!_nt_query_system_information) { + FreeLibrary(_ntdll); + throw std::runtime_error( + "Failed to get address of NtQuerySystemInformation"); + } +} + +/** + * @brief Construct a new kernel cpu time snapshot::kernel cpu time snapshot + * object it loads alls CPUs time and compute the average + * + * @param nb_core + */ +kernel_cpu_time_snapshot::kernel_cpu_time_snapshot(unsigned nb_core) { + std::unique_ptr buffer( + new M_SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION[nb_core]); + ULONG buffer_size = + sizeof(M_SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION) * nb_core; + ULONG return_length = 0; + + memset(buffer.get(), 0, buffer_size); + + if (_nt_query_system_information(SystemProcessorPerformanceInformationClass, + buffer.get(), buffer_size, + &return_length) != 0) { + throw std::runtime_error("Failed to get kernel cpu time"); + } + + for (unsigned i = 0; i < nb_core; ++i) { + _data[i] = kernel_per_cpu_time(buffer[i]); + } + per_cpu_time_base& total = + _data[average_cpu_index]; + for (auto to_add_iter = _data.begin(); + to_add_iter != _data.end() && to_add_iter->first != average_cpu_index; + ++to_add_iter) { + total.add(to_add_iter->second); + } +} + +/** + * @brief used for debug, dump all values + * + * @param output + */ +void kernel_cpu_time_snapshot::dump(std::string* output) const { + cpu_time_snapshot::dump(output); +} + +/************************************************************************** + Pdh measure method +***************************************************************************/ + +namespace com::centreon::agent::check_cpu_detail 
{ +struct pdh_counters { + HQUERY query; + HCOUNTER user; + HCOUNTER idle; + HCOUNTER kernel; + HCOUNTER interrupt; + HCOUNTER dpc; + + pdh_counters(); + + ~pdh_counters(); +}; +} // namespace com::centreon::agent::check_cpu_detail + +pdh_counters::pdh_counters() : query(nullptr) { + if (PdhOpenQuery(nullptr, 0, &query) != ERROR_SUCCESS) { + throw std::runtime_error("Failed to open pdh query"); + } + + if (PdhAddEnglishCounterA(query, "\\Processor(*)\\% User Time", 0, &user) != + ERROR_SUCCESS) { + throw std::runtime_error("Failed to add counter user"); + } + + if (PdhAddEnglishCounterA(query, "\\Processor(*)\\% Idle Time", 0, &idle) != + ERROR_SUCCESS) { + throw std::runtime_error("Failed to add counter idle"); + } + + if (PdhAddEnglishCounterA(query, "\\Processor(*)\\% Privileged Time", 0, + &kernel) != ERROR_SUCCESS) { + throw std::runtime_error("Failed to add counter kernel"); + } + + if (PdhAddEnglishCounterA(query, "\\Processor(*)\\% Interrupt Time", 0, + &interrupt) != ERROR_SUCCESS) { + throw std::runtime_error("Failed to add counter interrupt"); + } + + if (PdhAddEnglishCounterA(query, "\\Processor(*)\\% DPC Time", 0, &dpc) != + ERROR_SUCCESS) { + throw std::runtime_error("Failed to add counter dpc"); + } +} + +pdh_counters::~pdh_counters() { + if (query) + PdhCloseQuery(query); +} + +/** + * @brief Construct a new pdh cpu time snapshot::pdh cpu time snapshot object + * when we use pdh, we collect data twice, the first time we only collect query, + * the second collect and get counters values + * @param nb_core + * @param first_measure if true, we only collect query data + */ +pdh_cpu_time_snapshot::pdh_cpu_time_snapshot(unsigned nb_core, + const pdh_counters& counters, + bool first_measure) { + if (PdhCollectQueryData(counters.query) != ERROR_SUCCESS) { + throw std::runtime_error("Failed to collect query data"); + } + + if (first_measure) { + return; + } + + DWORD orginal_buffer_size = 0; + DWORD item_count = 0; + unsigned cpu_index = 0; + + PDH_STATUS 
status = + PdhGetFormattedCounterArrayA(counters.user, PDH_FMT_DOUBLE, + &orginal_buffer_size, &item_count, nullptr); + if (status != PDH_MORE_DATA) { + throw exceptions::msg_fmt("Failed to get user pdh counter array size: {:x}", + static_cast(status)); + } + + orginal_buffer_size = + (orginal_buffer_size / sizeof(PDH_FMT_COUNTERVALUE_ITEM_A)) * + sizeof(PDH_FMT_COUNTERVALUE_ITEM_A) + + sizeof(PDH_FMT_COUNTERVALUE_ITEM_A); + std::unique_ptr buffer( + new PDH_FMT_COUNTERVALUE_ITEM_A[orginal_buffer_size / + sizeof(PDH_FMT_COUNTERVALUE_ITEM_A)]); + const PDH_FMT_COUNTERVALUE_ITEM_A* buffer_end = buffer.get() + nb_core + 1; + + DWORD buffer_size = orginal_buffer_size; + if (PdhGetFormattedCounterArrayA(counters.user, PDH_FMT_DOUBLE, &buffer_size, + &item_count, + buffer.get()) == ERROR_SUCCESS) { + for (const PDH_FMT_COUNTERVALUE_ITEM_A* it = buffer.get(); it < buffer_end; + ++it) { + if (!absl::SimpleAtoi(it->szName, &cpu_index)) { + cpu_index = average_cpu_index; + } + // we multiply by 100 to store 2 decimal after comma in an integer + _data[cpu_index].set_metric_total_used(e_proc_stat_index::user, + it->FmtValue.doubleValue * 100); + } + } + + buffer_size = orginal_buffer_size; + if (PdhGetFormattedCounterArrayA(counters.kernel, PDH_FMT_DOUBLE, + &buffer_size, &item_count, + buffer.get()) == ERROR_SUCCESS) { + for (const PDH_FMT_COUNTERVALUE_ITEM_A* it = buffer.get(); it < buffer_end; + ++it) { + if (!absl::SimpleAtoi(it->szName, &cpu_index)) { + cpu_index = average_cpu_index; + } + _data[cpu_index].set_metric_total_used(e_proc_stat_index::system, + it->FmtValue.doubleValue * 100); + } + } + + buffer_size = orginal_buffer_size; + if (PdhGetFormattedCounterArrayA(counters.idle, PDH_FMT_DOUBLE, &buffer_size, + &item_count, + buffer.get()) == ERROR_SUCCESS) { + for (const PDH_FMT_COUNTERVALUE_ITEM_A* it = buffer.get(); it < buffer_end; + ++it) { + if (!absl::SimpleAtoi(it->szName, &cpu_index)) { + cpu_index = average_cpu_index; + } + 
_data[cpu_index].set_metric_total(e_proc_stat_index::idle, + it->FmtValue.doubleValue * 100); + } + } + + buffer_size = orginal_buffer_size; + if (PdhGetFormattedCounterArrayA(counters.interrupt, PDH_FMT_DOUBLE, + &buffer_size, &item_count, + buffer.get()) == ERROR_SUCCESS) { + for (const PDH_FMT_COUNTERVALUE_ITEM_A* it = buffer.get(); it < buffer_end; + ++it) { + if (!absl::SimpleAtoi(it->szName, &cpu_index)) { + cpu_index = average_cpu_index; + } + _data[cpu_index].set_metric_total_used(e_proc_stat_index::interrupt, + it->FmtValue.doubleValue * 100); + } + } + + buffer_size = orginal_buffer_size; + if (PdhGetFormattedCounterArrayA(counters.dpc, PDH_FMT_DOUBLE, &buffer_size, + &item_count, + buffer.get()) == ERROR_SUCCESS) { + for (const PDH_FMT_COUNTERVALUE_ITEM_A* it = buffer.get(); it < buffer_end; + ++it) { + if (!absl::SimpleAtoi(it->szName, &cpu_index)) { + cpu_index = average_cpu_index; + } + _data[cpu_index].set_metric(e_proc_stat_index::dpc, + it->FmtValue.doubleValue * 100); + } + } +} + +/************************************************************************** + Check cpu +***************************************************************************/ +using windows_cpu_to_status = cpu_to_status; + +using cpu_to_status_constructor = + std::function; + +#define BY_TYPE_CPU_TO_STATUS(TYPE_METRIC) \ + {"warning-core-" #TYPE_METRIC, \ + [](double threshold) { \ + return windows_cpu_to_status( \ + e_status::warning, e_proc_stat_index::TYPE_METRIC, false, threshold); \ + }}, \ + {"critical-core-" #TYPE_METRIC, \ + [](double threshold) { \ + return windows_cpu_to_status(e_status::critical, \ + e_proc_stat_index::TYPE_METRIC, false, \ + threshold); \ + }}, \ + {"warning-average-" #TYPE_METRIC, \ + [](double threshold) { \ + return windows_cpu_to_status(e_status::warning, \ + e_proc_stat_index::TYPE_METRIC, true, \ + threshold); \ + }}, \ + { \ + "critical-average-" #TYPE_METRIC, [](double threshold) { \ + return windows_cpu_to_status(e_status::critical, \ + 
e_proc_stat_index::TYPE_METRIC, true, \ + threshold); \ + } \ + } + +/** + * @brief this map is used to generate cpus values comparator from check + * configuration fields + * + */ +static const absl::flat_hash_map + _label_to_cpu_to_status = { + {"warning-core", + [](double threshold) { + return windows_cpu_to_status(e_status::warning, + e_proc_stat_index::nb_field, false, + threshold); + }}, + {"critical-core", + [](double threshold) { + return windows_cpu_to_status(e_status::critical, + e_proc_stat_index::nb_field, false, + threshold); + }}, + {"warning-average", + [](double threshold) { + return windows_cpu_to_status( + e_status::warning, e_proc_stat_index::nb_field, true, threshold); + }}, + {"critical-average", + [](double threshold) { + return windows_cpu_to_status(e_status::critical, + e_proc_stat_index::nb_field, true, + threshold); + }}, + BY_TYPE_CPU_TO_STATUS(user), + BY_TYPE_CPU_TO_STATUS(system)}; + +/** + * @brief Construct a new check cpu::check cpu object + * + * @param io_context + * @param logger + * @param first_start_expected start expected + * @param check_interval check interval between two checks (not only this but + * also others) + * @param serv service + * @param cmd_name + * @param cmd_line + * @param args native plugin arguments + * @param cnf engine configuration received object + * @param handler called at measure completion + */ +check_cpu::check_cpu(const std::shared_ptr& io_context, + const std::shared_ptr& logger, + time_point first_start_expected, + duration check_interval, + const std::string& serv, + const std::string& cmd_name, + const std::string& cmd_line, + const rapidjson::Value& args, + const engine_to_agent_request_ptr& cnf, + check::completion_handler&& handler) + : native_check_cpu( + io_context, + logger, + first_start_expected, + check_interval, + serv, + cmd_name, + cmd_line, + args, + cnf, + std::move(handler)) + +{ + com::centreon::common::rapidjson_helper arg(args); + if (args.IsObject()) { + for (auto 
member_iter = args.MemberBegin(); member_iter != args.MemberEnd(); + ++member_iter) { + auto cpu_to_status_search = _label_to_cpu_to_status.find( + absl::AsciiStrToLower(member_iter->name.GetString())); + if (cpu_to_status_search != _label_to_cpu_to_status.end()) { + const rapidjson::Value& val = member_iter->value; + if (val.IsFloat() || val.IsInt() || val.IsUint() || val.IsInt64() || + val.IsUint64()) { + check_cpu_detail::cpu_to_status cpu_checker = + cpu_to_status_search->second(member_iter->value.GetDouble() / + 100); + _cpu_to_status.emplace( + std::make_tuple(cpu_checker.get_proc_stat_index(), + cpu_checker.is_average(), + cpu_checker.get_status()), + cpu_checker); + } else if (val.IsString()) { + auto to_conv = val.GetString(); + double dval; + if (absl::SimpleAtod(to_conv, &dval)) { + check_cpu_detail::cpu_to_status cpu_checker = + cpu_to_status_search->second(dval / 100); + _cpu_to_status.emplace( + std::make_tuple(cpu_checker.get_proc_stat_index(), + cpu_checker.is_average(), + cpu_checker.get_status()), + cpu_checker); + } else { + SPDLOG_LOGGER_ERROR( + logger, + "command: {}, value is not a number for parameter {}: {}", + cmd_name, member_iter->name, val); + } + + } else { + SPDLOG_LOGGER_ERROR(logger, + "command: {}, bad value for parameter {}: {}", + cmd_name, member_iter->name, val); + } + } else if (member_iter->name == "use-nt-query-system-information") { + const rapidjson::Value& val = member_iter->value; + if (val.IsBool()) { + _use_nt_query_system_information = val.GetBool(); + } else { + SPDLOG_LOGGER_ERROR( + logger, + "command: {}, use-nt-query-system-information is not a boolean", + cmd_name); + } + } else if (member_iter->name != "cpu-detailed") { + SPDLOG_LOGGER_ERROR(logger, "command: {}, unknown parameter: {}", + cmd_name, member_iter->name); + } + } + } + if (_use_nt_query_system_information) { + _ntdll_init(); + } else { + _pdh_counters = std::make_unique(); + } +} + +check_cpu::~check_cpu() {} + +std::unique_ptr< + 
check_cpu_detail::cpu_time_snapshot> +check_cpu::get_cpu_time_snapshot(bool first_measure) { + if (_use_nt_query_system_information) { + return std::make_unique( + _nb_core); + } else { + return std::make_unique( + _nb_core, *_pdh_counters, first_measure); + } +} + +constexpr std::array + _sz_summary_labels = {", User ", ", System ", ", Idle ", ", Interrupt ", + ", Dpc Interrupt "}; + +constexpr std::array + _sz_perfdata_name = {"user", "system", "idle", "interrupt", + "dpc_interrupt"}; + +/** + * @brief compute the difference between second_measure and first_measure and + * generate status, output and perfdatas + * + * @param first_measure first snapshot of /proc/stat + * @param second_measure second snapshot of /proc/stat + * @param output out plugin output + * @param perfs perfdatas + * @return e_status plugin out status + */ +e_status check_cpu::compute( + const check_cpu_detail::cpu_time_snapshot< + check_cpu_detail::e_proc_stat_index::nb_field>& first_measure, + const check_cpu_detail::cpu_time_snapshot< + check_cpu_detail::e_proc_stat_index::nb_field>& second_measure, + std::string* output, + std::list* perfs) { + output->reserve(256 * _nb_core); + + return _compute(first_measure, second_measure, _sz_summary_labels.data(), + _sz_perfdata_name.data(), output, perfs); +} diff --git a/agent/src/config.cc b/agent/src/config.cc index 334eaf4f616..79e2c99fade 100644 --- a/agent/src/config.cc +++ b/agent/src/config.cc @@ -89,6 +89,11 @@ const std::string_view config::config_schema(R"( "description:": "Maximum number of log files to keep. Supernumerary files will be deleted. To be valid, log_files_max_size must be also be provided", "type": "integer", "min": 1 + }, + "second_max_reconnect_backoff": { + "description": "Maximum time between subsequent connection attempts, in seconds. 
Default: 60s", + "type": "integer", + "min": 0 } }, "required": [ @@ -145,4 +150,6 @@ config::config(const std::string& path) { _host = boost::asio::ip::host_name(); } _reverse_connection = json_config.get_bool("reversed_grpc_streaming", false); + _second_max_reconnect_backoff = + json_config.get_unsigned("second_max_reconnect_backoff", 60); } diff --git a/agent/src/config_win.cc b/agent/src/config_win.cc index a1315c3697d..424765e83e8 100644 --- a/agent/src/config_win.cc +++ b/agent/src/config_win.cc @@ -61,12 +61,13 @@ config::config(const std::string& registry_key) { return result == ERROR_SUCCESS && value; }; - auto get_unsigned = [&](const char* value_name) -> uint32_t { + auto get_unsigned = [&](const char* value_name, + unsigned default_value = 0) -> uint32_t { uint32_t value; DWORD size = sizeof(value); LSTATUS result = RegGetValueA(h_key, nullptr, value_name, RRF_RT_DWORD, nullptr, &value, &size); - return result == ERROR_SUCCESS ? value : 0; + return result == ERROR_SUCCESS ? value : default_value; }; _endpoint = get_sz_reg_or_default("endpoint", ""); @@ -104,6 +105,8 @@ config::config(const std::string& registry_key) { _host = boost::asio::ip::host_name(); } _reverse_connection = get_bool("reversed_grpc_streaming"); + _second_max_reconnect_backoff = + get_unsigned("second_max_reconnect_backoff", 60); RegCloseKey(h_key); } diff --git a/agent/src/main.cc b/agent/src/main.cc index 34d11ab1874..1091832d011 100644 --- a/agent/src/main.cc +++ b/agent/src/main.cc @@ -174,7 +174,7 @@ int main(int argc, char* argv[]) { read_file(conf->get_public_cert_file()), read_file(conf->get_private_key_file()), read_file(conf->get_ca_certificate_file()), conf->get_ca_name(), true, - 30); + 30, conf->get_second_max_reconnect_backoff()); } catch (const std::exception& e) { SPDLOG_CRITICAL("fail to parse input params: {}", e.what()); diff --git a/agent/src/main_win.cc b/agent/src/main_win.cc index 0fb1d67b1ee..1609a879ab9 100644 --- a/agent/src/main_win.cc +++ 
b/agent/src/main_win.cc @@ -177,7 +177,7 @@ int _main(bool service_start) { read_file(conf->get_public_cert_file()), read_file(conf->get_private_key_file()), read_file(conf->get_ca_certificate_file()), conf->get_ca_name(), true, - 30); + 30, conf->get_second_max_reconnect_backoff()); } catch (const std::exception& e) { SPDLOG_CRITICAL("fail to parse input params: {}", e.what()); diff --git a/agent/src/native_check_cpu_base.cc b/agent/src/native_check_cpu_base.cc new file mode 100644 index 00000000000..984e13d47ac --- /dev/null +++ b/agent/src/native_check_cpu_base.cc @@ -0,0 +1,452 @@ +/** + * Copyright 2024 Centreon + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * For more information : contact@centreon.com + */ + +#include "native_check_cpu_base.hh" +#include "com/centreon/common/rapidjson_helper.hh" + +using namespace com::centreon::agent; +using namespace com::centreon::agent::check_cpu_detail; + +/** + * @brief Construct a new per cpu time base::per cpu time base object + * all values are set to zero + * @tparam nb_metric + */ +template +per_cpu_time_base::per_cpu_time_base() { + _metrics.fill(0); +} + +/** + * @brief dump all values into plugin output + * + * @tparam nb_metric + * @param cpu_index cpu index or average_cpu_index for all cpus + * @param metric_label label for each metric + * @param output output string + */ +template +void per_cpu_time_base::dump(const unsigned& cpu_index, + const std::string_view metric_label[], + std::string* output) const { + if (cpu_index == average_cpu_index) { + *output += fmt::format("CPU(s) average Usage: {:.2f}%", + (static_cast(_total_used) / _total) * 100); + } else { + *output += fmt::format("CPU'{}' Usage: {:.2f}%", cpu_index, + (static_cast(_total_used) / _total) * 100); + } + + for (unsigned field_index = 0; field_index < nb_metric; ++field_index) { + *output += metric_label[field_index]; + *output += + fmt::format("{:.2f}%", get_proportional_value(field_index) * 100); + } +} + +/** + * @brief used for debugging + * + * @param output + */ +template +void per_cpu_time_base::dump_values(std::string* output) const { + for (unsigned field_index = 0; field_index < nb_metric; ++field_index) { + absl::StrAppend(output, " ", _metrics[field_index]); + } + absl::StrAppend(output, " used:", _total_used); + absl::StrAppend(output, " total:", _total); +} + +/** + * @brief subtract a per_cpu_time_base from this + * + * @tparam nb_metric + * @param to_subtract + */ +template +void per_cpu_time_base::subtract( + const per_cpu_time_base& to_subtract) { + typename std::array::iterator dest = _metrics.begin(); + typename std::array::const_iterator src = + to_subtract._metrics.begin(); 
+ for (; dest < _metrics.end(); ++dest, ++src) { + *dest -= *src; + } + _total_used -= to_subtract._total_used; + _total -= to_subtract._total; +} + +/** + * @brief add a per_cpu_time_base to this + * + * @tparam nb_metric + * @param to_add + */ +template +void per_cpu_time_base::add(const per_cpu_time_base& to_add) { + typename std::array::iterator dest = _metrics.begin(); + typename std::array::const_iterator src = + to_add._metrics.begin(); + for (; dest < _metrics.end(); ++dest, ++src) { + *dest += *src; + } + _total_used += to_add._total_used; + _total += to_add._total; +} + +/** + * @brief subtract a cpu snapshot from this + * + * @tparam nb_metric + * @param to_subtract + * @return index_to_cpu + */ +template +index_to_cpu cpu_time_snapshot::subtract( + const cpu_time_snapshot& to_subtract) const { + index_to_cpu result; + // in case of pdh, first measure is empty, so we use only second sample + if (to_subtract._data.empty()) { + return _data; + } + for (const auto& left_it : _data) { + const auto& right_it = to_subtract._data.find(left_it.first); + if (right_it == to_subtract._data.end()) { + continue; + } + per_cpu_time_base& res = result[left_it.first]; + res = left_it.second; + res.subtract(right_it->second); + } + return result; +} + +/** + * @brief used for debug, dump all values + * + * @tparam nb_metric + * @param cpus + * @param output + */ +template +void cpu_time_snapshot::dump(std::string* output) const { + output->reserve(output->size() + _data.size() * 256); + for (const auto& cpu : _data) { + output->push_back(cpu.first + '0'); + output->append(":{"); + for (unsigned i = 0; i < nb_metric; ++i) { + absl::StrAppend(output, " ", cpu.second.get_proportional_value(i)); + } + absl::StrAppend(output, " used:", cpu.second.get_proportional_used()); + output->push_back('\n'); + cpu.second.dump_values(output); + + output->append("}\n"); + } +} + +/** + * @brief update status of each cpu or all cpus if metric > threshold + * + * @tparam nb_metric + * 
@param to_test metrics + * @param per_cpu_status out: status per cpu index + */ +template +void cpu_to_status::compute_status( + const index_to_cpu& to_test, + boost::container::flat_map* per_cpu_status) const { + auto check_threshold = + [&, this](const typename index_to_cpu::value_type& values) { + double val = _data_index >= nb_metric + ? values.second.get_proportional_used() + : values.second.get_proportional_value(_data_index); + if (val > _threshold) { + auto& to_update = (*per_cpu_status)[values.first]; + // if ok (=0) and _status is warning (=1) or critical(=2), we update + if (_status > to_update) { + to_update = _status; + } + } + }; + + if (_average) { + auto avg = to_test.find(average_cpu_index); + if (avg == to_test.end()) { + return; + } + check_threshold(*avg); + } else { + for (const auto& by_cpu : to_test) { + if (by_cpu.first == average_cpu_index) { + continue; + } + check_threshold(by_cpu); + } + } +} + +/** + * @brief Construct a new check native_check_cpu cpu object + * + * @param io_context + * @param logger + * @param first_start_expected start expected + * @param check_interval check interval between two checks (not only this but + * also others) + * @param serv service + * @param cmd_name + * @param cmd_line + * @param args native plugin arguments + * @param cnf engine configuration received object + * @param handler called at measure completion + */ +template +native_check_cpu::native_check_cpu( + const std::shared_ptr& io_context, + const std::shared_ptr& logger, + time_point first_start_expected, + duration check_interval, + const std::string& serv, + const std::string& cmd_name, + const std::string& cmd_line, + const rapidjson::Value& args, + const engine_to_agent_request_ptr& cnf, + check::completion_handler&& handler) + : check(io_context, + logger, + first_start_expected, + check_interval, + serv, + cmd_name, + cmd_line, + cnf, + std::move(handler)), + + _nb_core(std::thread::hardware_concurrency()), + _cpu_detailed(false), + 
_measure_timer(*io_context) { + if (args.IsObject()) { + com::centreon::common::rapidjson_helper arg(args); + _cpu_detailed = arg.get_bool("cpu-detailed", false); + } +} + +/** + * @brief start a measure + * measure duration is the min of timeout - 1s, check_interval - 1s + * + * @param timeout + */ +template +void native_check_cpu::start_check(const duration& timeout) { + if (!check::_start_check(timeout)) { + return; + } + + try { + std::unique_ptr> begin = + get_cpu_time_snapshot(true); + + time_point end_measure = std::chrono::system_clock::now() + timeout; + time_point end_measure_period = + get_start_expected() + + std::chrono::seconds(get_conf()->config().check_interval()); + + if (end_measure > end_measure_period) { + end_measure = end_measure_period; + } + + end_measure -= std::chrono::seconds(1); + + _measure_timer.expires_at(end_measure); + _measure_timer.async_wait( + [me = shared_from_this(), first_measure = std::move(begin), + start_check_index = _get_running_check_index()]( + const boost::system::error_code& err) mutable { + me->_measure_timer_handler(err, start_check_index, + std::move(first_measure)); + }); + } catch (const std::exception& e) { + SPDLOG_LOGGER_ERROR(_logger, "{} fail to start check: {}", + get_command_name(), e.what()); + _io_context->post([me = shared_from_this(), + start_check_index = _get_running_check_index(), + err = e.what()] { + me->on_completion(start_check_index, e_status::unknown, {}, {err}); + }); + } +} + +constexpr std::array _sz_status = { + "OK: ", "WARNING: ", "CRITICAL: ", "UNKNOWN: "}; + +/** + * @brief called at measure timer expiration + * Then we take a new snapshot of /proc/stat, compute difference with + * first_measure and generate output and perfdatas + * + * @param err asio error + * @param start_check_index passed to on_completion to validate result + * @param first_measure first snapshot to compare + */ +template +void native_check_cpu::_measure_timer_handler( + const boost::system::error_code& err, + 
unsigned start_check_index, + std::unique_ptr>&& + first_measure) { + if (err) { + return; + } + + std::string output; + std::list perfs; + + std::unique_ptr> new_measure = + get_cpu_time_snapshot(false); + + e_status worst = compute(*first_measure, *new_measure, &output, &perfs); + + on_completion(start_check_index, worst, perfs, {output}); +} + +/** + * @brief compute the difference between second_measure and first_measure and + * generate status, output and perfdatas + * + * @param first_measure first snapshot of /proc/stat + * @param second_measure second snapshot of /proc/stat + * @param output out plugin output + * @param perfs perfdatas + * @return e_status plugin out status + */ +template +e_status native_check_cpu::_compute( + const check_cpu_detail::cpu_time_snapshot& first_measure, + const check_cpu_detail::cpu_time_snapshot& second_measure, + const std::string_view summary_labels[], + const std::string_view perfdata_labels[], + std::string* output, + std::list* perfs) { + index_to_cpu delta = second_measure.subtract(first_measure); + + // we need to know per cpu status to provide no ok cpu details + boost::container::flat_map by_proc_status; + + for (const auto& checker : _cpu_to_status) { + checker.second.compute_status(delta, &by_proc_status); + } + + e_status worst = e_status::ok; + for (const auto& to_cmp : by_proc_status.sequence()) { + if (to_cmp.second > worst) { + worst = to_cmp.second; + } + } + + if (worst == e_status::ok) { // all is ok + auto average_data = delta.find(check_cpu_detail::average_cpu_index); + if (average_data != delta.end()) { + *output = fmt::format("OK: CPU(s) average usage is {:.2f}%", + average_data->second.get_proportional_used() * 100); + } else { + *output = "OK: CPUs usages are ok."; + } + } else { + bool first = true; + // not all cpus ok => display detail per cpu nok + for (const auto& cpu_status : by_proc_status) { + if (cpu_status.second != e_status::ok) { + if (first) { + first = false; + } else { + 
output->push_back(' '); + } + *output += _sz_status[cpu_status.second]; + delta[cpu_status.first].dump(cpu_status.first, summary_labels, output); + } + } + } + + auto fill_perfdata = [&, this]( + const std::string_view& label, unsigned index, + unsigned cpu_index, + const per_cpu_time_base& per_cpu_data) { + double val = index >= nb_metric + ? per_cpu_data.get_proportional_used() + : per_cpu_data.get_proportional_value(index); + bool is_average = cpu_index == check_cpu_detail::average_cpu_index; + common::perfdata to_add; + to_add.name(label); + to_add.unit("%"); + to_add.min(0); + to_add.max(100); + to_add.value(val * 100); + // we search cpu_to_status to get warning and critical thresholds + // warning + auto cpu_to_status_search = _cpu_to_status.find( + std::make_tuple(index, is_average, e_status::warning)); + if (cpu_to_status_search != _cpu_to_status.end()) { + to_add.warning_low(0); + to_add.warning(100 * cpu_to_status_search->second.get_threshold()); + } + // critical + cpu_to_status_search = _cpu_to_status.find( + std::make_tuple(index, is_average, e_status::critical)); + if (cpu_to_status_search != _cpu_to_status.end()) { + to_add.critical_low(0); + to_add.critical(100 * cpu_to_status_search->second.get_threshold()); + } + perfs->emplace_back(std::move(to_add)); + }; + + if (_cpu_detailed) { + for (const auto& by_core : delta) { + std::string cpu_name; + const char* suffix; + if (by_core.first != check_cpu_detail::average_cpu_index) { + absl::StrAppend(&cpu_name, by_core.first, "~"); + suffix = "#core.cpu.utilization.percentage"; + } else { + suffix = "#cpu.utilization.percentage"; + } + for (unsigned stat_ind = 0; stat_ind < nb_metric; ++stat_ind) { + fill_perfdata((cpu_name + perfdata_labels[stat_ind].data()) + suffix, + stat_ind, by_core.first, by_core.second); + } + fill_perfdata((cpu_name + "used") + suffix, nb_metric, by_core.first, + by_core.second); + } + + } else { + for (const auto& by_core : delta) { + std::string cpu_name; + if (by_core.first 
!= check_cpu_detail::average_cpu_index) { + absl::StrAppend(&cpu_name, by_core.first, + "#core.cpu.utilization.percentage"); + } else { + cpu_name = "cpu.utilization.percentage"; + } + + fill_perfdata(cpu_name, nb_metric, by_core.first, by_core.second); + } + } + return worst; +} \ No newline at end of file diff --git a/agent/src/scheduler.cc b/agent/src/scheduler.cc index 02ac1e903ad..05847e76ed9 100644 --- a/agent/src/scheduler.cc +++ b/agent/src/scheduler.cc @@ -553,10 +553,6 @@ std::shared_ptr scheduler::default_check_builder( static const rapidjson::Value no_arg; args = &no_arg; } -#ifdef _WINDOWS - throw exceptions::msg_fmt("command {}, unknown native check:{}", cmd_name, - cmd_line); -#else if (check_type == "cpu_percentage"sv) { return std::make_shared( io_context, logger, first_start_expected, check_interval, service, @@ -565,8 +561,6 @@ std::shared_ptr scheduler::default_check_builder( throw exceptions::msg_fmt("command {}, unknown native check:{}", cmd_name, cmd_line); } -#endif - } catch (const std::exception&) { return check_exec::load(io_context, logger, first_start_expected, check_interval, service, cmd_name, cmd_line, conf, diff --git a/agent/test/CMakeLists.txt b/agent/test/CMakeLists.txt index b038ccfaf0a..a86abf25050 100644 --- a/agent/test/CMakeLists.txt +++ b/agent/test/CMakeLists.txt @@ -26,7 +26,7 @@ set( SRC_COMMON if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") set(SRC ${SRC_COMMON} config_test.cc check_linux_cpu_test.cc) else() - set(SRC ${SRC_COMMON}) + set(SRC ${SRC_COMMON} check_windows_cpu_test.cc) endif() diff --git a/agent/test/check_linux_cpu_test.cc b/agent/test/check_linux_cpu_test.cc index aedeffd32a1..067419c11b2 100644 --- a/agent/test/check_linux_cpu_test.cc +++ b/agent/test/check_linux_cpu_test.cc @@ -17,10 +17,7 @@ */ #include -#include -#include -#include "check.hh" #include "check_cpu.hh" #include "com/centreon/common/rapidjson_helper.hh" @@ -56,8 +53,7 @@ TEST(proc_stat_file_test, read_sample) { switch (by_cpu.first) { case 0: 
ASSERT_EQ(by_cpu.second.get_total(), 6017174); - ASSERT_DOUBLE_EQ(by_cpu.second.get_proportional_value( - check_cpu_detail::e_proc_stat_index::used), + ASSERT_DOUBLE_EQ(by_cpu.second.get_proportional_used(), (6017174.0 - 4497394.0) / 6017174); ASSERT_DOUBLE_EQ(by_cpu.second.get_proportional_value( check_cpu_detail::e_proc_stat_index::user), @@ -145,7 +141,7 @@ TEST(proc_stat_file_test, no_threshold) { check_cpu_detail::proc_stat_file second_measure(test_file_path2, 4); - auto delta = second_measure - first_measure; + auto delta = second_measure.subtract(first_measure); std::string output; std::list perfs; @@ -180,34 +176,17 @@ TEST(proc_stat_file_test, no_threshold) { ASSERT_EQ(perf.unit(), "%"); ASSERT_EQ(perf.value_type(), com::centreon::common::perfdata::gauge); if (perf.name() == "0#core.cpu.utilization.percentage") { - ASSERT_NEAR(perf.value(), - delta[0].get_proportional_value( - check_cpu_detail::e_proc_stat_index::used) * - 100, - 0.01); + ASSERT_NEAR(perf.value(), delta[0].get_proportional_used() * 100, 0.01); } else if (perf.name() == "1#core.cpu.utilization.percentage") { - ASSERT_NEAR(perf.value(), - delta[1].get_proportional_value( - check_cpu_detail::e_proc_stat_index::used) * - 100, - 0.01); + ASSERT_NEAR(perf.value(), delta[1].get_proportional_used() * 100, 0.01); } else if (perf.name() == "2#core.cpu.utilization.percentage") { - ASSERT_NEAR(perf.value(), - delta[2].get_proportional_value( - check_cpu_detail::e_proc_stat_index::used) * - 100, - 0.01); + ASSERT_NEAR(perf.value(), delta[2].get_proportional_used() * 100, 0.01); } else if (perf.name() == "3#core.cpu.utilization.percentage") { - ASSERT_NEAR(perf.value(), - delta[3].get_proportional_value( - check_cpu_detail::e_proc_stat_index::used) * - 100, - 0.01); + ASSERT_NEAR(perf.value(), delta[3].get_proportional_used() * 100, 0.01); } else if (perf.name() == "cpu.utilization.percentage") { ASSERT_NEAR( perf.value(), - delta[check_cpu_detail::average_cpu_index].get_proportional_value( - 
check_cpu_detail::e_proc_stat_index::used) * + delta[check_cpu_detail::average_cpu_index].get_proportional_used() * 100, 0.01); } else { @@ -237,7 +216,7 @@ TEST(proc_stat_file_test, no_threshold_detailed) { check_cpu_detail::proc_stat_file second_measure(test_file_path2, 4); - auto delta = second_measure - first_measure; + auto delta = second_measure.subtract(first_measure); std::string output; std::list perfs; @@ -343,11 +322,7 @@ TEST(proc_stat_file_test, no_threshold_detailed) { 100, 0.01); } else if (counter_type == "used") { - ASSERT_NEAR(perf.value(), - cpu_data.get_proportional_value( - check_cpu_detail::e_proc_stat_index::used) * - 100, - 0.01); + ASSERT_NEAR(perf.value(), cpu_data.get_proportional_used() * 100, 0.01); } else { FAIL() << "unexpected perfdata name:" << perf.name(); } @@ -372,7 +347,7 @@ TEST(proc_stat_file_test, threshold_nodetailed) { check_cpu_detail::proc_stat_file second_measure(test_file_path2, 4); - auto delta = second_measure - first_measure; + auto delta = second_measure.subtract(first_measure); std::string output; std::list perfs; @@ -414,34 +389,17 @@ TEST(proc_stat_file_test, threshold_nodetailed) { ASSERT_EQ(perf.unit(), "%"); ASSERT_EQ(perf.value_type(), com::centreon::common::perfdata::gauge); if (perf.name() == "0#core.cpu.utilization.percentage") { - ASSERT_NEAR(perf.value(), - delta[0].get_proportional_value( - check_cpu_detail::e_proc_stat_index::used) * - 100, - 0.01); + ASSERT_NEAR(perf.value(), delta[0].get_proportional_used() * 100, 0.01); } else if (perf.name() == "1#core.cpu.utilization.percentage") { - ASSERT_NEAR(perf.value(), - delta[1].get_proportional_value( - check_cpu_detail::e_proc_stat_index::used) * - 100, - 0.01); + ASSERT_NEAR(perf.value(), delta[1].get_proportional_used() * 100, 0.01); } else if (perf.name() == "2#core.cpu.utilization.percentage") { - ASSERT_NEAR(perf.value(), - delta[2].get_proportional_value( - check_cpu_detail::e_proc_stat_index::used) * - 100, - 0.01); + ASSERT_NEAR(perf.value(), 
delta[2].get_proportional_used() * 100, 0.01); } else if (perf.name() == "3#core.cpu.utilization.percentage") { - ASSERT_NEAR(perf.value(), - delta[3].get_proportional_value( - check_cpu_detail::e_proc_stat_index::used) * - 100, - 0.01); + ASSERT_NEAR(perf.value(), delta[3].get_proportional_used() * 100, 0.01); } else if (perf.name() == "cpu.utilization.percentage") { ASSERT_NEAR( perf.value(), - delta[check_cpu_detail::average_cpu_index].get_proportional_value( - check_cpu_detail::e_proc_stat_index::used) * + delta[check_cpu_detail::average_cpu_index].get_proportional_used() * 100, 0.01); } else { @@ -468,7 +426,7 @@ TEST(proc_stat_file_test, threshold_nodetailed2) { check_cpu_detail::proc_stat_file second_measure(test_file_path2, 4); - auto delta = second_measure - first_measure; + auto delta = second_measure.subtract(first_measure); std::string output; std::list perfs; @@ -525,7 +483,7 @@ TEST(proc_stat_file_test, threshold_detailed) { check_cpu_detail::proc_stat_file second_measure(test_file_path2, 4); - auto delta = second_measure - first_measure; + auto delta = second_measure.subtract(first_measure); std::string output; std::list perfs; @@ -597,7 +555,7 @@ TEST(proc_stat_file_test, threshold_detailed2) { check_cpu_detail::proc_stat_file second_measure(test_file_path2, 4); - auto delta = second_measure - first_measure; + auto delta = second_measure.subtract(first_measure); std::string output; std::list perfs; diff --git a/agent/test/check_windows_cpu_test.cc b/agent/test/check_windows_cpu_test.cc new file mode 100644 index 00000000000..22000b48815 --- /dev/null +++ b/agent/test/check_windows_cpu_test.cc @@ -0,0 +1,501 @@ +/** + * Copyright 2024 Centreon + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * For more information : contact@centreon.com + */ + +#include + +#include "com/centreon/common/rapidjson_helper.hh" + +#include "check_cpu.hh" + +extern std::shared_ptr g_io_context; + +using namespace com::centreon::agent; + +TEST(native_check_cpu_windows, construct) { + check_cpu_detail::M_SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION info; + info.IdleTime.QuadPart = 60; + info.KernelTime.QuadPart = 70; + info.UserTime.QuadPart = 25; + info.DpcTime.QuadPart = 1; + info.InterruptTime.QuadPart = 5; + check_cpu_detail::kernel_per_cpu_time k(info); + ASSERT_EQ(k.get_proportional_value(check_cpu_detail::e_proc_stat_index::user), + 0.25); + ASSERT_EQ( + k.get_proportional_value(check_cpu_detail::e_proc_stat_index::system), + 0.1); + ASSERT_EQ(k.get_proportional_value(check_cpu_detail::e_proc_stat_index::idle), + 0.6); + ASSERT_EQ( + k.get_proportional_value(check_cpu_detail::e_proc_stat_index::interrupt), + 0.05); + ASSERT_EQ(k.get_proportional_value(check_cpu_detail::e_proc_stat_index::dpc), + 0.01); + ASSERT_EQ(k.get_proportional_used(), 0.4); +} + +constexpr check_cpu_detail::M_SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION info[2] = + {{0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}}; + +TEST(native_check_cpu_windows, output_no_threshold) { + check_cpu_detail::kernel_cpu_time_snapshot first(info, info + 2); + + check_cpu_detail::M_SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION info2[2]; + info2[0].IdleTime.QuadPart = 60; + info2[0].KernelTime.QuadPart = 70; + info2[0].UserTime.QuadPart = 25; + info2[0].DpcTime.QuadPart = 1; + info2[0].InterruptTime.QuadPart = 5; + + 
info2[1].IdleTime.QuadPart = 40; + info2[1].KernelTime.QuadPart = 50; + info2[1].UserTime.QuadPart = 45; + info2[1].DpcTime.QuadPart = 0; + info2[1].InterruptTime.QuadPart = 5; + + check_cpu_detail::kernel_cpu_time_snapshot second(info2, info2 + 2); + + std::string output; + std::list perfs; + + rapidjson::Document check_args; + + check_cpu checker( + g_io_context, spdlog::default_logger(), {}, {}, "serv", "cmd_name", + "cmd_line", check_args, nullptr, + []([[maybe_unused]] const std::shared_ptr& caller, + [[maybe_unused]] int status, + [[maybe_unused]] const std::list& + perfdata, + [[maybe_unused]] const std::list& outputs) {}); + + checker.compute(first, second, &output, &perfs); + ASSERT_EQ(output, "OK: CPU(s) average usage is 50.00%"); + + ASSERT_EQ(perfs.size(), 3); + + for (const auto& perf : perfs) { + ASSERT_TRUE(std::isnan(perf.critical_low())); + ASSERT_TRUE(std::isnan(perf.critical())); + ASSERT_FALSE(perf.critical_mode()); + ASSERT_TRUE(std::isnan(perf.warning_low())); + ASSERT_TRUE(std::isnan(perf.warning())); + ASSERT_FALSE(perf.warning_mode()); + ASSERT_EQ(perf.min(), 0); + ASSERT_EQ(perf.max(), 100); + ASSERT_EQ(perf.unit(), "%"); + ASSERT_EQ(perf.value_type(), com::centreon::common::perfdata::gauge); + if (perf.name() == "0#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 40.0, 0.01); + } else if (perf.name() == "1#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 60.0, 0.01); + } else if (perf.name() == "cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 50.0, 0.01); + } else { + FAIL() << "unexpected perfdata name:" << perf.name(); + } + } +} + +TEST(native_check_cpu_windows, output_no_threshold_detailed) { + check_cpu_detail::kernel_cpu_time_snapshot first(info, info + 2); + + check_cpu_detail::M_SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION info2[2]; + info2[0].IdleTime.QuadPart = 60; + info2[0].KernelTime.QuadPart = 70; + info2[0].UserTime.QuadPart = 25; + info2[0].DpcTime.QuadPart = 1; + 
info2[0].InterruptTime.QuadPart = 5; + + info2[1].IdleTime.QuadPart = 40; + info2[1].KernelTime.QuadPart = 50; + info2[1].UserTime.QuadPart = 45; + info2[1].DpcTime.QuadPart = 0; + info2[1].InterruptTime.QuadPart = 5; + + check_cpu_detail::kernel_cpu_time_snapshot second(info2, info2 + 2); + + std::string output; + std::list perfs; + + using namespace com::centreon::common::literals; + rapidjson::Document check_args = R"({"cpu-detailed":true})"_json; + + check_cpu checker( + g_io_context, spdlog::default_logger(), {}, {}, "serv", "cmd_name", + "cmd_line", check_args, nullptr, + []([[maybe_unused]] const std::shared_ptr& caller, + [[maybe_unused]] int status, + [[maybe_unused]] const std::list& + perfdata, + [[maybe_unused]] const std::list& outputs) {}); + + checker.compute(first, second, &output, &perfs); + ASSERT_EQ(output, "OK: CPU(s) average usage is 50.00%"); + + ASSERT_EQ(perfs.size(), 18); + + for (const auto& perf : perfs) { + ASSERT_TRUE(std::isnan(perf.critical_low())); + ASSERT_TRUE(std::isnan(perf.critical())); + ASSERT_FALSE(perf.critical_mode()); + ASSERT_TRUE(std::isnan(perf.warning_low())); + ASSERT_TRUE(std::isnan(perf.warning())); + ASSERT_FALSE(perf.warning_mode()); + ASSERT_EQ(perf.min(), 0); + ASSERT_EQ(perf.max(), 100); + ASSERT_EQ(perf.unit(), "%"); + ASSERT_EQ(perf.value_type(), com::centreon::common::perfdata::gauge); + + if (perf.name() == "0~user#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 25.0, 0.01); + } else if (perf.name() == "1~user#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 45.0, 0.01); + } else if (perf.name() == "user#cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 35.0, 0.01); + } else if (perf.name() == "0~system#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 10.0, 0.01); + } else if (perf.name() == "1~system#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 10.0, 0.01); + } else if (perf.name() == "system#cpu.utilization.percentage") { + 
ASSERT_NEAR(perf.value(), 10.0, 0.01); + } else if (perf.name() == "0~idle#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 60.0, 0.01); + } else if (perf.name() == "1~idle#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 40.0, 0.01); + } else if (perf.name() == "idle#cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 50.0, 0.01); + } else if (perf.name() == "0~interrupt#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 5.0, 0.01); + } else if (perf.name() == "1~interrupt#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 5.0, 0.01); + } else if (perf.name() == "interrupt#cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 5.0, 0.01); + } else if (perf.name() == + "0~dpc_interrupt#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 1.0, 0.01); + } else if (perf.name() == + "1~dpc_interrupt#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 0.0, 0.01); + } else if (perf.name() == "dpc_interrupt#cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 0.5, 0.01); + } else if (perf.name() == "0~used#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 40.0, 0.01); + } else if (perf.name() == "1~used#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 60.0, 0.01); + } else if (perf.name() == "used#cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 50.0, 0.01); + } else { + FAIL() << "unexpected perfdata name:" << perf.name(); + } + } +} + +TEST(native_check_cpu_windows, output_threshold) { + check_cpu_detail::kernel_cpu_time_snapshot first(info, info + 2); + + check_cpu_detail::M_SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION info2[2]; + info2[0].IdleTime.QuadPart = 60; + info2[0].KernelTime.QuadPart = 70; + info2[0].UserTime.QuadPart = 25; + info2[0].DpcTime.QuadPart = 1; + info2[0].InterruptTime.QuadPart = 5; + + info2[1].IdleTime.QuadPart = 40; + info2[1].KernelTime.QuadPart = 50; + info2[1].UserTime.QuadPart = 45; + info2[1].DpcTime.QuadPart = 
0; + info2[1].InterruptTime.QuadPart = 5; + + check_cpu_detail::kernel_cpu_time_snapshot second(info2, info2 + 2); + + std::string output; + std::list perfs; + + using namespace com::centreon::common::literals; + rapidjson::Document check_args = + R"({"warning-core" : "39", "critical-core" : "59", "warning-average" : "49", "critical-average" : "60"})"_json; + + check_cpu checker( + g_io_context, spdlog::default_logger(), {}, {}, "serv", "cmd_name", + "cmd_line", check_args, nullptr, + []([[maybe_unused]] const std::shared_ptr& caller, + [[maybe_unused]] int status, + [[maybe_unused]] const std::list& + perfdata, + [[maybe_unused]] const std::list& outputs) {}); + + checker.compute(first, second, &output, &perfs); + ASSERT_EQ( + output, + "WARNING: CPU'0' Usage: 40.00%, User 25.00%, System 10.00%, Idle 60.00%, " + "Interrupt 5.00%, Dpc Interrupt 1.00% CRITICAL: CPU'1' Usage: 60.00%, " + "User 45.00%, System 10.00%, Idle 40.00%, Interrupt 5.00%, Dpc Interrupt " + "0.00% WARNING: CPU(s) average Usage: 50.00%, User 35.00%, System " + "10.00%, Idle 50.00%, Interrupt 5.00%, Dpc Interrupt 0.50%"); + + ASSERT_EQ(perfs.size(), 3); + + for (const auto& perf : perfs) { + if (perf.name() == "cpu.utilization.percentage") { + ASSERT_NEAR(perf.warning(), 49.0, 0.01); + ASSERT_NEAR(perf.critical(), 60.0, 0.01); + } else { + ASSERT_NEAR(perf.warning(), 39.0, 0.01); + ASSERT_NEAR(perf.critical(), 59.0, 0.01); + } + ASSERT_EQ(perf.warning_low(), 0); + ASSERT_EQ(perf.critical_low(), 0); + ASSERT_FALSE(perf.critical_mode()); + ASSERT_FALSE(perf.warning_mode()); + ASSERT_EQ(perf.min(), 0); + ASSERT_EQ(perf.max(), 100); + ASSERT_EQ(perf.unit(), "%"); + ASSERT_EQ(perf.value_type(), com::centreon::common::perfdata::gauge); + if (perf.name() == "0#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 40.0, 0.01); + } else if (perf.name() == "1#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 60.0, 0.01); + } else if (perf.name() == "cpu.utilization.percentage") { + 
ASSERT_NEAR(perf.value(), 50.0, 0.01); + } else { + FAIL() << "unexpected perfdata name:" << perf.name(); + } + } +} + +TEST(native_check_cpu_windows, output_threshold_detailed) { + check_cpu_detail::kernel_cpu_time_snapshot first(info, info + 2); + + check_cpu_detail::M_SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION info2[2]; + info2[0].IdleTime.QuadPart = 60; + info2[0].KernelTime.QuadPart = 70; + info2[0].UserTime.QuadPart = 25; + info2[0].DpcTime.QuadPart = 1; + info2[0].InterruptTime.QuadPart = 5; + + info2[1].IdleTime.QuadPart = 40; + info2[1].KernelTime.QuadPart = 50; + info2[1].UserTime.QuadPart = 45; + info2[1].DpcTime.QuadPart = 0; + info2[1].InterruptTime.QuadPart = 5; + + check_cpu_detail::kernel_cpu_time_snapshot second(info2, info2 + 2); + + std::string output; + std::list perfs; + + using namespace com::centreon::common::literals; + rapidjson::Document check_args = + R"({"cpu-detailed":true, "warning-core" : "39", "critical-core" : "59", "warning-average" : "49", "critical-average" : "60", "warning-core-user": "30", "critical-core-user": "40", "warning-average-user": "31", "critical-average-user": "41" })"_json; + + check_cpu checker( + g_io_context, spdlog::default_logger(), {}, {}, "serv", "cmd_name", + "cmd_line", check_args, nullptr, + []([[maybe_unused]] const std::shared_ptr& caller, + [[maybe_unused]] int status, + [[maybe_unused]] const std::list& + perfdata, + [[maybe_unused]] const std::list& outputs) {}); + + checker.compute(first, second, &output, &perfs); + ASSERT_EQ( + output, + "WARNING: CPU'0' Usage: 40.00%, User 25.00%, System 10.00%, Idle 60.00%, " + "Interrupt 5.00%, Dpc Interrupt 1.00% CRITICAL: CPU'1' Usage: 60.00%, " + "User 45.00%, System 10.00%, Idle 40.00%, Interrupt 5.00%, Dpc Interrupt " + "0.00% WARNING: CPU(s) average Usage: 50.00%, User 35.00%, System " + "10.00%, Idle 50.00%, Interrupt 5.00%, Dpc Interrupt 0.50%"); + + ASSERT_EQ(perfs.size(), 18); + + for (const auto& perf : perfs) { + ASSERT_FALSE(perf.critical_mode()); + 
ASSERT_FALSE(perf.warning_mode()); + ASSERT_EQ(perf.min(), 0); + ASSERT_EQ(perf.max(), 100); + ASSERT_EQ(perf.unit(), "%"); + ASSERT_EQ(perf.value_type(), com::centreon::common::perfdata::gauge); + + if (perf.name() == "0~user#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 25.0, 0.01); + ASSERT_NEAR(perf.warning(), 30.0, 0.01); + ASSERT_NEAR(perf.critical(), 40.0, 0.01); + ASSERT_EQ(perf.warning_low(), 0); + ASSERT_EQ(perf.critical_low(), 0); + } else if (perf.name() == "1~user#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 45.0, 0.01); + ASSERT_NEAR(perf.warning(), 30.0, 0.01); + ASSERT_NEAR(perf.critical(), 40.0, 0.01); + ASSERT_EQ(perf.warning_low(), 0); + ASSERT_EQ(perf.critical_low(), 0); + } else if (perf.name() == "user#cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 35.0, 0.01); + ASSERT_NEAR(perf.warning(), 31.0, 0.01); + ASSERT_NEAR(perf.critical(), 41.0, 0.01); + ASSERT_EQ(perf.warning_low(), 0); + ASSERT_EQ(perf.critical_low(), 0); + } else if (perf.name() == "0~system#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 10.0, 0.01); + ASSERT_TRUE(std::isnan(perf.critical_low())); + ASSERT_TRUE(std::isnan(perf.critical())); + ASSERT_TRUE(std::isnan(perf.warning_low())); + ASSERT_TRUE(std::isnan(perf.warning())); + } else if (perf.name() == "1~system#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 10.0, 0.01); + ASSERT_TRUE(std::isnan(perf.critical_low())); + ASSERT_TRUE(std::isnan(perf.critical())); + ASSERT_TRUE(std::isnan(perf.warning_low())); + ASSERT_TRUE(std::isnan(perf.warning())); + } else if (perf.name() == "system#cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 10.0, 0.01); + ASSERT_TRUE(std::isnan(perf.critical_low())); + ASSERT_TRUE(std::isnan(perf.critical())); + ASSERT_TRUE(std::isnan(perf.warning_low())); + ASSERT_TRUE(std::isnan(perf.warning())); + } else if (perf.name() == "0~idle#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 60.0, 0.01); 
+ ASSERT_TRUE(std::isnan(perf.critical_low())); + ASSERT_TRUE(std::isnan(perf.critical())); + ASSERT_TRUE(std::isnan(perf.warning_low())); + ASSERT_TRUE(std::isnan(perf.warning())); + } else if (perf.name() == "1~idle#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 40.0, 0.01); + ASSERT_TRUE(std::isnan(perf.critical_low())); + ASSERT_TRUE(std::isnan(perf.critical())); + ASSERT_TRUE(std::isnan(perf.warning_low())); + ASSERT_TRUE(std::isnan(perf.warning())); + } else if (perf.name() == "idle#cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 50.0, 0.01); + ASSERT_TRUE(std::isnan(perf.critical_low())); + ASSERT_TRUE(std::isnan(perf.critical())); + ASSERT_TRUE(std::isnan(perf.warning_low())); + ASSERT_TRUE(std::isnan(perf.warning())); + } else if (perf.name() == "0~interrupt#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 5.0, 0.01); + ASSERT_TRUE(std::isnan(perf.critical_low())); + ASSERT_TRUE(std::isnan(perf.critical())); + ASSERT_TRUE(std::isnan(perf.warning_low())); + ASSERT_TRUE(std::isnan(perf.warning())); + } else if (perf.name() == "1~interrupt#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 5.0, 0.01); + ASSERT_TRUE(std::isnan(perf.critical_low())); + ASSERT_TRUE(std::isnan(perf.critical())); + ASSERT_TRUE(std::isnan(perf.warning_low())); + ASSERT_TRUE(std::isnan(perf.warning())); + } else if (perf.name() == "interrupt#cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 5.0, 0.01); + ASSERT_TRUE(std::isnan(perf.critical_low())); + ASSERT_TRUE(std::isnan(perf.critical())); + ASSERT_TRUE(std::isnan(perf.warning_low())); + ASSERT_TRUE(std::isnan(perf.warning())); + } else if (perf.name() == + "0~dpc_interrupt#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 1.0, 0.01); + ASSERT_TRUE(std::isnan(perf.critical_low())); + ASSERT_TRUE(std::isnan(perf.critical())); + ASSERT_TRUE(std::isnan(perf.warning_low())); + ASSERT_TRUE(std::isnan(perf.warning())); + + } else if (perf.name() == + 
"1~dpc_interrupt#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 0.0, 0.01); + ASSERT_TRUE(std::isnan(perf.critical_low())); + ASSERT_TRUE(std::isnan(perf.critical())); + ASSERT_TRUE(std::isnan(perf.warning_low())); + ASSERT_TRUE(std::isnan(perf.warning())); + } else if (perf.name() == "dpc_interrupt#cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 0.5, 0.01); + ASSERT_TRUE(std::isnan(perf.critical_low())); + ASSERT_TRUE(std::isnan(perf.critical())); + ASSERT_TRUE(std::isnan(perf.warning_low())); + ASSERT_TRUE(std::isnan(perf.warning())); + } else if (perf.name() == "0~used#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 40.0, 0.01); + ASSERT_NEAR(perf.warning(), 39.0, 0.01); + ASSERT_NEAR(perf.critical(), 59.0, 0.01); + ASSERT_EQ(perf.warning_low(), 0); + ASSERT_EQ(perf.critical_low(), 0); + } else if (perf.name() == "1~used#core.cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 60.0, 0.01); + ASSERT_NEAR(perf.warning(), 39.0, 0.01); + ASSERT_NEAR(perf.critical(), 59.0, 0.01); + ASSERT_EQ(perf.warning_low(), 0); + ASSERT_EQ(perf.critical_low(), 0); + } else if (perf.name() == "used#cpu.utilization.percentage") { + ASSERT_NEAR(perf.value(), 50.0, 0.01); + ASSERT_NEAR(perf.warning(), 49.0, 0.01); + ASSERT_NEAR(perf.critical(), 60.0, 0.01); + ASSERT_EQ(perf.warning_low(), 0); + ASSERT_EQ(perf.critical_low(), 0); + } else { + FAIL() << "unexpected perfdata name:" << perf.name(); + } + } +} + +TEST(native_check_cpu_windows, compare_kernel_dph) { + using namespace com::centreon::common::literals; + rapidjson::Document nt_check_args = + R"({"use-nt-query-system-information":true })"_json; + + check_cpu nt_checker( + g_io_context, spdlog::default_logger(), {}, {}, "serv", "cmd_name", + "cmd_line", nt_check_args, nullptr, + []([[maybe_unused]] const std::shared_ptr& caller, + [[maybe_unused]] int status, + [[maybe_unused]] const std::list& + perfdata, + [[maybe_unused]] const std::list& outputs) {}); + + 
rapidjson::Document pdh_check_args = + R"({"use-nt-query-system-information":false })"_json; + + check_cpu pdh_checker( + g_io_context, spdlog::default_logger(), {}, {}, "serv", "cmd_name", + "cmd_line", pdh_check_args, nullptr, + []([[maybe_unused]] const std::shared_ptr& caller, + [[maybe_unused]] int status, + [[maybe_unused]] const std::list& + perfdata, + [[maybe_unused]] const std::list& outputs) {}); + + auto first_nt = nt_checker.get_cpu_time_snapshot(true); + auto first_pdh = pdh_checker.get_cpu_time_snapshot(true); + + std::this_thread::sleep_for(std::chrono::milliseconds(2500)); + + auto second_nt = nt_checker.get_cpu_time_snapshot(false); + auto second_pdh = pdh_checker.get_cpu_time_snapshot(false); + + auto diff_nt = second_nt->subtract(*first_nt); + auto diff_pdh = second_pdh->subtract(*first_pdh); + + ASSERT_EQ(diff_nt.size(), diff_pdh.size()); + auto nt_iter = diff_nt.begin(); + auto pdh_iter = diff_pdh.begin(); + auto nt_iter_end = diff_nt.end(); + for (; nt_iter != nt_iter_end; ++nt_iter, ++pdh_iter) { + ASSERT_NEAR(nt_iter->second.get_proportional_used(), + pdh_iter->second.get_proportional_used(), 0.1); + for (size_t j = 0; j < 5; ++j) { + ASSERT_NEAR(nt_iter->second.get_proportional_value(j), + pdh_iter->second.get_proportional_value(j), 0.1); + } + } +} \ No newline at end of file diff --git a/bbdo/CMakeLists.txt b/bbdo/CMakeLists.txt index 5232e26135b..7e1c5c35645 100644 --- a/bbdo/CMakeLists.txt +++ b/bbdo/CMakeLists.txt @@ -151,7 +151,7 @@ add_library( "storage/status.hh") set_target_properties(bbdo_storage PROPERTIES POSITION_INDEPENDENT_CODE ON) target_precompile_headers(bbdo_storage REUSE_FROM bbdo_bbdo) -add_dependencies(bbdo_storage table_max_size) +add_dependencies(bbdo_storage table_max_size pb_storage_lib) add_library( bbdo_bam STATIC "bam/ba_duration_event.cc" diff --git a/common/grpc/inc/com/centreon/common/grpc/grpc_config.hh b/common/grpc/inc/com/centreon/common/grpc/grpc_config.hh index 4d151fa0baa..552d25a4ae8 100644 --- 
a/common/grpc/inc/com/centreon/common/grpc/grpc_config.hh +++ b/common/grpc/inc/com/centreon/common/grpc/grpc_config.hh @@ -55,17 +55,32 @@ class grpc_config { bool _compress; int _second_keepalive_interval; + /** + * @brief (client side) if this parameter is > 0, it is the longest delay in + * seconds between two failed connection attempts. It is the + * GRPC_ARG_MAX_RECONNECT_BACKOFF_MS parameter + * + */ + unsigned _second_max_reconnect_backoff; + public: using pointer = std::shared_ptr; - grpc_config() : _compress(false), _second_keepalive_interval(30) {} + grpc_config() + : _compress(false), + _second_keepalive_interval(30), + _second_max_reconnect_backoff(0) {} grpc_config(const std::string& hostp) - : _hostport(hostp), _compress(false), _second_keepalive_interval(30) {} + : _hostport(hostp), + _compress(false), + _second_keepalive_interval(30), + _second_max_reconnect_backoff(0) {} grpc_config(const std::string& hostp, bool crypted) : _hostport(hostp), _crypted(crypted), _compress(false), - _second_keepalive_interval(30) {} + _second_keepalive_interval(30), + _second_max_reconnect_backoff(0) {} grpc_config(const std::string& hostp, bool crypted, const std::string& certificate, @@ -81,7 +96,27 @@ class grpc_config { _ca_cert(ca_cert), _ca_name(ca_name), _compress(compression), - _second_keepalive_interval(second_keepalive_interval) {} + _second_keepalive_interval(second_keepalive_interval), + _second_max_reconnect_backoff(0) {} + + grpc_config(const std::string& hostp, + bool crypted, + const std::string& certificate, + const std::string& cert_key, + const std::string& ca_cert, + const std::string& ca_name, + bool compression, + int second_keepalive_interval, + unsigned second_max_reconnect_backoff) + : _hostport(hostp), + _crypted(crypted), + _certificate(certificate), + _cert_key(cert_key), + _ca_cert(ca_cert), + _ca_name(ca_name), + _compress(compression), + _second_keepalive_interval(second_keepalive_interval), + 
_second_max_reconnect_backoff(second_max_reconnect_backoff) {} const std::string& get_hostport() const { return _hostport; } bool is_crypted() const { return _crypted; } @@ -94,12 +129,17 @@ class grpc_config { return _second_keepalive_interval; } + unsigned get_second_max_reconnect_backoff() const { + return _second_max_reconnect_backoff; + } + bool operator==(const grpc_config& right) const { return _hostport == right._hostport && _crypted == right._crypted && _certificate == right._certificate && _cert_key == right._cert_key && _ca_cert == right._ca_cert && _ca_name == right._ca_name && _compress == right._compress && - _second_keepalive_interval == right._second_keepalive_interval; + _second_keepalive_interval == right._second_keepalive_interval && + _second_max_reconnect_backoff == right._second_max_reconnect_backoff; } /** diff --git a/common/grpc/src/grpc_client.cc b/common/grpc/src/grpc_client.cc index e62396901f7..53614b1e845 100644 --- a/common/grpc/src/grpc_client.cc +++ b/common/grpc/src/grpc_client.cc @@ -79,5 +79,10 @@ grpc_client_base::grpc_client_base( creds = ::grpc::InsecureChannelCredentials(); } + if (conf->get_second_max_reconnect_backoff() > 0) { + args.SetInt(GRPC_ARG_MAX_RECONNECT_BACKOFF_MS, + conf->get_second_max_reconnect_backoff() * 1000); + } + _channel = ::grpc::CreateCustomChannel(conf->get_hostport(), creds, args); } diff --git a/engine/modules/opentelemetry/src/grpc_config.cc b/engine/modules/opentelemetry/src/grpc_config.cc index 0cf4569f2ff..964735843f6 100644 --- a/engine/modules/opentelemetry/src/grpc_config.cc +++ b/engine/modules/opentelemetry/src/grpc_config.cc @@ -67,6 +67,12 @@ static constexpr std::string_view _grpc_config_schema(R"( "type": "integer", "minimum": -1, "maximum": 3600 + }, + "second_max_reconnect_backoff": { + "description": "maximum time between subsequent connection attempts, in seconds. 
Default: 60s", + "type": "integer", + "minimum": 0, + "maximum": 600 } }, "required": [ @@ -121,9 +127,12 @@ grpc_config::grpc_config(const rapidjson::Value& json_config_v) { else second_keepalive_interval = 30; + unsigned second_max_reconnect_backoff = + json_config.get_unsigned("second_max_reconnect_backoff", 60); + static_cast(*this) = common::grpc::grpc_config( hostport, crypted, certificate, cert_key, ca_cert, ca_name, compress, - second_keepalive_interval); + second_keepalive_interval, second_max_reconnect_backoff); } /** diff --git a/tests/broker-engine/cma.robot b/tests/broker-engine/cma.robot index f8c8e5ba5f7..f3f7294e632 100644 --- a/tests/broker-engine/cma.robot +++ b/tests/broker-engine/cma.robot @@ -385,11 +385,11 @@ BEOTEL_CENTREON_AGENT_CHECK_NATIVE_CPU ${result} Ctn Find In Log With Timeout ${engineLog0} ${start} ${content} 10 Should Be True ${result} "unencrypted server listening on 0.0.0.0:4317" should be available. - ${result} Ctn Check Service Resource Status With Timeout host_1 service_1 0 60 HARD + ${result} Ctn Check Service Resource Status With Timeout host_1 service_1 0 120 HARD Should Be True ${result} resources table not updated ${metrics_list} Create List cpu.utilization.percentage 0#core.cpu.utilization.percentage - ${result} Ctn Compare Metrics Of Service 1 ${metrics_list} 10 + ${result} Ctn Compare Metrics Of Service 1 ${metrics_list} 30 Should Be True ${result} metrics not updated @@ -408,7 +408,7 @@ BEOTEL_CENTREON_AGENT_CHECK_NATIVE_CPU Ctn Engine Config Add Command ${0} otel_check3 {"check": "cpu_percentage", "args": {"critical-average" : "0.2", "warning-average" : "0.1"}} OTEL connector Ctn Reload Engine - ${result} Ctn Check Service Resource Status With Timeout host_1 service_1 2 60 SOFT + ${result} Ctn Check Service Resource Status With Timeout host_1 service_1 2 60 ANY Should Be True ${result} resources table not updated diff --git a/tests/resources/Common.py b/tests/resources/Common.py index 6164dff129e..1325adbacad 100644 
--- a/tests/resources/Common.py +++ b/tests/resources/Common.py @@ -623,7 +623,9 @@ def ctn_check_service_resource_status_with_timeout(hostname: str, service_desc: if len(result) > 0 and result[0]['status'] is not None and int(result[0]['status']) == int(status): logger.console( f"status={result[0]['status']} and status_confirmed={result[0]['status_confirmed']}") - if state_type == 'HARD' and int(result[0]['status_confirmed']) == 1: + if state_type == 'ANY': + return True + elif state_type == 'HARD' and int(result[0]['status_confirmed']) == 1: return True elif state_type == 'SOFT' and int(result[0]['status_confirmed']) == 0: return True From a37a5b914a989e91d364b1273c1b6cad32f0d70f Mon Sep 17 00:00:00 2001 From: sechkem Date: Tue, 12 Nov 2024 18:17:43 +0100 Subject: [PATCH 09/13] Mon 152644 engine coredump (#1859) * fix(engine/broker): segfault relations child/parent host (#1846) * feat(test): add tests EBPN about relation parent child * fix(engine/broker): make parents_host shared ptr & remove relation db when child is deleted REFS: MON-151686 * fix(tests): gprc GetHost --- broker/neb/src/callbacks.cc | 14 +- broker/neb/src/initial.cc | 12 +- engine/inc/com/centreon/engine/host.hh | 5 +- engine/src/command_manager.cc | 14 +- engine/src/commands/commands.cc | 50 ++-- engine/src/config.cc | 12 +- engine/src/configuration/applier/host.cc | 35 +-- engine/src/host.cc | 163 ++++++----- engine/src/macros/grab_host.cc | 12 +- .../broker-engine/parent_child_relation.robot | 260 ++++++++++++++++++ tests/resources/Engine.py | 56 ++++ 11 files changed, 457 insertions(+), 176 deletions(-) create mode 100644 tests/broker-engine/parent_child_relation.robot diff --git a/broker/neb/src/callbacks.cc b/broker/neb/src/callbacks.cc index 1e9ec8c3ba6..0d8d39077cf 100644 --- a/broker/neb/src/callbacks.cc +++ b/broker/neb/src/callbacks.cc @@ -2602,12 +2602,8 @@ int neb::callback_relation(int callback_type, void* data) { if (relation->hst && relation->dep_hst && !relation->svc && 
!relation->dep_svc) { // Find host IDs. - int host_id; - int parent_id; - { - host_id = engine::get_host_id(relation->dep_hst->name()); - parent_id = engine::get_host_id(relation->hst->name()); - } + int host_id = relation->dep_hst->host_id(); + int parent_id = relation->hst->host_id(); if (host_id && parent_id) { // Generate parent event. auto new_host_parent{std::make_shared()}; @@ -2658,10 +2654,8 @@ int neb::callback_pb_relation(int callback_type [[maybe_unused]], void* data) { if (relation->hst && relation->dep_hst && !relation->svc && !relation->dep_svc) { // Find host IDs. - int host_id; - int parent_id; - host_id = engine::get_host_id(relation->dep_hst->name()); - parent_id = engine::get_host_id(relation->hst->name()); + int host_id = relation->dep_hst->host_id(); + int parent_id = relation->hst->host_id(); if (host_id && parent_id) { // Generate parent event. auto new_host_parent{std::make_shared()}; diff --git a/broker/neb/src/initial.cc b/broker/neb/src/initial.cc index 4fc23eb0626..93de76e58b8 100644 --- a/broker/neb/src/initial.cc +++ b/broker/neb/src/initial.cc @@ -288,19 +288,15 @@ static void send_host_parents_list(neb_sender sender = neb::callback_relation) { try { // Loop through all hosts. - for (host_map::iterator it{com::centreon::engine::host::hosts.begin()}, - end{com::centreon::engine::host::hosts.end()}; - it != end; ++it) { + for (const auto& [_, sptr_host] : com::centreon::engine::host::hosts) { // Loop through all parents. - for (host_map_unsafe::iterator pit{it->second->parent_hosts.begin()}, - pend{it->second->parent_hosts.end()}; - pit != pend; ++pit) { + for (const auto& [_, sptr_host_parent] : sptr_host->parent_hosts) { // Fill callback struct. nebstruct_relation_data nsrd; memset(&nsrd, 0, sizeof(nsrd)); nsrd.type = NEBTYPE_PARENT_ADD; - nsrd.hst = pit->second; - nsrd.dep_hst = it->second.get(); + nsrd.hst = sptr_host_parent.get(); + nsrd.dep_hst = sptr_host.get(); // Callback. 
sender(NEBTYPE_PARENT_ADD, &nsrd); diff --git a/engine/inc/com/centreon/engine/host.hh b/engine/inc/com/centreon/engine/host.hh index 20366362956..8c4d70b3e65 100644 --- a/engine/inc/com/centreon/engine/host.hh +++ b/engine/inc/com/centreon/engine/host.hh @@ -250,7 +250,7 @@ class host : public notifier { void set_check_command_ptr( const std::shared_ptr& cmd) override; - host_map_unsafe parent_hosts; + host_map parent_hosts; host_map_unsafe child_hosts; static host_map hosts; static host_id_map hosts_by_id; @@ -309,6 +309,7 @@ int number_of_total_parent_hosts(com::centreon::engine::host* hst); std::ostream& operator<<(std::ostream& os, com::centreon::engine::host const& obj); std::ostream& operator<<(std::ostream& os, host_map_unsafe const& obj); +std::ostream& operator<<(std::ostream& os, host_map const& obj); namespace com::centreon::engine { @@ -320,6 +321,4 @@ std::string get_host_name(const uint64_t host_id); } // namespace com::centreon::engine -std::ostream& operator<<(std::ostream& os, host_map_unsafe const& obj); - #endif // !CCE_HOST_HH diff --git a/engine/src/command_manager.cc b/engine/src/command_manager.cc index 18207450d8a..c14333b0d52 100644 --- a/engine/src/command_manager.cc +++ b/engine/src/command_manager.cc @@ -411,19 +411,17 @@ void command_manager::schedule_and_propagate_downtime( unsigned long triggered_by, unsigned long duration) { /* check all child hosts... */ - for (host_map_unsafe::iterator it(temp_host->child_hosts.begin()), - end(temp_host->child_hosts.end()); - it != end; ++it) { - if (it->second == nullptr) + for (const auto& [_, ptr_host] : temp_host->child_hosts) { + if (ptr_host == nullptr) continue; /* recurse... 
*/ - schedule_and_propagate_downtime(it->second, entry_time, author, - comment_data, start_time, end_time, fixed, - triggered_by, duration); + schedule_and_propagate_downtime(ptr_host, entry_time, author, comment_data, + start_time, end_time, fixed, triggered_by, + duration); /* schedule downtime for this host */ downtime_manager::instance().schedule_downtime( - downtime::host_downtime, it->second->host_id(), 0, entry_time, author, + downtime::host_downtime, ptr_host->host_id(), 0, entry_time, author, comment_data, start_time, end_time, fixed, triggered_by, duration, nullptr); } diff --git a/engine/src/commands/commands.cc b/engine/src/commands/commands.cc index 072e14870d4..6b6275eb6fa 100644 --- a/engine/src/commands/commands.cc +++ b/engine/src/commands/commands.cc @@ -2444,28 +2444,24 @@ void enable_and_propagate_notifications(host* hst, enable_host_notifications(hst); /* check all child hosts... */ - for (host_map_unsafe::iterator it(hst->child_hosts.begin()), - end(hst->child_hosts.end()); - it != end; ++it) { - if (it->second == nullptr) + for (const auto& [_, ptr_host] : hst->child_hosts) { + if (ptr_host == nullptr) continue; /* recurse... */ - enable_and_propagate_notifications(it->second, level + 1, affect_top_host, + enable_and_propagate_notifications(ptr_host, level + 1, affect_top_host, affect_hosts, affect_services); /* enable notifications for this host */ if (affect_hosts) - enable_host_notifications(it->second); + enable_host_notifications(ptr_host); /* enable notifications for all services on this host... 
*/ if (affect_services) { - for (service_map_unsafe::iterator it2(it->second->services.begin()), - end2(it->second->services.end()); - it2 != end2; ++it2) { - if (!it2->second) + for (const auto& [_, ptr_srv] : ptr_host->services) { + if (!ptr_srv) continue; - enable_service_notifications(it2->second); + enable_service_notifications(ptr_srv); } } } @@ -2485,28 +2481,24 @@ void disable_and_propagate_notifications(host* hst, disable_host_notifications(hst); /* check all child hosts... */ - for (host_map_unsafe::iterator it(hst->child_hosts.begin()), - end(hst->child_hosts.begin()); - it != end; ++it) { - if (!it->second) + for (const auto& [_, ptr_host] : hst->child_hosts) { + if (!ptr_host) continue; /* recurse... */ - disable_and_propagate_notifications(it->second, level + 1, affect_top_host, + disable_and_propagate_notifications(ptr_host, level + 1, affect_top_host, affect_hosts, affect_services); /* disable notifications for this host */ if (affect_hosts) - disable_host_notifications(it->second); + disable_host_notifications(ptr_host); /* disable notifications for all services on this host... */ if (affect_services) { - for (service_map_unsafe::iterator it2(it->second->services.begin()), - end2(it->second->services.end()); - it2 != end2; ++it2) { - if (!it2->second) + for (const auto& [_, ptr_srv] : ptr_host->services) { + if (!ptr_srv) continue; - disable_service_notifications(it2->second); + disable_service_notifications(ptr_srv); } } } @@ -2627,20 +2619,18 @@ void schedule_and_propagate_downtime(host* temp_host, unsigned long triggered_by, unsigned long duration) { /* check all child hosts... */ - for (host_map_unsafe::iterator it(temp_host->child_hosts.begin()), - end(temp_host->child_hosts.end()); - it != end; ++it) { - if (it->second == nullptr) + for (const auto& [_, ptr_host] : temp_host->child_hosts) { + if (ptr_host == nullptr) continue; /* recurse... 
*/ - schedule_and_propagate_downtime(it->second, entry_time, author, - comment_data, start_time, end_time, fixed, - triggered_by, duration); + schedule_and_propagate_downtime(ptr_host, entry_time, author, comment_data, + start_time, end_time, fixed, triggered_by, + duration); /* schedule downtime for this host */ downtime_manager::instance().schedule_downtime( - downtime::host_downtime, it->second->host_id(), 0, entry_time, author, + downtime::host_downtime, ptr_host->host_id(), 0, entry_time, author, comment_data, start_time, end_time, fixed, triggered_by, duration, nullptr); } diff --git a/engine/src/config.cc b/engine/src/config.cc index 544d2be9a0d..6fc4941c5bb 100644 --- a/engine/src/config.cc +++ b/engine/src/config.cc @@ -63,19 +63,17 @@ static int dfs_host_path(host* root) { dfs_set_status(root, DFS_TEMP_CHECKED); /* We are scanning the children */ - for (host_map_unsafe::iterator it(root->child_hosts.begin()), - end(root->child_hosts.end()); - it != end; it++) { - int child_status = dfs_get_status(it->second); + for (const auto& [_, ptr_host] : root->child_hosts) { + int child_status = dfs_get_status(ptr_host); /* If a child is not checked, check it */ if (child_status == DFS_UNCHECKED) - child_status = dfs_host_path(it->second); + child_status = dfs_host_path(ptr_host); /* If a child already temporary checked, its a problem, * loop inside, and its a acked status */ if (child_status == DFS_TEMP_CHECKED) { - dfs_set_status(it->second, DFS_LOOPY); + dfs_set_status(ptr_host, DFS_LOOPY); dfs_set_status(root, DFS_LOOPY); } @@ -86,7 +84,7 @@ static int dfs_host_path(host* root) { dfs_set_status(root, DFS_NEAR_LOOP); /* we already saw this child, it's a problem */ - dfs_set_status(it->second, DFS_LOOPY); + dfs_set_status(ptr_host, DFS_LOOPY); } } diff --git a/engine/src/configuration/applier/host.cc b/engine/src/configuration/applier/host.cc index ea2a7dc2f76..e270a98b0e6 100644 --- a/engine/src/configuration/applier/host.cc +++ 
b/engine/src/configuration/applier/host.cc @@ -482,10 +482,8 @@ void applier::host::modify_object(configuration::host const& obj) { if (obj.parents() != obj_old.parents()) { // Delete old parents. { - for (host_map_unsafe::iterator it(it_obj->second->parent_hosts.begin()), - end(it_obj->second->parent_hosts.end()); - it != end; it++) - broker_relation_data(NEBTYPE_PARENT_DELETE, it->second, nullptr, + for (const auto& [_, sptr_host] : it_obj->second->parent_hosts) + broker_relation_data(NEBTYPE_PARENT_DELETE, sptr_host.get(), nullptr, it_obj->second.get(), nullptr); } it_obj->second->parent_hosts.clear(); @@ -727,10 +725,9 @@ void applier::host::modify_object(configuration::Host* old_obj, if (parents_changed) { // Delete old parents. - for (auto it = h->parent_hosts.begin(), end = h->parent_hosts.end(); - it != end; it++) - broker_relation_data(NEBTYPE_PARENT_DELETE, it->second, nullptr, h.get(), - nullptr); + for (const auto& [_, sptr_host] : h->parent_hosts) + broker_relation_data(NEBTYPE_PARENT_DELETE, sptr_host.get(), nullptr, + h.get(), nullptr); h->parent_hosts.clear(); // Create parents. @@ -787,6 +784,11 @@ void applier::host::remove_object(configuration::host const& obj) { for (auto& it_h : it->second->get_parent_groups()) it_h->members.erase(it->second->name()); + // remove any relations + for (const auto& [_, sptr_host] : it->second->parent_hosts) + broker_relation_data(NEBTYPE_PARENT_DELETE, sptr_host.get(), nullptr, + it->second.get(), nullptr); + // Notify event broker. for (auto it_s = it->second->services.begin(); it_s != it->second->services.end(); ++it_s) @@ -833,6 +835,11 @@ void applier::host::remove_object(ssize_t idx) { for (auto& it_h : it->second->get_parent_groups()) it_h->members.erase(it->second->name()); + // remove any relations + for (const auto& [_, sptr_host] : it->second->parent_hosts) + broker_relation_data(NEBTYPE_PARENT_DELETE, sptr_host.get(), nullptr, + it->second.get(), nullptr); + // Notify event broker. 
for (auto it_s = it->second->services.begin(); it_s != it->second->services.end(); ++it_s) @@ -870,10 +877,8 @@ void applier::host::resolve_object(const configuration::host& obj, // It is necessary to do it only once to prevent the removal // of valid child backlinks. if (obj == *config->hosts().begin()) { - for (host_map::iterator it(engine::host::hosts.begin()), - end(engine::host::hosts.end()); - it != end; ++it) - it->second->child_hosts.clear(); + for (const auto& [_, sptr_host] : engine::host::hosts) + sptr_host->child_hosts.clear(); } // Find host. @@ -911,10 +916,8 @@ void applier::host::resolve_object(const configuration::Host& obj, // It is necessary to do it only once to prevent the removal // of valid child backlinks. if (&obj == &(*pb_config.hosts().begin())) { - for (host_map::iterator it(engine::host::hosts.begin()), - end(engine::host::hosts.end()); - it != end; ++it) - it->second->child_hosts.clear(); + for (const auto& [_, sptr_host] : engine::host::hosts) + sptr_host->child_hosts.clear(); } // Find host. diff --git a/engine/src/host.cc b/engine/src/host.cc index 1296834af0d..2c18afaef22 100644 --- a/engine/src/host.cc +++ b/engine/src/host.cc @@ -581,13 +581,25 @@ int host::get_current_state_int() const { } std::ostream& operator<<(std::ostream& os, host_map_unsafe const& obj) { - for (host_map_unsafe::const_iterator it{obj.begin()}, end{obj.end()}; - it != end; ++it) { - os << it->first; - if (std::next(it) != end) + bool first = true; + for (const auto& [key, _] : obj) { + if (!first) { os << ", "; - else - os << ""; + } + os << key; + first = false; + } + return os; +} + +std::ostream& operator<<(std::ostream& os, host_map const& obj) { + bool first = true; + for (const auto& [key, _] : obj) { + if (!first) { + os << ", "; + } + os << key; + first = false; } return os; } @@ -1035,8 +1047,7 @@ int is_host_immediate_child_of_host(com::centreon::engine::host* parent_host, } // Mid-level/bottom hosts. 
else { - host_map_unsafe::const_iterator it{ - child_host->parent_hosts.find(parent_host->name())}; + auto it{child_host->parent_hosts.find(parent_host->name())}; return it != child_host->parent_hosts.end(); } @@ -1070,9 +1081,8 @@ int is_host_immediate_parent_of_host(com::centreon::engine::host* child_host, */ int number_of_immediate_child_hosts(com::centreon::engine::host* hst) { int children(0); - for (host_map::iterator it{host::hosts.begin()}, end{host::hosts.end()}; - it != end; ++it) - if (is_host_immediate_child_of_host(hst, it->second.get())) + for (const auto& [_, sptr_host] : host::hosts) + if (is_host_immediate_child_of_host(hst, sptr_host.get())) ++children; return children; } @@ -1088,9 +1098,8 @@ int number_of_immediate_child_hosts(com::centreon::engine::host* hst) { */ int number_of_immediate_parent_hosts(com::centreon::engine::host* hst) { int parents(0); - for (host_map::iterator it{host::hosts.begin()}, end{host::hosts.end()}; - it != end; ++it) - if (is_host_immediate_parent_of_host(hst, it->second.get())) + for (const auto& [_, sptr_host] : host::hosts) + if (is_host_immediate_parent_of_host(hst, sptr_host.get())) ++parents; return parents; } @@ -1106,10 +1115,9 @@ int number_of_immediate_parent_hosts(com::centreon::engine::host* hst) { */ int number_of_total_child_hosts(com::centreon::engine::host* hst) { int children(0); - for (host_map::iterator it{host::hosts.begin()}, end{host::hosts.end()}; - it != end; ++it) - if (is_host_immediate_child_of_host(hst, it->second.get())) - children += number_of_total_child_hosts(it->second.get()) + 1; + for (const auto& [_, sptr_host] : host::hosts) + if (is_host_immediate_child_of_host(hst, sptr_host.get())) + children += number_of_total_child_hosts(sptr_host.get()) + 1; return children; } @@ -3255,17 +3263,15 @@ int host::process_check_result_3x(enum host::host_state new_state, SPDLOG_LOGGER_DEBUG(checks_logger, "Propagating checks to parent host(s)..."); - for (host_map_unsafe::iterator 
it{parent_hosts.begin()}, - end{parent_hosts.end()}; - it != end; it++) { - if (!it->second) + for (const auto& [key, sptr_host] : parent_hosts) { + if (!sptr_host) continue; - if (it->second->get_current_state() != host::state_up) { + if (sptr_host->get_current_state() != host::state_up) { engine_logger(dbg_checks, more) - << "Check of parent host '" << it->first << "' queued."; + << "Check of parent host '" << key << "' queued."; SPDLOG_LOGGER_DEBUG(checks_logger, - "Check of parent host '{}' queued.", it->first); - check_hostlist.push_back(it->second); + "Check of parent host '{}' queued.", key); + check_hostlist.push_back(sptr_host.get()); } } @@ -3277,17 +3283,15 @@ int host::process_check_result_3x(enum host::host_state new_state, SPDLOG_LOGGER_DEBUG(checks_logger, "Propagating checks to child host(s)..."); - for (host_map_unsafe::iterator it{child_hosts.begin()}, - end{child_hosts.end()}; - it != end; it++) { - if (!it->second) + for (const auto& [key, ptr_host] : child_hosts) { + if (!ptr_host) continue; - if (it->second->get_current_state() != host::state_up) { + if (ptr_host->get_current_state() != host::state_up) { engine_logger(dbg_checks, more) - << "Check of child host '" << it->first << "' queued."; + << "Check of child host '" << key << "' queued."; SPDLOG_LOGGER_DEBUG(checks_logger, "Check of child host '{}' queued.", - it->first); - check_hostlist.push_back(it->second); + key); + check_hostlist.push_back(ptr_host); } } } @@ -3377,24 +3381,21 @@ int host::process_check_result_3x(enum host::host_state new_state, "** WARNING: Max attempts = 1, so we have to run serial " "checks of all parent hosts!"); - for (host_map_unsafe::iterator it{parent_hosts.begin()}, - end{parent_hosts.end()}; - it != end; it++) { - if (!it->second) + for (const auto& [key, sptr_host] : parent_hosts) { + if (!sptr_host) continue; has_parent = true; engine_logger(dbg_checks, more) - << "Running serial check parent host '" << it->first << "'..."; - 
SPDLOG_LOGGER_DEBUG(checks_logger, - "Running serial check parent host '{}'...", - it->first); + << "Running serial check parent host '" << key << "'..."; + SPDLOG_LOGGER_DEBUG( + checks_logger, "Running serial check parent host '{}'...", key); /* run an immediate check of the parent host */ - it->second->run_sync_check_3x(&parent_state, check_options, - use_cached_result, - check_timestamp_horizon); + sptr_host->run_sync_check_3x(&parent_state, check_options, + use_cached_result, + check_timestamp_horizon); /* bail out as soon as we find one parent host that is UP */ if (parent_state == host::state_up) { @@ -3444,17 +3445,15 @@ int host::process_check_result_3x(enum host::host_state new_state, checks_logger, "Propagating check to immediate non-UNREACHABLE child hosts..."); - for (host_map_unsafe::iterator it{child_hosts.begin()}, - end{child_hosts.end()}; - it != end; it++) { - if (!it->second) + for (const auto& [key, ptr_host] : child_hosts) { + if (!ptr_host) continue; - if (it->second->get_current_state() != host::state_unreachable) { + if (ptr_host->get_current_state() != host::state_unreachable) { engine_logger(dbg_checks, more) - << "Check of child host '" << it->first << "' queued."; + << "Check of child host '" << key << "' queued."; SPDLOG_LOGGER_DEBUG(checks_logger, - "Check of child host '{}' queued.", it->first); - check_hostlist.push_back(it->second); + "Check of child host '{}' queued.", key); + check_hostlist.push_back(ptr_host); } } } @@ -3488,17 +3487,15 @@ int host::process_check_result_3x(enum host::host_state new_state, "Propagating checks to immediate parent hosts that " "are UP..."); - for (host_map_unsafe::iterator it{parent_hosts.begin()}, - end{parent_hosts.end()}; - it != end; it++) { - if (it->second == nullptr) + for (const auto& [key, sptr_host] : parent_hosts) { + if (sptr_host == nullptr) continue; - if (it->second->get_current_state() == host::state_up) { - check_hostlist.push_back(it->second); + if (sptr_host->get_current_state() 
== host::state_up) { + check_hostlist.push_back(sptr_host.get()); engine_logger(dbg_checks, more) - << "Check of host '" << it->first << "' queued."; + << "Check of host '" << key << "' queued."; SPDLOG_LOGGER_DEBUG(checks_logger, "Check of host '{}' queued.", - it->first); + key); } } @@ -3511,17 +3508,15 @@ int host::process_check_result_3x(enum host::host_state new_state, "Propagating checks to immediate non-UNREACHABLE " "child hosts..."); - for (host_map_unsafe::iterator it{child_hosts.begin()}, - end{child_hosts.end()}; - it != end; it++) { - if (!it->second) + for (const auto& [key, ptr_host] : child_hosts) { + if (!ptr_host) continue; - if (it->second->get_current_state() != host::state_unreachable) { + if (ptr_host->get_current_state() != host::state_unreachable) { engine_logger(dbg_checks, more) - << "Check of child host '" << it->first << "' queued."; + << "Check of child host '" << key << "' queued."; SPDLOG_LOGGER_DEBUG(checks_logger, - "Check of child host '{}' queued.", it->first); - check_hostlist.push_back(it->second); + "Check of child host '{}' queued.", key); + check_hostlist.push_back(ptr_host); } } @@ -3739,22 +3734,20 @@ enum host::host_state host::determine_host_reachability( /* check all parent hosts to see if we're DOWN or UNREACHABLE */ else { - for (host_map_unsafe::iterator it{parent_hosts.begin()}, - end{parent_hosts.end()}; - it != end; it++) { - if (!it->second) + for (const auto& [key, sptr_host] : parent_hosts) { + if (!sptr_host) continue; /* bail out as soon as we find one parent host that is UP */ - if (it->second->get_current_state() == host::state_up) { + if (sptr_host->get_current_state() == host::state_up) { is_host_present = true; /* set the current state */ state = host::state_down; - engine_logger(dbg_checks, most) << "At least one parent (" << it->first - << ") is up, so host is DOWN."; + engine_logger(dbg_checks, most) + << "At least one parent (" << key << ") is up, so host is DOWN."; SPDLOG_LOGGER_DEBUG(checks_logger, 
"At least one parent ({}) is up, so host is DOWN.", - it->first); + key); break; } } @@ -4102,22 +4095,20 @@ void host::resolve(uint32_t& w, uint32_t& e) { } /* check all parent parent host */ - for (host_map_unsafe::iterator it(parent_hosts.begin()), - end(parent_hosts.end()); - it != end; it++) { - host_map::const_iterator it_host{host::hosts.find(it->first)}; + for (auto& [key, sptr_host] : parent_hosts) { + host_map::const_iterator it_host{host::hosts.find(key)}; if (it_host == host::hosts.end() || !it_host->second) { - engine_logger(log_verification_error, basic) << "Error: '" << it->first + engine_logger(log_verification_error, basic) << "Error: '" << key << "' is not a " "valid parent for host '" << name() << "'!"; config_logger->error("Error: '{}' is not a valid parent for host '{}'!", - it->first, name()); + key, name()); errors++; } else { - it->second = it_host->second.get(); - it_host->second->add_child_host( - this); // add a reverse (child) link to make searches faster later on + sptr_host = it_host->second; + it_host->second->add_child_host(this); // add a reverse (child) link to + // make searches faster later on } } diff --git a/engine/src/macros/grab_host.cc b/engine/src/macros/grab_host.cc index c611278d665..22cf6899213 100644 --- a/engine/src/macros/grab_host.cc +++ b/engine/src/macros/grab_host.cc @@ -184,12 +184,10 @@ std::string get_host_total_services(host& hst, nagios_macros* mac) { static std::string get_host_parents(host& hst, nagios_macros* mac) { (void)mac; std::string retval; - for (host_map_unsafe::const_iterator it(hst.parent_hosts.begin()), - end(hst.parent_hosts.end()); - it != end; it++) { + for (const auto& [key, _] : hst.parent_hosts) { if (!retval.empty()) retval.append(","); - retval.append(it->first); + retval.append(key); } return retval; } @@ -205,12 +203,10 @@ static std::string get_host_parents(host& hst, nagios_macros* mac) { static std::string get_host_children(host& hst, nagios_macros* mac) { (void)mac; std::string 
retval; - for (host_map_unsafe::const_iterator it(hst.child_hosts.begin()), - end(hst.child_hosts.end()); - it != end; it++) { + for (const auto& [key, _] : hst.child_hosts) { if (!retval.empty()) retval.append(","); - retval.append(it->first); + retval.append(key); } return retval; } diff --git a/tests/broker-engine/parent_child_relation.robot b/tests/broker-engine/parent_child_relation.robot new file mode 100644 index 00000000000..e469e94d252 --- /dev/null +++ b/tests/broker-engine/parent_child_relation.robot @@ -0,0 +1,260 @@ +*** Settings *** +Documentation Centreon Engine/Broker verify relation parent child host. + +Resource ../resources/import.resource + +Suite Setup Ctn Clean Before Suite +Suite Teardown Ctn Clean After Suite +Test Setup Ctn Stop Processes +Test Teardown Ctn Save Logs If Failed + + +*** Test Cases *** + +EBPN0 + [Documentation] Verify if child is in queue when parent is down. + [Tags] broker engine MON-151686 + + Ctn Config Engine ${1} ${5} ${1} + Ctn Config Broker rrd + Ctn Config Broker central + Ctn Config Broker module + Ctn Config BBDO3 1 + + Ctn Broker Config Log rrd rrd trace + Ctn Broker Config Log central sql debug + Ctn Broker Config Log rrd core error + Ctn Engine Config Set Value 0 log_level_checks debug + Ctn Config Broker Sql Output central unified_sql 10 + Ctn Broker Config Flush Log central 0 + Ctn Broker Config Flush Log rrd 0 + + Ctn Clear Retention + Ctn Clear Db resources + + # force the check result to 2 + Ctn Config Host Command Status ${0} checkh1 2 + + # host_1 is parent of host_2 + Ctn Add Parent To Host 0 host_2 host_1 + + ${start} Get Current Date + Ctn Start Broker + Ctn Start Engine + Ctn Wait For Engine To Be Ready ${start} ${1} + + # check if host_2 is child of host_1 + Connect To Database pymysql ${DBName} ${DBUser} ${DBPass} ${DBHost} ${DBPort} + + FOR ${index} IN RANGE 30 + ${output} Query + ... 
SELECT child_id, parent_id FROM hosts_hosts_parents + Log To Console ${output} + Sleep 1s + IF "${output}" == "((2, 1),)" BREAK + END + Should Be Equal As Strings ${output} ((2, 1),) host parent not inserted + + # check if host_1 is pending + ${result} Ctn Check Host Status host_1 4 1 True + Should Be True ${result} host_1 should be pending + + ${result} Ctn Check Host Status host_2 4 1 True + Should Be True ${result} host_2 should be pending + + ${content} Create List INITIAL HOST STATE: host_1; + ${result} Ctn Find In Log With Timeout ${engineLog0} ${start} ${content} 60 + Should Be True + ... ${result} + ... An Initial host state on host_1 should be raised before we can start our external commands. + + Ctn Process Host Check Result host_1 0 host_1 UP + + FOR ${i} IN RANGE ${4} + Ctn Schedule Forced Host Check host_1 ${VarRoot}/lib/centreon-engine/config0/rw/centengine.cmd + Sleep 1s + END + + ${content} Create List + ... EXTERNAL COMMAND: SCHEDULE_FORCED_HOST_CHECK;host_1; + ... HOST ALERT: host_1;DOWN;HARD; + + ${result} Ctn Find In Log With Timeout ${engineLog0} ${start} ${content} 60 + Should Be True ${result} Message about SCHEDULE HOST should be down in log. + + ${result} Ctn Check Host Status host_1 1 1 True + Should Be True ${result} host_1 should be down/hard + + ${content} Create List + ... Check of child host 'host_2' queued. + ${result} Ctn Find In Log With Timeout ${engineLog0} ${start} ${content} 60 + Should Be True ${result} Check of child host 'host_2' should be queued. + + Disconnect From Database + Ctn Stop Engine + Ctn Kindly Stop Broker + +EBPN1 + [Documentation] verify relation parent child when delete parent. 
+ [Tags] broker engine MON-151686 + + Ctn Config Engine ${1} ${5} ${1} + Ctn Config Broker rrd + Ctn Config Broker central + Ctn Config Broker module + Ctn Config BBDO3 1 + + Ctn Broker Config Log rrd rrd error + Ctn Broker Config Log rrd core error + Ctn Broker Config Log module0 core error + + Ctn Broker Config Log central sql debug + Ctn Engine Config Set Value 0 log_level_checks error + Ctn Config Broker Sql Output central unified_sql 10 + Ctn Broker Config Flush Log central 0 + Ctn Broker Config Flush Log rrd 0 + + Ctn Clear Retention + + # host_1 is parent of host_2 + Ctn Add Parent To Host 0 host_2 host_1 + + ${start} Get Current Date + Ctn Start Broker + Ctn Start Engine + Ctn Wait For Engine To Be Ready ${start} ${1} + + ${output} Ctn Get Host Info Grpc ${2} + Log To Console parents:${output}[parentHosts] + Should Contain ${output}[parentHosts] host_1 parentHosts + + ${output} Ctn Get Host Info Grpc ${1} + Log To Console childs:${output}[childHosts] + Should Contain ${output}[childHosts] host_2 childHosts + + Connect To Database pymysql ${DBName} ${DBUser} ${DBPass} ${DBHost} ${DBPort} + + FOR ${index} IN RANGE 30 + ${output} Query + ... SELECT child_id, parent_id FROM hosts_hosts_parents + Log To Console ${output} + Sleep 1s + IF "${output}" == "((2, 1),)" BREAK + END + Should Be Equal As Strings ${output} ((2, 1),) the parent link not inserted + + Ctn Engine Config Del Block In Cfg ${0} host host_1 hosts.cfg + Ctn Engine Config Del Block In Cfg ${0} service host_1 services.cfg + Ctn Engine Config Delete Value In Hosts ${0} host_2 parents + + ${start} Get Current Date + Ctn Reload Engine + Ctn Wait For Engine To Be Ready ${start} ${1} + ${content} Create List Reload configuration finished + ${result} Ctn Find In Log With Timeout + ... ${ENGINE_LOG}/config0/centengine.log + ... ${start} + ... ${content} + ... 60 + ... verbose=False + Should Be True ${result} Engine is Not Ready after 60s!! 
+ + + ${output} Ctn Get Host Info Grpc ${2} + Log To Console parents:${output}[parentHosts] + Should Be Empty ${output}[parentHosts] + + FOR ${index} IN RANGE 30 + ${output} Query + ... SELECT child_id, parent_id FROM hosts_hosts_parents + Log To Console ${output} + Sleep 1s + IF "${output}" == "()" BREAK + END + Should Be Equal As Strings ${output} () the parent link should be deleted + + Disconnect From Database + Ctn Stop Engine + Ctn Kindly Stop Broker + +EBPN2 + [Documentation] verify relation parent child when delete child. + [Tags] broker engine MON-151686 + + Ctn Config Engine ${1} ${5} ${1} + Ctn Config Broker rrd + Ctn Config Broker central + Ctn Config Broker module + Ctn Config BBDO3 1 + + Ctn Broker Config Log rrd rrd error + Ctn Broker Config Log rrd core error + Ctn Broker Config Log module0 core error + + Ctn Broker Config Log central sql debug + Ctn Engine Config Set Value 0 log_level_checks error + Ctn Config Broker Sql Output central unified_sql 10 + Ctn Broker Config Flush Log central 0 + Ctn Broker Config Flush Log rrd 0 + + Ctn Clear Retention + + # host_1 is parent of host_2 + Ctn Add Parent To Host 0 host_2 host_1 + + ${start} Get Current Date + Ctn Start Broker + Ctn Start Engine + Ctn Wait For Engine To Be Ready ${start} ${1} + + ${output} Ctn Get Host Info Grpc ${2} + Log To Console parents:${output}[parentHosts] + Should Contain ${output}[parentHosts] host_1 parentHosts + + ${output} Ctn Get Host Info Grpc ${1} + Log To Console childs:${output}[childHosts] + Should Contain ${output}[childHosts] host_2 childHosts + + Connect To Database pymysql ${DBName} ${DBUser} ${DBPass} ${DBHost} ${DBPort} + + FOR ${index} IN RANGE 30 + ${output} Query + ... 
SELECT child_id, parent_id FROM hosts_hosts_parents + Log To Console ${output} + Sleep 1s + IF "${output}" == "((2, 1),)" BREAK + END + Should Be Equal As Strings ${output} ((2, 1),) the parent link not inserted + + Ctn Engine Config Del Block In Cfg ${0} host host_2 hosts.cfg + Ctn Engine Config Del Block In Cfg ${0} service host_2 services.cfg + Ctn Engine Config Delete Value In Hosts ${0} host_2 parents + + ${start} Get Current Date + Ctn Reload Engine + Ctn Wait For Engine To Be Ready ${start} ${1} + ${content} Create List Reload configuration finished + ${result} Ctn Find In Log With Timeout + ... ${ENGINE_LOG}/config0/centengine.log + ... ${start} + ... ${content} + ... 60 + ... verbose=False + Should Be True ${result} Engine is Not Ready after 60s!! + + ${output} Ctn Get Host Info Grpc ${1} + Log To Console childs:${output}[childHosts] + Should Be Empty ${output}[childHosts] + + FOR ${index} IN RANGE 30 + ${output} Query + ... SELECT child_id, parent_id FROM hosts_hosts_parents + Log To Console ${output} + Sleep 1s + IF "${output}" == "()" BREAK + END + Should Be Equal As Strings ${output} () the parent link should be deleted + + Disconnect From Database + Ctn Stop Engine + Ctn Kindly Stop Broker \ No newline at end of file diff --git a/tests/resources/Engine.py b/tests/resources/Engine.py index 4df82695dea..019bdd681a8 100755 --- a/tests/resources/Engine.py +++ b/tests/resources/Engine.py @@ -24,6 +24,7 @@ import math from google.protobuf import empty_pb2 from google.protobuf.timestamp_pb2 import Timestamp +from google.protobuf.json_format import MessageToDict import engine_pb2 import engine_pb2_grpc import opentelemetry.proto.collector.metrics.v1.metrics_service_pb2 @@ -3720,3 +3721,58 @@ def ctn_send_otl_to_engine(port: int, resource_metrics: list): logger.console("gRPC server not ready") +def ctn_engine_config_del_block_in_cfg(idx: int, type: str, key: str, file): + """ + Delete a element in the file given for the Engine configuration idx. 
+ + Args: + idx (int): Index of the Engine configuration (from 0) + type (str): The type (host/service/...). + key (str): The parameter that will be deleted. + file (str): The file to delete the key from. + """ + filename = f"{ETC_ROOT}/centreon-engine/config{idx}/{file}" + + with open(filename, "r") as f: + content = f.read() + + if type == "host": + pattern = rf"define host \{{\s*host_name\s+{re.escape(key)}\b.*?\}}" + elif type == "service": + pattern = rf"define service \{{\s*host_name\s+{re.escape(key)}\b.*?\}}" + + # Use re.sub to remove the matched block + new_content = re.sub(pattern, '', content, flags=re.DOTALL) + new_content = re.sub(r'\n\s*\n', '\n', new_content) + + if content != new_content: + with open(filename, "w") as f: + f.write(new_content) + else: + logger.console(f'\n\033[91mFailed : Cannot delete the block with the type : {type} and the key : {key} in {file}\033[0m') + +def ctn_get_host_info_grpc(id:int): + """ + Retrieve host information via a gRPC call. + + Args: + id: The identifier of the host to retrieve. + + Returns: + A dictionary containing the host informations, if successfully retrieved. 
+ """ + if id is not None: + limit = time.time() + 30 + while time.time() < limit: + time.sleep(1) + with grpc.insecure_channel("127.0.0.1:50001") as channel: + stub = engine_pb2_grpc.EngineStub(channel) + request = engine_pb2.HostIdentifier(id=id) + try: + host = stub.GetHost(request) + host_dict = MessageToDict(host, always_print_fields_with_no_presence=True) + return host_dict + except Exception as e: + logger.console(f"gRPC server not ready {e}") + return {} + From 3c2277d4a376195ac6c3fddf103d56894136e737 Mon Sep 17 00:00:00 2001 From: pkippes <144150042+pkippes@users.noreply.github.com> Date: Thu, 14 Nov 2024 10:58:39 +0100 Subject: [PATCH 10/13] chore(release) bump collect to 24.10.2 --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 881c4cc326c..e2ce0657192 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -119,7 +119,7 @@ endif() # Version. set(COLLECT_MAJOR 24) set(COLLECT_MINOR 10) -set(COLLECT_PATCH 1) +set(COLLECT_PATCH 2) set(COLLECT_VERSION "${COLLECT_MAJOR}.${COLLECT_MINOR}.${COLLECT_PATCH}") From 475c0974a82af4510c720e4b0c571734e5ff1a44 Mon Sep 17 00:00:00 2001 From: pkippes <144150042+pkippes@users.noreply.github.com> Date: Thu, 14 Nov 2024 10:59:08 +0100 Subject: [PATCH 11/13] chore(release) bump gorgone to 24.10.1 --- gorgone/.version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gorgone/.version b/gorgone/.version index be51a9cda2e..b199fceb8a9 100644 --- a/gorgone/.version +++ b/gorgone/.version @@ -1 +1 @@ -MINOR=0 +MINOR=1 From df18ba218e9299de562f10d2f79a0912571d3cb8 Mon Sep 17 00:00:00 2001 From: pkippes <144150042+pkippes@users.noreply.github.com> Date: Thu, 14 Nov 2024 10:59:40 +0100 Subject: [PATCH 12/13] chore(release) bump collect to 24.10.2 --- .version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.version b/.version index fc21cce9317..63bda96baa3 100644 --- a/.version +++ b/.version @@ -1,2 +1,2 @@ MAJOR=24.10 -MINOR=1 
+MINOR=2 From d72912fd6b9f2db2d9ee76d2dce017498a7f108e Mon Sep 17 00:00:00 2001 From: David Boucher Date: Thu, 21 Nov 2024 07:54:36 +0100 Subject: [PATCH 13/13] fix(broker/sql): two issues in the mysql object MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * typo on hosts_hostgroups (#1871) (#1883) Co-authored-by: Stéphane Duret * Fix parsing labels with brackets in their names (but not data source types) Co-authored-by: Vincent Untz * A possible segfault fixed. * An issue on errors raised by mariadb that can have errno=0 now. REFS: MON-153675 --- broker/CMakeLists.txt | 2 +- broker/core/sql/src/mysql_connection.cc | 27 ++- broker/core/sql/src/mysql_multi_insert.cc | 6 +- broker/unified_sql/src/stream_sql.cc | 2 +- common/src/perfdata.cc | 5 +- common/tests/perfdata_test.cc | 15 ++ engine/tests/string/string.cc | 11 ++ .../services-and-bulk-stmt.robot | 156 +++++++++++++++- tests/broker-engine/services-increased.robot | 4 +- tests/resources/Broker.py | 174 +++++++++++++++++- tests/resources/resources.resource | 18 +- 11 files changed, 386 insertions(+), 34 deletions(-) diff --git a/broker/CMakeLists.txt b/broker/CMakeLists.txt index ad2373471fe..a17b6afbf17 100644 --- a/broker/CMakeLists.txt +++ b/broker/CMakeLists.txt @@ -468,7 +468,7 @@ target_link_libraries( # Standalone binary. add_executable(cbd ${SRC_DIR}/main.cc) -add_dependencies(cbd multiplexing centreon_common) +add_dependencies(cbd multiplexing centreon_common pb_neb_lib) # Flags needed to include all symbols in binary. 
target_link_libraries( diff --git a/broker/core/sql/src/mysql_connection.cc b/broker/core/sql/src/mysql_connection.cc index 5c6d2548bba..13f5fd1be7c 100644 --- a/broker/core/sql/src/mysql_connection.cc +++ b/broker/core/sql/src/mysql_connection.cc @@ -16,6 +16,7 @@ * For more information : contact@centreon.com */ #include +#include #include "com/centreon/broker/config/applier/init.hh" #include "com/centreon/broker/misc/misc.hh" @@ -460,18 +461,26 @@ void mysql_connection::_statement(mysql_task* t) { "mysql_connection {:p}: execute statement {:x} attempt {}: {}", static_cast(this), task->statement_id, attempts, query); if (mysql_stmt_execute(stmt)) { - std::string err_msg( - fmt::format("{} errno={} {}", mysql_error::msg[task->error_code], - ::mysql_errno(_conn), ::mysql_stmt_error(stmt))); - SPDLOG_LOGGER_ERROR(_logger, - "connection fail to execute statement {:p}: {}", - static_cast(this), err_msg); - if (_server_error(::mysql_stmt_errno(stmt))) { + int32_t err_code = ::mysql_stmt_errno(stmt); + std::string err_msg(fmt::format("{} errno={} {}", + mysql_error::msg[task->error_code], + err_code, ::mysql_stmt_error(stmt))); + if (err_code == 0) { + SPDLOG_LOGGER_ERROR(_logger, + "mysql_connection: errno=0, so we simulate a " + "server error CR_SERVER_LOST"); + err_code = CR_SERVER_LOST; + } else { + SPDLOG_LOGGER_ERROR(_logger, + "connection fail to execute statement {:p}: {}", + static_cast(this), err_msg); + } + if (_server_error(err_code)) { set_error_message(err_msg); break; } - if (mysql_stmt_errno(stmt) != 1213 && - mysql_stmt_errno(stmt) != 1205) // Dead Lock error + if (err_code != ER_LOCK_DEADLOCK && + err_code != ER_LOCK_WAIT_TIMEOUT) // Dead Lock error attempts = MAX_ATTEMPTS; if (mysql_commit(_conn)) { diff --git a/broker/core/sql/src/mysql_multi_insert.cc b/broker/core/sql/src/mysql_multi_insert.cc index cafc020e386..7d375cb82cd 100644 --- a/broker/core/sql/src/mysql_multi_insert.cc +++ b/broker/core/sql/src/mysql_multi_insert.cc @@ -132,7 +132,11 @@ 
void bulk_or_multi::execute(mysql& connexion, my_error::code ec, int thread_id) { if (_bulk_stmt) { - if (!_bulk_bind->empty()) { + /* If the database connection is lost, we can have this issue */ + if (!_bulk_bind) { + _bulk_bind = _bulk_stmt->create_bind(); + _bulk_bind->reserve(_bulk_row); + } else if (!_bulk_bind->empty()) { _bulk_stmt->set_bind(std::move(_bulk_bind)); connexion.run_statement(*_bulk_stmt, ec, thread_id); _bulk_bind = _bulk_stmt->create_bind(); diff --git a/broker/unified_sql/src/stream_sql.cc b/broker/unified_sql/src/stream_sql.cc index 5ad2836dbeb..c436c36805d 100644 --- a/broker/unified_sql/src/stream_sql.cc +++ b/broker/unified_sql/src/stream_sql.cc @@ -1503,7 +1503,7 @@ void stream::_process_pb_host_group_member(const std::shared_ptr& d) { } std::string query = fmt::format( - "DELETE FROM hosts_hostgroup WHERE host_id={} and hostgroup_id = {}", + "DELETE FROM hosts_hostgroups WHERE host_id={} and hostgroup_id = {}", hgm.host_id(), hgm.hostgroup_id()); _mysql.run_query(query, database::mysql_error::delete_host_group_member, diff --git a/common/src/perfdata.cc b/common/src/perfdata.cc index 80945b75950..0d6f5b89af3 100644 --- a/common/src/perfdata.cc +++ b/common/src/perfdata.cc @@ -265,18 +265,21 @@ std::list perfdata::parse_perfdata( /* The label is given by s and finishes at end */ if (*end == ']') { - --end; if (strncmp(s, "a[", 2) == 0) { s += 2; + --end; p._value_type = perfdata::data_type::absolute; } else if (strncmp(s, "c[", 2) == 0) { s += 2; + --end; p._value_type = perfdata::data_type::counter; } else if (strncmp(s, "d[", 2) == 0) { s += 2; + --end; p._value_type = perfdata::data_type::derive; } else if (strncmp(s, "g[", 2) == 0) { s += 2; + --end; p._value_type = perfdata::data_type::gauge; } } diff --git a/common/tests/perfdata_test.cc b/common/tests/perfdata_test.cc index bab234f9522..c64d9fe623a 100644 --- a/common/tests/perfdata_test.cc +++ b/common/tests/perfdata_test.cc @@ -623,3 +623,18 @@ TEST_F(PerfdataParser, 
BadMetric1) { ++i; } } + +TEST_F(PerfdataParser, ExtractPerfdataBrackets) { + std::string perfdata( + "'xx[aa a aa]'=2;3;7;1;9 '[a aa]'=12;25;50;0;118 'aa a]'=28;13;54;0;80"); + auto lst{common::perfdata::parse_perfdata(0, 0, perfdata.c_str(), _logger)}; + auto it = lst.begin(); + ASSERT_NE(it, lst.end()); + ASSERT_EQ(it->name(), "xx[aa a aa]"); + ++it; + ASSERT_NE(it, lst.end()); + ASSERT_EQ(it->name(), "[a aa]"); + ++it; + ASSERT_NE(it, lst.end()); + ASSERT_EQ(it->name(), "aa a]"); +} diff --git a/engine/tests/string/string.cc b/engine/tests/string/string.cc index 3486ba7e15d..e0adeb7217d 100644 --- a/engine/tests/string/string.cc +++ b/engine/tests/string/string.cc @@ -62,6 +62,17 @@ TEST(string_utils, extractPerfdataGaugeDiff) { "d[aa a]=28;13;54;0;80"); } +TEST(string_utils, extractPerfdataBrackets) { + std::string perfdata( + "'xx[aa a aa]'=2;3;7;1;9 '[a aa]'=12;25;50;0;118 'aa a]'=28;13;54;0;80"); + ASSERT_EQ(string::extract_perfdata(perfdata, "xx[aa a aa]"), + "'xx[aa a aa]'=2;3;7;1;9"); + ASSERT_EQ(string::extract_perfdata(perfdata, "[a aa]"), + "'[a aa]'=12;25;50;0;118"); + ASSERT_EQ(string::extract_perfdata(perfdata, "aa a]"), + "'aa a]'=28;13;54;0;80"); +} + TEST(string_utils, removeThresholdsWithoutThresholds) { std::string perfdata("a=2V"); ASSERT_EQ(string::remove_thresholds(perfdata), "a=2V"); diff --git a/tests/broker-engine/services-and-bulk-stmt.robot b/tests/broker-engine/services-and-bulk-stmt.robot index c6d070e8b05..4bb3255a524 100644 --- a/tests/broker-engine/services-and-bulk-stmt.robot +++ b/tests/broker-engine/services-and-bulk-stmt.robot @@ -29,7 +29,7 @@ EBBPS1 ${start} Get Current Date ${start_broker} Get Current Date Ctn Start Broker - Ctn Start engine + Ctn Start Engine Ctn Wait For Engine To Be Ready ${start} FOR ${i} IN RANGE ${1000} @@ -52,6 +52,7 @@ EBBPS1 IF "${output}" == "((0,),)" BREAK END Should Be Equal As Strings ${output} ((0,),) + Disconnect From Database FOR ${i} IN RANGE ${1000} Ctn Process Service Check Result host_1 
service_${i+1} 2 warning${i} @@ -89,6 +90,7 @@ EBBPS1 IF "${output}" == "((0,),)" BREAK END Should Be Equal As Strings ${output} ((0,),) + Disconnect From Database EBBPS2 [Documentation] 1000 service check results are sent to the poller. The test is done with the unified_sql stream, no service status is lost, we find the 1000 results in the database: table services. @@ -109,7 +111,7 @@ EBBPS2 ${start} Get Current Date ${start_broker} Get Current Date Ctn Start Broker - Ctn Start engine + Ctn Start Engine ${content} Create List INITIAL SERVICE STATE: host_1;service_1000; ${result} Ctn Find In Log With Timeout ${engineLog0} ${start} ${content} 30 Should Be True @@ -135,6 +137,7 @@ EBBPS2 IF "${output}" == "((0,),)" BREAK END Should Be Equal As Strings ${output} ((0,),) + Disconnect From Database FOR ${i} IN RANGE ${1000} Ctn Process Service Check Result host_1 service_${i+1} 2 critical${i} @@ -171,6 +174,7 @@ EBBPS2 IF "${output}" == "((0,),)" BREAK END Should Be Equal As Strings ${output} ((0,),) + Disconnect From Database EBMSSM [Documentation] 1000 services are configured with 100 metrics each. The rrd output is removed from the broker configuration. GetSqlManagerStats is called to measure writes into data_bin. @@ -191,7 +195,7 @@ EBMSSM Ctn Clear Retention ${start} Get Current Date Ctn Start Broker - Ctn Start engine + Ctn Start Engine Ctn Broker Set Sql Manager Stats 51001 5 5 # Let's wait for the external command check start @@ -217,6 +221,7 @@ EBMSSM Sleep 1s END Should Be True ${output[0][0]} >= 100000 + Disconnect From Database EBPS2 [Documentation] 1000 services are configured with 20 metrics each. The rrd output is removed from the broker configuration to avoid to write too many rrd files. While metrics are written in bulk, the database is stopped. This must not crash broker. 
@@ -240,7 +245,7 @@ EBPS2 ${start} Get Current Date Ctn Start Broker - Ctn Start engine + Ctn Start Engine # Let's wait for the external command check start ${content} Create List check_for_external_commands() ${result} Ctn Find In Log With Timeout ${engineLog0} ${start} ${content} 60 @@ -294,7 +299,7 @@ RLCode ${start} Get Current Date Ctn Start Broker - Ctn Start engine + Ctn Start Engine ${content} Create List check_for_external_commands() ${result} Ctn Find In Log With Timeout ${engineLog0} ${start} ${content} 60 @@ -364,7 +369,7 @@ metric_mapping ${start} Get Current Date Ctn Start Broker - Ctn Start engine + Ctn Start Engine ${content} Create List check_for_external_commands() ${result} Ctn Find In Log With Timeout ${engineLog0} ${start} ${content} 60 @@ -404,7 +409,7 @@ Services_and_bulks_${id} ${start} Get Current Date Ctn Start Broker - Ctn Start engine + Ctn Start Engine Ctn Broker Set Sql Manager Stats 51001 5 5 # Let's wait for the external command check start @@ -435,6 +440,143 @@ Services_and_bulks_${id} ... 1 1020 ... 2 150 +EBMSSMDBD + [Documentation] 1000 services are configured with 100 metrics each. + ... The rrd output is removed from the broker configuration. + ... While metrics are written in the database, we stop the database and then restart it. + ... Broker must recover its connection to the database and continue to write metrics. + [Tags] broker engine unified_sql MON-152743 + Ctn Clear Metrics + Ctn Config Engine ${1} ${1} ${1000} + # We want all the services to be passive to avoid parasite checks during our test. 
+ Ctn Set Services Passive ${0} service_.* + Ctn Config Broker central + Ctn Config Broker rrd + Ctn Config Broker module ${1} + Ctn Config BBDO3 1 + Ctn Broker Config Log central core error + Ctn Broker Config Log central tcp error + Ctn Broker Config Log central sql debug + Ctn Config Broker Sql Output central unified_sql + Ctn Config Broker Remove Rrd Output central + Ctn Clear Retention + ${start} Get Current Date + Ctn Start Broker + Ctn Start Engine + + Ctn Wait For Engine To Be Ready ${start} 1 + + ${start} Ctn Get Round Current Date + # Let's wait for one "INSERT INTO data_bin" to appear in stats. + Log To Console Many service checks with 100 metrics each are processed. + FOR ${i} IN RANGE ${1000} + Ctn Process Service Check Result With Metrics host_1 service_${i+1} 1 warning${i} 100 + END + + Log To Console We wait for at least one metric to be written in the database. + # Let's wait for all force checks to be in the storage database. + Connect To Database pymysql ${DBName} ${DBUser} ${DBPass} ${DBHost} ${DBPort} + FOR ${i} IN RANGE ${500} + ${output} Query + ... SELECT COUNT(s.last_check) FROM metrics m LEFT JOIN index_data i ON m.index_id = i.id LEFT JOIN services s ON s.host_id = i.host_id AND s.service_id = i.service_id WHERE metric_name LIKE "metric_%%" AND s.last_check >= ${start} + IF ${output[0][0]} >= 1 BREAK + Sleep 1s + END + Disconnect From Database + + Log To Console Let's start some database manipulation... + ${start} Get Current Date + + FOR ${i} IN RANGE ${3} + Ctn Stop Mysql + Sleep 10s + Ctn Start Mysql + ${content} Create List could not insert data in data_bin + ${result} Ctn Find In Log With Timeout ${centralLog} ${start} ${content} 10 + Log To Console ${result} + END + +EBMSSMPART + [Documentation] 1000 services are configured with 100 metrics each. + ... The rrd output is removed from the broker configuration. + ... The data_bin table is configured with two partitions p1 and p2 such + ... 
that p1 contains old data and p2 contains current data. + ... While metrics are written in the database, we remove the p2 partition. + ... Once the p2 partition is recreated, broker must recover its connection + ... to the database and continue to write metrics. + ... To check that last point, we force a last service check and we check + ... that its metrics are written in the database. + [Tags] broker engine unified_sql MON-152743 + Ctn Clear Metrics + Ctn Config Engine ${1} ${1} ${1000} + # We want all the services to be passive to avoid parasite checks during our test. + Ctn Set Services Passive ${0} service_.* + Ctn Config Broker central + Ctn Config Broker rrd + Ctn Config Broker module ${1} + Ctn Config BBDO3 1 + Ctn Broker Config Log central core error + Ctn Broker Config Log central tcp error + Ctn Broker Config Log central sql trace + Ctn Config Broker Sql Output central unified_sql + Ctn Config Broker Remove Rrd Output central + Ctn Clear Retention + + Ctn Prepare Partitions For Data Bin + ${start} Get Current Date + Ctn Start Broker + Ctn Start Engine + + Ctn Wait For Engine To Be Ready ${start} 1 + + ${start} Ctn Get Round Current Date + # Let's wait for one "INSERT INTO data_bin" to appear in stats. + Log To Console Many service checks with 100 metrics each are processed. + FOR ${i} IN RANGE ${1000} + Ctn Process Service Check Result With Metrics host_1 service_${i+1} 1 warning${i} 100 + END + + Log To Console We wait for at least one metric to be written in the database. + # Let's wait for all force checks to be in the storage database. + Connect To Database pymysql ${DBName} ${DBUser} ${DBPass} ${DBHost} ${DBPort} + FOR ${i} IN RANGE ${500} + ${output} Query + ... 
SELECT COUNT(s.last_check) FROM metrics m LEFT JOIN index_data i ON m.index_id = i.id LEFT JOIN services s ON s.host_id = i.host_id AND s.service_id = i.service_id WHERE metric_name LIKE "metric_%%" AND s.last_check >= ${start} + IF ${output[0][0]} >= 1 BREAK + Sleep 1s + END + Disconnect From Database + + Log To Console Let's start some database manipulation... + Ctn Remove P2 From Data Bin + ${start} Get Current Date + + ${content} Create List errno= + FOR ${i} IN RANGE ${6} + ${result} Ctn Find In Log With Timeout ${centralLog} ${start} ${content} 10 + IF ${result} BREAK + END + + Log To Console Let's recreate the p2 partition... + Ctn Add P2 To Data Bin + + ${start} Ctn Get Round Current Date + Ctn Process Service Check Result With Metrics host_1 service_1 0 Last Output OK 100 + + Log To Console Let's wait for the last service check to be in the database... + Connect To Database pymysql ${DBName} ${DBUser} ${DBPass} ${DBHost} ${DBPort} + FOR ${i} IN RANGE ${120} + ${output} Query SELECT count(*) FROM data_bin WHERE ctime >= ${start} - 10 + Log To Console ${output} + IF ${output[0][0]} >= 100 BREAK + Sleep 1s + END + Log To Console ${output} + Should Be True ${output[0][0]} >= 100 + Disconnect From Database + + Ctn Init Data Bin Without Partition + *** Keywords *** Ctn Test Clean diff --git a/tests/broker-engine/services-increased.robot b/tests/broker-engine/services-increased.robot index d64909f265f..b6c2ad86e52 100644 --- a/tests/broker-engine/services-increased.robot +++ b/tests/broker-engine/services-increased.robot @@ -42,7 +42,7 @@ EBNSVC1 ${result} Ctn Check Number Of Resources Monitored By Poller Is ${3} ${nb_res} 30 Should Be True ${result} Poller 3 should monitor ${nb_srv} services and 16 hosts. END - Ctn Stop engine + Ctn Stop Engine Ctn Kindly Stop Broker Service_increased_huge_check_interval @@ -154,4 +154,4 @@ Service_increased_huge_check_interval ... 
rra[0].pdp_per_row must be equal to 5400 for metric ${m} END - [Teardown] Run Keywords Ctn Stop engine AND Ctn Kindly Stop Broker + [Teardown] Run Keywords Ctn Stop Engine AND Ctn Kindly Stop Broker diff --git a/tests/resources/Broker.py b/tests/resources/Broker.py index e23c0fd98a7..61acb9c12f2 100755 --- a/tests/resources/Broker.py +++ b/tests/resources/Broker.py @@ -1690,7 +1690,7 @@ def ctn_get_service_index(host_id: int, service_id: int, timeout: int = 60): my_id = [r['id'] for r in result] if len(my_id) > 0: logger.console( - f"Index data {id} found for service {host_id}:{service_id}") + f"Index data {id} found for service {host_id}:{service_id}") return my_id[0] time.sleep(2) logger.console(f"no index data found for service {host_id}:{service_id}") @@ -1780,7 +1780,6 @@ def ctn_compare_metrics_of_service(service_id: int, metrics: list, timeout: int return False - def ctn_get_not_existing_metrics(count: int): """ Return a list of metrics that does not exist. @@ -2082,14 +2081,16 @@ def ctn_get_indexes_to_rebuild(count: int, nb_day=180): dt = now.replace(hour=0, minute=0, second=0, microsecond=0) start = dt - datetime.timedelta(days=nb_day) start = int(start.timestamp()) - logger.console(f">>>>>>>>>> start = {datetime.datetime.fromtimestamp(start)}") + logger.console( + f">>>>>>>>>> start = {datetime.datetime.fromtimestamp(start)}") value = int(r['metric_id']) // 2 status_value = index_id % 3 cursor.execute("DELETE FROM data_bin WHERE id_metric={} AND ctime >= {}".format( r['metric_id'], start)) # We set the value to a constant on 180 days now = int(now.timestamp()) - logger.console(f">>>>>>>>>> end = {datetime.datetime.fromtimestamp(now)}") + logger.console( + f">>>>>>>>>> end = {datetime.datetime.fromtimestamp(now)}") for i in range(start, now, 60 * 5): if i == start: logger.console( @@ -2974,3 +2975,168 @@ def ctn_get_broker_log_info(port, log, timeout=TIMEOUT): except: logger.console("gRPC server not ready") return str(res) + + +def aes_encrypt(port, 
app_secret, salt, content, timeout: int = 30): + """ + Send a gRPC command to aes encrypt a content + + Args: + port (int): the port to the gRPC server. + app_secret (str): The APP_SECRET base64 encoded. + salt (str): Salt base64 encoded. + content (str): The content to encrypt. + + Returns: + The encrypted result string or an error message. + """ + limit = time.time() + timeout + encoded = "" + while time.time() < limit: + time.sleep(1) + with grpc.insecure_channel(f"127.0.0.1:{port}") as channel: + stub = broker_pb2_grpc.BrokerStub(channel) + te = broker_pb2.AesMessage() + te.app_secret = app_secret + te.salt = salt + te.content = content + try: + encoded = stub.Aes256Encrypt(te) + break + except grpc.RpcError as rpc_error: + return rpc_error.details() + except: + logger.console("gRPC server not ready") + + return encoded.str_arg + + +def aes_decrypt(port, app_secret, salt, content, timeout: int = 30): + """ + Send a gRPC command to aes decrypt a content + + Args: + port (int): the port to the gRPC server. + app_secret (str): The APP_SECRET base64 encoded. + salt (str): Salt base64 encoded. + content (str): The content to decrypt. + + Returns: + The decrypted result string or an error message. + """ + limit = time.time() + timeout + encoded = "" + while time.time() < limit: + time.sleep(1) + with grpc.insecure_channel(f"127.0.0.1:{port}") as channel: + stub = broker_pb2_grpc.BrokerStub(channel) + te = broker_pb2.AesMessage() + te.app_secret = app_secret + te.salt = salt + te.content = content + try: + encoded = stub.Aes256Decrypt(te) + break + except grpc.RpcError as rpc_error: + return rpc_error.details() + except: + logger.console("gRPC server not ready") + + return encoded.str_arg + + +def ctn_prepare_partitions_for_data_bin(): + """ + Create two partitions for the data_bin table. + The first one named p1 contains data with ctime older than now - 60. + The second one named p2 contains data with ctime older than now + 3600. 
+ """ + connection = pymysql.connect(host=DB_HOST, + user=DB_USER, + password=DB_PASS, + database=DB_NAME_STORAGE, + charset='utf8mb4', + cursorclass=pymysql.cursors.DictCursor) + + now = int(time.time()) + before = now - 60 + after = now + 3600 + with connection: + with connection.cursor() as cursor: + cursor.execute("DROP TABLE IF EXISTS data_bin") + sql = f"""CREATE TABLE `data_bin` ( + `id_metric` int(11) DEFAULT NULL, + `ctime` int(11) DEFAULT NULL, + `value` float DEFAULT NULL, + `status` enum('0','1','2','3','4') DEFAULT NULL, + KEY `index_metric` (`id_metric`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 + PARTITION BY RANGE (`ctime`) +(PARTITION `p1` VALUES LESS THAN ({before}) ENGINE = InnoDB, + PARTITION `p2` VALUES LESS THAN ({after}) ENGINE = InnoDB)""" + cursor.execute(sql) + connection.commit() + + +def ctn_remove_p2_from_data_bin(): + """ + Remove the partition p2 from the data_bin table. + """ + connection = pymysql.connect(host=DB_HOST, + user=DB_USER, + password=DB_PASS, + database=DB_NAME_STORAGE, + charset='utf8mb4', + cursorclass=pymysql.cursors.DictCursor) + + with connection: + with connection.cursor() as cursor: + cursor.execute("ALTER TABLE data_bin DROP PARTITION p2") + connection.commit() + + +def ctn_add_p2_to_data_bin(): + """ + Add the partition p2 the the data_bin table. + """ + connection = pymysql.connect(host=DB_HOST, + user=DB_USER, + password=DB_PASS, + database=DB_NAME_STORAGE, + charset='utf8mb4', + cursorclass=pymysql.cursors.DictCursor) + + after = int(time.time()) + 3600 + with connection: + with connection.cursor() as cursor: + cursor.execute( + f"ALTER TABLE data_bin ADD PARTITION (PARTITION p2 VALUES LESS THAN ({after}))") + connection.commit() + + +def ctn_init_data_bin_without_partition(): + """ + Recreate the data_bin table without partition. 
+ """ + connection = pymysql.connect(host=DB_HOST, + user=DB_USER, + password=DB_PASS, + database=DB_NAME_STORAGE, + charset='utf8mb4', + cursorclass=pymysql.cursors.DictCursor) + + now = int(time.time()) + before = now - 60 + after = now + 3600 + with connection: + with connection.cursor() as cursor: + cursor.execute("DROP TABLE IF EXISTS data_bin") + sql = f"""CREATE TABLE `data_bin` ( + `id_metric` int(11) DEFAULT NULL, + `ctime` int(11) DEFAULT NULL, + `value` float DEFAULT NULL, + `status` enum('0','1','2','3','4') DEFAULT NULL, + KEY `index_metric` (`id_metric`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1""" + cursor.execute(sql) + connection.commit() diff --git a/tests/resources/resources.resource b/tests/resources/resources.resource index d1143f9da95..bcf0b5de4b6 100644 --- a/tests/resources/resources.resource +++ b/tests/resources/resources.resource @@ -369,13 +369,15 @@ Ctn Dump Ba On Error Ctn Process Service Result Hard [Arguments] ${host} ${svc} ${state} ${output} - Repeat Keyword - ... 3 times - ... Ctn Process Service Check Result - ... ${host} - ... ${svc} - ... ${state} - ... ${output} + FOR ${idx} IN RANGE 3 + Ctn Process Service Check Result + ... ${host} + ... ${svc} + ... ${state} + ... ${output} + Sleep 1s + END + Ctn Wait For Engine To Be Ready [Arguments] ${start} ${nbEngine}=1 @@ -385,7 +387,7 @@ Ctn Wait For Engine To Be Ready ${result} Ctn Find In Log With Timeout ... ${ENGINE_LOG}/config${i}/centengine.log ... ${start} ${content} 60 - ... verbose=False + ... verbose=False Should Be True ... ${result} ... A message telling check_for_external_commands() should be available in config${i}/centengine.log.