diff --git a/.github/actions/common/artifact_utils.py b/.github/actions/common/artifact_utils.py index 58c96fbe5dedd6..cbef0e76024b95 100644 --- a/.github/actions/common/artifact_utils.py +++ b/.github/actions/common/artifact_utils.py @@ -9,7 +9,9 @@ def add_common_args(parser: argparse.ArgumentParser): parser.add_argument('-s', '--commit_sha', help='Commit hash for which artifacts were generated', required=True) parser.add_argument('-b', '--branch_name', help='Name of GitHub branch', required=False, - default=os.getenv('GITHUB_BASE_REF') or os.getenv('GITHUB_REF_NAME')) + default=os.getenv('GITHUB_BASE_REF') or + os.getenv('MERGE_QUEUE_BASE_REF').replace('refs/heads/', '') or + os.getenv('GITHUB_REF_NAME')) parser.add_argument('-e', '--event_name', help='Name of GitHub event', required=False, default=os.getenv('GITHUB_EVENT_NAME')) parser.add_argument('--storage_root', help='Root path of the artifacts storage', required=False, diff --git a/.github/actions/openvino_provider/action.yml b/.github/actions/openvino_provider/action.yml index 8cc865c5f5b461..131abb59b5e252 100644 --- a/.github/actions/openvino_provider/action.yml +++ b/.github/actions/openvino_provider/action.yml @@ -145,7 +145,7 @@ runs: - name: Upload commit OpenVINO archives if: steps.openvino_commit_download.outcome == 'success' && !inputs.install_dir - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 with: name: ${{ steps.openvino_commit_output.outputs.ov_artifact_name }} path: ${{ steps.openvino_commit_output.outputs.ov_package_path }} @@ -188,7 +188,7 @@ runs: - name: Upload OpenVINO archives if: steps.openvino_s3_download.outcome == 'success' && !inputs.install_dir - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 with: name: ${{ steps.openvino_s3_download.outputs.ov_artifact_name }} path: ${{ steps.openvino_s3_download.outputs.ov_package_path }} diff --git a/.github/actions/restore_artifacts/action.yml b/.github/actions/restore_artifacts/action.yml index efa0dfe87ae28f..df5b857cd93f58 100644 --- a/.github/actions/restore_artifacts/action.yml +++ b/.github/actions/restore_artifacts/action.yml @@ -60,3 +60,4 @@ runs: EVENT_PARAM: "-e ${{ inputs.event_name }}" TO_RESTORE: "-r ${{ inputs.to_restore }}" DEFAULT_TARGET_DIR: "${{ env.GITHUB_WORKSPACE }}/${{ inputs.storage_dir || inputs.platform }}" + MERGE_QUEUE_BASE_REF: "${{ github.event.merge_group.base_ref }}" diff --git a/.github/actions/store_artifacts/action.yml b/.github/actions/store_artifacts/action.yml index 4e50953d8c9874..b652f707b75fcd 100644 --- a/.github/actions/store_artifacts/action.yml +++ b/.github/actions/store_artifacts/action.yml @@ -60,3 +60,4 @@ runs: EVENT_PARAM: "-e ${{ inputs.event_name }}" STORAGE_PARAM: "--storage_dir ${{ inputs.storage_dir }}" PLATFORM_PARAM: "--platform ${{ inputs.platform }}" + MERGE_QUEUE_BASE_REF: "" diff --git a/.github/actions/store_artifacts/store_artifacts.py b/.github/actions/store_artifacts/store_artifacts.py index 75cd452b562070..7b761d042b6aae 100644 --- a/.github/actions/store_artifacts/store_artifacts.py +++ b/.github/actions/store_artifacts/store_artifacts.py @@ -98,13 +98,14 @@ def main(): with open(storage / 'workflow_link.txt', 'w') as file: file.write(workflow_link) - latest_artifacts_for_branch = artifact_utils.get_latest_artifacts_link(storage_dir, args.storage_root, - args.branch_name, args.event_name) - # Overwrite path to "latest" built artifacts only if a given commit is the 
head of a given branch - if args.event_name != 'pull_request' and args.commit_sha == os.getenv('GITHUB_SHA'): - # TODO: lock to avoid corruption in case of a parallel build (unlikely event for now, but still) - with open(latest_artifacts_for_branch, 'w') as file: - file.write(str(storage.relative_to(storage_root))) + if not error_found: + latest_artifacts_for_branch = artifact_utils.get_latest_artifacts_link(storage_dir, args.storage_root, + args.branch_name, args.event_name) + # Overwrite path to "latest" built artifacts only if a given commit is the head of a given branch + if args.event_name != 'pull_request' and args.commit_sha == os.getenv('GITHUB_SHA'): + # TODO: lock to avoid corruption in case of a parallel build (unlikely event for now, but still) + with open(latest_artifacts_for_branch, 'w') as file: + file.write(str(storage.relative_to(storage_root))) logger.debug(f"Copying finished") (storage / 'copying_finished').touch() diff --git a/.github/dockerfiles/docker_tag b/.github/dockerfiles/docker_tag index 413b6019a3b0be..71aa82072f1638 100644 --- a/.github/dockerfiles/docker_tag +++ b/.github/dockerfiles/docker_tag @@ -1 +1 @@ -pr-26586 \ No newline at end of file +pr-26954 \ No newline at end of file diff --git a/.github/dockerfiles/ov_build/debian_10_arm/Dockerfile b/.github/dockerfiles/ov_build/debian_10_arm/Dockerfile index 85cabbd04020dd..0f64afbb20fdc2 100644 --- a/.github/dockerfiles/ov_build/debian_10_arm/Dockerfile +++ b/.github/dockerfiles/ov_build/debian_10_arm/Dockerfile @@ -19,6 +19,8 @@ RUN apt-get update && \ git \ gpg-agent \ tzdata \ + # parallel gzip + pigz \ # Pythons \ python3 \ python3-pip \ diff --git a/.github/dockerfiles/ov_build/fedora_29/Dockerfile b/.github/dockerfiles/ov_build/fedora_29/Dockerfile index 17797df62f08db..e5f400e2915e9c 100644 --- a/.github/dockerfiles/ov_build/fedora_29/Dockerfile +++ b/.github/dockerfiles/ov_build/fedora_29/Dockerfile @@ -18,6 +18,8 @@ RUN yum update -y && yum install -y \ tar \ gcc \ gcc-c++ \ + # parallel gzip + pigz \ xz # Install build dependencies diff --git a/.github/dockerfiles/ov_build/ubuntu_20_04_arm64/Dockerfile b/.github/dockerfiles/ov_build/ubuntu_20_04_arm64/Dockerfile index 54c1910c7c3385..c7d0e95164f414 100644 --- a/.github/dockerfiles/ov_build/ubuntu_20_04_arm64/Dockerfile +++ b/.github/dockerfiles/ov_build/ubuntu_20_04_arm64/Dockerfile @@ -23,6 +23,8 @@ RUN apt-get update && \ ca-certificates \ gpg-agent \ tzdata \ + # parallel gzip + pigz \ # Pythons python3.8-dev \ python3.8-venv \ diff --git a/.github/dockerfiles/ov_build/ubuntu_20_04_x64/Dockerfile b/.github/dockerfiles/ov_build/ubuntu_20_04_x64/Dockerfile index 886ed15a634974..53829ad50b2975 100644 --- a/.github/dockerfiles/ov_build/ubuntu_20_04_x64/Dockerfile +++ b/.github/dockerfiles/ov_build/ubuntu_20_04_x64/Dockerfile @@ -22,6 +22,8 @@ RUN apt-get update && \ git \ gpg-agent \ tzdata \ + # parallel gzip + pigz \ # Pythons python3.9-dev \ python3.9-venv \ diff --git a/.github/dockerfiles/ov_build/ubuntu_20_04_x64_nvidia/Dockerfile b/.github/dockerfiles/ov_build/ubuntu_20_04_x64_nvidia/Dockerfile index 24a78efea17cad..5df369bbb6398a 100644 --- a/.github/dockerfiles/ov_build/ubuntu_20_04_x64_nvidia/Dockerfile +++ b/.github/dockerfiles/ov_build/ubuntu_20_04_x64_nvidia/Dockerfile @@ -24,6 +24,8 @@ RUN apt-get update && \ ca-certificates \ gpg-agent \ tzdata \ + # parallel gzip + pigz \ # Pythons python3.8-dev \ python3.8-venv \ diff --git a/.github/dockerfiles/ov_build/ubuntu_22_04_riscv/Dockerfile 
b/.github/dockerfiles/ov_build/ubuntu_22_04_riscv/Dockerfile index 5a34bcbe7615b4..5911016b37d008 100644 --- a/.github/dockerfiles/ov_build/ubuntu_22_04_riscv/Dockerfile +++ b/.github/dockerfiles/ov_build/ubuntu_22_04_riscv/Dockerfile @@ -30,6 +30,8 @@ RUN apt-get update && \ ca-certificates \ gpg-agent \ tzdata \ + # parallel gzip + pigz \ # Python \ python3-dev \ python3-pip \ diff --git a/.github/dockerfiles/ov_build/ubuntu_22_04_x64/Dockerfile b/.github/dockerfiles/ov_build/ubuntu_22_04_x64/Dockerfile index 80e3c0259dc482..56dffb11e7ce12 100644 --- a/.github/dockerfiles/ov_build/ubuntu_22_04_x64/Dockerfile +++ b/.github/dockerfiles/ov_build/ubuntu_22_04_x64/Dockerfile @@ -23,6 +23,8 @@ RUN apt-get update && \ gpg-agent \ tzdata \ libtbb2 \ + # parallel gzip + pigz \ # Pythons \ python3.11-dev \ python3.11-venv \ diff --git a/.github/dockerfiles/ov_build/ubuntu_22_04_x64_cc/Dockerfile b/.github/dockerfiles/ov_build/ubuntu_22_04_x64_cc/Dockerfile index ac924bbc8171b9..4e044eab71ef84 100644 --- a/.github/dockerfiles/ov_build/ubuntu_22_04_x64_cc/Dockerfile +++ b/.github/dockerfiles/ov_build/ubuntu_22_04_x64_cc/Dockerfile @@ -24,6 +24,8 @@ RUN apt-get update && \ gpg-agent \ tzdata \ libtbb2 \ + # parallel gzip + pigz \ # Pythons python3.8-dev \ python3.8-venv \ diff --git a/.github/dockerfiles/ov_build/ubuntu_22_04_x64_dpcpp/Dockerfile b/.github/dockerfiles/ov_build/ubuntu_22_04_x64_dpcpp/Dockerfile index 8146ca89a99094..03e8246d6ddc32 100644 --- a/.github/dockerfiles/ov_build/ubuntu_22_04_x64_dpcpp/Dockerfile +++ b/.github/dockerfiles/ov_build/ubuntu_22_04_x64_dpcpp/Dockerfile @@ -25,6 +25,8 @@ RUN apt-get update && \ gpg-agent \ tzdata \ libtbb2 \ + # parallel gzip + pigz \ # Pythons \ python3.11-dev \ python3.11-venv \ diff --git a/.github/dockerfiles/ov_build/ubuntu_22_04_x64_nvidia/Dockerfile b/.github/dockerfiles/ov_build/ubuntu_22_04_x64_nvidia/Dockerfile index 1d92c6e92b4d6b..19f3e136995dc3 100644 --- a/.github/dockerfiles/ov_build/ubuntu_22_04_x64_nvidia/Dockerfile +++ b/.github/dockerfiles/ov_build/ubuntu_22_04_x64_nvidia/Dockerfile @@ -23,6 +23,8 @@ RUN apt-get update && \ git \ gpg-agent \ tzdata \ + # parallel gzip + pigz \ # Python python3.11-dev \ python3.11-venv \ diff --git a/.github/dockerfiles/ov_build/ubuntu_24_04_x64/Dockerfile b/.github/dockerfiles/ov_build/ubuntu_24_04_x64/Dockerfile index e92026bdefae2e..82a51ffc0b175e 100644 --- a/.github/dockerfiles/ov_build/ubuntu_24_04_x64/Dockerfile +++ b/.github/dockerfiles/ov_build/ubuntu_24_04_x64/Dockerfile @@ -22,6 +22,8 @@ RUN apt-get update && \ git \ gpg-agent \ tzdata \ + # parallel gzip + pigz \ # Python python3-dev \ python3-venv \ diff --git a/.github/dockerfiles/ov_test/ubuntu_20_04_arm64/Dockerfile b/.github/dockerfiles/ov_test/ubuntu_20_04_arm64/Dockerfile index 3f2359fbd61358..e46a1a2ea03ef6 100644 --- a/.github/dockerfiles/ov_test/ubuntu_20_04_arm64/Dockerfile +++ b/.github/dockerfiles/ov_test/ubuntu_20_04_arm64/Dockerfile @@ -23,6 +23,8 @@ RUN apt-get update && \ ca-certificates \ gpg-agent \ tzdata \ + # parallel gzip + pigz \ # Python python3.11-dev \ python3.11-venv \ diff --git a/.github/dockerfiles/ov_test/ubuntu_20_04_x64/Dockerfile b/.github/dockerfiles/ov_test/ubuntu_20_04_x64/Dockerfile index 38c377724f3ffe..9c6b4b08f498fa 100644 --- a/.github/dockerfiles/ov_test/ubuntu_20_04_x64/Dockerfile +++ b/.github/dockerfiles/ov_test/ubuntu_20_04_x64/Dockerfile @@ -22,6 +22,8 @@ RUN apt-get update && \ git \ gpg-agent \ tzdata \ + # parallel gzip + pigz \ # Python python3.9-dev \ python3.9-venv \ diff --git 
a/.github/dockerfiles/ov_test/ubuntu_22_04_x64/Dockerfile b/.github/dockerfiles/ov_test/ubuntu_22_04_x64/Dockerfile index 410a3f401949f2..2c68ea12d7b58f 100644 --- a/.github/dockerfiles/ov_test/ubuntu_22_04_x64/Dockerfile +++ b/.github/dockerfiles/ov_test/ubuntu_22_04_x64/Dockerfile @@ -23,6 +23,8 @@ RUN apt-get update && \ ca-certificates \ gpg-agent \ tzdata \ + # parallel gzip + pigz \ # Python python3.11-dev \ python3.11-venv \ diff --git a/.github/dockerfiles/ov_test/ubuntu_24_04_x64/Dockerfile b/.github/dockerfiles/ov_test/ubuntu_24_04_x64/Dockerfile index aa09c6015fddf2..4cb29c0174476f 100644 --- a/.github/dockerfiles/ov_test/ubuntu_24_04_x64/Dockerfile +++ b/.github/dockerfiles/ov_test/ubuntu_24_04_x64/Dockerfile @@ -20,6 +20,8 @@ RUN apt-get update && \ git \ gpg-agent \ tzdata \ + # parallel gzip + pigz \ # Python python3-full \ libhdf5-dev \ diff --git a/.github/workflows/build_doc.yml b/.github/workflows/build_doc.yml index ec08a12352d0f4..c3c7fc296ade61 100644 --- a/.github/workflows/build_doc.yml +++ b/.github/workflows/build_doc.yml @@ -48,7 +48,7 @@ jobs: run: | # install doxygen wget https://www.doxygen.nl/files/doxygen-$DOXY_VER.linux.bin.tar.gz - tar -xzf doxygen-$DOXY_VER.linux.bin.tar.gz + tar -I pigz -xf doxygen-$DOXY_VER.linux.bin.tar.gz echo "$(pwd)/doxygen-$DOXY_VER/bin/" >> $GITHUB_PATH - name: Validate benchmarks files diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index 25daf31604ab18..604ca0fdb81b29 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -103,7 +103,7 @@ jobs: rm -rf ${COVERITY_TOOL_DIR} && mkdir -p ${COVERITY_TOOL_DIR} pushd ${COVERITY_TOOL_DIR} wget https://scan.coverity.com/download/linux64 --progress=bar:force:noscroll --post-data "token=${{ secrets.COVERITY_TOKEN }}&project=openvino" -O coverity_tool.tgz - tar xvf coverity_tool.tgz && rm coverity_tool.tgz + tar -I pigz -xf coverity_tool.tgz && rm coverity_tool.tgz popd - name: Cmake build - OpenVINO with Coverity @@ -112,7 +112,7 @@ jobs: - name: Pack Artefacts run: | pushd ${BUILD_DIR} - tar -C ${BUILD_DIR} -czvf openvino.tgz cov-int + tar -C ${BUILD_DIR} -I pigz -cvf openvino.tgz cov-int popd - name: Submit artefacts diff --git a/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml b/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml index fef0b0dec8b433..c2da4c1b2d2f9c 100644 --- a/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml +++ b/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml @@ -158,11 +158,11 @@ jobs: run: | pushd ${INSTALL_DIR} - tar -czvf ${BUILD_DIR}/openvino_package.tar.gz * + tar -I pigz -cvf ${BUILD_DIR}/openvino_package.tar.gz * popd pushd ${INSTALL_TEST_DIR} - tar -czvf ${BUILD_DIR}/openvino_tests.tar.gz * + tar -I pigz -cvf ${BUILD_DIR}/openvino_tests.tar.gz * popd # @@ -230,11 +230,11 @@ jobs: - name: Extract OpenVINO packages run: | pushd $INSTALL_DIR - tar -xzf openvino_package.tar.gz -C $INSTALL_DIR + tar -I pigz -xf openvino_package.tar.gz -C $INSTALL_DIR popd pushd $INSTALL_TEST_DIR - tar -xzf openvino_tests.tar.gz -C $INSTALL_DIR + tar -I pigz -xf openvino_tests.tar.gz -C $INSTALL_DIR popd - name: Snippets func tests @@ -287,11 +287,11 @@ jobs: - name: Extract OpenVINO packages run: | pushd $INSTALL_DIR - tar -xzf openvino_package.tar.gz -C $INSTALL_DIR + tar -I pigz -xf openvino_package.tar.gz -C $INSTALL_DIR popd pushd $INSTALL_TEST_DIR - tar -xzf openvino_tests.tar.gz -C $INSTALL_DIR + tar -I pigz -xf openvino_tests.tar.gz -C $INSTALL_DIR popd - name: Fetch setup_python action diff --git 
a/.github/workflows/job_build_linux.yml b/.github/workflows/job_build_linux.yml index 0520336ccd20c0..5ea7e0638546a4 100644 --- a/.github/workflows/job_build_linux.yml +++ b/.github/workflows/job_build_linux.yml @@ -189,15 +189,15 @@ jobs: cp -R ${ONNX_RUNTIME_UTILS} ${INSTALL_DIR} pushd ${INSTALL_DIR} - tar -czvf ${BUILD_DIR}/openvino_package.tar.gz * + tar -I pigz -cvf ${BUILD_DIR}/openvino_package.tar.gz * popd pushd ${DEVELOPER_PACKAGE_DIR} - tar -czvf ${BUILD_DIR}/openvino_developer_package.tar.gz * + tar -I pigz -cvf ${BUILD_DIR}/openvino_developer_package.tar.gz * popd pushd ${INSTALL_TEST_DIR} - tar -czvf ${BUILD_DIR}/openvino_tests.tar.gz * + tar -I pigz -cvf ${BUILD_DIR}/openvino_tests.tar.gz * popd - name: Build Debian packages diff --git a/.github/workflows/job_cpu_functional_tests.yml b/.github/workflows/job_cpu_functional_tests.yml index 1ad48695eb5bcb..a0dcd52076cec0 100644 --- a/.github/workflows/job_cpu_functional_tests.yml +++ b/.github/workflows/job_cpu_functional_tests.yml @@ -55,11 +55,11 @@ jobs: - name: Extract OpenVINO packages run: | pushd $INSTALL_DIR - tar -xzf openvino_package.tar.gz -C $INSTALL_DIR + tar -I pigz -xf openvino_package.tar.gz -C $INSTALL_DIR popd pushd $INSTALL_TEST_DIR - tar -xzf openvino_tests.tar.gz -C $INSTALL_DIR + tar -I pigz -xf openvino_tests.tar.gz -C $INSTALL_DIR popd - name: Fetch setup_python action diff --git a/.github/workflows/job_cxx_unit_tests.yml b/.github/workflows/job_cxx_unit_tests.yml index 4e5b7aa342808d..e3b18a6984117b 100644 --- a/.github/workflows/job_cxx_unit_tests.yml +++ b/.github/workflows/job_cxx_unit_tests.yml @@ -64,10 +64,10 @@ jobs: if: ${{ runner.os != 'Windows' }} run: | pushd $INSTALL_DIR - tar -xzf openvino_package.tar.gz -C $INSTALL_DIR + tar -I pigz -xf openvino_package.tar.gz -C $INSTALL_DIR popd pushd $INSTALL_TEST_DIR - tar -xzf openvino_tests.tar.gz -C $INSTALL_DIR + tar -I pigz -xf openvino_tests.tar.gz -C $INSTALL_DIR popd - name: Extract OpenVINO packages (Windows) diff --git a/.github/workflows/job_gpu_tests.yml b/.github/workflows/job_gpu_tests.yml index a4831dd2193f7a..147afcccddfe17 100644 --- a/.github/workflows/job_gpu_tests.yml +++ b/.github/workflows/job_gpu_tests.yml @@ -59,10 +59,10 @@ jobs: - name: Extract OpenVINO packages run: | pushd $INSTALL_DIR - tar -xzf openvino_package.tar.gz -C $INSTALL_DIR + tar -I pigz -xf openvino_package.tar.gz -C $INSTALL_DIR popd pushd $INSTALL_TEST_DIR - tar -xzf openvino_tests.tar.gz -C $INSTALL_DIR + tar -I pigz -xf openvino_tests.tar.gz -C $INSTALL_DIR popd - name: Install dependencies (Linux) diff --git a/.github/workflows/job_jax_models_tests.yml b/.github/workflows/job_jax_models_tests.yml index 74dc28078e300f..8f9292d35fb803 100644 --- a/.github/workflows/job_jax_models_tests.yml +++ b/.github/workflows/job_jax_models_tests.yml @@ -64,11 +64,11 @@ jobs: - name: Extract OpenVINO packages run: | pushd ${INSTALL_DIR} - tar -xzf openvino_package.tar.gz -C ${INSTALL_DIR} + tar -I pigz -xf openvino_package.tar.gz -C ${INSTALL_DIR} popd pushd ${INSTALL_TEST_DIR} - tar -xzf openvino_tests.tar.gz -C ${INSTALL_DIR} + tar -I pigz -xf openvino_tests.tar.gz -C ${INSTALL_DIR} popd - name: Fetch setup_python action diff --git a/.github/workflows/job_onnx_models_tests.yml b/.github/workflows/job_onnx_models_tests.yml index 69fc100890a9f5..ffc4da8ef87b54 100644 --- a/.github/workflows/job_onnx_models_tests.yml +++ b/.github/workflows/job_onnx_models_tests.yml @@ -62,11 +62,11 @@ jobs: - name: Extract OpenVINO packages run: | pushd ${INSTALL_DIR} - tar -xzf 
openvino_package.tar.gz -C ${INSTALL_DIR} + tar -I pigz -xf openvino_package.tar.gz -C ${INSTALL_DIR} popd pushd ${INSTALL_TEST_DIR} - tar -xzf openvino_tests.tar.gz -C ${INSTALL_DIR} + tar -I pigz -xf openvino_tests.tar.gz -C ${INSTALL_DIR} popd # Issue 148922 diff --git a/.github/workflows/job_onnx_runtime.yml b/.github/workflows/job_onnx_runtime.yml index b7da6d827d542d..3a7bc4d53ef3b3 100644 --- a/.github/workflows/job_onnx_runtime.yml +++ b/.github/workflows/job_onnx_runtime.yml @@ -60,7 +60,7 @@ jobs: - name: Extract OpenVINO package run: | pushd ${INSTALL_DIR} - tar -xzf openvino_package.tar.gz -C ${INSTALL_DIR} + tar -I pigz -xf openvino_package.tar.gz -C ${INSTALL_DIR} popd - name: Clone ONNX Runtime diff --git a/.github/workflows/job_python_unit_tests.yml b/.github/workflows/job_python_unit_tests.yml index 26ee7933d5f3b3..e6ba39fdb3bfe3 100644 --- a/.github/workflows/job_python_unit_tests.yml +++ b/.github/workflows/job_python_unit_tests.yml @@ -66,10 +66,10 @@ jobs: - name: Extract OpenVINO packages run: | pushd $INSTALL_DIR - tar -xzf openvino_package.tar.gz -C $INSTALL_DIR + tar -I pigz -xf openvino_package.tar.gz -C $INSTALL_DIR popd pushd $INSTALL_TEST_DIR - tar -xzf openvino_tests.tar.gz -C $INSTALL_DIR + tar -I pigz -xf openvino_tests.tar.gz -C $INSTALL_DIR popd - name: Fetch setup_python action diff --git a/.github/workflows/job_pytorch_layer_tests.yml b/.github/workflows/job_pytorch_layer_tests.yml index 88b41f983f7094..2359bb2d2aea0f 100644 --- a/.github/workflows/job_pytorch_layer_tests.yml +++ b/.github/workflows/job_pytorch_layer_tests.yml @@ -7,10 +7,6 @@ on: description: 'Machine on which the tests would run' type: string required: true - shell: - description: "shell to override the default shell settings in the runner's operating system." - type: string - required: true container: description: 'JSON to be converted to the value of the "container" configuration for the job' type: string @@ -20,12 +16,15 @@ on: description: 'Components that are affected by changes in the commit defined by the Smart CI Action' type: string required: true + python-version: + description: 'Python version to setup. 
E.g., "3.11"' + type: string + required: true permissions: read-all env: PIP_CACHE_PATH: /mount/caches/pip/linux - PYTHON_VERSION: '3.11' jobs: PyTorch_Layer_Tests: @@ -35,7 +34,7 @@ jobs: container: ${{ fromJSON(inputs.container) }} defaults: run: - shell: ${{ inputs.shell }} + shell: ${{ contains(inputs.runner, 'win') && 'pwsh' || 'bash' }} env: DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input OPENVINO_REPO: ${{ github.workspace }}/openvino @@ -55,12 +54,6 @@ jobs: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} - - name: Download OpenVINO tokenizers extension - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 - with: - name: openvino_tokenizers_wheel - path: ${{ env.INSTALL_DIR }} - # Needed as ${{ github.workspace }} is not working correctly when using Docker - name: Setup Variables if: runner.os != 'Windows' @@ -74,10 +67,10 @@ jobs: if: runner.os != 'Windows' run: | pushd $INSTALL_DIR - tar -xzf openvino_package.tar.gz -C $INSTALL_DIR + tar -I pigz -xf openvino_package.tar.gz -C $INSTALL_DIR popd pushd $INSTALL_TEST_DIR - tar -xzf openvino_tests.tar.gz -C $INSTALL_DIR + tar -I pigz -xf openvino_tests.tar.gz -C $INSTALL_DIR popd - name: Extract OpenVINO packages (Windows) @@ -98,10 +91,10 @@ jobs: sparse-checkout-cone-mode: false path: 'openvino' - - name: Setup Python ${{ env.PYTHON_VERSION }} + - name: Setup Python ${{ inputs.python-version }} uses: ./openvino/.github/actions/setup_python with: - version: ${{ env.PYTHON_VERSION }} + version: ${{ inputs.python-version }} pip-cache-path: ${{ runner.os == 'Linux' && env.PIP_CACHE_PATH || '' }} should-setup-pip-paths: ${{ runner.os == 'Linux' }} self-hosted-runner: ${{ runner.os == 'Linux' }} @@ -112,9 +105,6 @@ jobs: # Install the core OV wheel python3 -m pip install ${INSTALL_DIR}/tools/openvino-*.whl - # Install the core OV Tokenizers wheel - python3 -m pip install ${INSTALL_DIR}/openvino_tokenizers-*.whl - - name: Install OpenVINO Python wheels (Windows) if: runner.os == 'Windows' run: | @@ -122,10 +112,6 @@ jobs: $ovCoreWheelPath=Get-ChildItem -Path ${{ env.INSTALL_DIR }}\tools -Filter openvino-*.whl | % { $_.FullName } python3 -m pip install "$ovCoreWheelPath" - # Find and install the core OV Tokenizers wheel - $ovCoreWheelPath=Get-ChildItem -Path ${{ env.INSTALL_DIR }} -Filter openvino_tokenizers-*.whl | % { $_.FullName } - python3 -m pip install "$ovCoreWheelPath" - - name: Install Pytorch Layer tests dependencies run: | # pytorch test requirements @@ -133,22 +119,25 @@ jobs: - name: PyTorch Layer Tests if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.arch != 'ARM64' }} # Ticket: 126287, 142196 - run: python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/pytorch_tests -n logical -m precommit --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-pytorch.xml + # due to CVS-152795, parallel run is not possible on Windows + run: python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/pytorch_tests ${PARALLEL} -m precommit --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-pytorch.xml env: TEST_DEVICE: CPU TEST_PRECISION: FP32 + PARALLEL: ${{ runner.os == 'Windows' && ' ' || '-n logical'}} - name: PyTorch torch.export Layer Tests - if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.arch != 'ARM64' }} # Ticket: 126287 + if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.arch != 'ARM64' && runner.os != 'Windows' }} # Ticket: 126287 run: | - python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/pytorch_tests -n logical -m 
precommit_torch_export --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-pytorch.xml + python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/pytorch_tests ${PARALLEL} -m precommit_torch_export --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-pytorch.xml env: TEST_DEVICE: CPU TEST_PRECISION: FP32 PYTORCH_TRACING_MODE: EXPORT + PARALLEL: ${{ runner.os == 'Windows' && ' ' || '-n logical'}} - name: PyTorch torch.compile TORCHFX Layer Tests - if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.os != 'macOS' && runner.arch != 'ARM64' }} # Ticket: 126287 + if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.os != 'macOS' && runner.arch != 'ARM64' && runner.os != 'Windows' }} # Ticket: 126287 run: | python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/pytorch_tests -m precommit_fx_backend --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-pytorch.xml env: diff --git a/.github/workflows/job_pytorch_models_tests.yml b/.github/workflows/job_pytorch_models_tests.yml index 6164bdb29c7d6f..bcbcad872b42e1 100644 --- a/.github/workflows/job_pytorch_models_tests.yml +++ b/.github/workflows/job_pytorch_models_tests.yml @@ -77,11 +77,11 @@ jobs: - name: Extract OpenVINO packages run: | pushd ${INSTALL_DIR} - tar -xzf openvino_package.tar.gz -C ${INSTALL_DIR} + tar -I pigz -xf openvino_package.tar.gz -C ${INSTALL_DIR} popd pushd ${INSTALL_TEST_DIR} - tar -xzf openvino_tests.tar.gz -C ${INSTALL_DIR} + tar -I pigz -xf openvino_tests.tar.gz -C ${INSTALL_DIR} popd - name: Fetch setup_python action diff --git a/.github/workflows/job_samples_tests.yml b/.github/workflows/job_samples_tests.yml index 0ada0e1557d965..cc314ee93ee876 100644 --- a/.github/workflows/job_samples_tests.yml +++ b/.github/workflows/job_samples_tests.yml @@ -55,10 +55,10 @@ jobs: - name: Extract OpenVINO packages run: | pushd $INSTALL_DIR - tar -xzf openvino_package.tar.gz -C $INSTALL_DIR + tar -I pigz -xf openvino_package.tar.gz -C $INSTALL_DIR popd pushd $INSTALL_TEST_DIR - tar -xzf openvino_tests.tar.gz -C $INSTALL_DIR + tar -I pigz -xf openvino_tests.tar.gz -C $INSTALL_DIR popd - name: Install OpenVINO dependencies (mac) diff --git a/.github/workflows/job_tensorflow_layer_tests.yml b/.github/workflows/job_tensorflow_layer_tests.yml index 0801010b86bde3..cc9e2781923c33 100644 --- a/.github/workflows/job_tensorflow_layer_tests.yml +++ b/.github/workflows/job_tensorflow_layer_tests.yml @@ -7,10 +7,6 @@ on: description: 'Machine on which the tests would run' type: string required: true - shell: - description: "shell to override the default shell settings in the runner's operating system." - type: string - required: true container: description: 'JSON to be converted to the value of the "container" configuration for the job' type: string @@ -20,12 +16,15 @@ on: description: 'Components that are affected by changes in the commit defined by the Smart CI Action' type: string required: true + python-version: + description: 'Python version to setup. 
E.g., "3.11"' + type: string + required: true permissions: read-all env: PIP_CACHE_PATH: /mount/caches/pip/linux - PYTHON_VERSION: '3.11' jobs: TensorFlow_Layer_Tests: @@ -35,7 +34,7 @@ jobs: container: ${{ fromJSON(inputs.container) }} defaults: run: - shell: ${{ inputs.shell }} + shell: ${{ contains(inputs.runner, 'win') && 'pwsh' || 'bash' }} env: DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input OPENVINO_REPO: ${{ github.workspace }}/openvino @@ -74,10 +73,10 @@ jobs: if: runner.os != 'Windows' run: | pushd $INSTALL_DIR - tar -xzf openvino_package.tar.gz -C $INSTALL_DIR + tar -I pigz -xf openvino_package.tar.gz -C $INSTALL_DIR popd pushd $INSTALL_TEST_DIR - tar -xzf openvino_tests.tar.gz -C $INSTALL_DIR + tar -I pigz -xf openvino_tests.tar.gz -C $INSTALL_DIR popd - name: Extract OpenVINO packages (Windows) @@ -98,10 +97,10 @@ jobs: sparse-checkout-cone-mode: false path: 'openvino' - - name: Setup Python ${{ env.PYTHON_VERSION }} + - name: Setup Python ${{ inputs.python-version }} uses: ./openvino/.github/actions/setup_python with: - version: ${{ env.PYTHON_VERSION }} + version: ${{ inputs.python-version }} pip-cache-path: ${{ runner.os == 'Linux' && env.PIP_CACHE_PATH || '' }} should-setup-pip-paths: ${{ runner.os == 'Linux' }} self-hosted-runner: ${{ runner.os == 'Linux' }} diff --git a/.github/workflows/job_tensorflow_models_tests.yml b/.github/workflows/job_tensorflow_models_tests.yml index e9f6f130e0c24e..b2cdf5a6336db0 100644 --- a/.github/workflows/job_tensorflow_models_tests.yml +++ b/.github/workflows/job_tensorflow_models_tests.yml @@ -69,11 +69,11 @@ jobs: - name: Extract OpenVINO packages run: | pushd ${INSTALL_DIR} - tar -xzf openvino_package.tar.gz -C ${INSTALL_DIR} + tar -I pigz -xf openvino_package.tar.gz -C ${INSTALL_DIR} popd pushd ${INSTALL_TEST_DIR} - tar -xzf openvino_tests.tar.gz -C ${INSTALL_DIR} + tar -I pigz -xf openvino_tests.tar.gz -C ${INSTALL_DIR} popd - name: Fetch setup_python action diff --git a/.github/workflows/job_tokenizers.yml b/.github/workflows/job_tokenizers.yml index fdd0a76088c725..c01c2740201384 100644 --- a/.github/workflows/job_tokenizers.yml +++ b/.github/workflows/job_tokenizers.yml @@ -83,7 +83,7 @@ jobs: if: runner.os != 'Windows' run: | pushd ${INSTALL_DIR} - tar -xzf openvino_package.tar.gz -C ${INSTALL_DIR} + tar -I pigz -xf openvino_package.tar.gz -C ${INSTALL_DIR} popd - name: Extract OpenVINO packages (Windows) diff --git a/.github/workflows/linux_arm64.yml b/.github/workflows/linux_arm64.yml index 3506ca49846f45..e4e608f3aca6d4 100644 --- a/.github/workflows/linux_arm64.yml +++ b/.github/workflows/linux_arm64.yml @@ -173,19 +173,19 @@ jobs: uses: ./.github/workflows/job_tensorflow_layer_tests.yml with: runner: 'aks-linux-16-cores-arm' - shell: bash container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"]}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' Pytorch_Layer_Tests: name: Pytorch Layer Tests - needs: [ Build, Docker, Smart_CI, Openvino_tokenizers ] + needs: [ Build, Docker, Smart_CI ] uses: ./.github/workflows/job_pytorch_layer_tests.yml with: runner: 'aks-linux-16-cores-arm' - shell: bash container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"]}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' CPU_Functional_Tests: name: CPU functional tests diff --git 
a/.github/workflows/linux_conditional_compilation.yml b/.github/workflows/linux_conditional_compilation.yml index f8abe78439dbf6..50f3d2451071a2 100644 --- a/.github/workflows/linux_conditional_compilation.yml +++ b/.github/workflows/linux_conditional_compilation.yml @@ -200,17 +200,17 @@ jobs: - name: Pack Artifacts run: | pushd ${SELECTIVE_BUILD_STAT_DIR} - tar -czvf ${BUILD_DIR}/openvino_selective_build_stat.tar.gz * + tar -I pigz -cvf ${BUILD_DIR}/openvino_selective_build_stat.tar.gz * popd pushd ${INSTALL_DIR} - tar -czvf ${BUILD_DIR}/openvino_package.tar.gz \ + tar -I pigz -cvf ${BUILD_DIR}/openvino_package.tar.gz \ install_dependencies/install_openvino_dependencies.sh popd cp -v ${OPENVINO_REPO}/temp/tbb/lib/lib* ${INSTALL_TEST_DIR}/tests pushd ${INSTALL_TEST_DIR} - tar -czvf ${BUILD_DIR}/openvino_tests.tar.gz \ + tar -I pigz -cvf ${BUILD_DIR}/openvino_tests.tar.gz \ tests/ov_cpu_func_tests \ tests/libopenvino_template_extension.so \ tests/libze_loader.so* \ @@ -302,7 +302,7 @@ jobs: path: ${{ env.SELECTIVE_BUILD_STAT_DIR }} - name: Extract selective build statistics package - run: tar -xvzf ${SELECTIVE_BUILD_STAT_DIR}/openvino_selective_build_stat.tar.gz -C ${SELECTIVE_BUILD_STAT_DIR} + run: tar -I pigz -xvf ${SELECTIVE_BUILD_STAT_DIR}/openvino_selective_build_stat.tar.gz -C ${SELECTIVE_BUILD_STAT_DIR} # # Build diff --git a/.github/workflows/linux_sanitizers.yml b/.github/workflows/linux_sanitizers.yml index a56bbe54f82b53..5ee3a7ea3dde0b 100644 --- a/.github/workflows/linux_sanitizers.yml +++ b/.github/workflows/linux_sanitizers.yml @@ -175,11 +175,11 @@ jobs: - name: Pack Artifacts run: | pushd ${INSTALL_DIR} - tar -czvf ${BUILD_DIR}/openvino_package.tar.gz * + tar -I pigz -cvf ${BUILD_DIR}/openvino_package.tar.gz * popd pushd ${INSTALL_TEST_DIR} - tar -czvf ${BUILD_DIR}/openvino_tests.tar.gz * + tar -I pigz -cvf ${BUILD_DIR}/openvino_tests.tar.gz * popd # @@ -257,10 +257,10 @@ jobs: - name: Extract OpenVINO packages run: | pushd $INSTALL_DIR - tar -xzf openvino_package.tar.gz -C $INSTALL_DIR + tar -I pigz -xf openvino_package.tar.gz -C $INSTALL_DIR popd pushd $INSTALL_TEST_DIR - tar -xzf openvino_tests.tar.gz -C $INSTALL_DIR + tar -I pigz -xf openvino_tests.tar.gz -C $INSTALL_DIR popd - name: Install dependencies (Linux) diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index da3224fa483ad1..e7ed552bed70f5 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -172,11 +172,11 @@ jobs: - name: Pack Artifacts run: | pushd ${{ env.INSTALL_DIR }} - tar -czvf ${{ env.BUILD_DIR }}/openvino_package.tar.gz * + tar -I pigz -cvf ${{ env.BUILD_DIR }}/openvino_package.tar.gz * popd pushd ${{ env.INSTALL_TEST_DIR }} - tar -czvf ${{ env.BUILD_DIR }}/openvino_tests.tar.gz * + tar -I pigz -cvf ${{ env.BUILD_DIR }}/openvino_tests.tar.gz * popd - name: Cmake & Build - OpenVINO Contrib @@ -276,17 +276,17 @@ jobs: uses: ./.github/workflows/job_tensorflow_layer_tests.yml with: runner: 'macos-13' - shell: bash affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' Pytorch_Layer_Tests: name: Pytorch Layer Tests - needs: [ Build, Smart_CI, Openvino_tokenizers ] + needs: [ Build, Smart_CI ] uses: ./.github/workflows/job_pytorch_layer_tests.yml with: runner: 'macos-13' - shell: bash affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' CPU_Functional_Tests: name: CPU functional tests diff --git a/.github/workflows/mac_arm64.yml b/.github/workflows/mac_arm64.yml index 
331afc7266cd6a..f2175a0f2926c0 100644 --- a/.github/workflows/mac_arm64.yml +++ b/.github/workflows/mac_arm64.yml @@ -172,11 +172,11 @@ jobs: - name: Pack Artifacts run: | pushd ${{ env.INSTALL_DIR }} - tar -czvf ${{ env.BUILD_DIR }}/openvino_package.tar.gz * + tar -I pigz -cvf ${{ env.BUILD_DIR }}/openvino_package.tar.gz * popd pushd ${{ env.INSTALL_TEST_DIR }} - tar -czvf ${{ env.BUILD_DIR }}/openvino_tests.tar.gz * + tar -I pigz -cvf ${{ env.BUILD_DIR }}/openvino_tests.tar.gz * popd - name: Cmake & Build - OpenVINO Contrib @@ -275,17 +275,17 @@ jobs: uses: ./.github/workflows/job_tensorflow_layer_tests.yml with: runner: 'macos-13-xlarge' - shell: bash affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' Pytorch_Layer_Tests: name: Pytorch Layer Tests - needs: [ Build, Smart_CI, Openvino_tokenizers ] + needs: [ Build, Smart_CI ] uses: ./.github/workflows/job_pytorch_layer_tests.yml with: runner: 'macos-13-xlarge' - shell: bash affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' CPU_Functional_Tests: name: CPU functional tests diff --git a/.github/workflows/ubuntu_22.yml b/.github/workflows/ubuntu_22.yml index 8f461391f20a9f..84a93ee93b6164 100644 --- a/.github/workflows/ubuntu_22.yml +++ b/.github/workflows/ubuntu_22.yml @@ -176,10 +176,10 @@ jobs: - name: Extract OpenVINO packages run: | pushd ${INSTALL_DIR} - tar -xzf openvino_package.tar.gz -C ${INSTALL_DIR} + tar -I pigz -xf openvino_package.tar.gz -C ${INSTALL_DIR} popd pushd ${INSTALL_TEST_DIR} - tar -xzf openvino_tests.tar.gz -C ${INSTALL_DIR} + tar -I pigz -xf openvino_tests.tar.gz -C ${INSTALL_DIR} popd - name: Fetch setup_python action @@ -222,7 +222,7 @@ jobs: if: ${{ always() }} run: | pushd ${CONFORMANCE_ARTIFACTS_DIR} - tar -czvf ${CONFORMANCE_ARTIFACTS_DIR}/conformance_artifacts.tar.gz * + tar -I pigz -cvf ${CONFORMANCE_ARTIFACTS_DIR}/conformance_artifacts.tar.gz * popd - name: Upload Conformance Artifacts @@ -248,7 +248,7 @@ jobs: if: ${{ matrix.TEST_TYPE == 'API' }} run: | pushd ${CONFORMANCE_ARTIFACTS_DIR} - tar -czvf ${CONFORMANCE_ARTIFACTS_DIR}/conformance_artifacts.tar.gz * + tar -I pigz -cvf ${CONFORMANCE_ARTIFACTS_DIR}/conformance_artifacts.tar.gz * popd - name: Upload Conformance Artifacts @@ -305,19 +305,19 @@ jobs: uses: ./.github/workflows/job_tensorflow_layer_tests.yml with: runner: 'aks-linux-4-cores-16gb' - shell: bash container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_22_04_x64 }}", "volumes": ["/mount:/mount"]}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' Pytorch_Layer_Tests: name: Pytorch Layer Tests - needs: [ Docker, Build, Smart_CI, Openvino_tokenizers ] + needs: [ Docker, Build, Smart_CI ] uses: ./.github/workflows/job_pytorch_layer_tests.yml with: runner: 'aks-linux-4-cores-16gb' - shell: bash container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_22_04_x64 }}", "volumes": ["/mount:/mount"]}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' CPU_Functional_Tests: name: CPU functional tests @@ -449,11 +449,11 @@ jobs: - name: Extract OpenVINO packages run: | pushd ${INSTALL_DIR} - tar -xzf openvino_package.tar.gz -C ${INSTALL_DIR} + tar -I pigz -xf openvino_package.tar.gz -C ${INSTALL_DIR} popd pushd ${INSTALL_DIR} - tar -xzf openvino_developer_package.tar.gz -C ${INSTALL_DIR} + tar -I pigz -xf openvino_developer_package.tar.gz -C ${INSTALL_DIR} popd - name: Clone 
OpenVINO Contrib diff --git a/.github/workflows/ubuntu_24.yml b/.github/workflows/ubuntu_24.yml index 6409b417a0731b..295a4dd0e2c61a 100644 --- a/.github/workflows/ubuntu_24.yml +++ b/.github/workflows/ubuntu_24.yml @@ -133,6 +133,16 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.12' + Pytorch_Layer_Tests: + name: Pytorch Layer Tests + needs: [ Docker, Build, Smart_CI ] + uses: ./.github/workflows/job_pytorch_layer_tests.yml + with: + runner: 'aks-linux-4-cores-16gb' + container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_24_04_x64 }}", "volumes": ["/mount:/mount"]}' + affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.12' + Overall_Status: name: ci/gha_overall_status_ubuntu_24 needs: [Smart_CI, Build, Debian_Packages, Samples, Python_Unit_Tests] diff --git a/.github/workflows/windows_vs2019_release.yml b/.github/workflows/windows_vs2019_release.yml index 39cf2161525513..122fcc3c1c5021 100644 --- a/.github/workflows/windows_vs2019_release.yml +++ b/.github/workflows/windows_vs2019_release.yml @@ -404,17 +404,17 @@ jobs: uses: ./.github/workflows/job_tensorflow_layer_tests.yml with: runner: 'aks-win-8-cores-16gb' - shell: pwsh affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' Pytorch_Layer_Tests: name: Pytorch Layer Tests - needs: [ Build, Smart_CI, Openvino_tokenizers ] + needs: [ Build, Smart_CI ] uses: ./.github/workflows/job_pytorch_layer_tests.yml with: runner: 'aks-win-8-cores-16gb' - shell: pwsh affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' CXX_Unit_Tests: name: C++ unit tests diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f9557603de5f06..7169ebc2ba2c9b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -52,7 +52,7 @@ product better. Since the market of computing devices is constantly evolving, OpenVINO is always open to extending its support for new hardware. If you want to run inference on a device that is currently not supported, you can see how to develop a new plugin for it in the - [Plugin Developer Guide](https://docs.openvino.ai/canonical/openvino_docs_ie_plugin_dg_overview.html). + [Plugin Developer Guide](https://docs.openvino.ai/2024/documentation/openvino-extensibility/openvino-plugin-library.html). ### Improve documentation diff --git a/docs/articles_en/about-openvino/release-notes-openvino.rst b/docs/articles_en/about-openvino/release-notes-openvino.rst index 128124f5862651..4bd0b5d32c0f0e 100644 --- a/docs/articles_en/about-openvino/release-notes-openvino.rst +++ b/docs/articles_en/about-openvino/release-notes-openvino.rst @@ -305,12 +305,12 @@ Known Issues | ID: n/a | Description: | For ARM binaries, the `1.16 ONNX library `__ - is not yet available. -| The ONNX library for ARM, version 1.15, does not include the latest - functional and security updates. OpenVINO, version 2024.5 is targeted to be released in - Q4 2024 and will include additional functional and security updates. Users should update to - the latest version as it becomes available. -| The current vulnerabilities are: + is not yet available. The ONNX library for ARM, version 1.15, does not include the latest + functional and security updates. Users should update to the latest version as it becomes + available. +| Currently, if an unverified AI model is supplied to the ONNX frontend, it could lead to a + directory traversal issue. 
Ensure that the file name and file path that a model contains + are verified and correct. To learn more about the vulnerability, see: `CVE-2024-27318 `__ and `CVE-2024-27319 `__. diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-pip.rst b/docs/articles_en/get-started/install-openvino/install-openvino-pip.rst index b9f2664b050282..c079f167761ada 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-pip.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-pip.rst @@ -15,19 +15,7 @@ Install Intel® Distribution of OpenVINO™ Toolkit from PyPI Repository * is dedicated to users of all major OSes: Windows, Linux, and macOS (all x86_64 / arm64 architectures) * macOS offers support only for CPU inference - -| **Simplified Build and Integration** -| The package includes CMake configurations, precompiled static libraries, and headers, which - can be easily accessed through the Python API. You can use the `get_cmake_path()` method to - retrieve the paths to the CMake configurations and libraries: - -.. code-block:: python - from openvino import get_cmake_path - cmake_path = get_cmake_path() - -For detailed instructions on how to use these configurations in your build setup, check out the -:ref:`Create a library with extensions ` section. .. tab-set:: @@ -42,10 +30,13 @@ For detailed instructions on how to use these configurations in your build setup .. tab-item:: Processor Notes :sync: processor-notes - | To see if your processor includes the integrated graphics technology and supports iGPU inference, refer to: + | To see if your processor includes the integrated graphics technology and supports iGPU + inference, refer to: | `Product Specifications `__ + + Installing OpenVINO Runtime ########################### @@ -137,20 +128,47 @@ to see if your case needs any of them. + + +| **Simplified Build and Integration** +| The package includes CMake configurations, precompiled static libraries, and headers, which + can be easily accessed through the Python API. You can use the `get_cmake_path()` method to + retrieve the paths to the CMake configurations and libraries: + +.. code-block:: python + + from openvino import get_cmake_path + cmake_path = get_cmake_path() + +For detailed instructions on how to use these configurations in your build setup, check out the +:ref:`Create a library with extensions ` section. + + + + + + + What's Next? #################### -Now that you've installed OpenVINO Runtime, you're ready to run your own machine learning applications! Learn more about how to integrate a model in OpenVINO applications by trying out the following tutorials. +Now that you've installed OpenVINO Runtime, you're ready to run your own machine learning +applications! Learn more about how to integrate a model in OpenVINO applications by trying out +the following tutorials. .. image:: https://user-images.githubusercontent.com/15709723/127752390-f6aa371f-31b5-4846-84b9-18dd4f662406.gif :width: 400 -Try the `Python Quick Start Example `__ to estimate depth in a scene using an OpenVINO monodepth model in a Jupyter Notebook inside your web browser. +Try the `Python Quick Start Example `__ +to estimate depth in a scene using an OpenVINO monodepth model in a Jupyter Notebook inside +your web browser. 
+ Get started with Python +++++++++++++++++++++++ -Visit the :doc:`Tutorials <../../../learn-openvino/interactive-tutorials-python>` page for more Jupyter Notebooks to get you started with OpenVINO, such as: +Visit the :doc:`Tutorials <../../../learn-openvino/interactive-tutorials-python>` page for more +Jupyter Notebooks to get you started with OpenVINO, such as: * `OpenVINO Python API Tutorial `__ * `Basic image classification program with Hello Image Classification `__ diff --git a/docs/nbdoc/consts.py b/docs/nbdoc/consts.py index 70670e40a91790..c6965d054f0991 100644 --- a/docs/nbdoc/consts.py +++ b/docs/nbdoc/consts.py @@ -6,7 +6,7 @@ repo_owner = "openvinotoolkit" repo_name = "openvino_notebooks" repo_branch = "tree/main" -artifacts_link = "http://repository.toolbox.iotg.sclab.intel.com/projects/ov-notebook/0.1.0-latest/20240923220849/dist/rst_files/" +artifacts_link = "http://repository.toolbox.iotg.sclab.intel.com/projects/ov-notebook/0.1.0-latest/20241007220823/dist/rst_files/" blacklisted_extensions = ['.xml', '.bin'] notebooks_repo = "https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/" notebooks_binder = "https://mybinder.org/v2/gh/openvinotoolkit/openvino_notebooks/HEAD?filepath=" diff --git a/docs/notebooks/3D-pose-estimation-with-output.rst b/docs/notebooks/3D-pose-estimation-with-output.rst index dd85261aab6e8d..cdb1e4ff5083cd 100644 --- a/docs/notebooks/3D-pose-estimation-with-output.rst +++ b/docs/notebooks/3D-pose-estimation-with-output.rst @@ -95,6 +95,94 @@ Lab instead.** %pip install pythreejs "openvino-dev>=2024.0.0" "opencv-python" "torch" "onnx<1.16.2" --extra-index-url https://download.pytorch.org/whl/cpu + +.. parsed-literal:: + + Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cpu + Collecting pythreejs + Using cached pythreejs-2.4.2-py3-none-any.whl.metadata (5.4 kB) + Collecting openvino-dev>=2024.0.0 + Using cached openvino_dev-2024.4.0-16579-py3-none-any.whl.metadata (16 kB) + Collecting opencv-python + Using cached opencv_python-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB) + Collecting torch + Using cached https://download.pytorch.org/whl/cpu/torch-2.4.1%2Bcpu-cp38-cp38-linux_x86_64.whl (194.9 MB) + Collecting onnx<1.16.2 + Using cached onnx-1.16.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB) + Requirement already satisfied: ipywidgets>=7.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (8.1.5) + Collecting ipydatawidgets>=1.1.1 (from pythreejs) + Using cached ipydatawidgets-4.3.5-py2.py3-none-any.whl.metadata (1.4 kB) + Collecting numpy (from pythreejs) + Using cached numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB) + Requirement already satisfied: traitlets in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (5.14.3) + Requirement already satisfied: defusedxml>=0.7.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (0.7.1) + Collecting networkx<=3.1.0 (from openvino-dev>=2024.0.0) + Using cached networkx-3.1-py3-none-any.whl.metadata (5.3 kB) + Collecting openvino-telemetry>=2023.2.1 (from openvino-dev>=2024.0.0) + Using cached 
openvino_telemetry-2024.1.0-py3-none-any.whl.metadata (2.3 kB) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (24.1) + Requirement already satisfied: pyyaml>=5.4.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (6.0.2) + Requirement already satisfied: requests>=2.25.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (2.32.0) + Collecting openvino==2024.4.0 (from openvino-dev>=2024.0.0) + Using cached openvino-2024.4.0-16579-cp38-cp38-manylinux2014_x86_64.whl.metadata (8.3 kB) + Collecting filelock (from torch) + Using cached filelock-3.16.1-py3-none-any.whl.metadata (2.9 kB) + Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (4.12.2) + Collecting sympy (from torch) + Using cached sympy-1.13.3-py3-none-any.whl.metadata (12 kB) + Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1.4) + Collecting fsspec (from torch) + Using cached fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB) + Collecting protobuf>=3.20.2 (from onnx<1.16.2) + Using cached protobuf-5.28.2-cp38-abi3-manylinux2014_x86_64.whl.metadata (592 bytes) + Collecting traittypes>=0.2.0 (from ipydatawidgets>=1.1.1->pythreejs) + Using cached traittypes-0.2.1-py2.py3-none-any.whl.metadata (1.0 kB) + Requirement already satisfied: comm>=0.1.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (0.2.2) + Requirement already satisfied: ipython>=6.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (8.12.3) + Requirement already satisfied: widgetsnbextension~=4.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (4.0.13) + Requirement already satisfied: jupyterlab-widgets~=3.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (3.0.13) + Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (3.3.2) + Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (3.10) + Requirement already satisfied: urllib3<3,>=1.21.1 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (2.2.3) + Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (2024.8.30) + Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.5) + Collecting mpmath<1.4,>=1.1.0 (from sympy->torch) + Using cached https://download.pytorch.org/whl/mpmath-1.3.0-py3-none-any.whl (536 kB) + Requirement already satisfied: backcall in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.0) + Requirement already satisfied: decorator in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (5.1.1) + Requirement already satisfied: jedi>=0.16 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.19.1) + Requirement already satisfied: matplotlib-inline in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.1.7) + Requirement already satisfied: pickleshare in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.5) + Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (3.0.48) + Requirement already satisfied: pygments>=2.4.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.18.0) + Requirement already satisfied: stack-data in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.6.3) + Requirement already satisfied: pexpect>4.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (4.9.0) + Requirement already satisfied: parso<0.9.0,>=0.8.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.8.4) + Requirement already satisfied: ptyprocess>=0.5 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pexpect>4.3->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.0) + Requirement already satisfied: wcwidth in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.13) + Requirement already satisfied: executing>=1.2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.1.0) + Requirement already satisfied: asttokens>=2.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.4.1) + Requirement already satisfied: pure-eval in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.3) + Requirement already satisfied: six>=1.12.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from asttokens>=2.1.0->stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (1.16.0) + Using cached pythreejs-2.4.2-py3-none-any.whl (3.4 MB) + Using cached openvino_dev-2024.4.0-16579-py3-none-any.whl (4.7 MB) + Using cached openvino-2024.4.0-16579-cp38-cp38-manylinux2014_x86_64.whl (42.6 MB) + Using cached opencv_python-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (62.5 MB) + Using cached onnx-1.16.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.9 MB) + Using cached ipydatawidgets-4.3.5-py2.py3-none-any.whl (271 kB) + Using cached networkx-3.1-py3-none-any.whl (2.1 MB) + Using cached numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB) + Using cached openvino_telemetry-2024.1.0-py3-none-any.whl (23 kB) + Using cached protobuf-5.28.2-cp38-abi3-manylinux2014_x86_64.whl (316 kB) + Using cached filelock-3.16.1-py3-none-any.whl (16 kB) + Using cached fsspec-2024.9.0-py3-none-any.whl (179 kB) + Using cached sympy-1.13.3-py3-none-any.whl (6.2 MB) + Using cached traittypes-0.2.1-py2.py3-none-any.whl (8.6 kB) + Installing collected packages: openvino-telemetry, mpmath, traittypes, sympy, protobuf, numpy, networkx, fsspec, filelock, torch, openvino, opencv-python, onnx, openvino-dev, ipydatawidgets, pythreejs + Successfully installed filelock-3.16.1 fsspec-2024.9.0 ipydatawidgets-4.3.5 mpmath-1.3.0 networkx-3.1 numpy-1.24.4 onnx-1.16.1 opencv-python-4.10.0.84 openvino-2024.4.0 openvino-dev-2024.4.0 openvino-telemetry-2024.1.0 protobuf-5.28.2 pythreejs-2.4.2 sympy-1.13.3 torch-2.4.1+cpu traittypes-0.2.1 + Note: you may need to restart the kernel to use updated packages. + + Imports ------- @@ -165,6 +253,18 @@ directory structure and downloads the selected model. download_command = f"omz_downloader " f"--name {model_name} " f"--output_dir {base_model_dir}" ! $download_command + +.. 
parsed-literal:: + + ################|| Downloading human-pose-estimation-3d-0001 ||################ + + ========== Downloading model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.tar.gz + + + ========== Unpacking model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.tar.gz + + + Convert Model to OpenVINO IR format ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -183,6 +283,28 @@ IR format. ) ! $convert_command + +.. parsed-literal:: + + ========== Converting human-pose-estimation-3d-0001 to ONNX + Conversion to ONNX command: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/omz_tools/internal_scripts/pytorch_to_onnx.py --model-path=model/public/human-pose-estimation-3d-0001 --model-name=PoseEstimationWithMobileNet --model-param=is_convertible_by_mo=True --import-module=model --weights=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.pth --input-shape=1,3,256,448 --input-names=data --output-names=features,heatmaps,pafs --output-file=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.onnx + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/omz_tools/internal_scripts/pytorch_to_onnx.py:147: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + model.load_state_dict(torch.load(weights, map_location='cpu')) + ONNX check passed successfully. + + ========== Converting human-pose-estimation-3d-0001 to IR (FP32) + Conversion command: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/bin/mo --framework=onnx --output_dir=model/public/human-pose-estimation-3d-0001/FP32 --model_name=human-pose-estimation-3d-0001 --input=data '--mean_values=data[128.0,128.0,128.0]' '--scale_values=data[255.0,255.0,255.0]' --output=features,heatmaps,pafs --input_model=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.onnx '--layout=data(NCHW)' '--input_shape=[1, 3, 256, 448]' --compress_to_fp16=False + + [ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release. + In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). 
OVC represents a lightweight alternative of MO and provides simplified model conversion API. + Find more information about transition from MO to OVC at https://docs.openvino.ai/2023.2/openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition.html + [ SUCCESS ] Generated IR version 11 model. + [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/3D-pose-estimation-webcam/model/public/human-pose-estimation-3d-0001/FP32/human-pose-estimation-3d-0001.xml + [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/3D-pose-estimation-webcam/model/public/human-pose-estimation-3d-0001/FP32/human-pose-estimation-3d-0001.bin + + + Select inference device ~~~~~~~~~~~~~~~~~~~~~~~ @@ -196,6 +318,15 @@ select device from dropdown list for running inference using OpenVINO device + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + Load the model ~~~~~~~~~~~~~~ @@ -231,6 +362,15 @@ heat maps, PAF (part affinity fields) and features. input_layer.any_name, [o.any_name for o in output_layers] + + + +.. parsed-literal:: + + ('data', ['features', 'heatmaps', 'pafs']) + + + Processing ---------- diff --git a/docs/notebooks/3D-segmentation-point-clouds-with-output.rst b/docs/notebooks/3D-segmentation-point-clouds-with-output.rst index 9c441fdf0d11c1..ce27c3006ac36a 100644 --- a/docs/notebooks/3D-segmentation-point-clouds-with-output.rst +++ b/docs/notebooks/3D-segmentation-point-clouds-with-output.rst @@ -227,7 +227,7 @@ chair for example. .. parsed-literal:: - /tmp/ipykernel_60730/2434168836.py:12: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored + /tmp/ipykernel_59833/2434168836.py:12: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored ax.scatter3D(X, Y, Z, s=5, cmap="jet", marker="o", label="chair") @@ -321,7 +321,7 @@ select device from dropdown list for running inference using OpenVINO .. parsed-literal:: - /tmp/ipykernel_60730/2804603389.py:23: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored + /tmp/ipykernel_59833/2804603389.py:23: UserWarning: No data for colormapping provided via 'c'. 
Parameters 'cmap' will be ignored ax.scatter(XCur, YCur, ZCur, s=5, cmap="jet", marker="o", label=classes[i]) diff --git a/docs/notebooks/action-recognition-webcam-with-output_files/action-recognition-webcam-with-output_22_0.png b/docs/notebooks/action-recognition-webcam-with-output_files/action-recognition-webcam-with-output_22_0.png index 76fe35e4bad0db..2bb95f87f90515 100644 --- a/docs/notebooks/action-recognition-webcam-with-output_files/action-recognition-webcam-with-output_22_0.png +++ b/docs/notebooks/action-recognition-webcam-with-output_files/action-recognition-webcam-with-output_22_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3abf9ccb9dfa3a1f55db22dbb1bbdd3ea307c9495ae9b756acbc071a036b86b5 -size 68096 +oid sha256:d9ed14955798d0fbcdc284da02f19a12ae92c89fe6c1f4760951a414f4047b66 +size 68016 diff --git a/docs/notebooks/all_notebooks_paths.txt b/docs/notebooks/all_notebooks_paths.txt index a1d92044cc9bf5..62e4b205f45f75 100644 --- a/docs/notebooks/all_notebooks_paths.txt +++ b/docs/notebooks/all_notebooks_paths.txt @@ -68,6 +68,7 @@ notebooks/llm-rag-llamaindex/llm-rag-llamaindex.ipynb notebooks/magika-content-type-recognition/magika-content-type-recognition.ipynb notebooks/meter-reader/meter-reader.ipynb notebooks/minicpm-v-multimodal-chatbot/minicpm-v-multimodal-chatbot.ipynb +notebooks/mllama-3.2/mllama-3.2.ipynb notebooks/mms-massively-multilingual-speech/mms-massively-multilingual-speech.ipynb notebooks/mobileclip-video-search/mobileclip-video-search.ipynb notebooks/mobilevlm-language-assistant/mobilevlm-language-assistant.ipynb @@ -93,6 +94,7 @@ notebooks/phi-3-vision/phi-3-vision.ipynb notebooks/photo-maker/photo-maker.ipynb notebooks/pix2struct-docvqa/pix2struct-docvqa.ipynb notebooks/pixart/pixart.ipynb +notebooks/pixtral/pixtral.ipynb notebooks/pose-estimation-webcam/pose-estimation.ipynb notebooks/pytorch-post-training-quantization-nncf/pytorch-post-training-quantization-nncf.ipynb notebooks/pytorch-quantization-aware-training/pytorch-quantization-aware-training.ipynb @@ -107,6 +109,8 @@ notebooks/qwen2-vl/qwen2-vl.ipynb notebooks/riffusion-text-to-music/riffusion-text-to-music.ipynb notebooks/rmbg-background-removal/rmbg-background-removal.ipynb notebooks/s3d-mil-nce-text-to-video-retrieval/s3d-mil-nce-text-to-video-retrieval.ipynb +notebooks/sam2-image-segmentation/segment-anything-2-image.ipynb +notebooks/sam2-video-segmentation/segment-anything-2-video.ipynb notebooks/sdxl-turbo/sdxl-turbo.ipynb notebooks/segment-anything/segment-anything.ipynb notebooks/siglip-zero-shot-image-classification/siglip-zero-shot-image-classification.ipynb @@ -149,9 +153,14 @@ notebooks/typo-detector/typo-detector.ipynb notebooks/vehicle-detection-and-recognition/vehicle-detection-and-recognition.ipynb notebooks/vision-background-removal/vision-background-removal.ipynb notebooks/vision-monodepth/vision-monodepth.ipynb +notebooks/wav2lip/wav2lip.ipynb +notebooks/whisper-asr-genai/whisper-asr-genai.ipynb notebooks/whisper-subtitles-generation/whisper-subtitles-generation.ipynb notebooks/wuerstchen-image-generation/wuerstchen-image-generation.ipynb notebooks/yolov10-optimization/yolov10-optimization.ipynb +notebooks/yolov11-optimization/yolov11-instance-segmentation.ipynb +notebooks/yolov11-optimization/yolov11-keypoint-detection.ipynb +notebooks/yolov11-optimization/yolov11-object-detection.ipynb notebooks/yolov8-optimization/yolov8-instance-segmentation.ipynb notebooks/yolov8-optimization/yolov8-keypoint-detection.ipynb 
notebooks/yolov8-optimization/yolov8-obb.ipynb diff --git a/docs/notebooks/amused-lightweight-text-to-image-with-output.rst b/docs/notebooks/amused-lightweight-text-to-image-with-output.rst index 7ea11af6e2eee3..880bf539025c5e 100644 --- a/docs/notebooks/amused-lightweight-text-to-image-with-output.rst +++ b/docs/notebooks/amused-lightweight-text-to-image-with-output.rst @@ -226,23 +226,23 @@ Convert the Text Encoder .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4779: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:86: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:88: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input_shape[-1] > 1 or self.sliding_window is not None: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:797: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:808: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! encoder_states = () if output_hidden_states else None - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:802: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:813: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if output_hidden_states: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:825: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:836: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if output_hidden_states: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:828: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:839: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if not return_dict: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:924: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:935: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if not return_dict: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:1415: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:1426: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if not return_dict: @@ -351,13 +351,13 @@ suitable. This function repeats part of ``AmusedPipeline``. .. parsed-literal:: - /tmp/ipykernel_61545/3779428577.py:34: TracerWarning: Converting a tensor to a Python list might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /tmp/ipykernel_60662/3779428577.py:34: TracerWarning: Converting a tensor to a Python list might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! shape=shape.tolist(), - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/autoencoders/vq_model.py:144: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/autoencoders/vq_model.py:144: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if not force_not_quantize: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if hidden_states.shape[0] >= 64: @@ -490,7 +490,7 @@ And insert wrappers instances in the pipeline: .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -707,7 +707,7 @@ model. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -760,17 +760,17 @@ model. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) 
@@ -794,7 +794,7 @@ Demo generation with quantized pipeline .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -878,7 +878,7 @@ a rough estimate of generation quality. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/utilities/prints.py:43: UserWarning: Metric `InceptionScore` will save all extracted features in buffer. For large datasets this may lead to large memory footprint. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/utilities/prints.py:43: UserWarning: Metric `InceptionScore` will save all extracted features in buffer. For large datasets this may lead to large memory footprint. warnings.warn(\*args, \*\*kwargs) # noqa: B028 @@ -890,7 +890,7 @@ a rough estimate of generation quality. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/image/inception.py:175: UserWarning: std(): degrees of freedom is <= 0. Correction should be strictly less than the reduction factor (input numel divided by output numel). (Triggered internally at ../aten/src/ATen/native/ReduceOps.cpp:1808.) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/image/inception.py:175: UserWarning: std(): degrees of freedom is <= 0. Correction should be strictly less than the reduction factor (input numel divided by output numel). (Triggered internally at ../aten/src/ATen/native/ReduceOps.cpp:1808.) return kl.mean(), kl.std() @@ -907,8 +907,8 @@ a rough estimate of generation quality. .. parsed-literal:: - Quantized pipeline Inception Score: 11.0730562210083 - Quantization speed-up: 2.06x + Quantized pipeline Inception Score: 11.073053359985352 + Quantization speed-up: 2.07x Interactive inference diff --git a/docs/notebooks/animate-anyone-with-output.rst b/docs/notebooks/animate-anyone-with-output.rst index 1c5e5bd322fead..15459596dea5bf 100644 --- a/docs/notebooks/animate-anyone-with-output.rst +++ b/docs/notebooks/animate-anyone-with-output.rst @@ -36,7 +36,9 @@ repo `__ and .. warning:: - This tutorial requires at least **96 GB **of RAM for model conversion and **40 GB** for inference. Changing the values of ``HEIGHT``, ``WIDTH`` and ``VIDEO_LENGTH`` variables will change the memory consumption but will also affect accuracy. 
+ + This tutorial requires at least **96 GB** of RAM for model conversion and **40 GB** for inference. Changing the values ``HEIGHT``, ``WIDTH`` and ``VIDEO_LENGTH`` variables will change the memory consumption but will also affect accuracy. + **Table of contents:** @@ -162,11 +164,11 @@ Note that we clone a fork of original repo with tweaked forward methods. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. torch.utils._pytree._register_pytree_node( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. torch.utils._pytree._register_pytree_node( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. torch.utils._pytree._register_pytree_node( @@ -281,7 +283,7 @@ Download weights .. parsed-literal:: - diffusion_pytorch_model.safetensors: 0%| | 0.00/335M [00:00 - + Your browser does not support the video tag. diff --git a/docs/notebooks/async-api-with-output.rst b/docs/notebooks/async-api-with-output.rst index 74704ad6cc7fea..135554acc38de4 100644 --- a/docs/notebooks/async-api-with-output.rst +++ b/docs/notebooks/async-api-with-output.rst @@ -355,7 +355,7 @@ Test performance in Sync Mode .. parsed-literal:: Source ended - average throuput in sync mode: 62.96 fps + average throuput in sync mode: 63.95 fps Async Mode @@ -494,7 +494,7 @@ Test the performance in Async Mode .. parsed-literal:: Source ended - average throuput in async mode: 105.40 fps + average throuput in async mode: 108.46 fps Compare the performance @@ -637,5 +637,5 @@ Test the performance with ``AsyncInferQueue`` .. 
parsed-literal:: - average throughput in async mode with async infer queue: 140.92 fps + average throughput in async mode with async infer queue: 144.01 fps diff --git a/docs/notebooks/async-api-with-output_files/async-api-with-output_23_0.png b/docs/notebooks/async-api-with-output_files/async-api-with-output_23_0.png index 66cfd441941135..f80b64476e19ea 100644 --- a/docs/notebooks/async-api-with-output_files/async-api-with-output_23_0.png +++ b/docs/notebooks/async-api-with-output_files/async-api-with-output_23_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7ae89d325549471fb262f2c192fc3fc094fe0449df44306d0e4e5c8dfd349ccb -size 29439 +oid sha256:b164e09df4e90dc87d63caf35cc832021fbd147354a5300605164fce212e36b8 +size 29453 diff --git a/docs/notebooks/auto-device-with-output.rst b/docs/notebooks/auto-device-with-output.rst index b19ab69ca4cd65..5f7d8dfc61502f 100644 --- a/docs/notebooks/auto-device-with-output.rst +++ b/docs/notebooks/auto-device-with-output.rst @@ -200,16 +200,16 @@ By default, ``compile_model`` API will select **AUTO** as .. parsed-literal:: - [23:33:07.3079]I[plugin.cpp:421][AUTO] device:CPU, config:LOG_LEVEL=LOG_INFO - [23:33:07.3079]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT=LATENCY - [23:33:07.3079]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT_NUM_REQUESTS=0 - [23:33:07.3079]I[plugin.cpp:421][AUTO] device:CPU, config:PERF_COUNT=NO - [23:33:07.3079]I[plugin.cpp:426][AUTO] device:CPU, priority:0 - [23:33:07.3079]I[schedule.cpp:17][AUTO] scheduler starting - [23:33:07.3080]I[auto_schedule.cpp:181][AUTO] select device:CPU - [23:33:07.4176]I[auto_schedule.cpp:346][AUTO] Device: [CPU]: Compile model took 109.630635 ms - [23:33:07.4178]I[auto_schedule.cpp:112][AUTO] device:CPU compiling model finished - [23:33:07.4178]I[plugin.cpp:454][AUTO] underlying hardware does not support hardware context + [23:32:04.6480]I[plugin.cpp:421][AUTO] device:CPU, config:LOG_LEVEL=LOG_INFO + [23:32:04.6480]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT=LATENCY + [23:32:04.6480]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT_NUM_REQUESTS=0 + [23:32:04.6480]I[plugin.cpp:421][AUTO] device:CPU, config:PERF_COUNT=NO + [23:32:04.6480]I[plugin.cpp:426][AUTO] device:CPU, priority:0 + [23:32:04.6481]I[schedule.cpp:17][AUTO] scheduler starting + [23:32:04.6481]I[auto_schedule.cpp:181][AUTO] select device:CPU + [23:32:04.7787]I[auto_schedule.cpp:346][AUTO] Device: [CPU]: Compile model took 130.622171 ms + [23:32:04.7789]I[auto_schedule.cpp:112][AUTO] device:CPU compiling model finished + [23:32:04.7790]I[plugin.cpp:454][AUTO] underlying hardware does not support hardware context Successfully compiled model without a device_name. @@ -222,8 +222,8 @@ By default, ``compile_model`` API will select **AUTO** as .. 
parsed-literal:: - [23:33:07.4229]I[schedule.cpp:308][AUTO] scheduler ending Deleted compiled_model + [23:32:04.7847]I[schedule.cpp:308][AUTO] scheduler ending Explicitly pass AUTO as device_name to Core::compile_model API @@ -556,12 +556,12 @@ Loop for inference and update the FPS/Latency every Compiling Model for AUTO device with THROUGHPUT hint Start inference, 6 groups of FPS/latency will be measured over 10s intervals - throughput: 184.55fps, latency: 31.14ms, time interval: 10.00s - throughput: 183.88fps, latency: 31.86ms, time interval: 10.01s - throughput: 182.58fps, latency: 32.11ms, time interval: 10.00s - throughput: 183.39fps, latency: 31.91ms, time interval: 10.00s - throughput: 183.80fps, latency: 31.85ms, time interval: 10.01s - throughput: 183.74fps, latency: 31.86ms, time interval: 10.00s + throughput: 183.87fps, latency: 31.26ms, time interval: 10.01s + throughput: 184.60fps, latency: 31.70ms, time interval: 10.00s + throughput: 183.24fps, latency: 31.93ms, time interval: 10.01s + throughput: 184.05fps, latency: 31.75ms, time interval: 10.00s + throughput: 184.40fps, latency: 31.77ms, time interval: 10.00s + throughput: 178.41fps, latency: 32.83ms, time interval: 10.02s Done @@ -607,12 +607,12 @@ Loop for inference and update the FPS/Latency for each Compiling Model for AUTO Device with LATENCY hint Start inference, 6 groups fps/latency will be out with 10s interval - throughput: 139.69fps, latency: 6.62ms, time interval: 10.00s - throughput: 141.89fps, latency: 6.61ms, time interval: 10.00s - throughput: 142.44fps, latency: 6.64ms, time interval: 10.00s - throughput: 142.12fps, latency: 6.61ms, time interval: 10.01s - throughput: 142.13fps, latency: 6.60ms, time interval: 10.00s - throughput: 141.86fps, latency: 6.66ms, time interval: 10.00s + throughput: 140.52fps, latency: 6.62ms, time interval: 10.01s + throughput: 142.84fps, latency: 6.60ms, time interval: 10.00s + throughput: 142.14fps, latency: 6.60ms, time interval: 10.00s + throughput: 142.63fps, latency: 6.60ms, time interval: 10.00s + throughput: 143.11fps, latency: 6.61ms, time interval: 10.01s + throughput: 132.99fps, latency: 7.13ms, time interval: 10.01s Done diff --git a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png index aca6ede1f773a1..5ef6531526d989 100644 --- a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png +++ b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8441bdf4d1afa3e31512c0f101778671ce6ad70445bebca05b730dd10219208f -size 26638 +oid sha256:4cf9ec5f2d8e34510d31d10190c4d7f269bb83a2800891ff865e09ee85e80d95 +size 27103 diff --git a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png index f9d1b83be7e6de..edc7de70aeb565 100644 --- a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png +++ b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e7d5449cf917e635df8929f98f63f423e264eb608bd858c58097793690227522 -size 40041 +oid sha256:ed265008638a6c38fbfc8397e6bb20d0e1fc900df893f30b7238c518d2380b2b +size 40093 diff --git a/docs/notebooks/clip-zero-shot-classification-with-output.rst 
b/docs/notebooks/clip-zero-shot-classification-with-output.rst index 5a2e54c69394a1..2e5a45826eaceb 100644 --- a/docs/notebooks/clip-zero-shot-classification-with-output.rst +++ b/docs/notebooks/clip-zero-shot-classification-with-output.rst @@ -736,7 +736,6 @@ up of the dynamic quantized models. Interactive demo ---------------- - Now, it is your turn! You can provide your own image and comma-separated list of labels for zero-shot classification. diff --git a/docs/notebooks/convert-to-openvino-with-output.rst b/docs/notebooks/convert-to-openvino-with-output.rst index fd5f10a68ead6c..e5a66d4f74ae17 100644 --- a/docs/notebooks/convert-to-openvino-with-output.rst +++ b/docs/notebooks/convert-to-openvino-with-output.rst @@ -46,7 +46,7 @@ Guide `__. .. parsed-literal:: - /tmp/ipykernel_75492/1592321960.py:3: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /tmp/ipykernel_72009/1592321960.py:3: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. state_dict = torch.load(state_dict_file, map_location=torch.device("cpu")) @@ -452,7 +452,7 @@ this notebook. .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if x_e.shape[-i - 1] != x_0.shape[-i - 1]: @@ -534,18 +534,18 @@ Convert quantized model to OpenVINO IR model and save it. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:340: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:340: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! return self._level_low.item() - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:348: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:348: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! return self._level_high.item() - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if x_e.shape[-i - 1] != x_0.shape[-i - 1]: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. 
of the traced function does not match the corresponding output of the Python function. Detailed error: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: Tensor-likes are not close! - Mismatched elements: 246600 / 262144 (94.1%) - Greatest absolute difference: 3.6983602046966553 at index (0, 0, 464, 188) (up to 1e-05 allowed) - Greatest relative difference: 29992.842219662372 at index (0, 0, 248, 255) (up to 1e-05 allowed) + Mismatched elements: 249913 / 262144 (95.3%) + Greatest absolute difference: 4.628173828125 at index (0, 0, 430, 337) (up to 1e-05 allowed) + Greatest relative difference: 31968.152067381572 at index (0, 0, 102, 269) (up to 1e-05 allowed) _check_trace( @@ -671,7 +671,7 @@ be run in the notebook with ``! benchmark_app`` or [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 8.94 ms + [ INFO ] Read model took 8.78 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [1,1,512,512] @@ -685,7 +685,7 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.final_conv/aten::_convolution/Add) : f32 / [...] / [1,1,512,512] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 250.27 ms + [ INFO ] Compile model took 236.15 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -722,17 +722,17 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 50.04 ms + [ INFO ] First inference took 48.84 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] [ INFO ] Count: 429 iterations - [ INFO ] Duration: 15001.16 ms + [ INFO ] Duration: 15015.79 ms [ INFO ] Latency: - [ INFO ] Median: 34.68 ms - [ INFO ] Average: 34.74 ms - [ INFO ] Min: 34.33 ms - [ INFO ] Max: 37.05 ms - [ INFO ] Throughput: 28.60 FPS + [ INFO ] Median: 34.71 ms + [ INFO ] Average: 34.77 ms + [ INFO ] Min: 34.38 ms + [ INFO ] Max: 37.16 ms + [ INFO ] Throughput: 28.57 FPS .. code:: ipython3 @@ -758,7 +758,7 @@ be run in the notebook with ``! benchmark_app`` or [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 10.95 ms + [ INFO ] Read model took 10.92 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [1,1,512,512] @@ -772,7 +772,7 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.final_conv/aten::_convolution/Add) : f32 / [...] 
/ [1,1,512,512] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 246.69 ms + [ INFO ] Compile model took 239.20 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model49 @@ -809,17 +809,17 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 27.16 ms + [ INFO ] First inference took 29.32 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 950 iterations - [ INFO ] Duration: 15005.10 ms + [ INFO ] Count: 883 iterations + [ INFO ] Duration: 15004.05 ms [ INFO ] Latency: - [ INFO ] Median: 15.55 ms - [ INFO ] Average: 15.59 ms - [ INFO ] Min: 15.30 ms - [ INFO ] Max: 17.50 ms - [ INFO ] Throughput: 63.31 FPS + [ INFO ] Median: 15.57 ms + [ INFO ] Average: 16.79 ms + [ INFO ] Min: 15.15 ms + [ INFO ] Max: 22.01 ms + [ INFO ] Throughput: 58.85 FPS Visually Compare Inference Results @@ -913,7 +913,7 @@ seed is displayed to enable reproducing specific runs of this cell. .. parsed-literal:: - Visualizing results with seed 1727128557 + Visualizing results with seed 1728337035 @@ -997,7 +997,7 @@ performs inference, and displays the results on the frames loaded in .. parsed-literal:: Loaded model to AUTO in 0.15 seconds. - Total time for 68 frames: 2.31 seconds, fps:29.93 + Total time for 68 frames: 2.32 seconds, fps:29.73 References diff --git a/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png b/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png index 479fbbc13510c0..03b5eb1e3fd9f0 100644 --- a/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png +++ b/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:de1590c308306d9f19115503c44cbed2ae2b9b10e2ca29e02620b00f2d16fec3 -size 386773 +oid sha256:1aac55db34be1df744fd19868762a8b4572a8e19683af72a57d7176b1486af0c +size 380239 diff --git a/docs/notebooks/ddcolor-image-colorization-with-output.rst b/docs/notebooks/ddcolor-image-colorization-with-output.rst index e556a6c2a3ef4b..ccd6216d26268d 100644 --- a/docs/notebooks/ddcolor-image-colorization-with-output.rst +++ b/docs/notebooks/ddcolor-image-colorization-with-output.rst @@ -110,7 +110,7 @@ Prerequisites remote: Counting objects: 100% (76/76), done. remote: Compressing objects: 100% (42/42), done. remote: Total 233 (delta 54), reused 34 (delta 34), pack-reused 157 (from 1) - Receiving objects: 100% (233/233), 13.34 MiB | 21.85 MiB/s, done. + Receiving objects: 100% (233/233), 13.34 MiB | 17.27 MiB/s, done. Resolving deltas: 100% (80/80), done. @@ -406,10 +406,10 @@ Perform model quantization .. parsed-literal:: - 2024-09-23 23:58:13.958747: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
- 2024-09-23 23:58:13.997019: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-07 23:39:33.824396: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-07 23:39:33.863560: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-09-23 23:58:14.401686: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-07 23:39:34.271973: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -517,7 +517,7 @@ Tool + @@ -306,13 +306,13 @@ loading on device using ``core.complie_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert H % patch_H == 0, f"Input image height {H} is not a multiple of patch height {patch_H}" - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert W % patch_W == 0, f"Input image width {W} is not a multiple of patch width: {patch_W}" - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if npatch == N and w == h: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dpt.py:147: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dpt.py:147: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! out = F.interpolate(out, (int(patch_h * 14), int(patch_w * 14)), mode="bilinear", align_corners=True) @@ -404,7 +404,7 @@ range. .. parsed-literal:: - + @@ -626,7 +626,7 @@ Run inference on video .. parsed-literal:: - Processed 60 frames in 13.27 seconds. Total FPS (including video processing): 4.52.Inference FPS: 10.67 + Processed 60 frames in 14.01 seconds. Total FPS (including video processing): 4.28.Inference FPS: 9.46 Video saved to 'output/Coco Walking in Berkeley_depth_anything.mp4'. @@ -653,7 +653,7 @@ Run inference on video .. parsed-literal:: Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -786,10 +786,10 @@ quantization code below may take some time. .. parsed-literal:: - 2024-09-24 00:08:42.576231: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-09-24 00:08:42.609829: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-07 23:47:57.736195: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
+ 2024-10-07 23:47:57.768920: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-09-24 00:08:43.185945: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-07 23:47:58.341833: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -915,10 +915,10 @@ data. .. parsed-literal:: - Processed 60 frames in 12.69 seconds. Total FPS (including video processing): 4.73.Inference FPS: 13.15 + Processed 60 frames in 12.89 seconds. Total FPS (including video processing): 4.65.Inference FPS: 12.78 Video saved to 'output/Coco Walking in Berkeley_depth_anything_int8.mp4'. Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -998,9 +998,9 @@ Tool =2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2024.4.0) - Requirement already satisfied: transformers in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.44.2) - Requirement already satisfied: torch>=2.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2.4.1+cpu) - Requirement already satisfied: tqdm in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.66.5) - Requirement already satisfied: numpy<2.1.0,>=1.16.6 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (1.23.5) - Requirement already satisfied: openvino-telemetry>=2023.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (2024.1.0) - Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (24.1) - Requirement already satisfied: filelock in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (3.16.1) - Requirement already satisfied: huggingface-hub<1.0,>=0.23.2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.25.1) - Requirement already satisfied: pyyaml>=5.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (6.0.2) - Requirement already satisfied: regex!=2019.12.17 in 
/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (2024.9.11) - Requirement already satisfied: requests in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (2.32.3) - Requirement already satisfied: safetensors>=0.4.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.4.5) - Requirement already satisfied: tokenizers<0.20,>=0.19 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.19.1) - Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (4.12.2) - Requirement already satisfied: sympy in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (1.13.3) - Requirement already satisfied: networkx in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.1) - Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.1.4) - Requirement already satisfied: fsspec in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (2024.6.1) - Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch>=2.1) (2.1.5) - Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (3.3.2) - Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (3.10) - Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2.2.3) - Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2024.8.30) - Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch>=2.1) (1.3.0) + Requirement already satisfied: openvino>=2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2024.4.0) + Requirement already satisfied: transformers in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.45.2) + Requirement already satisfied: torch>=2.1 
in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2.4.1+cpu) + Requirement already satisfied: tqdm in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.66.5) + Requirement already satisfied: numpy<2.1.0,>=1.16.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (1.23.5) + Requirement already satisfied: openvino-telemetry>=2023.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (2024.1.0) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (24.1) + Requirement already satisfied: filelock in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (3.16.1) + Requirement already satisfied: huggingface-hub<1.0,>=0.23.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.25.1) + Requirement already satisfied: pyyaml>=5.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (6.0.2) + Requirement already satisfied: regex!=2019.12.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (2024.9.11) + Requirement already satisfied: requests in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (2.32.3) + Requirement already satisfied: safetensors>=0.4.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.4.5) + Requirement already satisfied: tokenizers<0.21,>=0.20 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.20.0) + Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (4.12.2) + Requirement already satisfied: sympy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (1.13.3) + Requirement already satisfied: networkx in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.1) + Requirement already satisfied: jinja2 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.1.4) + Requirement already satisfied: fsspec in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (2024.6.1) + Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch>=2.1) (2.1.5) + Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (3.3.2) + Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (3.10) + Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2.2.3) + Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2024.8.30) + Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch>=2.1) (1.3.0) Note: you may need to restart the kernel to use updated packages. @@ -130,13 +130,6 @@ understand the context of a sentence. Here, we will use tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=checkpoint) - -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884 - warnings.warn( - - Convert Model to OpenVINO Intermediate Representation format ------------------------------------------------------------ @@ -173,9 +166,9 @@ optimal execution on end-point target devices. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4779: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. 
Please use `model.hf_quantizer.is_trainable` instead warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/distilbert/modeling_distilbert.py:215: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/distilbert/modeling_distilbert.py:215: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. mask, torch.tensor(torch.finfo(scores.dtype).min) @@ -281,7 +274,7 @@ For a single input sentence .. parsed-literal:: Label: POSITIVE - Total Time: 0.03 seconds + Total Time: 0.02 seconds Read from a text file diff --git a/docs/notebooks/dolly-2-instruction-following-with-output.rst b/docs/notebooks/dolly-2-instruction-following-with-output.rst index 9f6857b608d962..01d4b8fed8bb57 100644 --- a/docs/notebooks/dolly-2-instruction-following-with-output.rst +++ b/docs/notebooks/dolly-2-instruction-following-with-output.rst @@ -214,10 +214,10 @@ you can add ``--sym``. For INT4 quantization you can also specify the following arguments : - The ``--group-size`` parameter will define the group size to use for - quantization, -1 it will results in per-column quantization. +quantization, -1 it will results in per-column quantization. - The ``--ratio`` parameter controls the ratio between 4-bit and 8-bit - quantization. If set to 0.9, it means that 90% of the layers will be - quantized to int4 while 10% will be quantized to int8. +quantization. If set to 0.9, it means that 90% of the layers will be +quantized to int4 while 10% will be quantized to int8. Smaller group_size and ratio values usually improve accuracy at the sacrifice of the model size and inference latency. diff --git a/docs/notebooks/dynamicrafter-animating-images-with-output.rst b/docs/notebooks/dynamicrafter-animating-images-with-output.rst index 08850d5989f830..194282459f56cb 100644 --- a/docs/notebooks/dynamicrafter-animating-images-with-output.rst +++ b/docs/notebooks/dynamicrafter-animating-images-with-output.rst @@ -151,6 +151,13 @@ Prerequisites %pip install -q "openvino>=2024.2.0" "nncf>=2.11.0" "datasets>=2.20.0" %pip install -q "gradio>=4.19" omegaconf einops pytorch_lightning kornia "open_clip_torch==2.22.0" transformers av opencv-python "torch==2.2.2" --extra-index-url https://download.pytorch.org/whl/cpu + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + + .. code:: ipython3 import sys @@ -174,6 +181,28 @@ Prerequisites ) open("notebook_utils.py", "w").write(r.text) + +.. parsed-literal:: + + Cloning into 'dynamicrafter'... + remote: Enumerating objects: 335, done. + remote: Counting objects: 100% (153/153), done. + remote: Compressing objects: 100% (99/99), done. 
+ remote: Total 335 (delta 97), reused 54 (delta 54), pack-reused 182 (from 1) + Receiving objects: 100% (335/335), 72.41 MiB | 19.06 MiB/s, done. + Resolving deltas: 100% (123/123), done. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images + + + + +.. parsed-literal:: + + 24692 + + + Load and run the original pipeline ---------------------------------- @@ -251,11 +280,24 @@ We will use model for 256x256 resolution as example. Also, models for model = download_model() +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/huggingface_hub/file_download.py:1204: UserWarning: `local_dir_use_symlinks` parameter is deprecated and will be ignored. The process to download files to a local folder has been updated and do not rely on symlinks anymore. You only need to pass a destination folder as`local_dir`. + For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder. + warnings.warn( + + + +.. parsed-literal:: + + model.ckpt: 0%| | 0.00/10.4G [00:00>> model checkpoint loaded. - + Convert the model to OpenVINO IR -------------------------------- @@ -354,6 +396,49 @@ resolutions. del model.embedder gc.collect(); + +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/utils/image.py:226: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if input.numel() == 0: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:573: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if size == input_size: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:579: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + antialias = antialias and (max(factors) > 1) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:581: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ if antialias: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:584: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + sigmas = (max((factors[0] - 1.0) / 2.0, 0.001), max((factors[1] - 1.0) / 2.0, 0.001)) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:589: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + ks = int(max(2.0 * 2 * sigmas[0], 3)), int(max(2.0 * 2 * sigmas[1], 3)) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:589: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + ks = int(max(2.0 * 2 * sigmas[0], 3)), int(max(2.0 * 2 * sigmas[1], 3)) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/gaussian.py:55: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + sigma = tensor([sigma], device=input.device, dtype=input.dtype) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/gaussian.py:55: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + sigma = tensor([sigma], device=input.device, dtype=input.dtype) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/core/check.py:78: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if x_shape_to_check[i] != dim: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/kernels.py:92: TracerWarning: torch.tensor results are registered as constants in the trace. 
You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + mean = tensor([[mean]], device=sigma.device, dtype=sigma.dtype) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:101: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if len(mean.shape) == 0 or mean.shape[0] == 1: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if len(std.shape) == 0 or std.shape[0] == 1: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:107: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if mean.shape and mean.shape[0] != 1: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:108: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if mean.shape[0] != data.shape[1] and mean.shape[:2] != data.shape[:2]: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:112: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if std.shape and std.shape[0] != 1: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:113: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ if std.shape[0] != data.shape[1] and std.shape[:2] != data.shape[:2]: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:116: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + mean = torch.as_tensor(mean, device=data.device, dtype=data.dtype) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:117: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + std = torch.as_tensor(std, device=data.device, dtype=data.dtype) + + Convert AE encoder ~~~~~~~~~~~~~~~~~~ @@ -376,6 +461,13 @@ Convert AE encoder del model.first_stage_model.encoder gc.collect(); + +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/ae_modules.py:67: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + w_ = w_ * (int(c)**(-0.5)) + + Convert Diffusion U-Net model ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -413,6 +505,21 @@ Convert Diffusion U-Net model del model.model.diffusion_model gc.collect(); + +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:556: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if l_context == 77 + t*16: ## !!! HARD CODE here + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:205: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if batch_size: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:232: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + if self.use_temporal_conv and batch_size: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:76: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + assert x.shape[1] == self.channels + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:99: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + assert x.shape[1] == self.channels + + Convert AE decoder ~~~~~~~~~~~~~~~~~~ @@ -796,15 +903,15 @@ Run OpenVINO pipeline inference .. parsed-literal:: Seed set to 234 - /tmp/ipykernel_971108/2451984876.py:25: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.) + /tmp/ipykernel_79693/2451984876.py:25: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.) img_tensor = torch.from_numpy(image).permute(2, 0, 1).float().to(model.device) - + .. parsed-literal:: - start: man fishing in a boat at sunset 2024-08-06 13:54:24 - Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 164.28 seconds - + start: man fishing in a boat at sunset 2024-10-08 00:11:25 + Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 194.23 seconds + .. 
code:: ipython3 @@ -827,7 +934,7 @@ Run OpenVINO pipeline inference - + @@ -952,7 +1059,7 @@ To collect intermediate model inputs for calibration we should customize modified_model = CompiledModelDecorator(original_diffusion_model, keep_prob=1) model.model.diffusion_model = CModelWrapper(modified_model, model.model.diffusion_model.out_channels) - dataset = datasets.load_dataset("jovianzm/Pexels-400k", split="train", streaming=True).shuffle(seed=42).take(subset_size) + dataset = datasets.load_dataset("google-research-datasets/conceptual_captions", trust_remote_code=True, split="train", streaming=True).shuffle(seed=42).take(subset_size) pbar = tqdm(total=subset_size) channels = model.model.diffusion_model.out_channels @@ -960,8 +1067,8 @@ To collect intermediate model inputs for calibration we should customize h, w = 256 // 8, 256 // 8 noise_shape = [1, channels, frames, h, w] for batch in dataset: - prompt = batch["title"] - image_path = batch["thumbnail"] + prompt = batch["caption"] + image_path = batch["image_url"] image = download_image(image_path) if image is None: continue @@ -999,6 +1106,19 @@ To collect intermediate model inputs for calibration we should customize 0%| | 0/300 [00:00>> model checkpoint loaded. - + .. code:: ipython3 @@ -1316,13 +1343,13 @@ Let’s run the optimized pipeline .. parsed-literal:: Seed set to 234 - + .. parsed-literal:: - start: man fishing in a boat at sunset 2024-08-06 15:09:26 - Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 81.47 seconds - + start: man fishing in a boat at sunset 2024-10-08 01:40:46 + Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 98.42 seconds + .. code:: ipython3 @@ -1344,7 +1371,7 @@ Let’s run the optimized pipeline - + Compare model file sizes @@ -1372,7 +1399,7 @@ Compare model file sizes encoder_first_stage_ir compression rate: 3.986 embedder_ir compression rate: 3.977 model_ir compression rate: 3.981 - + Compare inference time of the FP32 and INT8 models ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1426,10 +1453,10 @@ models, we use median inference time on calibration subset. .. parsed-literal:: - FP32 latency: 162.304 - INT8 latency: 79.590 - Performance speed up: 2.039 - + FP32 latency: 193.245 + INT8 latency: 97.168 + Performance speed up: 1.989 + Interactive inference --------------------- @@ -1453,6 +1480,15 @@ to launch the interactive demo. use_quantized_models + + + +.. parsed-literal:: + + Checkbox(value=True, description='Use quantized models') + + + .. code:: ipython3 from functools import partial @@ -1471,9 +1507,23 @@ to launch the interactive demo. demo = make_demo(fn=get_image_fn) try: - demo.queue().launch(debug=True) + demo.queue().launch(debug=False) except Exception: - demo.queue().launch(debug=True, share=True) + demo.queue().launch(debug=False, share=True) # if you are launching remotely, specify server_name and server_port # demo.launch(server_name='your server name', server_port='server port in int') # Read more in the docs: https://gradio.app/docs/ + + +.. parsed-literal:: + + Running on local URL: http://127.0.0.1:7860 + + To create a public link, set `share=True` in `launch()`. + + + + + + + diff --git a/docs/notebooks/efficient-sam-with-output.rst b/docs/notebooks/efficient-sam-with-output.rst index 40c7638dccf39f..e9c5d2f07afec7 100644 --- a/docs/notebooks/efficient-sam-with-output.rst +++ b/docs/notebooks/efficient-sam-with-output.rst @@ -116,9 +116,9 @@ Prerequisites remote: Counting objects: 100% (85/85), done. remote: Compressing objects: 100% (33/33), done. 
remote: Total 424 (delta 76), reused 52 (delta 52), pack-reused 339 (from 1) - Receiving objects: 100% (424/424), 262.14 MiB | 23.40 MiB/s, done. + Receiving objects: 100% (424/424), 262.14 MiB | 22.33 MiB/s, done. Resolving deltas: 100% (246/246), done. - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM .. code:: ipython3 @@ -385,23 +385,23 @@ disk using ``openvino.save_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:220: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:220: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if ( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:241: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:241: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert ( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:163: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:163: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
size = int(math.sqrt(xy_num)) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert size * size == xy_num - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:166: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:166: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if size != h or size != w: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:251: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:251: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert x.shape[2] == num_patches - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if num_pts > self.decoder_max_num_input_points: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:92: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:92: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! elif num_pts < self.decoder_max_num_input_points: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:126: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:126: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if output_w > 0 and output_h > 0: @@ -648,10 +648,10 @@ architecture type, we should specify ``transformer`` in ``model_type``. .. parsed-literal:: - 2024-09-24 00:41:36.417744: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-09-24 00:41:36.450605: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-08 01:57:55.723142: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-08 01:57:55.754489: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2024-09-24 00:41:37.081764: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-08 01:57:56.401127: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -818,7 +818,7 @@ models, we use ``bencmark_app``. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 29.72 ms + [ INFO ] Read model took 29.82 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] batched_images (node: batched_images) : f32 / [...] / [?,?,?,?] @@ -838,7 +838,7 @@ models, we use ``bencmark_app``. [ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_3) : f32 / [...] / [?,?,?,?,?] [ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_2) : f32 / [...] / [?,?,?] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 1391.34 ms + [ INFO ] Compile model took 1394.30 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -879,17 +879,17 @@ models, we use ``bencmark_app``. [ INFO ] Fill input 'batched_point_labels' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in full mode (inputs filling are included in measurement loop). - [ INFO ] First inference took 822.04 ms + [ INFO ] First inference took 815.67 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 47 iterations - [ INFO ] Duration: 16545.03 ms + [ INFO ] Count: 54 iterations + [ INFO ] Duration: 16885.27 ms [ INFO ] Latency: - [ INFO ] Median: 2017.02 ms - [ INFO ] Average: 2049.16 ms - [ INFO ] Min: 1110.81 ms - [ INFO ] Max: 2739.11 ms - [ INFO ] Throughput: 2.84 FPS + [ INFO ] Median: 1856.65 ms + [ INFO ] Average: 1850.85 ms + [ INFO ] Min: 1459.90 ms + [ INFO ] Max: 2009.04 ms + [ INFO ] Throughput: 3.20 FPS .. code:: ipython3 @@ -915,7 +915,7 @@ models, we use ``bencmark_app``. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 43.63 ms + [ INFO ] Read model took 44.15 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] batched_images (node: batched_images) : f32 / [...] / [?,?,?,?] @@ -935,7 +935,7 @@ models, we use ``bencmark_app``. [ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_3) : f32 / [...] / [?,?,?,?,?] [ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_2) : f32 / [...] / [?,?,?] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 1629.94 ms + [ INFO ] Compile model took 1618.00 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -976,17 +976,17 @@ models, we use ``bencmark_app``. [ INFO ] Fill input 'batched_point_labels' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in full mode (inputs filling are included in measurement loop). 
- [ INFO ] First inference took 606.10 ms + [ INFO ] First inference took 587.18 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 55 iterations - [ INFO ] Duration: 15905.50 ms + [ INFO ] Count: 58 iterations + [ INFO ] Duration: 16436.53 ms [ INFO ] Latency: - [ INFO ] Median: 1706.55 ms - [ INFO ] Average: 1699.17 ms - [ INFO ] Min: 613.31 ms - [ INFO ] Max: 1844.61 ms - [ INFO ] Throughput: 3.46 FPS + [ INFO ] Median: 1670.45 ms + [ INFO ] Average: 1680.37 ms + [ INFO ] Min: 1321.28 ms + [ INFO ] Max: 2532.97 ms + [ INFO ] Throughput: 3.53 FPS Interactive segmentation demo @@ -1316,7 +1316,7 @@ Interactive segmentation demo .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/efficient-sam + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam Running on local URL: http://127.0.0.1:7860 To create a public link, set `share=True` in `launch()`. diff --git a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png index b8366baf628648..8854bf68943a42 100644 --- a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png +++ b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5bbafa04ac9b94411aed97da4ab9979a1b94c71012b68f0597d0c3393ce608e2 -size 1260003 +oid sha256:a0ed908091274e314601740b2be6f08a06b74532f09d98e99703a91f1155ccd4 +size 1260810 diff --git a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png index a007fbc9ab007a..45da467e43595b 100644 --- a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png +++ b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f1e13ae84be0a3bb187750f1716fc193299896abf2c19bdaf76b9f994fc7724c -size 1263032 +oid sha256:444eaadeac9bde960bc08fc6eed2ca8c5d5de854782d9ea322535ec1e35a38b0 +size 1261402 diff --git a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png index dcb52e2d6a049f..91cbceb5a9bc44 100644 --- a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png +++ b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b77b2e111af333e6aad3e957dffdb68a65ce8f9277d2f29479b821fc644b0697 -size 1260790 +oid sha256:90b937884a9df8f888beb020e5a56dba5ee941f780ac61dd0fda8502909038d2 +size 1261809 diff --git a/docs/notebooks/encodec-audio-compression-with-output.rst b/docs/notebooks/encodec-audio-compression-with-output.rst index 12381c64712f11..5036c6f32a2259 100644 --- a/docs/notebooks/encodec-audio-compression-with-output.rst +++ b/docs/notebooks/encodec-audio-compression-with-output.rst @@ -142,7 +142,7 @@ bandwidth. .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. WeightNorm.apply(module, name, dim) @@ -302,7 +302,7 @@ similar as possible to the original. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. WeightNorm.apply(module, name, dim) @@ -402,13 +402,13 @@ with ``ov.save_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:60: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:60: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! ideal_length = (math.ceil(n_frames) - 1) * stride + (kernel_size - padding_total) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert padding_left >= 0 and padding_right >= 0, (padding_left, padding_right) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:87: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:87: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! max_pad = max(padding_left, padding_right) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:89: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:89: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if length <= max_pad: @@ -428,11 +428,11 @@ with ``ov.save_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:358: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:358: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. quantized_out = torch.tensor(0.0, device=q_indices.device) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:359: TracerWarning: Iterating over a tensor might cause the trace to be incorrect. Passing a tensor of different shape won't change the number of iterations executed (and might lead to errors or silently give incorrect results). + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:359: TracerWarning: Iterating over a tensor might cause the trace to be incorrect. Passing a tensor of different shape won't change the number of iterations executed (and might lead to errors or silently give incorrect results). 
for i, indices in enumerate(q_indices): - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert (padding_left + padding_right) <= x.shape[-1] diff --git a/docs/notebooks/fast-segment-anything-with-output.rst b/docs/notebooks/fast-segment-anything-with-output.rst index 56b76d5a85269e..65bd0c194a8116 100644 --- a/docs/notebooks/fast-segment-anything-with-output.rst +++ b/docs/notebooks/fast-segment-anything-with-output.rst @@ -158,7 +158,7 @@ model and generate a segmentation map. .. parsed-literal:: - 100%|██████████| 138M/138M [00:02<00:00, 67.2MB/s] + 100%|██████████| 138M/138M [00:02<00:00, 67.6MB/s] @@ -170,8 +170,8 @@ model and generate a segmentation map. .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 768x1024 37 objects, 642.3ms - Speed: 3.8ms preprocess, 642.3ms inference, 754.8ms postprocess per image at shape (1, 3, 768, 1024) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 768x1024 37 objects, 612.7ms + Speed: 3.0ms preprocess, 612.7ms inference, 794.5ms postprocess per image at shape (1, 3, 768, 1024) The model returns segmentation maps for all the objects on the image. @@ -214,10 +214,10 @@ tracing. The FastSAM model itself is based on YOLOv8 model. PyTorch: starting from 'FastSAM-x.pt' with input shape (1, 3, 1024, 1024) BCHW and output shape(s) ((1, 37, 21504), (1, 32, 256, 256)) (138.3 MB) OpenVINO: starting export with openvino 2024.4.0-16579-c3152d32c9c-releases/2024/4... - OpenVINO: export success ✅ 6.2s, saved as 'FastSAM-x_openvino_model/' (276.1 MB) + OpenVINO: export success ✅ 6.0s, saved as 'FastSAM-x_openvino_model/' (276.1 MB) - Export complete (9.2s) - Results saved to /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/fast-segment-anything + Export complete (9.0s) + Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything Predict: yolo predict task=segment model=FastSAM-x_openvino_model imgsz=1024 Validate: yolo val task=segment model=FastSAM-x_openvino_model imgsz=1024 data=ultralytics/datasets/sa.yaml Visualize: https://netron.app @@ -321,8 +321,8 @@ pipeline. .. 
parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 1024x1024 42 objects, 501.4ms - Speed: 6.6ms preprocess, 501.4ms inference, 33.0ms postprocess per image at shape (1, 3, 1024, 1024) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 1024x1024 42 objects, 498.1ms + Speed: 5.7ms preprocess, 498.1ms inference, 31.2ms postprocess per image at shape (1, 3, 1024, 1024) One can observe the converted model outputs in the next cell, they is @@ -615,8 +615,8 @@ calibration dataset to measure the performance. .. parsed-literal:: - Segmented in 67 seconds. - Resulting in 1.91 fps + Segmented in 70 seconds. + Resulting in 1.83 fps .. code:: ipython3 @@ -643,9 +643,9 @@ calibration dataset to measure the performance. .. parsed-literal:: - Segmented in 22 seconds - Resulting in 5.82 fps - That is 3.05 times faster! + Segmented in 21 seconds + Resulting in 6.1 fps + That is 3.33 times faster! Try out the converted pipeline diff --git a/docs/notebooks/florence2-with-output.rst b/docs/notebooks/florence2-with-output.rst index 7738a227d911a5..a09f2a1ea60399 100644 --- a/docs/notebooks/florence2-with-output.rst +++ b/docs/notebooks/florence2-with-output.rst @@ -108,10 +108,10 @@ available model. By default, we will use .. parsed-literal:: - 2024-09-24 00:54:26.992764: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-09-24 00:54:27.026832: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-08 02:10:50.200273: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-08 02:10:50.234398: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-09-24 00:54:27.545998: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-08 02:10:50.883345: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -201,31 +201,31 @@ pipeline. .. parsed-literal:: - README.md: 0%| | 0.00/14.8k [00:00 1 or self.sliding_window is not None: /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/chkpt/modeling_florence2.py:1205: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
is_causal = True if self.is_causal and attention_mask is None and tgt_len > 1 else False diff --git a/docs/notebooks/florence2-with-output_files/florence2-with-output_18_0.png b/docs/notebooks/florence2-with-output_files/florence2-with-output_18_0.png index 59810b77c9135f..a1484a3eb4b6d0 100644 --- a/docs/notebooks/florence2-with-output_files/florence2-with-output_18_0.png +++ b/docs/notebooks/florence2-with-output_files/florence2-with-output_18_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3ae7dc54dc314e4f5280d3fddb878c1784e6be727f7910e869e64632e43fd12d -size 259663 +oid sha256:f5808e660c685e4518f0a83232c2b9c9e88263e05ecd05891f769d263df4c642 +size 259656 diff --git a/docs/notebooks/flux.1-image-generation-with-output.rst b/docs/notebooks/flux.1-image-generation-with-output.rst index afefa2838f49f1..09a05c0e73bf8d 100644 --- a/docs/notebooks/flux.1-image-generation-with-output.rst +++ b/docs/notebooks/flux.1-image-generation-with-output.rst @@ -116,10 +116,10 @@ FLUX.1-dev version using widget bellow. .. parsed-literal:: - 2024-09-24 00:55:16.941633: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-09-24 00:55:16.976313: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-08 02:11:42.908018: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-08 02:11:42.941481: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-09-24 00:55:17.640172: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-08 02:11:43.614104: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -193,15 +193,15 @@ and convert each part of pipeline using ``ov.convert_model``. Loading pipeline components...: 0%| | 0/7 [00:00 1 or self.sliding_window is not None: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if past_key_values_length > 0: @@ -241,9 +241,9 @@ and convert each part of pipeline using ``ov.convert_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if hidden_states.shape[0] >= 64: diff --git a/docs/notebooks/freevc-voice-conversion-with-output.rst b/docs/notebooks/freevc-voice-conversion-with-output.rst index fa7d4e26fbf5f6..73db954afdfbbb 100644 --- a/docs/notebooks/freevc-voice-conversion-with-output.rst +++ b/docs/notebooks/freevc-voice-conversion-with-output.rst @@ -104,7 +104,7 @@ Check if FreeVC is installed and append its path to ``sys.path`` remote: Counting objects: 100% (74/74), done. remote: Compressing objects: 100% (47/47), done. remote: Total 131 (delta 43), reused 27 (delta 27), pack-reused 57 (from 1) - Receiving objects: 100% (131/131), 15.28 MiB | 23.82 MiB/s, done. + Receiving objects: 100% (131/131), 15.28 MiB | 17.35 MiB/s, done. Resolving deltas: 100% (43/43), done. @@ -134,8 +134,8 @@ Check if FreeVC is installed and append its path to ``sys.path`` Downloading... From: https://drive.google.com/uc?id=12-cB34qCTvByWT-QtOcZaqwwO21FLSqU&confirm=t&uuid=a703c43c-ccce-436c-8799-c11b88e9e7e4 - To: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/WavLM-Large.pt - 100%|██████████| 1.26G/1.26G [00:28<00:00, 45.0MB/s] + To: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/WavLM-Large.pt + 100%|██████████| 1.26G/1.26G [00:28<00:00, 44.0MB/s] .. code:: ipython3 @@ -239,13 +239,13 @@ Models initialization .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm. warnings.warn("torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.") .. parsed-literal:: - Loaded the voice encoder model on cpu in 0.01 seconds. + Loaded the voice encoder model on cpu in 0.00 seconds. Reading dataset settings @@ -288,7 +288,7 @@ Inference .. parsed-literal:: - 2it [00:03, 1.92s/it] + 2it [00:03, 1.90s/it] Result audio files should be available in ‘outputs/freevc’ @@ -360,13 +360,13 @@ Converting to OpenVINO’s IR format. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:495: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:495: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert embed_dim == self.embed_dim - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:496: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:496: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert list(query.size()) == [tgt_len, bsz, embed_dim] - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:500: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:500: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert key_bsz == bsz - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:502: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:502: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert src_len, bsz == value.shape[:2] @@ -581,12 +581,12 @@ function to OpenVINO IR format. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1102: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1102: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: Tensor-likes are not close! - Mismatched elements: 25917 / 25920 (100.0%) - Greatest absolute difference: 1.4735720753669739 at index (0, 0, 19664) (up to 1e-05 allowed) - Greatest relative difference: 79463.82149046793 at index (0, 0, 23530) (up to 1e-05 allowed) + Mismatched elements: 25912 / 25920 (100.0%) + Greatest absolute difference: 1.0370872616767883 at index (0, 0, 18175) (up to 1e-05 allowed) + Greatest relative difference: 8656.24884080371 at index (0, 0, 11526) (up to 1e-05 allowed) _check_trace( @@ -645,7 +645,7 @@ And now we can check inference using only IR models. .. 
parsed-literal:: - 2it [00:01, 1.31it/s] + 2it [00:01, 1.29it/s] Result audio files should be available in ‘outputs/freevc’ and you can @@ -707,7 +707,7 @@ Result audio: diff --git a/docs/notebooks/gpu-device-with-output.rst b/docs/notebooks/gpu-device-with-output.rst index c65f3d63d22c5a..2b35846b766bd1 100644 --- a/docs/notebooks/gpu-device-with-output.rst +++ b/docs/notebooks/gpu-device-with-output.rst @@ -121,11 +121,8 @@ Install required packages %pip install -q "openvino-dev>=2024.0.0" "opencv-python" "tqdm" %pip install -q "tensorflow-macos>=2.5; sys_platform == 'darwin' and platform_machine == 'arm64' and python_version > '3.8'" # macOS M1 and M2 - %pip install -q "tensorflow-macos>=2.5,<=2.12.0; sys_platform == 'darwin' and platform_machine == 'arm64' and python_version <= '3.8'" # macOS M1 and M2 %pip install -q "tensorflow>=2.5; sys_platform == 'darwin' and platform_machine != 'arm64' and python_version > '3.8'" # macOS x86 - %pip install -q "tensorflow>=2.5,<=2.12.0; sys_platform == 'darwin' and platform_machine != 'arm64' and python_version <= '3.8'" # macOS x86 %pip install -q "tensorflow>=2.5; sys_platform != 'darwin' and python_version > '3.8'" - %pip install -q "tensorflow>=2.5,<=2.12.0; sys_platform != 'darwin' and python_version <= '3.8'" Checking GPUs with Query Device ------------------------------- diff --git a/docs/notebooks/grounded-segment-anything-with-output.rst b/docs/notebooks/grounded-segment-anything-with-output.rst index 0667cb9285e26c..ee6741ff7af4e7 100644 --- a/docs/notebooks/grounded-segment-anything-with-output.rst +++ b/docs/notebooks/grounded-segment-anything-with-output.rst @@ -64,6 +64,7 @@ Clone repositories and install requirements .. parsed-literal:: + WARNING: supervision 0.24.0 does not provide the extra 'desktop' Note: you may need to restart the kernel to use updated packages. @@ -125,14 +126,14 @@ segmentation you can select vanilla ``SAM``. remote: Counting objects: 100% (190/190), done. remote: Compressing objects: 100% (80/80), done. remote: Total 379 (delta 135), reused 110 (delta 110), pack-reused 189 (from 1) - Receiving objects: 100% (379/379), 14.03 MiB | 16.82 MiB/s, done. + Receiving objects: 100% (379/379), 14.03 MiB | 19.90 MiB/s, done. Resolving deltas: 100% (194/194), done. Cloning into 'EfficientSAM'... remote: Enumerating objects: 424, done. remote: Counting objects: 100% (85/85), done. remote: Compressing objects: 100% (33/33), done. remote: Total 424 (delta 76), reused 52 (delta 52), pack-reused 339 (from 1) - Receiving objects: 100% (424/424), 262.14 MiB | 26.81 MiB/s, done. + Receiving objects: 100% (424/424), 262.14 MiB | 25.51 MiB/s, done. Resolving deltas: 100% (246/246), done. @@ -259,15 +260,6 @@ GroundingDINO imports .. parsed-literal:: final text_encoder_type: bert-base-uncased - - -.. parsed-literal:: - - FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884 - - -.. parsed-literal:: - final text_encoder_type: bert-base-uncased @@ -514,10 +506,10 @@ class, but the inference will be done using OpenVINO optimized model. .. parsed-literal:: - 2024-09-24 01:11:42.692032: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-09-24 01:11:42.732257: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-08 02:28:09.725059: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-08 02:28:09.764729: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-09-24 01:11:43.332245: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-08 02:28:10.354526: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT Convert predicted boxes to supervision box detections format diff --git a/docs/notebooks/hello-segmentation-with-output.rst b/docs/notebooks/hello-segmentation-with-output.rst index d40160316302c4..6f55f7666710cc 100644 --- a/docs/notebooks/hello-segmentation-with-output.rst +++ b/docs/notebooks/hello-segmentation-with-output.rst @@ -196,7 +196,7 @@ is provided. .. parsed-literal:: - + @@ -223,7 +223,7 @@ Do Inference .. parsed-literal:: - + diff --git a/docs/notebooks/hugging-face-hub-with-output.rst b/docs/notebooks/hugging-face-hub-with-output.rst index 941931bf097a63..b13205541f558f 100644 --- a/docs/notebooks/hugging-face-hub-with-output.rst +++ b/docs/notebooks/hugging-face-hub-with-output.rst @@ -132,8 +132,6 @@ tutorials `__. To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) - 2024-09-24 01:13:10.420082: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-09-24 01:13:10.454873: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-08 02:29:42.074725: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-08 02:29:42.108881: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2024-09-24 01:13:11.041395: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-08 02:29:42.698091: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT Initialize and Convert the Model Automatically using OVModel class @@ -445,7 +443,7 @@ Full list of supported arguments available via ``--help`` .. parsed-literal:: - 2024-09-24 01:13:25.031832: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-08 02:29:55.851395: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT usage: optimum-cli export openvino [-h] -m MODEL [--task TASK] [--framework {pt,tf}] [--trust-remote-code] [--weight-format {fp32,fp16,int8,int4,mxfp4}] @@ -455,7 +453,7 @@ Full list of supported arguments available via ``--help`` [--ratio RATIO] [--sym] [--group-size GROUP_SIZE] [--dataset DATASET] [--all-layers] [--awq] - [--scale-estimation] + [--scale-estimation] [--gptq] [--sensitivity-metric SENSITIVITY_METRIC] [--num-samples NUM_SAMPLES] [--disable-stateful] @@ -476,20 +474,20 @@ Full list of supported arguments available via ``--help`` --task TASK The task to export the model for. If not specified, the task will be auto-inferred based on the model. Available tasks depend on the model, but are among: - ['multiple-choice', 'image-to-text', 'zero-shot- - object-detection', 'audio-classification', 'image- - segmentation', 'inpainting', 'audio-frame- - classification', 'masked-im', 'depth-estimation', - 'sentence-similarity', 'object-detection', 'feature- - extraction', 'text-to-audio', 'text-generation', - 'text-classification', 'mask-generation', 'audio- - xvector', 'semantic-segmentation', 'text2text- - generation', 'text-to-image', 'question-answering', - 'token-classification', 'image-classification', 'fill- - mask', 'zero-shot-image-classification', 'image-to- - image', 'automatic-speech-recognition']. For decoder - models, use `xxx-with-past` to export the model using - past key values in the decoder. + ['image-classification', 'depth-estimation', 'object- + detection', 'zero-shot-object-detection', 'text-to- + audio', 'token-classification', 'audio-frame- + classification', 'semantic-segmentation', 'automatic- + speech-recognition', 'image-segmentation', 'question- + answering', 'text-classification', 'multiple-choice', + 'inpainting', 'masked-im', 'text2text-generation', + 'image-to-text', 'text-generation', 'sentence- + similarity', 'mask-generation', 'text-to-image', + 'audio-xvector', 'zero-shot-image-classification', + 'fill-mask', 'image-to-image', 'feature-extraction', + 'audio-classification']. For decoder models, use `xxx- + with-past` to export the model using past key values + in the decoder. --framework {pt,tf} The framework to use for the export. If not provided, will attempt to use the local checkpoint's original framework or what is available in the environment. @@ -545,6 +543,11 @@ Full list of supported arguments available via ``--help`` required to run scale estimation. Please note, that applying scale estimation takes additional memory and time. + --gptq Indicates whether to apply GPTQ algorithm that + optimizes compressed weights in a layer-wise fashion + to minimize the difference between activations of a + compressed and original layer. Please note, that + applying GPTQ takes additional memory and time. 
--sensitivity-metric SENSITIVITY_METRIC The sensitivity metric for assigning quantization precision to layers. Can be one of the following: @@ -589,7 +592,7 @@ compression: .. parsed-literal:: - 2024-09-24 01:13:30.217357: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-08 02:30:01.388483: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT Framework not specified. Using pt to export the model. Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight'] - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). diff --git a/docs/notebooks/hunyuan-dit-image-generation-with-output.rst b/docs/notebooks/hunyuan-dit-image-generation-with-output.rst index e8e9e84a1230dd..b3fea075421bb3 100644 --- a/docs/notebooks/hunyuan-dit-image-generation-with-output.rst +++ b/docs/notebooks/hunyuan-dit-image-generation-with-output.rst @@ -36,7 +36,6 @@ using OpenVINO. Additionally, we will use `NNCF `__ for optimizing model in low precision. - **Table of contents:** - `Prerequisites <#prerequisites>`__ diff --git a/docs/notebooks/image-classification-quantization-with-output.rst b/docs/notebooks/image-classification-quantization-with-output.rst index 70426460484e1d..f45c3ca5ec3b32 100644 --- a/docs/notebooks/image-classification-quantization-with-output.rst +++ b/docs/notebooks/image-classification-quantization-with-output.rst @@ -112,7 +112,7 @@ Model preparation stage has the following steps: remote: Counting objects: 100% (281/281), done. remote: Compressing objects: 100% (96/96), done. remote: Total 282 (delta 135), reused 269 (delta 128), pack-reused 1 (from 1) - Receiving objects: 100% (282/282), 9.22 MiB | 18.15 MiB/s, done. + Receiving objects: 100% (282/282), 9.22 MiB | 18.95 MiB/s, done. Resolving deltas: 100% (135/135), done. @@ -184,7 +184,7 @@ Preprocessing for model obtained from training .. parsed-literal:: - 100%|██████████| 170498071/170498071 [00:07<00:00, 23805938.96it/s] + 100%|██████████| 170498071/170498071 [00:06<00:00, 24572685.83it/s] .. parsed-literal:: @@ -256,10 +256,10 @@ about supported parameters can be found on this .. parsed-literal:: - 2024-09-24 01:14:11.097058: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-09-24 01:14:11.128111: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-08 02:30:41.915322: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-08 02:30:41.946467: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-09-24 01:14:11.669050: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-08 02:30:42.497931: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -427,7 +427,7 @@ Tool self.max_seq_len_cached: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:324: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:324: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len): - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:339: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:339: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim): @@ -521,7 +521,7 @@ Let’s check model capabilities in answering questions about image: .. parsed-literal:: - The image shows a red panda with its eyes and facial features clear and attention-focused. The panda appears to be partially inside an enclosure, possibly a cage, with wooden structures and a tree branch in the background that has dense foliage. The red panda is looking intently at the viewer, with its paws gently resting on a wooden surface. The background is vibrant and lush, indicating the red panda is in a safe and comfortable habitat. + The image displays a young red panda cub. This cute creature has a striking mix of dark reds and whites on its fur, with a striking white underbelly and back. The cub is sitting and peering forward with a curious expression. It appears to be peering through a wooden partition, and a piece of bamboo is visible in the bottom-left corner of the image. The background includes green foliage, suggesting a natural habitat for the cub. 
This cub is cute and looks like it's enjoying Interactive demo ---------------- diff --git a/docs/notebooks/jina-clip-with-output.rst b/docs/notebooks/jina-clip-with-output.rst index 3ca475131f64c9..5b61ee9af2d3e4 100644 --- a/docs/notebooks/jina-clip-with-output.rst +++ b/docs/notebooks/jina-clip-with-output.rst @@ -104,12 +104,50 @@ weights, using ``from_pretrained`` method. model = AutoModel.from_pretrained("jinaai/jina-clip-v1", trust_remote_code=True) + +.. parsed-literal:: + + configuration_clip.py: 0%| | 0.00/11.7k [00:00`__ .. parsed-literal:: - 2024-09-24 01:20:55.079809: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-09-24 01:20:55.114895: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-08 02:37:47.151795: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-08 02:37:47.185561: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-09-24 01:20:55.639666: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-08 02:37:47.732267: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -374,11 +374,11 @@ Vision model accept ``pixel_values`` and returns ``image_embeds``. .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:465: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:465: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:505: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:505: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): @@ -406,7 +406,7 @@ Convert Image To Text Projection model .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) if a.grad is not None: @@ -541,13 +541,13 @@ generated text by ``AutoProcessor``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:804: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:804: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if max_pos > self.weights.size(0): - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1113: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1113: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input_shape[-1] > 1: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:920: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:920: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attention_mask.size() != (batch_size, 1, seq_length, src_len): - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1206: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1206: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: @@ -1389,9 +1389,9 @@ pipelines, we use mean inference time on 7 samples. .. 
parsed-literal:: - FP32 pipeline: 2.690 seconds - Optimized pipeline: 1.135 seconds - Performance speed-up: 2.370 + FP32 pipeline: 2.691 seconds + Optimized pipeline: 1.193 seconds + Performance speed-up: 2.257 Interactive inference diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg index 8e629981276e5d..16b3efe503aea0 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3b395a6a5211633b50834f2132a168042b01efdc02f7e3fba0377d404a95dd75 -size 120518 +oid sha256:af0a33aba1728df2580f26f7ecf01774c2bc8084835df321d825197a945d775f +size 118088 diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png index fadea0f3ac3d7e..c47f776f0af026 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6bca8b2a0714d8c6851590f9929ebc6d52cab033887032e34f130a9be82c58b5 -size 1150804 +oid sha256:ebd6f6c9ee8fd3adb0dfbaebe7b41e7bdab039063a56ce21f0c6e01429d5ce6b +size 1151007 diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png index 70e6f34c9a734d..90a6e30cff3f2e 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:79eea702b339b48a65206c66033479bfec7cb1bec5f590c2d4c854fc54ef1e21 -size 1148809 +oid sha256:8a3b5a0b0c94dec94a9aeb46ebb0ca6af8e63897684e446a299dc31b84851ce2 +size 1148928 diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg index 1f9ef4e9713068..073bf8e86591fb 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5f2e55f694f6a87c5b5299afe5f832d0ebfdf57fd1cdc80739235091cc6f2fb2 -size 118997 +oid 
sha256:0f41efc8140938f17d9ea5b31d699440fdcc3f3d407eba04abc4d694769f2529 +size 122071 diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png index 0186913bb93a2d..5069e51978df61 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7a3763d1b128c1b7a5445e5117175bc0350466b2c2718c2d39c0ea2e5741e73c -size 1150723 +oid sha256:fa756de5e754a64022b09b445f6851d8ce7373e09d5a776497bbf69513085cc8 +size 1150840 diff --git a/docs/notebooks/language-quantize-bert-with-output.rst b/docs/notebooks/language-quantize-bert-with-output.rst index b5ae8a049d6307..f698389c6f8304 100644 --- a/docs/notebooks/language-quantize-bert-with-output.rst +++ b/docs/notebooks/language-quantize-bert-with-output.rst @@ -101,10 +101,10 @@ Imports .. parsed-literal:: - 2024-09-24 01:27:29.753789: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-09-24 01:27:29.788308: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-08 02:44:29.489974: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-08 02:44:29.524217: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-09-24 01:27:30.340147: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-08 02:44:30.075180: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -211,7 +211,7 @@ PyTorch model formats are supported: .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( @@ -247,7 +247,7 @@ tokenizer from HuggingFace. .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884 warnings.warn( @@ -505,9 +505,9 @@ Frames Per Second (FPS) for images. .. parsed-literal:: - PyTorch model on CPU: 0.068 seconds per sentence, SPS: 14.75 - IR FP32 model in OpenVINO Runtime/AUTO: 0.020 seconds per sentence, SPS: 49.42 - OpenVINO IR INT8 model in OpenVINO Runtime/AUTO: 0.009 seconds per sentence, SPS: 108.26 + PyTorch model on CPU: 0.068 seconds per sentence, SPS: 14.78 + IR FP32 model in OpenVINO Runtime/AUTO: 0.020 seconds per sentence, SPS: 49.30 + OpenVINO IR INT8 model in OpenVINO Runtime/AUTO: 0.009 seconds per sentence, SPS: 107.63 Finally, measure the inference performance of OpenVINO ``FP32`` and @@ -548,27 +548,27 @@ in OpenVINO. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 19.00 ms + [ INFO ] Read model took 18.82 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,?] - [ INFO ] 63 , attention_mask (node: attention_mask) : i64 / [...] / [1,?] + [ INFO ] attention_mask , 63 (node: attention_mask) : i64 / [...] / [1,?] [ INFO ] token_type_ids (node: token_type_ids) : i64 / [...] / [1,?] [ INFO ] Model outputs: [ INFO ] logits (node: __module.classifier/aten::linear/Add) : f32 / [...] / [1,2] [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'input_ids': [1,128], '63': [1,128], 'token_type_ids': [1,128] - [ INFO ] Reshape model took 5.50 ms + [ INFO ] Reshape model took 5.48 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,128] - [ INFO ] 63 , attention_mask (node: attention_mask) : i64 / [...] / [1,128] + [ INFO ] attention_mask , 63 (node: attention_mask) : i64 / [...] / [1,128] [ INFO ] token_type_ids (node: token_type_ids) : i64 / [...] / [1,128] [ INFO ] Model outputs: [ INFO ] logits (node: __module.classifier/aten::linear/Add) : f32 / [...] / [1,2] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 354.56 ms + [ INFO ] Compile model took 351.96 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -609,17 +609,17 @@ in OpenVINO. 
[ INFO ] Fill input 'token_type_ids' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 120000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 21.29 ms + [ INFO ] First inference took 20.92 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 6646 iterations - [ INFO ] Duration: 120009.35 ms + [ INFO ] Count: 6334 iterations + [ INFO ] Duration: 120017.42 ms [ INFO ] Latency: - [ INFO ] Median: 17.93 ms - [ INFO ] Average: 17.96 ms - [ INFO ] Min: 17.16 ms - [ INFO ] Max: 30.28 ms - [ INFO ] Throughput: 55.38 FPS + [ INFO ] Median: 18.69 ms + [ INFO ] Average: 18.85 ms + [ INFO ] Min: 17.15 ms + [ INFO ] Max: 26.62 ms + [ INFO ] Throughput: 52.78 FPS .. code:: ipython3 @@ -646,27 +646,27 @@ in OpenVINO. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 25.22 ms + [ INFO ] Read model took 25.69 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,?] - [ INFO ] attention_mask , 63 (node: attention_mask) : i64 / [...] / [1,?] + [ INFO ] 63 , attention_mask (node: attention_mask) : i64 / [...] / [1,?] [ INFO ] token_type_ids (node: token_type_ids) : i64 / [...] / [1,?] [ INFO ] Model outputs: [ INFO ] logits (node: __module.classifier/aten::linear/Add) : f32 / [...] / [1,2] [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'input_ids': [1,128], '63': [1,128], 'token_type_ids': [1,128] - [ INFO ] Reshape model took 7.49 ms + [ INFO ] Reshape model took 7.39 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,128] - [ INFO ] attention_mask , 63 (node: attention_mask) : i64 / [...] / [1,128] + [ INFO ] 63 , attention_mask (node: attention_mask) : i64 / [...] / [1,128] [ INFO ] token_type_ids (node: token_type_ids) : i64 / [...] / [1,128] [ INFO ] Model outputs: [ INFO ] logits (node: __module.classifier/aten::linear/Add) : f32 / [...] / [1,2] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 1109.75 ms + [ INFO ] Compile model took 1128.78 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -707,15 +707,15 @@ in OpenVINO. [ INFO ] Fill input 'token_type_ids' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 120000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 17.42 ms + [ INFO ] First inference took 15.68 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 13295 iterations - [ INFO ] Duration: 120004.17 ms + [ INFO ] Count: 12868 iterations + [ INFO ] Duration: 120000.02 ms [ INFO ] Latency: - [ INFO ] Median: 8.98 ms - [ INFO ] Average: 8.93 ms - [ INFO ] Min: 7.61 ms - [ INFO ] Max: 10.60 ms - [ INFO ] Throughput: 110.79 FPS + [ INFO ] Median: 9.01 ms + [ INFO ] Average: 9.23 ms + [ INFO ] Min: 8.43 ms + [ INFO ] Max: 12.91 ms + [ INFO ] Throughput: 107.23 FPS diff --git a/docs/notebooks/latent-consistency-models-optimum-demo-with-output.rst b/docs/notebooks/latent-consistency-models-optimum-demo-with-output.rst index e28ef06dee02b6..348a243480aec1 100644 --- a/docs/notebooks/latent-consistency-models-optimum-demo-with-output.rst +++ b/docs/notebooks/latent-consistency-models-optimum-demo-with-output.rst @@ -134,10 +134,10 @@ https://huggingface.co/docs/diffusers/en/api/pipelines/latent_consistency_models .. parsed-literal:: - 2024-09-24 01:34:07.256954: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-09-24 01:34:07.291464: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-08 02:50:26.200628: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-08 02:50:26.234856: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2024-09-24 01:34:07.813070: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-08 02:50:26.890470: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT diff --git a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.jpg b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.jpg index 6f716b25c12aaa..c3ee252ebc2731 100644 --- a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.jpg +++ b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bae92e05066a4104e122aef295e07359b254edf215b788bbc97d43733bf32390 -size 28421 +oid sha256:5596d96021ad02d2a954c32cfe4679598a3fc331bbb92df84e437b2594a5d525 +size 30142 diff --git a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.png b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.png index 65a12b7639f0d5..484f8da2cd5c03 100644 --- a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.png +++ b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ad6fa22eee05f4f20d2f7c2ccb1b432fd188d37a933cd7cc0ad65c845480eb40 -size 417701 +oid sha256:1d2a51b952179d6520da3d61f79e1ee7d57d7c22504cf712579110c07b68e63f +size 421582 diff --git a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.jpg b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.jpg index f9c4bd85b1beef..bce1a840efae34 100644 --- a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.jpg +++ b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:753051bedb711f0f7a78263dc275e4900e092b06cea63bec711c32cff8c45958 -size 38225 +oid sha256:e624f572f30ee308bed067571eeb9cb035eb095eabebc0914ac8ae67605896c3 +size 33139 diff --git a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.png b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.png index 1042456517a548..9803cd18053390 100644 --- a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.png +++ b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c2bc4b9deefbc219d3bda3732fb6996cd339e5205ccbb21b6b3ea78bbb5962d6 -size 468297 +oid 
sha256:0bd853d844aabefb69a678715b479dd74c84bd003eeb0362eb568cc592b8fc69 +size 428147 diff --git a/docs/notebooks/llava-next-multimodal-chatbot-with-output.rst b/docs/notebooks/llava-next-multimodal-chatbot-with-output.rst index 41bf829dac1c6d..3e26205ee0272b 100644 --- a/docs/notebooks/llava-next-multimodal-chatbot-with-output.rst +++ b/docs/notebooks/llava-next-multimodal-chatbot-with-output.rst @@ -74,19 +74,19 @@ Prerequisites .. code:: ipython3 from pathlib import Path - + import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) open("notebook_utils.py", "w").write(r.text) - + MODEL_DIR = Path("model") IMAGE_ENCODER_PATH = MODEL_DIR / "image_encoder.xml" INPUT_EMBEDDING_PATH = MODEL_DIR / "input_embeddings.xml" LANGUAGE_MODEL_PATH = MODEL_DIR / "language_model.xml" - + requires_pt_model_loading = not all([p.exists() for p in [IMAGE_ENCODER_PATH, INPUT_EMBEDDING_PATH, LANGUAGE_MODEL_PATH]]) Download PyTorch model @@ -99,18 +99,18 @@ Download PyTorch model from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration import torch import gc - + processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf") image_encoder_model, input_embedding_model, language_model = None, None, None - - + + class ImageEncoder(torch.nn.Module): def __init__(self, config, vision_tower, multi_modal_projector): super().__init__() self.config = config self.vision_tower = vision_tower self.multi_modal_projector = multi_modal_projector - + def forward(self, pixel_values): batch_size, num_patches, num_channels, height, width = pixel_values.shape reshaped_pixel_values = pixel_values.view(batch_size * num_patches, num_channels, height, width) @@ -122,8 +122,8 @@ Download PyTorch model selected_image_feature = selected_image_feature image_features = self.multi_modal_projector(selected_image_feature) return image_features - - + + if requires_pt_model_loading: model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf", low_cpu_mem_usage=True) model.config.save_pretrained(MODEL_DIR) @@ -134,7 +134,9 @@ Download PyTorch model gc.collect() Convert model to OpenVINO Intermediate Representation ------------------------------------------------------- +----------------------------------------------------- + + OpenVINO supports PyTorch models via conversion to OpenVINO Intermediate Representation (IR). `OpenVINO model conversion @@ -205,8 +207,8 @@ Image Encoder is represented in LLaVA by pretrained CLIP model. import torch import openvino as ov import gc - - + + def cleanup_torchscript_cache(): """ Helper for removing cached model representation @@ -214,14 +216,14 @@ Image Encoder is represented in LLaVA by pretrained CLIP model. torch._C._jit_clear_class_registry() torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() torch.jit._state._clear_class_state() - - + + if not IMAGE_ENCODER_PATH.exists(): ov_image_encoder = ov.convert_model(image_encoder_model, example_input=torch.zeros((1, 5, 3, 336, 336))) ov.save_model(ov_image_encoder, IMAGE_ENCODER_PATH) del ov_image_encoder cleanup_torchscript_cache() - + del image_encoder_model gc.collect(); @@ -239,16 +241,16 @@ use it separately. .. 
code:: ipython3 llm_input = None - + if not LANGUAGE_MODEL_PATH.exists(): llm_input = input_embedding_model(torch.ones((2, 2), dtype=torch.int64)) - + if not INPUT_EMBEDDING_PATH.exists(): ov_input_embeddings_model = ov.convert_model(input_embedding_model, example_input=torch.ones((2, 2), dtype=torch.int64)) ov.save_model(ov_input_embeddings_model, INPUT_EMBEDDING_PATH) del ov_input_embeddings_model cleanup_torchscript_cache() - + del input_embedding_model gc.collect(); @@ -299,28 +301,27 @@ documentation 0 - - + + def model_has_input_output_name(ov_model: ov.Model, name: str): """ Helper function for checking that model has specified input or output name - + Parameters: - ov_model (ov.Model): + ov_model (ov.Model): name (str): name of input or output - + Returns: True if input or output with requested name exists else False """ return name in sum([list(t.get_names()) for t in ov_model.inputs + ov_model.outputs], []) - - + + def fuse_cache_reorder( ov_model: ov.Model, not_kv_inputs: List[str], @@ -329,14 +330,14 @@ documentation 1: base_image_feature = image_feature[0] image_feature = image_feature[1:] - + if height * width != base_image_feature.shape[0]: raise ValueError("The number of patches is not consistent with the image size.") num_patch_height, num_patch_width = get_anyres_image_grid_shape( @@ -1019,7 +1021,7 @@ documentation ", "how", "are"] # we need to index copy on [0, 577, 578, 579] for the text and [1:576] for the image features final_embedding[batch_indices, text_to_overwrite] = inputs_embeds[batch_indices, non_image_indices] final_attention_mask[batch_indices, text_to_overwrite] = attention_mask[batch_indices, non_image_indices] if labels is not None: final_labels[batch_indices, text_to_overwrite] = labels[batch_indices, non_image_indices] - + # 5. Fill the embeddings corresponding to the images. Anything that is still zeros needs filling image_to_overwrite = torch.all(final_embedding == 0, dim=-1) image_to_overwrite &= image_to_overwrite.cumsum(-1) - 1 >= nb_image_pad[:, None].to(target_device) @@ -1092,19 +1094,19 @@ documentation \n{question}[/INST]" streamer = TextStreamer(processor, skip_special_tokens=True, skip_prompt=True) - + inputs = processor(prompt, image, return_tensors="pt") print(f"Question:\n{question}") image @@ -1264,7 +1283,7 @@ Select device -.. image:: llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_32_1.png +.. 
image:: llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_36_1.png @@ -1298,8 +1317,8 @@ Interactive demo from threading import Thread from PIL import Image import torch - - + + def bot_streaming(message, history): print(message) if message["files"]: @@ -1310,21 +1329,21 @@ Interactive demo for hist in history: if isinstance(hist[0], tuple): image = hist[0][0] - + if image is None: gr.Error("You need to upload an image for LLaVA to work.") prompt = f"[INST] \n{message['text']} [/INST]" image = Image.open(image).convert("RGB") inputs = processor(prompt, image, return_tensors="pt") - + streamer = TextIteratorStreamer(processor, **{"skip_special_tokens": True}) generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=100) - + thread = Thread(target=ov_llava_model.generate, kwargs=generation_kwargs) thread.start() - + text_prompt = f"[INST] \n{message['text']} [/INST]" - + buffer = "" for new_text in streamer: buffer += new_text @@ -1336,11 +1355,11 @@ Interactive demo if not Path("gradio_helper.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/llava-next-multimodal-chatbot/gradio_helper.py") open("gradio_helper.py", "w").write(r.text) - + from gradio_helper import make_demo - + demo = make_demo(fn=bot_streaming) - + try: demo.launch(debug=False) except Exception: diff --git a/docs/notebooks/llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_32_1.jpg b/docs/notebooks/llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_36_1.jpg similarity index 100% rename from docs/notebooks/llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_32_1.jpg rename to docs/notebooks/llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_36_1.jpg diff --git a/docs/notebooks/llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_32_1.png b/docs/notebooks/llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_36_1.png similarity index 100% rename from docs/notebooks/llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_32_1.png rename to docs/notebooks/llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_36_1.png diff --git a/docs/notebooks/llm-agent-rag-llamaindex-with-output.rst b/docs/notebooks/llm-agent-rag-llamaindex-with-output.rst index 86b28c56321b46..7151d7b4341443 100644 --- a/docs/notebooks/llm-agent-rag-llamaindex-with-output.rst +++ b/docs/notebooks/llm-agent-rag-llamaindex-with-output.rst @@ -105,13 +105,17 @@ Install required dependencies "llama-index", "llama-index-llms-huggingface==0.3.3", # pin to keep compatibility due to https://github.com/run-llama/llama_index/commit/f037de8d0471b37f9c4069ebef5dfb329633d2c6 "llama-index-readers-file", - "llama-index-llms-openvino>=0.3.1", - "llama-index-embeddings-openvino>=0.2.0", + "llama-index-core", + "llama-index-llms-huggingface", + "llama-index-embeddings-huggingface", "transformers>=4.43.1", + "llama-index-llms-huggingface>=0.3.0,<0.3.4", + "llama-index-embeddings-huggingface>=0.3.0", ) pip_install("-q", "git+https://github.com/huggingface/optimum-intel.git", "git+https://github.com/openvinotoolkit/nncf.git", "datasets", "accelerate") pip_install("--pre", "-Uq", "openvino>=2024.2.0", "--extra-index-url", 
"https://storage.openvinotoolkit.org/simple/wheels/nightly") pip_install("--pre", "-Uq", "openvino-tokenizers[transformers]", "--extra-index-url", "https://storage.openvinotoolkit.org/simple/wheels/nightly") + pip_install("-q", "--no-deps", "llama-index-llms-openvino", "llama-index-embeddings-openvino") .. code:: ipython3 diff --git a/docs/notebooks/llm-chatbot-generate-api-with-output.rst b/docs/notebooks/llm-chatbot-generate-api-with-output.rst index 7777559e0f9100..ac4c22ffc3b185 100644 --- a/docs/notebooks/llm-chatbot-generate-api-with-output.rst +++ b/docs/notebooks/llm-chatbot-generate-api-with-output.rst @@ -81,9 +81,9 @@ Install required dependencies .. code:: ipython3 import os - + os.environ["GIT_CLONE_PROTECTION_ACTIVE"] = "false" - + %pip install -Uq pip %pip uninstall -q -y optimum optimum-intel %pip install -q -U "openvino>=2024.3.0" openvino-tokenizers[transformers] openvino-genai @@ -112,12 +112,12 @@ Install required dependencies from pathlib import Path import requests import shutil - + # fetch model configuration - + config_shared_path = Path("../../utils/llm_config.py") config_dst_path = Path("llm_config.py") - + if not config_dst_path.exists(): if config_shared_path.exists(): try: @@ -136,7 +136,7 @@ Install required dependencies r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/llm_config.py") with open("llm_config.py", "w", encoding="utf-8") as f: f.write(r.text) - + if not Path("notebook_utils.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py") open("notebook_utils.py", "w").write(r.text) @@ -218,6 +218,70 @@ Click here to see available models options MiniCPM outperforms many popular 7b, 13b and 70b models. More details can be found in `model_card `__. +- **llama-3.2-1B-instruct** - 1B parameters model from LLama3.2 + collection of instruction-tuned multilingual models. Llama 3.2 + instruction-tuned text only models are optimized for multilingual + dialogue use cases, including agentic retrieval and summarization + tasks. They outperform many of the available open source and closed + chat models on common industry benchmarks. More details can be found + in `model + card `__ + >\ **Note**: run model with demo, you will need to accept license + agreement. >You must be a registered user in Hugging Face Hub. + Please visit `HuggingFace model + card `__, + carefully read terms of usage and click accept button. You will need + to use an access token for the code below to run. For more + information on access tokens, refer to `this section of the + documentation `__. + >You can login on Hugging Face Hub in notebook environment, using + following code: + +.. code:: python + + # login to huggingfacehub to get access to pretrained model + + + from huggingface_hub import notebook_login, whoami + + try: + whoami() + print('Authorization token already provided') + except OSError: + notebook_login() + +- **llama-3.2-3B-instruct** - 3B parameters model from LLama3.2 + collection of instruction-tuned multilingual models. Llama 3.2 + instruction-tuned text only models are optimized for multilingual + dialogue use cases, including agentic retrieval and summarization + tasks. They outperform many of the available open source and closed + chat models on common industry benchmarks. More details can be found + in `model + card `__ + >\ **Note**: run model with demo, you will need to accept license + agreement. 
>You must be a registered user in Hugging Face Hub. + Please visit `HuggingFace model + card `__, + carefully read terms of usage and click accept button. You will need + to use an access token for the code below to run. For more + information on access tokens, refer to `this section of the + documentation `__. + >You can login on Hugging Face Hub in notebook environment, using + following code: + +.. code:: python + + # login to huggingfacehub to get access to pretrained model + + + from huggingface_hub import notebook_login, whoami + + try: + whoami() + print('Authorization token already provided') + except OSError: + notebook_login() + - **gemma-2b-it** - Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only @@ -240,7 +304,7 @@ Click here to see available models options .. code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -274,7 +338,7 @@ Click here to see available models options .. code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -334,7 +398,7 @@ Click here to see available models options .. code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -367,7 +431,7 @@ Click here to see available models options .. code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -401,7 +465,7 @@ Click here to see available models options .. code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -435,7 +499,7 @@ Click here to see available models options .. code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -466,7 +530,7 @@ Click here to see available models options .. code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -476,27 +540,18 @@ Click here to see available models options except OSError: notebook_login() -- **qwen2-1.5b-instruct/qwen2-7b-instruct** - Qwen2 is the new series - of Qwen large language models.Compared with the state-of-the-art open - source language models, including the previous released Qwen1.5, - Qwen2 has generally surpassed most open source models and - demonstrated competitiveness against proprietary models across a - series of benchmarks targeting for language understanding, language - generation, multilingual capability, coding, mathematics, reasoning, - etc. For more details, please refer to - `model_card `__, - `blog `__, - `GitHub `__, and +- **qwen2.5-0.5b-instruct/qwen2.5-1.5b-instruct/qwen2.5-3b-instruct/qwen2.5-7b-instruct/qwen2.5-14b-instruct** + - Qwen2.5 is the latest series of Qwen large language models. 
+ Comparing with Qwen2, Qwen2.5 series brings significant improvements + in coding, mathematics and general knowledge skills. Additionally, it + brings long-context and multiple languages support including Chinese, + English, French, Spanish, Portuguese, German, Italian, Russian, + Japanese, Korean, Vietnamese, Thai, Arabic, and more. For more + details, please refer to + `model_card `__, + `blog `__, + `GitHub `__, and `Documentation `__. -- **qwen1.5-0.5b-chat/qwen1.5-1.8b-chat/qwen1.5-7b-chat** - Qwen1.5 is - the beta version of Qwen2, a transformer-based decoder-only language - model pretrained on a large amount of data. Qwen1.5 is a language - model series including decoder language models of different model - sizes. It is based on the Transformer architecture with SwiGLU - activation, attention QKV bias, group query attention, mixture of - sliding window attention and full attention. You can find more - details about model in the `model - repository `__. - **qwen-7b-chat** - Qwen-7B is the 7B-parameter version of the large language model series, Qwen (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen-7B is a Transformer-based large language model, @@ -588,9 +643,9 @@ Click here to see available models options .. code:: ipython3 from llm_config import get_llm_selection_widget - + form, lang, model_id_widget, compression_variant, use_preconverted = get_llm_selection_widget() - + form @@ -611,7 +666,7 @@ Click here to see available models options .. parsed-literal:: - Selected model qwen2-0.5b-instruct with INT4 compression + Selected model qwen2.5-0.5b-instruct with INT4 compression Convert model using Optimum-CLI tool @@ -620,7 +675,7 @@ Convert model using Optimum-CLI tool `Optimum Intel `__ is -the interface between the +the interface between the `Transformers `__ and `Diffusers `__ libraries and OpenVINO to accelerate end-to-end pipelines on Intel architectures. @@ -677,12 +732,13 @@ to make it `symmetric `__ you can add ``--sym``. -For INT4 quantization you can also specify the following arguments : - -The ``--group-size`` parameter will define the group size to use for -quantization, -1 it will results in per-column quantization. - The -``--ratio`` parameter controls the ratio between 4-bit and 8-bit -quantization. If set to 0.9, it means that 90% of the layers will be -quantized to int4 while 10% will be quantized to int8. +For INT4 quantization you can also specify the following arguments : + +- The ``--group-size`` parameter will define the group size to use for + quantization, -1 it will results in per-column quantization. +- The ``--ratio`` parameter controls the ratio between 4-bit and 8-bit + quantization. If set to 0.9, it means that 90% of the layers will be + quantized to int4 while 10% will be quantized to int8. Smaller group_size and ratio values usually improve accuracy at the sacrifice of the model size and inference latency. You can enable AWQ to @@ -704,13 +760,13 @@ be additionally applied during model export with INT4 precision using .. code:: ipython3 from llm_config import convert_and_compress_model - + model_dir = convert_and_compress_model(model_id, model_configuration, compression_variant.value, use_preconverted.value) .. parsed-literal:: - ⌛ qwen2-0.5b-instruct conversion to INT4 started. It may takes some time. + ⌛ qwen2.5-0.5b-instruct conversion to INT4 started. It may takes some time. 
@@ -718,40 +774,39 @@ be additionally applied during model export with INT4 precision using -``optimum-cli export openvino --model Qwen/Qwen2-0.5B-Instruct --task text-generation-with-past --weight-format int4 --group-size 128 --ratio 0.8 qwen2/INT4_compressed_weights`` +``optimum-cli export openvino --model Qwen/Qwen2.5-0.5B-Instruct --task text-generation-with-past --weight-format int4 --group-size 128 --ratio 1.0 --sym qwen2.5/INT4_compressed_weights`` .. parsed-literal:: - 2024-09-24 01:36:43.036338: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-09-24 01:36:43.069837: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-08 02:53:02.359208: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-08 02:53:02.392956: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-09-24 01:36:43.595089: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-08 02:53:02.929372: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT Framework not specified. Using pt to export the model. Using framework PyTorch: 2.2.2+cpu Overriding 1 configuration item(s) - use_cache -> True We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/exporters/openvino/model_patcher.py:489: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/exporters/openvino/model_patcher.py:496: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if sequence_length != 1: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:165: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:165: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if seq_len > self.max_seq_len_cached: Set tokenizer padding side to left for `text-generation-with-past` task. .. parsed-literal:: - Mixed-Precision assignment ━━━━━━━━━━━━━━━━━━━━ 100% 168/168 • 0:00:03 • 0:00:00 INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 43% (81 / 169) │ 21% (80 / 168) │ - ├────────────────┼─────────────────────────────┼────────────────────────���───────────────┤ - │ 4 │ 57% (88 / 169) │ 79% (88 / 168) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ - Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:00:09 • 0:00:00 - ✅ INT4 qwen2-0.5b-instruct model converted and can be found in qwen2/INT4_compressed_weights + ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ int8_asym │ 28% (1 / 169) │ 0% (0 / 168) │ + ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ int4_sym │ 72% (168 / 169) │ 100% (168 / 168) │ + ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:00:14 • 0:00:00 + ✅ INT4 qwen2.5-0.5b-instruct model converted and can be found in qwen2.5/INT4_compressed_weights Let’s compare model size for different compression types @@ -759,13 +814,13 @@ Let’s compare model size for different compression types .. code:: ipython3 from llm_config import compare_model_size - + compare_model_size(model_dir) .. parsed-literal:: - Size of model with INT4 compressed weights is 358.80 MB + Size of model with INT4 compressed weights is 322.44 MB Select device for inference @@ -776,9 +831,9 @@ Select device for inference .. code:: ipython3 from notebook_utils import device_widget - + device = device_widget(default="CPU", exclude=["NPU"]) - + device @@ -821,14 +876,14 @@ of the available generation parameters more deeply later. .. code:: ipython3 from openvino_genai import LLMPipeline - + print(f"Loading model from {model_dir}\n") - - + + pipe = LLMPipeline(str(model_dir), device.value) - + generation_config = pipe.get_generation_config() - + input_prompt = "The Sun is yellow bacause" print(f"Input text: {input_prompt}") print(pipe.generate(input_prompt, max_new_tokens=10)) @@ -836,10 +891,10 @@ of the available generation parameters more deeply later. .. 
parsed-literal:: - Loading model from qwen2/INT4_compressed_weights - + Loading model from qwen2.5/INT4_compressed_weights + Input text: The Sun is yellow bacause - it is made of hydrogen and oxygen atoms. The + of its coloration. The Sun is blue because Run Chatbot @@ -975,11 +1030,11 @@ Click here to see detailed description of advanced options if not Path("gradio_helper_genai.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/llm-chatbot/gradio_helper_genai.py") open("gradio_helper_genai.py", "w").write(r.text) - + from gradio_helper_genai import make_demo - + demo = make_demo(pipe, model_configuration, model_id, lang.value) - + try: demo.launch(debug=False) except Exception: @@ -992,7 +1047,7 @@ Click here to see detailed description of advanced options .. parsed-literal:: Running on local URL: http://127.0.0.1:7860 - + To create a public link, set `share=True` in `launch()`. diff --git a/docs/notebooks/llm-chatbot-with-output.rst b/docs/notebooks/llm-chatbot-with-output.rst index 808f951d4e53bb..cbd76c0544ba82 100644 --- a/docs/notebooks/llm-chatbot-with-output.rst +++ b/docs/notebooks/llm-chatbot-with-output.rst @@ -83,9 +83,9 @@ Install required dependencies .. code:: ipython3 import os - + os.environ["GIT_CLONE_PROTECTION_ACTIVE"] = "false" - + %pip install -Uq pip %pip uninstall -q -y optimum optimum-intel %pip install --pre -Uq "openvino>=2024.2.0" openvino-tokenizers[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly @@ -104,12 +104,12 @@ Install required dependencies from pathlib import Path import requests import shutil - + # fetch model configuration - + config_shared_path = Path("../../utils/llm_config.py") config_dst_path = Path("llm_config.py") - + if not config_dst_path.exists(): if config_shared_path.exists(): try: @@ -184,7 +184,7 @@ Click here to see available models options .. code:: python - ## login to huggingfacehub to get access to pretrained model + ## login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -217,7 +217,7 @@ Click here to see available models options .. code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -278,7 +278,7 @@ Click here to see available models options .. code:: python - ## login to huggingfacehub to get access to pretrained model + ## login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -311,7 +311,7 @@ Click here to see available models options .. code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -345,7 +345,7 @@ Click here to see available models options .. code:: python - ## login to huggingfacehub to get access to pretrained model + ## login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -379,7 +379,7 @@ Click here to see available models options .. code:: python - ## login to huggingfacehub to get access to pretrained model + ## login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -410,7 +410,7 @@ Click here to see available models options .. 
code:: python - ## login to huggingfacehub to get access to pretrained model + ## login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -420,27 +420,18 @@ Click here to see available models options except OSError: notebook_login() -- **qwen2-1.5b-instruct/qwen2-7b-instruct** - Qwen2 is the new series - of Qwen large language models.Compared with the state-of-the-art open - source language models, including the previous released Qwen1.5, - Qwen2 has generally surpassed most open source models and - demonstrated competitiveness against proprietary models across a - series of benchmarks targeting for language understanding, language - generation, multilingual capability, coding, mathematics, reasoning, - etc. For more details, please refer to - `model_card `__, - `blog `__, - `GitHub `__, and +- **qwen2.5-0.5b-instruct/qwen2.5-1.5b-instruct/qwen2.5-3b-instruct/qwen2.5-7b-instruct/qwen2.5-14b-instruct** + - Qwen2.5 is the latest series of Qwen large language models. + Comparing with Qwen2, Qwen2.5 series brings significant improvements + in coding, mathematics and general knowledge skills. Additionally, it + brings long-context and multiple languages support including Chinese, + English, French, Spanish, Portuguese, German, Italian, Russian, + Japanese, Korean, Vietnamese, Thai, Arabic, and more. For more + details, please refer to + `model_card `__, + `blog `__, + `GitHub `__, and `Documentation `__. -- **qwen1.5-0.5b-chat/qwen1.5-1.8b-chat/qwen1.5-7b-chat** - Qwen1.5 is - the beta version of Qwen2, a transformer-based decoder-only language - model pretrained on a large amount of data. Qwen1.5 is a language - model series including decoder language models of different model - sizes. It is based on the Transformer architecture with SwiGLU - activation, attention QKV bias, group query attention, mixture of - sliding window attention and full attention. You can find more - details about model in the `model - repository `__. - **qwen-7b-chat** - Qwen-7B is the 7B-parameter version of the large language model series, Qwen (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen-7B is a Transformer-based large language model, @@ -554,14 +545,14 @@ Click here to see available models options .. code:: ipython3 model_languages = list(SUPPORTED_LLM_MODELS) - + model_language = widgets.Dropdown( options=model_languages, value=model_languages[0], description="Model Language:", disabled=False, ) - + model_language @@ -576,14 +567,14 @@ Click here to see available models options .. code:: ipython3 model_ids = list(SUPPORTED_LLM_MODELS[model_language.value]) - + model_id = widgets.Dropdown( options=model_ids, value=model_ids[0], description="Model:", disabled=False, ) - + model_id @@ -612,7 +603,7 @@ Convert model using Optimum-CLI tool `Optimum Intel `__ is -the interface between the +the interface between the `Transformers `__ and `Diffusers `__ libraries and OpenVINO to accelerate end-to-end pipelines on Intel architectures. @@ -664,13 +655,12 @@ to make it `symmetric `__ you can add ``--sym``. -For INT4 quantization you can also specify the following arguments : - -- The ``--group-size`` parameter will define the group size to use for - quantization, -1 it will results in per-column quantization. -- The ``--ratio`` parameter controls the ratio between 4-bit and 8-bit - quantization. If set to 0.9, it means that 90% of the layers will be - quantized to int4 while 10% will be quantized to int8. 
+For INT4 quantization you can also specify the following arguments : - +The ``--group-size`` parameter will define the group size to use for +quantization, -1 it will results in per-column quantization. - The +``--ratio`` parameter controls the ratio between 4-bit and 8-bit +quantization. If set to 0.9, it means that 90% of the layers will be +quantized to int4 while 10% will be quantized to int8. Smaller group_size and ratio values usually improve accuracy at the sacrifice of the model size and inference latency. @@ -681,7 +671,7 @@ sacrifice of the model size and inference latency. .. code:: ipython3 from IPython.display import Markdown, display - + prepare_int4_model = widgets.Checkbox( value=True, description="Prepare INT4 model", @@ -697,7 +687,7 @@ sacrifice of the model size and inference latency. description="Prepare FP16 model", disabled=False, ) - + display(prepare_int4_model) display(prepare_int8_model) display(prepare_fp16_model) @@ -766,14 +756,14 @@ We can now save floating point and compressed model variants .. code:: ipython3 from pathlib import Path - + pt_model_id = model_configuration["model_id"] pt_model_name = model_id.value.split("-")[0] fp16_model_dir = Path(model_id.value) / "FP16" int8_model_dir = Path(model_id.value) / "INT8_compressed_weights" int4_model_dir = Path(model_id.value) / "INT4_compressed_weights" - - + + def convert_to_fp16(): if (fp16_model_dir / "openvino_model.xml").exists(): return @@ -785,8 +775,8 @@ We can now save floating point and compressed model variants display(Markdown("**Export command:**")) display(Markdown(f"`{export_command}`")) ! $export_command - - + + def convert_to_int8(): if (int8_model_dir / "openvino_model.xml").exists(): return @@ -799,8 +789,8 @@ We can now save floating point and compressed model variants display(Markdown("**Export command:**")) display(Markdown(f"`{export_command}`")) ! $export_command - - + + def convert_to_int4(): compression_configs = { "zephyr-7b-beta": { @@ -864,13 +854,18 @@ We can now save floating point and compressed model variants "group_size": 128, "ratio": 0.5, }, + "qwen2.5-7b-instruct": {"sym": True, "group_size": 128, "ratio": 1.0}, + "qwen2.5-3b-instruct": {"sym": True, "group_size": 128, "ratio": 1.0}, + "qwen2.5-14b-instruct": {"sym": True, "group_size": 128, "ratio": 1.0}, + "qwen2.5-1.5b-instruct": {"sym": True, "group_size": 128, "ratio": 1.0}, + "qwen2.5-0.5b-instruct": {"sym": True, "group_size": 128, "ratio": 1.0}, "default": { "sym": False, "group_size": 128, "ratio": 0.8, }, } - + model_compression_params = compression_configs.get(model_id.value, compression_configs["default"]) if (int4_model_dir / "openvino_model.xml").exists(): return @@ -888,8 +883,8 @@ We can now save floating point and compressed model variants display(Markdown("**Export command:**")) display(Markdown(f"`{export_command}`")) ! $export_command - - + + if prepare_fp16_model.value: convert_to_fp16() if prepare_int8_model.value: @@ -904,7 +899,7 @@ Let’s compare model size for different compression types fp16_weights = fp16_model_dir / "openvino_model.bin" int8_weights = int8_model_dir / "openvino_model.bin" int4_weights = int4_model_dir / "openvino_model.bin" - + if fp16_weights.exists(): print(f"Size of FP16 model is {fp16_weights.stat().st_size / 1024 / 1024:.2f} MB") for precision, compressed_weights in zip([8, 4], [int8_weights, int4_weights]): @@ -930,16 +925,16 @@ Select device for inference and model variant .. 
code:: ipython3 import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) open("notebook_utils.py", "w").write(r.text) - + from notebook_utils import device_widget - + device = device_widget("CPU", exclude=["NPU"]) - + device @@ -963,14 +958,14 @@ variant of model weights and inference device available_models.append("INT8") if fp16_model_dir.exists(): available_models.append("FP16") - + model_to_run = widgets.Dropdown( options=available_models, value=available_models[0], description="Model to run:", disabled=False, ) - + model_to_run @@ -1022,12 +1017,13 @@ guide `__ from transformers import AutoConfig, AutoTokenizer from optimum.intel.openvino import OVModelForCausalLM - + + import openvino as ov import openvino.properties as props import openvino.properties.hint as hints import openvino.properties.streams as streams - - + + if model_to_run.value == "INT4": model_dir = int4_model_dir elif model_to_run.value == "INT8": @@ -1035,20 +1031,22 @@ guide `__ else: model_dir = fp16_model_dir print(f"Loading model from {model_dir}") - + ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): "1", props.cache_dir(): ""} - + if "GPU" in device.value and "qwen2-7b-instruct" in model_id.value: ov_config["GPU_ENABLE_SDPA_OPTIMIZATION"] = "NO" - + # On a GPU device a model is executed in FP16 precision. For red-pajama-3b-chat model there known accuracy # issues caused by this, which we avoid by setting precision hint to "f32". + core = ov.Core() + if model_id.value == "red-pajama-3b-chat" and "GPU" in core.available_devices and device.value in ["GPU", "AUTO"]: ov_config["INFERENCE_PRECISION_HINT"] = "f32" - + model_name = model_configuration["model_id"] tok = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True) - + ov_model = OVModelForCausalLM.from_pretrained( model_dir, device=device.value, @@ -1122,14 +1120,14 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html :: - playing: 0.5 - sleeping: 0.25 - eating: 0.15 - driving: 0.05 - flying: 0.05 + playing: 0.5 + sleeping: 0.25 + eating: 0.15 + driving: 0.05 + flying: 0.05 - - **Low temperature** (e.g., 0.2): The AI model becomes more focused and deterministic, choosing tokens with the highest probability, such as "playing." - - **Medium temperature** (e.g., 1.0): The AI model maintains a balance between creativity and focus, selecting tokens based on their probabilities without significant bias, such as "playing," "sleeping," or "eating." + - **Low temperature** (e.g., 0.2): The AI model becomes more focused and deterministic, choosing tokens with the highest probability, such as "playing." + - **Medium temperature** (e.g., 1.0): The AI model maintains a balance between creativity and focus, selecting tokens based on their probabilities without significant bias, such as "playing," "sleeping," or "eating." - **High temperature** (e.g., 2.0): The AI model becomes more adventurous, increasing the chances of selecting less likely tokens, such as "driving" and "flying." 
- ``Top-p``, also known as nucleus sampling, is a parameter used to @@ -1167,7 +1165,7 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html import torch from threading import Event, Thread - + from typing import List, Tuple from transformers import ( AutoTokenizer, @@ -1175,8 +1173,8 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html StoppingCriteriaList, TextIteratorStreamer, ) - - + + model_name = model_configuration["model_id"] start_message = model_configuration["start_message"] history_template = model_configuration.get("history_template") @@ -1184,46 +1182,46 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html current_message_template = model_configuration.get("current_message_template") stop_tokens = model_configuration.get("stop_tokens") tokenizer_kwargs = model_configuration.get("tokenizer_kwargs", {}) - + max_new_tokens = 256 - - + + class StopOnTokens(StoppingCriteria): def __init__(self, token_ids): self.token_ids = token_ids - + def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool: for stop_id in self.token_ids: if input_ids[0][-1] == stop_id: return True return False - - + + if stop_tokens is not None: if isinstance(stop_tokens[0], str): stop_tokens = tok.convert_tokens_to_ids(stop_tokens) - + stop_tokens = [StopOnTokens(stop_tokens)] - - + + def default_partial_text_processor(partial_text: str, new_text: str): """ helper for updating partially generated answer, used by default - + Params: partial_text: text buffer for storing previosly generated text new_text: text update for the current step Returns: updated text string - + """ partial_text += new_text return partial_text - - + + text_processor = model_configuration.get("partial_text_processor", default_partial_text_processor) - - + + def convert_history_to_token(history: List[Tuple[str, str]]): """ function for conversion history stored as list pairs of user and assistant messages to tokens according to model expected conversation template @@ -1257,7 +1255,7 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html messages.append({"role": "user", "content": user_msg}) if model_msg: messages.append({"role": "assistant", "content": model_msg}) - + input_token = tok.apply_chat_template(messages, add_generation_prompt=True, tokenize=True, return_tensors="pt") else: text = start_message + "".join( @@ -1278,12 +1276,12 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html ) input_token = tok(text, return_tensors="pt", **tokenizer_kwargs).input_ids return input_token - - + + def bot(history, temperature, top_p, top_k, repetition_penalty, conversation_id): """ callback function for running chatbot on submit button click - + Params: history: conversation history temperature: parameter for control the level of creativity in AI-generated text. @@ -1292,16 +1290,16 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html top_k: parameter for control the range of tokens considered by the AI model based on their cumulative probability, selecting number of tokens with highest probability. repetition_penalty: parameter for penalizing tokens based on how frequently they occur in the text. conversation_id: unique conversation identifier. 
- + """ - + # Construct the input message string for the model by concatenating the current system message and conversation history # Tokenize the messages string input_ids = convert_history_to_token(history) if input_ids.shape[1] > 2000: history = [history[-1]] input_ids = convert_history_to_token(history) - streamer = TextIteratorStreamer(tok, timeout=30.0, skip_prompt=True, skip_special_tokens=True) + streamer = TextIteratorStreamer(tok, timeout=3600.0, skip_prompt=True, skip_special_tokens=True) generate_kwargs = dict( input_ids=input_ids, max_new_tokens=max_new_tokens, @@ -1314,9 +1312,9 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html ) if stop_tokens is not None: generate_kwargs["stopping_criteria"] = StoppingCriteriaList(stop_tokens) - + stream_complete = Event() - + def generate_and_signal_complete(): """ genration function for single thread @@ -1324,18 +1322,18 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html global start_time ov_model.generate(**generate_kwargs) stream_complete.set() - + t1 = Thread(target=generate_and_signal_complete) t1.start() - + # Initialize an empty string to store the generated text partial_text = "" for new_text in streamer: partial_text = text_processor(partial_text, new_text) history[-1][1] = partial_text yield history - - + + def request_cancel(): ov_model.request.cancel() @@ -1344,11 +1342,11 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html if not Path("gradio_helper.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/llm-chatbot/gradio_helper.py") open("gradio_helper.py", "w").write(r.text) - + from gradio_helper import make_demo - + demo = make_demo(run_fn=bot, stop_fn=request_cancel, title=f"OpenVINO {model_id.value} Chatbot", language=model_language.value) - + try: demo.launch() except Exception: diff --git a/docs/notebooks/llm-rag-langchain-with-output.rst b/docs/notebooks/llm-rag-langchain-with-output.rst index 89be1c296d3a8f..935c4c5ef1f205 100644 --- a/docs/notebooks/llm-rag-langchain-with-output.rst +++ b/docs/notebooks/llm-rag-langchain-with-output.rst @@ -547,6 +547,11 @@ with INT4 precision. 
"group_size": 128, "ratio": 0.5, }, + "qwen2.5-7b-instruct": {"sym": True, "group_size": 128, "ratio": 1.0}, + "qwen2.5-3b-instruct": {"sym": True, "group_size": 128, "ratio": 1.0}, + "qwen2.5-14b-instruct": {"sym": True, "group_size": 128, "ratio": 1.0}, + "qwen2.5-1.5b-instruct": {"sym": True, "group_size": 128, "ratio": 1.0}, + "qwen2.5-0.5b-instruct": {"sym": True, "group_size": 128, "ratio": 1.0}, "default": { "sym": False, "group_size": 128, @@ -1335,7 +1340,7 @@ which will help to create a chain to connect RAG components including: """ streamer = TextIteratorStreamer( llm.pipeline.tokenizer, - timeout=60.0, + timeout=3600.0, skip_prompt=hide_full_prompt, skip_special_tokens=True, ) diff --git a/docs/notebooks/llm-rag-llamaindex-with-output.rst b/docs/notebooks/llm-rag-llamaindex-with-output.rst index b6523008587fa1..b3c7f4e004c7af 100644 --- a/docs/notebooks/llm-rag-llamaindex-with-output.rst +++ b/docs/notebooks/llm-rag-llamaindex-with-output.rst @@ -125,13 +125,13 @@ Install required dependencies "llama-index-readers-file", "llama-index-vector-stores-faiss", "llama-index-llms-langchain", - "llama-index-llms-openvino>=0.3.1", - "llama-index-embeddings-openvino>=0.2.1", - "llama-index-postprocessor-openvino-rerank>=0.2.0", + "llama-index-llms-huggingface>=0.3.0,<0.3.4", + "llama-index-embeddings-huggingface>=0.3.0", ) pip_install("-q", "git+https://github.com/huggingface/optimum-intel.git", "git+https://github.com/openvinotoolkit/nncf.git", "datasets", "accelerate", "gradio") pip_install("--pre", "-U", "openvino>=2024.2", "--extra-index-url", "https://storage.openvinotoolkit.org/simple/wheels/nightly") pip_install("--pre", "-U", "openvino-tokenizers[transformers]>=2024.2", "--extra-index-url", "https://storage.openvinotoolkit.org/simple/wheels/nightly") + pip_install("-q", "--no-deps", "llama-index-llms-openvino>=0.3.1", "llama-index-embeddings-openvino>=0.2.1", "llama-index-postprocessor-openvino-rerank>=0.2.0") .. parsed-literal:: @@ -546,6 +546,11 @@ with INT4 precision. "group_size": 128, "ratio": 0.5, }, + "qwen2.5-7b-instruct": {"sym": True, "group_size": 128, "ratio": 1.0}, + "qwen2.5-3b-instruct": {"sym": True, "group_size": 128, "ratio": 1.0}, + "qwen2.5-14b-instruct": {"sym": True, "group_size": 128, "ratio": 1.0}, + "qwen2.5-1.5b-instruct": {"sym": True, "group_size": 128, "ratio": 1.0}, + "qwen2.5-0.5b-instruct": {"sym": True, "group_size": 128, "ratio": 1.0}, "default": { "sym": False, "group_size": 128, diff --git a/docs/notebooks/magika-content-type-recognition-with-output.rst b/docs/notebooks/magika-content-type-recognition-with-output.rst index ae04df87562368..2fbe7e63a8b21b 100644 --- a/docs/notebooks/magika-content-type-recognition-with-output.rst +++ b/docs/notebooks/magika-content-type-recognition-with-output.rst @@ -78,7 +78,7 @@ Prerequisites .. parsed-literal:: ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - supervision 0.23.0 requires numpy<1.23.3,>=1.21.2; python_full_version <= "3.10.0", but you have numpy 1.24.4 which is incompatible. + supervision 0.24.0 requires numpy<1.23.3,>=1.21.2; python_full_version <= "3.10.0", but you have numpy 1.24.4 which is incompatible. tensorflow 2.12.0 requires numpy<1.24,>=1.22, but you have numpy 1.24.4 which is incompatible. Note: you may need to restart the kernel to use updated packages. 
diff --git a/docs/notebooks/meter-reader-with-output.rst b/docs/notebooks/meter-reader-with-output.rst index 1e30a6854403bb..fbb69d83fe239a 100644 --- a/docs/notebooks/meter-reader-with-output.rst +++ b/docs/notebooks/meter-reader-with-output.rst @@ -645,7 +645,7 @@ bounds of input batch size. .. parsed-literal:: - + diff --git a/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst b/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst index 004d6ef17a8621..2c5cdf1aecf169 100644 --- a/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst +++ b/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst @@ -55,7 +55,7 @@ Prerequisites .. code:: ipython3 - %pip install -q "torch>=2.1" "torchvision" "timm>=0.9.2" "transformers>=4.40" "Pillow" "gradio>=4.19" "tqdm" "sentencepiece" "peft" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "torch>=2.1" "torchvision" "timm>=0.9.2" "transformers>=4.40" "Pillow" "gradio>=4.19" "tqdm" "sentencepiece" "peft" "huggingface-hub>=0.24.0" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q "openvino>=2024.3.0" "nncf>=2.12.0" @@ -205,10 +205,10 @@ Let’s convert each model part. .. parsed-literal:: - 2024-09-24 01:38:17.126512: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-09-24 01:38:17.160546: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-08 02:54:38.009287: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-08 02:54:38.043246: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-09-24 01:38:17.679348: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-08 02:54:38.562064: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. code:: ipython3 @@ -239,43 +239,43 @@ Let’s convert each model part. .. parsed-literal:: - image_processing_minicpmv.py: 0%| | 0.00/16.6k [00:00 self.max_seq_len_cached: @@ -423,22 +423,6 @@ Let’s convert each model part. ✅ Language model successfully converted ⌛ Convert Image embedding model - - -.. parsed-literal:: - - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/ckpt/modeling_navit_siglip.py:334: TracerWarning: Iterating over a tensor might cause the trace to be incorrect. Passing a tensor of different shape won't change the number of iterations executed (and might lead to errors or silently give incorrect results). - for batch_idx, p_attn_mask in enumerate(patch_attention_mask): - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/ckpt/modeling_navit_siglip.py:909: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if not torch.any(~patch_attention_mask): - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/ckpt/modeling_navit_siglip.py:401: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if attn_weights.size() != (batch_size, self.num_heads, q_len, k_v_seq_len): - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/ckpt/modeling_navit_siglip.py:419: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if attn_output.size() != (batch_size, self.num_heads, q_len, self.head_dim): - - -.. parsed-literal:: - ✅ Image embedding model successfully converted ⌛ Convert Resamler model @@ -545,11 +529,7 @@ documentation `__, +and original `repo `__. + + +**Table of contents:** + + +- `Prerequisites <#prerequisites>`__ +- `Convert model <#convert-model>`__ +- `Select inference device <#select-inference-device>`__ +- `Optimize model using NNCF <#optimize-model-using-nncf>`__ + + - `Compress Language model weights in + 4bits <#compress-language-model-weights-in-4bits>`__ + - `Optimize Vision model <#optimize-vision-model>`__ + +- `Model Inference <#model-inference>`__ +- `Interactive demo <#interactive-demo>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +Prerequisites +------------- + + + +.. code:: ipython3 + + %pip install -q "torch>=2.1" "torchvision" "Pillow" "tqdm" "datasets>=2.14.6" "gradio>=4.36" "nncf>=2.13.0" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "transformers>=4.45" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -Uq --pre "openvino>2024.4.0" --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + +.. 
code:: ipython3
+
+    import requests
+    from pathlib import Path
+
+    if not Path("ov_mllama_helper.py").exists():
+        r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/mllama3.2/ov_mllama_helper.py")
+        open("ov_mllama_helper.py", "w").write(r.text)
+
+    if not Path("gradio_helper.py").exists():
+        r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/mllama3.2/gradio_helper.py")
+        open("gradio_helper.py", "w").write(r.text)
+
+    if not Path("ov_mllama_compression.py").exists():
+        r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/mllama3.2/ov_mllama_compression.py")
+        open("ov_mllama_compression.py", "w").write(r.text)
+
+    if not Path("data_preprocessing.py").exists():
+        r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/mllama3.2/data_preprocessing.py")
+        # write to "data_preprocessing.py" so that `from data_preprocessing import ...` works later
+        open("data_preprocessing.py", "w").write(r.text)
+
+    if not Path("notebook_utils.py").exists():
+        r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py")
+        open("notebook_utils.py", "w").write(r.text)
+
+Convert model
+-------------
+
+
+
+OpenVINO supports PyTorch models via conversion to OpenVINO Intermediate
+Representation (IR). The `OpenVINO model conversion
+API `__
+should be used for this purpose. The ``ov.convert_model`` function accepts the
+original PyTorch model instance and an example input for tracing and
+returns an ``ov.Model`` representing this model in the OpenVINO framework.
+The converted model can be saved on disk using the ``ov.save_model``
+function or loaded on a device directly using ``core.compile_model``.
+
+The ``ov_mllama_helper.py`` script contains helper functions for model
+conversion; please check its content if you are interested in the conversion
+details.
+
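+The snippet below is a minimal, self-contained sketch of this generic
+conversion flow using a toy ``torch.nn`` module as a stand-in; it is not the
+``convert_mllama`` helper used by this notebook, which applies the same calls
+to the Llama-3.2-Vision sub-models.
+
+.. code:: ipython3
+
+    import torch
+    import openvino as ov
+
+    # A toy module stands in for the real model purely for illustration.
+    toy_model = torch.nn.Sequential(torch.nn.Linear(4, 2), torch.nn.ReLU())
+    example_input = torch.randn(1, 4)
+
+    # Trace the PyTorch model with an example input and get an ov.Model
+    ov_model = ov.convert_model(toy_model, example_input=example_input)
+
+    # Save the IR to disk ...
+    ov.save_model(ov_model, "toy_model.xml")
+
+    # ... or compile it directly for a target device and run inference
+    core = ov.Core()
+    compiled_model = core.compile_model(ov_model, "CPU")
+    result = compiled_model(example_input.numpy())
+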
+.. raw:: html
+
+Click here for more detailed explanation of conversion steps
+
+Llama-3.2-Vision is an autoregressive transformer generative model, which means
+that each generation step depends on the model output from the previous step.
+The generation approach is based on the assumption that the probability
+distribution of a word sequence can be decomposed into the product of
+conditional next-word distributions. In other words, the model predicts the
+next token in a loop, guided by the previously generated tokens, until a stop
+condition is reached (the generated sequence hits the maximum length or an
+end-of-string token is produced). How the next token is selected from the
+predicted probabilities is driven by the chosen decoding methodology. You can
+find more information about the most popular decoding methods in this
+`blog `__. The entry point
+for the generation process for models from the Hugging Face Transformers
+library is the ``generate`` method. You can find more information about
+its parameters and configuration in the
+`documentation `__.
+To preserve flexibility in selecting the decoding methodology, we convert only
+a single inference step of the model.
+
+The inference flow differs between the first step and the following ones. On
+the first step, the model accepts the preprocessed input instruction and image.
+The image is processed by the ``Image Encoder`` into cross-attention states;
+after that the ``language model``, the LLM-based part of the model, runs on the
+cross-attention states and the tokenized input ids to predict the probability
+of the next generated token. On subsequent steps, the ``language_model``
+accepts only the next token. Since generation is auto-regressive, the hidden
+state of an output token stays the same once computed for every further
+generation step, so recomputing it every time you want to generate a new token
+is wasteful. With the cache, the model saves a hidden state once it has been
+computed, and at each time step computes only the one for the most recently
+generated token, re-using the saved states for the earlier tokens. This reduces
+the generation complexity from
+:math:`O(n^3)` to :math:`O(n^2)` for a transformer model. More details
+about how it works can be found in this
+`article `__.
+
+As model sizes grow in modern LLMs, the number of attention blocks and the
+size of the past key/value tensors grow accordingly. Handling the cache state
+as explicit model inputs and outputs in the inference cycle may become a
+bottleneck for memory-bound systems, especially when processing long input
+sequences, for example in a chatbot scenario. OpenVINO suggests a transformation
+that removes the inputs and corresponding outputs holding cache tensors from
+the model, keeping the cache-handling logic inside the model. Such models are
+also called stateful. A stateful model is a model that implicitly
+preserves data between two consecutive inference calls. The tensors
+saved from one run are kept in an internal memory buffer called a
+``state`` or a ``variable`` and may be passed to the next run, while
+never being exposed as model output. Hiding the cache enables storing
+and updating the cache values in a more device-friendly representation.
+It helps to reduce memory consumption and additionally optimize model
+performance. More details about stateful models and working with state
+can be found in the `OpenVINO
+documentation `__.
+
+``image_encoder`` is represented in Llama-3.2-Vision by a pretrained ViT
+model.
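+
+To make the cached next-token loop described above concrete, here is a
+minimal stand-alone sketch of greedy decoding with an explicit key/value
+cache. It uses GPT-2 purely as a small stand-in model and is not part of this
+notebook; the Llama-3.2-Vision language model follows the same pattern, with
+cross-attention states added.
+
+.. code:: ipython3
+
+    import torch
+    from transformers import AutoModelForCausalLM, AutoTokenizer
+
+    tok = AutoTokenizer.from_pretrained("gpt2")
+    model = AutoModelForCausalLM.from_pretrained("gpt2")
+    model.eval()
+
+    input_ids = tok("OpenVINO is", return_tensors="pt").input_ids
+    generated = input_ids
+    past_key_values = None
+
+    with torch.no_grad():
+        for _ in range(20):
+            # First step: feed the whole prompt. Later steps: feed only the last
+            # token, because the hidden states of earlier tokens are reused from
+            # the key/value cache instead of being recomputed.
+            step_input = generated if past_key_values is None else generated[:, -1:]
+            outputs = model(step_input, past_key_values=past_key_values, use_cache=True)
+            past_key_values = outputs.past_key_values
+            # Greedy decoding: pick the most probable next token.
+            next_token = outputs.logits[:, -1, :].argmax(dim=-1, keepdim=True)
+            generated = torch.cat([generated, next_token], dim=-1)
+            if next_token.item() == tok.eos_token_id:  # stop condition
+                break
+
+    print(tok.decode(generated[0]))
+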
+
+To sum up, the model consists of two parts:
+
+- **Image Encoder** for encoding input images into the LLM cross-attention
+  states space.
+
+- **Language Model** for generating the answer based on the cross-attention
+  states provided by the Image Encoder and the input tokens.
+
+Let’s convert each model part.
+
+.. raw:: html
+
+ +.. + + **Note**: run model with notebook, you will need to accept license + agreement. You must be a registered user in Hugging Face Hub. + Please visit `HuggingFace model + card `__, + carefully read terms of usage and click accept button. You will need + to use an access token for the code below to run. For more + information on access tokens, refer to `this section of the + documentation `__. + You can login on Hugging Face Hub in notebook environment, using + following code: + +.. code:: ipython3 + + # uncomment these lines to login to huggingfacehub to get access to pretrained model + + # from huggingface_hub import notebook_login, whoami + + # try: + # whoami() + # print('Authorization token already provided') + # except OSError: + # notebook_login() + +.. code:: ipython3 + + from pathlib import Path + from ov_mllama_helper import convert_mllama + + model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct" + model_dir = Path(model_id.split("/")[-1]) / "OV" + + # uncomment the line to see model conversion code + # convert_mllama?? + + +.. parsed-literal:: + + 2024-09-26 08:47:58.173539: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-09-26 08:47:58.175474: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used. + 2024-09-26 08:47:58.210782: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2024-09-26 08:47:59.026387: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + + +.. code:: ipython3 + + convert_mllama(model_id, model_dir) + + +.. parsed-literal:: + + ⌛ Load original model + + + +.. parsed-literal:: + + Loading checkpoint shards: 0%| | 0/5 [00:00 0 else None + + +.. parsed-literal:: + + ✅ Vision model successfully converted + ⌛ Convert language model... + + +.. parsed-literal:: + + /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/transformers/models/mllama/modeling_mllama.py:83: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if sequence_length != 1: + /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/transformers/models/mllama/modeling_mllama.py:1710: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if is_cross_attention_layer and cross_attention_states is None and is_cross_attention_cache_empty: + /home/ea/work/openvino_notebooks_new_clone/openvino_notebooks/notebooks/mllama-3.2/ov_mllama_helper.py:402: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + elif past_key_value.get_seq_length(self.layer_idx) != 0: + + +.. parsed-literal:: + + ✅ Language model successfully converted + ✅ Model sucessfully converted and can be found in Llama-3.2-11B-Vision-Instruct/OV + + +Select inference device +----------------------- + + + +.. code:: ipython3 + + from notebook_utils import device_widget + + device = device_widget("CPU", exclude=["NPU"]) + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU') + + + +Optimize model using NNCF +------------------------- + + + +Compress Language model weights in 4bits +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +For reducing memory consumption, weights compression optimization can be +applied using `NNCF `__. + +.. raw:: html + +
+ +Click here for more details about weight compression Weight compression +aims to reduce the memory footprint of a model. It can also lead to +significant performance improvement for large memory-bound models, such +as Large Language Models (LLMs). LLMs and other models, which require +extensive memory to store the weights during inference, can benefit from +weight compression in the following ways: + +- enabling the inference of exceptionally large models that cannot be + accommodated in the memory of the device; + +- improving the inference performance of the models by reducing the + latency of the memory access when computing the operations with + weights, for example, Linear layers. + +`Neural Network Compression Framework +(NNCF) `__ provides 4-bit / +8-bit mixed weight quantization as a compression method primarily +designed to optimize LLMs. The main difference between weights +compression and full model quantization (post-training quantization) is +that activations remain floating-point in the case of weights +compression which leads to a better accuracy. Weight compression for +LLMs provides a solid inference performance improvement which is on par +with the performance of the full model quantization. In addition, weight +compression is data-free and does not require a calibration dataset, +making it easy to use. + +``nncf.compress_weights`` function can be used for performing weights +compression. The function accepts an OpenVINO model and other +compression parameters. Compared to INT8 compression, INT4 compression +improves performance even more, but introduces a minor drop in +prediction quality. + +More details about weights compression, can be found in `OpenVINO +documentation `__. + +.. raw:: html + +
+
+In this tutorial we consider using data-aware weight compression. Such an
+approach may require more time and memory because it involves a calibration
+dataset, while promising better INT4 model accuracy.
+
+   **Note:** AWQ weight quantization requires at least 64GB RAM. If you run
+   the notebook in a memory-constrained environment, you can switch to
+   data-free weight compression using the widget below.
+
+.. code:: ipython3
+
+    from ov_mllama_compression import compress
+
+    # uncomment the line to see compression code
+    # compress??
+
+
+.. parsed-literal::
+
+    INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino
+
+
+.. code:: ipython3
+
+    from ov_mllama_compression import compression_widgets_helper
+
+    compression_scenario, compress_args = compression_widgets_helper()
+
+    compression_scenario
+
+
+
+
+.. parsed-literal::
+
+    VBox(children=(RadioButtons(index=1, options=('data-free', 'data-aware'), value='data-aware'), Accordion(child…
+
+
+
+.. code:: ipython3
+
+    compression_kwargs = {key: value.value for key, value in compress_args.items()}
+
+    language_model_path = compress(model_dir, **compression_kwargs)
+
+
+.. parsed-literal::
+
+    ✅ Compressed model already exists and can be found in Llama-3.2-11B-Vision-Instruct/OV/llm_int4_asym_r10_gs64_max_activation_variance_awq_scale_all_layers.xml
+
+
+Optimize Vision model
+~~~~~~~~~~~~~~~~~~~~~
+
+
+
+While weight compression is a great tool for reducing the memory footprint of
+large language models, for smaller models such as the Image Encoder it may be
+more efficient to apply INT8 post-training quantization. You can find more
+details about post-training quantization in the `OpenVINO
+documentation `__.
+
+Basically, the model quantization process consists of 3 steps: 1. Prepare the
+quantization dataset 2. Perform model quantization using
+``nncf.quantize`` 3. Save the optimized model on disk using
+``ov.save_model``
+
+   **Note:** Model quantization may require additional time and memory
+   for optimization and may not be applicable on some devices. You can skip
+   the quantization step or replace it with weight compression using the
+   widget below if you do not have enough resources.
+
+.. code:: ipython3
+
+    from ov_mllama_compression import vision_encoder_selection_widget
+
+    vision_encoder_options = vision_encoder_selection_widget(device.value)
+
+    vision_encoder_options
+
+
+
+
+.. parsed-literal::
+
+    Dropdown(description='Vision Encoder', index=1, options=('FP16', 'INT8 quantization', 'INT8 weights compressio…
+
+
+.. code:: ipython3
+
+    from transformers import AutoProcessor
+    import nncf
+    import openvino as ov
+    import gc
+
+    from data_preprocessing import prepare_dataset_vision
+
+    processor = AutoProcessor.from_pretrained(model_dir)
+    core = ov.Core()
+
+    fp_vision_encoder_path = model_dir / "openvino_vision_encoder.xml"
+    int8_vision_encoder_path = model_dir / fp_vision_encoder_path.name.replace(".xml", "_int8.xml")
+    int8_wc_vision_encoder_path = model_dir / fp_vision_encoder_path.name.replace(".xml", "_int8_wc.xml")
+
+
+    if vision_encoder_options.value == "INT8 quantization":
+        if not int8_vision_encoder_path.exists():
+            calibration_data = prepare_dataset_vision(processor, 100)
+            ov_model = core.read_model(fp_vision_encoder_path)
+            calibration_dataset = nncf.Dataset(calibration_data)
+            quantized_model = nncf.quantize(
+                model=ov_model,
+                calibration_dataset=calibration_dataset,
+                model_type=nncf.ModelType.TRANSFORMER,
+                advanced_parameters=nncf.AdvancedQuantizationParameters(smooth_quant_alpha=0.6),
+            )
+            ov.save_model(quantized_model, int8_vision_encoder_path)
+            del quantized_model
+            del ov_model
+            del calibration_dataset
+            del calibration_data
+            gc.collect()
+
+        vision_encoder_path = int8_vision_encoder_path
+    elif vision_encoder_options.value == "INT8 weights compression":
+        if not int8_wc_vision_encoder_path.exists():
+            ov_model = core.read_model(fp_vision_encoder_path)
+            compressed_model = nncf.compress_weights(ov_model)
+            ov.save_model(compressed_model, int8_wc_vision_encoder_path)
+        vision_encoder_path = int8_wc_vision_encoder_path
+    else:
+        vision_encoder_path = fp_vision_encoder_path
+
+Model Inference
+---------------
+
+
+
+Now we are ready to test model inference.
+``OVMLlamaForConditionalGeneration``, defined in
+``ov_mllama_helper.py``, has a generation interface similar to the original
+model and additionally enables runtime optimizations for efficient model
+inference with OpenVINO:
+
+- **Slicing LM head** - LLMs usually return probabilities for all input
+  tokens, while for selecting the next token only the last one is needed.
+  Reducing the language model head to return only the last token's
+  probabilities may provide better performance and reduce memory consumption
+  for the first inference, where the whole input prompt is usually processed.
+  You can find more details about this optimization in the `OpenVINO
+  blog `__
+
+.. raw:: html
+

+ +.. raw:: html + +

+ +- **Using Remote tensors for GPU** - Coping data on device and back + into host memory can become bottleneck for efficient execution + multi-model pipeline on GPU. `Remote Tensor + API `__ + provides functionality for low-level GPU memory management, we can + use this feature for sharing cross-attention keys and values between + Image Encoder and Language Model. + +.. code:: ipython3 + + from ov_mllama_helper import OVMLlamaForConditionalGeneration + + # Uncomment this line to see model inference code + # OVMLlamaForConditionalGeneration?? + + ov_model = OVMLlamaForConditionalGeneration( + model_dir, device=device.value, language_model_name=language_model_path.name, image_encoder_name=vision_encoder_path.name + ) + processor = AutoProcessor.from_pretrained(model_dir) + + +.. parsed-literal:: + + applied slice for lm head + + +.. code:: ipython3 + + from PIL import Image + from transformers import TextStreamer + import numpy as np + + question = "What is unusual on this image?" + + messages = [ + {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": question}]}, + ] + text = processor.tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False) + url = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11" + raw_image = Image.open(requests.get(url, stream=True).raw) + + inputs = processor(text=text, images=[raw_image], return_tensors="pt") + streamer = TextStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True) + print(f"Question: {question}") + display(raw_image) + output = ov_model.generate(**inputs, do_sample=False, max_new_tokens=100, temperature=None, top_p=None, streamer=streamer) + print(f"Visual encoder time {ov_model.vision_encoder_infer_time[0] * 1000 :.2f} ms") + print(f"First token latency {ov_model.llm_infer_time[0] * 1000 :.2f}ms, Second token latency {np.mean(np.array(ov_model.llm_infer_time[1:])) * 1000:.2f}ms") + + +.. parsed-literal:: + + Question: What is unusual on this image? + + + +.. image:: mllama-3.2-with-output_files/mllama-3.2-with-output_19_1.png + + +.. parsed-literal:: + + The cat is lying in a box, which is an unusual position for a cat. Cats are known for their agility and flexibility, but they tend to prefer more comfortable and secure positions, such as on a soft surface or in a cozy spot. Lying in a box is not a typical behavior for a cat, and it may be due to the cat's desire to feel safe and protected or to explore a new environment. + Visual encoder time 19374.52 ms + First token latency 693.76ms, Second token latency 431.92ms + + +Interactive demo +---------------- + + + +.. 
code:: ipython3 + + from gradio_helper import make_demo + + processor.chat_template = processor.tokenizer.chat_template + demo = make_demo(ov_model, processor) + + try: + demo.launch(debug=False) + except Exception: + demo.launch(debug=False, share=True) + # if you are launching remotely, specify server_name and server_port + # demo.launch(server_name='your server name', server_port='server port in int') + # Read more in the docs: https://gradio.app/docs/ diff --git a/docs/notebooks/mllama-3.2-with-output_files/mllama-3.2-with-output_19_1.jpg b/docs/notebooks/mllama-3.2-with-output_files/mllama-3.2-with-output_19_1.jpg new file mode 100644 index 00000000000000..c6aeec77cd3cb2 --- /dev/null +++ b/docs/notebooks/mllama-3.2-with-output_files/mllama-3.2-with-output_19_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fc0d22d75f23474fb4f8aec8c0bf0fdf5d9377f3379e82a3887003e6da47e7e +size 60425 diff --git a/docs/notebooks/mllama-3.2-with-output_files/mllama-3.2-with-output_19_1.png b/docs/notebooks/mllama-3.2-with-output_files/mllama-3.2-with-output_19_1.png new file mode 100644 index 00000000000000..c6673a757ab5dc --- /dev/null +++ b/docs/notebooks/mllama-3.2-with-output_files/mllama-3.2-with-output_19_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c715d8adee4bf7519690de20b57ef2edaa2f914c86a64d107f99a919dcdad218 +size 854224 diff --git a/docs/notebooks/mobileclip-video-search-with-output.rst b/docs/notebooks/mobileclip-video-search-with-output.rst index 423d84f30b7593..4fe155b7832e06 100644 --- a/docs/notebooks/mobileclip-video-search-with-output.rst +++ b/docs/notebooks/mobileclip-video-search-with-output.rst @@ -23,6 +23,7 @@ the images most related to the query. In this tutorial, we consider how to use MobileCLIP to implement a visual content search engine for finding relevant frames in video. + **Table of contents:** - `Prerequisites <#prerequisites>`__ @@ -77,7 +78,7 @@ Prerequisites remote: Counting objects: 100% (84/84), done. remote: Compressing objects: 100% (61/61), done. remote: Total 84 (delta 29), reused 75 (delta 22), pack-reused 0 (from 0) - Unpacking objects: 100% (84/84), 467.39 KiB | 4.21 MiB/s, done. + Unpacking objects: 100% (84/84), 467.39 KiB | 2.58 MiB/s, done. .. code:: ipython3 @@ -449,7 +450,7 @@ Perform search .. parsed-literal:: - Image encoding took 0.0994 ms + Image encoding took 0.108 ms Text encoding took 0.0118 ms @@ -528,7 +529,7 @@ be used separately. Let’s convert each part to OpenVINO. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/mobileclip/modules/common/transformer.py:125: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/mobileclip/modules/common/transformer.py:125: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if seq_len != self.num_embeddings: @@ -613,8 +614,8 @@ Perform search .. 
parsed-literal:: - Image encoding took 0.0294 ms - Text encoding took 0.00508 ms + Image encoding took 0.0271 ms + Text encoding took 0.00495 ms diff --git a/docs/notebooks/mobilevlm-language-assistant-with-output.rst b/docs/notebooks/mobilevlm-language-assistant-with-output.rst index 2d68e4361d00a5..1ba06287ff485c 100644 --- a/docs/notebooks/mobilevlm-language-assistant-with-output.rst +++ b/docs/notebooks/mobilevlm-language-assistant-with-output.rst @@ -69,7 +69,7 @@ Install requirements ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.2.2+cpu which is incompatible. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. - optimum-intel 1.20.0.dev0+f1517e3 requires transformers<4.45,>=4.36, but you have transformers 4.33.3 which is incompatible. + optimum-intel 1.20.0.dev0+542347b requires transformers<4.46,>=4.36, but you have transformers 4.33.3 which is incompatible. Note: you may need to restart the kernel to use updated packages. @@ -121,13 +121,13 @@ Import required packages .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( - 2024-09-24 01:47:39.310512: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-09-24 01:47:39.345272: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-08 03:04:16.549795: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-08 03:04:16.584461: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-09-24 01:47:39.880337: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. 
+ 2024-10-08 03:04:17.090418: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( @@ -344,15 +344,15 @@ compression instead of INT8 weight compression. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:595: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:595: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input_shape[-1] > 1: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:119: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:119: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if seq_len > self.max_seq_len_cached: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:348: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:348: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len): - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:355: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:355: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attention_mask.size() != (bsz, 1, q_len, kv_seq_len): - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:365: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:365: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim): @@ -372,13 +372,13 @@ compression instead of INT8 weight compression. .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 24% (43 / 169) │ 20% (42 / 168) │ - ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ 4 │ 76% (126 / 169) │ 80% (126 / 168) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ int8_asym │ 24% (43 / 169) │ 20% (42 / 168) │ + ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ int4_asym │ 76% (126 / 169) │ 80% (126 / 168) │ + ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -418,7 +418,7 @@ compression instead of INT8 weight compression. .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) if a.grad is not None: @@ -438,13 +438,13 @@ compression instead of INT8 weight compression. .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 28% (44 / 170) │ 20% (42 / 168) │ - ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ 4 │ 72% (126 / 170) │ 80% (126 / 168) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ int8_asym │ 28% (44 / 170) │ 20% (42 / 168) │ + ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ int4_asym │ 72% (126 / 170) │ 80% (126 / 168) │ + ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ diff --git a/docs/notebooks/music-generation-with-output.rst b/docs/notebooks/music-generation-with-output.rst index a0cb74ffab14c7..d1fc70cca19a6d 100644 --- a/docs/notebooks/music-generation-with-output.rst +++ b/docs/notebooks/music-generation-with-output.rst @@ -124,13 +124,13 @@ Imports .. parsed-literal:: - 2024-09-24 01:49:57.160211: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-09-24 01:49:57.194092: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-08 03:06:32.000424: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. 
You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-08 03:06:32.034271: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-09-24 01:49:57.824775: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + 2024-10-08 03:06:32.663477: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( @@ -170,11 +170,11 @@ generate a text-conditioned music sample. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/huggingface_hub/file_download.py:1142: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/huggingface_hub/file_download.py:1142: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. 
torch.utils._pytree._register_pytree_node( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm. warnings.warn("torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.") @@ -229,7 +229,7 @@ vocabulary. It helps the model understand the context of a sentence. @@ -655,7 +655,7 @@ We can now infer the pipeline backed by OpenVINO models. diff --git a/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst b/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst index bb70958c01d660..78700602513056 100644 --- a/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst +++ b/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst @@ -104,7 +104,7 @@ Prerequisites .. parsed-literal:: - added_tokens.json: 0%| | 0.00/80.0 [00:00=2024.3.0" "openvino-genai" %pip install -q "torch>=2.1" "nncf>=2.12" "transformers>=4.40.0" "accelerate" "gradio>=4.19" "git+https://github.com/huggingface/optimum-intel.git" --extra-index-url https://download.pytorch.org/whl/cpu - -.. parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - - .. code:: ipython3 import os @@ -188,7 +180,7 @@ dataset for information extraction. .. parsed-literal:: Selected model NuExtract_tiny with INT4 compression - + Download and convert model to OpenVINO IR via Optimum Intel CLI --------------------------------------------------------------- @@ -258,7 +250,7 @@ parameters. An example of this approach usage you can find in .. parsed-literal:: ⌛ NuExtract_tiny conversion to INT4 started. It may takes some time. - + **Export command:** @@ -270,26 +262,21 @@ parameters. An example of this approach usage you can find in .. parsed-literal:: - 2024-09-24 01:56:02.315697: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-09-24 01:56:02.348697: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-09-24 01:56:02.867203: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT Framework not specified. Using pt to export the model. - Using framework PyTorch: 2.2.2+cpu + Using framework PyTorch: 2.3.1+cpu Overriding 1 configuration item(s) - use_cache -> True We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. 
Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/exporters/openvino/model_patcher.py:489: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /home/ytarkan/miniconda3/envs/ov_notebooks_env/lib/python3.9/site-packages/optimum/exporters/openvino/model_patcher.py:489: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if sequence_length != 1: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:165: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /home/ytarkan/miniconda3/envs/ov_notebooks_env/lib/python3.9/site-packages/transformers/models/qwen2/modeling_qwen2.py:110: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if seq_len > self.max_seq_len_cached: - Set tokenizer padding side to left for `text-generation-with-past` task. - + .. parsed-literal:: - Mixed-Precision assignment ━━━━━━━━━━━━━━━━━━━━ 100% 168/168 • 0:00:02 • 0:00:00 - INFO:nncf:Statistics of the bitwidth distribution: + Mixed-Precision assignment ━━━━━━━━━━━━━━━━━━━━ 100% 168/168 • 0:00:01 • 0:00:00• 0:00:01 + [?25hINFO:nncf:Statistics of the bitwidth distribution: ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ @@ -297,9 +284,19 @@ parameters. An example of this approach usage you can find in ├────────────────┼─────────────────────────────┼────────────────────────���───────────────┤ │ 4 │ 53% (122 / 169) │ 80% (122 / 168) │ ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ - Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:00:08 • 0:00:00 - ✅ INT4 NuExtract_tiny model converted and can be found in NuExtract_tiny/INT4_compressed_weights + Applying Weight Compression ━━━━━━━━━━━━━━━━━━━ 100% 169/169 • 0:00:05 • 0:00:00• 0:00:01 + [?25h + +.. parsed-literal:: + + Set tokenizer padding side to left for `text-generation-with-past` task. + Replacing `(?!\S)` pattern to `(?:$|[^\S])` in RegexSplit operation + + +.. parsed-literal:: + ✅ INT4 NuExtract_tiny model converted and can be found in NuExtract_tiny/INT4_compressed_weights + Let’s compare model size for different compression types @@ -313,7 +310,7 @@ Let’s compare model size for different compression types .. 
parsed-literal:: Size of model with INT4 compressed weights is 347.03 MB - + Select device for inference and model variant --------------------------------------------- @@ -336,7 +333,7 @@ Select device for inference and model variant .. parsed-literal:: - Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU') + Dropdown(description='Device:', options=('CPU', 'GPU', 'AUTO'), value='CPU') @@ -456,11 +453,11 @@ schema format: "mathematics", "code generation" ], - "Licence": "Apache 2.0" + "Licence": "Apache 2.0" } } - + Run interactive structure extraction demo with Gradio ----------------------------------------------------- @@ -487,20 +484,6 @@ Run interactive structure extraction demo with Gradio # EXAMPLE: `demo.launch(server_name='your server name', server_port='server port in int')` # To learn more please refer to the Gradio docs: https://gradio.app/docs/ - -.. parsed-literal:: - - Running on local URL: http://127.0.0.1:7860 - - To create a public link, set `share=True` in `launch()`. - - - - - - - - .. code:: ipython3 # Uncomment and run this cell for stopping gradio interface diff --git a/docs/notebooks/object-detection-with-output.rst b/docs/notebooks/object-detection-with-output.rst index 9b7702ffb41afe..06636ec17bf7e9 100644 --- a/docs/notebooks/object-detection-with-output.rst +++ b/docs/notebooks/object-detection-with-output.rst @@ -83,7 +83,7 @@ Install requirements magika 0.5.1 requires numpy<2.0,>=1.24; python_version >= "3.8" and python_version < "3.9", but you have numpy 1.23.5 which is incompatible. mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.2.2+cpu which is incompatible. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. - supervision 0.23.0 requires numpy<1.23.3,>=1.21.2; python_full_version <= "3.10.0", but you have numpy 1.23.5 which is incompatible. + supervision 0.24.0 requires numpy<1.23.3,>=1.21.2; python_full_version <= "3.10.0", but you have numpy 1.23.5 which is incompatible. Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. diff --git a/docs/notebooks/object-detection-with-output_files/object-detection-with-output_19_0.png b/docs/notebooks/object-detection-with-output_files/object-detection-with-output_19_0.png index caaa2301ff4679..67d21a28902ef3 100644 --- a/docs/notebooks/object-detection-with-output_files/object-detection-with-output_19_0.png +++ b/docs/notebooks/object-detection-with-output_files/object-detection-with-output_19_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:95391afecd95633885318e637943a774989d61be918b79ab292973be25a682f3 -size 175109 +oid sha256:bae0d18e0fe38cee76b5220d145c96e7d6d38b087cba46fc71db126eb3a88bb8 +size 175079 diff --git a/docs/notebooks/openvino-api-with-output.rst b/docs/notebooks/openvino-api-with-output.rst index e19ec8d0044685..789fdeb53b1a45 100644 --- a/docs/notebooks/openvino-api-with-output.rst +++ b/docs/notebooks/openvino-api-with-output.rst @@ -201,7 +201,7 @@ notebooks. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') @@ -250,7 +250,7 @@ points to the filename of an ONNX model. .. 
parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/openvino-api/model/segmentation.onnx') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/segmentation.onnx') @@ -310,7 +310,7 @@ without any conversion step. Pass the filename with extension to .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/openvino-api/model/inference.pdiparams') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/inference.pdiparams') @@ -354,7 +354,7 @@ TensorFlow models saved in frozen graph format can also be passed to .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.pb') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.pb') @@ -407,7 +407,7 @@ It is pre-trained model optimized to work with TensorFlow Lite. .. parsed-literal:: - Warning: Looks like you're using an outdated `kagglehub` version, please consider updating (latest version: 0.3.0) + Warning: Looks like you're using an outdated `kagglehub` version, please consider updating (latest version: 0.3.1) .. code:: ipython3 @@ -497,7 +497,7 @@ Information about the inputs and outputs of the model are in .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') @@ -703,7 +703,7 @@ produced data as values. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') @@ -892,7 +892,7 @@ input shape. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/openvino-api/model/segmentation.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/segmentation.bin') @@ -1044,7 +1044,7 @@ the cache. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') @@ -1074,7 +1074,7 @@ the cache. .. parsed-literal:: - Loading the network to the AUTO device took 0.14 seconds. + Loading the network to the AUTO device took 0.15 seconds. After running the previous cell, we know the model exists in the cache @@ -1092,5 +1092,5 @@ measure the time it takes now. .. 
parsed-literal:: - Loading the network to the AUTO device took 0.08 seconds. + Loading the network to the AUTO device took 0.07 seconds. diff --git a/docs/notebooks/openvino-tokenizers-with-output.rst b/docs/notebooks/openvino-tokenizers-with-output.rst index b9b521f5beeadf..354a51c4180fa6 100644 --- a/docs/notebooks/openvino-tokenizers-with-output.rst +++ b/docs/notebooks/openvino-tokenizers-with-output.rst @@ -109,25 +109,6 @@ use ``pip install openvino-tokenizers[transformers]``. %pip install -q -U "openvino>=2024.3.0" openvino-tokenizers[transformers] openvino-genai %pip install "numpy<2.0.0" "torch>=2.1" --extra-index-url https://download.pytorch.org/whl/cpu - -.. parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cpu - Requirement already satisfied: numpy<2.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (1.23.5) - Requirement already satisfied: torch>=2.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2.2.2+cpu) - Requirement already satisfied: filelock in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.16.1) - Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (4.12.2) - Requirement already satisfied: sympy in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (1.13.3) - Requirement already satisfied: networkx in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.1) - Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.1.4) - Requirement already satisfied: fsspec in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (2024.6.1) - Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch>=2.1) (2.1.5) - Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch>=2.1) (1.3.0) - Note: you may need to restart the kernel to use updated packages. - - .. code:: ipython3 from pathlib import Path @@ -160,7 +141,7 @@ constructor. Converting Huggingface Tokenizer to OpenVINO... Saved OpenVINO Tokenizer: tokenizer/openvino_tokenizer.xml, tokenizer/openvino_tokenizer.bin Saved OpenVINO Detokenizer: tokenizer/openvino_detokenizer.xml, tokenizer/openvino_detokenizer.bin - + ⚠️ If you have any problems with the command above on MacOS, try to `install tbb `__. 
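The hunk above keeps the log lines "Saved OpenVINO Tokenizer: tokenizer/openvino_tokenizer.xml" and "Saved OpenVINO Detokenizer: tokenizer/openvino_detokenizer.xml". For context, a minimal sketch of that conversion flow is shown below; the Hugging Face model id (``"bert-base-uncased"``) and the output directory are illustrative assumptions, not the notebook's exact cell.

.. code:: ipython3

    from pathlib import Path

    import openvino as ov
    from openvino_tokenizers import convert_tokenizer
    from transformers import AutoTokenizer

    tokenizer_dir = Path("tokenizer")
    tokenizer_dir.mkdir(exist_ok=True)

    # Load the original Hugging Face tokenizer (model id is an illustrative assumption).
    hf_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

    # with_detokenizer=True returns a pair of ov.Model objects:
    # the tokenizer (text -> token ids) and the detokenizer (token ids -> text).
    ov_tokenizer, ov_detokenizer = convert_tokenizer(hf_tokenizer, with_detokenizer=True)

    # Serialize both models as OpenVINO IR (.xml + .bin), using the file names seen in the log above.
    ov.save_model(ov_tokenizer, tokenizer_dir / "openvino_tokenizer.xml")
    ov.save_model(ov_detokenizer, tokenizer_dir / "openvino_detokenizer.xml")

The saved IR files can then be read and compiled like any other OpenVINO model, which is what the remaining hunks of this notebook exercise.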
@@ -195,7 +176,7 @@ The other method is to pass HuggingFace ``hf_tokenizer`` object to ( + ] outputs[ , @@ -203,7 +184,7 @@ The other method is to pass HuggingFace ``hf_tokenizer`` object to ]>, + ] outputs[ @@ -257,7 +238,7 @@ tasks, but not suitable for text generation. Token ids: [[ 1 4321] [ 1 6031]] Detokenized text: ['Test' 'strings'] - + We can compare the result of converted (de)tokenizer with the original one: @@ -274,20 +255,8 @@ one: .. parsed-literal:: Token ids: [[1, 4321], [1, 6031]] - - -.. parsed-literal:: - - 2024-09-24 01:58:24.452870: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-09-24 01:58:24.486218: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-09-24 01:58:25.117973: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - -.. parsed-literal:: - Detokenized text: [' Test', ' strings'] - + Text Generation Pipeline with OpenVINO Tokenizers ------------------------------------------------- @@ -328,19 +297,6 @@ command is commented. local_dir=model_dir, ) - - -.. parsed-literal:: - - openvino_model.xml: 0%| | 0.00/2.93M [00:00 False - Detokenizer is not supported, convert tokenizer only. - - .. code:: ipython3 from openvino_tokenizers import connect_models @@ -596,10 +520,10 @@ model has only one input for text input prompt. Combined OpenVINO model inputs: - + + + Logits: [[ 1.2007061 -1.469803 ]] - Logits: [[ 1.2007061 -1.4698029]] - Conclusion ---------- diff --git a/docs/notebooks/openvoice-with-output.rst b/docs/notebooks/openvoice-with-output.rst index a232a0504f466e..4e4a53150f7504 100644 --- a/docs/notebooks/openvoice-with-output.rst +++ b/docs/notebooks/openvoice-with-output.rst @@ -102,12 +102,12 @@ Clone repository and install requirements remote: Counting objects: 100% (238/238), done. remote: Compressing objects: 100% (113/113), done. remote: Total 438 (delta 178), reused 127 (delta 125), pack-reused 200 (from 1) - Receiving objects: 100% (438/438), 3.82 MiB | 5.67 MiB/s, done. + Receiving objects: 100% (438/438), 3.82 MiB | 16.31 MiB/s, done. Resolving deltas: 100% (221/221), done. ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.3.1 which is incompatible. + mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.4.1 which is incompatible. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. - torchvision 0.17.2+cpu requires torch==2.2.2, but you have torch 2.3.1 which is incompatible. + torchvision 0.17.2+cpu requires torch==2.2.2, but you have torch 2.4.1 which is incompatible. Note: you may need to restart the kernel to use updated packages. @@ -245,8 +245,10 @@ True .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm. - warnings.warn("torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.") + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + WeightNorm.apply(module, name, dim) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + checkpoint_dict = torch.load(ckpt_path, map_location=torch.device(self.device)) .. parsed-literal:: @@ -257,6 +259,14 @@ True missing/unexpected keys: [] [] +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/wavmark/__init__.py:16: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + checkpoint = torch.load(resume_path, map_location=torch.device('cpu')) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. 
It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + checkpoint_dict = torch.load(ckpt_path, map_location=torch.device(self.device)) + + Convert models to OpenVINO IR ----------------------------- @@ -399,55 +409,54 @@ documentation 0 No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:283: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:283: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert ( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:346: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:346: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! pad_length = max(length - (self.window_size + 1), 0) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:347: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:347: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! slice_start_position = max((self.window_size + 1) - length, 0) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:349: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:349: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if pad_length > 0: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:114: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:114: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if torch.min(inputs) < left or torch.max(inputs) > right: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:119: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:119: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if min_bin_width * num_bins > 1.0: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:121: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:121: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if min_bin_height * num_bins > 1.0: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:171: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:171: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert (discriminant >= 0).all() - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1116: TracerWarning: Trace had nondeterministic nodes. Did you forget call .eval() on your model? Nodes: - %3293 : Float(1, 2, 43, strides=[86, 43, 1], requires_grad=0, device=cpu) = aten::randn(%3288, %3289, %3290, %3291, %3292) # /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:85:0 - %5559 : Float(1, 192, 151, strides=[28992, 1, 192], requires_grad=0, device=cpu) = aten::randn_like(%m_p, %5554, %5555, %5556, %5557, %5558) # /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:85:0 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Trace had nondeterministic nodes. Did you forget call .eval() on your model? Nodes: + %3293 : Float(1, 2, 43, strides=[86, 43, 1], requires_grad=0, device=cpu) = aten::randn(%3288, %3289, %3290, %3291, %3292) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:85:0 + %5559 : Float(1, 192, 151, strides=[28992, 1, 192], requires_grad=0, device=cpu) = aten::randn_like(%m_p, %5554, %5555, %5556, %5557, %5558) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:85:0 This may cause errors in trace checking. To disable trace checking, pass check_trace=False to torch.jit.trace() _check_trace( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1116: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: - The values for attribute 'shape' do not match: torch.Size([1, 1, 37888]) != torch.Size([1, 1, 38400]). 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: + The values for attribute 'shape' do not match: torch.Size([1, 1, 38912]) != torch.Size([1, 1, 39424]). _check_trace( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1116: TracerWarning: Output nr 2. of the traced function does not match the corresponding output of the Python function. Detailed error: - The values for attribute 'shape' do not match: torch.Size([1, 1, 148, 43]) != torch.Size([1, 1, 150, 43]). + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 2. of the traced function does not match the corresponding output of the Python function. Detailed error: + The values for attribute 'shape' do not match: torch.Size([1, 1, 152, 43]) != torch.Size([1, 1, 154, 43]). _check_trace( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1116: TracerWarning: Output nr 3. of the traced function does not match the corresponding output of the Python function. Detailed error: - The values for attribute 'shape' do not match: torch.Size([1, 1, 148]) != torch.Size([1, 1, 150]). + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 3. of the traced function does not match the corresponding output of the Python function. Detailed error: + The values for attribute 'shape' do not match: torch.Size([1, 1, 152]) != torch.Size([1, 1, 154]). _check_trace( - 2024-09-24 02:00:09.622566: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-08 03:16:11.904034: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (199 / 199) │ 100% (199 / 199) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ int8_asym │ 100% (199 / 199) │ 100% (199 / 199) │ + ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -470,27 +479,27 @@ documentation )`. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:836.) 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/modules/module.py:1562: UserWarning: A window was not provided. A rectangular window will be applied,which is known to cause spectral leakage. Other windows such as torch.hann_window or torch.hamming_window can are recommended to reduce spectral leakage.To suppress this warning and use a rectangular window, explicitly set `window=torch.ones(n_fft, device=)`. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:836.) return forward_call(\*args, \*\*kwargs) @@ -699,7 +716,7 @@ Load speaker embeddings .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/functional.py:665: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/functional.py:666: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error. Note: you can still call torch.view_as_real on the complex output to recover the old return format. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:873.) return _VF.stft(input, n_fft, hop_length, win_length, window, # type: ignore[attr-defined] @@ -854,7 +871,7 @@ And finally, run voice tone conversion with OpenVINO optimized model @@ -872,7 +889,7 @@ And finally, run voice tone conversion with OpenVINO optimized model @@ -1061,7 +1078,7 @@ voice tone conversion online. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/gradio/components/dropdown.py:100: UserWarning: The `max_choices` parameter is ignored when `multiselect` is False. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/gradio/components/dropdown.py:100: UserWarning: The `max_choices` parameter is ignored when `multiselect` is False. 
warnings.warn( diff --git a/docs/notebooks/optical-character-recognition-with-output.rst b/docs/notebooks/optical-character-recognition-with-output.rst index b429a8e8695865..59f72ff2fd84c3 100644 --- a/docs/notebooks/optical-character-recognition-with-output.rst +++ b/docs/notebooks/optical-character-recognition-with-output.rst @@ -77,7 +77,7 @@ Guide =3.4,<3.7" %pip install -q "tensorflow-macos>=2.5; sys_platform == 'darwin' and platform_machine == 'arm64' and python_version > '3.8'" # macOS M1 and M2 - %pip install -q "tensorflow-macos>=2.5,<=2.12.0; sys_platform == 'darwin' and platform_machine == 'arm64' and python_version <= '3.8'" # macOS M1 and M2 %pip install -q "tensorflow>=2.5; sys_platform == 'darwin' and platform_machine != 'arm64' and python_version > '3.8'" # macOS x86 - %pip install -q "tensorflow>=2.5,<=2.12.0; sys_platform == 'darwin' and platform_machine != 'arm64' and python_version <= '3.8'" # macOS x86 %pip install -q "tensorflow>=2.5; sys_platform != 'darwin' and python_version > '3.8'" - %pip install -q "tensorflow>=2.5; sys_platform != 'darwin' and python_version <= '3.8'" %pip install -q tf_keras tensorflow_hub @@ -107,9 +104,6 @@ Settings Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. Imports @@ -366,7 +360,7 @@ for mean/scale normalization. .. parsed-literal:: - + @@ -397,7 +391,7 @@ may be specified is input data .. parsed-literal:: - + @@ -435,7 +429,7 @@ then such conversion will be added explicitly. .. parsed-literal:: - + @@ -649,6 +643,6 @@ Compare performance .. parsed-literal:: - IR model in OpenVINO Runtime/CPU with manual image preprocessing: 0.0146 seconds per image, FPS: 68.47 - IR model in OpenVINO Runtime/CPU with preprocessing API: 0.0140 seconds per image, FPS: 71.20 + IR model in OpenVINO Runtime/CPU with manual image preprocessing: 0.0148 seconds per image, FPS: 67.77 + IR model in OpenVINO Runtime/CPU with preprocessing API: 0.0142 seconds per image, FPS: 70.46 diff --git a/docs/notebooks/paddle-ocr-webcam-with-output.rst b/docs/notebooks/paddle-ocr-webcam-with-output.rst index f3e391f9eeb08f..6e0bcc263ed873 100644 --- a/docs/notebooks/paddle-ocr-webcam-with-output.rst +++ b/docs/notebooks/paddle-ocr-webcam-with-output.rst @@ -207,7 +207,7 @@ Download the Model for Text **Detection** .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/paddle-o… + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-no… .. parsed-literal:: @@ -253,7 +253,7 @@ Download the Model for Text **Recognition** .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/paddle-o… + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-no… .. 
parsed-literal:: diff --git a/docs/notebooks/paddle-ocr-webcam-with-output_files/paddle-ocr-webcam-with-output_30_0.png b/docs/notebooks/paddle-ocr-webcam-with-output_files/paddle-ocr-webcam-with-output_30_0.png index ccb60d00ebb5ad..e6377775fd7b9b 100644 --- a/docs/notebooks/paddle-ocr-webcam-with-output_files/paddle-ocr-webcam-with-output_30_0.png +++ b/docs/notebooks/paddle-ocr-webcam-with-output_files/paddle-ocr-webcam-with-output_30_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c84e08f99986049e29bd75df24308517846f6b550c2b6c0891eb6e443d81c956 -size 593424 +oid sha256:972e260b6c702c732ac3d1cb7cb2d8e38c9fe8ea0bd7b8e783e74f5d5c8cc6f5 +size 591296 diff --git a/docs/notebooks/paddle-to-openvino-classification-with-output.rst b/docs/notebooks/paddle-to-openvino-classification-with-output.rst index 99298966216af5..e3bb27d374c1aa 100644 --- a/docs/notebooks/paddle-to-openvino-classification-with-output.rst +++ b/docs/notebooks/paddle-to-openvino-classification-with-output.rst @@ -89,11 +89,11 @@ Imports .. parsed-literal:: - --2024-09-24 02:03:28-- http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb + --2024-10-08 03:19:19-- http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb Resolving proxy-dmz.intel.com (proxy-dmz.intel.com)... 10.241.208.166 Connecting to proxy-dmz.intel.com (proxy-dmz.intel.com)|10.241.208.166|:911... connected. Proxy request sent, awaiting response... 404 Not Found - 2024-09-24 02:03:28 ERROR 404: Not Found. + 2024-10-08 03:19:19 ERROR 404: Not Found. dpkg: error: cannot access archive 'libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb': No such file or directory @@ -124,8 +124,8 @@ Imports .. parsed-literal:: - 2024-09-24 02:03:30 INFO: Loading faiss with AVX512 support. - 2024-09-24 02:03:30 INFO: Successfully loaded faiss with AVX512 support. + 2024-10-08 03:19:21 INFO: Loading faiss with AVX512 support. + 2024-10-08 03:19:21 INFO: Successfully loaded faiss with AVX512 support. Settings @@ -209,7 +209,7 @@ inference on that image, and then show the top three prediction results. .. parsed-literal:: - [2024/09/24 02:03:51] ppcls WARNING: The current running environment does not support the use of GPU. CPU has been used instead. + [2024/10/08 03:19:44] ppcls WARNING: The current running environment does not support the use of GPU. CPU has been used instead. Labrador retriever, 0.75138 German short-haired pointer, 0.02373 Great Dane, 0.01848 @@ -275,7 +275,7 @@ clipping values. .. parsed-literal:: - 2024-09-24 02:03:52 WARNING: Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). + 2024-10-08 03:19:45 WARNING: Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). .. parsed-literal:: @@ -287,7 +287,7 @@ clipping values. .. parsed-literal:: - + @@ -462,7 +462,7 @@ Note that many optimizations are possible to improve the performance. .. parsed-literal:: - PaddlePaddle model on CPU: 0.0076 seconds per image, FPS: 131.64 + PaddlePaddle model on CPU: 0.0077 seconds per image, FPS: 130.66 PaddlePaddle result: Labrador retriever, 0.75138 @@ -523,7 +523,7 @@ select device from dropdown list for running inference using OpenVINO .. 
parsed-literal:: - OpenVINO IR model in OpenVINO Runtime (AUTO): 0.0026 seconds per image, FPS: 383.19 + OpenVINO IR model in OpenVINO Runtime (AUTO): 0.0026 seconds per image, FPS: 380.65 OpenVINO result: Labrador retriever, 0.74909 diff --git a/docs/notebooks/parler-tts-text-to-speech-with-output.rst b/docs/notebooks/parler-tts-text-to-speech-with-output.rst index 323959aa17e8ef..25e4d4bed03a7d 100644 --- a/docs/notebooks/parler-tts-text-to-speech-with-output.rst +++ b/docs/notebooks/parler-tts-text-to-speech-with-output.rst @@ -9,7 +9,7 @@ with synthetic annotations `__ by Dan Lyth and Simon King, from Stability AI and Edinburgh University respectively. -.. image:: https://images.squarespace-cdn.com/content/v1/657816dfbefe0533e8a69d9a/30c96e25-acc5-4019-acdd-648da6142c4c/architecture_v3.png?format=2500w +|image0| Text-to-speech models trained on large-scale datasets have demonstrated impressive in-context learning capabilities and naturalness. However, @@ -53,6 +53,8 @@ need a Jupyter server to start. For details, please refer to `Installation Guide `__. +.. |image0| image:: https://images.squarespace-cdn.com/content/v1/657816dfbefe0533e8a69d9a/30c96e25-acc5-4019-acdd-648da6142c4c/architecture_v3.png?format=2500w + Prerequisites ------------- @@ -67,6 +69,24 @@ Prerequisites %pip install -q "openvino>=2024.2.0" %pip install -q git+https://github.com/huggingface/parler-tts.git "gradio>=4.19" transformers "torch>=2.2" --extra-index-url https://download.pytorch.org/whl/cpu + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. + googleapis-common-protos 1.65.0 requires protobuf!=3.20.0,!=3.20.1,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0.dev0,>=3.20.2, but you have protobuf 3.19.6 which is incompatible. + mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.4.1+cpu which is incompatible. + mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. + onnx 1.16.1 requires protobuf>=3.20.2, but you have protobuf 3.19.6 which is incompatible. + paddlepaddle 2.6.2 requires protobuf>=3.20.2; platform_system != "Windows", but you have protobuf 3.19.6 which is incompatible. + tensorflow 2.12.0 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 3.19.6 which is incompatible. + tensorflow-datasets 4.9.2 requires protobuf>=3.20, but you have protobuf 3.19.6 which is incompatible. + tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 3.19.6 which is incompatible. + torchvision 0.17.2+cpu requires torch==2.2.2, but you have torch 2.4.1+cpu which is incompatible. + visualdl 2.5.3 requires protobuf>=3.20.0, but you have protobuf 3.19.6 which is incompatible. + Note: you may need to restart the kernel to use updated packages. + + Load the original model and inference ------------------------------------- @@ -95,6 +115,19 @@ Load the original model and inference audio_arr = generation.cpu().numpy().squeeze() sf.write("parler_tts_out.wav", audio_arr, model.config.sampling_rate) + +.. parsed-literal:: + + 2024-10-08 03:20:24.075446: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-08 03:20:24.108997: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + Flash attention 2 is not installed + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + WeightNorm.apply(module, name, dim) + You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers + The attention mask is not set and cannot be inferred from input because pad token is same as eos token.As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. + + .. code:: ipython3 import IPython.display as ipd @@ -108,10 +141,10 @@ Load the original model and inference - + @@ -159,6 +192,13 @@ and Decoder (``ParlerTTSDecoder``). Lets convert them one by one. text_encoder_ov_model = convert(model.text_encoder, TEXT_ENCODER_OV_PATH, example_input) + +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4664: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + warnings.warn( + + The Decoder Model performs in generation pipeline and we can separate it into two stage. In the first stage the model generates ``past_key_values`` into output for the second stage. In the second @@ -193,6 +233,17 @@ stage the model produces tokens during several runs. decoder_1_ov_model = convert(DecoderStage1Wrapper(model.decoder.model.decoder), DECODER_STAGE_1_OV_PATH, example_input) + +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/parler_tts/modeling_parler_tts.py:253: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if seq_len > self.weights.size(0): + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/parler_tts/modeling_parler_tts.py:1599: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if sequence_length != 1: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/parler_tts/modeling_parler_tts.py:802: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if attn_output.size() != (bsz, self.num_heads, tgt_len, self.head_dim): + + .. code:: ipython3 DECODER_STAGE_2_OV_PATH = Path("models/decoder_stage_2_ir.xml") @@ -258,7 +309,7 @@ Select device from dropdown list for running inference using OpenVINO. .. parsed-literal:: - Dropdown(description='Device:', index=4, options=('CPU', 'GPU.0', 'GPU.1', 'GPU.2', 'AUTO'), value='AUTO') + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') @@ -360,10 +411,10 @@ and run inference. - + @@ -406,13 +457,29 @@ Interactive inference demo = make_demo(fn=infer) try: - demo.queue().launch(debug=True) + demo.queue().launch(debug=False) except Exception: - demo.queue().launch(share=True, debug=True) + demo.queue().launch(share=True, debug=False) # if you are launching remotely, specify server_name and server_port # demo.launch(server_name='your server name', server_port='server port in int') # Read more in the docs: https://gradio.app/docs/ + +.. parsed-literal:: + + Running on local URL: http://127.0.0.1:7860 + + Thanks for being a Gradio user! If you have questions or feedback, please join our Discord server and chat with us: https://discord.gg/feTf9x3ZSB + + To create a public link, set `share=True` in `launch()`. + + + + + + + + .. code:: ipython3 # please uncomment and run this cell for stopping gradio interface diff --git a/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_25_0.png b/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_25_0.png index 1d51c882fd8111..8293308ef73078 100644 --- a/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_25_0.png +++ b/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_25_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:df80743f89245214fc9f1349a78985aa8c136a9371e17bc0df9f2cefd030670a -size 218894 +oid sha256:eb8c9f580a9b1dfe6c67e1ee3b4f0d25a254ef01ccdb10234899873f17a13093 +size 218731 diff --git a/docs/notebooks/phi-3-vision-with-output.rst b/docs/notebooks/phi-3-vision-with-output.rst index f82023103669fd..bd1afd6a47a8ed 100644 --- a/docs/notebooks/phi-3-vision-with-output.rst +++ b/docs/notebooks/phi-3-vision-with-output.rst @@ -17,8 +17,9 @@ post `__ #### -Table of contents: +precision using `NNCF `__ + +**Table of contents:** - `Prerequisites <#prerequisites>`__ - `Select Model <#select-model>`__ @@ -88,10 +89,10 @@ Select Model -The tutorial supports the following models from Phi-3 model family: - -- `Phi-3.5-vision-instruct `__ -- `Phi-3-vision-128k-instruct `__ +The tutorial supports the following models from Phi-3 model family: - +`Phi-3.5-vision-instruct `__ +- +`Phi-3-vision-128k-instruct `__ You can select one from the provided options below. @@ -264,10 +265,10 @@ documentation 1 or self.sliding_window is not None) and self.is_causal: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/c69b58c1be8dfda05972fdf495be3511e3b0f61f/modeling_phi3_v.py:445: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:444: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + seq_len = seq_len or torch.max(position_ids) + 1 + /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:445: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if seq_len > self.original_max_position_embeddings: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:85: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:85: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. op1 = operator(\*args, \*\*kwargs) - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/c69b58c1be8dfda05972fdf495be3511e3b0f61f/modeling_phi3_v.py:683: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:683: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len): - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/c69b58c1be8dfda05972fdf495be3511e3b0f61f/modeling_phi3_v.py:690: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:690: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attention_mask.size() != (bsz, 1, q_len, kv_seq_len): - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/c69b58c1be8dfda05972fdf495be3511e3b0f61f/modeling_phi3_v.py:702: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:702: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim): - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. 
If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) if a.grad is not None: @@ -388,13 +391,13 @@ documentation =1.21.2; python_full_version <= "3.10.0", but you have numpy 1.24.4 which is incompatible. + supervision 0.24.0 requires numpy<1.23.3,>=1.21.2; python_full_version <= "3.10.0", but you have numpy 1.24.4 which is incompatible. tensorflow 2.12.0 requires numpy<1.24,>=1.22, but you have numpy 1.24.4 which is incompatible. Note: you may need to restart the kernel to use updated packages. @@ -200,10 +200,10 @@ PhotoMaker to generate the original PhotoMaker pipeline. .. parsed-literal:: - 2024-09-24 02:09:33.127001: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-09-24 02:09:33.162058: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-08 03:26:26.755777: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-08 03:26:26.790097: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-09-24 02:09:33.850409: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-08 03:26:27.482079: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. code:: ipython3 @@ -401,20 +401,20 @@ output(text embeddings) which will be the input for U-Net model. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4664: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4664: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/photo-maker/PhotoMaker/photomaker/model.py:84: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/photo-maker/PhotoMaker/photomaker/model.py:84: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert class_tokens_mask.sum() == stacked_id_embeds.shape[0], f"{class_tokens_mask.sum()} != {stacked_id_embeds.shape[0]}" .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (151 / 151) │ 100% (151 / 151) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ int8_asym │ 100% (151 / 151) │ 100% (151 / 151) │ + ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -478,20 +478,20 @@ sequence of latent text embeddings. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:86: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:86: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input_shape[-1] > 1 or self.sliding_window is not None: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (73 / 73) │ 100% (73 / 73) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ int8_asym │ 100% (73 / 73) │ 100% (73 / 73) │ + ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -510,11 +510,11 @@ sequence of latent text embeddings. .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (194 / 194) │ 100% (194 / 194) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ int8_asym │ 100% (194 / 194) │ 100% (194 / 194) │ + ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -584,26 +584,26 @@ original Stable Diffusion XL model. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unets/unet_2d_condition.py:1103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unets/unet_2d_condition.py:1103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if dim % default_overall_up_factor != 0: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:136: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:136: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:145: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:145: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if hidden_states.shape[0] >= 64: .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (794 / 794) │ 100% (794 / 794) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ int8_asym │ 100% (794 / 794) │ 100% (794 / 794) │ + ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -659,11 +659,11 @@ VAE decoder. .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (40 / 40) │ 100% (40 / 40) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ int8_asym │ 100% (40 / 40) │ 100% (40 / 40) │ + ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ diff --git a/docs/notebooks/pixart-with-output.rst b/docs/notebooks/pixart-with-output.rst index d9e4d996163db2..631e392394cfed 100644 --- a/docs/notebooks/pixart-with-output.rst +++ b/docs/notebooks/pixart-with-output.rst @@ -118,10 +118,10 @@ directly in latent space, achieving super fast inference with few steps. .. parsed-literal:: - 2024-09-24 02:17:25.713102: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-09-24 02:17:25.749212: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-08 03:34:29.221746: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-08 03:34:29.256826: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2024-09-24 02:17:26.423710: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-08 03:34:29.928193: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -130,6 +130,12 @@ directly in latent space, achieving super fast inference with few steps. Loading pipeline components...: 0%| | 0/5 [00:00. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - Some weights of the model checkpoint were not used when initializing PixArtTransformer2DModel: - ['caption_projection.y_embedding'] @@ -229,7 +233,7 @@ Convert text encoder .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4664: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4664: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( @@ -271,11 +275,11 @@ Convert transformer .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/embeddings.py:219: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/embeddings.py:219: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if self.height != height or self.width != width: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/attention_processor.py:682: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/attention_processor.py:682: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if current_length != target_length: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/attention_processor.py:697: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/attention_processor.py:697: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attention_mask.shape[0] < batch_size * head_size: @@ -300,9 +304,9 @@ Convert VAE decoder .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if hidden_states.shape[0] >= 64: @@ -448,7 +452,7 @@ And insert wrappers instances in the pipeline: .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 
'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -563,7 +567,7 @@ To collect intermediate model inputs for calibration we should customize .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -613,12 +617,13 @@ layers and (2) activations of other layers. The steps are the following: INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino INFO:nncf:1 ignored nodes were found by types in the NNCFGraph + INFO:nncf:1 ignored nodes were found by types in the NNCFGraph INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (290 / 290) │ 100% (290 / 290) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ int8_asym │ 100% (290 / 290) │ 100% (290 / 290) │ + ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -1546,13 +1551,13 @@ applied to footprint reduction. .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 3% (3 / 194) │ 0% (0 / 191) │ - ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ 4 │ 97% (191 / 194) │ 100% (191 / 191) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ int8_asym │ 3% (3 / 194) │ 0% (0 / 191) │ + ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ int4_sym │ 97% (191 / 194) │ 100% (191 / 191) │ + ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -1571,13 +1576,13 @@ applied to footprint reduction. .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 98% (37 / 40) │ 0% (0 / 3) │ - ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ 4 │ 2% (3 / 40) │ 100% (3 / 3) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ int8_asym │ 98% (37 / 40) │ 0% (0 / 3) │ + ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ int4_sym │ 2% (3 / 40) │ 100% (3 / 3) │ + ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -1621,16 +1626,16 @@ pipelines. Loading pipeline components...: 0%| | 0/5 [00:00`__ and `model +card `__ + +In this tutorial we consider how to convert, optimize and run this model +using OpenVINO. + +.. warning:: + + Important note: Please take into account that pixtral is large model. + Its conversion requires at least 50GB disk space available + + +**Table of contents:** + + +- `Prerequisites <#prerequisites>`__ +- `Convert and Optimize model <#convert-and-optimize-model>`__ +- `Run model inference <#run-model-inference>`__ + + - `Select inference device <#select-inference-device>`__ + - `Initialize inference pipeline <#initialize-inference-pipeline>`__ + +- `Interactive demo <#interactive-demo>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +.. 
|image0| image:: https://mistral.ai/images/news/pixtral-12b/pixtral-model-architecture.png + +Prerequisites +------------- + + + +.. code:: ipython3 + + %pip install -q "torch>=2.1" torchvision "pillow" "tqdm" "gradio>=4.36" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "git+https://github.com/eaidova/optimum-intel.git@ea/llava_model" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "nncf>=2.13.0" "openvino>=2024.4" + %pip install -q "transformers>=4.45.0" --extra-index-url https://download.pytorch.org/whl/cpu + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. + mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.2.2+cpu which is incompatible. + mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. + parler-tts 0.2 requires transformers<=4.43.3,>=4.43.0, but you have transformers 4.45.2 which is incompatible. + Note: you may need to restart the kernel to use updated packages. + + +.. code:: ipython3 + + from pathlib import Path + import requests + + if not Path("notebook_utils.py").exists(): + r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py") + open("notebook_utils.py", "w").write(r.text) + + if not Path("gradio_helper.py").exists(): + r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/refs/heads/latest/notebooks/pixtral/gradio_helper.py") + open("gradio_helper.py", "w").write(r.text) + +Convert and Optimize model +-------------------------- + + + +For convenience, we will use OpenVINO integration with HuggingFace +Optimum. `Optimum +Intel `__ is the +interface between the Transformers and Diffusers libraries and the +different tools and libraries provided by Intel to accelerate end-to-end +pipelines on Intel architectures. + +Among other use cases, Optimum Intel provides a simple interface to +optimize your Transformers and Diffusers models, convert them to the +OpenVINO Intermediate Representation (IR) format and run inference using +OpenVINO Runtime. ``optimum-cli`` provides a command-line interface for +model conversion and optimization. + +General command format: + +.. code:: bash + + optimum-cli export openvino --model <model_id_or_path> --task <task> <output_dir> + +where ``task`` is the task to export the model for; if not specified, the task +will be auto-inferred based on the model. You can find a mapping between +tasks and model classes in the Optimum TaskManager +`documentation `__. +Additionally, you can specify weight compression using the +``--weight-format`` argument with one of the following options: ``fp32``, +``fp16``, ``int8`` and ``int4``. For ``int8`` and ``int4``, +`nncf `__ will be used for +weight compression. More details about model export are provided in the `Optimum +Intel +documentation `__. + +.. code:: ipython3 + + import ipywidgets as widgets + + model_base_dir = Path("pixtral-12b") + + precisions = ["FP16", "INT8", "INT4"] + + precision_selector = widgets.Dropdown(description="compression", options=precisions, value=precisions[-1]) + + precision_selector + + + + +.. 
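+For reference, the export cell further below builds its command from the selected precision; with the default ``INT4`` choice it should be roughly equivalent to running the following in a terminal (an illustrative expansion of the command format above, using the ``pixtral-12b`` output directory defined in the previous cell):
+
+.. code:: bash
+
+   # illustrative only: same model, weight format and output path as the cell below
+   optimum-cli export openvino -m "mistral-community/pixtral-12b" --weight-format int4 pixtral-12b/INT4
+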
parsed-literal:: + + Dropdown(description='compression', index=2, options=('FP16', 'INT8', 'INT4'), value='INT4') + + + +.. code:: ipython3 + + model_dir = model_base_dir / precision_selector.value + + if not (model_dir / "openvino_language_model.xml").exists(): + !optimum-cli export openvino -m "mistral-community/pixtral-12b" --weight-format {precision_selector.value.lower()} {model_dir} + + +.. parsed-literal:: + + 2024-10-08 04:21:32.940160: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-08 04:21:32.974138: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2024-10-08 04:21:33.587724: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/vq_model.py:20: FutureWarning: `VQEncoderOutput` is deprecated and will be removed in version 0.31. Importing `VQEncoderOutput` from `diffusers.models.vq_model` is deprecated and this will be removed in a future version. Please use `from diffusers.models.autoencoders.vq_model import VQEncoderOutput`, instead. + deprecate("VQEncoderOutput", "0.31", deprecation_message) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/vq_model.py:25: FutureWarning: `VQModel` is deprecated and will be removed in version 0.31. Importing `VQModel` from `diffusers.models.vq_model` is deprecated and this will be removed in a future version. Please use `from diffusers.models.autoencoders.vq_model import VQModel`, instead. + deprecate("VQModel", "0.31", deprecation_message) + Framework not specified. Using pt to export the model. + Loading checkpoint shards: 100%|██████████████████| 6/6 [00:01<00:00, 3.44it/s] + Automatic task detection to image-text-to-text. + Using framework PyTorch: 2.2.2+cpu + Overriding 1 configuration item(s) + - use_cache -> True + We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class (https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:447: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + or len(self.key_cache[layer_idx]) == 0 # the layer has no cache + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:432: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. 
Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors + Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32) + Using framework PyTorch: 2.2.2+cpu + [ WARNING ] Unexpectedly found already patched module language_model.model.embed_tokens while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.0.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.0.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.0.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.0.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.0.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.0.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.0.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.1.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.1.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.1.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
+ [ WARNING ] Unexpectedly found already patched module language_model.model.layers.1.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.1.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.1.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.1.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.2.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.2.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.2.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.2.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.2.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.2.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.2.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.3.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.3.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
+ [ WARNING ] Unexpectedly found already patched module language_model.model.layers.3.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.3.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.3.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.3.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.3.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.4.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.4.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.4.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.4.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.4.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.4.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.4.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.5.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
+ [ WARNING ] Unexpectedly found already patched module language_model.model.layers.5.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.5.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.5.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.5.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.5.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.5.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.6.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.6.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.6.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.6.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.6.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.6.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.6.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
+ [ WARNING ] Unexpectedly found already patched module language_model.model.layers.7.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.7.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.7.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.7.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.7.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.7.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.7.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.8.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.8.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.8.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.8.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.8.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module language_model.model.layers.8.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
+    [ WARNING ] Unexpectedly found already patched module language_model.model.layers.8.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model.
+    ... (the same warning is repeated for every self_attn and mlp projection module of layers 9 through 39) ...
+    [ WARNING ] Unexpectedly found already patched module language_model.lm_head while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model.
+    /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/pixtral/modeling_pixtral.py:492: TracerWarning: Iterating over a tensor might cause the trace to be incorrect. Passing a tensor of different shape won't change the number of iterations executed (and might lead to errors or silently give incorrect results).
+      patch_embeds_list = [self.patch_conv(img.unsqueeze(0).to(self.dtype)) for img in pixel_values]
+    /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:85: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.
+      op1 = operator(\*args, \*\*kwargs)
+    /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/pixtral/modeling_pixtral.py:448: TracerWarning: Iterating over a tensor might cause the trace to be incorrect. Passing a tensor of different shape won't change the number of iterations executed (and might lead to errors or silently give incorrect results).
+      for start, end in zip(block_start_idx, block_end_idx):
+    Using framework PyTorch: 2.2.2+cpu
+    Overriding 1 configuration item(s)
+    	- use_cache -> False
+    [ WARNING ] Unexpectedly found already patched module while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model.
+    Export model to OpenVINO directly failed with:
+    Config dummy inputs are not a subset of the model inputs: {'input'} vs {'args', 'kwargs'}.
+    Model will be exported to ONNX
+    Using framework PyTorch: 2.2.2+cpu
+    Overriding 1 configuration item(s)
+    	- use_cache -> False
+    Saving external data to one file...
+    INFO:nncf:Statistics of the bitwidth distribution:
+    ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑
+    │ Weight compression mode   │ % all parameters (layers)   │ % ratio-defining parameters (layers)   │
+    ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥
+    │ int8_asym                 │ 6% (1 / 281)                │ 0% (0 / 280)                           │
+    ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤
+    │ int4_asym                 │ 94% (280 / 281)             │ 100% (280 / 280)                       │
+    ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙
+    Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:05:07 • 0:00:00
+    INFO:nncf:Statistics of the bitwidth distribution:
+    ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑
+    │ Weight compression mode   │ % all parameters (layers)   │ % ratio-defining parameters (layers)   │
+    ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥
+    │ int8_asym                 │ 6% (3 / 172)                │ 0% (0 / 169)                           │
+    ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤
+    │ int4_asym                 │ 94% (169 / 172)             │ 100% (169 / 169)                       │
+    ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙
+    Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:00:12 • 0:00:00
+    INFO:nncf:Statistics of the bitwidth distribution:
+    ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑
+    │ Weight compression mode   │ % all parameters (layers)   │ % ratio-defining parameters (layers)   │
+    ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥
+    │ int8_asym                 │ 100% (1 / 1)                │ 0% (0 / 0)                             │
+    ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙
+    Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:00:02 • 0:00:00
+
+
+Run model inference
+-------------------
+
+
+
+Select inference device
+~~~~~~~~~~~~~~~~~~~~~~~
+
+
+
+.. code:: ipython3
+
+    from notebook_utils import device_widget
+
+    device = device_widget(default="CPU", exclude=["NPU"])
+
+    device
+
+
+
+
+.. parsed-literal::
+
+    Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU')
+
+
+
+Initialize inference pipeline
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+
+OpenVINO integration with Optimum Intel provides a ready-to-use API for
+model inference that enables smooth integration with transformers-based
+solutions. For loading the Pixtral model, we will use the
+``OVModelForVisualCausalLM`` class, which has an interface compatible
+with the Transformers Pixtral implementation. A model is loaded with the
+``from_pretrained`` method. It accepts a path to the model directory or
+a model_id from the HuggingFace hub (if the model is not yet converted
+to OpenVINO format, conversion will be triggered automatically).
+Additionally, we can provide an inference device, a quantization config
+(if the model has not been quantized yet), and device-specific OpenVINO
+Runtime configuration. More details about model inference with Optimum
+Intel can be found in the `documentation `__.
+
+.. code:: ipython3
+
+    from transformers import AutoProcessor
+    from optimum.intel.openvino import OVModelForVisualCausalLM
+
+    processor = AutoProcessor.from_pretrained(model_dir)
+    ov_model = OVModelForVisualCausalLM.from_pretrained(model_dir, device=device.value)
+
+
+.. parsed-literal::
+
+    2024-10-08 04:29:37.124362: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
+    2024-10-08 04:29:37.158372: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
+    To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
+    2024-10-08 04:29:37.816800: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
+    Compiling the Language model to CPU ...
+    Compiling the Text embeddings model to CPU ...
+    Compiling the vision_embeddings to CPU ...
+
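+As mentioned above, ``from_pretrained`` can also accept a quantization
+config and OpenVINO Runtime options. The cell below is an illustrative
+sketch only and is not part of the original pipeline: the
+``OVWeightQuantizationConfig`` values and the ``ov_config`` dictionary
+are example settings, not the configuration used in this notebook.
+
+.. code:: ipython3
+
+    from optimum.intel import OVWeightQuantizationConfig
+    from optimum.intel.openvino import OVModelForVisualCausalLM
+
+    # Example values only: 4-bit weight compression and a latency-oriented runtime hint.
+    # These are illustrative assumptions, not this notebook's actual configuration.
+    example_q_config = OVWeightQuantizationConfig(bits=4, group_size=64, ratio=0.9)
+
+    ov_model_example = OVModelForVisualCausalLM.from_pretrained(
+        model_dir,                              # model directory used earlier in this notebook
+        device=device.value,                    # device selected in the widget above
+        quantization_config=example_q_config,   # applied only if the weights are not compressed yet
+        ov_config={"PERFORMANCE_HINT": "LATENCY"},  # device-specific OpenVINO Runtime option
+    )
+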
+ + messages = [ + {"role": "user", "content": [{"type": "text", "content": question}, {"type": "image"}]}, + ] + text = processor.apply_chat_template(messages, add_generation_prompt=True, tokenize=False) + url = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11" + raw_image = Image.open(requests.get(url, stream=True).raw) + + inputs = processor(text=text, images=[resize_with_aspect_ratio(raw_image)], return_tensors="pt") + streamer = TextStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True) + print(f"Question: {question}") + display(raw_image) + output = ov_model.generate(**inputs, do_sample=False, max_new_tokens=100, temperature=None, top_p=None, streamer=streamer) + + +.. parsed-literal:: + + Question: What is unusual on this image? + + + +.. image:: pixtral-with-output_files/pixtral-with-output_12_1.png + + +.. parsed-literal:: + + Setting `pad_token_id` to `eos_token_id`:None for open-end generation. + + +.. parsed-literal:: + + The unusual aspect of this image is the presence of a cat and a dog lying together peacefully inside a cardboard box. This is not a common sight, as cats and dogs are often perceived as being natural enemies or at least not inclined to share spaces closely. The image portrays a harmonious and playful interaction between the two animals, which challenges typical stereotypes about their relationship. + + +Interactive demo +---------------- + + + +.. code:: ipython3 + + from gradio_helper import make_demo + + demo = make_demo(ov_model, processor) + + try: + demo.launch(debug=False) + except Exception: + demo.launch(debug=False, share=True) + # if you are launching remotely, specify server_name and server_port + # demo.launch(server_name='your server name', server_port='server port in int') + # Read more in the docs: https://gradio.app/docs/ + + +.. parsed-literal:: + + Running on local URL: http://127.0.0.1:7860 + + To create a public link, set `share=True` in `launch()`. 
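+Beyond the interactive demo, the loaded pipeline can be queried directly on
+your own inputs. The cell below is a minimal sketch rather than part of the
+original notebook: it assumes ``ov_model``, ``processor`` and
+``resize_with_aspect_ratio`` from the cells above are still in scope, and
+``"my_image.jpg"`` is a placeholder path that should be replaced with a real
+image.
+
+.. code:: ipython3
+
+    # Minimal sketch: ask the loaded Pixtral pipeline about a local image.
+    # "my_image.jpg" is a placeholder; replace it with a path to your own image.
+    from PIL import Image
+
+    question = "Describe this image in one sentence."
+    messages = [
+        {"role": "user", "content": [{"type": "text", "content": question}, {"type": "image"}]},
+    ]
+    prompt = processor.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
+
+    image = Image.open("my_image.jpg")
+    inputs = processor(text=prompt, images=[resize_with_aspect_ratio(image)], return_tensors="pt")
+
+    output = ov_model.generate(**inputs, do_sample=False, max_new_tokens=50)
+    # Decode only the newly generated tokens, skipping the prompt part of the output.
+    answer = processor.tokenizer.decode(output[0, inputs["input_ids"].shape[1] :], skip_special_tokens=True)
+    print(answer)
+
+Generation here runs through the same compiled OpenVINO models as the demo
+above, so no additional conversion or compilation step is needed.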
+ + + + + + + diff --git a/docs/notebooks/pixtral-with-output_files/pixtral-with-output_12_1.jpg b/docs/notebooks/pixtral-with-output_files/pixtral-with-output_12_1.jpg new file mode 100644 index 00000000000000..c6aeec77cd3cb2 --- /dev/null +++ b/docs/notebooks/pixtral-with-output_files/pixtral-with-output_12_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fc0d22d75f23474fb4f8aec8c0bf0fdf5d9377f3379e82a3887003e6da47e7e +size 60425 diff --git a/docs/notebooks/pixtral-with-output_files/pixtral-with-output_12_1.png b/docs/notebooks/pixtral-with-output_files/pixtral-with-output_12_1.png new file mode 100644 index 00000000000000..c6673a757ab5dc --- /dev/null +++ b/docs/notebooks/pixtral-with-output_files/pixtral-with-output_12_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c715d8adee4bf7519690de20b57ef2edaa2f914c86a64d107f99a919dcdad218 +size 854224 diff --git a/docs/notebooks/pose-estimation-with-output_files/pose-estimation-with-output_22_0.png b/docs/notebooks/pose-estimation-with-output_files/pose-estimation-with-output_22_0.png index 990718939d85bd..acdee31b5fc986 100644 --- a/docs/notebooks/pose-estimation-with-output_files/pose-estimation-with-output_22_0.png +++ b/docs/notebooks/pose-estimation-with-output_files/pose-estimation-with-output_22_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:78d14efa749c6ee5f1e2d7f74d1c90eb301711ddc388946c932173c22cd4c9e8 -size 108057 +oid sha256:c27bcedcb93b8775d73cdf416666ce2517ea493b556386f5f7ccd35d53ed15c3 +size 108140 diff --git a/docs/notebooks/pytorch-onnx-to-openvino-with-output.rst b/docs/notebooks/pytorch-onnx-to-openvino-with-output.rst index 5ad32dc9a54d4f..906f8bbc4434de 100644 --- a/docs/notebooks/pytorch-onnx-to-openvino-with-output.rst +++ b/docs/notebooks/pytorch-onnx-to-openvino-with-output.rst @@ -557,9 +557,9 @@ performance. .. parsed-literal:: - PyTorch model on CPU: 0.039 seconds per image, FPS: 25.54 - ONNX model in OpenVINO Runtime/AUTO: 0.018 seconds per image, FPS: 56.76 - OpenVINO IR model in OpenVINO Runtime/AUTO: 0.028 seconds per image, FPS: 36.30 + PyTorch model on CPU: 0.039 seconds per image, FPS: 25.34 + ONNX model in OpenVINO Runtime/AUTO: 0.018 seconds per image, FPS: 56.97 + OpenVINO IR model in OpenVINO Runtime/AUTO: 0.018 seconds per image, FPS: 55.62 **Show Device Information** diff --git a/docs/notebooks/pytorch-post-training-quantization-nncf-with-output.rst b/docs/notebooks/pytorch-post-training-quantization-nncf-with-output.rst index f4012932b4d419..d2b770f4402052 100644 --- a/docs/notebooks/pytorch-post-training-quantization-nncf-with-output.rst +++ b/docs/notebooks/pytorch-post-training-quantization-nncf-with-output.rst @@ -159,7 +159,7 @@ Settings .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/pytorch-post-training-quantization-nncf/model/resnet50_fp32.pth') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/pytorch-post-training-quantization-nncf/model/resnet50_fp32.pth') @@ -449,15 +449,15 @@ I. Evaluate the loaded model .. 
parsed-literal:: - Test: [ 0/79] Time 0.287 (0.287) Acc@1 81.25 (81.25) Acc@5 92.19 (92.19) - Test: [10/79] Time 0.256 (0.256) Acc@1 56.25 (66.97) Acc@5 86.72 (87.50) - Test: [20/79] Time 0.261 (0.254) Acc@1 67.97 (64.29) Acc@5 85.16 (87.35) - Test: [30/79] Time 0.259 (0.255) Acc@1 53.12 (62.37) Acc@5 77.34 (85.33) - Test: [40/79] Time 0.252 (0.255) Acc@1 67.19 (60.86) Acc@5 90.62 (84.51) - Test: [50/79] Time 0.260 (0.255) Acc@1 60.16 (60.80) Acc@5 88.28 (84.42) - Test: [60/79] Time 0.254 (0.255) Acc@1 66.41 (60.46) Acc@5 86.72 (83.79) - Test: [70/79] Time 0.256 (0.255) Acc@1 52.34 (60.21) Acc@5 80.47 (83.33) - * Acc@1 60.740 Acc@5 83.960 Total time: 19.911 + Test: [ 0/79] Time 0.290 (0.290) Acc@1 81.25 (81.25) Acc@5 92.19 (92.19) + Test: [10/79] Time 0.227 (0.240) Acc@1 56.25 (66.97) Acc@5 86.72 (87.50) + Test: [20/79] Time 0.232 (0.240) Acc@1 67.97 (64.29) Acc@5 85.16 (87.35) + Test: [30/79] Time 0.243 (0.239) Acc@1 53.12 (62.37) Acc@5 77.34 (85.33) + Test: [40/79] Time 0.251 (0.240) Acc@1 67.19 (60.86) Acc@5 90.62 (84.51) + Test: [50/79] Time 0.234 (0.240) Acc@1 60.16 (60.80) Acc@5 88.28 (84.42) + Test: [60/79] Time 0.242 (0.242) Acc@1 66.41 (60.46) Acc@5 86.72 (83.79) + Test: [70/79] Time 0.240 (0.242) Acc@1 52.34 (60.21) Acc@5 80.47 (83.33) + * Acc@1 60.740 Acc@5 83.960 Total time: 18.879 Test accuracy of FP32 model: 60.740 @@ -500,10 +500,10 @@ Guide =2.1" "librosa" "gradio>=4.36" "modelscope-studio>=0.4.2" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -qU "openvino>=2024.3.0" "nncf>=2.12.0" + %pip install -q "transformers>=4.45" "torch>=2.1" "librosa" "gradio>=4.36" "modelscope-studio>=0.4.2" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -qU "openvino>=2024.4.0" "nncf>=2.13.0" .. parsed-literal:: - ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.2.2+cpu which is incompatible. - mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. - optimum 1.22.0 requires transformers[sentencepiece]<4.45.0,>=4.29, but you have transformers 4.45.0.dev0 which is incompatible. - optimum-intel 1.20.0.dev0+f1517e3 requires transformers<4.45,>=4.36, but you have transformers 4.45.0.dev0 which is incompatible. - parler-tts 0.2 requires transformers<=4.43.3,>=4.43.0, but you have transformers 4.45.0.dev0 which is incompatible. Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. @@ -236,10 +230,10 @@ documentation =2.1" "torchvision" "qwen-vl-utils" "Pillow" "gradio>=4.36" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -qU "openvino>=2024.3.0" "nncf>=2.12.0" + %pip install -q "transformers>=4.45" "torch>=2.1" "torchvision" "qwen-vl-utils" "Pillow" "gradio>=4.36" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -qU "openvino>=2024.4.0" "nncf>=2.13.0" .. parsed-literal:: @@ -140,10 +140,10 @@ using widget bellow: .. parsed-literal:: - 2024-09-24 04:11:54.386885: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
- 2024-09-24 04:11:54.423332: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-08 05:39:51.401499: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-08 05:39:51.436807: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-09-24 04:11:54.976263: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-08 05:39:52.013737: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -310,17 +310,6 @@ documentation `__ The spectrogram can be computed from audio using the `Short-time Fourier transform @@ -57,7 +63,12 @@ transform which approximates the audio as a combination of sine waves of varying amplitudes and phases. +.. figure:: https://www.riffusion.com/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ffourier_transform.9d53f3d7.png&w=640&q=75 + :alt: stft.png + stft.png + +`\*image source `__ The STFT is invertible, so the original audio can be reconstructed from a spectrogram. This idea is a behind approach to using Riffusion for @@ -133,7 +144,7 @@ running. .. code:: ipython3 from pathlib import Path - + MODEL_ID = "riffusion/riffusion-model-v1" MODEL_DIR = Path("riffusion_pipeline") @@ -147,16 +158,16 @@ select device from dropdown list for running inference using OpenVINO .. code:: ipython3 import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) open("notebook_utils.py", "w").write(r.text) - + from notebook_utils import device_widget - + device = device_widget() - + device @@ -171,9 +182,9 @@ select device from dropdown list for running inference using OpenVINO .. code:: ipython3 from optimum.intel.openvino import OVStableDiffusionPipeline - + DEVICE = device.value - + if not MODEL_DIR.exists(): pipe = OVStableDiffusionPipeline.from_pretrained(MODEL_ID, export=True, device=DEVICE, compile=False) pipe.half() @@ -230,46 +241,46 @@ from a spectrogram image using Griffin-Lim Algorithm. import io from typing import Tuple - + import numpy as np from PIL import Image from scipy.io import wavfile import torch import torchaudio - - + + def wav_bytes_from_spectrogram_image(image: Image.Image) -> Tuple[io.BytesIO, float]: """ Reconstruct a WAV audio clip from a spectrogram image. Also returns the duration in seconds. 
- + Parameters: image (Image.Image): generated spectrogram image Returns: wav_bytes (io.BytesIO): audio signal encoded in wav bytes duration_s (float): duration in seconds """ - + max_volume = 50 power_for_image = 0.25 Sxx = spectrogram_from_image(image, max_volume=max_volume, power_for_image=power_for_image) - + sample_rate = 44100 # [Hz] clip_duration_ms = 5000 # [ms] - + bins_per_image = 512 n_mels = 512 - + # FFT parameters window_duration_ms = 100 # [ms] padded_duration_ms = 400 # [ms] step_size_ms = 10 # [ms] - + # Derived parameters num_samples = int(image.width / float(bins_per_image) * clip_duration_ms) * sample_rate n_fft = int(padded_duration_ms / 1000.0 * sample_rate) hop_length = int(step_size_ms / 1000.0 * sample_rate) win_length = int(window_duration_ms / 1000.0 * sample_rate) - + samples = waveform_from_spectrogram( Sxx=Sxx, n_fft=n_fft, @@ -281,20 +292,20 @@ from a spectrogram image using Griffin-Lim Algorithm. n_mels=n_mels, num_griffin_lim_iters=32, ) - + wav_bytes = io.BytesIO() wavfile.write(wav_bytes, sample_rate, samples.astype(np.int16)) wav_bytes.seek(0) - + duration_s = float(len(samples)) / sample_rate - + return wav_bytes, duration_s - - + + def spectrogram_from_image(image: Image.Image, max_volume: float = 50, power_for_image: float = 0.25) -> np.ndarray: """ Compute a spectrogram magnitude array from a spectrogram image. - + Parameters: image (image.Image): input image max_volume (float, *optional*, 50): max volume for spectrogram magnitude @@ -302,22 +313,22 @@ from a spectrogram image using Griffin-Lim Algorithm. """ # Convert to a numpy array of floats data = np.array(image).astype(np.float32) - + # Flip Y take a single channel data = data[::-1, :, 0] - + # Invert data = 255 - data - + # Rescale to max volume data = data * max_volume / 255 - + # Reverse the power curve data = np.power(data, 1 / power_for_image) - + return data - - + + def waveform_from_spectrogram( Sxx: np.ndarray, n_fft: int, @@ -336,7 +347,7 @@ from a spectrogram image using Griffin-Lim Algorithm. to approximate the phase. """ Sxx_torch = torch.from_numpy(Sxx).to(device) - + if mel_scale: mel_inv_scaler = torchaudio.transforms.InverseMelScale( n_mels=n_mels, @@ -347,9 +358,9 @@ from a spectrogram image using Griffin-Lim Algorithm. norm=None, mel_scale="htk", ).to(device) - + Sxx_torch = mel_inv_scaler(Sxx_torch) - + griffin_lim = torchaudio.transforms.GriffinLim( n_fft=n_fft, win_length=win_length, @@ -357,9 +368,9 @@ from a spectrogram image using Griffin-Lim Algorithm. power=1.0, n_iter=num_griffin_lim_iters, ).to(device) - + waveform = griffin_lim(Sxx_torch).cpu().numpy() - + return waveform Run Inference pipeline @@ -398,12 +409,12 @@ reconstructed audio. pipe.reshape(batch_size=1, height=512, width=512, num_images_per_prompt=1) pipe.compile() - - + + def generate(prompt: str, negative_prompt: str = "") -> Tuple[Image.Image, str]: """ function for generation audio from text prompt - + Parameters: prompt (str): input prompt for generation. negative_prompt (str): negative prompt for generation, contains undesired concepts for generation, which should be avoided. Can be empty. @@ -476,7 +487,7 @@ without the other. More explanation of how it works can be found in this .. code:: ipython3 import IPython.display as ipd - + ipd.Audio(wav_path) @@ -484,7 +495,7 @@ without the other. More explanation of how it works can be found in this .. raw:: html - + + + + +.. 
parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/whisper/generation_whisper.py:496: FutureWarning: The input name `inputs` is deprecated. Please make sure to use `input_features` instead. + warnings.warn( + + +.. parsed-literal:: + + Result: Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel. + + +If the multilingual model was chosen, let’s see how task ``translate`` +is working. We will use ``facebook/multilingual_librispeech`` +multilingual dataset, so you can choose the language. The model will +translate audio from the selected language into English. Conversion of +audio to numpy format is handled by Hugging Face datasets +implementation. A complete list of languages ​​supported by the model +can be found in the +`paper `__. + +.. code:: ipython3 + + import ipywidgets as widgets + + languages = {"japanese": "ja_jp", "dutch": "da_dk", "french": "fr_fr", "spanish": "ca_es", "italian": "it_it", "portuguese": "pt_br", "polish": "pl_pl"} + + SAMPLE_LANG = None + if model_type.value == "Multilingual models": + SAMPLE_LANG = widgets.Dropdown( + options=languages.keys(), + value="italian", + description="Dataset language:", + disabled=False, + ) + + SAMPLE_LANG + + + + +.. parsed-literal:: + + Dropdown(description='Dataset language:', index=4, options=('japanese', 'dutch', 'french', 'spanish', 'italian… + + + +.. code:: ipython3 + + from datasets import load_dataset + + mls_dataset = None + if model_type.value == "Multilingual models": + mls_dataset = load_dataset("google/fleurs", languages[SAMPLE_LANG.value], split="test", streaming=True, trust_remote_code=True) + mls_dataset = iter(mls_dataset) # make it iterable + mls_example = next(mls_dataset) # get one example + +.. code:: ipython3 + + if model_type.value == "Multilingual models": + sample = copy.deepcopy(mls_example["audio"]) + + display(ipd.Audio(sample["array"], rate=sample["sampling_rate"])) + print(f"Reference: {mls_example['raw_transcription']}") + + pt_result = pipe_pt(sample, generate_kwargs={"task": "translate"}) + print(f"\nResult: {pt_result['text']}") + + + +.. raw:: html + + + + + + +.. parsed-literal:: + + Reference: Il blog è uno strumento che si prefigge di incoraggiare la collaborazione e sviluppare l'apprendimento degli studenti ben oltre la giornata scolastica normale. + + Result: The blog is our tool that is prefilled to encourage collaboration and develop the learning of the students and to attract a normal school class. + + +Download and convert model to OpenVINO IR via Optimum Intel CLI +--------------------------------------------------------------- + + + +Listed Whisper model are available for downloading via the `HuggingFace +hub `__. We will use optimum-cli +interface for exporting it into OpenVINO Intermediate Representation +(IR) format. + +Optimum CLI interface for converting models supports export to OpenVINO +(supported starting optimum-intel 1.12 version). General command format: + +.. code:: bash + + optimum-cli export openvino --model --task + +where ``--model`` argument is model id from HuggingFace Hub or local +directory with model (saved using ``.save_pretrained`` method), +``--task`` is one of `supported +task `__ +that exported model should solve. For LLMs it will be +``automatic-speech-recognition-with-past``. 
If model initialization +requires to use remote code, ``--trust-remote-code`` flag additionally +should be passed. Full list of supported arguments available via +``--help`` For more details and examples of usage, please check `optimum +documentation `__. + +.. code:: ipython3 + + import logging + import nncf + import os + from IPython.display import display, Markdown + + nncf.set_log_level(logging.ERROR) + + model_path = Path(model_id.value.split("/")[1]) + export_command = f"optimum-cli export openvino --model {model_id.value} --library transformers --task automatic-speech-recognition-with-past --framework pt {str(model_path)}" + + display(Markdown("**Export command:**")) + display(Markdown(f"`{export_command}`")) + + exit_code = os.system(export_command) + if exit_code != 0: + raise Exception("Failed to load and convert model!") + + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + + + +**Export command:** + + + +``optimum-cli export openvino --model openai/whisper-tiny --library transformers --task automatic-speech-recognition-with-past --framework pt whisper-tiny`` + + +.. parsed-literal:: + + 2024-10-08 06:43:10.758697: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + Moving the following attributes in the config to the generation config: {'max_length': 448, 'suppress_tokens': [1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90, 91, 92, 93, 359, 503, 522, 542, 873, 893, 902, 918, 922, 931, 1350, 1853, 1982, 2460, 2627, 3246, 3253, 3268, 3536, 3846, 3961, 4183, 4667, 6585, 6647, 7273, 9061, 9383, 10428, 10929, 11938, 12033, 12331, 12562, 13793, 14157, 14635, 15265, 15618, 16553, 16604, 18362, 18956, 20075, 21675, 22520, 26130, 26161, 26435, 28279, 29464, 31650, 32302, 32470, 36865, 42863, 47425, 49870, 50254, 50258, 50358, 50359, 50360, 50361, 50362], 'begin_suppress_tokens': [220, 50257]}. You are seeing this warning because you've set generation parameters in the model config, as opposed to in the generation config. + Using framework PyTorch: 2.3.1+cpu + Overriding 1 configuration item(s) + - use_cache -> False + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/whisper/modeling_whisper.py:1071: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if input_features.shape[-1] != expected_seq_length: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/whisper/modeling_whisper.py:388: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if attn_output.size() != (bsz, self.num_heads, tgt_len, self.head_dim): + Using framework PyTorch: 2.3.1+cpu + Overriding 1 configuration item(s) + - use_cache -> True + Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.43.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. 
`past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/whisper/modeling_whisper.py:101: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if sequence_length != 1: + Using framework PyTorch: 2.3.1+cpu + Overriding 1 configuration item(s) + - use_cache -> True + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:447: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + or len(self.key_cache[layer_idx]) == 0 # the layer has no cache + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:432: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors + + +Run inference OpenVINO model with WhisperPipeline +------------------------------------------------- + + + +To simplify user experience we will use `OpenVINO Generate +API `__. +Firstly we will create pipeline with ``WhisperPipeline``. You can +construct it straight away from the folder with the converted model. It +will automatically load the ``model``, ``tokenizer``, ``detokenizer`` +and default ``generation configuration``. + +.. code:: ipython3 + + from notebook_utils import device_widget + + device = device_widget(default="CPU", exclude=["NPU"]) + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU') + + + +.. code:: ipython3 + + import openvino_genai + + ov_pipe = openvino_genai.WhisperPipeline(str(model_path), device=device.value) + +Let’s run the ``transcribe`` task. We just call ``generate`` for that +and put array as input. + +.. code:: ipython3 + + sample = copy.deepcopy(en_raw_speech) + + genai_result = ov_pipe.generate(sample) + + display(ipd.Audio(sample, rate=samplerate)) + print(f"Result: {genai_result}") + + + +.. raw:: html + + + + + + +.. parsed-literal:: + + Result: Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel. + + +Let’s see how to work the ``translate`` task. It supports for +multilingual models only. For that case we will specify ``language`` and +``task`` options. We can do this in different ways. We can get default +config with ``get_generation_config()``, setup parameters and put config +directly to ``generate()``. It’s also possible to specify the needed +options just as inputs in the ``generate()`` method and we will use this +way. Then we just run ``generate`` method and get the output in text +format. + +.. 
code:: ipython3 + + languages_genai = { + "japanese": "<|ja|>", + "dutch": "<|da|>", + "french": "<|fr|>", + "spanish": "<|es|>", + "italian": "<|it|>", + "portuguese": "<|pt|>", + "polish": "<|pl|>", + } + + if model_type.value == "Multilingual models": + sample = copy.deepcopy(mls_example["audio"]) + + genai_result_ml = ov_pipe.generate(sample["array"], max_new_tokens=100, task="translate", language=languages_genai[SAMPLE_LANG.value]) + + display(ipd.Audio(sample["array"], rate=sample["sampling_rate"])) + print(f"Reference: {mls_example['raw_transcription']}") + print(f"\nResult: {genai_result_ml}") + + + +.. raw:: html + + + + + + +.. parsed-literal:: + + Reference: Il blog è uno strumento che si prefigge di incoraggiare la collaborazione e sviluppare l'apprendimento degli studenti ben oltre la giornata scolastica normale. + + Result: The blog is our tool that is prefilled to encourage collaboration and develop the learning of the students and to attract a normal school class. + + +Compare performance PyTorch vs OpenVINO +--------------------------------------- + + + +.. code:: ipython3 + + import time + import numpy as np + from tqdm.notebook import tqdm + + + def measure_perf(pipe, n=10, model_type="ov"): + timers = [] + for _ in tqdm(range(n), desc="Measuring performance"): + sample = copy.deepcopy(en_raw_speech) + start = time.perf_counter() + if model_type == "pt": + pipe(sample) + elif model_type == "ov": + pipe.generate(sample) + end = time.perf_counter() + timers.append(end - start) + return np.median(timers) + +.. code:: ipython3 + + perf_torch = measure_perf(pipe_pt, model_type="pt") + perf_ov = measure_perf(ov_pipe) + + + +.. parsed-literal:: + + Measuring performance: 0%| | 0/10 [00:00`__ enables +post-training quantization by adding the quantization layers into the +model graph and then using a subset of the training dataset to +initialize the parameters of these additional quantization layers. The +framework is designed so that modifications to your original training +code are minor. + +The optimization process contains the following steps: + +1. Create a calibration dataset for quantization. +2. Run ``nncf.quantize`` to obtain quantized encoder and decoder models. +3. Serialize the ``INT8`` model using ``openvino.save_model`` function. + +.. + + **Note**: Quantization is time and memory consuming operation. + Running quantization code below may take some time. + +Please select below whether you would like to run Whisper quantization. + +.. code:: ipython3 + + from notebook_utils import quantization_widget + + to_quantize = quantization_widget() + + to_quantize + + + + +.. parsed-literal:: + + Checkbox(value=True, description='Quantization') + + + +.. code:: ipython3 + + # Fetch `skip_kernel_extension` module + import requests + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", + ) + open("skip_kernel_extension.py", "w").write(r.text) + + %load_ext skip_kernel_extension + +Let’s load converted OpenVINO model format using Optimum-Intel to easily +quantize it. + +Optimum Intel can be used to load optimized models from the `Hugging +Face Hub `__ or +local folder to create pipelines to run an inference with OpenVINO +Runtime using Hugging Face APIs. The Optimum Inference models are API +compatible with Hugging Face Transformers models. This means we just +need to replace the ``AutoModelForXxx`` class with the corresponding +``OVModelForXxx`` class. 
+ +Below is an example of the whisper-tiny model + +.. code:: diff + + -from transformers import AutoModelForSpeechSeq2Seq + +from optimum.intel.openvino import OVModelForSpeechSeq2Seq + from transformers import AutoTokenizer, pipeline + + model_id = "openai/whisper-tiny" + -model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id) + +model = OVModelForSpeechSeq2Seq.from_pretrained(model_id, export=True) + +Like the original PyTorch model, the OpenVINO model is also compatible +with HuggingFace +`pipeline `__ +interface for ``automatic-speech-recognition``. Pipeline can be used for +long audio transcription. Distil-Whisper uses a chunked algorithm to +transcribe long-form audio files. In practice, this chunked long-form +algorithm is 9x faster than the sequential algorithm proposed by OpenAI +in the Whisper paper. To enable chunking, pass the chunk_length_s +parameter to the pipeline. For Distil-Whisper, a chunk length of 15 +seconds is optimal. To activate batching, pass the argument batch_size. + +.. code:: ipython3 + + from optimum.intel.openvino import OVModelForSpeechSeq2Seq + + ov_model = OVModelForSpeechSeq2Seq.from_pretrained(str(model_path), device=device.value) + ov_processor = AutoProcessor.from_pretrained(str(model_path)) + + +.. parsed-literal:: + + Compiling the encoder to CPU ... + Compiling the decoder to CPU ... + Compiling the decoder to CPU ... + + +Prepare calibration datasets +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +First step is to prepare calibration datasets for quantization. Since we +quantize whisper encoder and decoder separately, we need to prepare a +calibration dataset for each of the models. We import an +``InferRequestWrapper`` class that will intercept model inputs and +collect them to a list. Then we run model inference on some small amount +of audio samples. Generally, increasing the calibration dataset size +improves quantization quality. + +.. code:: ipython3 + + %%skip not $to_quantize.value + + from itertools import islice + from optimum.intel.openvino.quantization import InferRequestWrapper + + + def collect_calibration_dataset(ov_model: OVModelForSpeechSeq2Seq, calibration_dataset_size: int): + # Overwrite model request properties, saving the original ones for restoring later + encoder_calibration_data = [] + decoder_calibration_data = [] + ov_model.encoder.request = InferRequestWrapper(ov_model.encoder.request, encoder_calibration_data, apply_caching=True) + ov_model.decoder_with_past.request = InferRequestWrapper(ov_model.decoder_with_past.request, + decoder_calibration_data, + apply_caching=True) + + pipe = pipeline( + "automatic-speech-recognition", + model=ov_model, + chunk_length_s=30, + tokenizer=ov_processor.tokenizer, + feature_extractor=ov_processor.feature_extractor) + try: + calibration_dataset = dataset = load_dataset("openslr/librispeech_asr", "clean", split="validation", streaming=True, trust_remote_code=True) + for sample in tqdm(islice(calibration_dataset, calibration_dataset_size), desc="Collecting calibration data", + total=calibration_dataset_size): + pipe(sample["audio"], return_timestamps=True) + finally: + ov_model.encoder.request = ov_model.encoder.request.request + ov_model.decoder_with_past.request = ov_model.decoder_with_past.request.request + + return encoder_calibration_data, decoder_calibration_data + +Quantize Whisper encoder and decoder models +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +Below we run the ``quantize`` function which calls ``nncf.quantize`` on +Whisper encoder and decoder-with-past models. 
We don’t quantize +first-step-decoder because its share in whole inference time is +negligible. + +.. code:: ipython3 + + %%skip not $to_quantize.value + + import gc + import shutil + import nncf + import openvino as ov + from datasets import load_dataset + from tqdm.notebook import tqdm + + def extract_input_features(sample): + input_features = processor( + sample["audio"]["array"], + sampling_rate=sample["audio"]["sampling_rate"], + return_tensors="pt", + ).input_features + return input_features + + + + CALIBRATION_DATASET_SIZE = 30 + quantized_model_path = Path(f"{model_path}-quantized") + + + def quantize(ov_model: OVModelForSpeechSeq2Seq, calibration_dataset_size: int): + if not quantized_model_path.exists(): + encoder_calibration_data, decoder_calibration_data = collect_calibration_dataset( + ov_model, calibration_dataset_size + ) + print("Quantizing encoder") + quantized_encoder = nncf.quantize( + ov_model.encoder.model, + nncf.Dataset(encoder_calibration_data), + subset_size=len(encoder_calibration_data), + model_type=nncf.ModelType.TRANSFORMER, + # Smooth Quant algorithm reduces activation quantization error; optimal alpha value was obtained through grid search + advanced_parameters=nncf.AdvancedQuantizationParameters(smooth_quant_alpha=0.80) + ) + ov.save_model(quantized_encoder, quantized_model_path / "openvino_encoder_model.xml") + del quantized_encoder + del encoder_calibration_data + gc.collect() + + print("Quantizing decoder with past") + quantized_decoder_with_past = nncf.quantize( + ov_model.decoder_with_past.model, + nncf.Dataset(decoder_calibration_data), + subset_size=len(decoder_calibration_data), + model_type=nncf.ModelType.TRANSFORMER, + # Smooth Quant algorithm reduces activation quantization error; optimal alpha value was obtained through grid search + advanced_parameters=nncf.AdvancedQuantizationParameters(smooth_quant_alpha=0.96) + ) + ov.save_model(quantized_decoder_with_past, quantized_model_path / "openvino_decoder_with_past_model.xml") + del quantized_decoder_with_past + del decoder_calibration_data + gc.collect() + + # Copy the config file and the first-step-decoder manually + shutil.copy(model_path / "config.json", quantized_model_path / "config.json") + shutil.copy(model_path / "generation_config.json", quantized_model_path / "generation_config.json") + shutil.copy(model_path / "openvino_decoder_model.xml", quantized_model_path / "openvino_decoder_model.xml") + shutil.copy(model_path / "openvino_decoder_model.bin", quantized_model_path / "openvino_decoder_model.bin") + shutil.copy(model_path / "openvino_tokenizer.xml", quantized_model_path / "openvino_tokenizer.xml") + shutil.copy(model_path / "openvino_tokenizer.bin", quantized_model_path / "openvino_tokenizer.bin") + shutil.copy(model_path / "openvino_detokenizer.xml", quantized_model_path / "openvino_detokenizer.xml") + shutil.copy(model_path / "openvino_detokenizer.bin", quantized_model_path / "openvino_detokenizer.bin") + shutil.copy(model_path / "tokenizer_config.json", quantized_model_path / "tokenizer_config.json") + shutil.copy(model_path / "tokenizer.json", quantized_model_path / "tokenizer.json") + shutil.copy(model_path / "vocab.json", quantized_model_path / "vocab.json") + shutil.copy(model_path / "preprocessor_config.json", quantized_model_path / "preprocessor_config.json") + shutil.copy(model_path / "special_tokens_map.json", quantized_model_path / "special_tokens_map.json") + shutil.copy(model_path / "normalizer.json", quantized_model_path / "normalizer.json") + shutil.copy(model_path / 
"merges.txt", quantized_model_path / "merges.txt") + shutil.copy(model_path / "added_tokens.json", quantized_model_path / "added_tokens.json") + + quantized_ov_pipe = openvino_genai.WhisperPipeline(str(quantized_model_path), device=device.value) + return quantized_ov_pipe + + + ov_quantized_pipe = quantize(ov_model, CALIBRATION_DATASET_SIZE) + + + +.. parsed-literal:: + + Collecting calibration data: 0%| | 0/30 [00:00 + + Your browser does not support the audio element. + + + + +.. parsed-literal:: + + Original : Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel. + Quantized: Mr Quilder is the apostle of the middle classes and we are glad to welcome his gospel. + + +Compare performance and accuracy of the original and quantized models +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +Finally, we compare original and quantized Whisper models from accuracy +and performance stand-points. + +To measure accuracy, we use ``1 - WER`` as a metric, where WER stands +for Word Error Rate. + +.. code:: ipython3 + + %%skip not $to_quantize.value + + import time + from contextlib import contextmanager + from jiwer import wer, wer_standardize + + + TEST_DATASET_SIZE = 50 + + def calculate_transcription_time_and_accuracy(ov_model, test_samples): + whole_infer_times = [] + + ground_truths = [] + predictions = [] + for data_item in tqdm(test_samples, desc="Measuring performance and accuracy"): + + start_time = time.perf_counter() + transcription = ov_model.generate(data_item["audio"]["array"]) + end_time = time.perf_counter() + whole_infer_times.append(end_time - start_time) + + ground_truths.append(data_item["text"]) + predictions.append(transcription.texts[0]) + + word_accuracy = (1 - wer(ground_truths, predictions, reference_transform=wer_standardize, + hypothesis_transform=wer_standardize)) * 100 + mean_whole_infer_time = sum(whole_infer_times) + return word_accuracy, mean_whole_infer_time + + test_dataset = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True, trust_remote_code=True) + test_dataset = test_dataset.shuffle(seed=42).take(TEST_DATASET_SIZE) + test_samples = [sample for sample in test_dataset] + + accuracy_original, times_original = calculate_transcription_time_and_accuracy(ov_pipe, test_samples) + accuracy_quantized, times_quantized = calculate_transcription_time_and_accuracy(ov_quantized_pipe, test_samples) + print(f"Whole pipeline performance speedup: {times_original / times_quantized:.3f}") + print(f"Whisper transcription word accuracy. Original model: {accuracy_original:.2f}%. Quantized model: {accuracy_quantized:.2f}%.") + print(f"Accuracy drop: {accuracy_original - accuracy_quantized:.2f}%.") + + + +.. parsed-literal:: + + Measuring performance and accuracy: 0%| | 0/50 [00:00`__ in +Ultralytics documentation. + +The tutorial consists of the following steps: - Prepare the PyTorch +model. - Download and prepare a dataset. - Validate the original model. +- Convert the PyTorch model to OpenVINO IR. - Validate the converted +model. - Prepare and run optimization pipeline. - Compare performance of +the FP32 and quantized models. - Compare accuracy of the FP32 and +quantized models. 
- Live demo + + +**Table of contents:** + + +- `Get PyTorch model <#get-pytorch-model>`__ + + - `Prerequisites <#prerequisites>`__ + +- `Instantiate model <#instantiate-model>`__ + + - `Convert model to OpenVINO IR <#convert-model-to-openvino-ir>`__ + - `Verify model inference <#verify-model-inference>`__ + - `Select inference device <#select-inference-device>`__ + - `Test on single image <#test-on-single-image>`__ + +- `Optimize model using NNCF Post-training Quantization + API <#optimize-model-using-nncf-post-training-quantization-api>`__ + + - `Validate Quantized model + inference <#validate-quantized-model-inference>`__ + +- `Compare the Original and Quantized + Models <#compare-the-original-and-quantized-models>`__ + + - `Compare performance of the Original and Quantized + Models <#compare-performance-of-the-original-and-quantized-models>`__ + +- `Other ways to optimize model <#other-ways-to-optimize-model>`__ +- `Live demo <#live-demo>`__ + + - `Run Live Object Detection and + Segmentation <#run-live-object-detection-and-segmentation>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +Get PyTorch model +----------------- + + + +Generally, PyTorch models represent an instance of the +`torch.nn.Module `__ +class, initialized by a state dictionary with model weights. We will use +the YOLOv11 nano model (also known as ``yolo11n-seg``) pre-trained on a +COCO dataset, which is available in this +`repo `__. Similar steps are +also applicable to other YOLOv11 models. Typical steps to obtain a +pre-trained model: 1. Create an instance of a model class. 2. Load a +checkpoint state dict, which contains the pre-trained model weights. 3. +Turn the model to evaluation for switching some operations to inference +mode. + +In this case, the creators of the model provide an API that enables +converting the YOLOv11 model to OpenVINO IR. Therefore, we do not need +to do these steps manually. + +Prerequisites +^^^^^^^^^^^^^ + + + +Install necessary packages. + +.. code:: ipython3 + + %pip install -q "openvino>=2024.0.0" "nncf>=2.9.0" + %pip install -q "torch>=2.1" "torchvision>=0.16" "ultralytics==8.3.0" opencv-python tqdm --extra-index-url https://download.pytorch.org/whl/cpu + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + + +Import required utility functions. The lower cell will download the +``notebook_utils`` Python module from GitHub. + +.. code:: ipython3 + + from pathlib import Path + + # Fetch `notebook_utils` module + import requests + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + + open("notebook_utils.py", "w").write(r.text) + from notebook_utils import download_file, VideoPlayer, device_widget + +.. code:: ipython3 + + # Download a test sample + IMAGE_PATH = Path("./data/coco_bike.jpg") + download_file( + url="https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco_bike.jpg", + filename=IMAGE_PATH.name, + directory=IMAGE_PATH.parent, + ) + + + +.. 
parsed-literal:: + + data/coco_bike.jpg: 0%| | 0.00/182k [00:00`__ provides a suite of +advanced algorithms for Neural Networks inference optimization in +OpenVINO with minimal accuracy drop. We will use 8-bit quantization in +post-training mode (without the fine-tuning pipeline) to optimize +YOLOv11. + +The optimization process contains the following steps: + +1. Create a Dataset for quantization. +2. Run ``nncf.quantize`` for getting an optimized model. +3. Serialize OpenVINO IR model, using the ``openvino.runtime.serialize`` + function. + +Please select below whether you would like to run quantization to +improve model inference speed. + +.. code:: ipython3 + + import ipywidgets as widgets + + int8_model_seg_path = Path(f"{SEG_MODEL_NAME}_openvino_int8_model/{SEG_MODEL_NAME}.xml") + quantized_seg_model = None + + to_quantize = widgets.Checkbox( + value=True, + description="Quantization", + disabled=False, + ) + + to_quantize + + + + +.. parsed-literal:: + + Checkbox(value=True, description='Quantization') + + + +Let’s load ``skip magic`` extension to skip quantization if +``to_quantize`` is not selected + +.. code:: ipython3 + + # Fetch skip_kernel_extension module + import requests + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", + ) + open("skip_kernel_extension.py", "w").write(r.text) + + %load_ext skip_kernel_extension + +Reuse validation dataloader in accuracy testing for quantization. For +that, it should be wrapped into the ``nncf.Dataset`` object and define a +transformation function for getting only input tensors. + +.. code:: ipython3 + + # %%skip not $to_quantize.value + + + import nncf + from typing import Dict + + from zipfile import ZipFile + + from ultralytics.data.utils import DATASETS_DIR + from ultralytics.utils import DEFAULT_CFG + from ultralytics.cfg import get_cfg + from ultralytics.data.converter import coco80_to_coco91_class + from ultralytics.data.utils import check_det_dataset + from ultralytics.utils import ops + + + if not int8_model_seg_path.exists(): + DATA_URL = "http://images.cocodataset.org/zips/val2017.zip" + LABELS_URL = "https://github.com/ultralytics/yolov5/releases/download/v1.0/coco2017labels-segments.zip" + CFG_URL = "https://raw.githubusercontent.com/ultralytics/ultralytics/v8.1.0/ultralytics/cfg/datasets/coco.yaml" + + OUT_DIR = DATASETS_DIR + + DATA_PATH = OUT_DIR / "val2017.zip" + LABELS_PATH = OUT_DIR / "coco2017labels-segments.zip" + CFG_PATH = OUT_DIR / "coco.yaml" + + download_file(DATA_URL, DATA_PATH.name, DATA_PATH.parent) + download_file(LABELS_URL, LABELS_PATH.name, LABELS_PATH.parent) + download_file(CFG_URL, CFG_PATH.name, CFG_PATH.parent) + + if not (OUT_DIR / "coco/labels").exists(): + with ZipFile(LABELS_PATH, "r") as zip_ref: + zip_ref.extractall(OUT_DIR) + with ZipFile(DATA_PATH, "r") as zip_ref: + zip_ref.extractall(OUT_DIR / "coco/images") + + args = get_cfg(cfg=DEFAULT_CFG) + args.data = str(CFG_PATH) + seg_validator = seg_model.task_map[seg_model.task]["validator"](args=args) + seg_validator.data = check_det_dataset(args.data) + seg_validator.stride = 32 + seg_data_loader = seg_validator.get_dataloader(OUT_DIR / "coco/", 1) + + seg_validator.is_coco = True + seg_validator.class_map = coco80_to_coco91_class() + seg_validator.names = label_map + seg_validator.metrics.names = seg_validator.names + seg_validator.nc = 80 + seg_validator.nm = 32 + seg_validator.process = ops.process_mask + seg_validator.plot_masks = [] + + def 
transform_fn(data_item: Dict): + """ + Quantization transform function. Extracts and preprocess input data from dataloader item for quantization. + Parameters: + data_item: Dict with data item produced by DataLoader during iteration + Returns: + input_tensor: Input data for quantization + """ + input_tensor = seg_validator.preprocess(data_item)["img"].numpy() + return input_tensor + + quantization_dataset = nncf.Dataset(seg_data_loader, transform_fn) + + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + '/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-785/.workspace/scm/datasets/val2017.zip' already exists. + '/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-785/.workspace/scm/datasets/coco2017labels-segments.zip' already exists. + + + +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-785/.workspace/scm/datasets/coco.yaml: 0%| … + + +.. parsed-literal:: + + val: Scanning /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-785/.workspace/scm/datasets/coco/labels/val2017.cache... 4952 images, 48 backgrounds, 0 corrupt: 100%|██████████| 5000/5000 [00:00`__ +to measure the inference performance of the ``FP32`` and ``INT8`` +models. + + **Note**: For more accurate performance, it is recommended to run + ``benchmark_app`` in a terminal/command prompt after closing other + applications. Run + ``benchmark_app -m -d CPU -shape ""`` to + benchmark async inference on CPU on specific input data shape for one + minute. Change ``CPU`` to ``GPU`` to benchmark on GPU. Run + ``benchmark_app --help`` to see an overview of all command-line + options. + +.. code:: ipython3 + + %%skip not $to_quantize.value + + device + +.. code:: ipython3 + + if int8_model_seg_path.exists(): + !benchmark_app -m $seg_model_path -d $device.value -api async -shape "[1,3,640,640]" -t 15 + + +.. parsed-literal:: + + [Step 1/11] Parsing and validating input arguments + [ INFO ] Parsing input parameters + [Step 2/11] Loading OpenVINO Runtime + [ INFO ] OpenVINO: + [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] + [ INFO ] Device info: + [ INFO ] AUTO + [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] + [ INFO ] + [Step 3/11] Setting device configuration + [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. + [Step 4/11] Reading model files + [ INFO ] Loading model files + [ INFO ] Read model took 19.71 ms + [ INFO ] Original model I/O parameters: + [ INFO ] Model inputs: + [ INFO ] x (node: x) : f32 / [...] / [?,3,?,?] + [ INFO ] Model outputs: + [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_8) : f32 / [...] / [?,116,21..] + [ INFO ] input.255 (node: __module.model.23.cv4.2.1.act/aten::silu_/Swish_46) : f32 / [...] / [?,32,8..,8..] + [Step 5/11] Resizing model to match image sizes and given batch + [ INFO ] Model batch size: 1 + [ INFO ] Reshaping model: 'x': [1,3,640,640] + [ INFO ] Reshape model took 5.12 ms + [Step 6/11] Configuring input of the model + [ INFO ] Model inputs: + [ INFO ] x (node: x) : u8 / [N,C,H,W] / [1,3,640,640] + [ INFO ] Model outputs: + [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_8) : f32 / [...] / [1,116,8400] + [ INFO ] input.255 (node: __module.model.23.cv4.2.1.act/aten::silu_/Swish_46) : f32 / [...] 
/ [1,32,160,160] + [Step 7/11] Loading the model to the device + [ INFO ] Compile model took 385.36 ms + [Step 8/11] Querying optimal runtime parameters + [ INFO ] Model: + [ INFO ] NETWORK_NAME: Model0 + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 6 + [ INFO ] MULTI_DEVICE_PRIORITIES: CPU + [ INFO ] CPU: + [ INFO ] AFFINITY: Affinity.CORE + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 + [ INFO ] DYNAMIC_QUANTIZATION_GROUP_SIZE: 32 + [ INFO ] ENABLE_CPU_PINNING: True + [ INFO ] ENABLE_HYPER_THREADING: True + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE + [ INFO ] INFERENCE_NUM_THREADS: 24 + [ INFO ] INFERENCE_PRECISION_HINT: + [ INFO ] KV_CACHE_PRECISION: + [ INFO ] LOG_LEVEL: Level.NO + [ INFO ] MODEL_DISTRIBUTION_POLICY: set() + [ INFO ] NETWORK_NAME: Model0 + [ INFO ] NUM_STREAMS: 6 + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 6 + [ INFO ] PERFORMANCE_HINT: THROUGHPUT + [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 + [ INFO ] PERF_COUNT: NO + [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE + [ INFO ] MODEL_PRIORITY: Priority.MEDIUM + [ INFO ] LOADED_FROM_CACHE: False + [ INFO ] PERF_COUNT: False + [Step 9/11] Creating infer requests and preparing input tensors + [ WARNING ] No input files were given for input 'x'!. This input will be filled with random values! + [ INFO ] Fill input 'x' with random values + [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) + [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). + [ INFO ] First inference took 37.32 ms + [Step 11/11] Dumping statistics report + [ INFO ] Execution Devices:['CPU'] + [ INFO ] Count: 1794 iterations + [ INFO ] Duration: 15062.45 ms + [ INFO ] Latency: + [ INFO ] Median: 50.09 ms + [ INFO ] Average: 50.21 ms + [ INFO ] Min: 43.19 ms + [ INFO ] Max: 65.81 ms + [ INFO ] Throughput: 119.10 FPS + + +.. code:: ipython3 + + if int8_model_seg_path.exists(): + !benchmark_app -m $int8_model_seg_path -d $device.value -api async -shape "[1,3,640,640]" -t 15 + + +.. parsed-literal:: + + [Step 1/11] Parsing and validating input arguments + [ INFO ] Parsing input parameters + [Step 2/11] Loading OpenVINO Runtime + [ INFO ] OpenVINO: + [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] + [ INFO ] Device info: + [ INFO ] AUTO + [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] + [ INFO ] + [Step 3/11] Setting device configuration + [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. + [Step 4/11] Reading model files + [ INFO ] Loading model files + [ INFO ] Read model took 29.59 ms + [ INFO ] Original model I/O parameters: + [ INFO ] Model inputs: + [ INFO ] x (node: x) : f32 / [...] / [1,3,640,640] + [ INFO ] Model outputs: + [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_8) : f32 / [...] / [1,116,8400] + [ INFO ] input.255 (node: __module.model.23.cv4.2.1.act/aten::silu_/Swish_46) : f32 / [...] 
/ [1,32,160,160] + [Step 5/11] Resizing model to match image sizes and given batch + [ INFO ] Model batch size: 1 + [ INFO ] Reshaping model: 'x': [1,3,640,640] + [ INFO ] Reshape model took 0.04 ms + [Step 6/11] Configuring input of the model + [ INFO ] Model inputs: + [ INFO ] x (node: x) : u8 / [N,C,H,W] / [1,3,640,640] + [ INFO ] Model outputs: + [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_8) : f32 / [...] / [1,116,8400] + [ INFO ] input.255 (node: __module.model.23.cv4.2.1.act/aten::silu_/Swish_46) : f32 / [...] / [1,32,160,160] + [Step 7/11] Loading the model to the device + [ INFO ] Compile model took 608.94 ms + [Step 8/11] Querying optimal runtime parameters + [ INFO ] Model: + [ INFO ] NETWORK_NAME: Model0 + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 6 + [ INFO ] MULTI_DEVICE_PRIORITIES: CPU + [ INFO ] CPU: + [ INFO ] AFFINITY: Affinity.CORE + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 + [ INFO ] DYNAMIC_QUANTIZATION_GROUP_SIZE: 32 + [ INFO ] ENABLE_CPU_PINNING: True + [ INFO ] ENABLE_HYPER_THREADING: True + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE + [ INFO ] INFERENCE_NUM_THREADS: 24 + [ INFO ] INFERENCE_PRECISION_HINT: + [ INFO ] KV_CACHE_PRECISION: + [ INFO ] LOG_LEVEL: Level.NO + [ INFO ] MODEL_DISTRIBUTION_POLICY: set() + [ INFO ] NETWORK_NAME: Model0 + [ INFO ] NUM_STREAMS: 6 + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 6 + [ INFO ] PERFORMANCE_HINT: THROUGHPUT + [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 + [ INFO ] PERF_COUNT: NO + [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE + [ INFO ] MODEL_PRIORITY: Priority.MEDIUM + [ INFO ] LOADED_FROM_CACHE: False + [ INFO ] PERF_COUNT: False + [Step 9/11] Creating infer requests and preparing input tensors + [ WARNING ] No input files were given for input 'x'!. This input will be filled with random values! + [ INFO ] Fill input 'x' with random values + [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) + [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). + [ INFO ] First inference took 26.24 ms + [Step 11/11] Dumping statistics report + [ INFO ] Execution Devices:['CPU'] + [ INFO ] Count: 3636 iterations + [ INFO ] Duration: 15046.89 ms + [ INFO ] Latency: + [ INFO ] Median: 24.53 ms + [ INFO ] Average: 24.70 ms + [ INFO ] Min: 12.80 ms + [ INFO ] Max: 40.79 ms + [ INFO ] Throughput: 241.64 FPS + + +Other ways to optimize model +---------------------------- + + + +The performance could be also improved by another OpenVINO method such +as async inference pipeline or preprocessing API. + +Async Inference pipeline help to utilize the device more optimal. The +key advantage of the Async API is that when a device is busy with +inference, the application can perform other tasks in parallel (for +example, populating inputs or scheduling other requests) rather than +wait for the current inference to complete first. To understand how to +perform async inference using openvino, refer to `Async API +tutorial `__ + +Preprocessing API enables making preprocessing a part of the model +reducing application code and dependency on additional image processing +libraries. 
The main advantage of Preprocessing API is that preprocessing +steps will be integrated into the execution graph and will be performed +on a selected device (CPU/GPU etc.) rather than always being executed on +CPU as part of an application. This will also improve selected device +utilization. For more information, refer to the overview of +`Preprocessing API +tutorial `__. To +see, how it could be used with YOLOV8 object detection model, please, +see `Convert and Optimize YOLOv8 real-time object detection with +OpenVINO tutorial `__ + +Live demo +--------- + + + +The following code runs model inference on a video: + +.. code:: ipython3 + + import collections + import time + import cv2 + from IPython import display + import numpy as np + + + def run_instance_segmentation( + source=0, + flip=False, + use_popup=False, + skip_first_frames=0, + model=seg_model, + device=device.value, + ): + player = None + + ov_config = {} + if device != "CPU": + model.reshape({0: [1, 3, 640, 640]}) + if "GPU" in device or ("AUTO" in device and "GPU" in core.available_devices): + ov_config = {"GPU_DISABLE_WINOGRAD_CONVOLUTION": "YES"} + compiled_model = core.compile_model(model, device, ov_config) + + if seg_model.predictor is None: + custom = {"conf": 0.25, "batch": 1, "save": False, "mode": "predict"} # method defaults + args = {**seg_model.overrides, **custom} + seg_model.predictor = seg_model._smart_load("predictor")(overrides=args, _callbacks=seg_model.callbacks) + seg_model.predictor.setup_model(model=seg_model.model) + + seg_model.predictor.model.ov_compiled_model = compiled_model + + try: + # Create a video player to play with target fps. + player = VideoPlayer(source=source, flip=flip, fps=30, skip_first_frames=skip_first_frames) + # Start capturing. + player.start() + if use_popup: + title = "Press ESC to Exit" + cv2.namedWindow(winname=title, flags=cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE) + + processing_times = collections.deque() + while True: + # Grab the frame. + frame = player.next() + if frame is None: + print("Source ended") + break + # If the frame is larger than full HD, reduce size to improve the performance. + scale = 1280 / max(frame.shape) + if scale < 1: + frame = cv2.resize( + src=frame, + dsize=None, + fx=scale, + fy=scale, + interpolation=cv2.INTER_AREA, + ) + # Get the results. + input_image = np.array(frame) + + start_time = time.time() + detections = seg_model(input_image) + stop_time = time.time() + frame = detections[0].plot() + + processing_times.append(stop_time - start_time) + # Use processing times from last 200 frames. + if len(processing_times) > 200: + processing_times.popleft() + + _, f_width = frame.shape[:2] + # Mean processing time [ms]. + processing_time = np.mean(processing_times) * 1000 + fps = 1000 / processing_time + cv2.putText( + img=frame, + text=f"Inference time: {processing_time:.1f}ms ({fps:.1f} FPS)", + org=(20, 40), + fontFace=cv2.FONT_HERSHEY_COMPLEX, + fontScale=f_width / 1000, + color=(0, 0, 255), + thickness=1, + lineType=cv2.LINE_AA, + ) + # Use this workaround if there is flickering. + if use_popup: + cv2.imshow(winname=title, mat=frame) + key = cv2.waitKey(1) + # escape = 27 + if key == 27: + break + else: + # Encode numpy array to jpg. + _, encoded_img = cv2.imencode(ext=".jpg", img=frame, params=[cv2.IMWRITE_JPEG_QUALITY, 100]) + # Create an IPython image. + i = display.Image(data=encoded_img) + # Display the image in this notebook. 
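+                    # clear_output(wait=True) postpones clearing the previous frame until the new one is ready, which reduces flickering.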
+ display.clear_output(wait=True) + display.display(i) + # ctrl-c + except KeyboardInterrupt: + print("Interrupted") + # any different error + except RuntimeError as e: + print(e) + finally: + if player is not None: + # Stop capturing. + player.stop() + if use_popup: + cv2.destroyAllWindows() + +Run Live Object Detection and Segmentation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +Use a webcam as the video input. By default, the primary webcam is set +with \ ``source=0``. If you have multiple webcams, each one will be +assigned a consecutive number starting at 0. Set \ ``flip=True`` when +using a front-facing camera. Some web browsers, especially Mozilla +Firefox, may cause flickering. If you experience flickering, +set \ ``use_popup=True``. + + **NOTE**: To use this notebook with a webcam, you need to run the + notebook on a computer with a webcam. If you run the notebook on a + remote server (for example, in Binder or Google Colab service), the + webcam will not work. By default, the lower cell will run model + inference on a video file. If you want to try live inference on your + webcam set ``WEBCAM_INFERENCE = True`` + +.. code:: ipython3 + + WEBCAM_INFERENCE = False + + if WEBCAM_INFERENCE: + VIDEO_SOURCE = 0 # Webcam + else: + VIDEO_SOURCE = "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/video/people.mp4" + +.. code:: ipython3 + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + +.. code:: ipython3 + + run_instance_segmentation( + source=VIDEO_SOURCE, + flip=True, + use_popup=False, + model=seg_ov_model, + device=device.value, + ) + + + +.. image:: yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_46_0.png + + +.. 
parsed-literal:: + + Source ended + diff --git a/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_10_3.jpg b/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_10_3.jpg new file mode 100644 index 00000000000000..6af162d588f1c9 --- /dev/null +++ b/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_10_3.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef1e75e120cff1a1712a4a674f8a0040510f02abb0378a2e43e913bd08564ed0 +size 85215 diff --git a/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_10_3.png b/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_10_3.png new file mode 100644 index 00000000000000..68c7cbf06f0558 --- /dev/null +++ b/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_10_3.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8113b21b8d62794465895372f799cd69b3d1d51260521893a7fc8767dcb8b02b +size 773890 diff --git a/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_19_1.jpg b/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_19_1.jpg new file mode 100644 index 00000000000000..22886c54cd44db --- /dev/null +++ b/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_19_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cd9c6e6398837315b6bc57c03f059a1299204de2990c9c3e3ff8bbdb7432d66 +size 85069 diff --git a/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_19_1.png b/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_19_1.png new file mode 100644 index 00000000000000..123911efcb1f68 --- /dev/null +++ b/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_19_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b649babe11466c4ba5ffe65e0d5cb5cbfd1609f3c8c027b8e4cee51e9f34dc0 +size 781439 diff --git a/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_34_1.jpg b/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_34_1.jpg new file mode 100644 index 00000000000000..fef271afdbc7d5 --- /dev/null +++ b/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_34_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33c499fe056e57956b48dcc00c9c296a77dc87bcbaf4d191dff14838040f93ce +size 84997 diff --git a/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_34_1.png b/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_34_1.png new file mode 100644 index 00000000000000..84af202e294894 --- /dev/null +++ b/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_34_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6585b04c1a519b056ce95e4205f04000db757cc9d64f53194d3a65dd4578cdf5 +size 782898 diff --git 
a/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_46_0.png b/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_46_0.png new file mode 100644 index 00000000000000..32694d34889741 --- /dev/null +++ b/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_46_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceb4f1951cf92d0a2365750af32b945a90f65102311933328225f2dbd7b802c1 +size 496675 diff --git a/docs/notebooks/yolov11-keypoint-detection-with-output.rst b/docs/notebooks/yolov11-keypoint-detection-with-output.rst new file mode 100644 index 00000000000000..cc830279de6753 --- /dev/null +++ b/docs/notebooks/yolov11-keypoint-detection-with-output.rst @@ -0,0 +1,1109 @@ +Convert and Optimize YOLOv11 keypoint detection model with OpenVINO™ +==================================================================== + +Keypoint detection/Pose is a task that involves detecting specific +points in an image or video frame. These points are referred to as +keypoints and are used to track movement or pose estimation. YOLOv11 can +detect keypoints in an image or video frame with high accuracy and +speed. + +This tutorial demonstrates step-by-step instructions on how to run and +optimize `PyTorch YOLOv11 Pose +model `__ with OpenVINO. We +consider the steps required for keypoint detection scenario. You can +find more details about model on `model +page `__ in Ultralytics +documentation. + +The tutorial consists of the following steps: - Prepare the PyTorch +model. - Download and prepare a dataset. - Validate the original model. +- Convert the PyTorch model to OpenVINO IR. - Validate the converted +model. - Prepare and run optimization pipeline. - Compare performance of +the FP32 and quantized models. - Compare accuracy of the FP32 and +quantized models. - Live demo + + +**Table of contents:** + + +- `Get PyTorch model <#get-pytorch-model>`__ + + - `Prerequisites <#prerequisites>`__ + +- `Instantiate model <#instantiate-model>`__ + + - `Convert model to OpenVINO IR <#convert-model-to-openvino-ir>`__ + - `Verify model inference <#verify-model-inference>`__ + - `Select inference device <#select-inference-device>`__ + - `Test on single image <#test-on-single-image>`__ + +- `Optimize model using NNCF Post-training Quantization + API <#optimize-model-using-nncf-post-training-quantization-api>`__ + + - `Validate Quantized model + inference <#validate-quantized-model-inference>`__ + +- `Compare the Original and Quantized + Models <#compare-the-original-and-quantized-models>`__ + + - `Compare performance of the Original and Quantized + Models <#compare-performance-of-the-original-and-quantized-models>`__ + +- `Other ways to optimize model <#other-ways-to-optimize-model>`__ +- `Live demo <#live-demo>`__ + + - `Run Keypoint Detection on + video <#run-keypoint-detection-on-video>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +Get PyTorch model +----------------- + + + +Generally, PyTorch models represent an instance of the +`torch.nn.Module `__ +class, initialized by a state dictionary with model weights. 
We will use +the YOLOv11 nano model (also known as ``yolo11n-pose``) pre-trained on a +COCO dataset, which is available in this +`repo `__. Similar steps are +also applicable to other Ultralytics models. Typical steps to obtain a +pre-trained model: 1. Create an instance of a model class. 2. Load a +checkpoint state dict, which contains the pre-trained model weights. 3. +Turn the model to evaluation for switching some operations to inference +mode. + +In this case, the creators of the model provide an API that enables +converting the YOLOv11 model to OpenVINO IR. Therefore, we do not need +to do these steps manually. + +Prerequisites +^^^^^^^^^^^^^ + + + +Install necessary packages. + +.. code:: ipython3 + + %pip install -q "openvino>=2024.0.0" "nncf>=2.9.0" + %pip install -q "protobuf==3.20.*" "torch>=2.1" "torchvision>=0.16" "ultralytics==8.3.0" tqdm opencv-python --extra-index-url https://download.pytorch.org/whl/cpu + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + + +Import required utility functions. The lower cell will download the +``notebook_utils`` Python module from GitHub. + +.. code:: ipython3 + + from pathlib import Path + + # Fetch `notebook_utils` module + import requests + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + + open("notebook_utils.py", "w").write(r.text) + from notebook_utils import download_file, VideoPlayer, device_widget + +.. code:: ipython3 + + # Download a test sample + IMAGE_PATH = Path("./data/intel_rnb.jpg") + download_file( + url="https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/intel_rnb.jpg", + filename=IMAGE_PATH.name, + directory=IMAGE_PATH.parent, + ) + + + +.. parsed-literal:: + + data/intel_rnb.jpg: 0%| | 0.00/288k [00:00`__ provides a suite of +advanced algorithms for Neural Networks inference optimization in +OpenVINO with minimal accuracy drop. We will use 8-bit quantization in +post-training mode (without the fine-tuning pipeline) to optimize +YOLOv8. + +The optimization process contains the following steps: + +1. Create a Dataset for quantization. +2. Run ``nncf.quantize`` for getting an optimized model. +3. Serialize OpenVINO IR model, using the ``openvino.runtime.serialize`` + function. + +Please select below whether you would like to run quantization to +improve model inference speed. + +.. code:: ipython3 + + import ipywidgets as widgets + + int8_model_pose_path = Path(f"{POSE_MODEL_NAME}_openvino_int8_model/{POSE_MODEL_NAME}.xml") + quantized_pose_model = None + + to_quantize = widgets.Checkbox( + value=True, + description="Quantization", + disabled=False, + ) + + to_quantize + + + + +.. parsed-literal:: + + Checkbox(value=True, description='Quantization') + + + +Let’s load ``skip magic`` extension to skip quantization if +``to_quantize`` is not selected + +.. code:: ipython3 + + # Fetch skip_kernel_extension module + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", + ) + open("skip_kernel_extension.py", "w").write(r.text) + + %load_ext skip_kernel_extension + +Reuse validation dataloader in accuracy testing for quantization. For +that, it should be wrapped into the ``nncf.Dataset`` object and define a +transformation function for getting only input tensors. + +.. 
code:: ipython3 + + %%skip not $to_quantize.value + + import nncf + from typing import Dict + + from zipfile import ZipFile + + from ultralytics.data.utils import DATASETS_DIR + from ultralytics.utils import DEFAULT_CFG + from ultralytics.cfg import get_cfg + from ultralytics.data.utils import check_det_dataset + from ultralytics.models.yolo.pose import PoseValidator + from ultralytics.utils.metrics import OKS_SIGMA + + if not int8_model_pose_path.exists(): + + DATA_URL = "https://ultralytics.com/assets/coco8-pose.zip" + CFG_URL = "https://raw.githubusercontent.com/ultralytics/ultralytics/v8.1.0/ultralytics/cfg/datasets/coco8-pose.yaml" + + OUT_DIR = DATASETS_DIR + + DATA_PATH = OUT_DIR / "val2017.zip" + CFG_PATH = OUT_DIR / "coco8-pose.yaml" + + download_file(DATA_URL, DATA_PATH.name, DATA_PATH.parent) + download_file(CFG_URL, CFG_PATH.name, CFG_PATH.parent) + + if not (OUT_DIR / "coco8-pose/labels").exists(): + with ZipFile(DATA_PATH, "r") as zip_ref: + zip_ref.extractall(OUT_DIR) + + args = get_cfg(cfg=DEFAULT_CFG) + args.data = "coco8-pose.yaml" + + pose_validator = PoseValidator(args=args) + pose_validator.data = check_det_dataset(args.data) + pose_validator.stride = 32 + pose_data_loader = pose_validator.get_dataloader(OUT_DIR / "coco8-pose", 1) + + pose_validator.is_coco = True + pose_validator.names = label_map + pose_validator.metrics.names = pose_validator.names + pose_validator.nc = 1 + pose_validator.sigma = OKS_SIGMA + + + def transform_fn(data_item:Dict): + """ + Quantization transform function. Extracts and preprocess input data from dataloader item for quantization. + Parameters: + data_item: Dict with data item produced by DataLoader during iteration + Returns: + input_tensor: Input data for quantization + """ + input_tensor = pose_validator.preprocess(data_item)['img'].numpy() + return input_tensor + + + quantization_dataset = nncf.Dataset(pose_data_loader, transform_fn) + + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + + + +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-785/.workspace/scm/datasets/val2017.zip: 0%| … + + + +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-785/.workspace/scm/datasets/coco8-pose.yaml: 0%… + + +.. parsed-literal:: + + val: Scanning /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-785/.workspace/scm/datasets/coco8-pose/labels/train.cache... 8 images, 0 backgrounds, 0 corrupt: 100%|██████████| 8/8 [00:00`__ +to measure the inference performance of the ``FP32`` and ``INT8`` +models. + + **Note**: For more accurate performance, it is recommended to run + ``benchmark_app`` in a terminal/command prompt after closing other + applications. Run + ``benchmark_app -m -d CPU -shape ""`` to + benchmark async inference on CPU on specific input data shape for one + minute. Change ``CPU`` to ``GPU`` to benchmark on GPU. Run + ``benchmark_app --help`` to see an overview of all command-line + options. + +.. code:: ipython3 + + %%skip not $to_quantize.value + + device + +.. code:: ipython3 + + if int8_model_pose_path.exists(): + # Inference FP32 model (OpenVINO IR) + !benchmark_app -m $pose_model_path -d $device.value -api async -shape "[1,3,640,640]" + + +.. 
parsed-literal:: + + [Step 1/11] Parsing and validating input arguments + [ INFO ] Parsing input parameters + [Step 2/11] Loading OpenVINO Runtime + [ WARNING ] Default duration 120 seconds is used for unknown device AUTO + [ INFO ] OpenVINO: + [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] + [ INFO ] Device info: + [ INFO ] AUTO + [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] + [ INFO ] + [Step 3/11] Setting device configuration + [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. + [Step 4/11] Reading model files + [ INFO ] Loading model files + [ INFO ] Read model took 19.80 ms + [ INFO ] Original model I/O parameters: + [ INFO ] Model inputs: + [ INFO ] x (node: x) : f32 / [...] / [?,3,?,?] + [ INFO ] Model outputs: + [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_9) : f32 / [...] / [?,56,21..] + [Step 5/11] Resizing model to match image sizes and given batch + [ INFO ] Model batch size: 1 + [ INFO ] Reshaping model: 'x': [1,3,640,640] + [ INFO ] Reshape model took 5.17 ms + [Step 6/11] Configuring input of the model + [ INFO ] Model inputs: + [ INFO ] x (node: x) : u8 / [N,C,H,W] / [1,3,640,640] + [ INFO ] Model outputs: + [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_9) : f32 / [...] / [1,56,8400] + [Step 7/11] Loading the model to the device + [ INFO ] Compile model took 338.05 ms + [Step 8/11] Querying optimal runtime parameters + [ INFO ] Model: + [ INFO ] NETWORK_NAME: Model0 + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 6 + [ INFO ] MULTI_DEVICE_PRIORITIES: CPU + [ INFO ] CPU: + [ INFO ] AFFINITY: Affinity.CORE + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 + [ INFO ] DYNAMIC_QUANTIZATION_GROUP_SIZE: 32 + [ INFO ] ENABLE_CPU_PINNING: True + [ INFO ] ENABLE_HYPER_THREADING: True + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE + [ INFO ] INFERENCE_NUM_THREADS: 24 + [ INFO ] INFERENCE_PRECISION_HINT: + [ INFO ] KV_CACHE_PRECISION: + [ INFO ] LOG_LEVEL: Level.NO + [ INFO ] MODEL_DISTRIBUTION_POLICY: set() + [ INFO ] NETWORK_NAME: Model0 + [ INFO ] NUM_STREAMS: 6 + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 6 + [ INFO ] PERFORMANCE_HINT: THROUGHPUT + [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 + [ INFO ] PERF_COUNT: NO + [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE + [ INFO ] MODEL_PRIORITY: Priority.MEDIUM + [ INFO ] LOADED_FROM_CACHE: False + [ INFO ] PERF_COUNT: False + [Step 9/11] Creating infer requests and preparing input tensors + [ WARNING ] No input files were given for input 'x'!. This input will be filled with random values! + [ INFO ] Fill input 'x' with random values + [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 120000 ms duration) + [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). + [ INFO ] First inference took 33.00 ms + [Step 11/11] Dumping statistics report + [ INFO ] Execution Devices:['CPU'] + [ INFO ] Count: 17292 iterations + [ INFO ] Duration: 120045.86 ms + [ INFO ] Latency: + [ INFO ] Median: 40.71 ms + [ INFO ] Average: 41.52 ms + [ INFO ] Min: 29.33 ms + [ INFO ] Max: 102.50 ms + [ INFO ] Throughput: 144.04 FPS + + +.. 
code:: ipython3 + + if int8_model_pose_path.exists(): + # Inference INT8 model (OpenVINO IR) + !benchmark_app -m $int8_model_pose_path -d $device.value -api async -shape "[1,3,640,640]" -t 15 + + +.. parsed-literal:: + + [Step 1/11] Parsing and validating input arguments + [ INFO ] Parsing input parameters + [Step 2/11] Loading OpenVINO Runtime + [ INFO ] OpenVINO: + [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] + [ INFO ] Device info: + [ INFO ] AUTO + [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] + [ INFO ] + [Step 3/11] Setting device configuration + [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. + [Step 4/11] Reading model files + [ INFO ] Loading model files + [ INFO ] Read model took 29.03 ms + [ INFO ] Original model I/O parameters: + [ INFO ] Model inputs: + [ INFO ] x (node: x) : f32 / [...] / [1,3,640,640] + [ INFO ] Model outputs: + [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_9) : f32 / [...] / [1,56,8400] + [Step 5/11] Resizing model to match image sizes and given batch + [ INFO ] Model batch size: 1 + [ INFO ] Reshaping model: 'x': [1,3,640,640] + [ INFO ] Reshape model took 0.04 ms + [Step 6/11] Configuring input of the model + [ INFO ] Model inputs: + [ INFO ] x (node: x) : u8 / [N,C,H,W] / [1,3,640,640] + [ INFO ] Model outputs: + [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_9) : f32 / [...] / [1,56,8400] + [Step 7/11] Loading the model to the device + [ INFO ] Compile model took 558.98 ms + [Step 8/11] Querying optimal runtime parameters + [ INFO ] Model: + [ INFO ] NETWORK_NAME: Model0 + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12 + [ INFO ] MULTI_DEVICE_PRIORITIES: CPU + [ INFO ] CPU: + [ INFO ] AFFINITY: Affinity.CORE + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 + [ INFO ] DYNAMIC_QUANTIZATION_GROUP_SIZE: 32 + [ INFO ] ENABLE_CPU_PINNING: True + [ INFO ] ENABLE_HYPER_THREADING: True + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE + [ INFO ] INFERENCE_NUM_THREADS: 24 + [ INFO ] INFERENCE_PRECISION_HINT: + [ INFO ] KV_CACHE_PRECISION: + [ INFO ] LOG_LEVEL: Level.NO + [ INFO ] MODEL_DISTRIBUTION_POLICY: set() + [ INFO ] NETWORK_NAME: Model0 + [ INFO ] NUM_STREAMS: 12 + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12 + [ INFO ] PERFORMANCE_HINT: THROUGHPUT + [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 + [ INFO ] PERF_COUNT: NO + [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE + [ INFO ] MODEL_PRIORITY: Priority.MEDIUM + [ INFO ] LOADED_FROM_CACHE: False + [ INFO ] PERF_COUNT: False + [Step 9/11] Creating infer requests and preparing input tensors + [ WARNING ] No input files were given for input 'x'!. This input will be filled with random values! + [ INFO ] Fill input 'x' with random values + [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 15000 ms duration) + [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
+ [ INFO ] First inference took 31.29 ms + [Step 11/11] Dumping statistics report + [ INFO ] Execution Devices:['CPU'] + [ INFO ] Count: 5184 iterations + [ INFO ] Duration: 15060.72 ms + [ INFO ] Latency: + [ INFO ] Median: 34.51 ms + [ INFO ] Average: 34.67 ms + [ INFO ] Min: 21.44 ms + [ INFO ] Max: 51.10 ms + [ INFO ] Throughput: 344.21 FPS + + +Compare accuracy of the Original and Quantized Models +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +As we can see, there is no significant difference between ``INT8`` and +float model result in a single image test. To understand how +quantization influences model prediction precision, we can compare model +accuracy on a dataset. + +Other ways to optimize model +---------------------------- + + + +The performance could be also improved by another OpenVINO method such +as async inference pipeline or preprocessing API. + +Async Inference pipeline help to utilize the device more optimal. The +key advantage of the Async API is that when a device is busy with +inference, the application can perform other tasks in parallel (for +example, populating inputs or scheduling other requests) rather than +wait for the current inference to complete first. To understand how to +perform async inference using openvino, refer to `Async API +tutorial `__ + +Preprocessing API enables making preprocessing a part of the model +reducing application code and dependency on additional image processing +libraries. The main advantage of Preprocessing API is that preprocessing +steps will be integrated into the execution graph and will be performed +on a selected device (CPU/GPU etc.) rather than always being executed on +CPU as part of an application. This will also improve selected device +utilization. For more information, refer to the overview of +`Preprocessing API +tutorial `__. To +see, how it could be used with YOLOV8 object detection model , please, +see `Convert and Optimize YOLOv8 real-time object detection with +OpenVINO tutorial `__ + +Live demo +--------- + + + +The following code runs model inference on a video: + +.. code:: ipython3 + + import collections + import time + from IPython import display + import cv2 + import numpy as np + + + def run_keypoint_detection( + source=0, + flip=False, + use_popup=False, + skip_first_frames=0, + model=pose_model, + device=device.value, + ): + player = None + + ov_config = {} + if device != "CPU": + model.reshape({0: [1, 3, 640, 640]}) + if "GPU" in device or ("AUTO" in device and "GPU" in core.available_devices): + ov_config = {"GPU_DISABLE_WINOGRAD_CONVOLUTION": "YES"} + compiled_model = core.compile_model(model, device, ov_config) + + if pose_model.predictor is None: + custom = {"conf": 0.25, "batch": 1, "save": False, "mode": "predict"} # method defaults + args = {**seg_model.overrides, **custom} + pose_model.predictor = pose_model._smart_load("predictor")(overrides=args, _callbacks=pose_model.callbacks) + pose_model.predictor.setup_model(model=pose_model.model) + + pose_model.predictor.model.ov_compiled_model = compiled_model + + try: + # Create a video player to play with target fps. + player = VideoPlayer(source=source, flip=flip, fps=30, skip_first_frames=skip_first_frames) + # Start capturing. + player.start() + if use_popup: + title = "Press ESC to Exit" + cv2.namedWindow(winname=title, flags=cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE) + + processing_times = collections.deque() + while True: + # Grab the frame. 
+ frame = player.next() + if frame is None: + print("Source ended") + break + # If the frame is larger than full HD, reduce size to improve the performance. + scale = 1280 / max(frame.shape) + if scale < 1: + frame = cv2.resize( + src=frame, + dsize=None, + fx=scale, + fy=scale, + interpolation=cv2.INTER_AREA, + ) + # Get the results + input_image = np.array(frame) + + start_time = time.time() + + detections = pose_model(input_image) + stop_time = time.time() + frame = detections[0].plot() + + processing_times.append(stop_time - start_time) + # Use processing times from last 200 frames. + if len(processing_times) > 200: + processing_times.popleft() + + _, f_width = frame.shape[:2] + # Mean processing time [ms]. + processing_time = np.mean(processing_times) * 1000 + fps = 1000 / processing_time + cv2.putText( + img=frame, + text=f"Inference time: {processing_time:.1f}ms ({fps:.1f} FPS)", + org=(20, 40), + fontFace=cv2.FONT_HERSHEY_COMPLEX, + fontScale=f_width / 1000, + color=(0, 0, 255), + thickness=1, + lineType=cv2.LINE_AA, + ) + # Use this workaround if there is flickering. + if use_popup: + cv2.imshow(winname=title, mat=frame) + key = cv2.waitKey(1) + # escape = 27 + if key == 27: + break + else: + # Encode numpy array to jpg. + _, encoded_img = cv2.imencode(ext=".jpg", img=frame, params=[cv2.IMWRITE_JPEG_QUALITY, 100]) + # Create an IPython image. + i = display.Image(data=encoded_img) + # Display the image in this notebook. + display.clear_output(wait=True) + display.display(i) + # ctrl-c + except KeyboardInterrupt: + print("Interrupted") + # any different error + except RuntimeError as e: + print(e) + finally: + if player is not None: + # Stop capturing. + player.stop() + if use_popup: + cv2.destroyAllWindows() + +Run Keypoint Detection on video +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + VIDEO_SOURCE = "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/video/people.mp4" + +.. code:: ipython3 + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + +.. code:: ipython3 + + run_keypoint_detection( + source=VIDEO_SOURCE, + flip=True, + use_popup=False, + model=pose_ov_model, + device=device.value, + ) + + + +.. image:: yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_43_0.png + + +.. 
parsed-literal:: + + Source ended + diff --git a/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_16_1.jpg b/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_16_1.jpg new file mode 100644 index 00000000000000..0fe619b4d25368 --- /dev/null +++ b/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_16_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48d9b79f4e9821906a5550d2872da58719e5fdf14f21ec386d73a11f21466293 +size 59324 diff --git a/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_16_1.png b/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_16_1.png new file mode 100644 index 00000000000000..911d0dc24506c1 --- /dev/null +++ b/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_16_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0ebeed7fce17f3ff093d8a8c3ed5f20e4caf98957b7c807215eb5f58c72b851 +size 581548 diff --git a/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_30_1.jpg b/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_30_1.jpg new file mode 100644 index 00000000000000..0fe619b4d25368 --- /dev/null +++ b/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_30_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48d9b79f4e9821906a5550d2872da58719e5fdf14f21ec386d73a11f21466293 +size 59324 diff --git a/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_30_1.png b/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_30_1.png new file mode 100644 index 00000000000000..911d0dc24506c1 --- /dev/null +++ b/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_30_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0ebeed7fce17f3ff093d8a8c3ed5f20e4caf98957b7c807215eb5f58c72b851 +size 581548 diff --git a/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_43_0.png b/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_43_0.png new file mode 100644 index 00000000000000..a03840fa0f4bec --- /dev/null +++ b/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_43_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6b92af78802b6615cc954b803042caa64f3969c1947652978beaaf6481a5104 +size 507485 diff --git a/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_9_3.jpg b/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_9_3.jpg new file mode 100644 index 00000000000000..036407491e7f8f --- /dev/null +++ b/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_9_3.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b389ac8525169a7070c184be29337d4a7af221d63e9ff6a089683064c1d4389 +size 59379 diff --git a/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_9_3.png 
b/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_9_3.png new file mode 100644 index 00000000000000..76ea9232fe5123 --- /dev/null +++ b/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_9_3.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f91e9067fabaaa5433f0866617c33e508eaa7d10af97bec52e77b25529c13cb +size 581985 diff --git a/docs/notebooks/yolov11-object-detection-with-output.rst b/docs/notebooks/yolov11-object-detection-with-output.rst new file mode 100644 index 00000000000000..40256995ebd00f --- /dev/null +++ b/docs/notebooks/yolov11-object-detection-with-output.rst @@ -0,0 +1,1067 @@ +Convert and Optimize YOLOv11 real-time object detection with OpenVINO™ +====================================================================== + +Real-time object detection is often used as a key component in computer +vision systems. Applications that use real-time object detection models +include video analytics, robotics, autonomous vehicles, multi-object +tracking and object counting, medical image analysis, and many others. + +This tutorial demonstrates step-by-step instructions on how to run and +optimize PyTorch YOLOv11 with OpenVINO. We consider the steps required +for object detection scenario. You can find more details about model on +`model page `__ in +Ultralytics documentation + +The tutorial consists of the following steps: - Prepare the PyTorch +model. - Download and prepare a dataset. - Validate the original model. +- Convert the PyTorch model to OpenVINO IR. - Prepare and run +optimization pipeline. - Compare performance of the FP32 and quantized +models. - Other optimization possibilities with OpenVINO api - Live demo + + +**Table of contents:** + + +- `Get PyTorch model <#get-pytorch-model>`__ + + - `Prerequisites <#prerequisites>`__ + +- `Instantiate model <#instantiate-model>`__ + + - `Convert model to OpenVINO IR <#convert-model-to-openvino-ir>`__ + - `Verify model inference <#verify-model-inference>`__ + - `Select inference device <#select-inference-device>`__ + - `Test on single image <#test-on-single-image>`__ + +- `Optimize model using NNCF Post-training Quantization + API <#optimize-model-using-nncf-post-training-quantization-api>`__ + + - `Validate Quantized model + inference <#validate-quantized-model-inference>`__ + +- `Compare the Original and Quantized + Models <#compare-the-original-and-quantized-models>`__ + + - `Compare performance object detection + models <#compare-performance-object-detection-models>`__ + +- `Next steps <#next-steps>`__ + + - `Async inference pipeline <#async-inference-pipeline>`__ + +- `Live demo <#live-demo>`__ + + - `Run Live Object Detection <#run-live-object-detection>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +Get PyTorch model +----------------- + + + +Generally, PyTorch models represent an instance of the +`torch.nn.Module `__ +class, initialized by a state dictionary with model weights. We will use +the YOLOv11 nano model (also known as ``yolo11n``) pre-trained on a COCO +dataset, which is available in this +`repo `__. Similar steps are +also applicable to other YOLOv11 models. Typical steps to obtain a +pre-trained model: 1. Create an instance of a model class. 2. 
Load a +checkpoint state dict, which contains the pre-trained model weights. 3. +Turn the model to evaluation for switching some operations to inference +mode. + +In this case, the creators of the model provide an API that enables +converting the YOLOv11 model to OpenVINO IR. Therefore, we do not need +to do these steps manually. + +Prerequisites +^^^^^^^^^^^^^ + + + +Install necessary packages. + +.. code:: ipython3 + + %pip install -q "openvino>=2024.0.0" "nncf>=2.9.0" + %pip install -q "torch>=2.1" "torchvision>=0.16" "ultralytics==8.3.0" onnx tqdm opencv-python --extra-index-url https://download.pytorch.org/whl/cpu + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + + +Import required utility functions. The lower cell will download the +``notebook_utils`` Python module from GitHub. + +.. code:: ipython3 + + from pathlib import Path + + # Fetch `notebook_utils` module + import requests + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + + open("notebook_utils.py", "w").write(r.text) + + from notebook_utils import download_file, VideoPlayer, device_widget, quantization_widget + +.. code:: ipython3 + + # Download a test sample + IMAGE_PATH = Path("./data/coco_bike.jpg") + download_file( + url="https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco_bike.jpg", + filename=IMAGE_PATH.name, + directory=IMAGE_PATH.parent, + ) + + +.. parsed-literal:: + + 'data/coco_bike.jpg' already exists. + + + + +.. parsed-literal:: + + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg') + + + +Instantiate model +----------------- + + + +There are `several +models `__ available in the +original repository, targeted for different tasks. For loading the +model, required to specify a path to the model checkpoint. It can be +some local path or name available on models hub (in this case model +checkpoint will be downloaded automatically). + +You can select one of represented model using widget bellow: + +.. code:: ipython3 + + import ipywidgets as widgets + + model_id = ["yolo11n", "yolo11s", "yolo11m", "yolo11l", "yolo11x", "yolov8n", "yolov8s", "yolov8m", "yolov8l", "yolov8x"] + + model_name = widgets.Dropdown(options=model_id, value=model_id[0], description="Model") + + model_name + + + + +.. parsed-literal:: + + Dropdown(description='Model', options=('yolo11n', 'yolo11s', 'yolo11m', 'yolo11l', 'yolo11x', 'yolov8n', 'yolo… + + + +Making prediction, the model accepts a path to input image and returns +list with Results class object. Results contains boxes for object +detection model. Also it contains utilities for processing results, for +example, ``plot()`` method for drawing. + +Let us consider the examples: + +.. code:: ipython3 + + from PIL import Image + from ultralytics import YOLO + + DET_MODEL_NAME = model_name.value + + det_model = YOLO(f"{DET_MODEL_NAME}.pt") + det_model.to("cpu") + label_map = det_model.model.names + + res = det_model(IMAGE_PATH) + Image.fromarray(res[0].plot()[:, :, ::-1]) + + +.. parsed-literal:: + + Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt to 'yolo11n.pt'... + + +.. parsed-literal:: + + 100%|██████████| 5.35M/5.35M [00:00<00:00, 24.0MB/s] + + +.. 
parsed-literal:: + + + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 480x640 2 bicycles, 2 cars, 2 dogs, 79.4ms + Speed: 2.5ms preprocess, 79.4ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640) + + + + +.. image:: yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_10_3.png + + + +Convert model to OpenVINO IR +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +Ultralytics provides API for convenient model exporting to different +formats including OpenVINO IR. ``model.export`` is responsible for model +conversion. We need to specify the format, and additionally, we can +preserve dynamic shapes in the model. + +.. code:: ipython3 + + # object detection model + det_model_path = Path(f"{DET_MODEL_NAME}_openvino_model/{DET_MODEL_NAME}.xml") + if not det_model_path.exists(): + det_model.export(format="openvino", dynamic=True, half=True) + + +.. parsed-literal:: + + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.3.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + + PyTorch: starting from 'yolo11n.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (5.4 MB) + + OpenVINO: starting export with openvino 2024.5.0-16913-890f2e12c98... + OpenVINO: export success ✅ 1.8s, saved as 'yolo11n_openvino_model/' (5.4 MB) + + Export complete (2.0s) + Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization + Predict: yolo predict task=detect model=yolo11n_openvino_model imgsz=640 half + Validate: yolo val task=detect model=yolo11n_openvino_model imgsz=640 data=/usr/src/ultralytics/ultralytics/cfg/datasets/coco.yaml half + Visualize: https://netron.app + + +Verify model inference +~~~~~~~~~~~~~~~~~~~~~~ + + + +We can reuse the base model pipeline for pre- and postprocessing just +replacing the inference method where we will use the IR model for +inference. + +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~ + + + +Select device from dropdown list for running inference using OpenVINO + +.. code:: ipython3 + + device = device_widget() + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + +Test on single image +~~~~~~~~~~~~~~~~~~~~ + + + +Now, once we have defined preprocessing and postprocessing steps, we are +ready to check model prediction for object detection. + +.. code:: ipython3 + + import openvino as ov + + core = ov.Core() + + det_ov_model = core.read_model(det_model_path) + + ov_config = {} + if device.value != "CPU": + det_ov_model.reshape({0: [1, 3, 640, 640]}) + if "GPU" in device.value or ("AUTO" in device.value and "GPU" in core.available_devices): + ov_config = {"GPU_DISABLE_WINOGRAD_CONVOLUTION": "YES"} + det_compiled_model = core.compile_model(det_ov_model, device.value, ov_config) + det_model = YOLO(det_model_path.parent, task="detect") + + if det_model.predictor is None: + custom = {"conf": 0.25, "batch": 1, "save": False, "mode": "predict"} # method defaults + args = {**det_model.overrides, **custom} + det_model.predictor = det_model._smart_load("predictor")(overrides=args, _callbacks=det_model.callbacks) + det_model.predictor.setup_model(model=det_model.model) + + det_model.predictor.model.ov_compiled_model = det_compiled_model + + res = det_model(IMAGE_PATH) + Image.fromarray(res[0].plot()[:, :, ::-1]) + + +.. 
parsed-literal:: + + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.3.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Loading yolo11n_openvino_model for OpenVINO inference... + Using OpenVINO LATENCY mode for batch=1 inference... + + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 1 bicycle, 2 cars, 1 dog, 20.7ms + Speed: 2.8ms preprocess, 20.7ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640) + + + + +.. image:: yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_17_1.png + + + +Optimize model using NNCF Post-training Quantization API +-------------------------------------------------------- + + + +`NNCF `__ provides a suite of +advanced algorithms for Neural Networks inference optimization in +OpenVINO with minimal accuracy drop. We will use 8-bit quantization in +post-training mode (without the fine-tuning pipeline) to optimize model. + +The optimization process contains the following steps: + +1. Create a Dataset for quantization. +2. Run ``nncf.quantize`` for getting an optimized model. +3. Serialize OpenVINO IR model, using the ``openvino.runtime.serialize`` + function. + +Please select below whether you would like to run quantization to +improve model inference speed. + +.. code:: ipython3 + + int8_model_det_path = Path(f"{DET_MODEL_NAME}_openvino_int8_model/{DET_MODEL_NAME}.xml") + quantized_det_model = None + + to_quantize = quantization_widget() + + to_quantize + + + + +.. parsed-literal:: + + Checkbox(value=True, description='Quantization') + + + +Let’s load ``skip magic`` extension to skip quantization if +``to_quantize`` is not selected + +.. code:: ipython3 + + # Fetch skip_kernel_extension module + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", + ) + open("skip_kernel_extension.py", "w").write(r.text) + + %load_ext skip_kernel_extension + +.. 
code:: ipython3 + + %%skip not $to_quantize.value + + from ultralytics.utils import DEFAULT_CFG + from ultralytics.cfg import get_cfg + from ultralytics.data.converter import coco80_to_coco91_class + from ultralytics.data.utils import check_det_dataset + from zipfile import ZipFile + + from ultralytics.data.utils import DATASETS_DIR + + + DATA_URL = "http://images.cocodataset.org/zips/val2017.zip" + LABELS_URL = "https://github.com/ultralytics/yolov5/releases/download/v1.0/coco2017labels-segments.zip" + CFG_URL = "https://raw.githubusercontent.com/ultralytics/ultralytics/v8.1.0/ultralytics/cfg/datasets/coco.yaml" + + OUT_DIR = DATASETS_DIR + + DATA_PATH = OUT_DIR / "val2017.zip" + LABELS_PATH = OUT_DIR / "coco2017labels-segments.zip" + CFG_PATH = OUT_DIR / "coco.yaml" + + if not int8_model_det_path.exists(): + download_file(DATA_URL, DATA_PATH.name, DATA_PATH.parent) + download_file(LABELS_URL, LABELS_PATH.name, LABELS_PATH.parent) + download_file(CFG_URL, CFG_PATH.name, CFG_PATH.parent) + + if not (OUT_DIR / "coco/labels").exists(): + with ZipFile(LABELS_PATH, "r") as zip_ref: + zip_ref.extractall(OUT_DIR) + with ZipFile(DATA_PATH, "r") as zip_ref: + zip_ref.extractall(OUT_DIR / "coco/images") + + + args = get_cfg(cfg=DEFAULT_CFG) + args.data = str(CFG_PATH) + det_validator = det_model.task_map[det_model.task]["validator"](args=args) + det_validator.data = check_det_dataset(args.data) + det_validator.stride = 32 + det_data_loader = det_validator.get_dataloader(OUT_DIR / "coco", 1) + + det_validator.is_coco = True + det_validator.class_map = coco80_to_coco91_class() + det_validator.names = label_map + det_validator.metrics.names = det_validator.names + det_validator.nc = 80 + + + +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-785/.workspace/scm/datasets/val2017.zip: 0%| … + + +.. parsed-literal:: + + '/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-785/.workspace/scm/datasets/coco2017labels-segments.zip' already exists. + + + +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-785/.workspace/scm/datasets/coco.yaml: 0%| … + + +.. parsed-literal:: + + val: Scanning /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-785/.workspace/scm/datasets/coco/labels/val2017.cache... 4952 images, 48 backgrounds, 0 corrupt: 100%|██████████| 5000/5000 [00:00`__ +to measure the inference performance of the ``FP32`` and ``INT8`` +models. + + **Note**: For more accurate performance, it is recommended to run + ``benchmark_app`` in a terminal/command prompt after closing other + applications. Run + ``benchmark_app -m -d CPU -shape ""`` to + benchmark async inference on CPU on specific input data shape for one + minute. Change ``CPU`` to ``GPU`` to benchmark on GPU. Run + ``benchmark_app --help`` to see an overview of all command-line + options. + +.. code:: ipython3 + + %%skip not $to_quantize.value + + device + +.. code:: ipython3 + + if int8_model_det_path.exists(): + # Inference FP32 model (OpenVINO IR) + !benchmark_app -m $det_model_path -d $device.value -api async -shape "[1,3,640,640]" + + +.. parsed-literal:: + + [Step 1/11] Parsing and validating input arguments + [ INFO ] Parsing input parameters + [Step 2/11] Loading OpenVINO Runtime + [ WARNING ] Default duration 120 seconds is used for unknown device AUTO + [ INFO ] OpenVINO: + [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] + [ INFO ] Device info: + [ INFO ] AUTO + [ INFO ] Build ................................. 
2024.5.0-16913-890f2e12c98 + [ INFO ] + [ INFO ] + [Step 3/11] Setting device configuration + [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. + [Step 4/11] Reading model files + [ INFO ] Loading model files + [ INFO ] Read model took 18.57 ms + [ INFO ] Original model I/O parameters: + [ INFO ] Model inputs: + [ INFO ] x (node: x) : f32 / [...] / [?,3,?,?] + [ INFO ] Model outputs: + [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_7) : f32 / [...] / [?,84,21..] + [Step 5/11] Resizing model to match image sizes and given batch + [ INFO ] Model batch size: 1 + [ INFO ] Reshaping model: 'x': [1,3,640,640] + [ INFO ] Reshape model took 4.68 ms + [Step 6/11] Configuring input of the model + [ INFO ] Model inputs: + [ INFO ] x (node: x) : u8 / [N,C,H,W] / [1,3,640,640] + [ INFO ] Model outputs: + [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_7) : f32 / [...] / [1,84,8400] + [Step 7/11] Loading the model to the device + [ INFO ] Compile model took 319.17 ms + [Step 8/11] Querying optimal runtime parameters + [ INFO ] Model: + [ INFO ] NETWORK_NAME: Model0 + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 6 + [ INFO ] MULTI_DEVICE_PRIORITIES: CPU + [ INFO ] CPU: + [ INFO ] AFFINITY: Affinity.CORE + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 + [ INFO ] DYNAMIC_QUANTIZATION_GROUP_SIZE: 32 + [ INFO ] ENABLE_CPU_PINNING: True + [ INFO ] ENABLE_HYPER_THREADING: True + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE + [ INFO ] INFERENCE_NUM_THREADS: 24 + [ INFO ] INFERENCE_PRECISION_HINT: + [ INFO ] KV_CACHE_PRECISION: + [ INFO ] LOG_LEVEL: Level.NO + [ INFO ] MODEL_DISTRIBUTION_POLICY: set() + [ INFO ] NETWORK_NAME: Model0 + [ INFO ] NUM_STREAMS: 6 + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 6 + [ INFO ] PERFORMANCE_HINT: THROUGHPUT + [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 + [ INFO ] PERF_COUNT: NO + [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE + [ INFO ] MODEL_PRIORITY: Priority.MEDIUM + [ INFO ] LOADED_FROM_CACHE: False + [ INFO ] PERF_COUNT: False + [Step 9/11] Creating infer requests and preparing input tensors + [ WARNING ] No input files were given for input 'x'!. This input will be filled with random values! + [ INFO ] Fill input 'x' with random values + [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 120000 ms duration) + [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). + [ INFO ] First inference took 31.01 ms + [Step 11/11] Dumping statistics report + [ INFO ] Execution Devices:['CPU'] + [ INFO ] Count: 18528 iterations + [ INFO ] Duration: 120050.83 ms + [ INFO ] Latency: + [ INFO ] Median: 37.96 ms + [ INFO ] Average: 38.74 ms + [ INFO ] Min: 19.79 ms + [ INFO ] Max: 98.97 ms + [ INFO ] Throughput: 154.33 FPS + + +.. code:: ipython3 + + if int8_model_det_path.exists(): + # Inference INT8 model (OpenVINO IR) + !benchmark_app -m $int8_model_det_path -d $device.value -api async -shape "[1,3,640,640]" -t 15 + + +.. parsed-literal:: + + [Step 1/11] Parsing and validating input arguments + [ INFO ] Parsing input parameters + [Step 2/11] Loading OpenVINO Runtime + [ INFO ] OpenVINO: + [ INFO ] Build ................................. 
2024.5.0-16913-890f2e12c98 + [ INFO ] + [ INFO ] Device info: + [ INFO ] AUTO + [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] + [ INFO ] + [Step 3/11] Setting device configuration + [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. + [Step 4/11] Reading model files + [ INFO ] Loading model files + [ INFO ] Read model took 26.43 ms + [ INFO ] Original model I/O parameters: + [ INFO ] Model inputs: + [ INFO ] x (node: x) : f32 / [...] / [1,3,640,640] + [ INFO ] Model outputs: + [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_7) : f32 / [...] / [1,84,8400] + [Step 5/11] Resizing model to match image sizes and given batch + [ INFO ] Model batch size: 1 + [ INFO ] Reshaping model: 'x': [1,3,640,640] + [ INFO ] Reshape model took 0.04 ms + [Step 6/11] Configuring input of the model + [ INFO ] Model inputs: + [ INFO ] x (node: x) : u8 / [N,C,H,W] / [1,3,640,640] + [ INFO ] Model outputs: + [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_7) : f32 / [...] / [1,84,8400] + [Step 7/11] Loading the model to the device + [ INFO ] Compile model took 515.22 ms + [Step 8/11] Querying optimal runtime parameters + [ INFO ] Model: + [ INFO ] NETWORK_NAME: Model0 + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12 + [ INFO ] MULTI_DEVICE_PRIORITIES: CPU + [ INFO ] CPU: + [ INFO ] AFFINITY: Affinity.CORE + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 + [ INFO ] DYNAMIC_QUANTIZATION_GROUP_SIZE: 32 + [ INFO ] ENABLE_CPU_PINNING: True + [ INFO ] ENABLE_HYPER_THREADING: True + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE + [ INFO ] INFERENCE_NUM_THREADS: 24 + [ INFO ] INFERENCE_PRECISION_HINT: + [ INFO ] KV_CACHE_PRECISION: + [ INFO ] LOG_LEVEL: Level.NO + [ INFO ] MODEL_DISTRIBUTION_POLICY: set() + [ INFO ] NETWORK_NAME: Model0 + [ INFO ] NUM_STREAMS: 12 + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12 + [ INFO ] PERFORMANCE_HINT: THROUGHPUT + [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 + [ INFO ] PERF_COUNT: NO + [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE + [ INFO ] MODEL_PRIORITY: Priority.MEDIUM + [ INFO ] LOADED_FROM_CACHE: False + [ INFO ] PERF_COUNT: False + [Step 9/11] Creating infer requests and preparing input tensors + [ WARNING ] No input files were given for input 'x'!. This input will be filled with random values! + [ INFO ] Fill input 'x' with random values + [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 15000 ms duration) + [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). + [ INFO ] First inference took 19.14 ms + [Step 11/11] Dumping statistics report + [ INFO ] Execution Devices:['CPU'] + [ INFO ] Count: 5184 iterations + [ INFO ] Duration: 15055.81 ms + [ INFO ] Latency: + [ INFO ] Median: 34.47 ms + [ INFO ] Average: 34.65 ms + [ INFO ] Min: 18.19 ms + [ INFO ] Max: 51.47 ms + [ INFO ] Throughput: 344.32 FPS + + +Next steps +---------- + +This section contains +suggestions on how to additionally improve the performance of your +application using OpenVINO. 
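+As a concrete preview of the asynchronous pipeline described in the next
+subsection, a minimal sketch built on ``ov.AsyncInferQueue`` could look like
+the following. Only ``core``, ``det_ov_model`` and ``device`` come from this
+notebook; the ``frames`` iterable of preprocessed inputs is a hypothetical
+placeholder used for illustration:
+
+.. code:: ipython3
+
+    import openvino as ov
+
+    # The THROUGHPUT hint asks the plugin to create enough streams for parallel requests.
+    compiled_model = core.compile_model(det_ov_model, device.value, {"PERFORMANCE_HINT": "THROUGHPUT"})
+
+    results = {}
+
+    def completion_callback(request, frame_id):
+        # Store only the raw output here; postprocessing can run outside the callback.
+        results[frame_id] = request.get_output_tensor(0).data.copy()
+
+    # jobs=0 (the default) lets the runtime pick the optimal number of parallel requests.
+    infer_queue = ov.AsyncInferQueue(compiled_model)
+    infer_queue.set_callback(completion_callback)
+
+    # `frames` is a hypothetical iterable of preprocessed [1,3,640,640] float arrays.
+    for frame_id, frame in enumerate(frames):
+        infer_queue.start_async({0: frame}, userdata=frame_id)
+
+    infer_queue.wait_all()
+    print(f"Processed {len(results)} frames")
+
+The `Async inference pipeline <#async-inference-pipeline>`__ subsection below
+links to a tutorial that covers this approach in more depth.
+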
+ +Async inference pipeline +~~~~~~~~~~~~~~~~~~~~~~~~ + +The key advantage of the Async +API is that when a device is busy with inference, the application can +perform other tasks in parallel (for example, populating inputs or +scheduling other requests) rather than wait for the current inference to +complete first. To understand how to perform async inference using +openvino, refer to `Async API tutorial `__ + +Live demo +--------- + + + +The following code runs model inference on a video: + +.. code:: ipython3 + + import collections + import time + from IPython import display + import cv2 + import numpy as np + + + # Main processing function to run object detection. + def run_object_detection( + source=0, + flip=False, + use_popup=False, + skip_first_frames=0, + model=det_model, + device=device.value, + ): + player = None + ov_config = {} + if device != "CPU": + model.reshape({0: [1, 3, 640, 640]}) + if "GPU" in device or ("AUTO" in device and "GPU" in core.available_devices): + ov_config = {"GPU_DISABLE_WINOGRAD_CONVOLUTION": "YES"} + compiled_model = core.compile_model(model, device, ov_config) + + if det_model.predictor is None: + custom = {"conf": 0.25, "batch": 1, "save": False, "mode": "predict"} # method defaults + args = {**det_model.overrides, **custom} + det_model.predictor = det_model._smart_load("predictor")(overrides=args, _callbacks=det_model.callbacks) + det_model.predictor.setup_model(model=det_model.model) + + det_model.predictor.model.ov_compiled_model = compiled_model + + try: + # Create a video player to play with target fps. + player = VideoPlayer(source=source, flip=flip, fps=30, skip_first_frames=skip_first_frames) + # Start capturing. + player.start() + if use_popup: + title = "Press ESC to Exit" + cv2.namedWindow(winname=title, flags=cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE) + + processing_times = collections.deque() + while True: + # Grab the frame. + frame = player.next() + if frame is None: + print("Source ended") + break + # If the frame is larger than full HD, reduce size to improve the performance. + scale = 1280 / max(frame.shape) + if scale < 1: + frame = cv2.resize( + src=frame, + dsize=None, + fx=scale, + fy=scale, + interpolation=cv2.INTER_AREA, + ) + # Get the results. + input_image = np.array(frame) + + start_time = time.time() + detections = det_model(input_image, verbose=False) + stop_time = time.time() + frame = detections[0].plot() + + processing_times.append(stop_time - start_time) + # Use processing times from last 200 frames. + if len(processing_times) > 200: + processing_times.popleft() + + _, f_width = frame.shape[:2] + # Mean processing time [ms]. + processing_time = np.mean(processing_times) * 1000 + fps = 1000 / processing_time + cv2.putText( + img=frame, + text=f"Inference time: {processing_time:.1f}ms ({fps:.1f} FPS)", + org=(20, 40), + fontFace=cv2.FONT_HERSHEY_COMPLEX, + fontScale=f_width / 1000, + color=(0, 0, 255), + thickness=1, + lineType=cv2.LINE_AA, + ) + # Use this workaround if there is flickering. + if use_popup: + cv2.imshow(winname=title, mat=frame) + key = cv2.waitKey(1) + # escape = 27 + if key == 27: + break + else: + # Encode numpy array to jpg. + _, encoded_img = cv2.imencode(ext=".jpg", img=frame, params=[cv2.IMWRITE_JPEG_QUALITY, 100]) + # Create an IPython image. + i = display.Image(data=encoded_img) + # Display the image in this notebook. 
+ display.clear_output(wait=True) + display.display(i) + # ctrl-c + except KeyboardInterrupt: + print("Interrupted") + # any different error + except RuntimeError as e: + print(e) + finally: + if player is not None: + # Stop capturing. + player.stop() + if use_popup: + cv2.destroyAllWindows() + +Run Live Object Detection +~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +Use a webcam as the video input. By default, the primary webcam is set +with \ ``source=0``. If you have multiple webcams, each one will be +assigned a consecutive number starting at 0. Set \ ``flip=True`` when +using a front-facing camera. Some web browsers, especially Mozilla +Firefox, may cause flickering. If you experience flickering, +set \ ``use_popup=True``. + + **NOTE**: To use this notebook with a webcam, you need to run the + notebook on a computer with a webcam. If you run the notebook on a + remote server (for example, in Binder or Google Colab service), the + webcam will not work. By default, the lower cell will run model + inference on a video file. If you want to try live inference on your + webcam set ``WEBCAM_INFERENCE = True`` + +Run the object detection: + +.. code:: ipython3 + + WEBCAM_INFERENCE = False + + if WEBCAM_INFERENCE: + VIDEO_SOURCE = 0 # Webcam + else: + VIDEO_SOURCE = "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/video/people.mp4" + +.. code:: ipython3 + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + +.. code:: ipython3 + + run_object_detection( + source=VIDEO_SOURCE, + flip=True, + use_popup=False, + model=det_ov_model, + device=device.value, + ) + + + +.. image:: yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_43_0.png + + +.. 
parsed-literal:: + + Source ended + diff --git a/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_10_3.jpg b/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_10_3.jpg new file mode 100644 index 00000000000000..bcf00f3d50dcb5 --- /dev/null +++ b/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_10_3.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc502c95e9f5526befe662fcc79a6a91d4f5ff0cfce09165436e91f4ef8224ad +size 113613 diff --git a/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_10_3.png b/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_10_3.png new file mode 100644 index 00000000000000..e886ccd29b47f9 --- /dev/null +++ b/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_10_3.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c7bb27d4264dbc22e0006e42fb2f1619992aeac7220232119578e42a0d8a083 +size 904352 diff --git a/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_17_1.jpg b/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_17_1.jpg new file mode 100644 index 00000000000000..7a742941b54c17 --- /dev/null +++ b/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_17_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394895a09cc413a6343edb2a6f6242258701b6911c09d8405eacc7c27f6fac62 +size 110819 diff --git a/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_17_1.png b/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_17_1.png new file mode 100644 index 00000000000000..6d4bfe012b6187 --- /dev/null +++ b/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_17_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b18cc2e60a802c059cd5cc05598a06c4affd959216e03674eeb68e822295843 +size 913173 diff --git a/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_30_1.jpg b/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_30_1.jpg new file mode 100644 index 00000000000000..7a742941b54c17 --- /dev/null +++ b/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_30_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394895a09cc413a6343edb2a6f6242258701b6911c09d8405eacc7c27f6fac62 +size 110819 diff --git a/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_30_1.png b/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_30_1.png new file mode 100644 index 00000000000000..6d4bfe012b6187 --- /dev/null +++ b/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_30_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b18cc2e60a802c059cd5cc05598a06c4affd959216e03674eeb68e822295843 +size 913173 diff --git a/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_43_0.png 
b/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_43_0.png new file mode 100644 index 00000000000000..f6ad963c53f95c --- /dev/null +++ b/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_43_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ed070bb0687ea1e958c91f59b98f38b9834911f23f78c1a8aad55bed3c7cf6d +size 569483 diff --git a/docs/notebooks/yolov8-instance-segmentation-with-output.rst b/docs/notebooks/yolov8-instance-segmentation-with-output.rst index 31b3debddf5d7c..742aff5ca6905a 100644 --- a/docs/notebooks/yolov8-instance-segmentation-with-output.rst +++ b/docs/notebooks/yolov8-instance-segmentation-with-output.rst @@ -13,17 +13,12 @@ This tutorial demonstrates step-by-step instructions on how to run and optimize PyTorch YOLOv8 with OpenVINO. We consider the steps required for instance segmentation scenario. -The tutorial consists of the following steps: - -- Prepare the PyTorch model. -- Download and prepare a dataset. -- Validate the original model. -- Convert the PyTorch model to OpenVINO IR. -- Validate the converted model. -- Prepare and run optimization pipeline. -- Compare performance of the FP32 and quantized models. -- Compare accuracy of the FP32 and quantized models. -- Live demo +The tutorial consists of the following steps: - Prepare the PyTorch +model. - Download and prepare a dataset. - Validate the original model. +- Convert the PyTorch model to OpenVINO IR. - Validate the converted +model. - Prepare and run optimization pipeline. - Compare performance of +the FP32 and quantized models. - Compare accuracy of the FP32 and +quantized models. - Live demo **Table of contents:** @@ -118,14 +113,14 @@ Import required utility functions. The lower cell will download the .. code:: ipython3 from pathlib import Path - + # Fetch `notebook_utils` module import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) - + open("notebook_utils.py", "w").write(r.text) from notebook_utils import download_file, VideoPlayer, device_widget @@ -180,12 +175,12 @@ Let us consider the examples: from PIL import Image from ultralytics import YOLO - + SEG_MODEL_NAME = "yolov8n-seg" - + seg_model = YOLO(models_dir / f"{SEG_MODEL_NAME}.pt") label_map = seg_model.model.names - + res = seg_model(IMAGE_PATH) Image.fromarray(res[0].plot()[:, :, ::-1]) @@ -202,7 +197,7 @@ Let us consider the examples: .. parsed-literal:: - + image 1/1 /home/akash/intel/openvino_notebooks/notebooks/yolov8-optimization/data/coco_bike.jpg: 480x640 1 bicycle, 2 cars, 1 dog, 111.7ms Speed: 2.5ms preprocess, 111.7ms inference, 528.4ms postprocess per image at shape (1, 3, 480, 640) @@ -234,16 +229,16 @@ preserve dynamic shapes in the model. .. parsed-literal:: Ultralytics YOLOv8.2.24 🚀 Python-3.8.10 torch-2.1.0+cu121 CPU (Intel Core(TM) i9-10980XE 3.00GHz) - + PyTorch: starting from 'models/yolov8n-seg.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) ((1, 116, 8400), (1, 32, 160, 160)) (6.7 MB) - + OpenVINO: starting export with openvino 2024.3.0-16041-1e3b88e4e3f-releases/2024/3... 
OpenVINO: export success ✅ 2.2s, saved as 'models/yolov8n-seg_openvino_model/' (6.9 MB) - + Export complete (3.7s) Results saved to /home/akash/intel/openvino_notebooks/notebooks/yolov8-optimization/models - Predict: yolo predict task=segment model=models/yolov8n-seg_openvino_model imgsz=640 half - Validate: yolo val task=segment model=models/yolov8n-seg_openvino_model imgsz=640 data=coco.yaml half + Predict: yolo predict task=segment model=models/yolov8n-seg_openvino_model imgsz=640 half + Validate: yolo val task=segment model=models/yolov8n-seg_openvino_model imgsz=640 data=coco.yaml half Visualize: https://netron.app @@ -266,7 +261,7 @@ Select device from dropdown list for running inference using OpenVINO .. code:: ipython3 device = device_widget() - + device @@ -286,10 +281,10 @@ Test on single image .. code:: ipython3 import openvino as ov - + core = ov.Core() seg_ov_model = core.read_model(seg_model_path) - + ov_config = {} if device.value != "CPU": seg_ov_model.reshape({0: [1, 3, 640, 640]}) @@ -300,13 +295,13 @@ Test on single image .. code:: ipython3 import torch - - + + def infer(*args): result = seg_compiled_model(args) return torch.from_numpy(result[0]), torch.from_numpy(result[1]) - - + + seg_model.predictor.inference = infer seg_model.predictor.model.pt = False @@ -318,7 +313,7 @@ Test on single image .. parsed-literal:: - + image 1/1 /home/akash/intel/openvino_notebooks/notebooks/yolov8-optimization/data/coco_bike.jpg: 640x640 1 bicycle, 2 cars, 1 dog, 24.2ms Speed: 6.0ms preprocess, 24.2ms inference, 14.8ms postprocess per image at shape (1, 3, 640, 640) @@ -358,24 +353,24 @@ evaluation function. .. code:: ipython3 from zipfile import ZipFile - + from ultralytics.data.utils import DATASETS_DIR - - + + DATA_URL = "http://images.cocodataset.org/zips/val2017.zip" LABELS_URL = "https://github.com/ultralytics/yolov5/releases/download/v1.0/coco2017labels-segments.zip" CFG_URL = "https://raw.githubusercontent.com/ultralytics/ultralytics/v8.1.0/ultralytics/cfg/datasets/coco.yaml" - + OUT_DIR = DATASETS_DIR - + DATA_PATH = OUT_DIR / "val2017.zip" LABELS_PATH = OUT_DIR / "coco2017labels-segments.zip" CFG_PATH = OUT_DIR / "coco.yaml" - + download_file(DATA_URL, DATA_PATH.name, DATA_PATH.parent) download_file(LABELS_URL, LABELS_PATH.name, LABELS_PATH.parent) download_file(CFG_URL, CFG_PATH.name, CFG_PATH.parent) - + if not (OUT_DIR / "coco/labels").exists(): with ZipFile(LABELS_PATH, "r") as zip_ref: zip_ref.extractall(OUT_DIR) @@ -405,8 +400,8 @@ Define validation function import numpy as np from tqdm.notebook import tqdm from ultralytics.utils.metrics import ConfusionMatrix - - + + def test( model: ov.Model, core: ov.Core, @@ -448,8 +443,8 @@ Define validation function validator.update_metrics(preds, batch) stats = validator.get_stats() return stats - - + + def print_stats(stats: np.ndarray, total_images: int, total_objects: int): """ Helper function for printing accuracy statistic @@ -525,7 +520,7 @@ validator class instance. from ultralytics.data.converter import coco80_to_coco91_class from ultralytics.data.utils import check_det_dataset from ultralytics.utils import ops - + args = get_cfg(cfg=DEFAULT_CFG) args.data = str(CFG_PATH) @@ -535,7 +530,7 @@ validator class instance. 
seg_validator.data = check_det_dataset(args.data) seg_validator.stride = 32 seg_data_loader = seg_validator.get_dataloader(OUT_DIR / "coco/", 1) - + seg_validator.is_coco = True seg_validator.class_map = coco80_to_coco91_class() seg_validator.names = seg_model.model.names @@ -629,15 +624,15 @@ improve model inference speed. .. code:: ipython3 import ipywidgets as widgets - + int8_model_seg_path = models_dir / f"{SEG_MODEL_NAME}_openvino_int8_model/{SEG_MODEL_NAME}.xml" - + to_quantize = widgets.Checkbox( value=True, description="Quantization", disabled=False, ) - + to_quantize @@ -656,12 +651,12 @@ Let’s load ``skip magic`` extension to skip quantization if # Fetch skip_kernel_extension module import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", ) open("skip_kernel_extension.py", "w").write(r.text) - + %load_ext skip_kernel_extension Reuse validation dataloader in accuracy testing for quantization. For @@ -671,12 +666,12 @@ transformation function for getting only input tensors. .. code:: ipython3 %%skip not $to_quantize.value - - + + import nncf from typing import Dict - - + + def transform_fn(data_item:Dict): """ Quantization transform function. Extracts and preprocess input data from dataloader item for quantization. @@ -687,8 +682,8 @@ transformation function for getting only input tensors. """ input_tensor = seg_validator.preprocess(data_item)['img'].numpy() return input_tensor - - + + quantization_dataset = nncf.Dataset(seg_data_loader, transform_fn) @@ -715,7 +710,7 @@ point precision, using the ``ignored_scope`` parameter. .. code:: ipython3 %%skip not $to_quantize.value - + ignored_scope = nncf.IgnoredScope( # post-processing subgraphs=[ nncf.Subgraph(inputs=['__module.model.22/aten::cat/Concat', @@ -725,7 +720,7 @@ point precision, using the ``ignored_scope`` parameter. outputs=['__module.model.22/aten::cat/Concat_8']) ] ) - + # Segmentation model quantized_seg_model = nncf.quantize( seg_ov_model, @@ -758,7 +753,7 @@ point precision, using the ``ignored_scope`` parameter. INFO:nncf:Not adding activation input quantizer for operation: 255 __module.model.22/aten::add/Add_6 INFO:nncf:Not adding activation input quantizer for operation: 265 __module.model.22/aten::add/Add_7 275 __module.model.22/aten::div/Divide - + INFO:nncf:Not adding activation input quantizer for operation: 266 __module.model.22/aten::sub/Subtract_1 INFO:nncf:Not adding activation input quantizer for operation: 276 __module.model.22/aten::cat/Concat_5 INFO:nncf:Not adding activation input quantizer for operation: 242 __module.model.22/aten::mul/Multiply_3 @@ -810,7 +805,7 @@ point precision, using the ``ignored_scope`` parameter. .. code:: ipython3 %%skip not $to_quantize.value - + print(f"Quantized segmentation model will be saved to {int8_model_seg_path}") ov.save_model(quantized_seg_model, str(int8_model_seg_path)) @@ -835,43 +830,43 @@ on the image. .. code:: ipython3 %%skip not $to_quantize.value - + device .. code:: ipython3 %%skip not $to_quantize.value - + ov_config = {} if device.value != "CPU": quantized_seg_model.reshape({0: [1, 3, 640, 640]}) if "GPU" in device.value or ("AUTO" in device.value and "GPU" in core.available_devices): ov_config = {"GPU_DISABLE_WINOGRAD_CONVOLUTION": "YES"} - + quantized_seg_compiled_model = core.compile_model(quantized_seg_model, device.value, ov_config) .. 
code:: ipython3 %%skip not $to_quantize.value - - + + def infer(*args): result = quantized_seg_compiled_model(args) return torch.from_numpy(result[0]), torch.from_numpy(result[1]) - + seg_model.predictor.inference = infer .. code:: ipython3 %%skip not $to_quantize.value - + res = seg_model(IMAGE_PATH) display(Image.fromarray(res[0].plot()[:, :, ::-1])) .. parsed-literal:: - + image 1/1 /home/akash/intel/openvino_notebooks/notebooks/yolov8-optimization/data/coco_bike.jpg: 640x640 1 bicycle, 2 cars, 1 dog, 20.0ms Speed: 4.5ms preprocess, 20.0ms inference, 16.9ms postprocess per image at shape (1, 3, 640, 640) @@ -906,7 +901,7 @@ models. .. code:: ipython3 %%skip not $to_quantize.value - + device .. code:: ipython3 @@ -922,12 +917,12 @@ models. [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: [ INFO ] Build ................................. 2024.3.0-16041-1e3b88e4e3f-releases/2024/3 - [ INFO ] + [ INFO ] [ INFO ] Device info: [ INFO ] AUTO [ INFO ] Build ................................. 2024.3.0-16041-1e3b88e4e3f-releases/2024/3 - [ INFO ] - [ INFO ] + [ INFO ] + [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files @@ -984,7 +979,7 @@ models. [ INFO ] PERF_COUNT: False [Step 9/11] Creating infer requests and preparing input tensors [ WARNING ] No input files were given for input 'x'!. This input will be filled with random values! - [ INFO ] Fill input 'x' with random values + [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). [ INFO ] First inference took 47.21 ms @@ -1013,12 +1008,12 @@ models. [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: [ INFO ] Build ................................. 2024.3.0-16041-1e3b88e4e3f-releases/2024/3 - [ INFO ] + [ INFO ] [ INFO ] Device info: [ INFO ] AUTO [ INFO ] Build ................................. 2024.3.0-16041-1e3b88e4e3f-releases/2024/3 - [ INFO ] - [ INFO ] + [ INFO ] + [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files @@ -1075,7 +1070,7 @@ models. [ INFO ] PERF_COUNT: False [Step 9/11] Creating infer requests and preparing input tensors [ WARNING ] No input files were given for input 'x'!. This input will be filled with random values! - [ INFO ] Fill input 'x' with random values + [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). [ INFO ] First inference took 31.05 ms @@ -1104,7 +1099,7 @@ accuracy on a dataset. .. code:: ipython3 %%skip not $to_quantize.value - + int8_seg_stats = test(quantized_seg_model, core, seg_data_loader, seg_validator, num_samples=NUM_TEST_SAMPLES) @@ -1117,10 +1112,10 @@ accuracy on a dataset. .. 
code:: ipython3 %%skip not $to_quantize.value - + print("FP32 model accuracy") print_stats(fp_seg_stats, seg_validator.seen, seg_validator.nt_per_class.sum()) - + print("INT8 model accuracy") print_stats(int8_seg_stats, seg_validator.seen, seg_validator.nt_per_class.sum()) @@ -1190,8 +1185,8 @@ The following code runs model inference on a video: import time import cv2 from IPython import display - - + + def run_instance_segmentation( source=0, flip=False, @@ -1201,20 +1196,20 @@ The following code runs model inference on a video: device=device.value, ): player = None - + ov_config = {} if device != "CPU": model.reshape({0: [1, 3, 640, 640]}) if "GPU" in device or ("AUTO" in device and "GPU" in core.available_devices): ov_config = {"GPU_DISABLE_WINOGRAD_CONVOLUTION": "YES"} compiled_model = core.compile_model(model, device, ov_config) - + def infer(*args): result = compiled_model(args) return torch.from_numpy(result[0]), torch.from_numpy(result[1]) - + seg_model.predictor.inference = infer - + try: # Create a video player to play with target fps. player = VideoPlayer(source=source, flip=flip, fps=30, skip_first_frames=skip_first_frames) @@ -1223,7 +1218,7 @@ The following code runs model inference on a video: if use_popup: title = "Press ESC to Exit" cv2.namedWindow(winname=title, flags=cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE) - + processing_times = collections.deque() while True: # Grab the frame. @@ -1243,17 +1238,17 @@ The following code runs model inference on a video: ) # Get the results. input_image = np.array(frame) - + start_time = time.time() detections = seg_model(input_image) stop_time = time.time() frame = detections[0].plot() - + processing_times.append(stop_time - start_time) # Use processing times from last 200 frames. if len(processing_times) > 200: processing_times.popleft() - + _, f_width = frame.shape[:2] # Mean processing time [ms]. processing_time = np.mean(processing_times) * 1000 @@ -1318,7 +1313,7 @@ set \ ``use_popup=True``. .. code:: ipython3 WEBCAM_INFERENCE = False - + if WEBCAM_INFERENCE: VIDEO_SOURCE = 0 # Webcam else: diff --git a/docs/notebooks/yolov8-obb-with-output.rst b/docs/notebooks/yolov8-obb-with-output.rst index b0993a93cb9f85..87e73f76558ffa 100644 --- a/docs/notebooks/yolov8-obb-with-output.rst +++ b/docs/notebooks/yolov8-obb-with-output.rst @@ -173,8 +173,6 @@ instance. - - Run inference ~~~~~~~~~~~~~ diff --git a/docs/notebooks/yolov8-object-detection-with-output.rst b/docs/notebooks/yolov8-object-detection-with-output.rst index 16648e4559f618..6b7dacd839a587 100644 --- a/docs/notebooks/yolov8-object-detection-with-output.rst +++ b/docs/notebooks/yolov8-object-detection-with-output.rst @@ -121,7 +121,7 @@ Install necessary packages. .. code:: ipython3 %pip install -q "openvino>=2024.0.0" "nncf>=2.9.0" - %pip install -q "torch>=2.1" "torchvision>=0.16" "ultralytics==8.2.24" onnx tqdm opencv-python --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "torch>=2.1" "torchvision>=0.16" "ultralytics==8.3.0" onnx tqdm opencv-python --extra-index-url https://download.pytorch.org/whl/cpu Import required utility functions. The lower cell will download the ``notebook_utils`` Python module from GitHub. @@ -161,7 +161,7 @@ Import required utility functions. The lower cell will download the .. 
parsed-literal:: - PosixPath('/home/akash/intel/openvino_notebooks/notebooks/yolov8-optimization/data/coco_bike.jpg') + PosixPath('/home/ea/work/openvino_notebooks_new_clone/openvino_notebooks/notebooks/yolov8-optimization/data/coco_bike.jpg') @@ -194,9 +194,10 @@ Let us consider the examples: from PIL import Image from ultralytics import YOLO - DET_MODEL_NAME = "yolov8n" + DET_MODEL_NAME = "yolo11n" - det_model = YOLO(models_dir / f"{DET_MODEL_NAME}.pt") + det_model = YOLO(f"{DET_MODEL_NAME}.pt") + det_model.to("cpu") label_map = det_model.model.names res = det_model(IMAGE_PATH) @@ -205,24 +206,14 @@ Let us consider the examples: .. parsed-literal:: - Downloading https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n.pt to 'models/yolov8n.pt'... + image 1/1 /home/ea/work/openvino_notebooks_new_clone/openvino_notebooks/notebooks/yolov8-optimization/data/coco_bike.jpg: 480x640 2 bicycles, 2 cars, 2 dogs, 101.6ms + Speed: 3.1ms preprocess, 101.6ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 640) -.. parsed-literal:: - - 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 6.25M/6.25M [00:02<00:00, 2.29MB/s] - - -.. parsed-literal:: - image 1/1 /home/akash/intel/openvino_notebooks/notebooks/yolov8-optimization/data/coco_bike.jpg: 480x640 2 bicycles, 2 cars, 1 dog, 82.9ms - Speed: 2.4ms preprocess, 82.9ms inference, 475.6ms postprocess per image at shape (1, 3, 480, 640) - - - -.. image:: yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_3.png +.. image:: yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_1.png @@ -239,27 +230,10 @@ preserve dynamic shapes in the model. .. code:: ipython3 # object detection model - det_model_path = models_dir / f"{DET_MODEL_NAME}_openvino_model/{DET_MODEL_NAME}.xml" + det_model_path = Path(f"{DET_MODEL_NAME}_openvino_model/{DET_MODEL_NAME}.xml") if not det_model_path.exists(): det_model.export(format="openvino", dynamic=True, half=True) - -.. parsed-literal:: - - Ultralytics YOLOv8.2.24 🚀 Python-3.8.10 torch-2.1.0+cu121 CPU (Intel Core(TM) i9-10980XE 3.00GHz) - - PyTorch: starting from 'models/yolov8n.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (6.2 MB) - - OpenVINO: starting export with openvino 2024.3.0-16041-1e3b88e4e3f-releases/2024/3... - OpenVINO: export success ✅ 1.7s, saved as 'models/yolov8n_openvino_model/' (6.4 MB) - - Export complete (3.1s) - Results saved to /home/akash/intel/openvino_notebooks/notebooks/yolov8-optimization/models - Predict: yolo predict task=detect model=models/yolov8n_openvino_model imgsz=640 half - Validate: yolo val task=detect model=models/yolov8n_openvino_model imgsz=640 data=coco.yaml half - Visualize: https://netron.app - - Verify model inference ~~~~~~~~~~~~~~~~~~~~~~ @@ -323,6 +297,7 @@ ready to check model prediction for object detection. det_model.predictor.inference = infer det_model.predictor.model.pt = False + det_model res = det_model(IMAGE_PATH) Image.fromarray(res[0].plot()[:, :, ::-1]) @@ -331,8 +306,8 @@ ready to check model prediction for object detection. .. 
parsed-literal:: - image 1/1 /home/akash/intel/openvino_notebooks/notebooks/yolov8-optimization/data/coco_bike.jpg: 640x640 2 bicycles, 2 cars, 1 dog, 16.1ms - Speed: 3.4ms preprocess, 16.1ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 640) + image 1/1 /home/ea/work/openvino_notebooks_new_clone/openvino_notebooks/notebooks/yolov8-optimization/data/coco_bike.jpg: 640x640 1 bicycle, 2 cars, 1 dog, 16.9ms + Speed: 3.7ms preprocess, 16.9ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 640) @@ -393,245 +368,16 @@ evaluation function. zip_ref.extractall(OUT_DIR / "coco/images") - -.. parsed-literal:: - - /home/akash/intel/NNCF/nncf/examples/post_training_quantization/openvino/yolov8/datasets/val2017.zip: 0%| … - - -.. parsed-literal:: - - IOPub message rate exceeded. - The Jupyter server will temporarily stop sending output - to the client in order to avoid crashing it. - To change this limit, set the config variable - `--ServerApp.iopub_msg_rate_limit`. - - Current values: - ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) - ServerApp.rate_limit_window=3.0 (secs) - - IOPub message rate exceeded. - The Jupyter server will temporarily stop sending output - to the client in order to avoid crashing it. - To change this limit, set the config variable - `--ServerApp.iopub_msg_rate_limit`. - - Current values: - ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) - ServerApp.rate_limit_window=3.0 (secs) - - IOPub message rate exceeded. - The Jupyter server will temporarily stop sending output - to the client in order to avoid crashing it. - To change this limit, set the config variable - `--ServerApp.iopub_msg_rate_limit`. - - Current values: - ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) - ServerApp.rate_limit_window=3.0 (secs) - - IOPub message rate exceeded. - The Jupyter server will temporarily stop sending output - to the client in order to avoid crashing it. - To change this limit, set the config variable - `--ServerApp.iopub_msg_rate_limit`. - - Current values: - ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) - ServerApp.rate_limit_window=3.0 (secs) - - IOPub message rate exceeded. - The Jupyter server will temporarily stop sending output - to the client in order to avoid crashing it. - To change this limit, set the config variable - `--ServerApp.iopub_msg_rate_limit`. - - Current values: - ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) - ServerApp.rate_limit_window=3.0 (secs) - - IOPub message rate exceeded. - The Jupyter server will temporarily stop sending output - to the client in order to avoid crashing it. - To change this limit, set the config variable - `--ServerApp.iopub_msg_rate_limit`. - - Current values: - ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) - ServerApp.rate_limit_window=3.0 (secs) - - IOPub message rate exceeded. - The Jupyter server will temporarily stop sending output - to the client in order to avoid crashing it. - To change this limit, set the config variable - `--ServerApp.iopub_msg_rate_limit`. - - Current values: - ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) - ServerApp.rate_limit_window=3.0 (secs) - - IOPub message rate exceeded. - The Jupyter server will temporarily stop sending output - to the client in order to avoid crashing it. - To change this limit, set the config variable - `--ServerApp.iopub_msg_rate_limit`. - - Current values: - ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) - ServerApp.rate_limit_window=3.0 (secs) - - IOPub message rate exceeded. 
- The Jupyter server will temporarily stop sending output - to the client in order to avoid crashing it. - To change this limit, set the config variable - `--ServerApp.iopub_msg_rate_limit`. - - Current values: - ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) - ServerApp.rate_limit_window=3.0 (secs) - - IOPub message rate exceeded. - The Jupyter server will temporarily stop sending output - to the client in order to avoid crashing it. - To change this limit, set the config variable - `--ServerApp.iopub_msg_rate_limit`. - - Current values: - ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) - ServerApp.rate_limit_window=3.0 (secs) - - IOPub message rate exceeded. - The Jupyter server will temporarily stop sending output - to the client in order to avoid crashing it. - To change this limit, set the config variable - `--ServerApp.iopub_msg_rate_limit`. - - Current values: - ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) - ServerApp.rate_limit_window=3.0 (secs) - - IOPub message rate exceeded. - The Jupyter server will temporarily stop sending output - to the client in order to avoid crashing it. - To change this limit, set the config variable - `--ServerApp.iopub_msg_rate_limit`. - - Current values: - ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) - ServerApp.rate_limit_window=3.0 (secs) - - IOPub message rate exceeded. - The Jupyter server will temporarily stop sending output - to the client in order to avoid crashing it. - To change this limit, set the config variable - `--ServerApp.iopub_msg_rate_limit`. - - Current values: - ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) - ServerApp.rate_limit_window=3.0 (secs) - - IOPub message rate exceeded. - The Jupyter server will temporarily stop sending output - to the client in order to avoid crashing it. - To change this limit, set the config variable - `--ServerApp.iopub_msg_rate_limit`. - - Current values: - ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) - ServerApp.rate_limit_window=3.0 (secs) - - IOPub message rate exceeded. - The Jupyter server will temporarily stop sending output - to the client in order to avoid crashing it. - To change this limit, set the config variable - `--ServerApp.iopub_msg_rate_limit`. - - Current values: - ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) - ServerApp.rate_limit_window=3.0 (secs) - - IOPub message rate exceeded. - The Jupyter server will temporarily stop sending output - to the client in order to avoid crashing it. - To change this limit, set the config variable - `--ServerApp.iopub_msg_rate_limit`. - - Current values: - ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) - ServerApp.rate_limit_window=3.0 (secs) - - IOPub message rate exceeded. - The Jupyter server will temporarily stop sending output - to the client in order to avoid crashing it. - To change this limit, set the config variable - `--ServerApp.iopub_msg_rate_limit`. - - Current values: - ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) - ServerApp.rate_limit_window=3.0 (secs) - - IOPub message rate exceeded. - The Jupyter server will temporarily stop sending output - to the client in order to avoid crashing it. - To change this limit, set the config variable - `--ServerApp.iopub_msg_rate_limit`. - - Current values: - ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) - ServerApp.rate_limit_window=3.0 (secs) - - IOPub message rate exceeded. - The Jupyter server will temporarily stop sending output - to the client in order to avoid crashing it. - To change this limit, set the config variable - `--ServerApp.iopub_msg_rate_limit`. 
- - Current values: - ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) - ServerApp.rate_limit_window=3.0 (secs) - - IOPub message rate exceeded. - The Jupyter server will temporarily stop sending output - to the client in order to avoid crashing it. - To change this limit, set the config variable - `--ServerApp.iopub_msg_rate_limit`. - - Current values: - ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) - ServerApp.rate_limit_window=3.0 (secs) - - IOPub message rate exceeded. - The Jupyter server will temporarily stop sending output - to the client in order to avoid crashing it. - To change this limit, set the config variable - `--ServerApp.iopub_msg_rate_limit`. - - Current values: - ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) - ServerApp.rate_limit_window=3.0 (secs) - - IOPub message rate exceeded. - The Jupyter server will temporarily stop sending output - to the client in order to avoid crashing it. - To change this limit, set the config variable - `--ServerApp.iopub_msg_rate_limit`. - - Current values: - ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) - ServerApp.rate_limit_window=3.0 (secs) - - - .. parsed-literal:: - '/home/akash/intel/NNCF/nncf/examples/post_training_quantization/openvino/yolov8/datasets/coco2017labels-segments.zip' already exists. + '/home/ea/work/openvino_notebooks_new_clone/openvino_notebooks/notebooks/yolov8-optimization/datasets/val2017.zip' already exists. + '/home/ea/work/openvino_notebooks_new_clone/openvino_notebooks/notebooks/yolov8-optimization/datasets/coco2017labels-segments.zip' already exists. .. parsed-literal:: - /home/akash/intel/NNCF/nncf/examples/post_training_quantization/openvino/yolov8/datasets/coco.yaml: 0%| … + /home/ea/work/openvino_notebooks_new_clone/openvino_notebooks/notebooks/yolov8-optimization/datasets/coco.yaml… Define validation function @@ -774,7 +520,7 @@ validator class instance. .. parsed-literal:: - val: Scanning /home/akash/intel/NNCF/nncf/examples/post_training_quantization/openvino/yolov8/datasets/coco/labels/val2017.cache... 4952 images, + val: Scanning /home/ea/work/openvino_notebooks_new_clone/openvino_notebooks/notebooks/yolov8-optimization/datasets/coco/labels/val2017.cache... 4952 images, 48 backgrounds, 0 corrupt: 100%|██████████| 5000/5000 [00:00\ **Note**: Model evaluation is time consuming -process and can take several minutes, depending on the hardware. For -reducing calculation time, we define ``num_samples`` parameter with -evaluation subset size, but in this case, accuracy can be noncomparable -with originally reported by the authors of the model, due to validation -subset difference. *To validate the models on the full dataset set -``NUM_TEST_SAMPLES = None``.* - -.. code:: ipython3 - - NUM_TEST_SAMPLES = 300 - -.. code:: ipython3 - - fp_det_stats = test(det_ov_model, core, det_data_loader, det_validator, num_samples=NUM_TEST_SAMPLES) - - - -.. parsed-literal:: - - 0%| | 0/300 [00:00 ([1,640,640,3], [N,H,W,C], f32) convert layout [N,C,H,W]: ([1,640,640,3], [N,H,W,C], f32) -> ([1,3,640,640], [N,C,H,W], f32) @@ -1704,7 +1348,7 @@ Now, we can skip these preprocessing steps in detect function: -.. image:: yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_70_0.png +.. image:: yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_61_0.png @@ -1883,7 +1527,7 @@ Run the object detection: -.. image:: yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_76_0.png +.. 
image:: yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_67_0.png .. parsed-literal:: diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_16_1.jpg b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_16_1.jpg new file mode 100644 index 00000000000000..7a742941b54c17 --- /dev/null +++ b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_16_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394895a09cc413a6343edb2a6f6242258701b6911c09d8405eacc7c27f6fac62 +size 110819 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_16_1.png b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_16_1.png index 703c1375a98784..6d4bfe012b6187 100644 --- a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_16_1.png +++ b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_16_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e85f0434876997adc7cf2152fa667b4e271817a443a75ca350d284aee48f9145 -size 908034 +oid sha256:7b18cc2e60a802c059cd5cc05598a06c4affd959216e03674eeb68e822295843 +size 913173 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_38_1.jpg b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_38_1.jpg new file mode 100644 index 00000000000000..40ac3a8da0dc2a --- /dev/null +++ b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_38_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90d9079b877726f30f154885745b36d8bc281f4970ce10f86529b9739a56ac72 +size 112567 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_38_1.png b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_38_1.png new file mode 100644 index 00000000000000..e7c4fb31035d98 --- /dev/null +++ b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_38_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fad172495a797bc753d19d54bd4afaf8f08ed36dfd1b9761df7bfa948673aab +size 911538 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_43_1.png b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_43_1.png deleted file mode 100644 index 44e7623ec662d1..00000000000000 --- a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_43_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6778bdd31a39722d8ce3fb9ddea6383e451ab1b32cf23fad23b556bd89c99707 -size 907722 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_61_0.jpg b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_61_0.jpg new file mode 100644 index 00000000000000..202af866d221dd --- /dev/null +++ b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_61_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:02d585dcd341e2580baf166b6f35337d2da17561a251fb5475041e2a20d6e558 +size 110078 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_61_0.png b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_61_0.png new file mode 100644 index 00000000000000..d0d2028049c4f3 --- /dev/null +++ b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_61_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44c1e80aa859828b9aae3240fb544b483df8dbd8b7c7bca29479a2a0714612c2 +size 929124 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_67_0.png b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_67_0.png new file mode 100644 index 00000000000000..cdae11c722ca65 --- /dev/null +++ b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_67_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c6c2609efd786cba1d9baac015d06040939ff8d1e7644f67d10d6df5c171a62 +size 569438 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_70_0.png b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_70_0.png deleted file mode 100644 index 3b025ea9323f25..00000000000000 --- a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_70_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b44f35cf238dcd208fcebe73bb388947f5528a167b511c8098d040b05d9bb819 -size 931491 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_76_0.png b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_76_0.png deleted file mode 100644 index 6d628536486d93..00000000000000 --- a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_76_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fc0d406144e3b07ae6bef629cd7b4973424847958b9865e7133bd0f0c7deeb5c -size 534154 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_1.jpg b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_1.jpg new file mode 100644 index 00000000000000..bcf00f3d50dcb5 --- /dev/null +++ b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc502c95e9f5526befe662fcc79a6a91d4f5ff0cfce09165436e91f4ef8224ad +size 113613 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_1.png b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_1.png new file mode 100644 index 00000000000000..e886ccd29b47f9 --- /dev/null +++ b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c7bb27d4264dbc22e0006e42fb2f1619992aeac7220232119578e42a0d8a083 +size 904352 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_3.png 
b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_3.png deleted file mode 100644 index 5073fa9f3eca09..00000000000000 --- a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_3.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:46d38bd078c6eeda706640d0e35907a9a504bf78f9dd8dbf2961de865c294802 -size 907783 diff --git a/docs/notebooks/yolov9-optimization-with-output.rst b/docs/notebooks/yolov9-optimization-with-output.rst index 0fa0597311c44d..0cf84003171753 100644 --- a/docs/notebooks/yolov9-optimization-with-output.rst +++ b/docs/notebooks/yolov9-optimization-with-output.rst @@ -21,6 +21,7 @@ The tutorial consists of the following steps: - Compare performance of the FP32 and quantized models. - Run optimized model inference on video + **Table of contents:** - `Prerequisites <#prerequisites>`__ @@ -99,9 +100,9 @@ Prerequisites Cloning into 'yolov9'... remote: Enumerating objects: 781, done. remote: Total 781 (delta 0), reused 0 (delta 0), pack-reused 781 (from 1) - Receiving objects: 100% (781/781), 3.27 MiB | 18.60 MiB/s, done. - Resolving deltas: 100% (331/331), done. - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9 + Receiving objects: 100% (781/781), 3.27 MiB | 10.53 MiB/s, done. + Resolving deltas: 100% (330/330), done. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9 Get PyTorch model @@ -139,7 +140,7 @@ applicable for other models from YOLO V9 family. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9/model/gelan-c.pt') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9/model/gelan-c.pt') @@ -196,7 +197,7 @@ using ``ov.save_model``. Fusing layers... Model summary: 387 layers, 25288768 parameters, 0 gradients, 102.1 GFLOPs - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9/models/yolo.py:108: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9/models/yolo.py:108: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! elif self.dynamic or self.shape != shape: @@ -577,10 +578,10 @@ asymmetric quantization of activations. .. parsed-literal:: - 2024-09-24 06:15:26.718478: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
- 2024-09-24 06:15:26.753483: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-08 06:55:28.833031: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-08 06:55:28.867508: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-09-24 06:15:27.350591: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-08 06:55:29.471960: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -668,18 +669,18 @@ models. [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2024.5.0-16765-f0c5d2f4346 + [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2024.5.0-16765-f0c5d2f4346 + [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 25.59 ms + [ INFO ] Read model took 26.27 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] images (node: x) : f32 / [...] / [?,3,?,?] @@ -691,7 +692,7 @@ models. [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'images': [1,3,640,640] - [ INFO ] Reshape model took 7.87 ms + [ INFO ] Reshape model took 7.74 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] images (node: x) : u8 / [N,C,H,W] / [1,3,640,640] @@ -701,7 +702,7 @@ models. [ INFO ] xi.3 (node: __module.model.22/aten::cat/Concat_1) : f32 / [...] / [1,144,40,40] [ INFO ] xi (node: __module.model.22/aten::cat/Concat) : f32 / [...] / [1,144,20,20] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 494.90 ms + [ INFO ] Compile model took 487.17 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -738,17 +739,17 @@ models. [ INFO ] Fill input 'images' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 179.99 ms + [ INFO ] First inference took 173.19 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 228 iterations - [ INFO ] Duration: 15541.29 ms + [ INFO ] Count: 222 iterations + [ INFO ] Duration: 15646.59 ms [ INFO ] Latency: - [ INFO ] Median: 406.95 ms - [ INFO ] Average: 405.29 ms - [ INFO ] Min: 208.26 ms - [ INFO ] Max: 427.95 ms - [ INFO ] Throughput: 14.67 FPS + [ INFO ] Median: 412.14 ms + [ INFO ] Average: 418.38 ms + [ INFO ] Min: 285.25 ms + [ INFO ] Max: 798.40 ms + [ INFO ] Throughput: 14.19 FPS .. code:: ipython3 @@ -762,18 +763,18 @@ models. [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2024.5.0-16765-f0c5d2f4346 + [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2024.5.0-16765-f0c5d2f4346 + [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 41.37 ms + [ INFO ] Read model took 41.47 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] images (node: x) : f32 / [...] / [1,3,640,640] @@ -785,7 +786,7 @@ models. [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'images': [1,3,640,640] - [ INFO ] Reshape model took 0.05 ms + [ INFO ] Reshape model took 0.04 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] images (node: x) : u8 / [N,C,H,W] / [1,3,640,640] @@ -795,7 +796,7 @@ models. [ INFO ] xi.3 (node: __module.model.22/aten::cat/Concat_1) : f32 / [...] / [1,144,40,40] [ INFO ] xi (node: __module.model.22/aten::cat/Concat) : f32 / [...] / [1,144,20,20] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 882.39 ms + [ INFO ] Compile model took 870.18 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -832,17 +833,17 @@ models. [ INFO ] Fill input 'images' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 68.84 ms + [ INFO ] First inference took 71.43 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 762 iterations - [ INFO ] Duration: 15208.40 ms + [ INFO ] Count: 726 iterations + [ INFO ] Duration: 15150.76 ms [ INFO ] Latency: - [ INFO ] Median: 119.61 ms - [ INFO ] Average: 119.42 ms - [ INFO ] Min: 74.71 ms - [ INFO ] Max: 133.04 ms - [ INFO ] Throughput: 50.10 FPS + [ INFO ] Median: 121.03 ms + [ INFO ] Average: 124.70 ms + [ INFO ] Min: 65.35 ms + [ INFO ] Max: 268.30 ms + [ INFO ] Throughput: 47.92 FPS Run Live Object Detection diff --git a/docs/notebooks/yolov9-optimization-with-output_files/yolov9-optimization-with-output_36_0.png b/docs/notebooks/yolov9-optimization-with-output_files/yolov9-optimization-with-output_36_0.png index ee95b5c802a961..8d7867fc6e5b5a 100644 --- a/docs/notebooks/yolov9-optimization-with-output_files/yolov9-optimization-with-output_36_0.png +++ b/docs/notebooks/yolov9-optimization-with-output_files/yolov9-optimization-with-output_36_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eb87a4cb2455f4227d83cd5062e78a943da35a2843bbf733eead1f48a408daf7 -size 496324 +oid sha256:071b74de5f9e1088f4d187a6c3d339cb90758fe7d12d7b7de495fd5266e50946 +size 499096 diff --git a/docs/openvino_custom_sphinx_sitemap/openvino_custom_sphinx_sitemap/__init__.py b/docs/openvino_custom_sphinx_sitemap/openvino_custom_sphinx_sitemap/__init__.py index 3d44a04e5b462c..bb26683cd9e579 100644 --- a/docs/openvino_custom_sphinx_sitemap/openvino_custom_sphinx_sitemap/__init__.py +++ b/docs/openvino_custom_sphinx_sitemap/openvino_custom_sphinx_sitemap/__init__.py @@ -39,7 +39,8 @@ def create_sitemap(app, exception): urlset = app.builder.config.ov_sitemap_urlset meta = app.builder.config.ov_sitemap_meta - site_url = app.builder.config.site_url or app.builder.config.html_baseurl + site_url = app.builder.config.site_url + if site_url: site_url.rstrip("/") + "/" else: @@ -71,29 +72,29 @@ def create_sitemap(app, exception): else: version = "" + unique_links = set() while True: try: link = app.env.app.sitemap_links.get_nowait() # type: ignore + if link in unique_links: + continue + unique_links.add(link) except queue.Empty: break url = ET.SubElement(root, "url") - scheme = app.config.sitemap_url_scheme + if app.builder.config.language: lang = app.builder.config.language + "/" else: lang = "" + scheme = app.config.sitemap_url_scheme ET.SubElement(url, "loc").text = site_url + scheme.format( lang=lang, version=version, link=link ) - if meta: - for entry in meta: - namespace, values = entry - namespace_element = ET.SubElement(url, namespace) - for tag_name, tag_value in values.items(): - ET.SubElement(namespace_element, tag_name).text = tag_value + process_coveo_meta(meta, url, link) for lang in locales: lang = lang + "/" @@ -111,4 +112,23 @@ def create_sitemap(app, exception): encoding='utf-8', method="xml") print("%s was generated for URL %s in %s" % (app.config.sitemap_filename, - site_url, filename)) \ No newline at end of file + site_url, filename)) + +def process_coveo_meta(meta, url, link): + if not meta: + return + + for namespace, values in meta: + namespace_element = ET.SubElement(url, namespace) + + for tag_name, tag_value in values.items(): + if tag_name == 'ovcategory': + processed_link = process_link(link) + ET.SubElement(namespace_element, tag_name).text = processed_link + else: + ET.SubElement(namespace_element, tag_name).text = tag_value + +def process_link(link): + if '/' in link: 
+ return link.split('/')[0].replace("-", " ") + return link.split('.html')[0].replace("-", " ") \ No newline at end of file diff --git a/docs/sphinx_setup/api/nodejs_api/nodejs_api.rst b/docs/sphinx_setup/api/nodejs_api/nodejs_api.rst index 995ceb3d29c003..72108b671891e7 100644 --- a/docs/sphinx_setup/api/nodejs_api/nodejs_api.rst +++ b/docs/sphinx_setup/api/nodejs_api/nodejs_api.rst @@ -43,10 +43,10 @@ API Development OpenVINO 2024.4 has introduced the following methods: -- Model.clone() -- Model.getoutputelementtype() -- CompiledModel getProperty() -- CompiledModel.setProperty() +- :ref:`Model.clone() ` +- :ref:`Model.getOutputElementType() ` +- :ref:`CompiledModel.getProperty() ` +- :ref:`CompiledModel.setProperty() ` Additional Resources diff --git a/docs/sphinx_setup/api/nodejs_api/openvino-node/interfaces/CompiledModel.rst b/docs/sphinx_setup/api/nodejs_api/openvino-node/interfaces/CompiledModel.rst index fca58c170210bd..86c6b41fa91ca6 100644 --- a/docs/sphinx_setup/api/nodejs_api/openvino-node/interfaces/CompiledModel.rst +++ b/docs/sphinx_setup/api/nodejs_api/openvino-node/interfaces/CompiledModel.rst @@ -243,6 +243,7 @@ Methods .. rubric:: setProperty + :name: setProperty * diff --git a/docs/sphinx_setup/api/nodejs_api/openvino-node/interfaces/Model.rst b/docs/sphinx_setup/api/nodejs_api/openvino-node/interfaces/Model.rst index 7f910ac6abd3d0..bd09b8d903d9fb 100644 --- a/docs/sphinx_setup/api/nodejs_api/openvino-node/interfaces/Model.rst +++ b/docs/sphinx_setup/api/nodejs_api/openvino-node/interfaces/Model.rst @@ -4,6 +4,7 @@ Interface Model .. code-block:: ts interface Model { + clone(): Model; inputs: Output[]; outputs: Output[]; getFriendlyName(): string; @@ -59,6 +60,7 @@ Methods ##################### .. rubric:: clone + :name: clone * @@ -143,6 +145,7 @@ Methods `addon.ts:198 `__ .. 
rubric:: getOutputElementType + :name: getOutputElementType * diff --git a/docs/sphinx_setup/conf.py b/docs/sphinx_setup/conf.py index ea144ca84d6154..351a6d6c5ea8b9 100644 --- a/docs/sphinx_setup/conf.py +++ b/docs/sphinx_setup/conf.py @@ -55,7 +55,9 @@ '.md': 'markdown', } -html_baseurl = 'https://docs.openvino.ai/canonical/' + +# html_baseurl = 'https://docs.openvino.ai/2024/' + # -- Sitemap configuration --------------------------------------------------- @@ -72,10 +74,10 @@ ov_sitemap_meta = [ ('coveo:metadata', { 'ovversion': version_name, + 'ovcategory': 'null' }) ] - # ---------------------------------------------------- html_favicon = '_static/favicon.ico' diff --git a/src/bindings/python/constraints.txt b/src/bindings/python/constraints.txt index 2f91d0a4a8b597..bb3d708a0ca23d 100644 --- a/src/bindings/python/constraints.txt +++ b/src/bindings/python/constraints.txt @@ -16,7 +16,7 @@ wheel>=0.38.1 patchelf<=0.17.2.1 # Frontends -h5py>=3.1.0,<3.12.0 +h5py>=3.1.0,<3.13.0 docopt~=0.6.2 paddlepaddle==2.6.0 tensorflow>=1.15.5,<2.18.0 diff --git a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py index d9dae251aa64e7..a7e9f895b5334b 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py @@ -4,14 +4,14 @@ # flake8: noqa # mypy: ignore-errors +import logging +import torch + from openvino.frontend.pytorch.py_pytorch_frontend import _FrontEndPytorchDecoder as Decoder from openvino.frontend.pytorch.py_pytorch_frontend import _Type as DecoderType -from openvino.runtime import op, PartialShape, Type as OVType, OVAny, Shape +from openvino.runtime import PartialShape, Type as OVType, OVAny, Shape from openvino.frontend.pytorch.utils import make_constant, fetch_attr, pt_to_ov_type_map, torch_tensor_to_ov_const -import torch - -import logging logger = logging.getLogger(__name__) logger.setLevel(logging.WARNING) diff --git a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/decompositions.py b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/decompositions.py index 368dbc4cbfa358..eb117f56ab167d 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/decompositions.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/decompositions.py @@ -46,7 +46,9 @@ def convolution_backward( return grad_input, grad_weight, grad_bias + if len(get_decompositions([aten._scaled_dot_product_flash_attention.default])) == 0: + @register_decomposition(aten._scaled_dot_product_flash_attention.default) def scaled_dot_product_flash_attention( query, @@ -101,16 +103,197 @@ def scaled_dot_product_flash_attention( def get_aot_decomposition_list(): - return ([torch.ops.aten._scaled_dot_product_flash_attention.default, - torch.ops.aten._softmax.default, - torch.ops.aten._softmax_backward_data.default, - torch.ops.aten.convolution_backward.default, - torch.ops.aten.gelu_backward.default, - torch.ops.aten.native_group_norm.default, - torch.ops.aten.native_group_norm_backward.default, - torch.ops.aten.native_layer_norm.default, - torch.ops.aten.native_layer_norm_backward.default, - torch.ops.aten.slice_backward.default]) + return [ + torch.ops.aten._scaled_dot_product_flash_attention.default, + torch.ops.aten._softmax.default, + torch.ops.aten._softmax_backward_data.default, + torch.ops.aten.convolution_backward.default, + torch.ops.aten.gelu_backward.default, + 
torch.ops.aten.native_group_norm.default, + torch.ops.aten.native_group_norm_backward.default, + torch.ops.aten.native_layer_norm.default, + torch.ops.aten.native_layer_norm_backward.default, + torch.ops.aten.slice_backward.default, + ] + def get_inf_decomposition_list(): - return ([torch.ops.aten.nll_loss_forward.default]) + return [torch.ops.aten.nll_loss_forward.default] + + +def get_export_decomposition_list(): + # List of decompositions from torch._decomp.core_aten_decompositions + # removed _backward ops and ops supported without decomposition + decomp = [ + torch.ops.aten.addcdiv, + torch.ops.aten.addcdiv_, + torch.ops.aten.addcmul, + torch.ops.aten.addcmul_, + torch.ops.aten.addr, + torch.ops.aten.affine_grid_generator, + torch.ops.aten.all, + torch.ops.aten.aminmax, + torch.ops.aten.arange.default, + torch.ops.aten.arange.start, + torch.ops.aten.baddbmm, + torch.ops.aten.binary_cross_entropy, + torch.ops.aten.binary_cross_entropy_with_logits, + torch.ops.aten.block_diag, + torch.ops.aten.celu, + torch.ops.aten.celu_, + torch.ops.aten.clamp_max, + torch.ops.aten.clamp_min, + torch.ops.aten.count_nonzero, + torch.ops.aten.linalg_cross, + torch.ops.aten.cudnn_batch_norm, + torch.ops.aten.deg2rad, + torch.ops.aten.deg2rad_, + torch.ops.aten.detach, + torch.ops.aten.diag_embed, + torch.ops.aten.dot, + torch.ops.aten.vdot, + torch.ops.aten.elu, + torch.ops.aten.elu_, + torch.ops.aten._embedding_bag, + torch.ops.aten.empty_like, + torch.ops.aten._euclidean_dist.default, + torch.ops.aten.expand_as, + torch.ops.aten.eye, + torch.ops.aten.fill, + torch.ops.aten.fill_, + torch.ops.aten.floor_divide, + torch.ops.aten.frac, + torch.ops.aten.frac_, + torch.ops.aten._fused_moving_avg_obs_fq_helper, + torch.ops.aten.gelu_, + torch.ops.aten.glu, + torch.ops.aten.hardshrink, + torch.ops.aten.hardsigmoid, + torch.ops.aten.hardsigmoid_, + torch.ops.aten.hardswish, + torch.ops.aten.hardswish_, + torch.ops.aten.hardtanh_, + torch.ops.aten.heaviside, + torch.ops.aten.heaviside_, + torch.ops.aten.huber_loss, + torch.ops.aten.im2col, + torch.ops.aten.index_add, + torch.ops.aten.index_add_, + torch.ops.aten.index_copy, + torch.ops.aten.index_copy_, + torch.ops.aten.index_fill, + torch.ops.aten.index_fill_, + torch.ops.aten.isin, + torch.ops.aten.isneginf, + torch.ops.aten.isposinf, + torch.ops.aten.l1_loss, + torch.ops.aten.leaky_relu_, + torch.ops.aten.lerp, + torch.ops.aten.lerp_, + torch.ops.aten.linspace, + torch.ops.aten.logaddexp, + torch.ops.aten.logaddexp2, + torch.ops.aten.logit, + torch.ops.aten.logit_, + torch.ops.aten.log_sigmoid_forward, + torch.ops.aten.logspace, + torch.ops.aten.logsumexp.default, + torch.ops.aten.masked_fill, + torch.ops.aten.masked_fill_, + torch.ops.aten.mish, + torch.ops.aten.mish_, + torch.ops.aten.mse_loss, + torch.ops.aten.multi_margin_loss, + torch.ops.aten.multilabel_margin_loss_forward, + torch.ops.aten.mv, + torch.ops.aten.mvlgamma, + torch.ops.aten.mvlgamma_, + torch.ops.aten.nansum, + torch.ops.aten.nan_to_num, + torch.ops.aten.nan_to_num_, + torch.ops.aten.narrow, + torch.ops.aten.new_empty, + torch.ops.aten.new_full, + torch.ops.aten.new_ones, + torch.ops.aten.new_zeros, + torch.ops.aten.nll_loss_forward, + torch.ops.aten.norm, + torch.ops.aten.ones, + torch.ops.aten.ones_like, + torch.ops.aten._prelu_kernel, + torch.ops.aten._reshape_alias, + torch.ops.aten.rad2deg, + torch.ops.aten.rad2deg_, + torch.ops.aten.reflection_pad1d, + torch.ops.aten.reflection_pad2d, + torch.ops.aten.reflection_pad3d, + torch.ops.aten.replication_pad1d, + 
torch.ops.aten.replication_pad2d, + torch.ops.aten.replication_pad3d, + torch.ops.aten.renorm, + torch.ops.aten.renorm_, + torch.ops.aten.resize_as, + torch.ops.aten.roll, + torch.ops.aten.rot90, + torch.ops.aten.rrelu_with_noise, + torch.ops.aten.rrelu_with_noise_, + torch.ops.aten.rsub, + torch.ops.aten.select_scatter, + torch.ops.aten.sgn, + torch.ops.aten.sgn_, + torch.ops.aten.silu, + torch.ops.aten.silu_, + torch.ops.aten.sinc, + torch.ops.aten.sinc_, + torch.ops.aten.smooth_l1_loss, + torch.ops.aten.soft_margin_loss, + torch.ops.aten.softplus, + torch.ops.aten.softshrink, + torch.ops.aten.special_entr, + torch.ops.aten.special_log_ndtr, + torch.ops.aten.special_xlog1py, + torch.ops.aten.split.Tensor, + torch.ops.aten.split_with_sizes_copy, + torch.ops.aten.squeeze.default, + torch.ops.aten.squeeze.dim, + torch.ops.aten.std, + torch.ops.aten.std_mean, + torch.ops.aten.stack, + torch.ops.aten.sum.default, + torch.ops.aten.sum.out, + torch.ops.aten.t, + torch.ops.aten.take, + torch.ops.aten.threshold, + torch.ops.aten.threshold_, + torch.ops.aten.trace, + torch.ops.aten.transpose.int, + torch.ops.aten.tril, + torch.ops.aten.tril_, + torch.ops.aten.triu, + torch.ops.aten.triu_, + torch.ops.aten.unbind, + torch.ops.aten.unfold_copy, + torch.ops.aten._unsafe_index, + torch.ops.aten.unsafe_split.Tensor, + torch.ops.aten.unsafe_split_with_sizes, + torch.ops.aten._unsafe_view, + torch.ops.aten.view_as_complex, + torch.ops.aten.xlogy, + torch.ops.aten.xlogy_, + torch.ops.aten.zero, + torch.ops.aten.zero_, + torch.ops.aten.zeros, + torch.ops.aten.zeros_like, + torch.ops.aten._weight_norm_interface, + ] + try: + from packaging import version + if version.parse(torch.__version__) >= version.parse("2.3"): + decomp += [ + torch.ops.aten._lazy_clone, + torch.ops.aten._test_parallel_materialize, + torch.ops.aten._chunk_cat, + ] + except ImportError: + pass + return decomp diff --git a/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py b/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py index 28bd6bbd2dfbb0..af8eafda8e9be7 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py @@ -73,7 +73,6 @@ def __init__( "https://pytorch.org/tutorials/beginner/Intro_to_TorchScript_tutorial.html." ) from e self.graph_element = pt_module.inlined_graph - log.debug("Inlined graph:\n%s", pt_module.inlined_graph) self.alias_db = self.graph_element.alias_db() else: self.graph_element = graph_element @@ -96,6 +95,7 @@ def __init__( self._transform_tensor_list_constants_to_listconstruct( self.graph_element) self._transform_optional_constants(self.graph_element) + log.debug("Inlined graph:\n%s", self.graph_element) @staticmethod def _get_preserved_attributes(model) -> list: @@ -293,11 +293,13 @@ def decoder_type_name(self) -> str: return "ts" def get_subgraphs(self) -> list: - if self.graph_element.kind() == "prim::PythonOp": + if self.graph_element.kind() in ["prim::PythonOp", "prim::fork"]: if "Subgraph" in self.graph_element.attributeNames(): assert isinstance( self.graph_element, torch.Node), "Graph element must be of type torch.Node." - return [getattr(self.graph_element, self.graph_element.kindOf("Subgraph"))("Subgraph")] + subgraph = getattr(self.graph_element, self.graph_element.kindOf("Subgraph"))("Subgraph") + torch._C._jit_pass_inline(subgraph) + return [subgraph] else: # Attribute "Subgraph" is only available if Graph was created using tracing. 
# TODO Find way to extract subgraph for scripted Graph. @@ -305,10 +307,17 @@ def get_subgraphs(self) -> list: return list(self.graph_element.blocks()) def get_subgraph_decoder(self, index: int): - decoder = TorchScriptPythonDecoder( - self.pt_module, self.get_subgraphs( - )[index], alias_db=self.alias_db, shared_memory=self._shared_memory, module_extensions=self.module_extensions - ) + module = self.pt_module + if self.graph_element.kind() == "prim::fork": + in0 = self.raw_inputs[0] + if in0.node().kind() == "prim::GetAttr": + module, _ = get_value_from_getattr(in0.node(), self.pt_module) + decoder = TorchScriptPythonDecoder(module, + self.get_subgraphs()[index], + alias_db=self.alias_db, + shared_memory=self._shared_memory, + module_extensions=self.module_extensions + ) self.m_decoders.append(decoder) return decoder @@ -456,8 +465,8 @@ def as_string(self): @staticmethod def _as_constant_list(pt_value: torch.Value): - # For now it is treat a list as a 1D tensor; it is required by converters to avoid need to massively - # rewrite them in that part where constant attributes are queried + # For now we treat a list as a 1D tensor; it is required by converters to avoid + # need to massively rewrite them in that part where constant attributes are queried pt_element_type = str(pt_value.type().getElementType()) ivalue = pt_value.toIValue() is_known_type = pt_element_type in pt_to_ov_type_map @@ -467,6 +476,7 @@ def _as_constant_list(pt_value: torch.Value): ovshape = PartialShape([len(ivalue)]) ov_const = op.Constant(ovtype, ovshape.get_shape(), ivalue) return ov_const.outputs() + return [] def _get_device_string(self) -> str: assert self.graph_element.kind( diff --git a/src/common/snippets/src/utils/tokenization_utils.cpp b/src/common/snippets/src/utils/tokenization_utils.cpp index 700b282f86f4d4..a5f925b5ab7374 100644 --- a/src/common/snippets/src/utils/tokenization_utils.cpp +++ b/src/common/snippets/src/utils/tokenization_utils.cpp @@ -227,44 +227,9 @@ bool tokenize_node(const std::shared_ptr& node, const SnippetsTokeniza // this is there stitching happens, get result of a copy of a body of currently processed input and put it to the new inputs // internal output index == external output index - auto& input_body = clones[input_node]; - size_t source_output_index = input_value.get_index(); - auto source_result = input_body->get_results()[source_output_index]; - - // We cannot add new node, that is not Convert, after Convert (that is start node) to avoid arithmetic problems with conversion - // We can add any new node in Subgraph after Convert (bacause after Input) - // Parameter - // | - // Convert - // - // We cannot add new node, that isn't Convert, in Subgraph after existing Convert - // Parameter - // Relu - // Convert - // - // But we can add new Convert in Subgraph after existing Convert - // Parameter - // Relu - // Convert - // Convert - // - // Thus, We can grow subgraph only if Convert is the first node of subgraph and have to abort it's the last one and we want to add not Convert - // We have this limitation because at the moment we support only one execution precision inside body, so - // if there is Convert with input and output data types that aren't equal to supported exec type, - // we can get conversion math errors - const auto output_of_subgraph = source_result->get_input_node_shared_ptr(0); - if (!ov::is_type(node) && ov::is_type(output_of_subgraph)) { - // Also we can add new node after < Parameter -> Convert -> Convert -> Convert > - auto grandparent = 
output_of_subgraph->get_input_node_ptr(0); - while (ov::is_type(grandparent)) { - grandparent = grandparent->get_input_node_ptr(0); - } - - if (!ov::is_type(grandparent)) { - return abort("Convert supports only as Input and as Result of subgraph. Aborting"); - } - } - // Result op has a single input + const auto& input_body = clones[input_node]; + const size_t source_output_index = input_value.get_index(); + const auto& source_result = input_body->get_results()[source_output_index]; internal_inputs.push_back(source_result->input_value(0)); } else { // We need some non-scalar constants inside Subgraph in the following cases: diff --git a/src/common/snippets/tests/src/pass/collapse_subgraph.cpp b/src/common/snippets/tests/src/pass/collapse_subgraph.cpp index c3459e1d2c6b2a..57ba33903f6197 100644 --- a/src/common/snippets/tests/src/pass/collapse_subgraph.cpp +++ b/src/common/snippets/tests/src/pass/collapse_subgraph.cpp @@ -27,22 +27,14 @@ void CollapseSubgraphTests::run() { }); } -class SKIP_CollapseSubgraphTests : public CollapseSubgraphTests { -public: - void SetUp() override { - GTEST_SKIP(); - } - void TearDown() override{}; -}; - -TEST_F(SKIP_CollapseSubgraphTests /* CVS-114607 */, smoke_Snippets_Eltwise) { +TEST_F(CollapseSubgraphTests, smoke_Snippets_Eltwise) { const auto& f = EltwiseFunction(std::vector {{2, 3}, {1, 3}}); model = f.getOriginal(); model_ref = f.getReference(); run(); } -TEST_F(SKIP_CollapseSubgraphTests /* CVS-114607 */, smoke_Snippets_MatMulWithEltwise) { +TEST_F(CollapseSubgraphTests, smoke_Snippets_MatMulWithEltwise) { const auto& f = MatMulEltwiseBranchesFunction(std::vector {{1, 3, 4, 4}, {1, 3, 4, 4}}); model = f.getOriginal(); model_ref = f.getReference(); @@ -56,35 +48,35 @@ TEST_F(CollapseSubgraphTests, smoke_Snippets_AvoidLoopEltwise) { run(); } -TEST_F(SKIP_CollapseSubgraphTests /* CVS-114607 */, smoke_Snippets_OneConvert) { +TEST_F(CollapseSubgraphTests, smoke_Snippets_OneConvert) { const auto& f = ConvertFunction(std::vector{{2, 5}}); model = f.getOriginal(); model_ref = f.getReference(); run(); } -TEST_F(SKIP_CollapseSubgraphTests /* CVS-114607 */, smoke_Snippets_ConvertInput) { +TEST_F(CollapseSubgraphTests, smoke_Snippets_ConvertInput) { const auto& f = ConvertInputFunction(std::vector{{2, 5}, {1, 5}}); model = f.getOriginal(); model_ref = f.getReference(); run(); } -TEST_F(SKIP_CollapseSubgraphTests /* CVS-114607 */, smoke_Snippets_ConvertOutput) { +TEST_F(CollapseSubgraphTests, smoke_Snippets_ConvertOutput) { const auto& f = ConvertOutputFunction(std::vector{{2, 5}, {1, 5}}); model = f.getOriginal(); model_ref = f.getReference(); run(); } -TEST_F(SKIP_CollapseSubgraphTests /* CVS-114607 */, smoke_Snippets_ConvertStub) { +TEST_F(CollapseSubgraphTests, smoke_Snippets_ConvertStub) { const auto& f = ConvertStubFunction(std::vector{{2, 5, 2}, {1, 5, 1}}); model = f.getOriginal(); model_ref = f.getReference(); run(); } -TEST_F(SKIP_CollapseSubgraphTests /* CVS-114607 */, smoke_Snippets_ConvertPartialInputsAndResults) { +TEST_F(CollapseSubgraphTests, smoke_Snippets_ConvertPartialInputsAndResults) { const auto& f = ConvertPartialInputsAndResultsFunction(std::vector{{2, 5, 1}, {1, 5, 1}, {2, 1, 10}}, std::vector{ov::element::i8, ov::element::bf16, ov::element::f32}, std::vector{ov::element::f32, ov::element::i8}); diff --git a/src/common/snippets/tests/src/pass/mha_tokenization.cpp b/src/common/snippets/tests/src/pass/mha_tokenization.cpp index 3a97b94925a2c0..c5932ed690d670 100644 --- a/src/common/snippets/tests/src/pass/mha_tokenization.cpp +++ 
b/src/common/snippets/tests/src/pass/mha_tokenization.cpp @@ -14,14 +14,6 @@ namespace ov { namespace test { namespace snippets { -class SKIP_TokenizeMHASnippetsTests : public TokenizeMHASnippetsTests { -public: - void SetUp() override { - GTEST_SKIP(); - } - void TearDown() override{}; -}; - void TokenizeMHASnippetsTests::run() { ASSERT_TRUE(model); manager.register_pass(); @@ -103,8 +95,7 @@ TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA_with_MatMul0_Transpose) { run(); } -TEST_F(SKIP_TokenizeMHASnippetsTests /* CVS-142098 */, smoke_Snippets_MHA_with_MatMul0_Transpose_Dynamic) { - GTEST_SKIP(); +TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA_with_MatMul0_Transpose_Dynamic) { const auto &f = MHAMatMul0TransposeFunction(std::vector{{-1, -1, -1, -1}, {-1, -1, -1, -1}, {-1, -1, -1, -1}, {-1, -1, -1, -1}}, std::vector({ov::element::f32, ov::element::f32, ov::element::f32, ov::element::f32}), false); @@ -113,8 +104,7 @@ TEST_F(SKIP_TokenizeMHASnippetsTests /* CVS-142098 */, smoke_Snippets_MHA_with_M run(); } -TEST_F(SKIP_TokenizeMHASnippetsTests /* CVS-114607 */, smoke_Snippets_MHA_with_int_Matmuls) { - GTEST_SKIP(); +TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA_with_int_Matmuls) { const auto &f = MHAINT8MatMulTypeRelaxedFunction(std::vector{{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 128, 12, 64}}); model = f.getOriginal(); model_ref = f.getReference(); @@ -128,8 +118,7 @@ TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA_Transpose_extraction) { run(); } -TEST_F(SKIP_TokenizeMHASnippetsTests /* CVS-142098 */, smoke_Snippets_MHA_Dynamic_Transpose_extraction) { - GTEST_SKIP(); +TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA_Dynamic_Transpose_extraction) { const auto& f = MHATransposedInputFunction(std::vector{{-1, -1, -1, -1}, {-1, -1, -1, -1}, {-1, -1, -1, -1}}, true); model = f.getOriginal(); model_ref = f.getReference(); @@ -144,8 +133,7 @@ TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA_Transpose_extraction_and_uns run(); } -TEST_F(SKIP_TokenizeMHASnippetsTests /* CVS-142098 */, smoke_Snippets_MHA_Dynamic_Transpose_extraction_and_unsupported_existing_transpose) { - GTEST_SKIP(); +TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA_Dynamic_Transpose_extraction_and_unsupported_existing_transpose) { const auto& f = MHATransposedInputFunction(std::vector{{-1, -1, -1, -1}, {-1, -1, -1, -1}, {-1, -1, -1, -1}}, true, std::vector{0, 3, 1, 2}); model = f.getOriginal(); diff --git a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp index 3cf542377d5adc..282fc69486b923 100644 --- a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp @@ -143,6 +143,7 @@ bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr // In particular, if zero dim tensor is consumed in body of MultiSubGraphOp // RemoveConcatZeroDimInput and RemoveMultiSubGraphOpDanglingParamsResults should be called together. 
using namespace ov::pass; + REGISTER_PASS(manager, EliminateConvert) REGISTER_PASS(manager, EliminateScatterUpdate) REGISTER_PASS(manager, RemoveConcatZeroDimInput) REGISTER_PASS(manager, EliminateLoopInputsOutputs); diff --git a/src/common/transformations/tests/common_optimizations/moc_transformations.cpp b/src/common/transformations/tests/common_optimizations/moc_transformations.cpp index d054605fba726e..32cd330ca9ab75 100644 --- a/src/common/transformations/tests/common_optimizations/moc_transformations.cpp +++ b/src/common/transformations/tests/common_optimizations/moc_transformations.cpp @@ -39,6 +39,22 @@ TEST(TransformationTests, TestModelTensorsConsistencyUseShapesTrue) { EXPECT_TRUE(model->outputs()[0].get_names() == new_tensors); } +TEST(TransformationTests, MOCConvertElimination) { + auto input = std::make_shared(element::f32, Shape{1}); + auto const_val = opset12::Constant::create(element::f32, Shape{1}, {2}); + + auto add1 = std::make_shared(input, const_val); + auto convert_fp32 = std::make_shared(const_val, element::f32); + auto mul = std::make_shared(add1, convert_fp32); + + auto model = std::make_shared(NodeVector{mul}, ParameterVector{input}); + ov::pass::Manager m; + m.register_pass(false); + m.run_passes(model); + + EXPECT_EQ(count_ops_of_type(model), 1); +} + TEST(TransformationTests, TestModelTensorsConsistencyUseShapesFalse) { auto input = std::make_shared(element::f32, Shape{1}); auto const1 = opset12::Constant::create(element::f32, Shape{1}, {1}); diff --git a/src/core/include/openvino/op/ops.hpp b/src/core/include/openvino/op/ops.hpp index 9ba694963248ad..135167fe11ed5f 100644 --- a/src/core/include/openvino/op/ops.hpp +++ b/src/core/include/openvino/op/ops.hpp @@ -169,6 +169,7 @@ #include "openvino/op/scatter_elements_update.hpp" #include "openvino/op/scatter_nd_update.hpp" #include "openvino/op/scatter_update.hpp" +#include "openvino/op/search_sorted.hpp" #include "openvino/op/select.hpp" #include "openvino/op/selu.hpp" #include "openvino/op/shape_of.hpp" diff --git a/src/core/include/openvino/op/search_sorted.hpp b/src/core/include/openvino/op/search_sorted.hpp new file mode 100644 index 00000000000000..78650942ee8f0f --- /dev/null +++ b/src/core/include/openvino/op/search_sorted.hpp @@ -0,0 +1,46 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/op/op.hpp" + +namespace ov { +namespace op { +namespace v15 { +/// \brief SearchSorted operation. +/// +/// \ingroup ov_ops_cpp_api +class OPENVINO_API SearchSorted : public Op { +public: + OPENVINO_OP("SearchSorted", "opset15", Op); + + SearchSorted() = default; + /// \brief Constructs a SearchSorted operation. + /// \param sorted_sequence Sorted sequence to search in. + /// \param values Values to search indexs for. + /// \param right_mode If False, return the first suitable index that is found for given value. If True, return + /// the last such index. 
+ SearchSorted(const Output& sorted_sequence, const Output& values, bool right_mode = false); + + void validate_and_infer_types() override; + bool visit_attributes(AttributeVisitor& visitor) override; + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; + + bool get_right_mode() const { + return m_right_mode; + } + + void set_right_mode(bool right_mode) { + m_right_mode = right_mode; + } + + bool validate() const; + +private: + bool m_right_mode{}; +}; +} // namespace v15 +} // namespace op +} // namespace ov diff --git a/src/core/include/openvino/op/sink.hpp b/src/core/include/openvino/op/sink.hpp index 4bba7968877471..7cd3c1c2bbbc2d 100644 --- a/src/core/include/openvino/op/sink.hpp +++ b/src/core/include/openvino/op/sink.hpp @@ -15,7 +15,7 @@ namespace op { class OPENVINO_API Sink : public Op { public: ~Sink() override = 0; - OPENVINO_OP("Sink"); + OPENVINO_OP("Sink", "util", Op); protected: Sink() : Op() {} diff --git a/src/core/include/openvino/op/util/convolution_backprop_base.hpp b/src/core/include/openvino/op/util/convolution_backprop_base.hpp index 9a24dc4edddaa2..0dcfe83079efbc 100644 --- a/src/core/include/openvino/op/util/convolution_backprop_base.hpp +++ b/src/core/include/openvino/op/util/convolution_backprop_base.hpp @@ -12,7 +12,7 @@ namespace util { /// \brief Base class for operations like back propagation convolution class OPENVINO_API ConvolutionBackPropBase : public ConvolutionBase { public: - OPENVINO_OP("ConvolutionBackPropBase", "util"); + OPENVINO_OP("ConvolutionBackPropBase", "util", ConvolutionBase); /// \brief Constructs a conversion operation. ConvolutionBackPropBase() = default; diff --git a/src/core/include/openvino/op/util/convolution_base.hpp b/src/core/include/openvino/op/util/convolution_base.hpp index f73d05182e8159..c58d1add2cd5b6 100644 --- a/src/core/include/openvino/op/util/convolution_base.hpp +++ b/src/core/include/openvino/op/util/convolution_base.hpp @@ -106,7 +106,7 @@ class OPENVINO_API ConvolutionBase : public Op { /// \brief Base class for operations like back propagation convolution class OPENVINO_API ConvolutionFwdPropBase : public ConvolutionBase { public: - OPENVINO_OP("ConvolutionFwdPropBase", "util"); + OPENVINO_OP("ConvolutionFwdPropBase", "util", ConvolutionBase); /// \brief Constructs a conversion operation. ConvolutionFwdPropBase() = default; diff --git a/src/core/include/openvino/op/util/deformable_convolution_base.hpp b/src/core/include/openvino/op/util/deformable_convolution_base.hpp index 8773b989885bba..788b0a9761f477 100644 --- a/src/core/include/openvino/op/util/deformable_convolution_base.hpp +++ b/src/core/include/openvino/op/util/deformable_convolution_base.hpp @@ -16,7 +16,7 @@ namespace util { /// v8. class OPENVINO_API DeformableConvolutionBase : public util::ConvolutionBase { public: - OPENVINO_OP("DeformableConvolutionBase", "util"); + OPENVINO_OP("DeformableConvolutionBase", "util", util::ConvolutionBase); /// \brief Constructs a conversion operation. 
DeformableConvolutionBase() = default; diff --git a/src/core/include/openvino/op/util/framework_node.hpp b/src/core/include/openvino/op/util/framework_node.hpp index 258e249e61071a..859843ff006701 100644 --- a/src/core/include/openvino/op/util/framework_node.hpp +++ b/src/core/include/openvino/op/util/framework_node.hpp @@ -75,7 +75,7 @@ class OPENVINO_API FrameworkNodeAttrs { class OPENVINO_API FrameworkNode : public MultiSubGraphOp { public: - OPENVINO_OP("FrameworkNode", "util"); + OPENVINO_OP("FrameworkNode", "util", MultiSubGraphOp); FrameworkNode() = default; diff --git a/src/core/include/openvino/op/util/multi_subgraph_base.hpp b/src/core/include/openvino/op/util/multi_subgraph_base.hpp index 749d82c0bbf3f0..9e81ac87579107 100644 --- a/src/core/include/openvino/op/util/multi_subgraph_base.hpp +++ b/src/core/include/openvino/op/util/multi_subgraph_base.hpp @@ -17,7 +17,7 @@ namespace util { /// class OPENVINO_API MultiSubGraphOp : public ov::op::Sink { public: - OPENVINO_OP("MultiSubGraphOp", "util"); + OPENVINO_OP("MultiSubGraphOp", "util", ov::op::Sink); /// \brief Abstract class describes a connection between a MultiSubGraphOp input and /// the body. class InputDescription { diff --git a/src/core/reference/CMakeLists.txt b/src/core/reference/CMakeLists.txt index 791489b6e5171c..f7874964233cf5 100644 --- a/src/core/reference/CMakeLists.txt +++ b/src/core/reference/CMakeLists.txt @@ -21,6 +21,18 @@ add_library(${TARGET_NAME} STATIC ${LIBRARY_SRC} ${PUBLIC_HEADERS}) add_library(openvino::reference ALIAS ${TARGET_NAME}) set_target_properties(${TARGET_NAME} PROPERTIES EXPORT_NAME reference) +if(ENABLE_AVX2) + ov_avx2_optimization_flags(avx2_flags) + + set(OV_REFERENCE_X86_AVX2_SRC + ${CMAKE_CURRENT_SOURCE_DIR}/src/op/convert_x86_intrinsics.cpp + ) + set_source_files_properties(${OV_REFERENCE_X86_AVX2_SRC} PROPERTIES COMPILE_OPTIONS "${avx2_flags}" + SKIP_UNITY_BUILD_INCLUSION ON + SKIP_PRECOMPILE_HEADERS ON) + target_compile_definitions(${TARGET_NAME} PRIVATE HAVE_AVX2) +endif() + ov_build_target_faster(${TARGET_NAME} UNITY PCH PRIVATE "src/precomp.hpp") diff --git a/src/core/reference/include/openvino/reference/convert.hpp b/src/core/reference/include/openvino/reference/convert.hpp index 926af31dbf5130..efb1ec1ca21415 100644 --- a/src/core/reference/include/openvino/reference/convert.hpp +++ b/src/core/reference/include/openvino/reference/convert.hpp @@ -14,9 +14,11 @@ #include "openvino/core/type/nf4.hpp" #if !defined(OS_CHROMEOS) && (defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)) -# define OV_CORE_USE_XBYAK_JIT 1 -#else -# define OV_CORE_USE_XBYAK_JIT 0 +# define OV_CORE_USE_XBYAK_JIT +#endif + +#if defined(OS_CHROMEOS) && defined(OPENVINO_ARCH_X86_64) && defined(HAVE_AVX2) +# define OV_CORE_USE_INTRINSICS #endif namespace ov { @@ -33,12 +35,12 @@ namespace reference { namespace detail { template -typename std::enable_if::value, TO>::type convert(const TI v) { +constexpr typename std::enable_if::value, TO>::type convert(const TI v) { return static_cast(v); } template -typename std::enable_if::value, TO>::type convert(const TI v) { +constexpr typename std::enable_if::value, TO>::type convert(const TI v) { return static_cast(static_cast(v)); } } // namespace detail @@ -62,8 +64,6 @@ void convert(const TI* arg, TO* out, const size_t count) { std::transform(arg, arg + count, out, detail::convert); } -#if OV_CORE_USE_XBYAK_JIT - template <> void convert(const uint8_t* arg, float16* out, size_t count); template <> @@ -79,8 +79,6 @@ void convert(const bfloat16* arg, float16* 
out, size_t count) template <> void convert(const bfloat16* arg, float* out, size_t count); -#endif // OV_CORE_USE_XBYAK_JIT - template <> void convert(const int32_t* arg, float16* out, size_t count); diff --git a/src/core/reference/include/openvino/reference/utils/convert_util.hpp b/src/core/reference/include/openvino/reference/utils/convert_util.hpp new file mode 100644 index 00000000000000..3be10c9fd19fbb --- /dev/null +++ b/src/core/reference/include/openvino/reference/utils/convert_util.hpp @@ -0,0 +1,88 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "openvino/reference/convert.hpp" + +namespace ov { + +// forward declare from inference dev API (cannot be included) +extern bool with_cpu_x86_avx2(); + +namespace reference { + +struct NoClamp { + static constexpr bool enabled = false; + + // Generic implementation + template + static constexpr T apply(const T v) { + return v; + } + + // Specialize for optimization + template + static R apply(const T v); +}; + +template +struct Clamp { + static constexpr bool enabled = true; + + // Generic implementation + static constexpr TO apply(const TI v) { + return (v < std::numeric_limits::lowest()) + ? std::numeric_limits::lowest() + : ((v > std::numeric_limits::max()) ? std::numeric_limits::max() + : detail::convert(v)); + } + + // Specialize for optimization + template + static R apply(const T v); +}; + +template +struct Converter { + static constexpr size_t vec_f32_size = 32 / sizeof(float); + + // Generic implementation to convert tail elements + template + static void tail(const TI* in, TO* out, size_t n) { + std::transform(in, in + n, out, [](const TI v) { + return detail::convert(ClampMode::apply(v)); + }); + } + + // Helper struct to defined optimized version of conversion + template + struct Optimized { + static constexpr bool enabled = false; + static void run(const TI* in, TO* out) {} + }; + + // Generic implementation of conversion + template ::enabled>::type* = nullptr> + static void apply(const TI* in, TO* out, size_t n) { + return tail(in, out, n); + } + + // Enabled when Optimized struct specialized defined for optimization + template ::enabled>::type* = nullptr> + static void apply(const TI* in, TO* out, size_t n) { + if (with_cpu_x86_avx2()) { + for (; n >= vec_f32_size; n -= vec_f32_size, in += vec_f32_size, out += vec_f32_size) { + Optimized::run(in, out); + } + } + tail(in, out, n); + } +}; + +} // namespace reference +} // namespace ov diff --git a/src/core/reference/include/openvino/reference/utils/convert_x86_intrinsics.hpp b/src/core/reference/include/openvino/reference/utils/convert_x86_intrinsics.hpp new file mode 100644 index 00000000000000..1716f05ba6ac78 --- /dev/null +++ b/src/core/reference/include/openvino/reference/utils/convert_x86_intrinsics.hpp @@ -0,0 +1,87 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#ifdef OV_CORE_USE_INTRINSICS +# include + +# include "openvino/reference/utils/convert_util.hpp" + +namespace ov { +namespace reference { +# ifdef HAVE_AVX2 + +// Clamp optimized specializations +template <> +__m128i NoClamp::apply<__m256i, __m128i>(const __m256i vec_i32); + +template <> +template <> +__m256 Clamp::apply<__m256, __m256>(const __m256 vec_f32); + +// Conversion optimized specializations +// --- f32 -> other +template <> +template <> +struct Converter::Optimized { + static constexpr bool enabled = true; + static void run(const float* in, 
float16* out); +}; + +template <> +template <> +struct Converter::Optimized> { + static constexpr bool enabled = true; + static void run(const float* in, float16* out); +}; + +template <> +template <> +struct Converter::Optimized { + static constexpr bool enabled = true; + static void run(const float* in, int8_t* out); +}; + +// --- f16 -> other +template <> +template <> +struct Converter::Optimized { + static constexpr bool enabled = true; + static void run(const float16* in, float* out); +}; + +template <> +template <> +struct Converter::Optimized { + static constexpr bool enabled = true; + static void run(const float16* in, int8_t* out); +}; + +// --- bf16 -> other +template <> +template <> +struct Converter::Optimized> { + static constexpr bool enabled = true; + static void run(const bfloat16* in, float16* out); +}; + +template <> +template <> +struct Converter::Optimized { + static constexpr bool enabled = true; + static void run(const bfloat16* in, float* out); +}; + +// --- u8 -> other +template <> +template <> +struct Converter::Optimized { + static constexpr bool enabled = true; + static void run(const uint8_t* in, float16* out); +}; +# endif // HAVE_AVX2 +} // namespace reference +} // namespace ov +#endif diff --git a/src/core/reference/src/op/convert.cpp b/src/core/reference/src/op/convert.cpp index 18ad7a6f4717e5..5054121b5615c0 100644 --- a/src/core/reference/src/op/convert.cpp +++ b/src/core/reference/src/op/convert.cpp @@ -4,18 +4,23 @@ #include "openvino/reference/convert.hpp" -#if OV_CORE_USE_XBYAK_JIT +#include "openvino/reference/utils/convert_util.hpp" + +#ifdef OV_CORE_USE_XBYAK_JIT # include "jit_generator.hpp" +#endif -using namespace ov::runtime; -#endif // OV_CORE_USE_XBYAK_JIT +#ifdef OV_CORE_USE_INTRINSICS +# include "openvino/reference/utils/convert_x86_intrinsics.hpp" +#endif namespace ov { namespace reference { -#if OV_CORE_USE_XBYAK_JIT + namespace { +#ifdef OV_CORE_USE_XBYAK_JIT template -void jit_convert_vec(jit::Generator&, const Xbyak::RegExp&, const Xbyak::RegExp&); +void jit_convert_vec(jit::Generator&, const Xbyak::RegExp&, const Xbyak::RegExp&) {} template void jit_convert_vec_prepare(jit::Generator&) {} @@ -265,40 +270,6 @@ class jit_convert_array : public jit::Generator { } }; -template -void convert_impl(const TI* arg, TO* out, size_t count) { - auto converter = jit_convert_array::get(); - - if (converter) { - jit_convert_array::args_t args = {arg, out, count}; - converter(&args); - } else { - for (size_t i = 0; i < count; ++i) { - out[i] = static_cast(arg[i]); - } - } -} - -template <> -void convert_impl(const float* arg, float16* out, size_t count) { - auto converter = jit_convert_array::get(); - - if (converter) { - jit_convert_array::args_t args = {arg, out, count}; - converter(&args); - } else { - for (size_t i = 0; i < count; ++i) { - if (arg[i] > std::numeric_limits::max()) { - out[i] = std::numeric_limits::max(); - } else if (arg[i] < std::numeric_limits::lowest()) { - out[i] = std::numeric_limits::lowest(); - } else { - out[i] = static_cast(arg[i]); - } - } - } -} - template void jit_count_out_of_range_vec_prepare(jit::Generator&) {} @@ -504,114 +475,88 @@ class jit_count_out_of_range : public jit::Generator { } }; +#endif // OV_CORE_USE_XBYAK_JIT + +template +void convert_impl(const TI* arg, TO* out, size_t count) { +#ifdef OV_CORE_USE_XBYAK_JIT + if (auto converter = jit_convert_array::get()) { + jit_convert_array::args_t args = {arg, out, count}; + converter(&args); + } else +#endif + { + Converter::template apply(arg, out, count); + } 
+} } // namespace template <> void convert(const uint8_t* arg, float16* out, size_t count) { - convert_impl(arg, out, count); + convert_impl(arg, out, count); } template <> void convert(const float16* arg, float* out, size_t count) { - convert_impl(arg, out, count); + convert_impl(arg, out, count); } template <> void convert(const float* arg, float16* out, size_t count) { - convert_impl(arg, out, count); + convert_impl(arg, out, count); } template <> void convert(const float* arg, int8_t* out, size_t count) { - convert_impl(arg, out, count); + convert_impl(arg, out, count); } template <> void convert(const float16* arg, int8_t* out, size_t count) { - convert_impl(arg, out, count); + convert_impl(arg, out, count); } template <> void convert(const bfloat16* arg, float16* out, size_t count) { - convert_impl(arg, out, count); + convert_impl(arg, out, count); } template <> void convert(const bfloat16* arg, float* out, size_t count) { - convert_impl(arg, out, count); + convert_impl(arg, out, count); } -#endif // OV_CORE_USE_XBYAK_JIT - void convert_from_f32_to_f16_with_clamp(const float* arg, float16* out, size_t count) { -#if OV_CORE_USE_XBYAK_JIT - convert_impl(arg, out, count); -#else - // FIXME CVS-125496: duplicate and stub for ARM, provide optimized solution - for (size_t i = 0; i < count; ++i) { - if (arg[i] > std::numeric_limits::max()) { - out[i] = std::numeric_limits::max(); - } else if (arg[i] < std::numeric_limits::lowest()) { - out[i] = std::numeric_limits::lowest(); - } else { - out[i] = static_cast(arg[i]); - } - } -#endif // OV_CORE_USE_XBYAK_JIT + convert_impl>(arg, out, count); } template <> void convert(const int32_t* arg, float16* out, size_t count) { - for (size_t i = 0; i < count; ++i) { - if (arg[i] > std::numeric_limits::max()) { - out[i] = std::numeric_limits::max(); - } else if (arg[i] < std::numeric_limits::lowest()) { - out[i] = std::numeric_limits::lowest(); - } else { - out[i] = static_cast(arg[i]); - } - } + Converter::apply>(arg, out, count); } void convert_from_bf16_to_f16_with_clamp(const bfloat16* arg, float16* out, size_t count) { -#if OV_CORE_USE_XBYAK_JIT - convert_impl(arg, out, count); -#else + // can re-use Clamp as bf16 is converted to float before clamping + using clamp_bf16_f16 = Clamp; + convert_impl(arg, out, count); // FIXME CVS-125496: duplicate and stub for ARM, provide optimized solution - for (size_t i = 0; i < count; ++i) { - if (arg[i] > std::numeric_limits::max()) { - out[i] = std::numeric_limits::max(); - } else if (arg[i] < std::numeric_limits::lowest()) { - out[i] = std::numeric_limits::lowest(); - } else { - out[i] = static_cast(arg[i]); - } - } -#endif // OV_CORE_USE_XBYAK_JIT } size_t count_out_of_f16_range(const float* arg, size_t count) { - size_t num_out_of_range = 0; - -#if OV_CORE_USE_XBYAK_JIT - auto converter = jit_count_out_of_range::get(); - if (converter) { +#ifdef OV_CORE_USE_XBYAK_JIT + if (auto converter = jit_count_out_of_range::get()) { + size_t num_out_of_range = 0; jit_count_out_of_range::args_t args = {arg, &num_out_of_range, count}; converter(&args); return num_out_of_range; } #endif // OV_CORE_USE_XBYAK_JIT - for (size_t i = 0; i < count; ++i) { - // if abs value is smaller than the smallest positive fp16, but not zero - if (std::abs(arg[i]) < ov::float16::from_bits(0x0001) && arg[i] != 0.0f) { - num_out_of_range++; - } else if (arg[i] > std::numeric_limits::max()) { - num_out_of_range++; - } else if (arg[i] < std::numeric_limits::lowest()) { - num_out_of_range++; - } - } - return num_out_of_range; + const auto 
is_out_of_f16_range = [](const float v) { + return (std::abs(v) < float16::from_bits(0x0001) && v != 0.0f) || (v > std::numeric_limits::max()) || + (v < std::numeric_limits::lowest()); + }; + + return std::count_if(arg, arg + count, is_out_of_f16_range); } } // namespace reference diff --git a/src/core/reference/src/op/convert_x86_intrinsics.cpp b/src/core/reference/src/op/convert_x86_intrinsics.cpp new file mode 100644 index 00000000000000..78957feaaef3c7 --- /dev/null +++ b/src/core/reference/src/op/convert_x86_intrinsics.cpp @@ -0,0 +1,96 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/reference/convert.hpp" +#if defined(OV_CORE_USE_INTRINSICS) +# include "openvino/reference/utils/convert_x86_intrinsics.hpp" + +namespace ov { +namespace reference { + +# if defined(HAVE_AVX2) +template <> +__m128i NoClamp::apply<__m256i, __m128i>(const __m256i vec_i32) { + // clang-format off + static const auto shuffle = _mm256_setr_epi8(0, 4, 8, 12, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 0, 4, 8, 12, -1, -1, -1, -1, -1, -1, -1, -1); + // clang-format on + + const auto t = _mm256_shuffle_epi8(vec_i32, shuffle); + const auto low = _mm256_castsi256_si128(t); + const auto high = _mm256_extracti128_si256(t, 1); + return _mm_or_si128(low, high); +} + +template <> +template <> +__m256 Clamp::apply<__m256, __m256>(const __m256 vec_f32) { + static const auto lo = _mm256_set1_ps(std::numeric_limits::lowest()); + static const auto hi = _mm256_set1_ps(std::numeric_limits::max()); + + return _mm256_min_ps(_mm256_max_ps(vec_f32, lo), hi); +} + +// --- f32 -> other +void Converter::Optimized>::run(const float* in, float16* out) { + auto vec_f32 = _mm256_loadu_ps(in); // load f32 input + auto vec_f16 = _mm256_cvtps_ph(Clamp::apply<__m256, __m256>(vec_f32), 0); // f32 -> f16 with clamp + _mm_storeu_si128(reinterpret_cast<__m128i*>(out), vec_f16); // store f16 output +} + +void Converter::Optimized::run(const float* in, float16* out) { + auto vec_f32 = _mm256_loadu_ps(in); // load f32 input + auto vec_f16 = _mm256_cvtps_ph(vec_f32, _MM_ROUND_NEAREST); // f32 -> f16 + _mm_storeu_si128(reinterpret_cast<__m128i*>(out), vec_f16); // store f16 output +} + +void Converter::Optimized::run(const float* in, int8_t* out) { + auto vec_f32 = _mm256_load_ps(in); // load f32 input + auto vec_i32 = _mm256_cvttps_epi32(vec_f32); // f32 -> i32 + auto vec_i8 = NoClamp::template apply<__m256i, __m128i>(vec_i32); // i32 -> i8 no clamping + _mm_storeu_si64(out, vec_i8); // store i8 output +} + +// --- f16 -> other +void Converter::Optimized::run(const float16* in, float* out) { + auto vec_f16 = _mm_loadu_si128(reinterpret_cast(in)); // load input as f16 vector + auto vec_f32 = _mm256_cvtph_ps(vec_f16); // convert f16 -> f32 + _mm256_storeu_ps(out, vec_f32); // store f32 in output +} + +void Converter::Optimized::run(const float16* in, int8_t* out) { + const auto vec_f16 = _mm_loadu_si128(reinterpret_cast(in)); // load input as f16 vector + const auto vec_f32 = _mm256_cvtph_ps(vec_f16); // convert f16 -> f32 + auto vec_i32 = _mm256_cvttps_epi32(vec_f32); // f32 -> i32 + auto vec_i8 = NoClamp::apply<__m256i, __m128i>(vec_i32); // i32 -> i8 no clamp + _mm_storeu_si64(out, vec_i8); // store i8 output +} + +// --- bf16 -> other +void Converter::Optimized>::run(const bfloat16* in, float16* out) { + auto vec_bf16 = _mm256_cvtepu16_epi32(*reinterpret_cast(in)); // expand to 32-bits + auto vec_f32 = _mm256_castsi256_ps(_mm256_slli_epi32(vec_bf16, 16)); // 
shift left bf16 -> f32 + auto vec_f16 = + _mm256_cvtps_ph(Clamp::apply<__m256, __m256>(vec_f32), _MM_ROUND_NEAREST); // f32 -> f16 + _mm_storeu_si128(reinterpret_cast<__m128i*>(out), vec_f16); // store f16 +} + +void Converter::Optimized::run(const bfloat16* in, float* out) { + auto vec_f32 = _mm256_cvtepu16_epi32(*reinterpret_cast(in)); // expand to 32-bits + vec_f32 = _mm256_slli_epi32(vec_f32, 16); // shift left bf16 -> f32 + _mm256_storeu_ps(out, _mm256_castsi256_ps(vec_f32)); // store f32 in output +} + +// --- u8 -> other +void Converter::Optimized::run(const uint8_t* in, float16* out) { + auto i64 = _mm_loadu_si64(in); // load u8 input + auto vec_i32 = _mm256_cvtepu8_epi32(i64); // u8 -> i32 + auto vec_f32 = _mm256_cvtepi32_ps(vec_i32); // i32 -> f32 + auto vec_f16 = _mm256_cvtps_ph(vec_f32, _MM_ROUND_NEAREST); // f32 -> f16 + _mm_storeu_si128(reinterpret_cast<__m128i*>(out), vec_f16); // store f16 output +} +# endif // HAVE_AVX2 +} // namespace reference +} // namespace ov +#endif // OV_CORE_USE_INTRINSICS diff --git a/src/core/reference/src/op/jit_generator.cpp b/src/core/reference/src/op/jit_generator.cpp index d516d210e71967..7d7da06d5da8d5 100644 --- a/src/core/reference/src/op/jit_generator.cpp +++ b/src/core/reference/src/op/jit_generator.cpp @@ -16,7 +16,7 @@ # include "openvino/core/type/float16.hpp" namespace ov { -namespace runtime { +namespace reference { namespace jit { using namespace Xbyak; @@ -191,7 +191,7 @@ void Generator::copy(const Xbyak::Reg64& dst, const Xbyak::Reg64& src, copy(dst, src, size); } } // namespace jit -} // namespace runtime +} // namespace reference } // namespace ov #endif // OPENVINO_ARCH_X86 || OPENVINO_ARCH_X86_64 diff --git a/src/core/reference/src/op/jit_generator.hpp b/src/core/reference/src/op/jit_generator.hpp index 924d582d8bc8d4..b4b9cd7a60c23f 100644 --- a/src/core/reference/src/op/jit_generator.hpp +++ b/src/core/reference/src/op/jit_generator.hpp @@ -5,90 +5,87 @@ #pragma once #if defined _WIN32 && !defined NOMINMAX -#define NOMINMAX +# define NOMINMAX #endif -#include #include +#include + namespace ov { -namespace runtime { +namespace reference { namespace jit { #ifdef XBYAK64 - static const Xbyak::Operand::Code abi_save_gpr_regs[] = { - Xbyak::Operand::RBX, - Xbyak::Operand::RBP, - Xbyak::Operand::R12, - Xbyak::Operand::R13, - Xbyak::Operand::R14, - Xbyak::Operand::R15, -#ifdef _WIN32 - Xbyak::Operand::RDI, - Xbyak::Operand::RSI, -#endif - }; +static const Xbyak::Operand::Code abi_save_gpr_regs[] = { + Xbyak::Operand::RBX, + Xbyak::Operand::RBP, + Xbyak::Operand::R12, + Xbyak::Operand::R13, + Xbyak::Operand::R14, + Xbyak::Operand::R15, +# ifdef _WIN32 + Xbyak::Operand::RDI, + Xbyak::Operand::RSI, +# endif +}; -#ifdef _WIN32 -#define abi_param1 Xbyak::Reg64(Xbyak::Operand::RCX) // RCX -#else -#define abi_param1 Xbyak::Reg64(Xbyak::Operand::RDI) // RDI -#endif +# ifdef _WIN32 +# define abi_param1 Xbyak::Reg64(Xbyak::Operand::RCX) // RCX +# else +# define abi_param1 Xbyak::Reg64(Xbyak::Operand::RDI) // RDI +# endif #endif // XBYAK64 - class Generator : public Xbyak::CodeGenerator - { - static constexpr size_t xmm_len = 16; +class Generator : public Xbyak::CodeGenerator { + static constexpr size_t xmm_len = 16; #ifdef _WIN32 - static constexpr size_t xmm_to_preserve_start = 6; - static constexpr size_t xmm_to_preserve = 10; + static constexpr size_t xmm_to_preserve_start = 6; + static constexpr size_t xmm_to_preserve = 10; #else - static constexpr size_t xmm_to_preserve_start = 0; - static constexpr size_t xmm_to_preserve = 0; + 
static constexpr size_t xmm_to_preserve_start = 0; + static constexpr size_t xmm_to_preserve = 0; #endif - static const size_t num_abi_save_gpr_regs = sizeof(abi_save_gpr_regs) / sizeof(abi_save_gpr_regs[0]); - const size_t size_of_abi_save_regs; - - const Xbyak::Reg64 reg_EVEX_max_8b_offt; - static constexpr int EVEX_max_8b_offt = 0x200; - - public: - const Xbyak::Reg64 param = abi_param1; - - typedef enum - { - isa_any, - sse42, - avx, - avx2, - avx512_common, - avx512_core, - avx512_core_vnni, - avx512_mic, - avx512_mic_4ops, - avx512_core_bf16, - avx512_vpopcnt, - fp16 - } cpu_isa_t; - - static bool mayiuse(const cpu_isa_t cpu_isa); - static bool is_x64(); - - Generator(void* code_ptr = nullptr, size_t code_size = 16 * 1024); - void preamble(); - void postamble(); - - void foreach (const Xbyak::Reg64& idx, - size_t step, - const Xbyak::Reg64& end, - std::function && fn); - - template - void copy(const Xbyak::Reg64& dst, - const Xbyak::Reg64& src, - const Xbyak::Reg64& size); - }; - } - } - } // namespace ov + static const size_t num_abi_save_gpr_regs = sizeof(abi_save_gpr_regs) / sizeof(abi_save_gpr_regs[0]); + const size_t size_of_abi_save_regs; + + const Xbyak::Reg64 reg_EVEX_max_8b_offt; + static constexpr int EVEX_max_8b_offt = 0x200; + +public: + const Xbyak::Reg64 param = abi_param1; + + typedef enum { + isa_any, + sse42, + avx, + avx2, + avx512_common, + avx512_core, + avx512_core_vnni, + avx512_mic, + avx512_mic_4ops, + avx512_core_bf16, + avx512_vpopcnt, + fp16 + } cpu_isa_t; + + static bool mayiuse(const cpu_isa_t cpu_isa); + static bool is_x64(); + + Generator(void* code_ptr = nullptr, size_t code_size = 16 * 1024); + void preamble(); + void postamble(); + + void foreach (const Xbyak::Reg64& idx, + size_t step, + const Xbyak::Reg64& end, + std::function && fn); + + template + void copy(const Xbyak::Reg64& dst, const Xbyak::Reg64& src, const Xbyak::Reg64& size); +}; +} // namespace jit +} // namespace reference +} // namespace ov diff --git a/src/core/shape_inference/include/search_sorted_shape_inference.hpp b/src/core/shape_inference/include/search_sorted_shape_inference.hpp new file mode 100644 index 00000000000000..da417f54121ee4 --- /dev/null +++ b/src/core/shape_inference/include/search_sorted_shape_inference.hpp @@ -0,0 +1,35 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/op/search_sorted.hpp" +#include "utils.hpp" + +namespace ov { +namespace op { +namespace v15 { +template > +std::vector shape_infer(const SearchSorted* op, const std::vector& input_shapes) { + // [HACK]: By convention, shape_infer should also perform node validation.. 
+ op->validate(); + const auto& sorted_shape = input_shapes[0]; + const auto& values_shape = input_shapes[1]; + auto output_shape = values_shape; + TShape::merge_into(output_shape, sorted_shape); + + if (output_shape.rank().is_static()) { + auto last_it = output_shape.end() - 1; + if (values_shape.rank().is_static()) { + *last_it = *(input_shapes[1].end() - 1); + } else { + *last_it = Dimension::dynamic(); + } + } + + return {std::move(output_shape)}; +} +} // namespace v15 +} // namespace op +} // namespace ov diff --git a/src/core/src/op/search_sorted.cpp b/src/core/src/op/search_sorted.cpp new file mode 100644 index 00000000000000..df179d925d054a --- /dev/null +++ b/src/core/src/op/search_sorted.cpp @@ -0,0 +1,66 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "itt.hpp" +#include "openvino/core/validation_util.hpp" +#include "search_sorted_shape_inference.hpp" + +namespace ov { +namespace op { +namespace v15 { + +SearchSorted::SearchSorted(const Output& sorted_sequence, const Output& values, bool right_mode) + : Op({sorted_sequence, values}), + m_right_mode(right_mode) { + constructor_validate_and_infer_types(); +} + +bool SearchSorted::validate() const { + NODE_VALIDATION_CHECK(this, get_input_size() == 2); + NODE_VALIDATION_CHECK(this, + get_input_element_type(0) == get_input_element_type(1), + "Sorted sequence and values must have the same element type."); + + const auto& sorted_shape = get_input_partial_shape(0); + const auto& values_shape = get_input_partial_shape(1); + + if (sorted_shape.rank().is_static() && values_shape.rank().is_static() && sorted_shape.rank().get_length() > 1) { + NODE_VALIDATION_CHECK(this, + sorted_shape.rank().get_length() == values_shape.rank().get_length(), + "Sorted sequence and values have different ranks."); + + for (int64_t i = 0; i < sorted_shape.rank().get_length() - 1; ++i) { + NODE_VALIDATION_CHECK(this, + sorted_shape[i].compatible(values_shape[i]), + "Sorted sequence and values has different ", + i, + " dimension."); + } + } + + return true; +} + +void SearchSorted::validate_and_infer_types() { + OV_OP_SCOPE(v15_SearchSorted_validate_and_infer_types); + const auto& output_shapes = shape_infer(this, ov::util::get_node_input_partial_shapes(*this)); + set_output_type(0, ov::element::i64, output_shapes[0]); +} + +bool SearchSorted::visit_attributes(AttributeVisitor& visitor) { + OV_OP_SCOPE(v15_SearchSorted_visit_attributes); + visitor.on_attribute("right_mode", m_right_mode); + return true; +} + +std::shared_ptr SearchSorted::clone_with_new_inputs(const OutputVector& new_args) const { + OV_OP_SCOPE(v15_SearchSorted_clone_with_new_inputs); + check_new_args_count(this, new_args); + return std::make_shared(new_args.at(0), new_args.at(1), get_right_mode()); +} +} // namespace v15 +} // namespace op +} // namespace ov \ No newline at end of file diff --git a/src/core/tests/type_prop/search_sorted.cpp b/src/core/tests/type_prop/search_sorted.cpp new file mode 100644 index 00000000000000..12af514038ec24 --- /dev/null +++ b/src/core/tests/type_prop/search_sorted.cpp @@ -0,0 +1,94 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/op/search_sorted.hpp" + +#include "common_test_utils/test_assertions.hpp" +#include "common_test_utils/type_prop.hpp" + +using namespace std; +using namespace ov; + +#define EXPECT_THROW_SUBSTRING(SORTED, VALUES, SUBSTRING) \ + OV_EXPECT_THROW_HAS_SUBSTRING(std::ignore = make_shared(SORTED, VALUES), 
\ + NodeValidationFailure, \ + SUBSTRING); + +static void PerformShapeTest(const PartialShape& sorted_shape, + const PartialShape& values_shape, + const PartialShape& expected_output_shape) { + auto sorted = make_shared(element::i32, sorted_shape); + auto values = make_shared(element::i32, values_shape); + auto search_sorted_op = make_shared(sorted, values); + EXPECT_EQ(search_sorted_op->get_element_type(), element::i64); + EXPECT_EQ(search_sorted_op->get_output_partial_shape(0), expected_output_shape); +} + +TEST(type_prop, search_sorted_shape_infer_equal_inputs) { + PerformShapeTest({1, 3, 6}, {1, 3, 6}, {1, 3, 6}); +} + +TEST(type_prop, search_sorted_shape_infer_sorted_dynamic) { + PerformShapeTest(PartialShape::dynamic(), {1, 3, 6}, {1, 3, 6}); +} + +TEST(type_prop, search_sorted_shape_infer_values_dynamic) { + PerformShapeTest({1, 3, 7, 5}, PartialShape::dynamic(), {1, 3, 7, -1}); +} + +TEST(type_prop, search_sorted_shape_infer_different_last_dim) { + PerformShapeTest({1, 3, 7, 100}, {1, 3, 7, 10}, {1, 3, 7, 10}); +} + +TEST(type_prop, search_sorted_shape_infer_sorted_1d) { + PerformShapeTest({5}, {2, 3}, {2, 3}); +} + +TEST(type_prop, search_sorted_shape_infer_sorted_and_values_1d) { + PerformShapeTest({5}, {20}, {20}); +} + +TEST(type_prop, search_sorted_shape_infer_sorted_1d_values_dynamic) { + PerformShapeTest({8}, {-1, -1, 3}, {-1, -1, 3}); +} + +TEST(type_prop, search_sorted_shape_infer_both_dynamic_1) { + PerformShapeTest({1, -1, 7, -1}, {-1, 3, -1, 10}, {1, 3, 7, 10}); +} + +TEST(type_prop, search_sorted_shape_infer_both_dynamic_2) { + PerformShapeTest({1, -1, 7, 50}, {-1, 3, -1, -1}, {1, 3, 7, -1}); +} + +TEST(type_prop, search_sorted_shape_infer_both_dynamic_3) { + PerformShapeTest(PartialShape::dynamic(), PartialShape::dynamic(), PartialShape::dynamic()); +} + +TEST(type_prop, search_sorted_shape_infer_both_dynamic_4) { + PerformShapeTest({-1, -1, 50}, {-1, -1, 20}, {-1, -1, 20}); +} + +TEST(type_prop, search_sorted_shape_infer_both_dynamic_5) { + PerformShapeTest({-1}, {-1, -1, 3}, {-1, -1, 3}); +} + +TEST(type_prop, search_sorted_shape_infer_different_types) { + auto sorted = make_shared(element::f32, Shape{1, 3, 6}); + auto values = make_shared(element::i32, Shape{1, 3, 6}); + EXPECT_THROW_SUBSTRING(values, sorted, std::string("must have the same element type")); +} + +TEST(type_prop, search_sorted_shape_infer_wrong_rank) { + auto sorted = make_shared(element::i32, Shape{1, 1, 3, 6}); + auto values = make_shared(element::i32, Shape{1, 3, 6}); + EXPECT_THROW_SUBSTRING(sorted, values, std::string("Sorted sequence and values have different ranks")); +} + +TEST(type_prop, search_sorted_shape_infer_wrong_dim) { + auto sorted = make_shared(element::i32, Shape{1, 1, 3, 6}); + auto values = make_shared(element::i32, Shape{1, 1, 5, 6}); + EXPECT_THROW_SUBSTRING(sorted, values, std::string(" different 2 dimension.")); +} + +#undef EXPECT_THROW_SUBSTRING \ No newline at end of file diff --git a/src/core/tests/visitors/op/sorted_search.cpp b/src/core/tests/visitors/op/sorted_search.cpp new file mode 100644 index 00000000000000..860c9528d0e9aa --- /dev/null +++ b/src/core/tests/visitors/op/sorted_search.cpp @@ -0,0 +1,30 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "openvino/op/search_sorted.hpp" +#include "visitors/visitors.hpp" + +using namespace std; +using namespace ov; +using ov::test::NodeBuilder; + +TEST(attributes, search_sorted_op) { + using TOp = ov::op::v15::SearchSorted; + 
NodeBuilder::opset().insert(); + auto sorted = make_shared(element::i32, Shape{2, 3, 50, 50}); + auto values = make_shared(element::i32, Shape{2, 3, 50, 50}); + + auto op = make_shared(sorted, values); + NodeBuilder builder(op, {sorted, values}); + auto g_op = ov::as_type_ptr(builder.create()); + + // attribute count + const auto expected_attr_count = 1; + EXPECT_EQ(builder.get_value_map_size(), expected_attr_count); + + // space_to_depth attributes + EXPECT_EQ(g_op->get_right_mode(), op->get_right_mode()); +} diff --git a/src/frontends/onnx/frontend/src/op/com.microsoft/matmulnbits.cpp b/src/frontends/onnx/frontend/src/op/com.microsoft/matmulnbits.cpp index 5b8a439933efd1..0db8f11c4f214f 100644 --- a/src/frontends/onnx/frontend/src/op/com.microsoft/matmulnbits.cpp +++ b/src/frontends/onnx/frontend/src/op/com.microsoft/matmulnbits.cpp @@ -59,8 +59,9 @@ ov::OutputVector matmulnbits(const ov::frontend::onnx::Node& node) { "Expected rank of quantized weights is 3 [N][n_blocks_per_col][blob_size], got: ", b_quantized.get_partial_shape().rank()); CHECK_VALID_NODE(node, - a.get_element_type() == ov::element::f16 || a.get_element_type() == ov::element::f32, - "Unsupported input A type, accepted FP16, FP32, got: ", + a.get_element_type() == ov::element::f16 || a.get_element_type() == ov::element::f32 || + a.get_element_type() == ov::element::dynamic, + "Unsupported input A type, accepted dynamic, FP16, FP32, got: ", a.get_element_type()); CHECK_VALID_NODE( node, @@ -96,7 +97,9 @@ ov::OutputVector matmulnbits(const ov::frontend::onnx::Node& node) { if (inputs.size() > 5) { bias = inputs[5]; CHECK_VALID_NODE(node, - bias.get_element_type() == a.get_element_type(), + bias.get_element_type() == a.get_element_type() || + a.get_element_type() == ov::element::dynamic || + bias.get_element_type() == ov::element::dynamic, "Unsupported input bias type, must be equal to input A type, got: ", bias.get_element_type()); CHECK_VALID_NODE(node, @@ -121,17 +124,35 @@ ov::OutputVector matmulnbits(const ov::frontend::onnx::Node& node) { case 2: casted_b_shape = ov::Shape{static_cast(N * n_blocks_per_col), static_cast(blob_size * 4)}; casted_b = std::make_shared(ov::element::u2, casted_b_shape, b_const->get_data_ptr()); - default_zp = std::make_shared(a.get_element_type(), Shape{}, 2); + if (a.get_element_type() != ov::element::dynamic) { + default_zp = std::make_shared(a.get_element_type(), Shape{}, 2); + } else { + default_zp = + std::make_shared(a, + std::make_shared(ov::element::f32, Shape{}, 2.f)); + } break; case 4: casted_b_shape = ov::Shape{static_cast(N * n_blocks_per_col), static_cast(blob_size * 2)}; casted_b = std::make_shared(ov::element::u4, casted_b_shape, b_const->get_data_ptr()); - default_zp = std::make_shared(a.get_element_type(), Shape{}, 8); + if (a.get_element_type() != ov::element::dynamic) { + default_zp = std::make_shared(a.get_element_type(), Shape{}, 8); + } else { + default_zp = + std::make_shared(a, + std::make_shared(ov::element::f32, Shape{}, 8.f)); + } break; case 8: casted_b_shape = ov::Shape{static_cast(N * n_blocks_per_col), static_cast(blob_size)}; casted_b = op::util::reshape(b_const, casted_b_shape); - default_zp = std::make_shared(a.get_element_type(), Shape{}, 128); + if (a.get_element_type() != ov::element::dynamic) { + default_zp = std::make_shared(a.get_element_type(), Shape{}, 128); + } else { + default_zp = + std::make_shared(a, + std::make_shared(ov::element::f32, Shape{}, 128.f)); + } break; default: FRONT_END_THROW("Unsupported bits count"); diff --git 
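When input A has a dynamic element type, the matmulnbits change above can no longer create the default zero point (the midpoint of the packed range: 2 for 2-bit, 8 for 4-bit, 128 for 8-bit weights) directly in A's type, so it defers the cast to runtime. A hedged sketch of that idea with plain opset ops follows; make_default_zero_point and its parameters are illustrative names rather than the frontend's variables, and the ConvertLike call is written per its data/like semantics (the constant is converted to whatever type A resolves to).

    #include <memory>

    #include "openvino/op/constant.hpp"
    #include "openvino/op/convert_like.hpp"

    using namespace ov;

    // Sketch: a scalar zero point that ends up with the same element type as 'a',
    // whether or not that type is known at conversion time.
    std::shared_ptr<Node> make_default_zero_point(const Output<Node>& a, float midpoint) {
        if (a.get_element_type() != element::dynamic) {
            // Static type: build the constant directly in A's element type.
            return op::v0::Constant::create(a.get_element_type(), Shape{}, {midpoint});
        }
        // Dynamic type: keep an f32 constant and let ConvertLike cast it at runtime.
        auto c = op::v0::Constant::create(element::f32, Shape{}, {midpoint});
        return std::make_shared<op::v1::ConvertLike>(c, a);
    }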
a/src/frontends/pytorch/src/op/bitwise.cpp b/src/frontends/pytorch/src/op/bitwise.cpp index ba602c86b7bc82..ef66e15c5bcc37 100644 --- a/src/frontends/pytorch/src/op/bitwise.cpp +++ b/src/frontends/pytorch/src/op/bitwise.cpp @@ -7,6 +7,7 @@ #include "openvino/op/bitwise_not.hpp" #include "openvino/op/bitwise_or.hpp" #include "openvino/op/bitwise_xor.hpp" +#include "openvino/op/convert_like.hpp" #include "utils.hpp" namespace ov { @@ -29,8 +30,12 @@ OutputVector translate_bitwise_and(const NodeContext& context) { Output x; Output y; std::tie(x, y) = get_inputs_with_promoted_types(context, 0, 1); - auto and_x = context.mark_node(std::make_shared(x, y)); + auto and_x = context.mark_node(std::make_shared(x, y))->output(0); if (!context.input_is_none(2)) { + auto out = context.get_input(2); + if (out.get_element_type().is_dynamic() || and_x.get_element_type() != out.get_element_type()) { + and_x = context.mark_node(std::make_shared(and_x, out)); + } context.mutate_input(2, and_x); } return {and_x}; @@ -41,8 +46,12 @@ OutputVector translate_bitwise_or(const NodeContext& context) { Output x; Output y; std::tie(x, y) = get_inputs_with_promoted_types(context, 0, 1); - auto or_x = context.mark_node(std::make_shared(x, y)); + auto or_x = context.mark_node(std::make_shared(x, y))->output(0); if (!context.input_is_none(2)) { + auto out = context.get_input(2); + if (out.get_element_type().is_dynamic() || or_x.get_element_type() != out.get_element_type()) { + or_x = context.mark_node(std::make_shared(or_x, out)); + } context.mutate_input(2, or_x); } return {or_x}; @@ -53,8 +62,12 @@ OutputVector translate_bitwise_xor(const NodeContext& context) { Output x; Output y; std::tie(x, y) = get_inputs_with_promoted_types(context, 0, 1); - auto xor_x = context.mark_node(std::make_shared(x, y)); + auto xor_x = context.mark_node(std::make_shared(x, y))->output(0); if (!context.input_is_none(2)) { + auto out = context.get_input(2); + if (out.get_element_type().is_dynamic() || xor_x.get_element_type() != out.get_element_type()) { + xor_x = context.mark_node(std::make_shared(xor_x, out)); + } context.mutate_input(2, xor_x); } return {xor_x}; diff --git a/src/frontends/pytorch/src/op_table.cpp b/src/frontends/pytorch/src/op_table.cpp index 1e4ecfc1e1367f..141e5b02ad8d25 100644 --- a/src/frontends/pytorch/src/op_table.cpp +++ b/src/frontends/pytorch/src/op_table.cpp @@ -675,6 +675,7 @@ const std::unordered_map get_supported_ops_ts() { {"aten::var_mean", op::translate_var_mean}, {"aten::view", op::quantizable_op}, {"aten::view_as", op::translate_reshape_as}, + {"aten::wait", op::skip_node}, {"aten::where", op::translate_where}, {"aten::zero", op::translate_zeros_like}, {"aten::zeros", op::translate_zeros}, @@ -685,6 +686,7 @@ const std::unordered_map get_supported_ops_ts() { {"prim::Constant", op::translate_constant}, {"prim::device", op::translate_constant}, // prim::DictConstruct - Supported in limited set of patterns + {"prim::fork", op::translate_pythonop}, {"prim::GetAttr", op::translate_get_attr}, {"prim::If", op::translate_if}, {"prim::is_cuda", op::return_false_scalar}, @@ -787,6 +789,7 @@ const std::unordered_map get_supported_ops_fx() { {"aten.clamp_min.default", op::translate_1to1_match_2_inputs_align_types}, {"aten.clamp_min.Tensor", op::translate_1to1_match_2_inputs_align_types}, {"aten.clone.default", op::skip_node}, // ignore clone operators that are inserted by PyTorch autograd + {"aten.col2im.default", op::translate_col2im}, {"aten.constant_pad_nd.default", op::translate_constant_pad_nd_fx}, 
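The bitwise translators above now honour PyTorch's optional out tensor: when out has a dynamic or different element type, the result is passed through ConvertLike before mutate_input, so the value written back matches out's dtype. A hedged sketch of that guard in isolation (the NodeContext plumbing is left out and the names are placeholders):

    #include <memory>

    #include "openvino/op/bitwise_and.hpp"
    #include "openvino/op/convert_like.hpp"

    using namespace ov;

    // Sketch: compute x & y and, if an 'out' tensor with an unknown or different
    // element type is supplied, cast the result to out's type before storing it.
    Output<Node> bitwise_and_with_out(const Output<Node>& x,
                                      const Output<Node>& y,
                                      const Output<Node>& out) {
        Output<Node> result = std::make_shared<op::v13::BitwiseAnd>(x, y)->output(0);
        if (out.get_element_type().is_dynamic() ||
            result.get_element_type() != out.get_element_type()) {
            result = std::make_shared<op::v1::ConvertLike>(result, out)->output(0);
        }
        return result;
    }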
{"aten.convolution.default", op::translate_convolution}, {"aten.copy.default", op::translate_copy_fx}, diff --git a/src/frontends/tensorflow_common/include/helper_ops/internal_operation.hpp b/src/frontends/tensorflow_common/include/helper_ops/internal_operation.hpp index 998f9146584899..6cc0457c38eca9 100644 --- a/src/frontends/tensorflow_common/include/helper_ops/internal_operation.hpp +++ b/src/frontends/tensorflow_common/include/helper_ops/internal_operation.hpp @@ -62,6 +62,7 @@ class InternalOperation : public ov::frontend::tensorflow::FrameworkNode { m_no_conversion_reason(no_conversion_reason) {} public: + OPENVINO_OP("InternalOperation", "util", ov::frontend::tensorflow::FrameworkNode); // get a reason why some operation is unable to convert to OpenVINO opset // we store this information for InternalOperation to elaborate the reason // for cases such as Constant node of string type diff --git a/src/frontends/tensorflow_common/src/op/expand_dims.cpp b/src/frontends/tensorflow_common/src/op/expand_dims.cpp index b3b37ad38cc302..a40e5c9b1bc6df 100644 --- a/src/frontends/tensorflow_common/src/op/expand_dims.cpp +++ b/src/frontends/tensorflow_common/src/op/expand_dims.cpp @@ -3,7 +3,13 @@ // #include "common_op_table.hpp" +#include "helper_ops/complex_type_mark.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/less.hpp" +#include "openvino/op/select.hpp" +#include "openvino/op/subtract.hpp" #include "openvino/op/unsqueeze.hpp" +#include "utils.hpp" using namespace std; using namespace ov::op; @@ -14,9 +20,31 @@ namespace tensorflow { namespace op { OutputVector translate_expand_dims_op(const NodeContext& node) { - default_op_checks(node, 2, {"ExpandDims", "EXPAND_DIMS"}); + default_op_checks(node, 2, {"ExpandDims", "EXPAND_DIMS"}, true); auto input = node.get_input(0); auto axis = node.get_input(1); + auto complex_type_mark = as_type_ptr(input.get_node_shared_ptr()); + + if (complex_type_mark) { + element::Type complex_part_type = complex_type_mark->get_complex_part_type(); + input = complex_type_mark->input_value(0); + + auto const_zero = create_same_type_const_scalar(axis, 0); + + auto is_axis_neg = make_shared(axis, const_zero); + + auto const_one = create_same_type_const_scalar(axis, 1); + auto axis_min_one = make_shared(axis, const_one); + + auto new_axis = make_shared(is_axis_neg, axis_min_one, axis); + + auto unsqueeze = make_shared(input, new_axis); + + set_node_name(node.get_name(), unsqueeze); + auto complex_result = make_shared(unsqueeze, complex_part_type); + return {complex_result}; + } + auto unsqueeze = make_shared(input, axis); set_node_name(node.get_name(), unsqueeze); return {unsqueeze}; diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/arm/convert.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/arm/convert.cpp index 1230034b778437..4d1877f5d23ffe 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/arm/convert.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/arm/convert.cpp @@ -95,8 +95,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ConvertStub, ConvertStub, ::testing::Combine( ::testing::ValuesIn(inputShapes_ConvertInput), ::testing::ValuesIn(types_ConvertStub), - ::testing::Values(2), - ::testing::Values(2), + ::testing::Values(1), + ::testing::Values(1), ::testing::Values(ov::test::utils::DEVICE_CPU)), Convert::getTestCaseName); diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/x64/convert.cpp 
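For a complex input the ExpandDims translation above unsqueezes the packed real/imag tensor, which carries one extra trailing dimension of size 2, so a negative axis (counted against the logical complex shape) has to be shifted down by one while non-negative axes stay unchanged; that is exactly what the Less/Subtract/Select subgraph computes. A standalone sketch of just that axis arithmetic (an illustration of the rule, not the frontend code):

    #include <cassert>
    #include <cstdint>

    // A complex tensor of logical rank R is stored packed with rank R + 1
    // (trailing dim of size 2 for real/imag). A negative axis addressed against
    // the logical shape must be shifted by one to point at the same position in
    // the packed shape; non-negative axes are unchanged.
    int64_t adjust_axis_for_packed_complex(int64_t axis) {
        return axis < 0 ? axis - 1 : axis;
    }

    int main() {
        // Logical shape [2, 3] is packed as [2, 3, 2].
        assert(adjust_axis_for_packed_complex(-1) == -2);  // insert just before the real/imag dim
        assert(adjust_axis_for_packed_complex(1) == 1);    // leading axes are untouched
        return 0;
    }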
b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/x64/convert.cpp index 842d6a5b453d09..590135c15750d3 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/x64/convert.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/x64/convert.cpp @@ -104,8 +104,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ConvertStub, ConvertStub, ::testing::Combine( ::testing::ValuesIn(inputShapes_ConvertInput), ::testing::ValuesIn(types_ConvertInput), - ::testing::Values(2), - ::testing::Values(2), + ::testing::Values(1), + ::testing::Values(1), ::testing::Values(ov::test::utils::DEVICE_CPU)), Convert::getTestCaseName); diff --git a/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/snipptes_mark_skipped.cpp b/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/snipptes_mark_skipped.cpp index 027386f0ad050d..f4ae354b6fe4e0 100644 --- a/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/snipptes_mark_skipped.cpp +++ b/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/snipptes_mark_skipped.cpp @@ -30,15 +30,7 @@ class SnippetsMarkSkippedTests : public TransformationTestsF { } }; -class SKIP_SnippetsMarkSkippedTests : public SnippetsMarkSkippedTests { -public: - void SetUp() override { - GTEST_SKIP(); - } - void TearDown() override{}; -}; - -TEST_F(SKIP_SnippetsMarkSkippedTests /* CVS-114336 */, smoke_Snippets_SkipAfterInputsMatMulEltwise) { +TEST_F(SnippetsMarkSkippedTests, smoke_Snippets_SkipAfterInputsMatMulEltwise) { const auto &f = MatMulEltwiseBranchesFunction(std::vector {{1, 3, 4, 4}, {1, 3, 4, 4}}); model = f.getOriginal(); // Fully tokenizable, since inputs are followed by MatMul diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp index 71623f32843eac..f4e09a51513085 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp @@ -4,17 +4,15 @@ #pragma once -#include "openvino/runtime/threading/cpu_streams_executor.hpp" +#include "openvino/runtime/threading/istreams_executor.hpp" #include "intel_gpu/graph/topology.hpp" #include "intel_gpu/graph/program.hpp" #include "intel_gpu/graph/serialization/binary_buffer.hpp" -#include "intel_gpu/runtime/compounds.hpp" #include "intel_gpu/runtime/memory.hpp" #include "intel_gpu/runtime/engine.hpp" #include "intel_gpu/runtime/event.hpp" #include "intel_gpu/runtime/stream.hpp" -#include "intel_gpu/runtime/lru_cache.hpp" #include "intel_gpu/runtime/shape_predictor.hpp" #include "intel_gpu/plugin/variable_state.hpp" @@ -118,34 +116,10 @@ struct network { std::vector> const& get_outputs() { return _outputs; } - const std::vector>& get_outputs() const { - return reinterpret_cast>&>(_outputs); - } - - network_output get_output(const primitive_id& output_id) { - event::ptr evt; - if (get_stream().get_queue_type() == QueueTypes::out_of_order || _enable_profiling) - evt = get_primitive_event(output_id); - return network_output(evt, get_output_memory(output_id), get_stream_ptr(), get_output_layout(output_id)); - } - layout get_node_output_layout(const primitive_id& output_id) const; memory::ptr get_output_memory(const primitive_id& output_id); layout get_output_layout(const primitive_id& output_id) const; std::vector get_input_layouts() const; - /// @brief Returns the list of primitive ids before and after graph optimization. 
- /// @details If primitive was not optimized, the old and actual id will be the same. - /// @n If primitive was optimized during graph optimization, the actual id will be "_optimized_". - std::map get_all_primitives() const { - auto primitive_ids = get_all_primitive_ids(); - auto primitive_org_ids = get_all_primitive_org_ids(); - std::map result; - for (decltype(primitive_org_ids.size()) i = 0; i < primitive_org_ids.size(); i++) { - result.emplace(primitive_org_ids[i], primitive_ids[i]); - } - return result; - } - /// @brief Returns the list of @ref event for the primitives that were executed in network. std::map get_executed_primitives() const { auto primitive_ids = get_executed_primitive_ids(); @@ -203,7 +177,6 @@ struct network { void configure_primitives_second_output(); void build_insts_deps(); uint32_t get_id() const { return net_id; } - uint32_t get_local_id() const { return _local_net_id; } stream& get_stream() const { return *_stream; } stream::ptr get_stream_ptr() const { return _stream; } bool is_internal() const { return _internal; } @@ -211,7 +184,7 @@ struct network { bool is_dynamic() const { return _is_dynamic; } size_t get_weights_cache_capacity() const { return _weights_cache_capacity; } - memory_pool& get_memory_pool() { + memory_pool& get_memory_pool() const { return *_memory_pool; } @@ -221,7 +194,6 @@ struct network { const ov::intel_gpu::VariableStateInfo& get_variable_info(const std::string &variable_id) const; const ov::intel_gpu::VariablesMap& get_variables() const; const ov::intel_gpu::VariablesInfoMap& get_variables_info() const; - std::vector get_kv_cache_ids() const { return kv_cache_ids; } const ExecutionConfig& get_config() const { return _config; } @@ -245,8 +217,6 @@ struct network { bool _is_dynamic = false; bool _enable_profiling = false; bool _reset_arguments; - uint32_t _local_net_id = 0; // This is for thread-safe deserialization. 'net_id' is globally unique, - // but '_local_net_id' is unique only in each intel_gpu::Graph. 
std::unordered_map> _primitives; std::vector _in_out_shared_mem_types; @@ -257,10 +227,8 @@ struct network { ov::intel_gpu::VariablesMap _variables_states; ov::intel_gpu::VariablesInfoMap _variables_state_info; - std::vector kv_cache_ids; program::primitives_info _prims_info; - std::map _ext_id_mapping; size_t _weights_cache_capacity = 1; std::unordered_map _events; @@ -274,9 +242,7 @@ struct network { void allocate_primitive_instance(program_node const& node); void transfer_memory_to_device(std::shared_ptr instance, program_node const& node); void add_to_exec_order(const primitive_id& id); - std::shared_ptr find_in_internal_networks(const primitive_id& id) const; std::shared_ptr find_primitive(const primitive_id& id) const; - void check_names(); void add_default_output_chains(); void calculate_weights_cache_capacity(); output_chains_map::iterator add_output_chain(std::shared_ptr& p_inst); @@ -284,7 +250,9 @@ struct network { void dump_memory_pool(std::string dump_path, int64_t curr_iter); #ifdef GPU_DEBUG_CONFIG - int64_t iteration = 0; + mutable int64_t iteration = 0; + friend class NetworkDebugHelper; + friend class NodeDebugHelper; #endif }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/debug_helper.cpp b/src/plugins/intel_gpu/src/graph/debug_helper.cpp new file mode 100644 index 00000000000000..7f7071e704683e --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/debug_helper.cpp @@ -0,0 +1,526 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "debug_helper.hpp" +#include "openvino/util/file_util.hpp" + +#ifdef GPU_DEBUG_CONFIG + +#include "to_string_utils.h" +#include "loop_inst.h" +#include "condition_inst.h" +#include "program_dump_graph.h" + +#include +#include +#include + +namespace cldnn { + +namespace { + +float convert_element(int64_t i) { return static_cast(i); } +float convert_element(int32_t i) { return static_cast(i); } + +float convert_element(float f) { return f; } + +float convert_element(ov::float16 h) { return static_cast(h); } + +size_t get_x_pitch(const layout& layout) { + try { + auto tensor_x0 = tensor(batch(0), feature(0), spatial(0, 0, 0, 0)); + auto tensor_x1 = tensor(batch(0), feature(0), spatial(1, 0, 0, 0)); + auto x0 = layout.get_linear_offset(tensor_x0); + auto x1 = layout.get_linear_offset(tensor_x1); + return (x1 - x0); + } catch (...) { + // When spatial size of x=0, x_pitch is meaningless + return 0; + } +} + +template +void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream, bool dump_raw) { + auto&& size = mem->get_layout().get_tensor(); + + GPU_DEBUG_GET_INSTANCE(debug_config); + auto batch_size = std::max(std::min(debug_config->dump_layers_limit_batch, size.batch[0]), 1); + tensor tmp_size(size); + tmp_size.batch[0] = batch_size; + if (tmp_size == size) { + file_stream << "shape: " << size.to_string() << " "; + file_stream << "(count: " << size.count() + << ", original format: " << cldnn::fmt_to_str(mem->get_layout().format) << ")" + << (dump_raw ? " raw data" : "") << std::endl; + } else { + file_stream << "shape: " << tmp_size.to_string() << " "; + file_stream << "(count: " << tmp_size.count() + << ", original format: " << cldnn::fmt_to_str(mem->get_layout().format) + << ", original shape: " << size.to_string() << ")" + << (dump_raw ? 
" raw data" : "") << std::endl; + } + + if (size.count() == 0) { + file_stream << "Empty buffer" << std::endl; + return; + } + + mem_lock lock(mem, stream); + auto mem_ptr = lock.data(); + auto x_pitch = get_x_pitch(mem->get_layout()); + std::stringstream buffer; + + if (!dump_raw) { + for (cldnn::tensor::value_type g = 0; g < size.group[0]; ++g) { + for (cldnn::tensor::value_type b = 0; b < batch_size; ++b) { + for (cldnn::tensor::value_type f = 0; f < size.feature[0]; ++f) { + for (cldnn::tensor::value_type w = 0; w < size.spatial[3]; ++w) { + for (cldnn::tensor::value_type z = 0; z < size.spatial[2]; ++z) { + for (cldnn::tensor::value_type y = 0; y < size.spatial[1]; ++y) { + cldnn::tensor t(cldnn::group(g), cldnn::batch(b), cldnn::feature(f), cldnn::spatial(0, y, z, w)); + size_t input_it = mem->get_layout().get_linear_offset(t); + + for (cldnn::tensor::value_type x = 0; x < size.spatial[0]; ++x, input_it += x_pitch) { + buffer << std::fixed << std::setprecision(6) << convert_element(mem_ptr[input_it]) << std::endl; + } + } + } + } + } + } + } + } else { + for (size_t i = 0; i < lock.size(); ++i) { + buffer << std::fixed << std::setprecision(6) << convert_element(mem_ptr[i]) << std::endl; + } + } + file_stream << buffer.str(); +} + +void unpack(cldnn::data_types type, uint8_t input, int8_t &v0, int8_t &v1) { + if (type == cldnn::data_types::i4) { + char s_bit = (input & 0x08); + char mask = s_bit > 0 ? 0xF0 : 0x00; + v0 = (input & 0x0F) | mask; + + input >>= 4; + s_bit = (input & 0x08); + mask = s_bit > 0 ? 0xF0 : 0x00; + v1 = (input & 0x0F) | mask; + } else if (type == cldnn::data_types::u4) { + v0 = input & 0x0F; + v1 = input >> 4; + } else { + OPENVINO_ASSERT(false, "not supported unpacking"); + } +} + +void dump_i4u4(cldnn::data_types type, memory::ptr mem, stream& stream, std::ofstream& file_stream, bool dump_raw) { + auto&& size = mem->get_layout().get_tensor(); + + GPU_DEBUG_GET_INSTANCE(debug_config); + auto batch_size = std::max(std::min(debug_config->dump_layers_limit_batch, size.batch[0]), 1); + tensor tmp_size(size); + tmp_size.batch[0] = batch_size; + if (tmp_size == size) { + file_stream << "shape: " << size.to_string() << " "; + file_stream << "(count: " << size.count() + << ", original format: " << cldnn::fmt_to_str(mem->get_layout().format) << ")" + << (dump_raw ? " raw data" : "") << std::endl; + } else { + file_stream << "shape: " << tmp_size.to_string() << " "; + file_stream << "(count: " << tmp_size.count() + << ", original format: " << cldnn::fmt_to_str(mem->get_layout().format) + << ", original shape: " << size.to_string() << ")" + << (dump_raw ? " raw data" : "") << std::endl; + } + + if (size.count() == 0) { + file_stream << "Empty buffer" << std::endl; + return; + } + + mem_lock lock(mem, stream); + auto mem_ptr = lock.data(); + std::stringstream buffer; + + if (dump_raw) { + for (size_t i = 0; i < lock.size(); ++i) { + int8_t v0, v1; + unpack(type, mem_ptr[i], v0, v1); + buffer << std::fixed << std::setprecision(6) << static_cast(v0) << std::endl; + buffer << std::fixed << std::setprecision(6) << static_cast(v1) << std::endl; + } + } else { + std::cout << __func__ << " supports raw dump only" << std::endl; + } + file_stream << buffer.str(); +} + +void log_memory_to_file(memory::ptr mem, layout data_layout, stream& stream, std::string layerName, bool dump_raw) { + std::cout << "Dump " << (dump_raw ? 
"raw " : "") << layerName << std::endl; + GPU_DEBUG_GET_INSTANCE(debug_config); + std::string filename = debug_config->get_name_for_dump(layerName); + filename = debug_config->dump_layers_path + filename + ".txt"; + std::ofstream file_stream(filename); + if (!mem) { + file_stream << "Empty" << std::endl; + return; + } + + // Reinterpret buffer to represent actual data layout + auto actual_mem = mem->get_engine()->reinterpret_buffer(*mem, data_layout); + + auto mem_dt = actual_mem->get_layout().data_type; + if (mem_dt == cldnn::data_types::f32) + dump(actual_mem, stream, file_stream, dump_raw); + else if (mem_dt == cldnn::data_types::f16) + dump(actual_mem, stream, file_stream, dump_raw); + else if (mem_dt == cldnn::data_types::i64) + dump(actual_mem, stream, file_stream, dump_raw); + else if (mem_dt == cldnn::data_types::i32) + dump(actual_mem, stream, file_stream, dump_raw); + else if (mem_dt == cldnn::data_types::i8) + dump(actual_mem, stream, file_stream, dump_raw); + else if (mem_dt == cldnn::data_types::u8) + dump(actual_mem, stream, file_stream, dump_raw); + else if (mem_dt == cldnn::data_types::u8) + dump(actual_mem, stream, file_stream, dump_raw); + else if (mem_dt == cldnn::data_types::i4 || mem_dt == cldnn::data_types::u4) + dump_i4u4(mem_dt, actual_mem, stream, file_stream, dump_raw); + else + std::cout << "Dump for this data type is not supported: " << dt_to_str(mem_dt) << std::endl; +} + +} // namespace + +static std::string get_file_path_for_binary_dump(cldnn::layout layout, std::string name) { + std::string filename; + std::string data_type = ov::element::Type(layout.data_type).get_type_name(); + std::string format = layout.format.to_string(); + std::string tensor; + auto dims = layout.get_dims(); + for (size_t r = 0 ; r < layout.get_rank() ; r++) { + tensor += ("_" + to_string(dims[r])); + } + +#ifdef GPU_DEBUG_CONFIG + GPU_DEBUG_GET_INSTANCE(debug_config); + std::string layer_name = debug_config->get_name_for_dump(name); + filename = debug_config->dump_layers_path + layer_name + + "__" + data_type + "_" + tensor + "__" + format + ".bin"; +#endif + return filename; +} + +NodeDebugHelper::NodeDebugHelper(const primitive_inst& inst) + : m_inst(inst) + , m_stream(inst.get_network().get_stream()) + , m_network(inst.get_network()) + , m_program(inst.get_network().get_program().get()) + , m_iter(m_network.iteration) { + // Load binary dump for input layers + if (!debug_config->load_layers_raw_dump.empty()) { + const std::string layer_name = m_inst.id(); + auto files = debug_config->get_filenames_for_matched_layer_loading_binaries(layer_name); + if (!files.empty()) { + if (m_inst.is_input()) { + // Loading binary dumps for output tensors of input-layers : only one output exists or index(dstN) exists + auto dump_file = debug_config->get_matched_from_filelist(files, "_dst0__"); + OPENVINO_ASSERT((files.size() == 1 || dump_file.length() != 0), "Unexpected binary dump for input layer"); + + OPENVINO_ASSERT(files.size() == m_inst.outputs_memory_count(), "Mis-match dump file count"); + + for (size_t i = 0; i < m_inst.outputs_memory_count(); i++) { + auto dump_file = files[0]; + if (files.size() > 1 || m_inst.outputs_memory_count() != 1) { + std::string pattern = "_dst" + std::to_string(i) + "__"; + dump_file = debug_config->get_matched_from_filelist(files, pattern); + } + OPENVINO_ASSERT((dump_file.length() > 0), "Could not find expected pattern '_dst[N]__' for binary dump"); + GPU_DEBUG_COUT << " Load binary dump : " << dump_file << " for " << layer_name << std::endl; + + std::vector 
bin = ov::util::load_binary(dump_file); + OPENVINO_ASSERT(!bin.empty(), "Failure loading binary from OV_GPU_LoadDumpRawBinary : " + dump_file); + + auto output_mem = m_inst.output_memory_ptr(i); + OPENVINO_ASSERT(output_mem->size() == bin.size(), "memory size mis-match for OV_GPU_LoadDumpRawBinary : " + layer_name + + "\n Expected size : " + to_string(output_mem->size()) + ", Binary : " + to_string(bin.size())); + + output_mem->copy_from(m_stream, static_cast(&bin[0]), true); + } + } else { + auto check_dst = debug_config->get_matched_from_filelist(files, "_dst0__"); + OPENVINO_ASSERT(check_dst.length() == 0, "Expected to load binaries for inputs of " + layer_name); + + // Loading input tensors for any layer + auto dump_file = debug_config->get_matched_from_filelist(files, "_src0__"); + OPENVINO_ASSERT(dump_file.length() != 0, "Could not find expected pattern '_src[N]__' for binary dump input : " + layer_name); + + for (size_t i = 0; i < m_inst.dependencies().size(); i++) { + auto dump_file = files[0]; + if (files.size() > 1 || m_inst.dependencies().size() != 1) { + std::string pattern = "_src" + std::to_string(i) + "__"; + dump_file = debug_config->get_matched_from_filelist(files, pattern); + } + if (dump_file.length() == 0) { + GPU_DEBUG_COUT << " Skip loading for input(" << i << ") of " << layer_name << std::endl; + continue; + } + OPENVINO_ASSERT((dump_file.length() > 0), "Could not find expected pattern '_src[N]__' for binary dump input"); + GPU_DEBUG_COUT << " Load binary dump : " << dump_file << " for input(" << i << ") of " << layer_name << std::endl; + + std::vector bin = ov::util::load_binary(dump_file); + OPENVINO_ASSERT(!bin.empty(), "Failure loading binary from OV_GPU_LoadDumpRawBinary : " + dump_file); + + auto input_mem = m_inst.dep_memory_ptr(i); + if (input_mem->size() != bin.size()) { + std::cout << "WARNING: memory size mis-match for OV_GPU_LoadDumpRawBinary : " + layer_name + << " " << input_mem->size() << " / " << bin.size() << std::endl; + bin.resize(input_mem->size()); + } + + input_mem->copy_from(m_stream, static_cast(&bin[0]), true); + } + } + } + } + + // Dump input buffers of 'inst' + if (debug_config->dump_layers_path.length() > 0) { + const std::string layer_name = inst.id(); + + if (debug_config->is_target_iteration(m_iter) && + debug_config->dump_layers_dst_only == 0 && debug_config->is_layer_for_dumping(layer_name)) { + std::string debug_str_for_bin_load = " Command for loading : OV_GPU_LoadDumpRawBinary=\"" + layer_name + ":"; + for (size_t i = 0; i < m_inst.dependencies().size(); i++) { + std::string name = get_file_prefix() + layer_name + "_src" + std::to_string(i); + auto input_mem = m_inst.dep_memory_ptr(i); + if (input_mem == nullptr) { + GPU_DEBUG_COUT << " input_mem_" << i << " is nullptr. Nothing to dump." 
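The unpack() and dump_i4u4() helpers defined earlier in this file split every packed byte into two nibbles; for i4 the sign bit (0x08) is copied into the upper four bits so each nibble becomes a sign-extended int8, while u4 nibbles are taken as-is. The same bit manipulation as a tiny standalone sketch:

    #include <cassert>
    #include <cstdint>

    // Sign-extend the low nibble of a packed int4 pair to int8, then the high one.
    void unpack_i4(uint8_t packed, int8_t& lo, int8_t& hi) {
        lo = static_cast<int8_t>((packed & 0x0F) | ((packed & 0x08) ? 0xF0 : 0x00));
        packed = static_cast<uint8_t>(packed >> 4);
        hi = static_cast<int8_t>((packed & 0x0F) | ((packed & 0x08) ? 0xF0 : 0x00));
    }

    int main() {
        int8_t lo = 0, hi = 0;
        unpack_i4(0xF7, lo, hi);   // low nibble 0x7 -> 7, high nibble 0xF -> -1
        assert(lo == 7 && hi == -1);
        return 0;
    }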
<< std::endl; + continue; + } + + auto dep = m_inst.dependencies().at(i); + auto input_layout = dep.first->get_output_layout(dep.second); + GPU_DEBUG_IF(debug_config->dump_layers_binary) { + // Binary dump : raw + auto filename = get_file_path_for_binary_dump(input_layout, name); + + mem_lock lock(input_mem, m_stream); + ov::util::save_binary(filename, lock.data(), input_mem->size()); + GPU_DEBUG_COUT << " Dump layer src : " << layer_name << " to " << filename << std::endl; + debug_str_for_bin_load += (filename + ","); + } else { + log_memory_to_file(input_mem, + input_layout, + m_stream, + name, + debug_config->dump_layers_raw); + } + } + + if (debug_config->dump_layers_binary && !inst.is_input()) { + debug_str_for_bin_load[debug_str_for_bin_load.size()-1] = '\"'; + GPU_DEBUG_COUT << debug_str_for_bin_load << std::endl; + } + } + } +} + + +NodeDebugHelper::~NodeDebugHelper() { + // Dump output buffers of 'inst' + if (debug_config->dump_layers_path.length() > 0) { + m_stream.finish(); + const std::string layer_name = m_inst.id(); + + GPU_DEBUG_IF(debug_config->is_target_iteration(m_iter) && + debug_config->is_layer_for_dumping(layer_name, m_inst.is_output(), m_inst.is_input())) { + std::string debug_str_for_bin_load = " Command for loading : OV_GPU_LoadDumpRawBinary=\"" + + layer_name + ":"; + for (size_t i = 0; i < m_inst.outputs_memory_count(); i++) { + std::string name = get_file_prefix() + "_dst" + std::to_string(i); + auto output_mem = m_inst.output_memory_ptr(i); + if (output_mem == nullptr) { + GPU_DEBUG_COUT << " output_mem is nullptr. Nothing to dump." << std::endl; + continue; + } + + GPU_DEBUG_IF(debug_config->dump_layers_binary) { + // Binary dump : raw + auto output_layout = m_inst.get_output_layout(i); + auto filename = get_file_path_for_binary_dump(output_layout, name); + + mem_lock lock(output_mem, m_stream); + ov::util::save_binary(filename, lock.data(), output_mem->size()); + GPU_DEBUG_COUT << " Dump layer dst : " << layer_name << " to " << filename << std::endl; + debug_str_for_bin_load += (filename + ","); + } else { + // Text dump + log_memory_to_file(output_mem, m_inst.get_output_layout(i), m_stream, name, debug_config->dump_layers_raw); + } + } + + GPU_DEBUG_IF(debug_config->dump_layers_binary && m_inst.is_input()) { + debug_str_for_bin_load[debug_str_for_bin_load.size()-1] = '\"'; + GPU_DEBUG_COUT << debug_str_for_bin_load << std::endl;; + } + } + } +} + +NetworkDebugHelper::NetworkDebugHelper(const network& net) + : m_network(net) + , m_iter(net.iteration) { + auto net_id = m_network.get_id(); + GPU_DEBUG_IF(debug_config->dump_memory_pool > 0) { + auto& iters = debug_config->dump_memory_pool_iters; + if (iters.empty() || iters.find(m_iter) != iters.end()) { + GPU_DEBUG_COUT << "============================================================================" << std::endl; + GPU_DEBUG_COUT << "Start network execution (net_id : " << net_id << ", iter :" << m_iter << ")" << std::endl; + if (m_iter == 0 && net_id > 0) { + dump_memory_pool(debug_config->dump_memory_pool_path, m_iter); + GPU_DEBUG_COUT << "============================================================================" << std::endl; + } + } + } else { + GPU_DEBUG_TRACE << "============================================================================" << std::endl; + GPU_DEBUG_TRACE << "Start network execution (net_id : " << net_id << ", iter :" << m_iter << ")" << std::endl; + } + + if (debug_config->list_layers == 1) { + for (auto& inst : m_network._exec_order) { + GPU_DEBUG_COUT << inst->id() << std::endl; + 
if (inst->get_node().is_type()) { + auto& loop_node = inst->get_node().as(); + for (auto& prim : loop_node.get_body_program()->get_processing_order()) { + GPU_DEBUG_COUT << "\t" << prim->id() << std::endl; + } + } else if (inst->get_node().is_type()) { + auto& cond_node = inst->get_node().as(); + GPU_DEBUG_COUT << "* Branch_True" << std::endl; + for (auto& prim : cond_node.get_branch_true().inner_program->get_processing_order()) { + GPU_DEBUG_COUT << "\t" << prim->id() << std::endl; + } + GPU_DEBUG_COUT << "* Branch_False" << std::endl; + for (auto& prim : cond_node.get_branch_false().inner_program->get_processing_order()) { + GPU_DEBUG_COUT << "\t" << prim->id() << std::endl; + } + } + } + + if (!m_network.is_internal()) + exit(0); + } +} + +NetworkDebugHelper::~NetworkDebugHelper() { + auto prog = m_network.get_program().get(); + auto net_id = m_network.get_id(); + // print '-data_shape' option for benchmark_app + if (debug_config->print_input_data_shapes == 1) { + std::stringstream data_shape_str; + auto add_string = [&data_shape_str](std::string str) { + data_shape_str << ((data_shape_str.rdbuf()->in_avail() == 0) ? " -data_shape " : ",") << str; + }; + + for (auto& inst : m_network._exec_order) { + auto name = inst->id(); + auto pos = name.find(':'); + auto type = name.substr(0, pos); + name.erase(0, pos + 1); + if (inst->is_input() && type == "parameter") { + add_string(name + inst->get_output_layout().get_partial_shape().to_string()); + } + } + + GPU_DEBUG_COUT << "[program:" << std::setw(2) << ((prog != nullptr) ? prog->get_id() : 0) + << "|network:" << std::setw(2) << net_id << "|iter:" << std::setw(4) << m_iter << "] benchmark_app cmd: " + << data_shape_str.str() << std::endl; + } + + if (!debug_config->dump_graphs.empty() && debug_config->is_target_iteration(m_iter)) { + auto get_fixed_str = [](int value, int length = 2) -> std::string { + std::ostringstream ss; + ss << std::setw(length) << std::setfill('0') << std::to_string(value); + return ss.str(); + }; + std::string path = get_dir_path(m_network.get_config()); + if (!path.empty()) { + std::ofstream ofs(path + "cldnn_program_exec_p" + get_fixed_str(prog->get_id()) + "_n" + get_fixed_str(net_id) + + "_" + get_fixed_str(m_iter, 5) + ".graph"); + dump_graph_init(ofs, *prog, [this](const primitive_id& id) -> std::shared_ptr { + return m_network.get_primitive(id); + }); + } + } + + if (debug_config->dump_memory_pool > 0) { + auto& iters = debug_config->dump_memory_pool_iters; + if (iters.empty() || iters.find(m_iter) != iters.end()) { + dump_memory_pool(debug_config->dump_memory_pool_path, m_iter); + GPU_DEBUG_COUT << "============================================================================" << std::endl; + } + } + + m_network.iteration++; +} + +void NetworkDebugHelper::dump_memory_pool(std::string dump_path, int64_t curr_iter) const { + m_network.get_memory_pool().dump(m_network.get_id(), curr_iter, dump_path); + auto get_constants_mem_size = [&](allocation_type type) -> size_t { + size_t mem_size = 0; + for (auto& prim : m_network._primitives) { + if (prim.second->get_node().is_constant()) { + for (size_t i = 0; i < prim.second->outputs_memory_count(); i++) { + if (prim.second->output_memory_ptr(i)->get_allocation_type() == type) + mem_size += prim.second->output_memory_ptr(i)->size(); + } + } + } + return mem_size; + }; + auto get_variables_mem_size = [&](allocation_type type) -> size_t { + size_t mem_size = 0; + for (auto& var : m_network.get_variables()) { + if (var.second->get_memory() && 
var.second->get_memory()->get_allocation_type() == type) + mem_size += var.second->get_actual_mem_size(); + } + return mem_size; + }; + auto get_mb_size = [&](int64_t size) -> std::string { + if (size == 0) return "0 MB"; + return std::to_string(static_cast(size) / (1024 * 1024)) + " MB"; + }; + int64_t usm_host_const_mem_size = get_constants_mem_size(allocation_type::usm_host); + int64_t usm_device_const_mem_size = get_constants_mem_size(allocation_type::usm_device); + int64_t usm_host_var_mem_size = get_variables_mem_size(allocation_type::usm_host); + int64_t usm_device_var_mem_size = get_variables_mem_size(allocation_type::usm_device); + int64_t host_mem_size = m_network.get_engine().get_used_device_memory(allocation_type::usm_host); + int64_t device_mem_size = m_network.get_engine().get_used_device_memory(allocation_type::usm_device); + int64_t usm_host_mem_pool_size = m_network.get_memory_pool().get_total_mem_pool_size(allocation_type::usm_host); + int64_t usm_host_etc_size = host_mem_size - usm_host_mem_pool_size + - usm_host_const_mem_size - usm_host_var_mem_size; + int64_t usm_device_mem_pool_size = m_network.get_memory_pool().get_total_mem_pool_size(allocation_type::usm_device); + int64_t usm_device_etc_size = device_mem_size - usm_device_mem_pool_size + - usm_device_const_mem_size - usm_device_var_mem_size; + GPU_DEBUG_COUT << "------------------------------------------------------------------------" << std::endl; + GPU_DEBUG_COUT << "Memory statistics for (net_id:" << m_network.get_id() << ", iter:" << curr_iter << ")" << std::endl; + GPU_DEBUG_COUT << " Total host mem size : " << get_mb_size(host_mem_size) << std::endl; + GPU_DEBUG_COUT << " * Memory pool : " << get_mb_size(usm_host_mem_pool_size) << std::endl; + GPU_DEBUG_COUT << " * Constant : " << get_mb_size(usm_host_const_mem_size) << std::endl; + GPU_DEBUG_COUT << " * Variable : " << get_mb_size(usm_host_var_mem_size) << std::endl; + GPU_DEBUG_COUT << " * ETC : " << get_mb_size(usm_host_etc_size) << std::endl; + GPU_DEBUG_COUT << " Total device mem size : " << get_mb_size(device_mem_size) << std::endl; + GPU_DEBUG_COUT << " * Memory pool : " << get_mb_size(usm_device_mem_pool_size) << std::endl; + GPU_DEBUG_COUT << " * Constant : " << get_mb_size(usm_device_const_mem_size) << std::endl; + GPU_DEBUG_COUT << " * Variable : " << get_mb_size(usm_device_var_mem_size) << std::endl; + GPU_DEBUG_COUT << " * ETC : " << get_mb_size(usm_device_etc_size) << std::endl; + GPU_DEBUG_COUT << "------------------------------------------------------------------------" << std::endl; +} + +} // namespace cldnn + +#endif // GPU_DEBUG_CONFIG diff --git a/src/plugins/intel_gpu/src/graph/debug_helper.hpp b/src/plugins/intel_gpu/src/graph/debug_helper.hpp new file mode 100644 index 00000000000000..c7c6bd006af1db --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/debug_helper.hpp @@ -0,0 +1,69 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "intel_gpu/graph/network.hpp" +#include "intel_gpu/graph/program.hpp" +#include "intel_gpu/runtime/stream.hpp" +#include "intel_gpu/runtime/debug_configuration.hpp" +#include "primitive_inst.h" + +namespace cldnn { + +#ifdef GPU_DEBUG_CONFIG + +class NodeDebugHelper { +public: + NodeDebugHelper(const primitive_inst& inst); + ~NodeDebugHelper(); + +private: + std::string get_iteration_prefix() { + if (m_iter < 0) + return std::string(""); + return std::to_string(m_iter) + "_"; + } + + std::string get_file_prefix() { + auto prog_id = 
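dump_memory_pool() above reports, separately for usm_host and usm_device allocations, the total used memory split into memory-pool, constant and variable buffers, and everything left over is printed as ETC (total minus pool minus constants minus variables). The accounting as a one-function standalone sketch with hypothetical byte counts:

    #include <cassert>
    #include <cstdint>

    // Per allocation type: memory not attributed to the pool, constants
    // or variables is reported as "ETC".
    int64_t etc_mem_size(int64_t total_used, int64_t pool, int64_t constants, int64_t variables) {
        return total_used - pool - constants - variables;
    }

    int main() {
        const int64_t mb = 1024 * 1024;
        // Hypothetical numbers: 512 MB used, 300 MB pool, 100 MB constants, 50 MB variables.
        assert(etc_mem_size(512 * mb, 300 * mb, 100 * mb, 50 * mb) == 62 * mb);
        return 0;
    }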
((m_program != nullptr) ? m_program->get_id() : 0); + auto net_id = m_network.get_id(); + + return "program" + std::to_string(prog_id) + "_network" + std::to_string(net_id) + "_" + get_iteration_prefix() + m_inst.id(); + } + + + const primitive_inst& m_inst; + stream& m_stream; + const network& m_network; + const program* m_program; + const size_t m_iter; + + const debug_configuration* debug_config = cldnn ::debug_configuration ::get_instance(); +}; + +class NetworkDebugHelper { +public: + NetworkDebugHelper(const network& net); + ~NetworkDebugHelper(); + +private: + void dump_memory_pool(std::string dump_path, int64_t curr_iter) const; + const network& m_network; + const size_t m_iter; + + const debug_configuration* debug_config = cldnn ::debug_configuration ::get_instance(); +}; + +#define NETWORK_DEBUG(net) NetworkDebugHelper __network_debug_helper(net) +#define NODE_DEBUG(inst) NodeDebugHelper __node_debug_helper(inst) + +#else + +#define NETWORK_DEBUG(...) +#define NODE_DEBUG(...) + +#endif // GPU_DEBUG_CONFIG + +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/include/program_dump_graph.h b/src/plugins/intel_gpu/src/graph/include/program_dump_graph.h index 075422a4196b38..cf5111de6b247e 100644 --- a/src/plugins/intel_gpu/src/graph/include/program_dump_graph.h +++ b/src/plugins/intel_gpu/src/graph/include/program_dump_graph.h @@ -14,6 +14,6 @@ std::string get_dir_path(const ExecutionConfig& config); void dump_graph_optimized(std::ofstream&, const program&); void dump_graph_processing_order(std::ofstream&, const program&); void dump_graph_init(std::ofstream&, const program&, - std::function(const primitive_id&)> get_primitive_inst = nullptr); + std::function(const primitive_id&)> get_primitive_inst = nullptr); void dump_graph_info(std::ofstream&, const program&); } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/kv_cache.cpp b/src/plugins/intel_gpu/src/graph/kv_cache.cpp index 95cdd587cdf175..66a874b9b153ec 100644 --- a/src/plugins/intel_gpu/src/graph/kv_cache.cpp +++ b/src/plugins/intel_gpu/src/graph/kv_cache.cpp @@ -17,7 +17,8 @@ GPU_DEFINE_PRIMITIVE_TYPE_ID(kv_cache) kv_cache_inst::typed_primitive_inst(network& network, const kv_cache_node& node) : parent{network, node, false}, memory_state::variable{node.get_primitive()->variable_info.variable_id} { - kv_cache_id = network.get_kv_cache_ids().size(); + thread_local size_t kv_cache_counter = 0; + kv_cache_id = kv_cache_counter++; } layout kv_cache_inst::calc_output_layout(const kv_cache_node& node, kernel_impl_params const& impl_param) { diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index ae64846a0c9b5e..57f2fb41c7cc06 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -1089,11 +1089,13 @@ format layout_optimizer::get_expected_format(quantize_node const& node) { auto use_onednn_impls = _optimization_attributes.use_onednn_impls; if (use_onednn_impls) { - auto& user = node.get_users().front(); - if (user != nullptr && user->get_preferred_input_fmt(user->get_dependency_index(node)) != format::any) { - expected = user->get_preferred_input_fmt(user->get_dependency_index(node)); - } else { - expected = format::any; + expected = format::any; + auto& users = node.get_users(); + if (users.size() != 0) { + auto& user = users.front(); + if (user != nullptr && user->get_preferred_input_fmt(user->get_dependency_index(node)) != format::any) { + expected = 
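The kv_cache change above drops the network-owned kv_cache_ids vector (removed from network.hpp earlier in this diff) and instead assigns ids from a function-local thread_local counter, so every new kv_cache instance simply takes the next id on its thread with no shared bookkeeping or locking. A minimal sketch of the idiom:

    #include <cstddef>
    #include <iostream>

    // Hands out 0, 1, 2, ... per thread; no shared container is needed.
    static std::size_t next_kv_cache_id() {
        thread_local std::size_t counter = 0;
        return counter++;
    }

    int main() {
        std::cout << next_kv_cache_id() << " " << next_kv_cache_id() << std::endl;  // prints "0 1"
        return 0;
    }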
user->get_preferred_input_fmt(user->get_dependency_index(node)); + } } } else if (only_gemm_users(node)) { // TODO: Gemm is not supporting fsv layouts diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index 92d62782828d78..0af0e957df4ea8 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -4,7 +4,6 @@ #include "intel_gpu/plugin/variable_state.hpp" #include "intel_gpu/primitives/read_value.hpp" -#include "openvino/util/file_util.hpp" #include "intel_gpu/primitives/data.hpp" #include "intel_gpu/primitives/mutable_data.hpp" @@ -31,13 +30,10 @@ #include "deconvolution_inst.h" #include "mutable_data_inst.h" #include "condition_inst.h" -#include "loop_inst.h" -#include "assign_inst.h" #include "read_value_inst.h" #include "reshape_inst.h" #include "kv_cache_inst.h" #include "program_helpers.h" -#include "to_string_utils.h" #include "program_dump_graph.h" #include @@ -51,8 +47,8 @@ #include #include +#include "debug_helper.hpp" #ifdef GPU_DEBUG_CONFIG -#include #include #include #include @@ -60,7 +56,6 @@ #endif namespace cldnn { - namespace { #ifdef GPU_DEBUG_CONFIG @@ -143,179 +138,6 @@ void dump_perf_data_raw(std::string dump_path, const std::list(i); } -float convert_element(int32_t i) { return static_cast(i); } - -float convert_element(float f) { return f; } - -float convert_element(ov::float16 h) { return static_cast(h); } - -size_t get_x_pitch(const layout& layout) { - try { - auto tensor_x0 = tensor(batch(0), feature(0), spatial(0, 0, 0, 0)); - auto tensor_x1 = tensor(batch(0), feature(0), spatial(1, 0, 0, 0)); - auto x0 = layout.get_linear_offset(tensor_x0); - auto x1 = layout.get_linear_offset(tensor_x1); - return (x1 - x0); - } catch (...) { - // When spatial size of x=0, x_pitch is meaningless - return 0; - } -} - -template -void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream, bool dump_raw) { - auto&& size = mem->get_layout().get_tensor(); - - GPU_DEBUG_GET_INSTANCE(debug_config); - auto batch_size = std::max(std::min(debug_config->dump_layers_limit_batch, size.batch[0]), 1); - tensor tmp_size(size); - tmp_size.batch[0] = batch_size; - if (tmp_size == size) { - file_stream << "shape: " << size.to_string() << " "; - file_stream << "(count: " << size.count() - << ", original format: " << cldnn::fmt_to_str(mem->get_layout().format) << ")" - << (dump_raw ? " raw data" : "") << std::endl; - } else { - file_stream << "shape: " << tmp_size.to_string() << " "; - file_stream << "(count: " << tmp_size.count() - << ", original format: " << cldnn::fmt_to_str(mem->get_layout().format) - << ", original shape: " << size.to_string() << ")" - << (dump_raw ? 
" raw data" : "") << std::endl; - } - - if (size.count() == 0) { - file_stream << "Empty buffer" << std::endl; - return; - } - - mem_lock lock(mem, stream); - auto mem_ptr = lock.data(); - auto x_pitch = get_x_pitch(mem->get_layout()); - std::stringstream buffer; - - if (!dump_raw) { - for (cldnn::tensor::value_type g = 0; g < size.group[0]; ++g) { - for (cldnn::tensor::value_type b = 0; b < batch_size; ++b) { - for (cldnn::tensor::value_type f = 0; f < size.feature[0]; ++f) { - for (cldnn::tensor::value_type w = 0; w < size.spatial[3]; ++w) { - for (cldnn::tensor::value_type z = 0; z < size.spatial[2]; ++z) { - for (cldnn::tensor::value_type y = 0; y < size.spatial[1]; ++y) { - cldnn::tensor t(cldnn::group(g), cldnn::batch(b), cldnn::feature(f), cldnn::spatial(0, y, z, w)); - size_t input_it = mem->get_layout().get_linear_offset(t); - - for (cldnn::tensor::value_type x = 0; x < size.spatial[0]; ++x, input_it += x_pitch) { - buffer << std::fixed << std::setprecision(6) << convert_element(mem_ptr[input_it]) << std::endl; - } - } - } - } - } - } - } - } else { - for (size_t i = 0; i < lock.size(); ++i) { - buffer << std::fixed << std::setprecision(6) << convert_element(mem_ptr[i]) << std::endl; - } - } - file_stream << buffer.str(); -} - -void unpack(cldnn::data_types type, uint8_t input, int8_t &v0, int8_t &v1) { - if (type == cldnn::data_types::i4) { - char s_bit = (input & 0x08); - char mask = s_bit > 0 ? 0xF0 : 0x00; - v0 = (input & 0x0F) | mask; - - input >>= 4; - s_bit = (input & 0x08); - mask = s_bit > 0 ? 0xF0 : 0x00; - v1 = (input & 0x0F) | mask; - } else if (type == cldnn::data_types::u4) { - v0 = input & 0x0F; - v1 = input >> 4; - } else { - OPENVINO_ASSERT(false, "not supported unpacking"); - } -} - -void dump_i4u4(cldnn::data_types type, memory::ptr mem, stream& stream, std::ofstream& file_stream, bool dump_raw) { - auto&& size = mem->get_layout().get_tensor(); - - GPU_DEBUG_GET_INSTANCE(debug_config); - auto batch_size = std::max(std::min(debug_config->dump_layers_limit_batch, size.batch[0]), 1); - tensor tmp_size(size); - tmp_size.batch[0] = batch_size; - if (tmp_size == size) { - file_stream << "shape: " << size.to_string() << " "; - file_stream << "(count: " << size.count() - << ", original format: " << cldnn::fmt_to_str(mem->get_layout().format) << ")" - << (dump_raw ? " raw data" : "") << std::endl; - } else { - file_stream << "shape: " << tmp_size.to_string() << " "; - file_stream << "(count: " << tmp_size.count() - << ", original format: " << cldnn::fmt_to_str(mem->get_layout().format) - << ", original shape: " << size.to_string() << ")" - << (dump_raw ? " raw data" : "") << std::endl; - } - - if (size.count() == 0) { - file_stream << "Empty buffer" << std::endl; - return; - } - - mem_lock lock(mem, stream); - auto mem_ptr = lock.data(); - std::stringstream buffer; - - if (dump_raw) { - for (size_t i = 0; i < lock.size(); ++i) { - int8_t v0, v1; - unpack(type, mem_ptr[i], v0, v1); - buffer << std::fixed << std::setprecision(6) << static_cast(v0) << std::endl; - buffer << std::fixed << std::setprecision(6) << static_cast(v1) << std::endl; - } - } else { - std::cout << __func__ << " supports raw dump only" << std::endl; - } - file_stream << buffer.str(); -} - -void log_memory_to_file(memory::ptr mem, layout data_layout, stream& stream, std::string layerName, bool dump_raw) { - std::cout << "Dump " << (dump_raw ? 
"raw " : "") << layerName << std::endl; - GPU_DEBUG_GET_INSTANCE(debug_config); - std::string filename = debug_config->get_name_for_dump(layerName); - filename = debug_config->dump_layers_path + filename + ".txt"; - std::ofstream file_stream(filename); - if (!mem) { - file_stream << "Empty" << std::endl; - return; - } - - // Reinterpret buffer to represent actual data layout - auto actual_mem = mem->get_engine()->reinterpret_buffer(*mem, data_layout); - - auto mem_dt = actual_mem->get_layout().data_type; - if (mem_dt == cldnn::data_types::f32) - dump(actual_mem, stream, file_stream, dump_raw); - else if (mem_dt == cldnn::data_types::f16) - dump(actual_mem, stream, file_stream, dump_raw); - else if (mem_dt == cldnn::data_types::i64) - dump(actual_mem, stream, file_stream, dump_raw); - else if (mem_dt == cldnn::data_types::i32) - dump(actual_mem, stream, file_stream, dump_raw); - else if (mem_dt == cldnn::data_types::i8) - dump(actual_mem, stream, file_stream, dump_raw); - else if (mem_dt == cldnn::data_types::u8) - dump(actual_mem, stream, file_stream, dump_raw); - else if (mem_dt == cldnn::data_types::u8) - dump(actual_mem, stream, file_stream, dump_raw); - else if (mem_dt == cldnn::data_types::i4 || mem_dt == cldnn::data_types::u4) - dump_i4u4(mem_dt, actual_mem, stream, file_stream, dump_raw); - else - std::cout << "Dump for this data type is not supported: " << dt_to_str(mem_dt) << std::endl; -} - void wait_for_the_turn() { GPU_DEBUG_GET_INSTANCE(debug_config); bool need_to_wait; @@ -336,7 +158,6 @@ void wait_for_the_turn() { #else void dump_perf_data_raw(std::string, const std::list>&) {} -void log_memory_to_file(memory::ptr, layout, stream&, std::string, bool dump_raw) {} void wait_for_the_turn() {} #endif } // namespace @@ -346,25 +167,6 @@ static uint32_t get_unique_net_id() { return ++id_gen; } -static std::string get_file_path_for_binary_dump(cldnn::layout layout, std::string name) { - std::string filename; - std::string data_type = ov::element::Type(layout.data_type).get_type_name(); - std::string format = layout.format.to_string(); - std::string tensor; - auto dims = layout.get_dims(); - for (size_t r = 0 ; r < layout.get_rank() ; r++) { - tensor += ("_" + to_string(dims[r])); - } - -#ifdef GPU_DEBUG_CONFIG - GPU_DEBUG_GET_INSTANCE(debug_config); - std::string layer_name = debug_config->get_name_for_dump(name); - filename = debug_config->dump_layers_path + layer_name - + "__" + data_type + "_" + tensor + "__" + format + ".bin"; -#endif - return filename; -} - /* Network will always have net_id = 0 when it will be cldnn internal micronetwork (created i.e by propagate_constants opt pass). 
@@ -401,8 +203,6 @@ network::network(program::ptr program, stream::ptr stream, bool is_internal, boo calculate_weights_cache_capacity(); allocate_primitives(); configure_primitives_second_output(); - if (!_program->is_loaded_from_cache()) - check_names(); build_insts_deps(); build_exec_order(); validate_primitives(); @@ -531,11 +331,7 @@ void network::reset_execution(bool wait) { event::ptr network::set_input_data(const primitive_id& id, memory::ptr data) { GPU_DEBUG_TRACE_DETAIL << "Set input " << id << " " << data->get_layout().to_short_string() << std::endl; - std::shared_ptr primitive_inst; - - primitive_inst = find_primitive(id); - - OPENVINO_ASSERT(primitive_inst != nullptr, "[GPU] topology doesn't contain primitive: ", id); + auto primitive_inst = find_primitive(id); if (primitive_inst->type() != input_layout::type_id()) { CLDNN_ERROR_MESSAGE(id, "primitive " + id + " is not an input"); @@ -679,11 +475,8 @@ network::output_chains_map::iterator network::add_output_chain(std::shared_ptr

network::set_output_memory(const primitive_id& id, memory::ptr mem_new) { GPU_DEBUG_TRACE_DETAIL << "Set output " << id << " " << mem_new->get_layout().to_short_string() << std::endl; - std::shared_ptr p_inst; std::vector ret_ev; - p_inst = find_primitive(id); - - OPENVINO_ASSERT(p_inst != nullptr, "[GPU] topology doesn't contain primitive: ", id); + std::shared_ptr p_inst = find_primitive(id); auto iter = std::find(_outputs.begin(), _outputs.end(), p_inst); if (iter == _outputs.end()) @@ -711,35 +504,10 @@ std::vector network::set_output_memory(const primitive_id& id, memor return ret_ev; } -void cldnn::network::check_names() { - for (auto const& prim : _primitives) { - if (find_in_internal_networks(prim.first) != nullptr) - CLDNN_ERROR_MESSAGE("Network", "Found primitive with id: " + prim.first + "in anotother network."); - } -} - std::shared_ptr cldnn::network::find_primitive(const primitive_id& id) const { - if (_primitives.find(id) != _primitives.end()) - return _primitives.at(id); - - return find_in_internal_networks(id); -} - -std::shared_ptr cldnn::network::find_in_internal_networks(const primitive_id& id) const { - std::shared_ptr ret; - - for (auto const& prim : _primitives) { - if (prim.second->type() == condition::type_id()) { // currently only condition inst contains mini networks - auto cond_inst = std::static_pointer_cast(prim.second); - ret = cond_inst->get_net_true()->find_primitive(id); - if (ret != nullptr) - return ret; - ret = cond_inst->get_net_false()->find_primitive(id); - if (ret != nullptr) - return ret; - } - } - return nullptr; + auto it = _primitives.find(id); + OPENVINO_ASSERT(it != _primitives.end(), "[GPU] Network doesn't contain primitive ", id); + return it->second; } std::string network::get_primitive_info(const primitive_id& id) const { @@ -750,9 +518,6 @@ std::string network::get_primitive_info(const primitive_id& id) const { bool network::does_node_need_lockable_output(const primitive_id& id) const { auto prim_inst = find_primitive(id); - OPENVINO_ASSERT(prim_inst, "[GPU] Can't get implementation type, since topology ", - "doesn't contain primitive with requested id: ", id); - const auto& node = prim_inst->get_node(); if (node.is_type()) { for (const auto& user : node.get_users()) { @@ -772,15 +537,6 @@ std::string network::get_implementation_info(const primitive_id& id) const { return _program->get_implementation_info(id); } -layout network::get_node_output_layout(const primitive_id& output_id) const { - auto res = std::find_if(_outputs.begin(), _outputs.end(), [&](const std::shared_ptr& v) { - return v->id() == output_id; - }); - OPENVINO_ASSERT(res != _outputs.end(), "[GPU] Couldn't get output layout for ", output_id, ". 
Output with such name is not found in the outputs list"); - - return (*res)->get_node_output_layout(); -} - memory::ptr network::get_output_memory(const primitive_id& output_id) { return get_primitive(output_id)->output_memory_ptr(); } @@ -927,40 +683,11 @@ void network::add_to_exec_order(const primitive_id& id) { } std::map network::execute(const std::vector& dependencies) { - execute_impl(dependencies); - - auto output_ids = get_output_ids(); - std::map result; - for (auto& id : output_ids) { - result.emplace(id, get_output(id)); - } - return result; -} - -void network::execute_impl(const std::vector& events) { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "NetworkImpl::Execute"); - int64_t curr_iter = -1; - GPU_DEBUG_GET_INSTANCE(debug_config); -#ifdef GPU_DEBUG_CONFIG - curr_iter = iteration; -#endif + NETWORK_DEBUG(*this); // Wait for previous execution completion reset_execution(false); - GPU_DEBUG_IF(debug_config->dump_memory_pool > 0) { - auto& iters = debug_config->dump_memory_pool_iters; - if (iters.empty() || iters.find(curr_iter) != iters.end()) { - GPU_DEBUG_COUT << "============================================================================" << std::endl; - GPU_DEBUG_COUT << "Start network execution (net_id : " << get_id() << ", iter :" << curr_iter << ")" << std::endl; - if (curr_iter == 0 && get_id() > 0) { - dump_memory_pool(debug_config->dump_memory_pool_path, curr_iter); - GPU_DEBUG_COUT << "============================================================================" << std::endl; - } - } - } else { - GPU_DEBUG_TRACE << "============================================================================" << std::endl; - GPU_DEBUG_TRACE << "Start network execution (net_id : " << get_id() << ", iter :" << curr_iter << ")" << std::endl; - } std::vector in_out_mem; auto is_surface_lock_check_needed = [&](const shared_mem_type& shared_mem_type) { @@ -995,34 +722,22 @@ void network::execute_impl(const std::vector& events) { // in some cases. 
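NETWORK_DEBUG(*this) above expands to the NetworkDebugHelper scope guard from debug_helper.hpp (NODE_DEBUG is its per-primitive counterpart), replacing the inline dump code being deleted from this file: construction does the before-work (memory-pool banner and layer listing for the network helper, raw-binary loading and input dumps for the node helper) and destruction does the after-work (output dumps, graph and memory-pool dumps, advancing the iteration counter), while without GPU_DEBUG_CONFIG both macros expand to nothing. A generic sketch of that scope-guard pattern, with MY_DEBUG_BUILD as a hypothetical stand-in for the real config macro:

    #include <iostream>

    // Scope guard in the spirit of NetworkDebugHelper / NodeDebugHelper: the
    // "before" side effects run in the constructor, the "after" ones in the
    // destructor, even on early returns or exceptions.
    class ScopeDebug {
    public:
        explicit ScopeDebug(const char* what) : m_what(what) { std::cout << "enter " << m_what << "\n"; }
        ~ScopeDebug() { std::cout << "leave " << m_what << "\n"; }
    private:
        const char* m_what;
    };

    #ifdef MY_DEBUG_BUILD
    #define SCOPE_DEBUG(what) ScopeDebug __scope_debug_guard(what)
    #else
    #define SCOPE_DEBUG(what)  // compiled out entirely in release builds
    #endif

    void execute_network() {
        SCOPE_DEBUG("network");  // silent unless MY_DEBUG_BUILD is defined
        // ... real work ...
    }

    int main() {
        execute_network();
        return 0;
    }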
auto surf_lock = surfaces_lock::create(get_engine().type(), in_out_mem, get_stream()); - set_arguments(); - GPU_DEBUG_IF(debug_config->list_layers == 1) { - for (auto& inst : _exec_order) { - GPU_DEBUG_COUT << inst->id() << std::endl; - if (inst->get_node().is_type()) { - auto& loop_node = inst->get_node().as(); - for (auto& prim : loop_node.get_body_program()->get_processing_order()) { - GPU_DEBUG_COUT << "\t" << prim->id() << std::endl; - } - } else if (inst->get_node().is_type()) { - auto& cond_node = inst->get_node().as(); - GPU_DEBUG_COUT << "* Branch_True" << std::endl; - for (auto& prim : cond_node.get_branch_true().inner_program->get_processing_order()) { - GPU_DEBUG_COUT << "\t" << prim->id() << std::endl; - } - GPU_DEBUG_COUT << "* Branch_False" << std::endl; - for (auto& prim : cond_node.get_branch_false().inner_program->get_processing_order()) { - GPU_DEBUG_COUT << "\t" << prim->id() << std::endl; - } - } - } - if (!is_internal()) exit(0); + execute_impl(dependencies); + + std::map result; + for (auto& inst : _outputs) { + event::ptr ev = nullptr; + const auto& id = inst->id(); + if (get_stream().get_queue_type() == QueueTypes::out_of_order || _enable_profiling) + ev = _events.at(id); + + result.emplace(id, network_output(ev, inst->output_memory_ptr(0), get_stream_ptr(), inst->get_output_layout(0))); } - auto get_iteration_prefix = [](int64_t iter) { - if (iter < 0) - return std::string(""); - return std::to_string(iter) + "_"; - }; + return result; +} + +void network::execute_impl(const std::vector& events) { + set_arguments(); // This extra flush command is needed for dynamic models in both cases of out_of_order / in_order operating mode // since it reduces `bubbles` number in pipeline and GPU's idle time by timely flushing new kernels to device. 
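Sketch, not part of the patch: with the body above, execute() itself builds the id-to-network_output map (attaching an event only for out-of-order queues or when profiling is enabled), so callers read results from the returned map rather than calling network::get_output() afterwards. A small usage sketch with placeholder ids, mirroring how the unit tests further down in this patch were updated.

    #include "intel_gpu/graph/network.hpp"

    void run_and_read(cldnn::network& net, cldnn::memory::ptr input_mem) {
        net.set_input_data("input", input_mem);
        auto outputs = net.execute();                       // map of output id -> network_output
        auto out_mem = outputs.at("output").get_memory();   // "output" is a placeholder id
        auto out_layout = out_mem->get_layout();
        (void)out_layout;
    }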
@@ -1033,233 +748,43 @@ void network::execute_impl(const std::vector& events) { size_t executed_prims = 0; for (auto& inst : _exec_order) { - // Load binary dump for input layers - GPU_DEBUG_IF(!debug_config->load_layers_raw_dump.empty()) { - const std::string layer_name = inst->id(); - auto files = debug_config->get_filenames_for_matched_layer_loading_binaries(layer_name); - if (!files.empty()) { - if (inst->is_input()) { - // Loading binary dumps for output tensors of input-layers : only one output exists or index(dstN) exists - auto dump_file = debug_config->get_matched_from_filelist(files, "_dst0__"); - OPENVINO_ASSERT((files.size() == 1 || dump_file.length() != 0), "Unexpected binary dump for input layer"); - - OPENVINO_ASSERT(files.size() == get_primitive(inst->id())->outputs_memory_count(), "Mis-match dump file count"); - - for (size_t i = 0; i < get_primitive(inst->id())->outputs_memory_count(); i++) { - auto dump_file = files[0]; - if (files.size() > 1 || get_primitive(inst->id())->outputs_memory_count() != 1) { - std::string pattern = "_dst" + std::to_string(i) + "__"; - dump_file = debug_config->get_matched_from_filelist(files, pattern); - } - OPENVINO_ASSERT((dump_file.length() > 0), "Could not find expected pattern '_dst[N]__' for binary dump"); - GPU_DEBUG_COUT << " Load binary dump : " << dump_file << " for " << layer_name << std::endl; - - std::vector bin = ov::util::load_binary(dump_file); - OPENVINO_ASSERT(!bin.empty(), "Failure loading binary from OV_GPU_LoadDumpRawBinary : " + dump_file); - - auto output_mem = get_primitive(layer_name)->output_memory_ptr(i); - OPENVINO_ASSERT(output_mem->size() == bin.size(), "memory size mis-match for OV_GPU_LoadDumpRawBinary : " + layer_name - + "\n Expected size : " + to_string(output_mem->size()) + ", Binary : " + to_string(bin.size())); - - output_mem->copy_from(get_stream(), static_cast(&bin[0]), true); - } - } else { - auto check_dst = debug_config->get_matched_from_filelist(files, "_dst0__"); - OPENVINO_ASSERT(check_dst.length() == 0, "Expected to load binaries for inputs of " + layer_name); - - // Loading input tensors for any layer - auto dump_file = debug_config->get_matched_from_filelist(files, "_src0__"); - OPENVINO_ASSERT(dump_file.length() != 0, "Could not find expected pattern '_src[N]__' for binary dump input : " + layer_name); - - for (size_t i = 0; i < get_primitive(inst->id())->dependencies().size(); i++) { - auto dump_file = files[0]; - if (files.size() > 1 || get_primitive(inst->id())->dependencies().size() != 1) { - std::string pattern = "_src" + std::to_string(i) + "__"; - dump_file = debug_config->get_matched_from_filelist(files, pattern); - } - if (dump_file.length() == 0) { - GPU_DEBUG_COUT << " Skip loading for input(" << i << ") of " << layer_name << std::endl; - continue; - } - OPENVINO_ASSERT((dump_file.length() > 0), "Could not find expected pattern '_src[N]__' for binary dump input"); - GPU_DEBUG_COUT << " Load binary dump : " << dump_file << " for input(" << i << ") of " << layer_name << std::endl; - - std::vector bin = ov::util::load_binary(dump_file); - OPENVINO_ASSERT(!bin.empty(), "Failure loading binary from OV_GPU_LoadDumpRawBinary : " + dump_file); - - auto input_mem = get_primitive(inst->id())->dep_memory_ptr(i); - if (input_mem->size() != bin.size()) { - std::cout << "WARNING: memory size mis-match for OV_GPU_LoadDumpRawBinary : " + layer_name - << " " << input_mem->size() << " / " << bin.size() << std::endl; - bin.resize(input_mem->size()); - } - - input_mem->copy_from(get_stream(), 
static_cast(&bin[0]), true); - } - } - } - } - - // Dump input buffers of 'inst' - GPU_DEBUG_IF(debug_config->dump_layers_path.length() > 0) { - const std::string layer_name = inst->id(); - - GPU_DEBUG_IF(debug_config->is_target_iteration(curr_iter) && - debug_config->dump_layers_dst_only == 0 && debug_config->is_layer_for_dumping(layer_name)) { - std::string debug_str_for_bin_load = " Command for loading : OV_GPU_LoadDumpRawBinary=\"" + layer_name + ":"; - for (size_t i = 0; i < get_primitive(inst->id())->dependencies().size(); i++) { - std::string name = "program" + std::to_string((get_program() != nullptr) ? get_program()->get_id() : 0) + - "_network" + std::to_string(get_id()) + - "_" + get_iteration_prefix(curr_iter) + - layer_name + "_src" + std::to_string(i); - auto input_mem = get_primitive(inst->id())->dep_memory_ptr(i); - if (input_mem == nullptr) { - GPU_DEBUG_COUT << " input_mem_" << i << " is nullptr. Nothing to dump." << std::endl; - continue; - } - - auto dep = inst->dependencies().at(i); - auto input_layout = dep.first->get_output_layout(dep.second); - GPU_DEBUG_IF(debug_config->dump_layers_binary) { - // Binary dump : raw - auto filename = get_file_path_for_binary_dump(input_layout, name); - - mem_lock lock(input_mem, get_stream()); - ov::util::save_binary(filename, lock.data(), input_mem->size()); - GPU_DEBUG_COUT << " Dump layer src : " << layer_name << " to " << filename << std::endl; - debug_str_for_bin_load += (filename + ","); - } else { - log_memory_to_file(input_mem, - input_layout, - get_stream(), - name, - debug_config->dump_layers_raw); - } - } - - GPU_DEBUG_IF(debug_config->dump_layers_binary && !inst->is_input()) { - debug_str_for_bin_load[debug_str_for_bin_load.size()-1] = '\"'; - GPU_DEBUG_COUT << debug_str_for_bin_load << std::endl;; - } - } - } + NODE_DEBUG(*inst); execute_primitive(inst, events); executed_prims++; if (needs_flushing && executed_prims % flush_frequency == 0) get_stream().flush(); - - // Dump output buffers of 'inst' - GPU_DEBUG_IF(debug_config->dump_layers_path.length() > 0) { - get_stream().finish(); - const std::string layer_name = inst->id(); - auto prog_id = ((get_program() != nullptr) ? get_program()->get_id() : 0); - auto net_id = get_id(); - GPU_DEBUG_IF(debug_config->is_target_iteration(curr_iter) && - debug_config->is_layer_for_dumping(layer_name, inst->is_output(), inst->is_input())) { - std::string debug_str_for_bin_load = " Command for loading : OV_GPU_LoadDumpRawBinary=\"" - + layer_name + ":"; - for (size_t i = 0; i < get_primitive(layer_name)->outputs_memory_count(); i++) { - std::string name = "program" + std::to_string(prog_id) + - "_network" + std::to_string(net_id) + - "_" + get_iteration_prefix(curr_iter) + - layer_name + "_dst" + std::to_string(i); - auto output_mem = get_primitive(layer_name)->output_memory_ptr(i); - if (output_mem == nullptr) { - GPU_DEBUG_COUT << " output_mem is nullptr. Nothing to dump." 
<< std::endl; - continue; - } - - GPU_DEBUG_IF(debug_config->dump_layers_binary) { - // Binary dump : raw - auto output_layout = inst->get_output_layout(i); - auto filename = get_file_path_for_binary_dump(output_layout, name); - - mem_lock lock(output_mem, get_stream()); - ov::util::save_binary(filename, lock.data(), output_mem->size()); - GPU_DEBUG_COUT << " Dump layer dst : " << layer_name << " to " << filename << std::endl; - debug_str_for_bin_load += (filename + ","); - } else { - // Text dump - log_memory_to_file(output_mem, inst->get_output_layout(i), get_stream(), name, debug_config->dump_layers_raw); - } - } - - GPU_DEBUG_IF(debug_config->dump_layers_binary && inst->is_input()) { - debug_str_for_bin_load[debug_str_for_bin_load.size()-1] = '\"'; - GPU_DEBUG_COUT << debug_str_for_bin_load << std::endl;; - } - } - } - } - - // print '-data_shape' option for benchmark_app - GPU_DEBUG_IF(debug_config->print_input_data_shapes == 1) { - std::stringstream data_shape_str; - auto add_string = [&data_shape_str](std::string str) { - data_shape_str << ((data_shape_str.rdbuf()->in_avail() == 0) ? " -data_shape " : ",") << str; - }; - - for (auto& inst : _exec_order) { - auto name = inst->id(); - auto pos = name.find(':'); - auto type = name.substr(0, pos); - name.erase(0, pos + 1); - if (inst->is_input() && type == "parameter") { - add_string(name + inst->get_output_layout().get_partial_shape().to_string()); - } - } - - GPU_DEBUG_COUT << "[program:" << std::setw(2) << ((get_program() != nullptr) ? get_program()->get_id() : 0) - << "|network:" << std::setw(2) << get_id() << "|iter:" << std::setw(4) << curr_iter << "] benchmark_app cmd: " - << data_shape_str.str() << std::endl; - } - - GPU_DEBUG_IF(!debug_config->dump_graphs.empty() && debug_config->is_target_iteration(curr_iter)) { - auto get_fixed_str = [](int value, int length = 2) -> std::string { - std::ostringstream ss; - ss << std::setw(length) << std::setfill('0') << std::to_string(value); - return ss.str(); - }; - std::string path = get_dir_path(get_config()); - if (!path.empty()) { - std::ofstream ofs(path + "cldnn_program_exec_p" + get_fixed_str(get_program()->get_id()) + "_n" + get_fixed_str(get_id()) - + "_" + get_fixed_str(curr_iter, 5) + ".graph"); - dump_graph_init(ofs, *get_program(), [&](const primitive_id& id) -> std::shared_ptr { - return get_primitive(id); - }); - } } // Store events only in case of OOO queue or enabled Profiling auto store_events = is_out_of_order_queue || _enable_profiling; if (store_events) { if (_program != nullptr) { - for (auto& inst : _program->get_processing_order()) { - // Special handling for mutable data. The event should be the same as the user or dependency with highest - // processing_num as the mutable_data can be updated when is both user or dependency. - if (inst->is_type()) { - decltype(_program->get_processing_order().get_processing_number(inst)) proc_num = 0; - for (auto& user : inst->get_users()) { - auto user_proc_num = _program->get_processing_order().get_processing_number(user); - if (user_proc_num > proc_num) { - _events[inst->id()] = _events[user->id()]; - proc_num = user_proc_num; + for (auto& inst : _program->get_processing_order()) { + // Special handling for mutable data. The event should be the same as the user or dependency with highest + // processing_num as the mutable_data can be updated when is both user or dependency. 
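The per-layer dump, binary load and graph dump logic deleted throughout this hunk is replaced by the NETWORK_DEBUG(*this) and NODE_DEBUG(*inst) macros used above; their definition is not shown in this diff. A plausible shape for such a helper, given only as an assumption to illustrate the pattern, is a RAII scope that performs the pre- and post-execution debug work when the debug config asks for it:

    // Assumed sketch only; the real helper lives outside this hunk.
    namespace cldnn { class primitive_inst; }

    struct NodeDebugHelperSketch {
        explicit NodeDebugHelperSketch(const cldnn::primitive_inst& inst) : m_inst(inst) {
            // e.g. load raw input dumps or print the layer id when the matching option is enabled
        }
        ~NodeDebugHelperSketch() {
            // e.g. dump the output buffers of the primitive that has just been executed
        }
        const cldnn::primitive_inst& m_inst;
    };
    #define NODE_DEBUG(inst) NodeDebugHelperSketch _node_debug_helper((inst))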
+ if (inst->is_type()) { + decltype(_program->get_processing_order().get_processing_number(inst)) proc_num = 0; + for (auto& user : inst->get_users()) { + auto user_proc_num = _program->get_processing_order().get_processing_number(user); + if (user_proc_num > proc_num) { + _events[inst->id()] = _events[user->id()]; + proc_num = user_proc_num; + } } - } - if (!inst->get_dependencies().empty()) { - for (auto& dep : inst->get_dependencies()) { - auto dep_proc_num = _program->get_processing_order().get_processing_number(dep.first); - if (dep_proc_num > proc_num) { - _events[inst->id()] = _events[dep.first->id()]; - proc_num = dep_proc_num; + if (!inst->get_dependencies().empty()) { + for (auto& dep : inst->get_dependencies()) { + auto dep_proc_num = _program->get_processing_order().get_processing_number(dep.first); + if (dep_proc_num > proc_num) { + _events[inst->id()] = _events[dep.first->id()]; + proc_num = dep_proc_num; + } } } } } } - } for (auto& dout : _data_outputs) { // data primitives are not executed so if they are marked as output we need to add // them valid events manually @@ -1278,73 +803,6 @@ void network::execute_impl(const std::vector& events) { // Deallocate events from the previos iteration _old_events.clear(); - - GPU_DEBUG_IF(debug_config->dump_memory_pool > 0) { - auto& iters = debug_config->dump_memory_pool_iters; - if (iters.empty() || iters.find(curr_iter) != iters.end()) { - dump_memory_pool(debug_config->dump_memory_pool_path, curr_iter); - GPU_DEBUG_COUT << "============================================================================" << std::endl; - } - } - -#ifdef GPU_DEBUG_CONFIG - iteration++; -#endif -} - -void network::dump_memory_pool(std::string dump_path, int64_t curr_iter) { -#ifdef GPU_DEBUG_CONFIG - get_memory_pool().dump(get_id(), curr_iter, dump_path); - auto get_constants_mem_size = [&](allocation_type type) -> size_t { - size_t mem_size = 0; - for (auto& prim : _primitives) { - if (prim.second->get_node().is_constant()) { - for (size_t i = 0; i < prim.second->outputs_memory_count(); i++) { - if (prim.second->output_memory_ptr(i)->get_allocation_type() == type) - mem_size += prim.second->output_memory_ptr(i)->size(); - } - } - } - return mem_size; - }; - auto get_variables_mem_size = [&](allocation_type type) -> size_t { - size_t mem_size = 0; - for (auto& var : get_variables()) { - if (var.second->get_memory() && var.second->get_memory()->get_allocation_type() == type) - mem_size += var.second->get_actual_mem_size(); - } - return mem_size; - }; - auto get_mb_size = [&](int64_t size) -> std::string { - if (size == 0) return "0 MB"; - return std::to_string(static_cast(size) / (1024 * 1024)) + " MB"; - }; - int64_t usm_host_const_mem_size = get_constants_mem_size(allocation_type::usm_host); - int64_t usm_device_const_mem_size = get_constants_mem_size(allocation_type::usm_device); - int64_t usm_host_var_mem_size = get_variables_mem_size(allocation_type::usm_host); - int64_t usm_device_var_mem_size = get_variables_mem_size(allocation_type::usm_device); - int64_t host_mem_size = get_engine().get_used_device_memory(allocation_type::usm_host); - int64_t device_mem_size = get_engine().get_used_device_memory(allocation_type::usm_device); - int64_t usm_host_mem_pool_size = get_memory_pool().get_total_mem_pool_size(allocation_type::usm_host); - int64_t usm_host_etc_size = host_mem_size - usm_host_mem_pool_size - - usm_host_const_mem_size - usm_host_var_mem_size; - int64_t usm_device_mem_pool_size = 
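The block above is unchanged logic, only re-indented: a mutable_data primitive takes over the completion event of whichever user or dependency has the highest processing number, because the same buffer can be written both as an input and as an output. The selection rule reduces to picking the latest producer, as in this small illustrative helper:

    #include <cstddef>
    #include <vector>

    // Index of the entry with the largest processing number, i.e. the event that should
    // represent the mutable_data buffer once every writer has finished. Assumes a non-empty list.
    size_t pick_latest(const std::vector<size_t>& processing_numbers) {
        size_t best = 0;
        for (size_t i = 1; i < processing_numbers.size(); ++i)
            if (processing_numbers[i] > processing_numbers[best])
                best = i;
        return best;
    }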
get_memory_pool().get_total_mem_pool_size(allocation_type::usm_device); - int64_t usm_device_etc_size = device_mem_size - usm_device_mem_pool_size - - usm_device_const_mem_size - usm_device_var_mem_size; - GPU_DEBUG_COUT << "------------------------------------------------------------------------" << std::endl; - GPU_DEBUG_COUT << "Memory statistics for (net_id:" << get_id() << ", iter:" << curr_iter << ")" << std::endl; - GPU_DEBUG_COUT << " Total host mem size : " << get_mb_size(host_mem_size) << std::endl; - GPU_DEBUG_COUT << " * Memory pool : " << get_mb_size(usm_host_mem_pool_size) << std::endl; - GPU_DEBUG_COUT << " * Constant : " << get_mb_size(usm_host_const_mem_size) << std::endl; - GPU_DEBUG_COUT << " * Variable : " << get_mb_size(usm_host_var_mem_size) << std::endl; - GPU_DEBUG_COUT << " * ETC : " << get_mb_size(usm_host_etc_size) << std::endl; - GPU_DEBUG_COUT << " Total device mem size : " << get_mb_size(device_mem_size) << std::endl; - GPU_DEBUG_COUT << " * Memory pool : " << get_mb_size(usm_device_mem_pool_size) << std::endl; - GPU_DEBUG_COUT << " * Constant : " << get_mb_size(usm_device_const_mem_size) << std::endl; - GPU_DEBUG_COUT << " * Variable : " << get_mb_size(usm_device_var_mem_size) << std::endl; - GPU_DEBUG_COUT << " * ETC : " << get_mb_size(usm_device_etc_size) << std::endl; - GPU_DEBUG_COUT << "------------------------------------------------------------------------" << std::endl; -#endif } std::vector network::get_input_ids() const { @@ -1404,10 +862,6 @@ const program::graph_optimizer_info& network::get_optimizer_passes_info() const } std::map network::get_ext_id_mapping() const { - if (_program == nullptr) { - return _ext_id_mapping; - } - std::map result; for (auto& prim : _primitives) { result.emplace(prim.first, prim.second->get_node().get_primitive()->origin_op_name); @@ -1508,9 +962,6 @@ void network::allocate_primitive_instance(program_node const& node) { if (node.is_type()) _data_outputs.push_back(inst); } - if (node.is_type()) { - kv_cache_ids.push_back(node.id()); - } if (auto state_prim = std::dynamic_pointer_cast(inst)) { auto prim = inst->get_node().get_primitive(); set_variables_state_info(state_prim->variable_id(), node.get_output_layout(0), state_prim->get_user_specified_type(), prim.get()); diff --git a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp index bff45cd81f9900..4a2f43b28d9360 100644 --- a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp +++ b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp @@ -170,7 +170,7 @@ std::string get_dir_path(const ExecutionConfig& config) { void dump_graph_init(std::ofstream& graph, const program& program, - std::function(const primitive_id&)> get_primitive_inst) { + std::function(const primitive_id&)> get_primitive_inst) { const std::string invalid_layout_msg = "(invalid layout)"; const auto dump_mem_info = [&invalid_layout_msg, &get_primitive_inst](const program_node* ptr) { diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index 3c9ad0f7317a27..21ba4e656fae0d 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -611,9 +611,9 @@ bool program_node::is_padded_spatial(size_t idx) const { auto& layout = get_output_layout(idx); const auto& lower_size = layout.data_padding._lower_size; const auto& upper_size = layout.data_padding._upper_size; - return std::any_of(std::begin(lower_size) + 2, 
std::begin(lower_size) + layout.get_spatial_rank() - 1, + return std::any_of(std::begin(lower_size) + 2, std::begin(lower_size) + 2 + layout.get_spatial_rank(), [](const tensor::value_type& el) { return el != 0; }) || - std::any_of(std::begin(upper_size) + 2, std::begin(upper_size) + layout.get_spatial_rank() - 1, + std::any_of(std::begin(upper_size) + 2, std::begin(upper_size) + 2 + layout.get_spatial_rank(), [](const tensor::value_type& el) { return el != 0; }); } diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl index 29d322d432dd35..57545b0df37cff 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl @@ -809,7 +809,20 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan)( uint input_offset = out_b * TILE_IN_B_PITCH + INPUT0_OFFSET; #endif +#if FILTER_LAYOUT_OS_IS_YX_OSV64_ISV2 + const int power_of_two_for_simd = 5; + const int power_of_two_for_osv = 6; + const uint osv64_weight_base = (( (int) (out_f >> power_of_two_for_osv) ) << power_of_two_for_osv); + const uint osv_weight_stride = (INPUT_ELEMENTS_COUNT >> 1); + const uint out_f_offset = (int)((out_f >> power_of_two_for_simd) & 0x1) << power_of_two_for_simd; + // out_f(32) : 0 * osv_weight_stride + 32; + // out_f(64) : 64 * osv_weight_stride + 0; + // out_f(128) : 64 * osv_weight_stride + 32; + // ... + uint weights_offset = osv64_weight_base * osv_weight_stride + out_f_offset; +#else uint weights_offset = out_f * (INPUT_ELEMENTS_COUNT / 2); +#endif ACCUMULATOR_VEC_TYPE acc[TILE_B] = { }; @@ -905,7 +918,11 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan)( __local int* char_slm_weight = (__local int*)wei_local_mem; + #if FILTER_LAYOUT_OS_IS_YX_OSV64_ISV2 + uint weights_idx = weights_offset + local_id * SIMD * FILTER_LOAD_ITERS * FILTER_LOAD_BLOCK_SIZE * 2; + #else uint weights_idx = weights_offset + local_id * SIMD * FILTER_LOAD_ITERS * FILTER_ACTUAL_LOAD_BLOCK_SIZE; + #endif uint wei_local_idx = local_id * SIMD * FILTER_LOAD_ITERS * (FILTER_LOAD_BLOCK_SIZE/2) + sglid * 2; // DECOMPRESSION_SCALE_POST_OP SHOULD be enabled for dynamic quantize FC : scale is ACCUMULATOR_VAL_ONE @@ -917,6 +934,17 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan)( // loaded weights 'wei_packed' of os_iyx_osv16 format have continuous values along TILE_K. 
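The is_padded_spatial() fix above makes the scan cover exactly the spatial dimensions, i.e. the half-open range [begin + 2, begin + 2 + spatial_rank) of the padding arrays, where indices 0 and 1 are batch and feature. The previous upper bound of begin + spatial_rank - 1 fell short of (or before) the spatial entries for the common 4-D and 5-D layouts, so non-zero spatial padding could go undetected. Condensed version of the corrected check, with an assumed 4-entry [b, f, y, x] padding array:

    #include <algorithm>
    #include <array>
    #include <cstdint>

    // For a 4-D layout: pad holds per-dimension padding in [b, f, y, x] order; skip batch and feature.
    bool has_spatial_padding(const std::array<int32_t, 4>& pad, size_t spatial_rank) {
        return std::any_of(pad.begin() + 2, pad.begin() + 2 + spatial_rank,
                           [](int32_t el) { return el != 0; });
    }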
So no need to transpose while unpacking dq_wei_unpacked.s0123 = UNPACK_INT4(DQ_TYPE, *((INT4_PACKED_TYPE_PRELOAD*)&wei_packed0)); dq_wei_unpacked.s4567 = UNPACK_INT4(DQ_TYPE, *((INT4_PACKED_TYPE_PRELOAD*)&wei_packed1)); + #elif FILTER_LAYOUT_OS_IS_YX_OSV64_ISV2 + SLM_FILTER_PACKED_VEC wei_packed0 = BLOCK_READN(FILTER_TYPE, FILTER_ACTUAL_LOAD_BLOCK_SIZE, weights, weights_idx); + SLM_FILTER_PACKED_VEC wei_packed1 = BLOCK_READN(FILTER_TYPE, FILTER_ACTUAL_LOAD_BLOCK_SIZE, weights, (weights_idx + (FILTER_LOAD_BLOCK_SIZE * SIMD))); + DQ_SLM_FILTER_UNPACKED_VEC dq_wei_unpacked; + DQ_SLM_FILTER_UNPACKED_VEC dq_wei_unpacked_tmp; + dq_wei_unpacked_tmp.s0123 = UNPACK_INT4(DQ_TYPE, *((INT4_PACKED_TYPE_PRELOAD*)&wei_packed0)); + dq_wei_unpacked_tmp.s4567 = UNPACK_INT4(DQ_TYPE, *((INT4_PACKED_TYPE_PRELOAD*)&wei_packed1)); + dq_wei_unpacked.s01 = dq_wei_unpacked_tmp.s01; + dq_wei_unpacked.s23 = dq_wei_unpacked_tmp.s45; + dq_wei_unpacked.s45 = dq_wei_unpacked_tmp.s23; + dq_wei_unpacked.s67 = dq_wei_unpacked_tmp.s67; #else SLM_FILTER_PACKED_VEC wei_packed = BLOCK_READN(FILTER_TYPE, FILTER_LOAD_BLOCK_SIZE, weights, weights_idx); DQ_SLM_FILTER_UNPACKED_VEC dq_wei_unpacked = UNPACK_TRANSPOSED_INT4(DQ_TYPE, *((INT4_PACKED_TYPE_PRELOAD *)&wei_packed)); @@ -996,11 +1024,7 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan)( acc_tmp[1][bi] = imad_SW(acc_tmp[1][bi], input_val, second_weight); } - #if FILTER_LAYOUT_OS_IYX_OSV16 && TILE_OFM == 2 - weights_offset += (TILE_K_OFM_PACKED/2) * SIMD; - #else - weights_offset += TILE_K_OFM_PACKED * SIMD; - #endif + weights_offset += TILE_K_OFM_PACKED * TILE_OFM_PER_OSV_SIZE * SIMD; #if DECOMPRESSION_SCALE_POST_OP && (TILE_IFM_ELEMENTS_SIZE > DECOMPRESSION_SCALE_GROUP_SIZE) unroll_for (uint bi = 0; bi < TILE_B; ++bi) { diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp index 5377387c8b497e..c4115d74f54a92 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp @@ -534,6 +534,7 @@ JitConstants FullyConnected_bf_tiled::GetJitConstants(const fully_connected_para size_t tile_k_ofm_packed = tile_k_ofm; size_t quantize_grp_size = get_dynamic_quantize_group_size(params); + bool add_decompress_scale_post_op = false; WeightsType weights_dt = params.weights.GetDType(); if (weights_dt == WeightsType::UINT4 || weights_dt == WeightsType::INT4) { tile_k_ofm_packed /= 2; @@ -542,7 +543,7 @@ JitConstants FullyConnected_bf_tiled::GetJitConstants(const fully_connected_para const size_t scale_group_size = params.weights.IFM().v / params.decompression_scale.Feature().v; // Do not use SCALE_POST_OP for SLM kernel, since it demonstrates worse performance if (scale_group_size % simd == 0 && !dispatchData.use_slm) - jit.AddConstant(MakeJitConstant("DECOMPRESSION_SCALE_POST_OP", 1)); + add_decompress_scale_post_op = true; } if (params.weights.GetLayout() == WeightsLayout::os_is_yx_osv32_isv2) { jit.AddConstant(MakeJitConstant("W_IDX", "fi * TILE_K + kii")); @@ -619,6 +620,8 @@ JitConstants FullyConnected_bf_tiled::GetJitConstants(const fully_connected_para jit.AddConstant(MakeJitConstant("DQ_TYPE", "char")); jit.AddConstant(MakeJitConstant("QUANTIZE_GROUP_SIZE", quantize_grp_size)); } else { + if (add_decompress_scale_post_op) + 
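For the OS_IS_YX_OSV64_ISV2 path added above, the weight offset splits out_f into a 64-wide output block plus a 32-wide half-block: base = (out_f / 64) * 64 and offset = ((out_f / 32) & 1) * 32, with the stride INPUT_ELEMENTS_COUNT / 2 matching the two-int4-per-byte packing this kernel path uses. Note that, by this formula, the third example in the in-kernel comment (64 * stride + 32) corresponds to out_f == 96 rather than 128. A small host-side check of the arithmetic; the value of K is an arbitrary example, not taken from the patch:

    #include <cstdio>

    int main() {
        const unsigned K = 4096;                    // stands in for INPUT_ELEMENTS_COUNT
        const unsigned osv_weight_stride = K >> 1;  // two int4 weights per byte
        const unsigned samples[] = {0u, 32u, 64u, 96u, 128u};
        for (unsigned out_f : samples) {
            const unsigned base   = (out_f >> 6) << 6;         // which 64-output block
            const unsigned offset = ((out_f >> 5) & 1u) << 5;  // which 32-wide half of the block
            std::printf("out_f=%3u -> %3u * stride + %2u = %u\n",
                        out_f, base, offset, base * osv_weight_stride + offset);
        }
        return 0;
    }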
jit.AddConstant(MakeJitConstant("DECOMPRESSION_SCALE_POST_OP", 1)); jit.AddConstant(MakeJitConstant("DYNAMIC_QUANTIZE", 0)); jit.AddConstant(MakeJitConstant("QUANTIZE_GROUP_SIZE", min_quantize_grp_size)); } @@ -781,8 +784,7 @@ KernelsData FullyConnected_bf_tiled::GetTunedKernelsDataByIndex(const Params &pa auto output_f = get_output_aligned_bf_size(fc_params, false).second; WeightsLayout weights_layout = WeightsLayout::os_iyx_osv16; - // TODO: Update may also be required to fc_bf_tiled_kernel_dyn_quan kernel to support os_is_yx_osv64_isv2 format as needed - if (!should_dynamic_quantize(fc_params) && fc_params.compressed && fc_params.inputs[0].GetDType() == Datatype::F16 + if (fc_params.compressed && fc_params.inputs[0].GetDType() == Datatype::F16 && (fc_params.weights.GetLayout() == WeightsLayout::oiyx || fc_params.weights.GetLayout() == WeightsLayout::os_is_yx_osv64_isv2) && (fc_params.weights.GetDType() == WeightsType::INT4 || fc_params.weights.GetDType() == WeightsType::UINT4) && is_weight_horizontal(fc_params, output_f)) { diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index 2a3bd5dc0ff239..22f616e3d39818 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -558,7 +558,6 @@ void Graph::update_profiling_info() { }; std::map executedPrimitives = get_network()->get_executed_primitives(); - auto allPrimitives = get_network()->get_all_primitives(); // Get profiling info for all layers for (auto &profiledID : profilingIDs) { diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp index 346b4471779593..88d69dcd3e47b3 100644 --- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp @@ -34,9 +34,12 @@ namespace { inline bool can_use_usm_host(const cldnn::engine& engine) { auto can_use_usm = engine.use_unified_shared_memory(); - if (engine.get_device_info().gfx_ver.major == 12 && engine.get_device_info().gfx_ver.minor == 60) { - // WA: Disable USM host memory for infer request`s tensors for PVC as - // it has performance issues in case of host <-> device data transfers inside kernels + const auto& device_info = engine.get_device_info(); + if ((device_info.gfx_ver.major == 12 && device_info.gfx_ver.minor == 60) || + (device_info.gfx_ver.major >= 20 && device_info.dev_type == cldnn::device_type::discrete_gpu)) { + // WA: Disable USM host memory for infer request`s tensors for PVC and subsequent dGPUs, as kernel access + // to system memory is slower than using an explicit memcpy (Host <-> Device) call with the copy engine + // Driver tickets with additional details: 6155, 10054 GPU_DEBUG_TRACE << "Do not use usm_host for performance issue" << std::endl; can_use_usm = false; } diff --git a/src/plugins/intel_gpu/tests/unit/passes/reorder_inputs_test.cpp b/src/plugins/intel_gpu/tests/unit/passes/reorder_inputs_test.cpp index 7be7f74e6e96e5..cd5c2fdd1681fc 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/reorder_inputs_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/reorder_inputs_test.cpp @@ -183,7 +183,7 @@ TEST(reorder_inputs, impl_forcing_basic_format) { 7.f, 3.f, -2.f, -1.f }); network.set_input_data("input", input); - network.execute(); + auto outputs = network.execute(); const auto& prog = network.get_program(); auto& pool_node = prog->get_node("pool"); @@ -191,7 +191,7 @@ TEST(reorder_inputs, impl_forcing_basic_format) { 
ASSERT_EQ(pool_layout.format.value, format::yxfb); - auto out_mem = network.get_output("pool").get_memory(); + auto out_mem = outputs.at("pool").get_memory(); cldnn::mem_lock out_mem_ptr(out_mem, get_test_stream()); ASSERT_EQ(out_mem_ptr.size(), 4u); @@ -239,7 +239,7 @@ TEST(reorder_inputs, impl_forcing_basic_format_kernel) { 7.f, 3.f, -2.f, -1.f }); network.set_input_data("input", input); - network.execute(); + auto outputs = network.execute(); auto prog = network.get_program(); auto& node = prog->get_node("actv"); @@ -250,7 +250,7 @@ TEST(reorder_inputs, impl_forcing_basic_format_kernel) { ASSERT_EQ(actv_layout.format.value, format::yxfb); ASSERT_EQ(kernel_name, actv_impl.kernel_name); - auto out_mem = network.get_output("actv").get_memory(); + auto out_mem = outputs.at("actv").get_memory(); cldnn::mem_lock out_mem_ptr(out_mem, get_test_stream()); ASSERT_EQ(out_mem_ptr.size(), 8u); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp index 8e3da9692dcb45..f640b02afa99cb 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp @@ -1031,9 +1031,9 @@ struct concat_gpu_4d : public concat_gpu { network.set_input_data(input_ids[i].pid, in_memory[i]); } - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("concat").get_memory(); + auto out_mem = outputs.at("concat").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); for (size_t bi = 0; bi < batch_num; bi++) { @@ -1117,9 +1117,9 @@ struct concat_gpu_4d_axis3 : public concat_axis3_gpu { network.set_input_data(input_ids[i].pid, in_memory[i]); } - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("concat").get_memory(); + auto out_mem = outputs.at("concat").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); for (size_t bi = 0; bi < batch_num; bi++) { @@ -1283,9 +1283,9 @@ struct concat_id_conv_gpu_4d : public concat_gpu { network.set_input_data(input_ids[i].pid, in_memory[i]); } - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("conv").get_memory(); + auto out_mem = outputs.at("conv").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); ASSERT_EQ(out_mem->get_layout().format, fmt); @@ -1420,13 +1420,13 @@ struct concat_gpu_4d_implicit : public concat_gpu { for (size_t i = 0; i < in_features.size(); i++) { concat_network->set_input_data(input_ids[i], in_memory[i]); } - concat_network->execute(); + auto outputs = concat_network->execute(); bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data); bool concat_opt_result = std::static_pointer_cast(concat_network->get_primitive("concat"))->can_be_optimized(); EXPECT_EQ(concat_opt_enabled, concat_opt_result); - return concat_network->get_output("reorder").get_memory(); + return outputs.at("reorder").get_memory(); } std::vector>>>> generate_input() { @@ -1640,13 +1640,13 @@ struct concat_gpu_4d_implicit_onednn : public concat_gpu { for (size_t i = 0; i < in_features.size(); i++) { concat_network.set_input_data(input_ids[i], in_memory[i]); } - concat_network.execute(); + auto outputs = concat_network.execute(); bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data); bool concat_opt_result = std::static_pointer_cast(concat_network.get_primitive("concat"))->node->can_be_optimized(); 
EXPECT_EQ(concat_opt_enabled, concat_opt_result); - return concat_network.get_output("reorder").get_memory(); + return outputs.at("reorder").get_memory(); } std::vector>>>> generate_input() { @@ -1803,7 +1803,7 @@ struct concat_gpu_4d_explicit : public concat_gpu { for (size_t i = 0; i < 4; i++) { concat_network.set_input_data(input_ids[i], in_memory[i]); } - concat_network.execute(); + auto outputs = concat_network.execute(); bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data); bool concat_opt_result = std::static_pointer_cast(concat_network.get_primitive("concat"))->node->can_be_optimized(); @@ -1813,7 +1813,7 @@ struct concat_gpu_4d_explicit : public concat_gpu { if (concat_opt_enabled && batch_num > 1) concat_opt_result = !concat_opt_result; EXPECT_EQ(concat_opt_enabled, concat_opt_result); - return concat_network.get_output("reorder").get_memory(); + return outputs.at("reorder").get_memory(); } std::vector>>>> generate_input() { diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/condition_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/condition_gpu_test.cpp index d5d7798ff4ce79..7fd439ecac5728 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/condition_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/condition_gpu_test.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/graph/network.hpp" #include "intel_gpu/primitives/permute.hpp" #include "intel_gpu/runtime/internal_properties.hpp" #include "random_generator.hpp" @@ -577,6 +578,7 @@ class condition_gpu_tests: public ::testing::Test { ); branch_true.inner_program = program::build_program(engine, branch_true_topology, config, false, false, true); branch_true.input_map.insert({"input", "branch_input3"}); + branch_true.input_map.insert({"predicate2", "predicate2"}); branch_true.output_map.insert({0, "condi_nested"}); } @@ -598,11 +600,12 @@ class condition_gpu_tests: public ::testing::Test { ); topology.add( - input_layout("predicate", predicate->get_layout()) + input_layout("predicate", predicate->get_layout()), + input_layout("predicate2", predicate2->get_layout()) ); topology.add( - condition("condi", {input_info("predicate"), input_info("input")}, branch_true, branch_false) + condition("condi", {input_info("predicate"), input_info("predicate2"), input_info("input")}, branch_true, branch_false) ); std::vector input_data = { @@ -773,7 +776,7 @@ class condition_gpu_tests: public ::testing::Test { pooling(duplicated_id, input_info(cond_id), cldnn::pooling_mode::max, { 2, 1 }, { 2, 1 }) ); - EXPECT_ANY_THROW(network::ptr net = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);); + EXPECT_NO_THROW(network::ptr net = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);); } void test_empty_body(bool is_caching_test) { @@ -1038,6 +1041,7 @@ TEST(condition_gpu, set_empty_tensor) { net.set_input_data(empty_input_id, empty_input_mem); net.set_input_data(input_id, input_mem); - OV_ASSERT_NO_THROW(net.execute()); - OV_ASSERT_NO_THROW(net.get_output(cond_id).get_memory()); + std::map outputs; + OV_ASSERT_NO_THROW(outputs = net.execute()); + OV_ASSERT_NO_THROW(outputs.at(cond_id).get_memory()); } diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp index 421941296e58ab..4155ac0b420e66 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp +++ 
b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp @@ -5439,9 +5439,9 @@ TEST_P(convolution_gpu_fs_byx_fsv32, fs_byx_fsv32) network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("conv_fsv").get_memory(); + auto out_mem = outputs.at("conv_fsv").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); ASSERT_EQ(out_mem->get_layout().format, format::fs_b_yx_fsv32); @@ -5549,9 +5549,9 @@ TEST(convolution_f16_fsv_gpu, convolution_f16_fsv_gpu_padding) { network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("conv_fsv").get_memory(); + auto out_mem = outputs.at("conv_fsv").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); ASSERT_EQ(out_mem->get_layout().format, format::fs_b_yx_fsv32); @@ -5773,9 +5773,9 @@ TEST_P(convolution_gpu_fs_byx_fsv32_crop, fs_byx_fsv32_crop) network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("concat").get_memory(); + auto out_mem = outputs.at("concat").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); ASSERT_EQ(out_mem->get_layout().format, format::bfyx); @@ -6020,9 +6020,9 @@ TEST(convolution_gpu, bfyx_iyxo_5x5_fp16) network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("out").get_memory(); + auto out_mem = outputs.at("out").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); auto output_layout = out_mem->get_layout(); @@ -6254,12 +6254,12 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32) network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("conv_bsv16_fsv16").get_memory(); + auto out_mem = outputs.at("conv_bsv16_fsv16").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); - auto out_mem_bfyx = network.get_output("reorder_bfzyx").get_memory(); + auto out_mem_bfyx = outputs.at("reorder_bfzyx").get_memory(); cldnn::mem_lock out_ptr_bfyx(out_mem_bfyx, get_test_stream()); blockedFormatZeroCheck(out_mem); @@ -6394,12 +6394,12 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp16) network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("conv_bsv16_fsv16").get_memory(); + auto out_mem = outputs.at("conv_bsv16_fsv16").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); - auto out_mem_bfyx = network.get_output("reorder_bfzyx").get_memory(); + auto out_mem_bfyx = outputs.at("reorder_bfzyx").get_memory(); cldnn::mem_lock out_ptr_bfyx(out_mem_bfyx, get_test_stream()); blockedFormatZeroCheck(out_mem); @@ -6531,12 +6531,12 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32_fused_ops) network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("conv_bsv16_fsv16").get_memory(); + auto out_mem = outputs.at("conv_bsv16_fsv16").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); - auto out_mem_bfyx = network.get_output("reorder_bfzyx").get_memory(); + auto out_mem_bfyx = outputs.at("reorder_bfzyx").get_memory(); cldnn::mem_lock out_ptr_bfyx(out_mem_bfyx, get_test_stream()); blockedFormatZeroCheck(out_mem); @@ -6695,12 +6695,12 @@ 
TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32) network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("conv_bsv16_fsv16").get_memory(); + auto out_mem = outputs.at("conv_bsv16_fsv16").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); - auto out_mem_bfyx = network.get_output("reorder_bfyx").get_memory(); + auto out_mem_bfyx = outputs.at("reorder_bfyx").get_memory(); cldnn::mem_lock out_ptr_bfyx(out_mem_bfyx, get_test_stream()); ASSERT_EQ(out_mem->get_layout().format, format::bs_fs_yx_bsv16_fsv16); @@ -6836,12 +6836,12 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp16) network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("conv_bsv16_fsv16").get_memory(); + auto out_mem = outputs.at("conv_bsv16_fsv16").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); - auto out_mem_bfyx = network.get_output("reorder_bfyx").get_memory(); + auto out_mem_bfyx = outputs.at("reorder_bfyx").get_memory(); cldnn::mem_lock out_ptr_bfyx(out_mem_bfyx, get_test_stream()); ASSERT_EQ(out_mem->get_layout().format, format::bs_fs_yx_bsv16_fsv16); @@ -6975,12 +6975,12 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32_fused_ops) network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("conv_bsv16_fsv16").get_memory(); + auto out_mem = outputs.at("conv_bsv16_fsv16").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); - auto out_mem_bfyx = network.get_output("reorder_bfyx").get_memory(); + auto out_mem_bfyx = outputs.at("reorder_bfyx").get_memory(); cldnn::mem_lock out_ptr_bfyx(out_mem_bfyx, get_test_stream()); ASSERT_EQ(out_mem->get_layout().format, format::bs_fs_yx_bsv16_fsv16); @@ -7113,9 +7113,9 @@ TEST_P(convolution_depthwise_gpu, depthwise_conv_fs_b_yx_fsv32) network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("conv_fsv").get_memory(); + auto out_mem = outputs.at("conv_fsv").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); ASSERT_EQ(out_mem->get_layout().format, format::fs_b_yx_fsv32); @@ -7257,9 +7257,9 @@ TEST_P(convolution_depthwise_gpu_fsv16, depthwise_conv_b_fs_yx_fsv16) network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("conv_fsv").get_memory(); + auto out_mem = outputs.at("conv_fsv").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); ASSERT_EQ(out_mem->get_layout().format, format::b_fs_yx_fsv16); @@ -7395,9 +7395,9 @@ TEST_P(convolution_depthwise_gpu_fsv16_xy, depthwise_conv_b_fs_yx_fsv16) network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("out").get_memory(); + auto out_mem = outputs.at("out").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); ASSERT_EQ(out_mem->get_layout().format, format::b_fs_yx_fsv16); @@ -7602,9 +7602,9 @@ TEST_P(convolution_depthwise_gpu_bfyx, depthwise_conv_bfyx) network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("conv").get_memory(); + auto out_mem = outputs.at("conv").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); ASSERT_EQ(out_mem->get_layout().format, 
format::bfyx); @@ -7924,9 +7924,9 @@ TEST_P(convolution_grouped_gpu, base) { cldnn::network network(engine, topology, config); network.set_input_data("input", input); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("conv").get_memory(); + auto out_mem = outputs.at("conv").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); auto out_lay = out_mem->get_layout(); @@ -8092,9 +8092,9 @@ TEST_P(convolution_general_gpu, conv_fp16_cases) { network network(engine, topology, config); network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("conv_fsv").get_memory(); + auto out_mem = outputs.at("conv_fsv").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); auto out_lay = out_mem->get_layout(); @@ -9669,7 +9669,7 @@ TEST_P(convolution_gpu_onednn, conv_onednn_cases) { std::cerr << p.original_id << " " << p.kernel_id << std::endl; auto out_ptr = get_output_values_to_float(network, outputs.find("conv_fsv")->second); - auto out_lay = network.get_node_output_layout("conv_fsv"); + auto out_lay = network.get_primitive("conv_fsv")->get_node_output_layout(); ASSERT_EQ(out_lay.batch(), expected_result.size()); ASSERT_EQ(out_lay.feature(), expected_result[0].size()); ASSERT_EQ(out_lay.spatial(1), expected_result[0][0].size()); @@ -10330,9 +10330,9 @@ void test_convolution_f32_gpu_convolution_gpu_bfyx_f16_depthwise_x_block_size_1( network->set_input_data("input", input_mem); - network->execute(); + auto outputs = network->execute(); - auto out_mem = network->get_output("conv_fsv").get_memory(); + auto out_mem = outputs.at("conv_fsv").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); ASSERT_EQ(out_mem->get_layout().format, format::b_fs_yx_fsv16); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp index 1b9e52d1e7ef2b..20d42e85d0c301 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp @@ -1569,9 +1569,8 @@ TEST(crop_gpu, optimized_out_crop) { for (size_t i = 0; i < out_vec.size(); i++) ASSERT_EQ(output_ptr[i], out_vec[i]); - auto all_primitives = network.get_all_primitives(); - ASSERT_TRUE(all_primitives["crop1"] == "_optimized_"); - ASSERT_TRUE(all_primitives["crop2"] == "_optimized_"); + ASSERT_TRUE(network.get_primitive("crop1")->can_be_optimized()); + ASSERT_TRUE(network.get_primitive("crop2")->can_be_optimized()); } TEST(crop_single_axis, simple_Baxis) { diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/eltwise_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/eltwise_gpu_test.cpp index 5f2f5a6bbfe7d2..8d67bc3f7db2ad 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/eltwise_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/eltwise_gpu_test.cpp @@ -366,21 +366,21 @@ void run_eltwise_int_bitwise_generic_test(cldnn::eltwise_mode mode) { cldnn::format test_inputs_fmt = cldnn::format::bfyx; const int dim_size = 227; -#define ELTWISE_INT_TEST_CASES(type) \ - generic_eltwise_int_test(test_inputs_fmt, \ - 1, \ - 1, \ - dim_size, \ - dim_size, \ - mode, \ - 0, \ - 0, \ - 0, \ - 0, \ - 0, \ - static_cast(std::numeric_limits::max()), \ - 0, \ - static_cast(std::numeric_limits::max())); +#define ELTWISE_INT_TEST_CASES(type) \ + generic_eltwise_int_test(test_inputs_fmt, \ + 1, \ + 1, \ + dim_size, \ + dim_size, \ + mode, \ + 0, \ + 0, \ + 0, \ 
+ 0, \ + 0, \ + static_cast(std::numeric_limits::max())/16,\ + 0, \ + static_cast(std::numeric_limits::max())/16); ELTWISE_INT_TEST_CASES(int8_t); ELTWISE_INT_TEST_CASES(uint8_t); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp index 603f3329c43620..a485d7017c2478 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp @@ -1420,7 +1420,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto scale_mem = engine.allocate_memory({ {ofm_num, ifm_num / scales_group_size}, data_types::f16, format::bfyx }); auto dcomp_zp_mem = engine.allocate_memory({ {1, 1, 1, 1}, data_types::u8, format::bfyx }); - set_values(dcomp_zp_mem, {8}); + set_values(dcomp_zp_mem, {8}); auto input_data = rg.generate_random_1d(batch_num * ifm_num, -2.0f, 2.0f); set_values(input_mem, input_data); @@ -1528,7 +1528,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto scale_mem = engine.allocate_memory({ {ofm_num, ifm_num / scales_group_size}, data_types::f16, format::bfyx }); auto dcomp_zp_mem = engine.allocate_memory({ {1, 1, 1, 1}, data_types::u8, format::bfyx }); - set_values(dcomp_zp_mem, {8}); + set_values(dcomp_zp_mem, {8}); auto input_data = rg.generate_random_1d(batch_num * ifm_num, -1.0f, 1.0f); set_values(input_mem, input_data); @@ -1698,7 +1698,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto scale_mem = engine.allocate_memory({ {ofm_num, ifm_num / scales_group_size}, data_types::f16, format::bfyx }); auto dcomp_zp_mem = engine.allocate_memory({ {1, 1, 1, 1}, data_types::u8, format::bfyx }); - set_values(dcomp_zp_mem, {8}); + set_values(dcomp_zp_mem, {8}); auto input_data = rg.generate_random_1d(batch_num * ifm_num, -2.0f, 2.0f); set_values(input_mem, input_data); @@ -1804,7 +1804,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto scale_mem = engine.allocate_memory({ {32, 1}, data_types::f32, format::bfyx }); auto zp_mem = engine.allocate_memory({ {32, 1}, data_types::f32, format::bfyx }); - set_values(input_mem, { -0.5f, 2.0f, 0.5f, 1.0f }); + set_values(input_mem, { -0.5f, 2.0f}); set_values(weights_mem, { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 15, 14, 13, 12, 11, 10, 9, 8, @@ -2085,7 +2085,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto scale_mem = engine.allocate_memory({ { ofm_num, 1 }, data_types::f16, format::bfyx }); auto dcomp_zp_mem = engine.allocate_memory({ {1, 1, 1, 1}, data_types::u8, format::bfyx }); - set_values(dcomp_zp_mem, {8}); + set_values(dcomp_zp_mem, {8}); set_values(input_mem, { -0.5f, 2.0f, 0.5f, 1.0f, 0.5f, 2.0f }); set_values(weights_mem, { 0, 1, 2, 3, 4, 5, @@ -3605,7 +3605,7 @@ TEST(fully_connected_3d_onednn_gpu, compressed_int4_scale_static) { auto scale_mem = engine.allocate_memory({ {ofm_num, ifm_num / scales_group_size, 1, 1}, data_types::f16, format::bfyx }); auto dcomp_zp_mem = engine.allocate_memory({ {1, 1, 1, 1}, data_types::u8, format::bfyx }); - set_values(dcomp_zp_mem, {8}); + set_values(dcomp_zp_mem, {8}); auto input_data = rg.generate_random_1d(batch_num * ifm_num, -2.0f, 2.0f); set_values(input_mem, input_data); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/quantize_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/quantize_gpu_test.cpp index 42dbfa10903f6b..bb44a1022f368d 100644 --- 
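Alongside the widened integer test ranges above, the tests in this patch stop going through the removed network-level queries (get_output(), get_node_output_layout(), get_all_primitives()) and instead inspect the primitive instance returned by get_primitive(). Rough pattern, with placeholder ids:

    #include "intel_gpu/graph/network.hpp"

    void inspect(cldnn::network& net) {
        auto conv = net.get_primitive("conv_fsv");           // placeholder id
        auto out_layout = conv->get_node_output_layout();    // layout recorded on the program node
        bool optimized_out = net.get_primitive("crop1")->can_be_optimized();
        (void)out_layout;
        (void)optimized_out;
    }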
a/src/plugins/intel_gpu/tests/unit/test_cases/quantize_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/quantize_gpu_test.cpp @@ -572,7 +572,7 @@ TEST(quantize_gpu, eltwise_quantize_fs_b_yx_fsv32) { auto output_low = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); auto output_high = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); - set_values(input, { -1.0f, 2.0f, 3.0f, 4.0f, + set_values(input, {-1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 4.0f, 6.0f, 3.0f, 3.0f, 3.0f, 5.0f, 1.0f, 1.0f, @@ -603,12 +603,12 @@ TEST(quantize_gpu, eltwise_quantize_fs_b_yx_fsv32) { set_values(output_low, { -1.0f }); set_values(output_high, { 1.0f }); - std::vector ref_data = { -1, 1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 1, -1, 1, -1, 1, -1, 1, -1, -1, -1, -1, - -1, -1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, - -1, -1, -1, -1, -1, -1, -1, 1, -1, 1, -1, 1, - -1, 1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 1, -1, 1 }; + std::vector ref_data = { 1, 1, 1, 1, 1, -1, -1, 1, 1, 1, 1, 1, + 1, 1, -1, 1, 1, -1, -1, -1, 1, 1, 1, 1, + 1, 1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, + 1, -1, -1, 1, 1, 1, 1, 1, 1, 1, -1, 1, + 1, -1, -1, -1, 1, 1, 1, 1, 1, 1, -1, -1, + -1, -1, -1, 1 }; topology.add( input_layout("input1", in_layout), diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/reduce_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/reduce_gpu_test.cpp index 2dd46fe7598b5a..385226ad7eb11f 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/reduce_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/reduce_gpu_test.cpp @@ -538,9 +538,9 @@ class ReduceTestBase : public ::testing::TestWithParamset_input_data("input", input_mem); - network->execute(); + auto outputs = network->execute(); - auto out_mem = network->get_output("reduce").get_memory(); + auto out_mem = outputs.at("reduce").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); auto out_lay = out_mem->get_layout(); @@ -1834,7 +1834,7 @@ TEST(reduce_gpu, reduce_min_max_default_output_element_type_should_be_same_to_in auto& engine = get_test_engine(); auto input = engine.allocate_memory({data_types::i8, format::bfyx, {1, 1, 2, 2}}); - set_values(input, {1, 1, 1, 1}); + set_values(input, {1, 1, 1, 1}); topology topology1( input_layout("input", input->get_layout()), @@ -1972,9 +1972,9 @@ class ReduceXYWithBigTensorTestBase : public ::testing::TestWithParamset_input_data("input", input_mem); - network->execute(); + auto outputs = network->execute(); - auto out_mem = network->get_output("reduce").get_memory(); + auto out_mem = outputs.at("reduce").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); auto out_lay = out_mem->get_layout(); @@ -2132,9 +2132,9 @@ class ReduceOnednnTestBase : public ::testing::TestWithParam out_ptr(out_mem, get_test_stream()); auto out_lay = out_mem->get_layout(); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp index 5d99607c5efac5..d43273e2a1508d 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp @@ -1916,7 +1916,6 @@ TEST(reorder_gpu_opt, non_trivial_remove_redundant) net.set_input_data("in", in); auto outputs = net.execute(); auto executed_primitives = net.get_executed_primitives(); - auto all_primitives = net.get_all_primitives(); if (engine.get_device_info().supports_immad) { // Currently, oneDNN only supports 
in_order_queue diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/strided_slice_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/strided_slice_gpu_test.cpp index ad62e51b3be7c8..8937d75e7b185e 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/strided_slice_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/strided_slice_gpu_test.cpp @@ -24,9 +24,9 @@ class strided_slice_gpu: public ::testing::Test { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ ov::PartialShape{ 2, 2, 2, 2 }, data_types::f32, format::bfyx }); - auto begin = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i64, format::bfyx }); - auto end = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i64, format::bfyx }); - auto strides = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i64, format::bfyx }); + auto begin = engine.allocate_memory({ ov::PartialShape{ 4 }, data_types::i64, format::bfyx }); + auto end = engine.allocate_memory({ ov::PartialShape{ 4 }, data_types::i64, format::bfyx }); + auto strides = engine.allocate_memory({ ov::PartialShape{ 4 }, data_types::i64, format::bfyx }); set_values(input, { -0.2f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/tile_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/tile_gpu_test.cpp index 051bb15cf90700..cbc2ee19dfa006 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/tile_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/tile_gpu_test.cpp @@ -259,7 +259,7 @@ class tile_gpu: public ::testing::Test { auto config = get_test_default_config(engine); if (impl_type != impl_types::any) - config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"tile", {format::bfyx, "", impl_types::cpu}} })); + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"tile", {format::bfzyx, "", impl_types::cpu}} })); cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); diff --git a/src/plugins/intel_npu/src/al/include/sync_infer_request.hpp b/src/plugins/intel_npu/src/al/include/sync_infer_request.hpp index bf9e0f20af3b78..ade70f9b67dc0f 100644 --- a/src/plugins/intel_npu/src/al/include/sync_infer_request.hpp +++ b/src/plugins/intel_npu/src/al/include/sync_infer_request.hpp @@ -22,7 +22,7 @@ namespace intel_npu { */ class SyncInferRequest : public ov::IInferRequest { public: - explicit SyncInferRequest(const std::shared_ptr& compiledModel); + explicit SyncInferRequest(const std::shared_ptr& compiledModel, const Config& config); /** * @brief Gets an input/output tensor for inference. @@ -50,8 +50,8 @@ class SyncInferRequest : public ov::IInferRequest { * @brief Currently there is no support implemented for batches of tensors, thus this call is a simple redirection * to the "set_tensor" one. 
*/ - void set_tensors(const ov::Output& port, - const std::vector>& tensors) override; + virtual void set_tensors(const ov::Output& port, + const std::vector>& tensors) override; /** * @brief Gets inputs for infer request @@ -126,6 +126,15 @@ class SyncInferRequest : public ov::IInferRequest { */ void check_tensor(const ov::Output& port, const ov::SoPtr& tensor) const; + /** + * @brief Basic checks for input tensors + * + * @param port Input port + * @param tensors Input tensors + */ + void check_batched_tensors(const ov::Output& port, + const std::vector>& tensors) const; + /** * @brief Check that all tensors are valid. Throws an exception if it's not. */ @@ -153,14 +162,22 @@ class SyncInferRequest : public ov::IInferRequest { const ov::Allocator& allocator = {}, const std::optional batchSize = std::nullopt) const; + bool is_batched_input(size_t idx) const; + + ov::SoPtr& get_user_input(size_t index) const; + std::vector>& get_user_inputs(size_t index) const; + // This is intel_npu::ICompiledModel pointer, but need to use OV base class because // ov::IInferRequest::get_compiled_model returns a refernce to shared_ptr! std::shared_ptr _compiledModel; NetworkMetadata _metadata; - mutable std::vector> _userInputTensors; - mutable std::vector> _userOutputTensors; + Logger _logger; + + // In case set_tensors is called, we receive a vector with N tensors otherwise only 1 tensor is needed + mutable std::vector>> _userInputTensors; + mutable std::vector> _userOutputTensors; mutable std::vector> _variableStates; diff --git a/src/plugins/intel_npu/src/al/src/sync_infer_request.cpp b/src/plugins/intel_npu/src/al/src/sync_infer_request.cpp index 08d5b518b98cad..04e9ce0d9bbcf8 100644 --- a/src/plugins/intel_npu/src/al/src/sync_infer_request.cpp +++ b/src/plugins/intel_npu/src/al/src/sync_infer_request.cpp @@ -19,11 +19,12 @@ constexpr size_t BATCH_AXIS = 0; namespace intel_npu { -SyncInferRequest::SyncInferRequest(const std::shared_ptr& compiledModel) +SyncInferRequest::SyncInferRequest(const std::shared_ptr& compiledModel, const Config& config) : _compiledModel(compiledModel), _metadata(compiledModel->get_network_metadata()), - _userInputTensors(_metadata.inputs.size(), nullptr), - _userOutputTensors(_metadata.outputs.size(), nullptr) { + _logger("SyncInferRequest", config.get()), + _userInputTensors(_metadata.inputs.size(), std::vector>(1, {nullptr})), + _userOutputTensors(_metadata.outputs.size(), {nullptr}) { OPENVINO_ASSERT(_compiledModel); if (get_outputs().empty()) { @@ -121,7 +122,7 @@ ov::SoPtr SyncInferRequest::get_tensor(const ov::Output& port, const } if (foundPort.is_input()) { - _userInputTensors.at(foundPort.idx) = tensor._ptr; + get_user_input(foundPort.idx) = tensor; } else { - _userOutputTensors.at(foundPort.idx) = tensor._ptr; + _userOutputTensors.at(foundPort.idx) = tensor; } } -std::vector> SyncInferRequest::get_tensors(const ov::Output& /*port*/) const { +std::vector> SyncInferRequest::get_tensors(const ov::Output& port) const { OV_ITT_SCOPED_TASK(ov::itt::domains::Plugin, "get_tensors"); - // Using batches of tensors is currently not supported by the NPU plugin. In this scenario, the OpenVINO API demands - // returning an empty vector. 
+ auto foundPort = find_port(port); + OPENVINO_ASSERT(foundPort.found(), "Cannot find input tensors for port ", port); + + if (foundPort.is_input() && is_batched_input(foundPort.idx)) { + return get_user_inputs(foundPort.idx); + } + return {}; } @@ -192,11 +198,89 @@ void SyncInferRequest::check_tensor(const ov::Output& port, "Tensor data equal nullptr!"); } +void SyncInferRequest::check_batched_tensors(const ov::Output& port, + const std::vector>& tensors) const { + OPENVINO_ASSERT(!tensors.empty(), "set_input_tensors/set_tensors can't be called with empty tensors"); + OPENVINO_ASSERT( + tensors.size() != 1, + "Internal error (plugin): check_batched_tensors is not allowed to have only one tensor inside batch"); + + auto layout = ov::layout::get_layout(port); + + int64_t batch_idx; + + if (layout.empty()) { + _logger.warning("set_input_tensors/set_tensors layout is not set, assuming batch dimension is found on 0 axis"); + batch_idx = BATCH_AXIS; + } else { + OPENVINO_ASSERT(ov::layout::has_batch(layout), + "set_input_tensors/set_tensors can be used only for inputs with N(batch) dimension" + " 'layout' defined. Current layout is ", + layout.to_string()); + batch_idx = ov::layout::batch_idx(layout); + } + + if (batch_idx < 0) { + batch_idx += static_cast(tensors[BATCH_AXIS]->get_shape().size()); + } + OPENVINO_ASSERT(batch_idx == BATCH_AXIS, + "set_input_tensors/set_tensors is not currently supported for batch dimension index ", + batch_idx, + " != 0"); + std::for_each(tensors.begin(), tensors.end(), [&batch_idx](const ov::SoPtr& item) { + OPENVINO_ASSERT(item, "Uninitialized tensor is provided!"); + OPENVINO_ASSERT(item->get_shape()[batch_idx] == 1, + "set_input_tensors/set_tensors. Tensors shall represent one item in a batch, ", + item->get_shape()[batch_idx], + " provided"); + }); + auto tensors_size = static_cast(tensors.size()); + if (port.get_partial_shape().rank().is_static()) { + OPENVINO_ASSERT(batch_idx >= 0 && batch_idx < port.get_partial_shape().rank().get_length(), + "set_input_tensors/set_tensors error. Layout ", + layout.to_string(), + " is incorrect for operation with shape ", + port.get_partial_shape()); + auto batch = port.get_partial_shape()[batch_idx]; + + OPENVINO_ASSERT(batch.is_dynamic() || batch.get_length() == tensors_size, + "set_input_tensors/set_tensors error. Input shape ", + port.get_partial_shape(), + "batch ", + batch, + "doesn't match with total blobs count: ", + tensors_size); + } + + auto batched_shape = tensors[BATCH_AXIS]->get_shape(); + auto element_type = tensors[BATCH_AXIS]->get_element_type(); + batched_shape[batch_idx] = tensors_size; + for (const auto& item : tensors) { + OPENVINO_ASSERT(item, "Uninitialized tensor is provided!"); + auto item_shape = item->get_shape(); + item_shape[batch_idx] = batched_shape[batch_idx]; + OPENVINO_ASSERT(item_shape == batched_shape && item->get_element_type() == element_type, + "set_input_tensors/set_tensors error. 
Tensor with element type ", + item->get_element_type(), + " and shape ", + item_shape, + " is not compatible with batched tensor with element type ", + element_type, + " and shape ", + batched_shape); + OPENVINO_ASSERT(item->is_continuous(), "Strides for batched tensors should be default."); + } +} + void SyncInferRequest::check_tensors() const { const auto& inputs = _compiledModel->inputs(); for (size_t i = 0; i < inputs.size(); i++) { - if (_userInputTensors.at(i)) { - check_tensor(inputs[i], _userInputTensors.at(i)); + if (is_batched_input(i)) { + check_batched_tensors(inputs[i], get_user_inputs(i)); + continue; + } + if (get_user_input(i)) { + check_tensor(inputs[i], get_user_input(i)); } } @@ -229,7 +313,7 @@ std::shared_ptr SyncInferRequest::allocate_tensor(const IODescripto OPENVINO_ASSERT(descriptor.relatedDescriptorIndex.has_value(), "The link between state descriptors is missing, state name: ", descriptor.nameFromCompiler); - tensor = _userInputTensors.at(*descriptor.relatedDescriptorIndex); + tensor = get_user_input(*descriptor.relatedDescriptorIndex)._ptr; } else if (allocator) { tensor = ov::make_tensor(descriptor.precision, allocatedTensorShape, allocator); } else { @@ -237,8 +321,8 @@ std::shared_ptr SyncInferRequest::allocate_tensor(const IODescripto } if (isInput) { - if (_userInputTensors.at(index) == nullptr) { - _userInputTensors.at(index) = tensor; + if (get_user_input(index) == nullptr) { + get_user_input(index) = tensor; } if (descriptor.isStateInput) { @@ -250,4 +334,17 @@ std::shared_ptr SyncInferRequest::allocate_tensor(const IODescripto return tensor; } + +bool SyncInferRequest::is_batched_input(size_t idx) const { + return _userInputTensors.at(idx).size() > 1; +} + +ov::SoPtr& SyncInferRequest::get_user_input(size_t index) const { + return _userInputTensors.at(index).at(0); +} + +std::vector>& SyncInferRequest::get_user_inputs(size_t index) const { + return _userInputTensors.at(index); +} + } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/backend/include/zero_executor.hpp b/src/plugins/intel_npu/src/backend/include/zero_executor.hpp index 11cc6fab4bce25..c3f4cc14eb4945 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_executor.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_executor.hpp @@ -61,6 +61,8 @@ class ZeroExecutor final : public IExecutor { } private: + void initialize_graph_through_command_list() const; + const Config _config; Logger _logger; @@ -72,7 +74,6 @@ class ZeroExecutor final : public IExecutor { const uint32_t _group_ordinal; ze_graph_handle_t _graph = nullptr; - ze_graph_properties_t _props{}; std::vector _input_descriptors; std::vector _output_descriptors; diff --git a/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp b/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp index f6d15d2c2aed5e..6d0b343bf8d7b7 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp @@ -27,6 +27,8 @@ class ZeroInferRequest final : public SyncInferRequest { ov::SoPtr get_tensor(const ov::Output& port) const override; void set_tensor(const ov::Output& port, const ov::SoPtr& tensor) override; + void set_tensors(const ov::Output& port, + const std::vector>& tensors) override; void infer() override; void infer_async() override; @@ -54,7 +56,7 @@ class ZeroInferRequest final : public SyncInferRequest { * @returns The batch size deduced by the algorithm or the default value of 1 if batching cannot be performed 
inside * the plugin. */ - std::optional getBatchSize(const NetworkMetadata& metadata); + std::optional get_batch_size(const NetworkMetadata& metadata); /** * @brief Check the received tensor and set the Level Zero tensor accordingly @@ -75,6 +77,12 @@ class ZeroInferRequest final : public SyncInferRequest { void check_network_precision(const ov::element::Type_t precision) const override; void create_pipeline(); + std::shared_ptr& get_level_zero_input(size_t index, size_t tensorNo = 0) const; + std::vector>& get_level_zero_inputs(size_t index) const; + + std::optional& get_input_tensor_data(size_t index, size_t tensorNo = 0) const; + std::vector>& get_input_tensors_data(size_t index) const; + const std::shared_ptr _initStructs; const std::shared_ptr _executorPtr; const ZeroExecutor* _executor; @@ -83,10 +91,10 @@ class ZeroInferRequest final : public SyncInferRequest { // A copy of each tensor is needed to maintain the original L0 memory allocation in case the user provides another // memory area for the tensor. - mutable std::vector> _levelZeroInputTensors; + mutable std::vector>> _levelZeroInputTensors; mutable std::vector> _levelZeroOutputTensors; - mutable std::vector> _inputTensorsData; + mutable std::vector>> _inputTensorsData; mutable std::vector> _outputTensorsData; ze_device_properties_t _properties = {}; diff --git a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp index 6a5cc79ed7a7fc..4160a2ca979290 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp @@ -25,7 +25,7 @@ struct Pipeline { zeroProfiling::ProfilingPool& profiling_pool, zeroProfiling::ProfilingQuery& profiling_query, std::shared_ptr npu_profiling, - const std::vector>& inputTensorsData, + const std::vector>>& inputTensorsData, const std::vector>& outputTensorsData, const size_t numberOfCommandLists); @@ -37,7 +37,8 @@ struct Pipeline { void pull(); void reset() const; - void updateCommandList(const TensorData& tensorsData, const uint32_t index); + void updateCommandList(const TensorData& tensorsData, uint32_t index); + void updateCommandList(const TensorData& tensorsData, uint32_t index, size_t commandListIndex); protected: const Config _config; diff --git a/src/plugins/intel_npu/src/backend/include/zero_remote_tensor.hpp b/src/plugins/intel_npu/src/backend/include/zero_remote_tensor.hpp index a2b50639cf97a0..ca65d99eff806b 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_remote_tensor.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_remote_tensor.hpp @@ -45,7 +45,7 @@ class ZeroRemoteTensor : public RemoteTensor { void* _mem = nullptr; void* _data = nullptr; - bool _external_memory_support = true; + bool _external_memory_support = false; }; } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/backend/include/zero_types.hpp b/src/plugins/intel_npu/src/backend/include/zero_types.hpp index 834d66a45a80d9..32c67988bfb565 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_types.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_types.hpp @@ -16,7 +16,7 @@ /** * @brief Last version of Table of Graph Extension functions used within plugin */ -using ze_graph_dditable_ext_last_t = ze_graph_dditable_ext_1_7_t; +using ze_graph_dditable_ext_last_t = ze_graph_dditable_ext_1_8_t; /** * @brief Last version of the Command Queue functions used within plugin */ @@ -157,10 +157,23 @@ struct ze_graph_dditable_ext_decorator final { } // 
version 1.7 - ze_result_t ZE_APICALL pfnGetNativeBinary2(ze_graph_handle_t hGraph, size_t* pSize, uint8_t** pGraphNativeBinary) { + ze_result_t ZE_APICALL pfnGetNativeBinary2(ze_graph_handle_t hGraph, + size_t* pSize, + const uint8_t** pGraphNativeBinary) { throwWhenUnsupported("pfnGetNativeBinary2", ZE_GRAPH_EXT_VERSION_1_7); return _impl->pfnGetNativeBinary2(hGraph, pSize, pGraphNativeBinary); } + + // version 1.8 + ze_result_t ZE_APICALL pfnGetProperties2(ze_graph_handle_t hGraph, ze_graph_properties_2_t* pGraphProperties) { + throwWhenUnsupported("ze_pfnGraphGetProperties_ext_2_t", ZE_GRAPH_EXT_VERSION_1_8); + return _impl->pfnGetProperties2(hGraph, pGraphProperties); + } + + ze_result_t ZE_APICALL pfnGraphInitialize(ze_graph_handle_t hGraph) { + throwWhenUnsupported("ze_pfnGraphGetProperties_ext_2_t", ZE_GRAPH_EXT_VERSION_1_8); + return _impl->pfnGraphInitialize(hGraph); + } }; /** diff --git a/src/plugins/intel_npu/src/backend/src/zero_executor.cpp b/src/plugins/intel_npu/src/backend/src/zero_executor.cpp index ade476c5649e53..3655f0b611d5f9 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_executor.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_executor.cpp @@ -37,23 +37,6 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr& i _initStructs->getCommandQueueDdiTable(), _config, group_ordinal)} { - _logger.debug("ZeroExecutor::ZeroExecutor init start - create graph_command_list"); - OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Executor::ZeroExecutor"); - CommandList graph_command_list(_initStructs->getDevice(), - _initStructs->getContext(), - _graph_ddi_table_ext, - _config, - _group_ordinal); - _logger.debug("ZeroExecutor::ZeroExecutor - create graph_command_queue"); - CommandQueue graph_command_queue(_initStructs->getDevice(), - _initStructs->getContext(), - ZE_COMMAND_QUEUE_PRIORITY_NORMAL, - _initStructs->getCommandQueueDdiTable(), - _config, - _group_ordinal); - _logger.debug("ZeroExecutor::ZeroExecutor - create fence"); - Fence fence(graph_command_queue, _config); - _logger.debug("ZeroExecutor::ZeroExecutor - create graph"); OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_GRAPH, itt::domains::LevelZeroBackend, "Executor::ZeroExecutor", "graphCreate"); @@ -79,7 +62,10 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr& i OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "pfnGetProperties"); _logger.debug("performing pfnGetProperties"); - zeroUtils::throwOnFail("pfnGetProperties", _graph_ddi_table_ext.pfnGetProperties(_graph, &_props)); + ze_graph_properties_t props{}; + props.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES; + + zeroUtils::throwOnFail("pfnGetProperties", _graph_ddi_table_ext.pfnGetProperties(_graph, &props)); auto targetDriverExtVersion = _graph_ddi_table_ext.version(); if (targetDriverExtVersion <= ZE_GRAPH_EXT_VERSION_1_1) { OPENVINO_THROW("Incompatibility between the NPU plugin and driver! 
The driver version is too old, please " @@ -88,8 +74,9 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr& i OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "pfnGetArgumentProperties3"); _logger.debug("performing pfnGetArgumentProperties3"); - for (uint32_t index = 0; index < _props.numGraphArgs; ++index) { - ze_graph_argument_properties_3_t arg3; + for (uint32_t index = 0; index < props.numGraphArgs; ++index) { + ze_graph_argument_properties_3_t arg3{}; + arg3.stype = ZE_STRUCTURE_TYPE_GRAPH_ARGUMENT_PROPERTIES; zeroUtils::throwOnFail("pfnGetArgumentProperties3", _graph_ddi_table_ext.pfnGetArgumentProperties3(_graph, index, &arg3)); @@ -100,6 +87,51 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr& i } } + if (_graph_ddi_table_ext.version() < ZE_GRAPH_EXT_VERSION_1_8) { + initialize_graph_through_command_list(); + } else { + ze_graph_properties_2_t properties = {}; + properties.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES; + _graph_ddi_table_ext.pfnGetProperties2(_graph, &properties); + + if (properties.initStageRequired & ZE_GRAPH_STAGE_INITIALIZE) { + OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "pfnGraphInitialize"); + _graph_ddi_table_ext.pfnGraphInitialize(_graph); + } + + if (properties.initStageRequired & ZE_GRAPH_STAGE_COMMAND_LIST_INITIALIZE) { + initialize_graph_through_command_list(); + } + } + + if (config.has()) { + setWorkloadType(config.get()); + } +} + +void ZeroExecutor::initialize_graph_through_command_list() const { + OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_GRAPH, + itt::domains::LevelZeroBackend, + "Executor::ZeroExecutor", + "initialize_graph_through_command_list"); + + _logger.debug("ZeroExecutor::ZeroExecutor init start - create graph_command_list"); + OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Executor::ZeroExecutor"); + CommandList graph_command_list(_initStructs->getDevice(), + _initStructs->getContext(), + _graph_ddi_table_ext, + _config, + _group_ordinal); + _logger.debug("ZeroExecutor::ZeroExecutor - create graph_command_queue"); + CommandQueue graph_command_queue(_initStructs->getDevice(), + _initStructs->getContext(), + ZE_COMMAND_QUEUE_PRIORITY_NORMAL, + _initStructs->getCommandQueueDdiTable(), + _config, + _group_ordinal); + _logger.debug("ZeroExecutor::ZeroExecutor - create fence"); + Fence fence(graph_command_queue, _config); + OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "appendGraphInitialize"); _logger.debug("ZeroExecutor::ZeroExecutor - performing appendGraphInitialize"); graph_command_list.appendGraphInitialize(_graph); @@ -112,10 +144,6 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr& i _logger.debug("ZeroExecutor::ZeroExecutor - performing hostSynchronize"); fence.hostSynchronize(); _logger.debug("ZeroExecutor::ZeroExecutor - hostSynchronize completed"); - - if (config.has()) { - setWorkloadType(config.get()); - } } void ZeroExecutor::setWorkloadType(const ov::WorkloadType workloadType) const { diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp index 0a8d8dded5e97d..2c954151a4f652 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp @@ -19,6 +19,7 @@ using namespace intel_npu; namespace { +constexpr std::size_t SINGLE_TENSOR = 0; constexpr std::size_t BATCH_AXIS = 0; constexpr std::size_t DEFAULT_BATCH_SIZE = 1; constexpr bool INPUT = true; @@ -30,8 +31,8 @@ constexpr bool OUTPUT = false; * @param ioDescriptor The OpenVINO API specific I/O descriptor which shall be compared. 
* @param zeDescriptor The Level Zero specific structure used for comparison. */ -void checkLevelZeroAttributesMatch(const IODescriptor& ioDescriptor, - const ZeroExecutor::ArgumentDescriptor& zeDescriptor) { +void check_level_zero_attributes_match(const IODescriptor& ioDescriptor, + const ZeroExecutor::ArgumentDescriptor& zeDescriptor) { std::string zeDescriptorName = zeDescriptor.info.name; if (isStateInputName(zeDescriptorName)) { @@ -78,9 +79,25 @@ Type extract_object(const ov::AnyMap& params, const ov::Property& p) { return res.as(); } +bool memory_was_allocated_in_the_same_l0_context(ze_context_handle_t hContext, const void* ptr) { + ze_memory_allocation_properties_t desc = {}; + desc.stype = ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES; + auto res = intel_npu::zeMemGetAllocProperties(hContext, ptr, &desc, nullptr); + if (res == ZE_RESULT_SUCCESS) { + if (desc.id) { + if ((desc.type & ZE_MEMORY_TYPE_HOST) || (desc.type & ZE_MEMORY_TYPE_DEVICE) || + (desc.type & ZE_MEMORY_TYPE_SHARED)) { + return true; + } + } + } + + return false; +} + } // namespace -std::optional ZeroInferRequest::getBatchSize(const NetworkMetadata& metadata) { +std::optional ZeroInferRequest::get_batch_size(const NetworkMetadata& metadata) { if (!metadata.outputs.at(0).shapeFromIRModel.has_value()) { _logger.debug("Batching on the plugin is not used, batching is handled by the compiler"); return std::nullopt; @@ -143,15 +160,15 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& const std::shared_ptr& compiledModel, const std::shared_ptr& executor, const Config& config) - : SyncInferRequest(compiledModel), + : SyncInferRequest(compiledModel, config), _initStructs(initStructs), _executorPtr(executor), _executor(static_cast(_executorPtr.get())), _config(config), _logger("ZeroInferRequest", config.get()), - _levelZeroInputTensors(_metadata.inputs.size(), nullptr), + _levelZeroInputTensors(_metadata.inputs.size(), std::vector>(1, nullptr)), _levelZeroOutputTensors(_metadata.outputs.size(), nullptr), - _inputTensorsData(_metadata.inputs.size(), std::nullopt), + _inputTensorsData(_metadata.inputs.size(), std::vector>(1, std::nullopt)), _outputTensorsData(_metadata.outputs.size(), std::nullopt), _profilingPool(_executor->graph(), zeroProfiling::POOL_SIZE, _executor->getInitStructs()->getProfilingDdiTable()), _profilingQuery(0, @@ -179,7 +196,7 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& std::make_shared(_initStructs, ZE_HOST_MEM_ALLOC_FLAG_BIAS_WRITE_COMBINED); if (config.get() != ov::intel_npu::BatchMode::COMPILER) { - _batchSize = getBatchSize(_metadata); + _batchSize = get_batch_size(_metadata); } if (_batchSize.has_value()) { _numberOfCommandLists = *_batchSize; @@ -189,24 +206,23 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& size_t ioIndex = 0; for (const IODescriptor& inputDescriptor : _metadata.inputs) { - checkLevelZeroAttributesMatch(inputDescriptor, executorInputDescriptors.at(ioIndex)); + check_level_zero_attributes_match(inputDescriptor, executorInputDescriptors.at(ioIndex)); if (!(inputDescriptor.isStateInput || inputDescriptor.isShapeTensor)) { ++ioIndex; continue; } - _levelZeroInputTensors.at(ioIndex) = - allocate_tensor(inputDescriptor, ioIndex, INPUT, *_inputAllocator, _batchSize); - _inputTensorsData.at(ioIndex) = - TensorData{_levelZeroInputTensors.at(ioIndex)->data(), _levelZeroInputTensors.at(ioIndex)->get_byte_size()}; + get_level_zero_input(ioIndex) = allocate_tensor(inputDescriptor, ioIndex, INPUT, *_inputAllocator, _batchSize); + 
get_input_tensor_data(ioIndex) = + TensorData{get_level_zero_input(ioIndex)->data(), get_level_zero_input(ioIndex)->get_byte_size()}; ++ioIndex; } ioIndex = 0; for (const IODescriptor& outputDescriptor : _metadata.outputs) { - checkLevelZeroAttributesMatch(outputDescriptor, executorOutputDescriptors.at(ioIndex)); + check_level_zero_attributes_match(outputDescriptor, executorOutputDescriptors.at(ioIndex)); if (!(outputDescriptor.isStateOutput || outputDescriptor.isShapeTensor)) { ++ioIndex; @@ -227,18 +243,25 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& void ZeroInferRequest::create_pipeline() { for (size_t inputIndex = 0; inputIndex < _metadata.inputs.size(); ++inputIndex) { - if (_levelZeroInputTensors.at(inputIndex)) { + if (is_batched_input(inputIndex)) { + if (_batchSize.has_value()) { + _logger.debug("ZeroInferRequest::create_pipeline - tensors %s were already allocated", + _metadata.inputs.at(inputIndex).nodeFriendlyName.c_str()); + continue; + } + } + + if (get_level_zero_input(inputIndex)) { _logger.debug("ZeroInferRequest::create_pipeline - tensor %s was already allocated", _metadata.inputs.at(inputIndex).nodeFriendlyName.c_str()); continue; } - _logger.debug("ZeroInferRequest::create_pipeline - Allocate new tensor"); - _levelZeroInputTensors.at(inputIndex) = + _logger.debug("ZeroInferRequest::create_pipeline - allocate new tensor"); + get_level_zero_input(inputIndex) = allocate_tensor(_metadata.inputs.at(inputIndex), inputIndex, INPUT, *_inputAllocator, _batchSize); - _inputTensorsData.at(inputIndex) = - std::optional(TensorData{_levelZeroInputTensors.at(inputIndex)->data(), - _levelZeroInputTensors.at(inputIndex)->get_byte_size()}); + get_input_tensor_data(inputIndex) = std::optional( + TensorData{get_level_zero_input(inputIndex)->data(), get_level_zero_input(inputIndex)->get_byte_size()}); } for (size_t outputIndex = 0; outputIndex < _metadata.outputs.size(); ++outputIndex) { @@ -274,47 +297,32 @@ void ZeroInferRequest::set_tensor_data(const std::shared_ptr tensor const size_t index, const bool isInput) { OV_ITT_TASK_CHAIN(ZERO_SET_TENSOR, itt::domains::LevelZeroBackend, "set_tensor", "set_tensor_data"); - auto& levelZeroTensors = isInput ? _levelZeroInputTensors : _levelZeroOutputTensors; - auto& tensorsData = isInput ? _inputTensorsData : _outputTensorsData; + auto& levelZeroTensors = isInput ? get_level_zero_input(index) : _levelZeroOutputTensors.at(index); + auto& tensorsData = isInput ? 
get_input_tensor_data(index) : _outputTensorsData.at(index); bool setTensorData = false; bool levelZeroTensorCreatedLocally = true; OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "check_data_allocation"); - ze_memory_allocation_properties_t desc = {}; - desc.stype = ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES; - auto res = zeMemGetAllocProperties(_initStructs->getContext(), tensor->data(), &desc, nullptr); - if (res == ZE_RESULT_SUCCESS) { - if (desc.id) { - switch (desc.type) { - case ZE_MEMORY_TYPE_HOST: - case ZE_MEMORY_TYPE_DEVICE: - case ZE_MEMORY_TYPE_SHARED: - _logger.debug("ZeroInferRequest::set_tensor_data - tensor was created in the same L0 context"); - levelZeroTensors.at(index) = tensor; - levelZeroTensorCreatedLocally = false; - setTensorData = true; - break; - case ZE_MEMORY_TYPE_UNKNOWN: - case ZE_MEMORY_TYPE_FORCE_UINT32: - break; - } - } + if (memory_was_allocated_in_the_same_l0_context(_initStructs->getContext(), tensor->data())) { + _logger.debug("ZeroInferRequest::set_tensor_data - tensor was created in the same L0 context"); + levelZeroTensors = tensor; + levelZeroTensorCreatedLocally = false; + setTensorData = true; } if (!setTensorData) { // make sure that the L0 tensor was allocated locally and is not received from the user when receiving // random tensor - if (tensorsData.at(index).has_value() && !tensorsData.at(index)->levelZeroTensorCreatedLocally) { + if (tensorsData.has_value() && !tensorsData->levelZeroTensorCreatedLocally) { _logger.debug("ZeroInferRequest::set_tensor_data - create locally L0 tensor"); OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "allocate tensor"); - levelZeroTensors.at(index) = - allocate_tensor(isInput ? _metadata.inputs.at(index) : _metadata.outputs.at(index), - index, - isInput, - isInput ? *_inputAllocator : *_outputAllocator, - _batchSize); + levelZeroTensors = allocate_tensor(isInput ? _metadata.inputs.at(index) : _metadata.outputs.at(index), + index, + isInput, + isInput ? *_inputAllocator : *_outputAllocator, + _batchSize); setTensorData = true; levelZeroTensorCreatedLocally = true; @@ -322,15 +330,14 @@ void ZeroInferRequest::set_tensor_data(const std::shared_ptr tensor } if (setTensorData) { - tensorsData.at(index) = std::optional(TensorData{levelZeroTensors.at(index)->data(), - levelZeroTensors.at(index)->get_byte_size(), - levelZeroTensorCreatedLocally}); + tensorsData = std::optional( + TensorData{levelZeroTensors->data(), levelZeroTensors->get_byte_size(), levelZeroTensorCreatedLocally}); if (_pipelineIsCreated) { _logger.debug("ZeroInferRequest::infer_async - update command list"); OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "updateCommandList"); - _pipeline->updateCommandList(*tensorsData.at(index), + _pipeline->updateCommandList(*tensorsData, isInput ? _executor->get_input_descriptors().at(index).idx : _executor->get_output_descriptors().at(index).idx); } @@ -353,17 +360,17 @@ void ZeroInferRequest::set_remote_tensor_data(const std::shared_ptrget_byte_size(), false}); + levelZeroTensors = tensor; + tensorsData = std::optional(TensorData{data, tensor->get_byte_size(), false}); if (_pipelineIsCreated) { _logger.debug("ZeroInferRequest::infer_async - update command list"); OV_ITT_TASK_NEXT(ZERO_SET_REMOTE_TENSOR, "updateCommandList"); - _pipeline->updateCommandList(*tensorsData.at(index), + _pipeline->updateCommandList(*tensorsData, isInput ? 
_executor->get_input_descriptors().at(index).idx : _executor->get_output_descriptors().at(index).idx); } @@ -381,9 +388,16 @@ void ZeroInferRequest::set_tensor(const ov::Output& port, const } if (foundPort.is_input()) { - _userInputTensors.at(foundPort.idx) = tensor._ptr; + if (is_batched_input(foundPort.idx)) { + // resize vector size to 1 if set_tensor is called after set_tensors + get_input_tensors_data(foundPort.idx).resize(1); + get_level_zero_inputs(foundPort.idx).resize(1); + get_user_inputs(foundPort.idx).resize(1); + } + + get_user_input(foundPort.idx) = tensor; } else { - _userOutputTensors.at(foundPort.idx) = tensor._ptr; + _userOutputTensors.at(foundPort.idx) = tensor; } if (_initStructs->getMutableCommandListVersion()) { @@ -399,6 +413,78 @@ void ZeroInferRequest::set_tensor(const ov::Output& port, const } } +void ZeroInferRequest::set_tensors(const ov::Output& port, + const std::vector>& tensors) { + OV_ITT_TASK_CHAIN(SET_TENSORS, itt::domains::LevelZeroBackend, "set_tensors", "set_tensors"); + if (tensors.size() == 1) { + set_tensor(port, tensors[0]); + return; + } + + auto foundPort = find_port(port); + OPENVINO_ASSERT(foundPort.found(), "Cannot find input tensor for port ", port); + if (!foundPort.is_input()) { + OPENVINO_THROW("set_input_tensors/set_tensors is not supported for output port."); + } + + check_batched_tensors(port, tensors); + + get_user_inputs(foundPort.idx).resize(tensors.size()); + get_user_inputs(foundPort.idx) = tensors; + + if (_initStructs->getMutableCommandListVersion()) { + if (_batchSize.has_value()) { + for (size_t i = 0; i < tensors.size(); i++) { + auto remoteTensor = std::dynamic_pointer_cast(tensors[i]._ptr); + + get_level_zero_inputs(foundPort.idx).resize(tensors.size()); + get_input_tensors_data(foundPort.idx).resize(tensors.size()); + + if (remoteTensor == nullptr) { + bool tensorHasSameL0Context = false; + + OV_ITT_TASK_NEXT(SET_TENSORS, "check_data_allocation"); + if (memory_was_allocated_in_the_same_l0_context(_initStructs->getContext(), tensors[i]->data())) { + _logger.debug("ZeroInferRequest::set_tensors - tensor was created in the same L0 context"); + + get_level_zero_input(foundPort.idx, i) = tensors.at(i)._ptr; + tensorHasSameL0Context = true; + } + + if (!tensorHasSameL0Context) { + _logger.debug("ZeroInferRequest::set_tensors - tensor wasn't created in the same L0 context, " + "create a L0 tensor"); + + get_level_zero_input(foundPort.idx, i) = + allocate_tensor(_metadata.inputs.at(foundPort.idx), foundPort.idx, true, *_inputAllocator); + } + + get_input_tensor_data(foundPort.idx, i) = + std::optional(TensorData{get_level_zero_input(foundPort.idx, i)->data(), + get_level_zero_input(foundPort.idx, i)->get_byte_size(), + false}); + } else { + _logger.debug("ZeroInferRequest::set_tensors - remote tensor is used"); + + get_input_tensor_data(foundPort.idx, i) = std::optional( + TensorData{extract_object(remoteTensor->get_properties(), ov::intel_npu::mem_handle), + remoteTensor->get_byte_size(), + false}); + + get_level_zero_input(foundPort.idx, i) = tensors.at(i)._ptr; + } + + if (_pipelineIsCreated) { + OV_ITT_TASK_NEXT(SET_TENSORS, "updateCommandList"); + _pipeline->updateCommandList(*get_input_tensor_data(foundPort.idx, i), + _executor->get_input_descriptors().at(foundPort.idx).idx, + i); + } + } + } + } +} + ov::SoPtr ZeroInferRequest::get_tensor(const ov::Output& port) const { OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "get_tensor"); @@ -407,28 +493,31 @@ ov::SoPtr ZeroInferRequest::get_tensor(const ov::Outputdata(), 
levelZeroTensors.at(ioIndex)->get_byte_size()}); + levelZeroTensors = allocate_tensor(isInput ? _metadata.inputs.at(ioIndex) : _metadata.outputs.at(ioIndex), + ioIndex, + isInput, + isInput ? *_inputAllocator : *_outputAllocator, + _batchSize); + tensorsData = std::optional(TensorData{levelZeroTensors->data(), levelZeroTensors->get_byte_size()}); - return levelZeroTensors.at(ioIndex); + return levelZeroTensors; } void ZeroInferRequest::infer() { @@ -450,26 +539,75 @@ void ZeroInferRequest::infer_async() { _executor->mutexUnlock(); size_t inputIndex = 0; - for (const std::shared_ptr& userTensor : _userInputTensors) { + for (const auto& userTensor : _userInputTensors) { const IODescriptor inputDescriptor = _metadata.inputs.at(inputIndex); if (inputDescriptor.isShapeTensor) { OPENVINO_ASSERT(inputDescriptor.relatedDescriptorIndex.has_value(), "The link between the dynamic tensor and its shape tensor is missing, entry name: ", inputDescriptor.nameFromCompiler); - const auto& inputDims = _userInputTensors.at(*inputDescriptor.relatedDescriptorIndex)->get_shape(); + const auto& inputDims = get_user_input(*inputDescriptor.relatedDescriptorIndex)->get_shape(); - for (size_t i = 0; i < userTensor->get_size(); ++i) { + for (size_t i = 0; i < userTensor.at(SINGLE_TENSOR)->get_size(); ++i) { const auto reverseIdx = inputDims.size() - 1 - i; - userTensor->data()[i] = static_cast(inputDims[reverseIdx]); + userTensor.at(SINGLE_TENSOR)->data()[i] = static_cast(inputDims[reverseIdx]); } } - auto userRemoteTensor = std::dynamic_pointer_cast(userTensor); + if (is_batched_input(inputIndex)) { + if (_batchSize.has_value()) { + for (size_t i = 0; i < userTensor.size(); i++) { + auto levelZeroBatchRemoteTensor = + std::dynamic_pointer_cast(get_level_zero_input(inputIndex, i)); + if (levelZeroBatchRemoteTensor == nullptr) { + void* levelZeroBuffer = get_level_zero_input(inputIndex, i)->data(); + + auto userBatchRemoteTensor = std::dynamic_pointer_cast(userTensor.at(i)._ptr); + + void* userBuffer = + !userBatchRemoteTensor + ? userTensor.at(i)->data() + : extract_object(userBatchRemoteTensor->get_properties(), ov::intel_npu::mem_handle); + + if (userBuffer != levelZeroBuffer) { + if (userBuffer == nullptr || levelZeroBuffer == nullptr) { + OPENVINO_THROW("Empty buffer"); + } + + _logger.info("Batched Tensors - Tensor is not allocated in the current Level Zero context"); + OV_ITT_TASK_NEXT(ZERO_INFER, "memcpy"); + std::memcpy(levelZeroBuffer, userBuffer, userTensor.at(i)->get_byte_size()); + } + } + } + } else { + void* levelZeroBuffer = get_level_zero_input(inputIndex)->data(); + + _logger.info("Batched Tensors - Tensor is not allocated in the current Level Zero context or must be " + "in a continued memory space"); + + for (size_t i = 0; i < userTensor.size(); i++) { + auto userBatchRemoteTensor = std::dynamic_pointer_cast(userTensor.at(i)._ptr); + + void* userBuffer = !userBatchRemoteTensor ? userTensor.at(i)->data() + : extract_object(userBatchRemoteTensor->get_properties(), + ov::intel_npu::mem_handle); + + std::memcpy(static_cast(levelZeroBuffer) + (i * userTensor.at(i)->get_byte_size()), + userBuffer, + userTensor.at(i)->get_byte_size()); + } + } + + ++inputIndex; + continue; + } + + auto userRemoteTensor = std::dynamic_pointer_cast(userTensor.at(SINGLE_TENSOR)._ptr); void* userBuffer = !userRemoteTensor - ? userTensor->data() + ? 
userTensor.at(SINGLE_TENSOR)->data() : extract_object(userRemoteTensor->get_properties(), ov::intel_npu::mem_handle); - const std::shared_ptr& levelZeroTensor = _levelZeroInputTensors.at(inputIndex); + const std::shared_ptr& levelZeroTensor = get_level_zero_input(inputIndex); auto levelZeroRemoteTensor = std::dynamic_pointer_cast(levelZeroTensor); if (levelZeroRemoteTensor == nullptr) { void* levelZeroBuffer = levelZeroTensor->data(); @@ -481,7 +619,7 @@ void ZeroInferRequest::infer_async() { _logger.info("Tensor is not allocated in the current Level Zero context"); OV_ITT_TASK_NEXT(ZERO_INFER, "memcpy"); - std::memcpy(levelZeroBuffer, userBuffer, userTensor->get_byte_size()); + std::memcpy(levelZeroBuffer, userBuffer, userTensor.at(SINGLE_TENSOR)->get_byte_size()); } } @@ -498,7 +636,7 @@ void ZeroInferRequest::get_result() { _pipeline->pull(); size_t outputIndex = 0; - for (const std::shared_ptr& userTensor : _userOutputTensors) { + for (const auto& userTensor : _userOutputTensors) { const IODescriptor outputDescriptor = _metadata.outputs.at(outputIndex); if (outputDescriptor.isShapeTensor) { OPENVINO_ASSERT(outputDescriptor.relatedDescriptorIndex.has_value(), @@ -516,7 +654,7 @@ void ZeroInferRequest::get_result() { tensorToBeReshaped->set_shape(actualDims); } - auto userRemoteTensor = std::dynamic_pointer_cast(userTensor); + auto userRemoteTensor = std::dynamic_pointer_cast(userTensor._ptr); void* userBuffer = !userRemoteTensor ? userTensor->data() : extract_object(userRemoteTensor->get_properties(), ov::intel_npu::mem_handle); @@ -615,3 +753,18 @@ std::vector ZeroInferRequest::get_profiling_info() const { std::vector ZeroInferRequest::get_raw_profiling_data() const { return _profilingQuery.getData(); } + +std::shared_ptr& ZeroInferRequest::get_level_zero_input(size_t index, size_t tensorNo) const { + return _levelZeroInputTensors.at(index).at(tensorNo); +} + +std::vector>& ZeroInferRequest::get_level_zero_inputs(size_t index) const { + return _levelZeroInputTensors.at(index); +} + +std::optional& ZeroInferRequest::get_input_tensor_data(size_t index, size_t tensorNo) const { + return _inputTensorsData.at(index).at(tensorNo); +} +std::vector>& ZeroInferRequest::get_input_tensors_data(size_t index) const { + return _inputTensorsData.at(index); +} diff --git a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp index ff24536a52d9b6..cfc80d48c50707 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp @@ -20,7 +20,7 @@ Pipeline::Pipeline(const Config& config, zeroProfiling::ProfilingPool& profiling_pool, zeroProfiling::ProfilingQuery& profiling_query, std::shared_ptr npu_profiling, - const std::vector>& inputTensorsData, + const std::vector>>& inputTensorsData, const std::vector>& outputTensorsData, const size_t numberOfCommandLists) : _config(config), @@ -31,9 +31,9 @@ Pipeline::Pipeline(const Config& config, numberOfCommandLists ? 
static_cast(numberOfCommandLists) : 1, _config}, _npu_profiling(std::move(npu_profiling)), - _logger("IntegratedPipeline", _config.get()) { - OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Zero_infer_request::IntegratedPipeline::IntegratedPipeline"); - _logger.debug("IntegratedPipeline - initialize started"); + _logger("Pipeline", _config.get()) { + OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Zero_infer_request::Pipeline::Pipeline"); + _logger.debug("Pipeline - initialize started"); if (profiling_pool.create()) { profiling_query.create(profiling_pool._handle); @@ -42,7 +42,7 @@ Pipeline::Pipeline(const Config& config, _command_lists.reserve(numberOfCommandLists); _events.reserve(numberOfCommandLists); _fences.reserve(numberOfCommandLists); - _logger.debug("IntegratedPipeline - emplace_back _event_pool and _command_queue"); + _logger.debug("Pipeline - emplace_back _event_pool and _command_queue"); for (size_t i = 0; i < numberOfCommandLists; i++) { _command_lists.emplace_back( std::make_unique(_executor->getInitStructs()->getDevice(), @@ -58,9 +58,17 @@ Pipeline::Pipeline(const Config& config, for (size_t i = 0; i < numberOfCommandLists; i++) { size_t ioIndex = 0; for (const auto& desc : _executor->get_input_descriptors()) { + if (inputTensorsData.at(ioIndex).size() > 1) { + _executor->setArgumentValue(desc.idx, inputTensorsData.at(ioIndex).at(i)->mem); + + ++ioIndex; + continue; + } + _executor->setArgumentValue(desc.idx, - static_cast(inputTensorsData.at(ioIndex)->mem) + - (i * inputTensorsData.at(ioIndex)->size) / numberOfCommandLists); + static_cast(inputTensorsData.at(ioIndex).at(0)->mem) + + (i * inputTensorsData.at(ioIndex).at(0)->size) / numberOfCommandLists); + ++ioIndex; } @@ -93,14 +101,14 @@ Pipeline::Pipeline(const Config& config, } _command_lists.at(i)->close(); } - _logger.debug("IntegratedPipeline - initialize completed"); + _logger.debug("Pipeline - initialize completed"); } void Pipeline::push() { - _logger.debug("IntegratedPipeline - push() started"); + _logger.debug("Pipeline - push() started"); for (size_t i = 0; i < _command_lists.size(); ++i) { - OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_PUSH, itt::domains::LevelZeroBackend, "IntegratedPipeline", "push"); + OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_PUSH, itt::domains::LevelZeroBackend, "Pipeline", "push"); if (sync_output_with_fences_) { _command_queue.executeCommandList(*_command_lists.at(i), *_fences.at(i)); } else { @@ -108,12 +116,12 @@ void Pipeline::push() { } } - _logger.debug("IntegratedPipeline - push() completed"); + _logger.debug("Pipeline - push() completed"); }; void Pipeline::pull() { - _logger.debug("IntegratedPipeline - pull() started"); - OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_PULL, itt::domains::LevelZeroBackend, "IntegratedPipeline", "pull"); + _logger.debug("Pipeline - pull() started"); + OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_PULL, itt::domains::LevelZeroBackend, "Pipeline", "pull"); for (size_t i = 0; i < _command_lists.size(); ++i) { if (sync_output_with_fences_) { @@ -127,11 +135,11 @@ void Pipeline::pull() { } } - _logger.debug("IntegratedPipeline - pull() completed"); + _logger.debug("Pipeline - pull() completed"); }; void Pipeline::reset() const { - _logger.debug("IntegratedPipeline - rest() started"); + _logger.debug("Pipeline - rest() started"); for (size_t i = 0; i < _command_lists.size(); ++i) { if (sync_output_with_fences_) { @@ -141,11 +149,13 @@ void Pipeline::reset() const { } } - _logger.debug("IntegratedPipeline - rest() completed"); + _logger.debug("Pipeline - rest() completed"); }; -void 
Pipeline::updateCommandList(const TensorData& tensorsData, const uint32_t index) { - OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_UMCL, itt::domains::LevelZeroBackend, "IntegratedPipeline", "updateCommandList"); +void Pipeline::updateCommandList(const TensorData& tensorsData, uint32_t index) { + OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_UMCL, itt::domains::LevelZeroBackend, "Pipeline", "updateCommandList"); + _logger.debug("Pipeline - updateCommandList"); + + const size_t numberOfCommandLists = _command_lists.size(); for (size_t i = 0; i < numberOfCommandLists; i++) { @@ -156,4 +166,18 @@ void Pipeline::updateCommandList(const TensorData& tensorsData, const uint32_t i } }; +void Pipeline::updateCommandList(const TensorData& tensorsData, uint32_t index, size_t commandListIndex) { + OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_UMCL, itt::domains::LevelZeroBackend, "Pipeline", "updateCommandList"); + _logger.debug("Pipeline - updateCommandList"); + + const size_t numberOfCommandLists = _command_lists.size(); + + OPENVINO_ASSERT(commandListIndex < numberOfCommandLists, + "Command list index is higher than the number of command lists ", + commandListIndex); + + _command_lists.at(commandListIndex)->updateMutableCommandList(index, tensorsData.mem); + _command_lists.at(commandListIndex)->close(); +}; + } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/backend/src/zero_remote_tensor.cpp b/src/plugins/intel_npu/src/backend/src/zero_remote_tensor.cpp index 4ac1d75fe57f10..e47b454e11c427 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_remote_tensor.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_remote_tensor.cpp @@ -44,9 +44,16 @@ ZeroRemoteTensor::ZeroRemoteTensor(std::shared_ptr context, ze_device_external_memory_properties_t desc = {}; desc.stype = ZE_STRUCTURE_TYPE_DEVICE_EXTERNAL_MEMORY_PROPERTIES; auto res = zeDeviceGetExternalMemoryProperties(_init_structs->getDevice(), &desc); - if (res != ZE_RESULT_SUCCESS || (desc.memoryAllocationImportTypes != ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF && - desc.memoryAllocationImportTypes != ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32)) { - _external_memory_support = false; + if (res == ZE_RESULT_SUCCESS) { +#ifdef _WIN32 + if (desc.memoryAllocationImportTypes & ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32) { + _external_memory_support = true; + } +#else + if (desc.memoryAllocationImportTypes & ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF) { + _external_memory_support = true; + } +#endif } allocate(byte_size); diff --git a/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp b/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp index 523fc87a7f9dd3..65f7e8e3bee6c7 100644 --- a/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp +++ b/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp @@ -132,14 +132,14 @@ class LevelZeroCompilerInDriver final : public ICompiler { void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt, ze_graph_handle_t graphHandle, std::vector& blob, - uint8_t*& blobPtr, + const uint8_t*& blobPtr, size_t& blobSize) const; template = true> void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt, ze_graph_handle_t graphHandle, std::vector& /* unusedBlob */, - uint8_t*& blobPtr, + const uint8_t*& blobPtr, size_t& blobSize) const; template = true> diff --git a/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp index 1f2a23539a99f5..ad7efd701cfde6 100644 --- 
a/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp @@ -70,6 +70,12 @@ LevelZeroCompilerAdapter::LevelZeroCompilerAdapter(std::shared_ptr>(driverHandle, + deviceHandle, + zeContext, + graph_ddi_table_ext); + break; default: apiAdapter = std::make_shared>(driverHandle, deviceHandle, diff --git a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp index 0e02bb48f3a4b7..d768a4322f7a9b 100644 --- a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp +++ b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp @@ -4,7 +4,6 @@ #include "zero_compiler_in_driver.hpp" -#include #include #include @@ -272,13 +271,19 @@ std::string LevelZeroCompilerInDriver::serializeIOInfo(const std inputsPrecisionSS << INPUTS_PRECISIONS_KEY << KEY_VALUE_SEPARATOR << VALUE_DELIMITER; inputsLayoutSS << INPUTS_LAYOUTS_KEY << KEY_VALUE_SEPARATOR << VALUE_DELIMITER; + const auto getRankOrThrow = [](const ov::PartialShape& shape) -> size_t { + if (shape.rank().is_dynamic()) { + OPENVINO_THROW("Dynamic rank is not supported for NPU plugin"); + } + return shape.rank().get_length(); + }; if (!parameters.empty()) { size_t parameterIndex = 0; for (const std::shared_ptr& parameter : parameters) { - const ov::element::Type& precision = parameter->get_element_type(); - const size_t rank = parameter->get_shape().size(); + const auto precision = parameter->get_element_type(); + const auto rank = getRankOrThrow(parameter->get_partial_shape()); if (parameterIndex != 0) { inputsPrecisionSS << VALUES_SEPARATOR; @@ -310,10 +315,9 @@ std::string LevelZeroCompilerInDriver::serializeIOInfo(const std outputsLayoutSS << OUTPUTS_LAYOUTS_KEY << KEY_VALUE_SEPARATOR << VALUE_DELIMITER; size_t resultIndex = 0; - for (const std::shared_ptr& result : results) { - const ov::element::Type_t precision = result->get_element_type(); - const size_t rank = result->get_shape().size(); + const auto precision = result->get_element_type(); + const auto rank = getRankOrThrow(result->get_output_partial_shape(0)); if (resultIndex != 0) { outputsPrecisionSS << VALUES_SEPARATOR; @@ -367,7 +371,7 @@ template > void LevelZeroCompilerInDriver::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt, ze_graph_handle_t graphHandle, std::vector& blob, - uint8_t*& blobPtr, + const uint8_t*& blobPtr, size_t& blobSize) const { // Get blob size first auto result = _graphDdiTableExt.pfnGetNativeBinary(graphHandle, &blobSize, nullptr); @@ -404,7 +408,7 @@ template > void LevelZeroCompilerInDriver::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt, ze_graph_handle_t graphHandle, std::vector& /* unusedBlob */, - uint8_t*& blobPtr, + const uint8_t*& blobPtr, size_t& blobSize) const { // Get blob ptr and size auto result = _graphDdiTableExt.pfnGetNativeBinary2(graphHandle, &blobSize, &blobPtr); @@ -427,7 +431,7 @@ CompiledNetwork LevelZeroCompilerInDriver::getCompiledNetwork( _logger.info("LevelZeroCompilerInDriver getCompiledNetwork get blob from graphHandle"); ze_graph_handle_t graphHandle = static_cast(networkDescription.metadata.graphHandle); - uint8_t* blobPtr = nullptr; + const uint8_t* blobPtr = nullptr; size_t blobSize = -1; std::vector blob; @@ -1239,6 +1243,7 @@ template class LevelZeroCompilerInDriver; template class LevelZeroCompilerInDriver; template class LevelZeroCompilerInDriver; template class LevelZeroCompilerInDriver; +template class 
LevelZeroCompilerInDriver; } // namespace driverCompilerAdapter } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp index 0e3a2312ffbcc5..1d2217f1114d0c 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp @@ -339,13 +339,25 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr& model, std::map forced_sub_devices{}; const std::string fsd_opt = m_cfg.get<::intel_npu::NPUW_SUBMODEL_DEVICE>(); forced_sub_devices = ::intel_npu ::OptionParser>::parse(fsd_opt); + + // Exclude optimized out subgraphs from compilation target beforehand - otherwise we might get head and repeated + // block in the same chunk + std::vector idx_subgraph_to_compile; + for (std::size_t i = 0u; i < orderedSubgraphs.size(); i++) { + if (orderedSubgraphs[i]._optimized_out || m_compiled_submodels[i].replaced_by.value_or(i) != i) { + continue; // do nothing here + } else { + idx_subgraph_to_compile.push_back(i); + } + } + // Compile submodels. Some of them can be functions: track which model will be // used as function(s): function name -> index of the compiled subgraph - auto compile = [&](size_t id) { + auto compile = [&](size_t i) { + const auto& id = idx_subgraph_to_compile[i]; const auto& subgraph = orderedSubgraphs[id]; - if (subgraph._optimized_out) { - return; - } + + NPUW_ASSERT(!subgraph._optimized_out); const std::size_t real_id = m_compiled_submodels[id].replaced_by.value_or(id); if (!orderedSubgraphs[real_id]._avoid_list.empty()) { @@ -401,10 +413,10 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr& model, // Parallel compilation is unstable so is disabled by default. const bool par_opt = m_cfg.get<::intel_npu::NPUW_PARALLEL_COMPILE>(); if (par_opt) { - ov::parallel_for(orderedSubgraphs.size(), compile); + ov::parallel_for(idx_subgraph_to_compile.size(), compile); } else { // TODO: Introduce npuw::serial(i, f) instead where f is a _funcall - for (std::size_t i = 0u; i < orderedSubgraphs.size(); i++) { + for (std::size_t i = 0u; i < idx_subgraph_to_compile.size(); i++) { compile(i); } } diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp index 72a62781580cda..e7e5121b1240e7 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp @@ -16,8 +16,6 @@ namespace ov { namespace npuw { namespace online { -class Group; // forward declaration - namespace detail { // At partitioning level we exclude some "non-Ops" to not interfere with the passes. 
// We include some of them back to properly link everything at plugin level @@ -33,6 +31,8 @@ class Snapshot : public std::enable_shared_from_this { m_node_to_prod_cons(std::make_shared()), m_node_to_gr(std::make_shared()) {} + friend class Group; // forward declaration + // Simple passes void singleGroup(); @@ -49,27 +49,27 @@ class Snapshot : public std::enable_shared_from_this { void repeatedBlocks(); void earlyAvoids(); void earlyRegroup(); - void markInternalCompute(); - void resetExcludedRep(); // Utility std::shared_ptr getGraph() const; - size_t graphSize() const; - const detail::OVNodeSet& getNodeProducers(const detail::OVNodePtr& node) const; - const detail::OVNodeSet& getNodeConsumers(const detail::OVNodePtr& node) const; const detail::OVPortsMap& getPortsMap() const; const detail::OVNodeToGroupMapPtr& getNodeToGroupMap() const; const std::map>>& getMatches() const; - detail::GPtrSet getRepGroups(const std::shared_ptr& group) const; void repeat(detail::Pass&& pass); void setCtx(const PassContext& ctx); + size_t graphSize() const; private: + detail::GPtrSet getRepGroups(const std::shared_ptr& group) const; + const detail::OVNodeSet& getNodeProducers(const detail::OVNodePtr& node) const; + const detail::OVNodeSet& getNodeConsumers(const detail::OVNodePtr& node) const; void identifyUniques(); void mergeUniques(); void mergeTriangles(); void cleanUpUniques(); void afterUniques(); + void markInternalCompute(); + void resetExcludedRep(); bool cleanUpUniquesImpl(const detail::GPtrSet& gset); std::shared_ptr tryGrowRepeatingGroups(const detail::GPtrSet& repeating_groups); std::shared_ptr tryMergeTriangles(const detail::GPtrSet& repeating_groups); diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp index ffbece94b04176..4928be783317fd 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp @@ -619,7 +619,7 @@ DCOFFPassReshape3::DCOFFPassReshape3(DCOffMode dcoff_mode, ov::element::Type dco register_matcher(std::make_shared(cvt, "TagDCOFFPassReshape3"), std::move(callback)); } -// Pattern: i4 Phi-3 4SymW16A +// Pattern: i4 group-quant // // // "tensor" "scale" > "tensor" @@ -627,12 +627,12 @@ DCOFFPassReshape3::DCOFFPassReshape3(DCOffMode dcoff_mode, ov::element::Type dco // i4 f16|f32 > f16 // : : > : // V : > V -// Convert : > Convert -// f16|f32 : > f32 -// : : > -// V V > -// Multiply > -// f16|f32 > +// Convert : > [ Convert] +// f16|f32 : > [ f32 ] +// : : > : +// V V > V +// Multiply > Reshape +// f16|f32 > f16|f32 // : > // : > // Reshape > @@ -657,6 +657,8 @@ DCOFFPassReshape4::DCOFFPassReshape4(DCOffMode dcoff_mode, ov::element::Type dco auto matched_paramA = std::static_pointer_cast(matched_nodeA); auto matched_paramC = std::static_pointer_cast(matched_nodeC); + auto matched_out_mulply = node_to_output.at(mulply); + if (ov::element::i4 == matched_paramA->get_element_type() && (ov::element::f16 == matched_paramC->get_element_type() || ov::element::f32 == matched_paramC->get_element_type())) { @@ -669,31 +671,17 @@ DCOFFPassReshape4::DCOFFPassReshape4(DCOffMode dcoff_mode, ov::element::Type dco LOG_DEBUG("Matched: " << matched_paramC << " - parameter to remove..."); LOG_BLOCK(); - // Extra transformation here: - // - remove Multiply + Intermediate Convert - // - mark paramC for removal. - // Convert will be reconnected to paramA directly. 
- // Record mapping from the Scale coeff parameter to the Real weight parameter pref.get().scales[matched_paramC] = matched_paramA; - // Disconnect Multiply and Convert from their outputs - auto matched_mulply = node_to_output.at(mulply).get_node_shared_ptr(); - auto matched_convrt = node_to_output.at(cvtA).get_node_shared_ptr(); - auto drop_outputs = [](std::shared_ptr node) { - for (auto&& node_outputs : node->outputs()) { - for (auto&& node_reader_port : node_outputs.get_target_inputs()) { - node_outputs.remove_target_input(node_reader_port); - } - } - }; - LOG_DEBUG("Dropping the connections..."); - drop_outputs(matched_mulply); - drop_outputs(matched_convrt); + std::shared_ptr new_rshp_in = matched_paramA; + if (matched_out_mulply.get_element_type() == ov::element::f32) { + new_rshp_in = std::make_shared(matched_paramA, ov::element::f32); + } LOG_DEBUG("Reconnecting the Root..."); auto matched_reshape = node_to_output.at(reshape).get_node_shared_ptr(); - matched_reshape->input(0).replace_source_output(matched_paramA); + matched_reshape->input(0).replace_source_output(new_rshp_in); } LOG_DEBUG("Done"); } diff --git a/src/plugins/intel_npu/src/plugin/npuw/util.cpp b/src/plugins/intel_npu/src/plugin/npuw/util.cpp index c2fecc7b14d8ae..ebbed29893583c 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/util.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/util.cpp @@ -645,6 +645,115 @@ void unpack_i4f16(const ov::SoPtr& from, } } +void unpack_i4f16_z(const ov::SoPtr& from, + const ov::SoPtr& scale, + const ov::SoPtr& to, + const ov::npuw::util::UnpackOptions& unpack_options) { + NPUW_ASSERT(from->is_continuous()); + NPUW_ASSERT(scale->is_continuous()); + NPUW_ASSERT(to->is_continuous()); + NPUW_ASSERT(from->get_size() == to->get_size()); + + const auto& from_shape = from->get_shape(); + NPUW_ASSERT(from_shape.back() % 64 == 0); + + const auto& scale_shape = scale->get_shape(); + NPUW_ASSERT(scale_shape.size() == 3); + NPUW_ASSERT(scale_shape[0] == from_shape[0]); + NPUW_ASSERT(scale_shape[2] == from_shape[2]); + NPUW_ASSERT(scale_shape[1] == 1); + + const auto scale_elem_type = scale->get_element_type(); + NPUW_ASSERT(scale_elem_type == ov::element::f32); + + // This conversion combines i4tof32 and f32tof16. 
Here we + // - read 256 bits (= 32 bytes, = 64 u4 elements) + // - write 1024 bits (= 128 bytes, = 64 f16 elements) + // per every iteration, what translates to (from->size() / 64) iterations + + const size_t C = from_shape[from_shape.size() - 3]; + const size_t H = from_shape[from_shape.size() - 2]; + const size_t W = from_shape[from_shape.size() - 1]; + + const int8_t* const pSrc = static_cast(from->data()); // 2 x i4 elements + const float* const pScl = static_cast(scale->data()); // 1 x f32 element + int16_t* pDst = static_cast(to->data()); // 1 x f16 element + + auto unpack_body = [&](size_t job_index, size_t stride) { + size_t start_c = job_index * stride; + size_t end_c = std::min(C, start_c + stride); + + for (size_t c = start_c; c < end_c; ++c) { + for (size_t h = 0; h < H; ++h) { + for (size_t w = 0; w < W; w += 64) { + const int8_t* pSrc_iter = pSrc + (w + W * h + W * H * c) / 2; + __m256i vinput = _mm256_lddqu_si256(reinterpret_cast(pSrc_iter)); + __m256i vout0, vout1; + avx2_i4toi8(vinput, &vout0, &vout1); + int8_t tmp[64]; // FIXME: Avoid it + __m256i* tmpv0 = reinterpret_cast<__m256i*>(tmp); + __m256i* tmpv1 = reinterpret_cast<__m256i*>(tmp + 32); + _mm256_storeu_si256(tmpv0, vout0); + _mm256_storeu_si256(tmpv1, vout1); + __m128i i8vecs[8] = { + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp)), + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 8)), + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 16)), + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 24)), + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 32)), + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 40)), + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 48)), + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 56)), + }; + + const float* pScl_iter = pScl + w + W * c; + __m256 svalVec[8]; + for (int i = 0; i < 8; ++i) { + svalVec[i] = _mm256_loadu_ps(pScl_iter + i * 8); + } + + __m128i vresults[8] = {avx2_i8tof16(i8vecs[0], svalVec[0]), + avx2_i8tof16(i8vecs[1], svalVec[1]), + avx2_i8tof16(i8vecs[2], svalVec[2]), + avx2_i8tof16(i8vecs[3], svalVec[3]), + avx2_i8tof16(i8vecs[4], svalVec[4]), + avx2_i8tof16(i8vecs[5], svalVec[5]), + avx2_i8tof16(i8vecs[6], svalVec[6]), + avx2_i8tof16(i8vecs[7], svalVec[7])}; + + int16_t* pDst_iter = pDst + w + W * h + W * H * c; + for (int i = 0; i < 8; ++i) { + _mm_storeu_si128(reinterpret_cast<__m128i*>(pDst_iter + i * 8), vresults[i]); + } + } + } + } + }; + + size_t stride = C; + size_t num_jobs = 1; + + if (unpack_options.nPartitions) { + if (unpack_options.bStrictPartitioning) { + stride = (C + unpack_options.nPartitions - 1) / unpack_options.nPartitions; + num_jobs = unpack_options.nPartitions; + } else { + stride = std::max(1, C / unpack_options.nPartitions); + num_jobs = (C + stride - 1) / stride; + } + } + + if (unpack_options.bUseOvParallelFor) { + ov::parallel_for(num_jobs, [&](size_t job_index) { + unpack_body(job_index, stride); + }); + } else { + for (size_t job_index = 0; job_index < num_jobs; ++job_index) { + unpack_body(job_index, stride); + } + } +} + void unpack_u4f16(const ov::SoPtr& from, const ov::SoPtr& to, const ov::npuw::util::UnpackOptions& unpack_options) { @@ -1329,22 +1438,27 @@ void ov::npuw::util::unpack(const ov::SoPtr& from, // This is in fact a weight decompression procedure const auto type_from = from->get_element_type(); const auto type_to = to->get_element_type(); - namespace ove = ov::element; -#define CAST(x) static_cast((x).operator ove::Type_t()) -#define PAIR(f, t) (CAST(f) << 16 | CAST(t)) -#define HNDL(f, t) \ - case PAIR(ove::f, 
ove::t): \ - unpack_##f##t(from, scale, to, unpack_options); \ - break; - switch (PAIR(type_from, type_to)) { - HNDL(i4, f16); - HNDL(i8, f16); - default: - OPENVINO_THROW("Unknown unpack/scale combination ", type_from, " -> ", type_to); + NPUW_ASSERT(type_to == ov::element::f16); + + const auto& from_shape = from->get_shape(); + const auto& scale_shape = scale->get_shape(); + + if (type_from == ov::element::i4) { + if (from_shape.size() == 3) { + if (scale_shape[2] == from_shape[2]) { + unpack_i4f16_z(from, scale, to, unpack_options); + } else { + unpack_i4f16(from, scale, to, unpack_options); + } + } else { + NPUW_ASSERT(from_shape.size() == 2); + unpack_i4f16(from, scale, to, unpack_options); + } + } else if (type_from == ov::element::i8) { + unpack_i8f16(from, scale, to, unpack_options); + } else { + NPUW_ASSERT(false && "Unsupported combination"); } -#undef HNDL -#undef PAIR -#undef CAST } void ov::npuw::util::unpack(const ov::SoPtr& from, diff --git a/src/plugins/intel_npu/tests/CMakeLists.txt b/src/plugins/intel_npu/tests/CMakeLists.txt index 4c41f008eb7f81..0f5bd7a6b093b2 100644 --- a/src/plugins/intel_npu/tests/CMakeLists.txt +++ b/src/plugins/intel_npu/tests/CMakeLists.txt @@ -8,3 +8,4 @@ if (MSVC) ov_add_compiler_flags(/wd5105) endif() add_subdirectory(functional) +add_subdirectory(unit) diff --git a/src/plugins/intel_npu/tests/functional/behavior/batched_tensors_tests/batched_run.cpp b/src/plugins/intel_npu/tests/functional/behavior/batched_tensors_tests/batched_run.cpp new file mode 100644 index 00000000000000..3d7d4eb89eff4c --- /dev/null +++ b/src/plugins/intel_npu/tests/functional/behavior/batched_tensors_tests/batched_run.cpp @@ -0,0 +1,21 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "behavior/batched_tensors_tests/batched_run.hpp" + +#include "common/npu_test_env_cfg.hpp" +#include "common/utils.hpp" +#include "intel_npu/al/config/common.hpp" + +using namespace ov::test::behavior; + +const std::vector batchedConfigs = {{ov::intel_npu::batch_mode(ov::intel_npu::BatchMode::PLUGIN)}, + {ov::intel_npu::batch_mode(ov::intel_npu::BatchMode::COMPILER)}, + {ov::intel_npu::batch_mode(ov::intel_npu::BatchMode::AUTO)}}; + +INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTest, + BatchedTensorsRunTests, + ::testing::Combine(::testing::Values(ov::test::utils::DEVICE_NPU), + ::testing::ValuesIn(batchedConfigs)), + BatchedTensorsRunTests::getTestCaseName); diff --git a/src/plugins/intel_npu/tests/functional/behavior/batched_tensors_tests/batched_run.hpp b/src/plugins/intel_npu/tests/functional/behavior/batched_tensors_tests/batched_run.hpp new file mode 100644 index 00000000000000..05e580ab99664c --- /dev/null +++ b/src/plugins/intel_npu/tests/functional/behavior/batched_tensors_tests/batched_run.hpp @@ -0,0 +1,425 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include "base/ov_behavior_test_utils.hpp" +#include "common/npu_test_env_cfg.hpp" +#include "common/utils.hpp" +#include "functional_test_utils/ov_plugin_cache.hpp" +#include "npu_private_properties.hpp" +#include "openvino/core/any.hpp" +#include "openvino/core/node_vector.hpp" +#include "openvino/core/type/element_iterator.hpp" +#include "openvino/op/op.hpp" +#include "openvino/opsets/opset8.hpp" +#include "openvino/runtime/compiled_model.hpp" +#include "openvino/runtime/core.hpp" +#include "openvino/runtime/intel_npu/level_zero/level_zero.hpp" +#include 
"overload/overload_test_utils_npu.hpp" + +using CompilationParams = std::tuple; + +using ::testing::AllOf; +using ::testing::HasSubstr; + +namespace ov { +namespace test { +namespace behavior { +class BatchedTensorsRunTests : public ov::test::behavior::OVPluginTestBase, + public testing::WithParamInterface { +protected: + std::shared_ptr core = utils::PluginCache::get().core(); + ov::AnyMap configuration; + std::shared_ptr ov_model; + ov::CompiledModel compiled_model; + ov::Output input; + ov::Output output; + std::string m_cache_dir; + +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + std::string targetDevice; + ov::AnyMap configuration; + std::tie(targetDevice, configuration) = obj.param; + std::replace(targetDevice.begin(), targetDevice.end(), ':', '_'); + targetDevice = ov::test::utils::getTestsPlatformFromEnvironmentOr(ov::test::utils::DEVICE_NPU); + + std::ostringstream result; + result << "targetDevice=" << targetDevice << "_"; + result << "targetPlatform=" << ov::test::utils::getTestsPlatformFromEnvironmentOr(targetDevice) << "_"; + if (!configuration.empty()) { + for (auto& configItem : configuration) { + result << "configItem=" << configItem.first << "_"; + configItem.second.print(result); + } + } + + return result.str(); + } + + void SetUp() override { + std::tie(target_device, configuration) = this->GetParam(); + + SKIP_IF_CURRENT_TEST_IS_DISABLED() + OVPluginTestBase::SetUp(); + ov_model = getDefaultNGraphFunctionForTheDeviceNPU(); // FIXME: E#80555 + } + + std::string generateCacheDirName(const std::string& test_name) { + using namespace std::chrono; + // Generate unique file names based on test name, thread id and timestamp + // This allows execution of tests in parallel (stress mode) + auto hash = std::to_string(std::hash()(test_name)); + std::stringstream ss; + auto ts = duration_cast(high_resolution_clock::now().time_since_epoch()); + ss << hash << "_" + << "_" << ts.count(); + return ss.str(); + } + + void TearDown() override { + if (!m_cache_dir.empty()) { + core->set_property({ov::cache_dir()}); + core.reset(); + ov::test::utils::PluginCache::get().reset(); + ov::test::utils::removeFilesWithExt(m_cache_dir, "blob"); + ov::test::utils::removeDir(m_cache_dir); + } + + if (!configuration.empty()) { + utils::PluginCache::get().reset(); + } + + APIBaseTest::TearDown(); + } + + std::shared_ptr create_n_inputs(size_t n, + element::Type type, + const PartialShape& shape, + const ov::Layout& layout) { + ResultVector res; + ParameterVector params; + + for (size_t i = 0; i < n; i++) { + auto index_str = std::to_string(i); + auto data1 = std::make_shared(type, shape); + data1->set_friendly_name("input" + index_str); + data1->get_output_tensor(0).set_names({"tensor_input" + index_str}); + data1->set_layout(layout); + auto constant = opset8::Constant::create(type, {1}, {1}); + auto op1 = std::make_shared(data1, constant); + op1->set_friendly_name("Add" + index_str); + auto res1 = std::make_shared(op1); + res1->set_friendly_name("Result" + index_str); + res1->get_output_tensor(0).set_names({"tensor_output" + index_str}); + params.push_back(data1); + res.push_back(res1); + } + + return std::make_shared(res, params); + } +}; + +TEST_P(BatchedTensorsRunTests, SetInputRemoteTensorsMultipleInfer) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + size_t batch = 4; + auto one_shape = Shape{1, 2, 2, 2}; + auto batch_shape = Shape{batch, 2, 2, 2}; + auto one_shape_size = ov::shape_size(one_shape); 
+ auto model = BatchedTensorsRunTests::create_n_inputs(2, element::f32, batch_shape, "N..."); + auto execNet = core->compile_model(model, target_device, configuration); + auto context = core->get_default_context(target_device); + // Create InferRequest + ov::InferRequest req; + req = execNet.create_infer_request(); + std::vector tensors; + for (size_t i = 0; i < batch; ++i) { + // non contiguous memory + auto tensor = context.create_host_tensor(ov::element::f32, one_shape); + tensors.push_back(std::move(tensor)); + } + req.set_tensors("tensor_input0", tensors); + + auto actual_tensor = req.get_tensor("tensor_output0"); + auto* actual = actual_tensor.data(); + for (auto testNum = 0; testNum < 5; testNum++) { + for (size_t i = 0; i < batch; ++i) { + auto* f = tensors[i].data(); + for (size_t j = 0; j < one_shape_size; ++j) { + f[j] = static_cast(testNum + 20); + } + } + req.infer(); // Adds '1' to each element + for (size_t j = 0; j < one_shape_size * batch; ++j) { + EXPECT_EQ(actual[j], testNum + 21) << "Infer " << testNum << ": Expected=" << testNum + 21 + << ", actual=" << actual[j] << " for index " << j; + } + } +} + +TEST_P(BatchedTensorsRunTests, SetInputDifferentTensorsMultipleInfer) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + size_t batch = 4; + auto one_shape = Shape{1, 2, 2, 2}; + auto batch_shape = Shape{batch, 2, 2, 2}; + auto one_shape_size = ov::shape_size(one_shape); + auto model = BatchedTensorsRunTests::create_n_inputs(2, element::f32, batch_shape, "N..."); + auto execNet = core->compile_model(model, target_device, configuration); + auto context = core->get_default_context(target_device); + // Create InferRequest + ov::InferRequest req; + req = execNet.create_infer_request(); + std::vector tensors; + + std::vector buffer(one_shape_size * 2 * 2, 0); + + auto tensor0 = ov::Tensor(element::f32, one_shape, &buffer[(0 * 2) * one_shape_size]); + auto tensor1 = context.create_host_tensor(ov::element::f32, one_shape); + auto tensor2 = ov::Tensor(element::f32, one_shape, &buffer[(1 * 2) * one_shape_size]); + auto tensor3 = context.create_host_tensor(ov::element::f32, one_shape); + + tensors.push_back(std::move(tensor0)); + tensors.push_back(std::move(tensor1)); + tensors.push_back(std::move(tensor2)); + tensors.push_back(std::move(tensor3)); + + req.set_tensors("tensor_input0", tensors); + + auto actual_tensor = req.get_tensor("tensor_output0"); + auto* actual = actual_tensor.data(); + for (auto testNum = 0; testNum < 5; testNum++) { + for (size_t i = 0; i < batch; ++i) { + auto* f = tensors[i].data(); + for (size_t j = 0; j < one_shape_size; ++j) { + f[j] = static_cast(testNum + 20); + } + } + req.infer(); // Adds '1' to each element + for (size_t j = 0; j < one_shape_size * batch; ++j) { + EXPECT_EQ(actual[j], testNum + 21) << "Infer " << testNum << ": Expected=" << testNum + 21 + << ", actual=" << actual[j] << " for index " << j; + } + } +} + +TEST_P(BatchedTensorsRunTests, SetInputDifferentTensorsMultipleInferMCL) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + size_t batch = 4; + auto one_shape = Shape{1, 2, 2, 2}; + auto batch_shape = Shape{batch, 2, 2, 2}; + auto one_shape_size = ov::shape_size(one_shape); + auto model = BatchedTensorsRunTests::create_n_inputs(2, element::f32, batch_shape, "N..."); + auto execNet = core->compile_model(model, target_device, configuration); + auto context = core->get_default_context(target_device); + 
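+    // This variant rebinds the batched input of the same infer request twice: each scoped
+    // block below composes a different mix of plain ov::Tensor views over a shared buffer
+    // and context-allocated host tensors, calls set_tensors() again and re-validates the
+    // output, so a stale binding left over from the previous set_tensors() call should
+    // show up as a mismatch.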
// Create InferRequest + ov::InferRequest req; + req = execNet.create_infer_request(); + + std::vector buffer(one_shape_size * batch * 2, 0); + + { + std::vector tensors; + + auto tensor0 = ov::Tensor(element::f32, one_shape, &buffer[(0 * 2) * one_shape_size]); + auto tensor1 = context.create_host_tensor(ov::element::f32, one_shape); + auto tensor2 = ov::Tensor(element::f32, one_shape, &buffer[(1 * 2) * one_shape_size]); + auto tensor3 = context.create_host_tensor(ov::element::f32, one_shape); + + tensors.push_back(std::move(tensor0)); + tensors.push_back(std::move(tensor1)); + tensors.push_back(std::move(tensor2)); + tensors.push_back(std::move(tensor3)); + + req.set_tensors("tensor_input0", tensors); + + auto actual_tensor = req.get_tensor("tensor_output0"); + auto* actual = actual_tensor.data(); + for (auto testNum = 0; testNum < 5; testNum++) { + for (size_t i = 0; i < batch; ++i) { + auto* f = tensors[i].data(); + for (size_t j = 0; j < one_shape_size; ++j) { + f[j] = static_cast(testNum + 20); + } + } + req.infer(); // Adds '1' to each element + for (size_t j = 0; j < one_shape_size * batch; ++j) { + EXPECT_EQ(actual[j], testNum + 21) << "Infer " << testNum << ": Expected=" << testNum + 21 + << ", actual=" << actual[j] << " for index " << j; + } + } + } + + { + std::vector tensors; + + auto tensor0 = context.create_host_tensor(ov::element::f32, one_shape); + auto tensor1 = ov::Tensor(element::f32, one_shape, &buffer[(2 * 2) * one_shape_size]); + auto tensor2 = ov::Tensor(element::f32, one_shape, &buffer[(3 * 2) * one_shape_size]); + auto tensor3 = context.create_host_tensor(ov::element::f32, one_shape); + + tensors.push_back(std::move(tensor0)); + tensors.push_back(std::move(tensor1)); + tensors.push_back(std::move(tensor2)); + tensors.push_back(std::move(tensor3)); + + req.set_tensors("tensor_input0", tensors); + + auto actual_tensor = req.get_tensor("tensor_output0"); + auto* actual = actual_tensor.data(); + for (auto testNum = 0; testNum < 5; testNum++) { + for (size_t i = 0; i < batch; ++i) { + auto* f = tensors[i].data(); + for (size_t j = 0; j < one_shape_size; ++j) { + f[j] = static_cast(testNum + 200); + } + } + req.infer(); // Adds '1' to each element + for (size_t j = 0; j < one_shape_size * batch; ++j) { + EXPECT_EQ(actual[j], testNum + 201) << "Infer " << testNum << ": Expected=" << testNum + 21 + << ", actual=" << actual[j] << " for index " << j; + } + } + } +} + +TEST_P(BatchedTensorsRunTests, SetInputDifferentRemoteTensorsMultipleInferMCL) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + size_t batch = 4; + auto one_shape = Shape{1, 2, 2, 2}; + auto batch_shape = Shape{batch, 2, 2, 2}; + auto one_shape_size = ov::shape_size(one_shape); + auto model = BatchedTensorsRunTests::create_n_inputs(2, element::f32, batch_shape, "N..."); + auto execNet = core->compile_model(model, target_device, configuration); + auto context = core->get_default_context(target_device).as(); + // Create InferRequest + ov::InferRequest req; + req = execNet.create_infer_request(); + + std::vector buffer(one_shape_size * 2 * 2, 0); + + { + std::vector tensors; + + auto tensor0 = ov::Tensor(element::f32, one_shape, &buffer[(0 * 2) * one_shape_size]); + auto tensor1 = context.create_l0_host_tensor(ov::element::f32, one_shape); + auto tensor2 = context.create_l0_host_tensor(ov::element::f32, one_shape); + auto tensor3 = context.create_host_tensor(ov::element::f32, one_shape); + + tensors.push_back(tensor0); + 
tensors.push_back(tensor1); + tensors.push_back(tensor2); + tensors.push_back(tensor3); + + req.set_tensors("tensor_input0", tensors); + + auto actual_tensor = req.get_tensor("tensor_output0"); + auto* actual = actual_tensor.data(); + for (auto testNum = 0; testNum < 5; testNum++) { + { + auto* f = tensor0.data(); + for (size_t j = 0; j < one_shape_size; ++j) { + f[j] = static_cast(testNum + 20); + } + } + { + auto* data = tensor1.get(); + float* f = static_cast(data); + for (size_t j = 0; j < one_shape_size; ++j) { + f[j] = static_cast(testNum + 20); + } + } + { + auto* data = tensor2.get(); + float* f = static_cast(data); + for (size_t j = 0; j < one_shape_size; ++j) { + f[j] = static_cast(testNum + 20); + } + } + { + auto* f = tensor3.data(); + for (size_t j = 0; j < one_shape_size; ++j) { + f[j] = static_cast(testNum + 20); + } + } + + req.infer(); // Adds '1' to each element + for (size_t j = 0; j < one_shape_size * batch; ++j) { + EXPECT_EQ(actual[j], testNum + 21) << "Infer " << testNum << ": Expected=" << testNum + 21 + << ", actual=" << actual[j] << " for index " << j; + } + } + } + + { + std::vector tensors; + + auto tensor0 = context.create_l0_host_tensor(ov::element::f32, one_shape); + auto tensor1 = context.create_host_tensor(ov::element::f32, one_shape); + auto tensor2 = ov::Tensor(element::f32, one_shape, &buffer[(1 * 2) * one_shape_size]); + auto tensor3 = context.create_l0_host_tensor(ov::element::f32, one_shape); + + tensors.push_back(tensor0); + tensors.push_back(tensor1); + tensors.push_back(tensor2); + tensors.push_back(tensor3); + + req.set_tensors("tensor_input0", tensors); + + auto actual_tensor = req.get_tensor("tensor_output0"); + auto* actual = actual_tensor.data(); + for (auto testNum = 0; testNum < 5; testNum++) { + { + auto* data = tensor0.get(); + float* f = static_cast(data); + for (size_t j = 0; j < one_shape_size; ++j) { + f[j] = static_cast(testNum + 20); + } + } + { + auto* f = tensor1.data(); + for (size_t j = 0; j < one_shape_size; ++j) { + f[j] = static_cast(testNum + 20); + } + } + { + auto* f = tensor2.data(); + for (size_t j = 0; j < one_shape_size; ++j) { + f[j] = static_cast(testNum + 20); + } + } + { + auto* data = tensor3.get(); + float* f = static_cast(data); + for (size_t j = 0; j < one_shape_size; ++j) { + f[j] = static_cast(testNum + 20); + } + } + + req.infer(); // Adds '1' to each element + for (size_t j = 0; j < one_shape_size * batch; ++j) { + EXPECT_EQ(actual[j], testNum + 21) << "Infer " << testNum << ": Expected=" << testNum + 21 + << ", actual=" << actual[j] << " for index " << j; + } + } + } +} + +} // namespace behavior +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_npu/tests/functional/shared_tests_instances/behavior/compiled_model/properties.cpp b/src/plugins/intel_npu/tests/functional/shared_tests_instances/behavior/compiled_model/properties.cpp index 8abe86a77c55c2..d5e154a26ac127 100644 --- a/src/plugins/intel_npu/tests/functional/shared_tests_instances/behavior/compiled_model/properties.cpp +++ b/src/plugins/intel_npu/tests/functional/shared_tests_instances/behavior/compiled_model/properties.cpp @@ -233,7 +233,7 @@ INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTests, OVClassCompiledModel ::testing::ValuesIn(compiledModelIncorrectConfigs)), ov::test::utils::appendPlatformTypeTestName); -INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTests, OVCompiledModelIncorrectDevice, +INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, OVCompiledModelIncorrectDevice, 
::testing::Values(ov::test::utils::DEVICE_NPU), ov::test::utils::appendPlatformTypeTestName); diff --git a/src/plugins/intel_npu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_npu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 4eb829045c964a..aa61afdcacc1bc 100644 --- a/src/plugins/intel_npu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_npu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -702,18 +702,9 @@ std::vector disabledTestPatterns() { ".*OVCompiledModelPropertiesDefaultSupportedTests.CanCompileWithDefaultValueFromPlugin.*" }); - // [Tracking number: E#116494] - _skipRegistry.addPatterns( - "NPU plugin doesn't implement `set_tensors` function", { - ".*OVInferRequestBatchedTests.SetInputTensorsBase.*", - ".*OVInferRequestBatchedTests.SetInputTensorsAsync.*", - ".*OVInferRequestBatchedTests.SetInputTensors_override_with_set.*", - ".*OVInferRequestBatchedTests.SetInputTensorsBase_Caching.*", - ".*OVInferRequestBatchedTests.SetInputTensors_Multiple_Infer.*", + _skipRegistry.addPatterns( + "NPU plugin doesn't support infer dynamic", { ".*OVInferRequestBatchedTests.SetInputTensors_Can_Infer_Dynamic.*", - ".*OVInferRequestBatchedTests.SetInputTensors_Get_Tensor_Not_Allowed.*", - ".*OVInferRequestBatchedTests.SetInputTensors_Correct_all.*", - ".*OVInferRequestBatchedTests.SetInputTensors_Cache_CheckDeepCopy.*" }); // [Tracking number: E#118381] diff --git a/src/plugins/intel_npu/tests/unit/CMakeLists.txt b/src/plugins/intel_npu/tests/unit/CMakeLists.txt new file mode 100644 index 00000000000000..861a0ff6a47076 --- /dev/null +++ b/src/plugins/intel_npu/tests/unit/CMakeLists.txt @@ -0,0 +1,46 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +set(TARGET_NAME "ov_npu_unit_tests") + +set(MANDATORY_UNIT_TESTS_LIBS + "openvino::commonTestUtils" + "openvino::gmock" + "openvino::gtest" + "openvino::gtest_main" + "openvino::runtime" + "openvino::npu_al" + "openvino::npu_logger_utils" +) + +ov_add_test_target( + NAME ${TARGET_NAME} + ROOT ${CMAKE_CURRENT_SOURCE_DIR} + ADDITIONAL_SOURCE_DIRS + ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/plugin/npuw/ + DEPENDENCIES + openvino::runtime + INCLUDES + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/npuw + ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/plugin/npuw + ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/utils/include + ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/plugin/include + ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/al/include + LINK_LIBRARIES + ${MANDATORY_UNIT_TESTS_LIBS} + LABELS + NPUW +) + +if(ENABLE_AVX2) + ov_avx2_optimization_flags(avx2_flags) + target_compile_options(${TARGET_NAME} PRIVATE "${avx2_flags}") +endif() + +install(TARGETS ${TARGET_NAME} + RUNTIME DESTINATION tests + COMPONENT tests + EXCLUDE_FROM_ALL +) diff --git a/src/plugins/intel_npu/tests/unit/npuw/online_partitioning.cpp b/src/plugins/intel_npu/tests/unit/npuw/online_partitioning.cpp new file mode 100644 index 00000000000000..af1fc5de8e92c7 --- /dev/null +++ b/src/plugins/intel_npu/tests/unit/npuw/online_partitioning.cpp @@ -0,0 +1,692 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include + +#include "partitioning/online/compiler.hpp" +#include "partitioning/online/snapshot.hpp" +#include "partitioning/online/group.hpp" + +#include "intel_npu/al/config/config.hpp" +#include "intel_npu/al/config/npuw.hpp" + +#include 
"openvino/openvino.hpp" +#include "openvino/op/ops.hpp" +#include "openvino/op/util/op_types.hpp" + +bool isEqualEns(ov::npuw::Ensemble& ens1, ov::npuw::Ensemble& ens2); +bool isEqualEns(ov::npuw::Ensemble& ens1, ov::npuw::Ensemble& ens2) { + if (ens1.groups.size() != ens2.groups.size()) { + return false; + } + + for (auto& g : ens1.groups) { + std::sort(g.input_layers.begin(), g.input_layers.end()); + std::sort(g.output_layers.begin(), g.output_layers.end()); + std::sort(g.all_layers.begin(), g.all_layers.end()); + } + + for (auto& g : ens2.groups) { + std::sort(g.input_layers.begin(), g.input_layers.end()); + std::sort(g.output_layers.begin(), g.output_layers.end()); + std::sort(g.all_layers.begin(), g.all_layers.end()); + } + + std::sort(ens1.groups.begin(), ens1.groups.end(), [](const ov::npuw::Group& g1, + const ov::npuw::Group& g2){ + return g1.all_layers.front() < g2.all_layers.front(); + }); + + std::sort(ens2.groups.begin(), ens2.groups.end(), [](const ov::npuw::Group& g1, + const ov::npuw::Group& g2){ + return g1.all_layers.front() < g2.all_layers.front(); + }); + + for (size_t i = 0; i < ens1.groups.size(); ++i) { + const auto& g1 = ens1.groups.at(i); + const auto& g2 = ens2.groups.at(i); + + if (g1.avoid_list != g2.avoid_list || + g1.input_layers != g2.input_layers || + g1.output_layers != g2.output_layers || + g1.all_layers != g2.all_layers) { + return false; + } + + // Can't compare them directly since they are random, but dont't affect the structure + if ((g1.repeated_id.empty() && !g2.repeated_id.empty()) || + (!g1.repeated_id.empty() && g2.repeated_id.empty())) { + return false; + } + } + + if (ens1.repeated.size() != ens2.repeated.size()) { + return false; + } + + auto get_sorted_rep = [](const std::map& rep) { + std::vector>> sorted_rep; + + std::transform(rep.begin(), rep.end(), std::back_inserter(sorted_rep), [](const auto& v) { + return v.second.matches; + }); + + for (auto& g : sorted_rep) { + std::sort(g.begin(), g.end(), + [](const auto& a, const auto& b) {return *a.begin() < *b.begin();}); + } + + std::sort(sorted_rep.begin(), sorted_rep.end(), + [](const auto& a, const auto& b) {return *a.front().begin() < *b.front().begin();}); + + return sorted_rep; + }; + + + if (get_sorted_rep(ens1.repeated) != get_sorted_rep(ens2.repeated)) { + return false; + } + + return true; +} + +class ModelGenerator { +public: + ModelGenerator() = default; + + std::shared_ptr get_model_without_repeated_blocks() { + std::shared_ptr input = std::make_shared(ov::element::i32, ov::Shape{1, 1, 40}); + m_nodes.push_back(input); + set_name(input); + + std::shared_ptr res = get_block(input); + + auto result = std::make_shared(res); + m_nodes.push_back(result); + set_name(result); + + ov::ParameterVector params = {input}; + ov::ResultVector results = {result}; + + return std::make_shared(results, params); + } + + std::shared_ptr get_model_with_repeated_blocks() { + // Generate head + std::shared_ptr input = std::make_shared(ov::element::i32, ov::Shape{1, 1, 40}); + m_nodes.push_back(input); + set_name(input); + + std::vector> head(7, nullptr); + head[0] = std::make_shared(input, input); + head[1] = std::make_shared(ov::element::i32, ov::Shape{1}, std::vector{2}); + head[2] = std::make_shared(head[0], head[1], true); + head[3] = std::make_shared(ov::element::i64, ov::Shape{4}, std::vector{1, 1, 4, 10}); + head[4] = std::make_shared(ov::element::i64, ov::Shape{3}, std::vector{1, 1, 40}); + head[5] = std::make_shared(head[2], head[3], false); + head[6] = std::make_shared(head[5], head[4], false); 
+ + for (const auto& h : head) { + m_nodes.push_back(h); + set_name(h); + } + + // Generate repeated blocks + std::shared_ptr output = get_block(head[6]); + std::vector> outputs; + outputs.push_back(output); + + for (size_t i = 0; i < 9; ++i) { + output = get_block(output); + outputs.push_back(output); + } + + // Generate tail + std::vector> tail(6, nullptr); + tail[0] = std::make_shared(outputs, -1); + tail[1] = std::make_shared(ov::element::i32, ov::Shape{3}, std::vector{1, 20, 20}); + tail[2] = std::make_shared(tail[0], tail[1], false); + tail[3] = std::make_shared(ov::element::i32, ov::Shape{1, 1, 1}); + tail[4] = std::make_shared(tail[2], tail[3]); + tail[5] = std::make_shared(tail[4], tail[4]); + + for (const auto& t : tail) { + m_nodes.push_back(t); + set_name(t); + } + + // Create model + auto result = std::make_shared(tail[5]); + m_nodes.push_back(result); + set_name(result); + + ov::ParameterVector params = {input}; + ov::ResultVector results = {result}; + + return std::make_shared(results, params); + } + + std::shared_ptr get_block(const std::shared_ptr& input) { + // Parameters + // input + + // Constants + std::vector> model_c(18, nullptr); + model_c[0] = std::make_shared(ov::element::i32, ov::Shape{4}, std::vector{0, 2, 1, 3}); + model_c[1] = std::make_shared(ov::element::i64, ov::Shape{1}, std::vector{1}); + model_c[2] = std::make_shared(ov::element::i64, ov::Shape{1}, std::vector{0}); + model_c[3] = std::make_shared(ov::element::i64, ov::Shape{1}, std::vector{2}); + model_c[4] = std::make_shared(ov::element::i64, ov::Shape{1}, std::vector{0}); + model_c[5] = std::make_shared(ov::element::i64, ov::Shape{4}, std::vector{1, 1, 1, 1}); + model_c[6] = std::make_shared(ov::element::i64, ov::Shape{1}, std::vector{1}); + model_c[7] = std::make_shared(ov::element::i64, ov::Shape{1}, std::vector{0}); + model_c[8] = std::make_shared(ov::element::i64, ov::Shape{4}, std::vector{1, 1, 1, 1}); + model_c[9] = std::make_shared(ov::element::i32, ov::Shape{4}, std::vector{1, 1, 1, 2}); + model_c[10] = std::make_shared(ov::element::i32, ov::Shape{4}, std::vector{1, 1, 1, 1}); + model_c[11] = std::make_shared(ov::element::i32, ov::Shape{4}, std::vector{1, 1, 1, 2}); + model_c[12] = std::make_shared(ov::element::i32, ov::Shape{1, 1, 1, 1}); + model_c[13] = std::make_shared(ov::element::i32, ov::Shape{1, 1, 1, 1}); + model_c[14] = std::make_shared(ov::element::i32, ov::Shape{1, 1, 1, 1}); + model_c[15] = std::make_shared(ov::element::f32, ov::Shape{40, 40}); + model_c[16] = std::make_shared(ov::element::i64, ov::Shape{4}, std::vector{1, 1, 4, 10}); + model_c[17] = std::make_shared(ov::element::i32, ov::Shape{3}, std::vector{1, 1, 40}); + + for (const auto& c : model_c) { + m_nodes.push_back(c); + set_name(c); + } + + // Converts + std::vector> convert(3, nullptr); + convert[0] = std::make_shared(model_c[15], ov::element::f16); + convert[1] = std::make_shared(convert[0], ov::element::i32); + convert[2] = std::make_shared(model_c[12], ov::element::i32); + + for (const auto& c : convert) { + m_nodes.push_back(c); + set_name(c); + } + + // Ops + std::vector> op(16, nullptr); + op[0] = std::make_shared(input, convert[1], false, true); + op[1] = std::make_shared(op[0], model_c[16], false); + op[2] = std::make_shared(op[1], model_c[0]); + op[3] = std::make_shared(op[2]); + op[4] = std::make_shared(op[3], model_c[1], model_c[2]); + op[5] = std::make_shared(op[4], model_c[3], true); + op[6] = std::make_shared(op[5]); + op[7] = std::make_shared(model_c[5], model_c[6], op[6], model_c[7]); + op[8] = 
std::make_shared(op[2], + model_c[8], + op[7], + model_c[9], + std::vector{1, 1, 1, 1}, + std::vector{1, 1, 1, 1}); + op[9] = std::make_shared(op[2], + op[7], + model_c[10], + model_c[11], + std::vector{1, 1, 1, 1}, + std::vector{1, 1, 1, 1}); + op[10] = std::make_shared(op[9], convert[2]); + op[11] = std::make_shared(std::vector>{op[10], op[8]}, -1); + op[12] = std::make_shared(model_c[13], op[11]); + op[13] = std::make_shared(model_c[14], op[2]); + op[14] = std::make_shared(op[13], op[12]); + op[15] = std::make_shared(op[14], model_c[17], false); + + for (const auto& o : op) { + m_nodes.push_back(o); + set_name(o); + } + + return op[15]; + } + +private: + void set_name(const std::shared_ptr& node) { + node->set_friendly_name("node_" + std::to_string(m_name_idx++)); + } + + std::vector> m_nodes; + size_t m_name_idx; +}; + +TEST(OnlinePartitioningTest, Partitioning_IsTheSame_SmallModel) { + ModelGenerator mg; + auto model = mg.get_model_without_repeated_blocks(); + + auto opt_desc = std::make_shared<::intel_npu::OptionsDesc>(); + auto cfg = ::intel_npu::Config(opt_desc); + ::intel_npu::registerNPUWOptions(*opt_desc); + std::map cfg_map = {{ "NPUW_ONLINE_KEEP_BLOCK_SIZE", "9" }}; + cfg.update(cfg_map); + + auto ens = ov::npuw::online::buildPartitioning(model, cfg); + + for (size_t i = 0; i < 100; ++i) { + auto ens_again = ov::npuw::online::buildPartitioning(model, cfg); + EXPECT_TRUE(isEqualEns(ens, ens_again)); + } +} + +TEST(OnlinePartitioningTest, Partitioning_IsTheSame_RepeatedModel) { + ModelGenerator mg; + auto model = mg.get_model_with_repeated_blocks(); + + auto opt_desc = std::make_shared<::intel_npu::OptionsDesc>(); + auto cfg = ::intel_npu::Config(opt_desc); + ::intel_npu::registerNPUWOptions(*opt_desc); + std::map cfg_map = {{ "NPUW_ONLINE_KEEP_BLOCK_SIZE", "9" }}; + cfg.update(cfg_map); + + auto ens = ov::npuw::online::buildPartitioning(model, cfg); + + for (size_t i = 0; i < 100; ++i) { + auto ens_again = ov::npuw::online::buildPartitioning(model, cfg); + EXPECT_TRUE(isEqualEns(ens, ens_again)); + } +} + +TEST(OnlinePartitioningTest, Partitioning_SingleGroup_SmallModel) { + ModelGenerator mg; + auto model = mg.get_model_without_repeated_blocks(); + + auto snap = std::make_shared(model); + snap->singleGroup(); + EXPECT_EQ(snap->graphSize(), 1); +} + +TEST(OnlinePartitioningTest, Partitioning_SingleGroup_RepeatedModel) { + ModelGenerator mg; + auto model = mg.get_model_with_repeated_blocks(); + + auto snap = std::make_shared(model); + snap->singleGroup(); + EXPECT_EQ(snap->graphSize(), 1); +} + +TEST(OnlinePartitioningTest, Partitioning_buildGraph_SmallModel) { + ModelGenerator mg; + auto model = mg.get_model_without_repeated_blocks(); + + auto snap = std::make_shared(model); + snap->buildGraph(); + auto g = snap->getGraph(); + for (const auto& nh : g->sorted()) { + ov::npuw::online::Group::GPtr group = g->meta(nh).get(); + EXPECT_EQ(group->size(), 1); + } + EXPECT_EQ(snap->getNodeToGroupMap()->size(), snap->graphSize()); +} + +TEST(OnlinePartitioningTest, Partitioning_buildGraph_RepeatedModel) { + ModelGenerator mg; + auto model = mg.get_model_with_repeated_blocks(); + + auto snap = std::make_shared(model); + snap->buildGraph(); + auto g = snap->getGraph(); + for (const auto& nh : g->sorted()) { + ov::npuw::online::Group::GPtr group = g->meta(nh).get(); + EXPECT_EQ(group->size(), 1); + } + EXPECT_EQ(snap->getNodeToGroupMap()->size(), snap->graphSize()); +} + +TEST(OnlinePartitioningTest, Partitioning_earlyAvoids_SmallModel) { + ModelGenerator mg; + auto model = 
mg.get_model_without_repeated_blocks(); + + auto snap = std::make_shared(model); + ov::npuw::online::PassContext ctx; + ctx.avoids = {{ov::npuw::online::PatternType::OP, "Gather", "mydevice"}, {ov::npuw::online::PatternType::OP, "MatMul", "mydevice"}}; + snap->setCtx(ctx); + snap->buildGraph(); + snap->earlyAvoids(); + auto g = snap->getGraph(); + size_t count = 0; + for (const auto& nh : g->sorted()) { + ov::npuw::online::Group::GPtr group = g->meta(nh).get(); + EXPECT_EQ(group->size(), 1); + if (group->avoidedTargets().size() == 1 && *(group->avoidedTargets().begin()) == "mydevice") { + ++count; + } + } + EXPECT_EQ(count, 2); +} + +TEST(OnlinePartitioningTest, Partitioning_earlyAvoids_RepeatedModel) { + ModelGenerator mg; + auto model = mg.get_model_with_repeated_blocks(); + + auto snap = std::make_shared(model); + ov::npuw::online::PassContext ctx; + ctx.avoids = {{ov::npuw::online::PatternType::OP, "Gather", "mydevice"}, {ov::npuw::online::PatternType::OP, "MatMul", "mydevice"}}; + snap->setCtx(ctx); + snap->buildGraph(); + snap->earlyAvoids(); + auto g = snap->getGraph(); + size_t count = 0; + for (const auto& nh : g->sorted()) { + ov::npuw::online::Group::GPtr group = g->meta(nh).get(); + EXPECT_EQ(group->size(), 1); + if (group->avoidedTargets().size() == 1 && *(group->avoidedTargets().begin()) == "mydevice") { + ++count; + } + } + EXPECT_EQ(count, 20); +} + +TEST(OnlinePartitioningTest, Partitioning_collectLHF_SmallModel) { + ModelGenerator mg; + auto model = mg.get_model_without_repeated_blocks(); + + auto snap = std::make_shared(model); + snap->buildGraph(); + + std::vector sizes = {10, 10}; + size_t iter = 0; + + snap->repeat([&]{ + snap->collectLHF(); + EXPECT_LT(iter, sizes.size()); + EXPECT_EQ(snap->graphSize(), sizes[iter++]); + }); +} + +TEST(OnlinePartitioningTest, Partitioning_collectLHF_RepeatedModel) { + ModelGenerator mg; + auto model = mg.get_model_with_repeated_blocks(); + + auto snap = std::make_shared(model); + snap->buildGraph(); + + std::vector sizes = {82, 82}; + size_t iter = 0; + + snap->repeat([&]{ + snap->collectLHF(); + EXPECT_LT(iter, sizes.size()); + EXPECT_EQ(snap->graphSize(), sizes[iter++]); + }); +} + +TEST(OnlinePartitioningTest, Partitioning_fuseRemnants_SmallModel) { + ModelGenerator mg; + auto model = mg.get_model_without_repeated_blocks(); + + auto snap = std::make_shared(model); + snap->buildGraph(); + + std::vector sizes = {10, 10}; + size_t iter = 0; + + snap->repeat([&]{ + snap->fuseRemnants(); + EXPECT_LT(iter, sizes.size()); + EXPECT_EQ(snap->graphSize(), sizes[iter++]); + }); +} + +TEST(OnlinePartitioningTest, Partitioning_fuseRemnants_RepeatedModel) { + ModelGenerator mg; + auto model = mg.get_model_with_repeated_blocks(); + + auto snap = std::make_shared(model); + snap->buildGraph(); + + std::vector sizes = {75, 38, 19, 10}; + size_t iter = 0; + + snap->repeat([&]{ + snap->fuseRemnants(); + EXPECT_LT(iter, sizes.size()); + EXPECT_EQ(snap->graphSize(), sizes[iter++]); + }); +} + +TEST(OnlinePartitioningTest, Partitioning_fuseRemnantsExtended_SmallModel) { + ModelGenerator mg; + auto model = mg.get_model_without_repeated_blocks(); + + auto snap = std::make_shared(model); + snap->buildGraph(); + + std::vector sizes = {10, 10}; + size_t iter = 0; + + snap->repeat([&]{ + snap->fuseRemnantsExtended(); + EXPECT_LT(iter, sizes.size()); + EXPECT_EQ(snap->graphSize(), sizes[iter++]); + }); +} + +TEST(OnlinePartitioningTest, Partitioning_fuseRemnantsExtended_RepeatedModel) { + ModelGenerator mg; + auto model = mg.get_model_with_repeated_blocks(); + 
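+    // As in the tests above, every repeat() callback performs one more pass iteration over
+    // the snapshot, and the `sizes` vector encodes the graph size expected after each
+    // iteration until the pass converges and stops changing the partitioning.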
+ auto snap = std::make_shared(model); + snap->buildGraph(); + + std::vector sizes = {10, 10}; + size_t iter = 0; + + snap->repeat([&]{ + snap->fuseRemnantsExtended(); + EXPECT_LT(iter, sizes.size()); + EXPECT_EQ(snap->graphSize(), sizes[iter++]); + }); +} + +TEST(OnlinePartitioningTest, Partitioning_fuseInputs_SmallModel) { + ModelGenerator mg; + auto model = mg.get_model_without_repeated_blocks(); + + auto snap = std::make_shared(model); + snap->buildGraph(); + + std::vector sizes = {15, 14, 14}; + size_t iter = 0; + + snap->repeat([&]{ + snap->fuseInputs(); + EXPECT_LT(iter, sizes.size()); + EXPECT_EQ(snap->graphSize(), sizes[iter++]); + }); +} + +TEST(OnlinePartitioningTest, Partitioning_fuseInputs_RepeatedModel) { + ModelGenerator mg; + auto model = mg.get_model_with_repeated_blocks(); + + auto snap = std::make_shared(model); + snap->buildGraph(); + + std::vector sizes = {148, 138, 138}; + size_t iter = 0; + + snap->repeat([&]{ + snap->fuseInputs(); + EXPECT_LT(iter, sizes.size()); + EXPECT_EQ(snap->graphSize(), sizes[iter++]); + }); +} + +TEST(OnlinePartitioningTest, Partitioning_Compiler_Just_SmallModel) { + ModelGenerator mg; + auto model = mg.get_model_without_repeated_blocks(); + + auto snap = std::make_shared(model); + snap->buildGraph(); + + std::vector sizes_lhf = {10, 10}; + size_t iter_lhf = 0; + + std::vector sizes_fr = {10, 10}; + size_t iter_fr = 0; + + snap->repeat([&] { + snap->collectLHF(); + EXPECT_LT(iter_lhf, sizes_lhf.size()); + EXPECT_EQ(snap->graphSize(), sizes_lhf[iter_lhf++]); + }); + snap->repeat([&] { + snap->fuseRemnants(); + EXPECT_LT(iter_fr, sizes_fr.size()); + EXPECT_EQ(snap->graphSize(), sizes_fr[iter_fr++]); + }); +} + +TEST(OnlinePartitioningTest, Partitioning_Compiler_Just_RepeatedModel) { + ModelGenerator mg; + auto model = mg.get_model_with_repeated_blocks(); + + auto snap = std::make_shared(model); + snap->buildGraph(); + + std::vector sizes_lhf = {82, 82}; + size_t iter_lhf = 0; + + std::vector sizes_fr = {41, 21, 11, 10, 10}; + size_t iter_fr = 0; + + snap->repeat([&] { + snap->collectLHF(); + EXPECT_LT(iter_lhf, sizes_lhf.size()); + EXPECT_EQ(snap->graphSize(), sizes_lhf[iter_lhf++]); + }); + snap->repeat([&] { + snap->fuseRemnants(); + EXPECT_LT(iter_fr, sizes_fr.size()); + EXPECT_EQ(snap->graphSize(), sizes_fr[iter_fr++]); + }); +} + +TEST(OnlinePartitioningTest, Partitioning_Compiler_RepeatedBlocks_SmallModel) { + ModelGenerator mg; + auto model = mg.get_model_without_repeated_blocks(); + + auto snap = std::make_shared(model); + snap->buildGraph(); + + + std::vector sizes_fr = {10, 10}; + size_t iter_fr = 0; + + snap->earlyAvoids(); + snap->earlyRegroup(); + snap->repeatedBlocks(); + EXPECT_EQ(snap->graphSize(), 17); + + auto matches = snap->getMatches(); + EXPECT_EQ(matches.size(), 0); + + snap->repeat([&] { + snap->fuseRemnantsExtended(); + EXPECT_LT(iter_fr, sizes_fr.size()); + EXPECT_EQ(snap->graphSize(), sizes_fr[iter_fr++]); + }); +} + +TEST(OnlinePartitioningTest, Partitioning_Compiler_RepeatedBlocks_RepeatedModel) { + ModelGenerator mg; + auto model = mg.get_model_with_repeated_blocks(); + + auto snap = std::make_shared(model); + snap->buildGraph(); + + + std::vector sizes_fr = {12, 12}; + size_t iter_fr = 0; + + snap->earlyAvoids(); + snap->earlyRegroup(); + snap->repeatedBlocks(); + EXPECT_EQ(snap->graphSize(), 18); + + auto matches = snap->getMatches(); + EXPECT_EQ(matches.size(), 1); + + for (const auto& m : matches) { + EXPECT_EQ(m.second.size(), 17); + for (const auto& layers : m.second) { + EXPECT_EQ(layers.size(), 10); + } + 
} + + snap->repeat([&] { + snap->fuseRemnantsExtended(); + EXPECT_LT(iter_fr, sizes_fr.size()); + EXPECT_EQ(snap->graphSize(), sizes_fr[iter_fr++]); + }); +} + +TEST(OnlinePartitioningTest, Partitioning_Compiler_Compute_SmallModel) { + ModelGenerator mg; + auto model = mg.get_model_without_repeated_blocks(); + + auto snap = std::make_shared(model); + + std::vector sizes_fr = {10, 10}; + size_t iter_fr = 0; + + ov::npuw::online::PassContext ctx; + ctx.isolates = {{ov::npuw::online::PatternType::OP, "Transpose", "test_compute"}, {ov::npuw::online::PatternType::OP, "ScatterUpdate", "test_compute"}}; + ctx.nofolds = {"test_compute"}; + snap->setCtx(ctx); + + snap->buildGraph(); + snap->earlyAvoids(); + snap->earlyRegroup(); + snap->repeatedBlocks(); + EXPECT_EQ(snap->graphSize(), 17); + + auto matches = snap->getMatches(); + EXPECT_EQ(matches.size(), 0); + + snap->repeat([&] { + snap->fuseRemnantsExtended(); + EXPECT_LT(iter_fr, sizes_fr.size()); + EXPECT_EQ(snap->graphSize(), sizes_fr[iter_fr++]); + }); +} + +TEST(OnlinePartitioningTest, Partitioning_Compiler_Compute_RepeatedModel) { + ModelGenerator mg; + auto model = mg.get_model_with_repeated_blocks(); + + auto snap = std::make_shared(model); + + std::vector sizes_fr = {10, 10}; + size_t iter_fr = 0; + + ov::npuw::online::PassContext ctx; + ctx.isolates = {{ov::npuw::online::PatternType::OP, "Gather", "test_compute"}, + {ov::npuw::online::PatternType::OP, "ScatterUpdate", "test_compute"}, + {ov::npuw::online::PatternType::OP, "ShapeOf", "test_compute"}, + {ov::npuw::online::PatternType::OP, "Divide", "test_compute"}, + {ov::npuw::online::PatternType::OP, "Floor", "test_compute"}}; + ctx.nofolds = {"test_compute"}; + snap->setCtx(ctx); + + snap->buildGraph(); + snap->earlyAvoids(); + snap->earlyRegroup(); + snap->repeatedBlocks(); + EXPECT_EQ(snap->graphSize(), 29); + + // FIXME: create a config in which there will be repeated blocks + auto matches = snap->getMatches(); + EXPECT_EQ(matches.size(), 0); + + snap->repeat([&] { + snap->fuseRemnantsExtended(); + EXPECT_LT(iter_fr, sizes_fr.size()); + EXPECT_EQ(snap->graphSize(), sizes_fr[iter_fr++]); + }); +} diff --git a/src/plugins/intel_npu/tests/unit/npuw/unpack.cpp b/src/plugins/intel_npu/tests/unit/npuw/unpack.cpp new file mode 100644 index 00000000000000..1049832f6ead7c --- /dev/null +++ b/src/plugins/intel_npu/tests/unit/npuw/unpack.cpp @@ -0,0 +1,103 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#ifdef HAVE_AVX2 +#include "unpack.hpp" + +namespace { + +const auto TestCases = ::testing::Combine( + ::testing::ValuesIn({ov::element::Type_t::i4}), + ::testing::ValuesIn({ov::element::Type_t::i8, ov::element::Type_t::f16}), + ::testing::ValuesIn({ov::element::Type_t::undefined}), // no used in this test + ::testing::ValuesIn({ov::element::Type_t::undefined}), // no used in this test + ::testing::ValuesIn({3lu, 0lu}), + ::details::ShapesIn({Tensors{input={1, 1, 1, 32};}, + Tensors{input={1,1,1, 128};}, + Tensors{input={1,1,1, 390};}, + Tensors{input={1,1,1, 82};}}), + ::testing::ValuesIn({true, false}), + ::testing::ValuesIn({true, false}) +); + +INSTANTIATE_TEST_SUITE_P(UnpackTests, UnpackTests, + TestCases, + UnpackTests::getTestCaseName); + +const auto TestCasesScale = ::testing::Combine( + ::testing::ValuesIn({ov::element::Type_t::i4}), // TODO: add i8 as input for test + ::testing::ValuesIn({ov::element::Type_t::f16, ov::element::Type_t::f32}), + ::testing::ValuesIn({ov::element::Type_t::f16, ov::element::Type_t::f32}), + 
::testing::ValuesIn({ov::element::Type_t::undefined}), // no used in this test + ::testing::ValuesIn({3lu, 0lu}), + ::details::ShapesIn({Tensors{input={1,32, 128}; scale = {1, 32, 1};}, + Tensors{input={32, 128}; scale = {32, 1};}, + Tensors{input={64, 160}; scale = {64, 1};}, + Tensors{input={1024, 4}; scale = {64, 1};}, + Tensors{input={1, 1, 1024, 4}; scale = {1, 1, 64, 1};}}), + ::testing::ValuesIn({true, false}), + ::testing::ValuesIn({true, false}) +); + +INSTANTIATE_TEST_SUITE_P(UnpackWithScaleTests, UnpackWithScaleTests, + TestCasesScale, + UnpackWithScaleTests::getTestCaseName); + + +const auto TestCasesScaleAndZeroPoints = ::testing::Combine( + ::testing::ValuesIn({ov::element::Type_t::u4}), + ::testing::ValuesIn({ov::element::Type_t::f16}), + ::testing::ValuesIn({ov::element::Type_t::f16}), + ::testing::ValuesIn({ov::element::Type_t::u4}), + ::testing::ValuesIn({3lu, 0lu}), + ::details::ShapesIn({Tensors{input={1,32, 128}; scale = {1, 32, 1};}, + Tensors{input={1,64, 160}; scale = {1, 64, 1};}, + Tensors{input={1,1024, 4}; scale = {1, 64, 1};}, + Tensors{input={1,1, 1024, 4}; scale = {1, 1, 64, 1};}, + Tensors{input={64, 1}; scale = {64, 1};}}), + ::testing::ValuesIn({true, false}), + ::testing::ValuesIn({true, false}) +); + +INSTANTIATE_TEST_SUITE_P(UnpackTestsWithScaleAndZeroPoint, UnpackTestsWithScaleAndZeroPoint, + TestCasesScaleAndZeroPoints, + UnpackTestsWithScaleAndZeroPoint::getTestCaseName); + +const auto TestCasesScaleAndZeroPoints2 = ::testing::Combine( + ::testing::ValuesIn({ov::element::Type_t::u4}), + ::testing::ValuesIn({ov::element::Type_t::f16}), + ::testing::ValuesIn({ov::element::Type_t::f32}), + ::testing::ValuesIn({ov::element::Type_t::f32}), + ::testing::ValuesIn({3lu, 0lu}), + ::details::ShapesIn({Tensors{input={32, 32, 64}; scale = {32, 1, 64};}, + Tensors{input={64, 64, 128}; scale = {64, 1, 128};}, + Tensors{input={64, 32, 32}; scale = {64, 1, 32};}}), + ::testing::ValuesIn({true, false}), + ::testing::ValuesIn({true, false}) +); + +INSTANTIATE_TEST_SUITE_P(UnpackTestsWithScaleAndZeroPointTest2, UnpackTestsWithScaleAndZeroPointTest2, + TestCasesScaleAndZeroPoints2, + UnpackTestsWithScaleAndZeroPointTest2::getTestCaseName); + +const auto TestCasesScaleAndZeroPoints3 = ::testing::Combine( + ::testing::ValuesIn({ov::element::Type_t::u4}), + ::testing::ValuesIn({ov::element::Type_t::f16}), + ::testing::ValuesIn({ov::element::Type_t::f16}), + ::testing::ValuesIn({ov::element::Type_t::u4}), + ::testing::ValuesIn({3lu, 0lu}), + ::details::ShapesIn({Tensors{input={1, 32, 128}; scale = {1, 32, 1}; zerop = {1, 32, 1};}, + Tensors{input={16, 64, 64}; scale = {16, 64, 1}; zerop = {16, 64, 1};}, + Tensors{input={1, 1024, 4}; scale = {1, 64, 1}; zerop = {1, 32, 1};}}), + ::testing::ValuesIn({true, false}), + ::testing::ValuesIn({true, false}) +); + +INSTANTIATE_TEST_SUITE_P(UnpackTestsWithScaleAndZeroPointTest3, UnpackTestsWithScaleAndZeroPointTest3, + TestCasesScaleAndZeroPoints3, + UnpackTestsWithScaleAndZeroPointTest3::getTestCaseName); + +} // anonymous namespace + +#endif // __AVX2__ diff --git a/src/plugins/intel_npu/tests/unit/npuw/unpack.hpp b/src/plugins/intel_npu/tests/unit/npuw/unpack.hpp new file mode 100644 index 00000000000000..da5bb4e4720f3e --- /dev/null +++ b/src/plugins/intel_npu/tests/unit/npuw/unpack.hpp @@ -0,0 +1,628 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include +#include +#include + +#include "openvino/runtime/make_tensor.hpp" + +#include "util.hpp" + 
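+// Editorial sketch (not part of this patch or of the kernels under test): a plain scalar
+// reference of what the new z-layout path, unpack_i4f16_z(), is expected to produce for an
+// i4 tensor of shape [C, H, W] with an f32 scale of shape [C, 1, W] -- every i4 value is
+// sign-extended and multiplied by the scale of its (c, w) column before being stored as
+// f16. The names and the f16-conversion hook below are illustrative assumptions only.
+static inline int8_t sign_extend_i4(uint8_t nib) {
+    // Interpret the low 4 bits as a signed value in [-8, 7].
+    return static_cast<int8_t>(nib & 0x7) - static_cast<int8_t>(nib & 0x8);
+}
+
+static inline void unpack_i4f16_z_ref(const uint8_t* src,             // packed i4, two values per byte
+                                      const float* scl,               // [C, 1, W] scales
+                                      uint16_t* dst,                  // f16 bit patterns, [C, H, W]
+                                      size_t C, size_t H, size_t W,
+                                      uint16_t (*f32_to_f16)(float))  // e.g. a float->half helper
+{
+    for (size_t c = 0; c < C; ++c) {
+        for (size_t h = 0; h < H; ++h) {
+            for (size_t w = 0; w < W; ++w) {
+                const size_t idx = w + W * h + W * H * c;
+                const uint8_t byte = src[idx / 2];
+                // Low nibble holds the even element, high nibble the odd one.
+                const uint8_t nib = (idx % 2 == 0) ? (byte & 0x0F) : (byte >> 4);
+                const float val = static_cast<float>(sign_extend_i4(nib)) * scl[w + W * c];
+                dst[idx] = f32_to_f16(val);
+            }
+        }
+    }
+}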
+namespace { + +#define ASSERT_NO_THROW_WITH_MESSAGE(code) do{ \ + try {\ + code;\ + }catch (const std::exception &ex ) {\ + FAIL()<> 4) | ((x & (1 << 6)) >> 4) | ((x & (1 << 5)) >> 4) | ((x & (1 << 4)) >> 4); +} + +inline int8_t lo4(int8_t x) { + return (x & (1 << 3)) | (x & (1 << 2)) | (x & (1 << 1)) | (x & (1 << 0)); +} + +inline uint8_t hi4(uint8_t x) { + return x >> 4; +} + +inline uint8_t lo4(uint8_t x) { + return x & 0x0F; +} + +inline int8_t upc(int8_t h) { + return h | (-((h & (1 << 3)) >> 3) & (-8)); +} + +typedef unsigned short ushort; +typedef unsigned int uint; + +float half_to_float(const ushort x) { + + __m128i halfVector = _mm_cvtsi32_si128(x); + __m128 floatVector = _mm_cvtph_ps(halfVector); + return _mm_cvtss_f32(floatVector); +} + +ushort float_to_half(const float x) { + __m128 floatVector = _mm_set_ss(x); + __m128i halfVector = _mm_cvtps_ph(floatVector, _MM_FROUND_TO_NEAREST_INT); + return _mm_extract_epi16(halfVector, 0); +} + +inline uint16_t int2hfloat(int8_t x) +{ + float inputFl32 = static_cast(x); + float* inputFl32_ptr = &inputFl32; + unsigned int* fltInt32Ptr = reinterpret_cast(inputFl32_ptr); + unsigned int fltInt32 = *fltInt32Ptr; + unsigned short fltInt16; + + fltInt16 = (fltInt32 >> 31) << 5; + unsigned short tmp = (fltInt32 >> 23) & 0xff; + tmp = (tmp - 0x70) & ((unsigned int)((int)(0x70 - tmp) >> 4) >> 27); + fltInt16 = (fltInt16 | tmp) << 10; + fltInt16 |= (fltInt32 >> 13) & 0x3ff; + + return fltInt16; +} + + +void unpack(const int8_t* in, int8_t* out, int size) { + for (int i = 0; i < size / 2; i++) { + *(out++) = upc(lo4(*in)); + *(out++) = upc(hi4(*in)); + in++; + } +} + +void unpack_i4f16(const int8_t* in, int8_t* out, int size) { + uint16_t *hFloatOut = reinterpret_cast(out); + + for (int i = 0; i < size / 2; i++) { + *(hFloatOut++) = int2hfloat(upc(lo4(*in))); + *(hFloatOut++) = int2hfloat(upc(hi4(*in))); + in++; + } +} + +/*u4 order*/ +void unpack_u4f32(const int8_t* in, float* out, int size) { + for (int i = 0; i < size / 2; i++) { + *(out++) = static_cast(lo4(*in)); + *(out++) = static_cast(hi4(*in)); + in++; + } +} + +template +::testing::AssertionResult fp16ArraysMatch(const T &actual, + const T &expected, + const T &i4Input, + bool int4 = 1 /*i4 or u4*/){ + for (size_t i = 0; i < expected.size() / 2; ++i) { + + int int8Input[] ={ + details::lo4(i4Input[i / 2]), + details::hi4(i4Input[i / 2]) + }; + + if (int4) { + int8Input[0] = details::upc(int8Input[1]); + int8Input[1] = details::upc(int8Input[0]); + }; + + auto fp16ref = int{*((uint16_t*)expected.data() + i)}; + auto fp16out = int{*((uint16_t*)actual.data() + i)}; + +#define _P(x) std::dec << std::setw(5) << (x) << '(' << std::setw(4) << std::hex << (x) << ')' + if (fp16ref != fp16out) { + return ::testing::AssertionFailure() << std::dec << std::setw(4) << i << ", i4:" + << std::setw(2) << int8Input[i % 2] + << " | ref " << _P(fp16ref) + << ", test " << _P(fp16out) << "\n"; + } +#undef _P + + } + + return ::testing::AssertionSuccess(); +} + +} // namespace details + +using ShapesInitializer = std::function&, std::vector&, std::vector&)>; + + +using UnpackTestsParams = std::tuple< + ov::element::Type_t, // fromPrecision + ov::element::Type_t, // toPrecision + ov::element::Type_t, // scalePrecision + ov::element::Type_t, // zeroPointPrecision + unsigned long, // nPartitions + ShapesInitializer, // input_shape , scale_shape, zerop initializer + bool, // use parallel_for + bool // strict partitioning + >; + +class UnpackTestsBase { +protected: + ov::element::Type fromType; + ov::element::Type 
toType; + ov::element::Type scaleType; + ov::element::Type zeropType; + std::shared_ptr from, to, scale, zerop; + + std::vector input; + std::vector output; + std::vector ref_output; + std::vector scalesStorage; + std::vector zeropStorage; + float zeropValue; + ov::Shape input_shape; + ov::Shape scale_shape; + ov::Shape zerop_shape; + + size_t nPartitions; + bool useParallelFor = false; + bool strictPartitions = false; + + void make_zeropoints() { + if (zeropType == ov::element::undefined) { + return; + } + + const std::vector zeropValues = {15.0f, 12.0f, 0.0f, 31.0f}; + const size_t nElements = shape_size(zerop_shape); + + // Set zeropValue if there's only one element + if (nElements == 1) { + zeropValue = zeropValues.front(); + } + + // Determine the size of the storage based on the type and resize the storage vector + if (zeropType == ov::element::Type_t::u4) { + zeropStorage.resize((nElements + 1) / 2, 0); // Each u4 zeropoint is 4 bits, so two zeropoints fit in one byte + } else if (zeropType == ov::element::Type_t::f32) { + zeropStorage.resize(nElements * sizeof(float), 0); + } else { + ASSERT_TRUE(zeropType == ov::element::u4 || zeropType == ov::element::f32); + } + + // Fill the storage with the appropriate values + if (zeropType == ov::element::Type_t::u4) { + for (size_t i = 0; i < nElements; ++i) { + uint8_t zeropValueU4 = static_cast(zeropValues[i % zeropValues.size()]) & 0x0F; + size_t byteIndex = i / 2; + if (i % 2 == 0) { + zeropStorage[byteIndex] = zeropValueU4; + } else { + zeropStorage[byteIndex] = (zeropValueU4 << 4); + } + } + } else if (zeropType == ov::element::Type_t::f32) { + float* ptrWork = reinterpret_cast(zeropStorage.data()); + for (size_t i = 0; i < nElements; ++i) { + ptrWork[i] = zeropValues[i % zeropValues.size()]; + } + } + + // Create the tensor + zerop = ov::make_tensor(zeropType, zerop_shape, zeropStorage.data()); + } + + void make_scales() { + if (scaleType == ov::element::undefined) { + return; + } + ASSERT_TRUE(scaleType == ov::element::f16 || scaleType == ov::element::f32); + size_t nElements = shape_size(scale_shape); + + // creating custom scale factors + const size_t nScaleBytes = scaleType.bitwidth() * nElements / 8; + + std::vector sc(nElements); + float coeffTable[] = { + 0.1f, + 0.5f, + 1.f, + 2.f + }; + for (size_t i = 0; i != nElements; i++) { + sc[i] = coeffTable[i % (sizeof (coeffTable) / sizeof(*coeffTable))]; + } + scalesStorage.resize(nScaleBytes); + + if (scaleType == ov::element::f16) { + uint16_t * ptrWork = reinterpret_cast(scalesStorage.data()); + for (size_t i = 0; i != nElements; i++) { + ptrWork[i] = details::float_to_half(sc[i]); + } + } + if (scaleType == ov::element::f32) { + float* ptrWork = reinterpret_cast(scalesStorage.data()); + for (size_t i = 0; i != nElements; i++) { + ptrWork[i] = sc[i]; + } + } + scale = ov::make_tensor(scaleType, scale_shape, scalesStorage.data()); + } + + void make_input() { + + size_t nElements = shape_size(input_shape); + + ASSERT_EQ((fromType.bitwidth() * nElements) % 8, 0) << "Input len has to be byte boundary aligned, but was " + << fromType.bitwidth() * nElements << " bits"; + ASSERT_EQ((toType.bitwidth() * nElements) % 8, 0) << "Output len has to be byte boundary aligned"; + + const size_t nInputBytes = fromType.bitwidth() * nElements / 8; + const size_t nOutputBytes = toType.bitwidth() * nElements / 8; + + input.resize(nInputBytes); + ref_output.resize(nOutputBytes); + output.resize(nOutputBytes); + std::fill(ref_output.begin(), ref_output.end(), 0); + std::fill(output.begin(), 
output.end(), 0); + + std::array input_local = { + 0x0A, 0x0B, 0x1C, 0x1D, 0x2E, 0x2F, 0x35, 0x36, + 0x4A, 0x4B, 0x5A, 0x5B, 0x6A, 0x6B, 0x7A, 0x7B, + 0x0C, 0x0D, 0x1C, 0x1D, 0x2C, 0x2D, 0x3C, 0x3D, + 0x4C, 0x4D, 0x5C, 0x5D, 0x6C, 0x6D, 0x7C, 0x7D, + }; + + for (size_t idx = 0, k = 0; k < nInputBytes; k++, idx = (idx + 1) % input_local.size()) { + input[k] = input_local[idx]; + } + + from = ov::make_tensor(fromType, input_shape, input.data()); + to = ov::make_tensor(toType, input_shape, output.data()); + } +public: + void SetUp(const UnpackTestsParams & getParam) { + ShapesInitializer shapeInit; + + std::tie(fromType, toType, scaleType, zeropType, nPartitions, shapeInit, useParallelFor, strictPartitions) = getParam; + + std::vector input, scale, zerop; + shapeInit(input, scale, zerop); + + input_shape = ov::Shape{input.begin(), input.end()}; + scale_shape = ov::Shape{scale.begin(), scale.end()}; + if (zerop.empty()) { + zerop_shape = ov::Shape({1}); + } else { + zerop_shape = ov::Shape{zerop.begin(), zerop.end()}; + } + + make_input(); + make_scales(); + make_zeropoints(); + + make_ref_output(); + } + std::string ToString() const { + std::ostringstream result; + result << (isNegative() ? "NEGATIVE_" : "") + <<"["; + + for (size_t i = 0; i != input_shape.size(); i++) { + result << input_shape[i] << ((i + 1 == input_shape.size()) ? "" : "x"); + } + result <<"]" + << "_p" << nPartitions + << (strictPartitions ? "_SP" : "") + << (useParallelFor ? "_parallel" : "_serial") + << "_from_" << fromType + << "_to_" << toType; + if (scaleType != ov::element::Type_t::undefined) + result << "_scale_" << scaleType; + if (zeropType != ov::element::Type_t::undefined) + result << "_zerop_" << zeropType; + + return result.str(); + } + + /** + * Negative test cases has to be carefully reviewed, to still remain positive runs at some points + * @return + */ + virtual bool isNegative() const { + return false; + } + + virtual void make_ref_output() { + size_t nElements = 1; + for (size_t dim : input_shape) { + nElements *= dim; + } + if (toType == ov::element::i8) { + details::unpack(input.data(), ref_output.data(), static_cast(nElements)); + } else if (toType == ov::element::f16) { + details::unpack_i4f16(input.data(), ref_output.data(), static_cast(nElements)); + } + } +}; + +template +class UnpackTestsTmpl : + public ::testing::Test, + public T, + public ::testing::WithParamInterface { +protected: + + void SetUp() override { + T::SetUp(GetParam()); + } +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + T _bt; + _bt.SetUp(obj.param); + return _bt.ToString(); + } +}; + +using UnpackTests = UnpackTestsTmpl; +class UnpackTestsRef : public UnpackTests {}; + +TEST_P(UnpackTests, i4) { + ASSERT_NO_THROW_WITH_MESSAGE(ov::npuw::util::unpack(from, to, ov::npuw::util::UnpackOptions{useParallelFor, nPartitions, strictPartitions})); + ASSERT_TRUE(details::fp16ArraysMatch(output, ref_output, input)); +} + +class UnpackWithScaleTestsBase : public UnpackTestsBase { +protected: + bool isNegative() const override { + if (scale_shape.size() != 3 && scale_shape.size() != 2) return true; + if (input_shape.back() % 64) return true; + if ((from->get_size() / scale->get_size()) % 64) return true; + if (toType != ov::element::f16) return true; + + return false; + } + + void make_ref_output() override { + if (isNegative()) return; + + size_t nElements = from->get_size(); + + const size_t nOutputElementsPerScale = ref_output.size() / (toType.bitwidth() / 8) / scale->get_size(); + + 
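+        // Reference recipe: expand all i4 values to f16 first, then walk the output in
+        // scale-major order and multiply every run of nOutputElementsPerScale consecutive
+        // elements by its own scale (read either as f16 or f32, depending on scaleType).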
details::unpack_i4f16(input.data(), ref_output.data(), static_cast(nElements)); + + // lets apply per channel scale + uint16_t * pRef = reinterpret_cast(ref_output.data()); + uint16_t * pScale_f16 = reinterpret_cast(scale->data()); + float * pScale_f32 = reinterpret_cast(scale->data()); + + for (size_t i = 0; i < scale->get_size(); i++) { + for (size_t sc = 0; sc != nOutputElementsPerScale; sc++) { + float ref_scaled = details::half_to_float(pRef[0]); + if (scaleType == ov::element::f32) { + ref_scaled *= pScale_f32[0]; + } else if (scaleType == ov::element::f16) { + ref_scaled *= details::half_to_float(pScale_f16[0]); + } + *pRef = details::float_to_half(ref_scaled); + pRef++; + } + pScale_f32++; + pScale_f16++; + } + } + +}; + +using UnpackWithScaleTests = UnpackTestsTmpl; + + +TEST_P(UnpackWithScaleTests, i4_scale) { + ASSERT_NO_THROW_IF(!isNegative(), + ov::npuw::util::unpack(from, scale, to, ov::npuw::util::UnpackOptions{useParallelFor, nPartitions, strictPartitions})); + if (!isNegative()) { + ASSERT_TRUE(details::fp16ArraysMatch(output, ref_output, input)); + } +} + + +class UnpackTestsWithScaleAndZeroPointBase : public UnpackTestsBase { +protected: + bool isNegative() const override { + if (scale_shape.size() != 3 && scale_shape.size() != 2) return true; + if (input_shape.back() % 64) return true; + + return false; + } + + void make_ref_output() override { + if (isNegative()) return; + + size_t nElements = from->get_size(); + + const size_t nOutputElementsPerScale = ref_output.size() / (toType.bitwidth() / 8) / scale->get_size(); + + std::vector floatRef(nElements); + details::unpack_u4f32(input.data(), floatRef.data(), static_cast(nElements)); + + + // lets apply per channel scale + uint16_t * pRef = reinterpret_cast(ref_output.data()); + float * pFloatRef = reinterpret_cast(floatRef.data()); + const uint16_t * pScale_f16 = reinterpret_cast(scale->data()); + const float * pScale_f32 = reinterpret_cast(scale->data()); + + for (size_t i = 0; i < scale->get_size(); i++) { + for (size_t sc = 0; sc != nOutputElementsPerScale; sc++) { + // applying zeropoint + float ref_scaled = *pFloatRef - zeropValue; + + if (scaleType == ov::element::f32) { + ref_scaled *= pScale_f32[0]; + } else if (scaleType == ov::element::f16) { + ref_scaled *= details::half_to_float(pScale_f16[0]); + } + *pRef = details::float_to_half(ref_scaled); + + pFloatRef++; + pRef++; + } + pScale_f32++; + pScale_f16++; + } + } +}; + +using UnpackTestsWithScaleAndZeroPoint = UnpackTestsTmpl; + +TEST_P(UnpackTestsWithScaleAndZeroPoint, u4) { + ASSERT_NO_THROW_IF(!isNegative(), + ov::npuw::util::unpack(from, zerop, scale, to, ov::npuw::util::UnpackOptions{useParallelFor, nPartitions, strictPartitions})); + if (!isNegative()) { + ASSERT_TRUE(details::fp16ArraysMatch(output, ref_output, input, false)); + } +} + +class UnpackTestsWithScaleAndZeroPoint2 : public UnpackTestsWithScaleAndZeroPointBase { +protected: + bool isNegative() const override { + if (input_shape.back() % 64 || input_shape.size() != 3) return true; + if (scale_shape.back() % 64 || scale_shape.size() != 3) return true; + + return false; + } + + void make_ref_output() override { + if (isNegative()) return; + + size_t nElements = from->get_size(); + const auto from_shape = from->get_shape(); + + const size_t C = from_shape[from_shape.size() - 3]; + const size_t H = from_shape[from_shape.size() - 2]; + const size_t W = from_shape[from_shape.size() - 1]; + + std::vector floatRef(nElements); + details::unpack_u4f32(input.data(), floatRef.data(), 
static_cast(nElements)); + + uint16_t * pRef = reinterpret_cast(ref_output.data()); + float * pFloatRef = reinterpret_cast(floatRef.data()); + const uint16_t * pScale_f16 = reinterpret_cast(scale->data()); + const float * pScale_f32 = reinterpret_cast(scale->data()); + + for (size_t c = 0; c < C; ++c) { + for (size_t h = 0; h < H; ++h) { + for (size_t w = 0; w < W; ++w) { + size_t input_index = w + W * h + W * H * c; + size_t scale_index = w + W * c; + float ref_scaled = pFloatRef[input_index] - zeropValue; + if (scaleType == ov::element::f32) { + ref_scaled *= pScale_f32[scale_index]; + } else if (scaleType == ov::element::f16) { + ref_scaled *= details::half_to_float(pScale_f16[scale_index]); + } + pRef[w + W * h + c * W * H] = details::float_to_half(ref_scaled); + } + } + } + } +}; + +using UnpackTestsWithScaleAndZeroPointTest2 = UnpackTestsTmpl; + +TEST_P(UnpackTestsWithScaleAndZeroPointTest2, u4) { + ASSERT_NO_THROW_IF(!isNegative(), + ov::npuw::util::unpack(from, zerop, scale, to, ov::npuw::util::UnpackOptions{useParallelFor, nPartitions, strictPartitions})); + if (!isNegative()) { + ASSERT_TRUE(details::fp16ArraysMatch(output, ref_output, input, false)); + } +} + +class UnpackTestsWithScaleAndZeroPoint3 : public UnpackTestsWithScaleAndZeroPointBase { +protected: + bool isNegative() const override { + if (scale_shape.size() != 3 || zerop_shape.size() != 3) return true; + if (input_shape[2] % 64 || input_shape.size() != 3) return true; + + return false; + } + + void make_ref_output() override { + if (isNegative()) return; + + size_t nElements = from->get_size(); + + const size_t nOutputElementsPerScale = ref_output.size() / (toType.bitwidth() / 8) / scale->get_size(); + + std::vector floatRef(nElements); + details::unpack_u4f32(input.data(), floatRef.data(), static_cast(nElements)); + + + // lets apply per channel scale + uint16_t * pRef = reinterpret_cast(ref_output.data()); + const uint8_t* pZer = static_cast(zerop->data()); + float * pFloatRef = reinterpret_cast(floatRef.data()); + const uint16_t * pScale_f16 = reinterpret_cast(scale->data()); + const float * pScale_f32 = reinterpret_cast(scale->data()); + + for (size_t i = 0; i < scale->get_size(); i++) { + float zeroPointValue = static_cast((i % 2 == 0) ? 
details::lo4(pZer[i / 2]) : details::hi4(pZer[i / 2])); + for (size_t sc = 0; sc != nOutputElementsPerScale; sc++) { + // applying zeropoint + float ref_scaled = *pFloatRef - zeroPointValue; + + if (scaleType == ov::element::f32) { + ref_scaled *= pScale_f32[0]; + } else if (scaleType == ov::element::f16) { + ref_scaled *= details::half_to_float(pScale_f16[0]); + } + *pRef = details::float_to_half(ref_scaled); + + pFloatRef++; + pRef++; + } + pScale_f32++; + pScale_f16++; + } + } +}; + +using UnpackTestsWithScaleAndZeroPointTest3 = UnpackTestsTmpl; + +TEST_P(UnpackTestsWithScaleAndZeroPointTest3, u4) { + ASSERT_NO_THROW_IF(!isNegative(), + ov::npuw::util::unpack(from, zerop, scale, to, ov::npuw::util::UnpackOptions{useParallelFor, nPartitions, strictPartitions})); + if (!isNegative()) { + ASSERT_TRUE(details::fp16ArraysMatch(output, ref_output, input, false)); + } +} + +#define Tensors [](std::vector& input, std::vector&scale, std::vector&zerop) + + +namespace details { +::testing::internal::ParamGenerator::value_type> ShapesIn( + const std::vector& container) { + return ::testing::ValuesIn(container.begin(), container.end()); +} + +} // namespace details +} // anonymous namespace diff --git a/src/plugins/intel_npu/thirdparty/level-zero-ext b/src/plugins/intel_npu/thirdparty/level-zero-ext index 816b5ce120096c..cdb761dd63b1d4 160000 --- a/src/plugins/intel_npu/thirdparty/level-zero-ext +++ b/src/plugins/intel_npu/thirdparty/level-zero-ext @@ -1 +1 @@ -Subproject commit 816b5ce120096cbc115b56ed43f8a030eb420b19 +Subproject commit cdb761dd63b1d47230d501e631a2d725db09ba0d diff --git a/src/tests/ov_helpers/ov_snippets_models/include/subgraph_converts.hpp b/src/tests/ov_helpers/ov_snippets_models/include/subgraph_converts.hpp index 84a4be181cf3d6..6c36034301d223 100644 --- a/src/tests/ov_helpers/ov_snippets_models/include/subgraph_converts.hpp +++ b/src/tests/ov_helpers/ov_snippets_models/include/subgraph_converts.hpp @@ -81,12 +81,11 @@ class ConvertOutputFunction : public SnippetsFunctionBase { }; -/// There are 2 subgraphs: Add + Convert(Stub) and Relu /// Tokenized simply by starting subgraph. 
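The unpack test fixtures above all build their reference output from the same per-element recipe: decode each 4-bit value, subtract the zero point, and multiply by the scale that owns the corresponding output channel. A minimal numpy sketch of that math follows; the helper name, the nibble order, and the channel layout are illustrative assumptions for this sketch, not the ov::npuw::util::unpack API itself.

import numpy as np

def unpack_u4_reference(packed, scale, zero_point):
    # Split every byte into its two 4-bit values (low nibble first, mirroring
    # details::lo4 / details::hi4 in the tests above -- the order is an assumption here).
    lo = packed & 0x0F
    hi = (packed >> 4) & 0x0F
    u4 = np.stack([lo, hi], axis=-1).reshape(packed.shape[0], -1).astype(np.float32)
    # Subtract the zero point, then apply one scale per output row/channel.
    return (u4 - zero_point) * scale

# Toy usage: two rows of two packed bytes -> two rows of four u4 values.
packed = np.array([[0x21, 0x43], [0x65, 0x87]], dtype=np.uint8)
scale = np.array([[0.5], [0.25]], dtype=np.float32)
print(unpack_u4_reference(packed, scale, zero_point=8.0))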
// in1 in2 in1 in2 -// Add Subgraph -// Convert -> | -// Relu Subgraph +// Add | +// Convert -> Subgraph +// Relu | // Result Result class ConvertStubFunction : public SnippetsFunctionBase { public: diff --git a/src/tests/ov_helpers/ov_snippets_models/src/subgraph_convert.cpp b/src/tests/ov_helpers/ov_snippets_models/src/subgraph_convert.cpp index 00143f9f623f4e..1324d380294502 100644 --- a/src/tests/ov_helpers/ov_snippets_models/src/subgraph_convert.cpp +++ b/src/tests/ov_helpers/ov_snippets_models/src/subgraph_convert.cpp @@ -24,10 +24,7 @@ std::shared_ptr ConvertFunction::initOriginal() const { } std::shared_ptr ConvertFunction::initReference() const { auto data0 = std::make_shared(inType, input_shapes[0]); - auto indata0 = std::make_shared(inType, data0->get_shape()); - auto subgraph = std::make_shared(NodeVector{data0}, - std::make_shared(NodeVector{std::make_shared(indata0, outType)}, - ParameterVector{indata0})); + auto subgraph = std::make_shared(NodeVector{data0}, getOriginal()); return std::make_shared(NodeVector{subgraph}, ParameterVector{data0}); } @@ -41,13 +38,7 @@ std::shared_ptr ConvertInputFunction::initOriginal() const { std::shared_ptr ConvertInputFunction::initReference() const { auto data0 = std::make_shared(inType, input_shapes[0]); auto data1 = std::make_shared(outType, input_shapes[1]); - auto indata0 = std::make_shared(inType, data0->get_shape()); - auto indata1 = std::make_shared(outType, data1->get_shape()); - auto convert = std::make_shared(indata0, outType); - auto subgraph = std::make_shared(NodeVector{data0, data1}, - std::make_shared( - NodeVector{std::make_shared(convert, indata1)}, - ParameterVector{indata0, indata1})); + auto subgraph = std::make_shared(NodeVector{data0, data1}, getOriginal()); return std::make_shared(NodeVector{subgraph}, ParameterVector{data0, data1}); } @@ -61,14 +52,7 @@ std::shared_ptr ConvertOutputFunction::initOriginal() const { std::shared_ptr ConvertOutputFunction::initReference() const { auto data0 = std::make_shared(inType, input_shapes[0]); auto data1 = std::make_shared(inType, input_shapes[1]); - auto indata0 = std::make_shared(inType, data0->get_shape()); - auto indata1 = std::make_shared(inType, data1->get_shape()); - auto add = std::make_shared(indata0, indata1); - auto convert = std::make_shared(add, outType); - auto subgraph = std::make_shared(NodeVector{data0, data1}, - std::make_shared( - NodeVector{convert}, - ParameterVector{indata0, indata1})); + auto subgraph = std::make_shared(NodeVector{data0, data1}, getOriginal()); return std::make_shared(NodeVector{subgraph}, ParameterVector{data0, data1}); } @@ -83,17 +67,8 @@ std::shared_ptr ConvertStubFunction::initOriginal() const { std::shared_ptr ConvertStubFunction::initReference() const { auto data0 = std::make_shared(inType, input_shapes[0]); auto data1 = std::make_shared(inType, input_shapes[1]); - auto indata0 = std::make_shared(inType, data0->get_shape()); - auto indata1 = std::make_shared(inType, data1->get_shape()); - auto add = std::make_shared(indata0, indata1); - auto convert = std::make_shared(add, outType); - auto subgraph0 = std::make_shared( - NodeVector{data0, data1}, std::make_shared(NodeVector{convert}, ParameterVector{indata0, indata1})); - auto indata2 = std::make_shared(convert->get_destination_type(), convert->get_shape()); - auto relu = std::make_shared(indata2); - auto subgraph1 = std::make_shared( - NodeVector{subgraph0}, std::make_shared(NodeVector{relu}, ParameterVector{indata2})); - return std::make_shared(NodeVector{subgraph1}, 
ParameterVector{data0, data1}); + auto subgraph = std::make_shared(NodeVector{data0, data1}, getOriginal()); + return std::make_shared(NodeVector{subgraph}, ParameterVector{data0, data1}); } std::shared_ptr ConvertPartialInputsAndResultsFunction::initOriginal() const { @@ -116,12 +91,12 @@ std::shared_ptr ConvertPartialInputsAndResultsFunction::initReference auto indata0 = std::make_shared(inTypes[0], data0->get_shape()); auto indata1 = std::make_shared(inTypes[1], data1->get_shape()); auto indata2 = std::make_shared(inTypes[2], data2->get_shape()); - auto convert0 = std::make_shared(indata0, outTypes[0]); - auto convert1 = std::make_shared(indata1, outTypes[0]); + auto convert0 = std::make_shared(indata0, outTypes[0]); + auto convert1 = std::make_shared(indata1, outTypes[0]); auto add = std::make_shared(convert0, convert1); auto relu = std::make_shared(add); auto sub = std::make_shared(relu, indata2); - auto convert2 = std::make_shared(relu, outTypes[1]); + auto convert2 = std::make_shared(relu, outTypes[1]); auto subgraph = std::make_shared( NodeVector{data0, data1, data2}, std::make_shared(NodeVector{sub, convert2}, ParameterVector{indata0, indata1, indata2})); auto stub3 = createRollAsStub(subgraph); @@ -141,15 +116,7 @@ std::shared_ptr ConvertManyOnInputsFunction::initOriginal() const { } std::shared_ptr ConvertManyOnInputsFunction::initReference() const { auto data0 = std::make_shared(types[0], input_shapes[0]); - auto indata0 = std::make_shared(types[0], data0->get_shape()); - std::shared_ptr out = indata0; - for (auto i = 1; i < types.size(); i++) { - auto convert = std::make_shared(out, types[i]); - out = convert; - } - auto relu = std::make_shared(out); - auto subgraph = std::make_shared(NodeVector{data0}, - std::make_shared(NodeVector{relu}, ParameterVector{indata0})); + auto subgraph = std::make_shared(NodeVector{data0}, getOriginal()); return std::make_shared(NodeVector{subgraph}, ParameterVector{data0}); } @@ -165,15 +132,7 @@ std::shared_ptr ConvertManyOnOutputsFunction::initOriginal() const { } std::shared_ptr ConvertManyOnOutputsFunction::initReference() const { auto data0 = std::make_shared(types[0], input_shapes[0]); - auto indata0 = std::make_shared(types[0], data0->get_shape()); - auto relu = std::make_shared(indata0); - std::shared_ptr out = relu; - for (auto i = 1; i < types.size(); i++) { - auto convert = std::make_shared(out, types[i]); - out = convert; - } - auto subgraph = std::make_shared(NodeVector{data0}, - std::make_shared(NodeVector{out}, ParameterVector{indata0})); + auto subgraph = std::make_shared(NodeVector{data0}, getOriginal()); return std::make_shared(NodeVector{subgraph}, ParameterVector{data0}); } @@ -194,20 +153,7 @@ std::shared_ptr ConvertManyOnInputOutputFunction::initOriginal() cons } std::shared_ptr ConvertManyOnInputOutputFunction::initReference() const { auto data0 = std::make_shared(inTypes[0], input_shapes[0]); - auto indata0 = std::make_shared(inTypes[0], data0->get_shape()); - std::shared_ptr out = indata0; - for (auto i = 1; i < inTypes.size(); i++) { - auto convert = std::make_shared(out, inTypes[i]); - out = convert; - } - auto relu = std::make_shared(data0); - out = relu; - for (auto i = 0; i < outTypes.size(); i++) { - auto convert = std::make_shared(out, outTypes[i]); - out = convert; - } - auto subgraph = std::make_shared(NodeVector{data0}, - std::make_shared(NodeVector{out}, ParameterVector{indata0})); + auto subgraph = std::make_shared(NodeVector{data0}, getOriginal()); return std::make_shared(NodeVector{subgraph}, 
ParameterVector{data0}); } } // namespace snippets diff --git a/src/tests/ov_helpers/ov_snippets_models/src/subgraph_mha.cpp b/src/tests/ov_helpers/ov_snippets_models/src/subgraph_mha.cpp index c47cb754b5b891..1dbf8d7d22ed26 100644 --- a/src/tests/ov_helpers/ov_snippets_models/src/subgraph_mha.cpp +++ b/src/tests/ov_helpers/ov_snippets_models/src/subgraph_mha.cpp @@ -393,8 +393,8 @@ std::shared_ptr MHASelectFunction::initOriginal() const { // Value is equal to '1' - to avoid situation e^(-1000) / (sum(e^(-1000)) = 0/0 = NAN auto selectConst = ov::op::v0::Constant::create(precisions[2], ov::Shape{1}, std::vector{1}); - float transA = false; - float transB = false; + bool transA = false; + bool transB = false; const auto transpose0 = std::make_shared(transpose0Param, transpose0Const); const auto transpose1 = std::make_shared(transpose1Param, transpose1Const); const auto matMul0 = std::make_shared(transpose0, transpose1, transA, transB); @@ -531,8 +531,8 @@ std::shared_ptr MHAWOTransposeOnInputsFunction::initOriginal() const auto transpose3Const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape({4}), std::vector{0, 2, 1, 3}); - float transA = false; - float transB = false; + bool transA = false; + bool transB = false; const auto mulConst = ov::test::utils::make_constant(precision, ov::Shape({1})); const auto mul = std::make_shared(param1, mulConst); const auto matMul0 = std::make_shared(param0, mul, transA, transB); @@ -550,8 +550,8 @@ std::shared_ptr MHAWOTransposeFunction::initOriginal() const { auto param2 = std::make_shared(precisions[2], input_shapes[2]); ov::ParameterVector ngraphParam = {param0, param1, param2}; - float transA = false; - float transB = false; + bool transA = false; + bool transB = false; const auto matMul0 = std::make_shared(param0, param1, transA, transB); const auto softmax = std::make_shared(matMul0, -1); const auto matMul1 = std::make_shared(softmax, param2, transA, transB); @@ -615,8 +615,8 @@ std::shared_ptr MHAFQAfterMatMulFunction::initOriginal() const { static_cast(input_shapes[0].get_shape()[1])}; auto reshape1Const = ov::op::v0::Constant::create(ov::element::i64, {reshape1ConstData.size()}, reshape1ConstData); - float transA = false; - float transB = false; + bool transA = false; + bool transB = false; const auto transpose0 = std::make_shared(transpose0Param, transpose0Const); const auto transpose1 = std::make_shared(transpose1Param, transpose1Const); const auto matMul0 = std::make_shared(transpose0, transpose1, transA, transB); @@ -665,8 +665,8 @@ std::shared_ptr MHAINT8MatMulFunction::initOriginal() const { {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294}); auto fq2 = ov::test::utils::make_fake_quantize(transpose2Param, ov::element::f32, 256, {1}, {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294}); - float transA = false; - float transB = false; + bool transA = false; + bool transB = false; const auto transpose0 = std::make_shared(fq0, transpose0Const); const auto transpose1 = std::make_shared(fq1, transpose1Const); const auto matMul0 = std::make_shared(transpose0, transpose1, transA, transB); @@ -756,8 +756,8 @@ std::shared_ptr MHAFQFunction::initOriginal() const { const auto fq_add = ov::test::utils::make_fake_quantize(addParam, ov::element::f32, 256, {1}, {-1000}, {0}, {-1000}, {0}); - float transA = false; - float transB = false; + bool transA = false; + bool transB = false; const auto transpose0 = std::make_shared(fq0, transpose0Const); const auto transpose1 = std::make_shared(fq1, transpose1Const); const auto transpose2 = 
std::make_shared(transpose2Param, transpose2Const); @@ -806,12 +806,12 @@ std::shared_ptr MHAINT8MatMulTypeRelaxedFunction::initOriginal() cons auto reshape1Const = ov::op::v0::Constant::create(ov::element::i64, {reshape1ConstData.size()}, reshape1ConstData); const auto fq_signed_params = ov::builder::subgraph::FakeQuantizeOnData(256, {1}, {-36912.66015625}, {36624.28125}, {-128}, {127}, ov::element::i8); - const auto fq0 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(transpose0Param, ov::element::i8, fq_signed_params); - const auto fq1 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(transpose1Param, ov::element::i8, fq_signed_params); - const auto fq2 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(transpose2Param, ov::element::i8, fq_signed_params); + const auto fq0 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(transpose0Param, ov::element::f32, fq_signed_params); + const auto fq1 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(transpose1Param, ov::element::f32, fq_signed_params); + const auto fq2 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(transpose2Param, ov::element::f32, fq_signed_params); - float transA = false; - float transB = false; + bool transA = false; + bool transB = false; const auto transpose0 = std::make_shared(fq0, transpose0Const); const auto transpose1 = std::make_shared(fq1, transpose1Const); const auto matMul0 = std::make_shared>( @@ -820,7 +820,7 @@ std::shared_ptr MHAINT8MatMulTypeRelaxedFunction::initOriginal() cons ov::op::TemporaryReplaceOutputType(transpose0, element::f32).get(), ov::op::TemporaryReplaceOutputType(transpose1, element::f32).get(), transA, transB); - const auto fq3 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(matMul0, ov::element::i8, fq_signed_params); + const auto fq3 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(matMul0, ov::element::f32, fq_signed_params); const auto add = std::make_shared>( std::vector{ element::f32, element::f32 }, std::vector{ element::f32 }, @@ -833,12 +833,12 @@ std::shared_ptr MHAINT8MatMulTypeRelaxedFunction::initOriginal() cons ov::op::TemporaryReplaceOutputType(add, element::f32).get(), ov::op::TemporaryReplaceOutputType(deq, element::f32).get()); - const auto reshape0 = std::make_shared(add, reshape0Const, true); + const auto reshape0 = std::make_shared(deq_mul, reshape0Const, true); const auto softMax = std::make_shared(reshape0, 1); const auto reshape1 = std::make_shared(softMax, reshape1Const, true); const auto fq_unsigned_params = ov::builder::subgraph::FakeQuantizeOnData(256, {1}, {0}, {0.245}, {0}, {255}, ov::element::u8); - const auto fq4 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(reshape1, ov::element::u8, fq_unsigned_params); + const auto fq4 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(reshape1, ov::element::f32, fq_unsigned_params); const auto transpose2 = std::make_shared(fq2, transpose2Const); const auto matMul1 = std::make_shared>( @@ -846,7 +846,7 @@ std::shared_ptr MHAINT8MatMulTypeRelaxedFunction::initOriginal() cons std::vector{ element::f32 }, ov::op::TemporaryReplaceOutputType(fq4, element::f32).get(), ov::op::TemporaryReplaceOutputType(transpose2, element::f32).get(), transA, transB); - const auto fq5 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(matMul1, ov::element::i8, fq_signed_params); + const auto fq5 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(matMul1, ov::element::f32, fq_signed_params); const auto transpose3 = std::make_shared(fq5, transpose3Const); ov::ResultVector 
results{std::make_shared(transpose3)}; @@ -860,9 +860,9 @@ std::shared_ptr MHAINT8MatMulTypeRelaxedFunction::initReference() con ov::ParameterVector ngraphParams = {data0, data1, data2, data3}; const auto fq_signed_params = ov::builder::subgraph::FakeQuantizeOnData(256, {1}, {-36912.66015625}, {36624.28125}, {-128}, {127}, ov::element::i8); - const auto fq0 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(data0, ov::element::i8, fq_signed_params); - const auto fq1 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(data1, ov::element::i8, fq_signed_params); - const auto fq2 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(data3, ov::element::i8, fq_signed_params); + const auto fq0 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(data0, ov::element::f32, fq_signed_params); + const auto fq1 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(data1, ov::element::f32, fq_signed_params); + const auto fq2 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(data3, ov::element::f32, fq_signed_params); NodeVector subgraph_inputs = {fq0, fq1, data2, fq2}; auto transpose0Param = std::make_shared(precision, input_shapes[0]); @@ -877,19 +877,8 @@ std::shared_ptr MHAINT8MatMulTypeRelaxedFunction::initReference() con auto transpose2Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector{0, 2, 1, 3}); auto transpose3Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector{0, 2, 1, 3}); - std::vector reshape0ConstData = {static_cast(input_shapes[0].get_shape()[0] * - input_shapes[0].get_shape()[1] * input_shapes[0].get_shape()[2]), - -1}; - auto reshape0Const = ov::op::v0::Constant::create(ov::element::i64, {reshape0ConstData.size()}, reshape0ConstData); - - std::vector reshape1ConstData = {static_cast(input_shapes[0].get_shape()[0]), - static_cast(input_shapes[0].get_shape()[2]), - static_cast(input_shapes[0].get_shape()[1]), - static_cast(input_shapes[0].get_shape()[1])}; - auto reshape1Const = ov::op::v0::Constant::create(ov::element::i64, {reshape1ConstData.size()}, reshape1ConstData); - - float transA = false; - float transB = false; + bool transA = false; + bool transB = false; const auto transpose0 = std::make_shared(transpose0Param, transpose0Const); const auto transpose1 = std::make_shared(transpose1Param, transpose1Const); const auto matMul0 = std::make_shared>( @@ -898,7 +887,18 @@ std::shared_ptr MHAINT8MatMulTypeRelaxedFunction::initReference() con ov::op::TemporaryReplaceOutputType(transpose0, element::f32).get(), ov::op::TemporaryReplaceOutputType(transpose1, element::f32).get(), transA, transB); - const auto fq3 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(matMul0, ov::element::i8, fq_signed_params); + auto decomposed_fq = + [](const ov::Output& input, const ov::element::Type& out_precision, float il, float ih, float scale) { + const auto input_low = ov::op::v0::Constant::create(ov::element::f32, {1}, {il}); + const auto input_high = ov::op::v0::Constant::create(ov::element::f32, {1}, {ih}); + const auto output_scale = ov::op::v0::Constant::create(ov::element::f32, {1}, {scale}); + const auto max = std::make_shared(input, input_low); + const auto min = std::make_shared(max, input_high); + const auto mul = std::make_shared(min, output_scale); + return std::make_shared(mul, out_precision); + }; + + const auto fq3 = decomposed_fq(matMul0, ov::element::i8, fq_signed_params.inputLowValues[0], fq_signed_params.inputHighValues[0], 0.00346764503f); const auto add = std::make_shared>( std::vector{ element::f32, element::f32 }, 
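The decomposed_fq lambda introduced above replaces a TypeRelaxed FakeQuantize in the reference graph with an explicit Maximum -> Minimum -> Multiply -> Convert chain. The numpy sketch below only restates what that node sequence computes on a tensor; the rounding behaviour of the final Convert is an assumption made for illustration.

import numpy as np

def decomposed_fq_reference(x, input_low, input_high, output_scale, out_dtype=np.int8):
    # Maximum(x, input_low) followed by Minimum(.., input_high) clamps x to [input_low, input_high].
    clamped = np.minimum(np.maximum(x, input_low), input_high)
    # Multiply by the output scale constant, then Convert to the requested precision.
    return (clamped * output_scale).astype(out_dtype)

x = np.array([-40000.0, 0.0, 123.4, 36624.28125], dtype=np.float32)
print(decomposed_fq_reference(x, -36912.66015625, 36624.28125, 0.00346764503))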
std::vector{ element::f32 }, @@ -911,12 +911,8 @@ std::shared_ptr MHAINT8MatMulTypeRelaxedFunction::initReference() con ov::op::TemporaryReplaceOutputType(add, element::f32).get(), ov::op::TemporaryReplaceOutputType(deq, element::f32).get()); - const auto reshape0 = std::make_shared(add, reshape0Const, true); - const auto softMax = std::make_shared(reshape0, 1); - const auto reshape1 = std::make_shared(softMax, reshape1Const, true); - - const auto fq_unsigned_params = ov::builder::subgraph::FakeQuantizeOnData(256, {1}, {0}, {0.245}, {0}, {255}, ov::element::u8); - const auto fq4 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(reshape1, ov::element::u8, fq_unsigned_params); + const auto softMax = std::make_shared(deq_mul, 3); + const auto fq4 = decomposed_fq(softMax, ov::element::u8, 0.f, 0.245f, 1040.81628f); const auto transpose2 = std::make_shared(transpose2Param, transpose2Const); const auto matMul1 = std::make_shared>( @@ -924,7 +920,7 @@ std::shared_ptr MHAINT8MatMulTypeRelaxedFunction::initReference() con std::vector{ element::f32 }, ov::op::TemporaryReplaceOutputType(fq4, element::f32).get(), ov::op::TemporaryReplaceOutputType(transpose2, element::f32).get(), transA, transB); - const auto fq5 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(matMul1, ov::element::i8, fq_signed_params); + const auto fq5 = decomposed_fq(matMul1, ov::element::i8, fq_signed_params.inputLowValues[0], fq_signed_params.inputHighValues[0], 0.00346764503f); auto subgraph = std::make_shared(subgraph_inputs, std::make_shared(NodeVector{fq5}, subgraph_params)); @@ -946,8 +942,8 @@ std::shared_ptr MHAMulAddFunction::initOriginal() const { auto transpose2Const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{input_shapes[2].size()}, std::vector{0, 2, 1, 3}); auto transpose3Const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{input_shapes[2].size()}, std::vector{0, 2, 1, 3}); - float transA = false; - float transB = false; + bool transA = false; + bool transB = false; const auto transpose0 = std::make_shared(transpose0Param, transpose0Const); const auto transpose1 = std::make_shared(transpose1Param, transpose1Const); const auto matMul0 = std::make_shared(transpose0, transpose1, transA, transB); diff --git a/src/tests/ov_helpers/ov_snippets_models/src/subgraph_simple.cpp b/src/tests/ov_helpers/ov_snippets_models/src/subgraph_simple.cpp index 67a1382b4f658a..12758a90c07652 100644 --- a/src/tests/ov_helpers/ov_snippets_models/src/subgraph_simple.cpp +++ b/src/tests/ov_helpers/ov_snippets_models/src/subgraph_simple.cpp @@ -20,11 +20,7 @@ std::shared_ptr AddFunction::initOriginal() const { std::shared_ptr AddFunction::initReference() const { auto data0 = std::make_shared(precision, input_shapes[0]); auto data1 = std::make_shared(precision, input_shapes[1]); - auto indata0 = std::make_shared(precision, data0->get_shape()); - auto indata1 = std::make_shared(precision, data1->get_shape()); - auto add = std::make_shared(NodeVector{data0, data1}, - std::make_shared(NodeVector{std::make_shared(indata0, indata1)}, - ParameterVector{indata0, indata1})); + auto add = std::make_shared(NodeVector{data0, data1}, getOriginal()); return std::make_shared(NodeVector{add}, ParameterVector{data0, data1}); } std::shared_ptr ExpFunction::initOriginal() const { @@ -78,7 +74,7 @@ std::shared_ptr EltwiseFunction::initReference() const { auto indata1 = std::make_shared(precision, data1->get_shape()); auto indata2 = std::make_shared(precision, data1->get_shape()); auto add = std::make_shared(indata0, indata1); - 
auto sub = std::make_shared(add, const_data); + auto sub = std::make_shared(add, indata2); auto mul = std::make_shared(NodeVector{data0, data1, const_data}, std::make_shared(NodeVector{std::make_shared(add, sub)}, ParameterVector{indata0, indata1, indata2})); @@ -151,10 +147,10 @@ std::shared_ptr MatMulEltwiseBranchesFunction::initReference() const const std::vector const_values = ov::test::utils::generate_float_numbers(4, -10., 10.); // snippet inputs auto non_snippet_op = std::make_shared(sinh_1, sinh_2); - auto mul_const_1 = std::make_shared(precision, Shape{1}, const_values[0]); - auto add_const_1 = std::make_shared(precision, Shape{1}, const_values[1]); - auto mul_const_2 = std::make_shared(precision, Shape{1}, const_values[2]); - auto sub_const_2 = std::make_shared(precision, Shape{1}, const_values[3]); + auto mul_const_1 = std::make_shared(precision, Shape{1}, const_values[0]); + auto add_const_1 = std::make_shared(precision, Shape{1}, const_values[1]); + auto mul_const_2 = std::make_shared(precision, Shape{1}, const_values[2]); + auto sub_const_2 = std::make_shared(precision, Shape{1}, const_values[3]); // snippet function Shape matMulOutShape = input_shapes[0].get_shape(); diff --git a/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py b/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py index a2f54076de9d7f..5bf019db3c131e 100644 --- a/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py +++ b/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py @@ -5,17 +5,18 @@ import warnings from copy import deepcopy import os - +import torch +import pytest +import logging import numpy as np + from common.constants import test_device, test_precision from openvino.frontend.pytorch.ts_decoder import TorchScriptPythonDecoder - from openvino.frontend import FrontEndManager from openvino.runtime import Core, Type, PartialShape import openvino.properties.hint as hints -import torch -from packaging import version -import pytest + +logging.basicConfig(level=logging.DEBUG) def skip_check(param): @@ -124,13 +125,9 @@ def numpy_to_torch_recursively(x): from torch.export import export em = export(model, tuple(torch_inputs)) - if version.parse(torch.__version__) >= version.parse("2.3"): - em = em.run_decompositions() - gm = em.module() - print(gm.code) converted_model = convert_model( - em, example_input=torch_inputs) + em, example_input=torch_inputs, verbose=True) self._resolve_input_shape_dtype( converted_model, ov_inputs, dynamic_shapes) smodel = model @@ -242,7 +239,7 @@ def convert_via_mo(self, model, example_input, trace_model, dynamic_shapes, ov_i if not dynamic_shapes: input_shapes = [inp.shape for inp in ov_inputs] kwargs["input"] = input_shapes - om = convert_model(decoder, **kwargs) + om = convert_model(decoder, verbose=True, **kwargs) self._resolve_input_shape_dtype(om, ov_inputs, dynamic_shapes) return smodel, om diff --git a/tests/layer_tests/pytorch_tests/test_bitwise_ops.py b/tests/layer_tests/pytorch_tests/test_bitwise_ops.py index 1cf458500bcc71..a400f6dcd76d17 100644 --- a/tests/layer_tests/pytorch_tests/test_bitwise_ops.py +++ b/tests/layer_tests/pytorch_tests/test_bitwise_ops.py @@ -4,6 +4,8 @@ import numpy as np import pytest import torch +from packaging import version + from pytorch_layer_test_class import PytorchLayerTest, skip_if_export @@ -69,7 +71,7 @@ def forward_not_out(self, tensor_a, out): ) @pytest.mark.parametrize("out", [False, skip_if_export(True)]) def test_bitwise_mixed_dtypes( - self, op_type, out, lhs_dtype, rhs_dtype, lhs_shape, 
rhs_shape, ie_device, precision, ir_version + self, op_type, out, lhs_dtype, rhs_dtype, lhs_shape, rhs_shape, ie_device, precision, ir_version ): if ie_device == "GPU" and (lhs_dtype != "bool" or rhs_dtype != "bool"): pytest.xfail(reason="bitwise ops are not supported on GPU") diff --git a/tests/layer_tests/pytorch_tests/test_col2im.py b/tests/layer_tests/pytorch_tests/test_col2im.py index 8cb7ea96cb8391..1dc44557c359fb 100644 --- a/tests/layer_tests/pytorch_tests/test_col2im.py +++ b/tests/layer_tests/pytorch_tests/test_col2im.py @@ -40,6 +40,7 @@ def forward(self, x): @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export @pytest.mark.parametrize("output_size,kernel_size", [([4, 5], [2, 2])]) @pytest.mark.parametrize("dilation", [1, 2, [1, 2]]) @pytest.mark.parametrize("padding", [0, 5, [2, 3]]) diff --git a/tests/layer_tests/pytorch_tests/test_eye.py b/tests/layer_tests/pytorch_tests/test_eye.py index 37b850088844cd..f93e77a8b2844a 100644 --- a/tests/layer_tests/pytorch_tests/test_eye.py +++ b/tests/layer_tests/pytorch_tests/test_eye.py @@ -3,6 +3,7 @@ import pytest import torch +from packaging import version from pytorch_layer_test_class import PytorchLayerTest @@ -14,7 +15,6 @@ def _prepare_input(self, m, n=None): return (np.array(m, dtype="int32"), ) return (np.array(m, dtype="int32"), np.array(n, dtype="int32")) - def create_model(self, num_inputs, dtype): import torch dtype_map = { @@ -45,29 +45,31 @@ def __init__(self, dtype): def forward(self, x, y): return torch.eye(x, y, dtype=self.dtype) - - ref_net = None - - return aten_eye_1_input(pt_dtype) if num_inputs == 1 else aten_eye_2_inputs(pt_dtype), ref_net, ("aten::eye", "aten::IntImplicit") + model = aten_eye_1_input(pt_dtype) if num_inputs == 1 else aten_eye_2_inputs(pt_dtype) + return model, None, ["aten::eye", "aten::IntImplicit"] @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export @pytest.mark.parametrize("dtype", ["bool", "int8", "uint8", "int32", "int64", "float32", "float64"]) @pytest.mark.parametrize("m", [2, 3, 4, 5]) - @pytest.mark.skipif(torch.__version__ < '2.3.0', reason="`aten.eye` is not supported in PyTorch versions earlier than 2.3.") def test_eye_square(self, dtype, m, ie_device, precision, ir_version): + if PytorchLayerTest.use_torch_export() and version.parse(torch.__version__) < version.parse("2.3"): + pytest.skip("Not supported in PyTorch versions earlier than 2.3.") if ie_device == "GPU": pytest.xfail(reason="eye is not supported on GPU") - self._test(*self.create_model(1, dtype), ie_device, precision, ir_version, kwargs_to_prepare_input={"m": m}) + self._test(*self.create_model(1, dtype), ie_device, precision, + ir_version, kwargs_to_prepare_input={"m": m}) @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export @pytest.mark.parametrize("dtype", ["bool", "int8", "uint8", "int32", "int64", "float32", "float64"]) @pytest.mark.parametrize(("m", "n"), [[2, 2], [3, 4], [5, 3]]) - @pytest.mark.skipif(torch.__version__ < '2.3.0', reason="`aten.eye` is not supported in PyTorch versions earlier than 2.3.") def test_eye(self, dtype, m, n, ie_device, precision, ir_version): + if (PytorchLayerTest.use_torch_export() and version.parse(torch.__version__) < version.parse("2.3")): + pytest.skip("Not supported in PyTorch versions earlier than 2.3.") if ie_device == "GPU": pytest.xfail(reason="eye is not supported on GPU") - self._test(*self.create_model(2, dtype), ie_device, precision, ir_version, kwargs_to_prepare_input={"m": m, "n": n}) + 
self._test(*self.create_model(2, dtype), ie_device, precision, + ir_version, kwargs_to_prepare_input={"m": m, "n": n}) diff --git a/tests/layer_tests/pytorch_tests/test_fork_wait.py b/tests/layer_tests/pytorch_tests/test_fork_wait.py new file mode 100644 index 00000000000000..577d20aba83afc --- /dev/null +++ b/tests/layer_tests/pytorch_tests/test_fork_wait.py @@ -0,0 +1,38 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import torch + +from pytorch_layer_test_class import PytorchLayerTest + + +class TestForkWait(PytorchLayerTest): + + def _prepare_input(self): + return (np.random.randn(10, 20),) + + def create_model(self): + + class AddMod(torch.nn.Module): + def forward(self, a: torch.Tensor, b: int): + return a + b, a - b + + class Mod(torch.nn.Module): + def __init__(self): + super().__init__() + self.mod = AddMod() + + def forward(self, input): + fut = torch.jit.fork(self.mod, a=input, b=2) + return torch.jit.wait(fut) + + return Mod(), None, ["prim::fork", "aten::wait"] + + @pytest.mark.nightly + @pytest.mark.precommit + @pytest.mark.parametrize(("to_trace"), [True, False]) + def test_fork_wait(self, to_trace, ie_device, precision, ir_version): + self._test(*self.create_model(), ie_device, precision, + ir_version, trace_model=to_trace) diff --git a/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_dot.py b/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_dot.py index fdb3b17ba33b68..ce0251ce860b65 100644 --- a/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_dot.py +++ b/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_dot.py @@ -44,9 +44,7 @@ def create_keras_dot_net(self, input_shapes, axes, input_type, normalize): ([[5, 1, 5, 2], [5, 1, 2, 5]], (2, 3)), ([[5, 1, 102, 2], [5, 1, 4, 102]], (-2, -1)) ]) - @pytest.mark.parametrize('input_type', [np.float32, np.float64, - np.int8, np.int16, np.int32, np.int64, - np.uint8, np.uint16, np.uint32, np.uint64]) + @pytest.mark.parametrize('input_type', [np.float32, np.float64]) @pytest.mark.parametrize('normalize', [True, False]) @pytest.mark.nightly @pytest.mark.precommit diff --git a/tests/layer_tests/tensorflow_tests/test_tf_ExpandDims.py b/tests/layer_tests/tensorflow_tests/test_tf_ExpandDims.py index f0f9085d32ba2f..e982867c9ac08d 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_ExpandDims.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_ExpandDims.py @@ -6,6 +6,7 @@ import tensorflow as tf from common.tf_layer_test_class import CommonTFLayerTest +rng = np.random.default_rng(62362) class TestExpandDims(CommonTFLayerTest): def _prepare_input(self, inputs_info): @@ -40,3 +41,54 @@ def test_expand_dims_basic(self, params, ie_device, precision, ir_version, temp_ self._test(*self.create_expand_dims_net(**params), ie_device, precision, ir_version, temp_dir=temp_dir, use_legacy_frontend=use_legacy_frontend) + + +class TestExpandDimsComplex(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + # generate elements so that the input tensor may contain repeating elements + assert 'param_real:0' in inputs_info + assert 'param_imag:0' in inputs_info + + input_shape = inputs_info['param_real:0'] + + inputs_data = {} + inputs_data['param_real:0'] = rng.integers(-10.0, 10.0, input_shape).astype(np.float32) + inputs_data['param_imag:0'] = rng.integers(-10.0, 10.0, input_shape).astype(np.float32) + + return inputs_data + + def create_expand_dims_complex_net(self, axis_dtype, input_shape, axis): + tf.compat.v1.reset_default_graph() 
+ with tf.compat.v1.Session() as sess: + param_real = tf.compat.v1.placeholder(np.float32, input_shape, 'param_real') + param_imag = tf.compat.v1.placeholder(np.float32, input_shape, 'param_imag') + + complex = tf.raw_ops.Complex(real=param_real, imag=param_imag) + + axis = tf.constant(axis, dtype=axis_dtype) + + result = tf.raw_ops.ExpandDims(input=complex, axis=axis) + + tf.raw_ops.Real(input=result) + tf.raw_ops.Imag(input=result) + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_basic = [ + dict(input_shape=[], axis=0), + dict(input_shape=[2, 3], axis=1), + dict(input_shape=[2, 3, 4], axis=-1), + dict(input_shape=[2, 6, 5], axis=-2), + ] + + @pytest.mark.parametrize("axis_dtype", [np.int32, np.int64]) + @pytest.mark.parametrize("op_args", test_basic) + @pytest.mark.nightly + @pytest.mark.precommit + def test_expand_dims_basic_complex(self, axis_dtype, op_args, ie_device, precision, ir_version, temp_dir, use_legacy_frontend): + self._test(*self.create_expand_dims_complex_net(axis_dtype, **op_args), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_legacy_frontend=use_legacy_frontend) diff --git a/tests/model_hub_tests/pytorch/torch_utils.py b/tests/model_hub_tests/pytorch/torch_utils.py index 09826b058c7855..5b351c6317e9bd 100644 --- a/tests/model_hub_tests/pytorch/torch_utils.py +++ b/tests/model_hub_tests/pytorch/torch_utils.py @@ -75,7 +75,10 @@ def convert_model_impl(self, model_obj): pt_res = model_obj(**self.example) graph = export(model_obj, tuple(), self.example) if version.parse(torch.__version__) >= version.parse("2.2"): - graph = graph.run_decompositions() + from torch._decomp import get_decompositions + from openvino.frontend.pytorch.torchdynamo.decompositions import get_export_decomposition_list + decomp = get_decompositions(get_export_decomposition_list()) + graph = graph.run_decompositions(decomp_table=decomp) gm = graph.module() print(gm.code) diff --git a/tests/requirements_pytorch b/tests/requirements_pytorch index b82e0c76409057..0bda286eb83252 100644 --- a/tests/requirements_pytorch +++ b/tests/requirements_pytorch @@ -1,10 +1,14 @@ +# test ovc with NumPy 2.x on Ubuntu 24 with default Python 3.12 +# test against NumPy 1.x with older Python versions # optimum still requires numpy<2.0.0 -numpy==1.26.4 +numpy==1.26.4; python_version < "3.12" +numpy==2.1.1; python_version >= "3.12" torch==2.4.1; platform_system != "Darwin" or platform_machine != "x86_64" -torch==2.2.0; platform_system == "Darwin" and platform_machine == "x86_64" +torch==2.2.2; platform_system == "Darwin" and platform_machine == "x86_64" --extra-index-url https://download.pytorch.org/whl/cpu -torchvision==0.19.1 +torchvision==0.19.1; platform_system != "Darwin" or platform_machine != "x86_64" +torchvision==0.17.2; platform_system == "Darwin" and platform_machine == "x86_64" # transformers 4.45.1 is available # but optimum still requires <4.45.0 transformers==4.44.2 @@ -13,30 +17,30 @@ pytest-html==4.1.1 pytest-xdist[psutil]==3.6.1 defusedxml==0.7.1 -auto-gptq==0.7.1; platform_system == "Linux" and platform_machine == "x86_64" +auto-gptq==0.7.1; platform_system == "Linux" and platform_machine == "x86_64" and python_version < "3.12" av==13.0.0 -basicsr==1.4.2 +basicsr==1.4.2; python_version < "3.12" datasets==3.0.1 easyocr==1.7.2 -facexlib==0.3.0 -librosa==0.10.2 -optimum==1.22.0 +facexlib==0.3.0; python_version < "3.12" +librosa==0.10.2; python_version < "3.12" packaging==24.1 pandas==2.2.3 protobuf==5.28.2 -pyctcdecode==0.5.0 
+pyctcdecode==0.5.0; python_version < "3.12" sacremoses==0.1.1 sentencepiece==0.2.0 soundfile==0.12.1 -super-image==0.1.7 +super-image==0.1.7; python_version < "3.12" timm==1.0.8 -torchaudio==2.4.1 +torchaudio==2.4.1; platform_system != "Darwin" or platform_machine != "x86_64" +torchaudio==2.2.2; platform_system == "Darwin" and platform_machine == "x86_64" wheel==0.44.0 PyYAML==6.0.2 kornia==0.7.3 # use latest released version once it's available -git+https://github.com/huggingface/optimum-intel.git@main +git+https://github.com/huggingface/optimum-intel.git@main; python_version < "3.12" # set 'export HF_HUB_ENABLE_HF_TRANSFER=1' to benefits from hf_transfer hf_transfer==0.1.8 diff --git a/tests/requirements_tensorflow b/tests/requirements_tensorflow index 9d025397ed1fbd..c29aa777fda537 100644 --- a/tests/requirements_tensorflow +++ b/tests/requirements_tensorflow @@ -4,7 +4,10 @@ pytest==7.0.1 pytest-xdist[psutil]==3.6.1 pytest-html==4.1.1 transformers==4.45.1 -tensorflow==2.17.0 +# install exact keras version since tensorflow depends and has no upper bound for it +keras==3.6.0 +tensorflow==2.17.0; platform_system != "Darwin" or platform_machine != "x86_64" +tensorflow==2.16.2; platform_system == "Darwin" and platform_machine == "x86_64" # tensorflow-text is not available for both Windows and ARM platforms tensorflow-text==2.17.0; platform_system == "Linux" and platform_machine == "x86_64" tensorflow-hub==0.16.1 diff --git a/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py b/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py index b79b24e9ce76a3..dfe25f27d13d7d 100644 --- a/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +++ b/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py @@ -40,15 +40,20 @@ def extract_module_extensions(args): except: pass if not is_good_version: - raise RuntimeError( - "NNCF models produced by nncf<2.6 are not supported directly. Please upgrade nncf or export to ONNX first.") + raise RuntimeError("NNCF models produced by nncf<2.6 are not " + "supported directly. Please upgrade nncf or " + "export to ONNX first.") inputs = prepare_torch_inputs(example_inputs) if not isinstance(model, (TorchScriptPythonDecoder, TorchFXPythonDecoder)): if hasattr(torch, "export") and isinstance(model, (torch.export.ExportedProgram)): from packaging import version if version.parse(torch.__version__) >= version.parse("2.2"): - model = model.run_decompositions() + from torch._decomp import get_decompositions + from openvino.frontend.pytorch.torchdynamo.decompositions import get_export_decomposition_list + decomp = get_decompositions(get_export_decomposition_list()) + model = model.run_decompositions(decomp_table=decomp) gm = model.module() + log.debug(gm.code) decoder = TorchFXPythonDecoder(gm) else: decoder = TorchScriptPythonDecoder(
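Both torch_utils.py and pytorch_frontend_utils.py above now run only OpenVINO's export decomposition list over the exported program instead of the full default table. Below is a minimal sketch of that flow, assuming torch >= 2.2 and an OpenVINO build where openvino.convert_model accepts a torch.export.ExportedProgram; the toy model and inputs are made up for illustration.

import torch
from torch.export import export
from torch._decomp import get_decompositions

from openvino import convert_model
from openvino.frontend.pytorch.torchdynamo.decompositions import get_export_decomposition_list

model = torch.nn.Linear(4, 2).eval()
example = (torch.randn(1, 4),)

exported = export(model, example)
# Apply only the decompositions OpenVINO's export path expects,
# rather than exported.run_decompositions() with the default table.
decomp = get_decompositions(get_export_decomposition_list())
exported = exported.run_decompositions(decomp_table=decomp)

ov_model = convert_model(exported, example_input=example)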