diff --git a/.github/actions/handle_docker/action.yml b/.github/actions/handle_docker/action.yml index a0aa99c1442d5b..f79bc06afa9f5c 100644 --- a/.github/actions/handle_docker/action.yml +++ b/.github/actions/handle_docker/action.yml @@ -27,17 +27,17 @@ runs: using: 'composite' steps: - name: Checkout head - uses: actions/checkout@v4 + uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 - name: Checkout base - uses: actions/checkout@v4 + uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 with: ref: ${{ github.base_ref || github.event.merge_group.base_ref }} sparse-checkout: ${{ inputs.dockerfiles_root_dir }}/docker_tag path: base - name: Install Python dependencies - uses: py-actions/py-dependency-install@v4 + uses: py-actions/py-dependency-install@30aa0023464ed4b5b116bd9fbdab87acf01a484e # v4.1.0 with: path: "${{ github.action_path }}/requirements.txt" update-setuptools: "false" @@ -45,7 +45,10 @@ runs: - name: Set up Docker Buildx id: buildx - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@c47758b77c9736f4b2ef4073d4d51994fabfe349 # v3.7.1 + with: + driver-opts: | + image=${{ inputs.registry }}/dockerio/moby/buildkit:buildx-stable-1 - name: Handle docker images id: handle_images @@ -62,7 +65,7 @@ runs: --base_tag_file "base/${{ inputs.dockerfiles_root_dir }}/docker_tag" \ --docker_env_changed "${{ fromJSON(inputs.changed_components).docker_env }}" \ --dockerfiles_changed "${{ fromJSON(inputs.changed_components).dockerfiles }}" \ - --docker_builder "${{ steps.buildx.outputs.name}}" \ + --docker_builder "${{ steps.buildx.outputs.name }}" \ --repo "${{ github.repository }}" \ --ref_name "${{ github.ref_name }}" \ $([[ -n $pr ]] && echo "--pr $pr" || echo '-s ${{ github.sha }}') \ diff --git a/.github/actions/setup_python/action.yml b/.github/actions/setup_python/action.yml index 0d9138bc643d2a..80e22866170535 100644 --- a/.github/actions/setup_python/action.yml +++ 
b/.github/actions/setup_python/action.yml @@ -56,7 +56,7 @@ runs: - if: ${{ runner.os == 'macOS' || runner.os == 'Windows' || (runner.os == 'Linux' && runner.arch != 'ARM64' && steps.check_python.outputs.installed == 'false' ) }} name: Setup Python ${{ inputs.version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: ${{ inputs.version }} env: diff --git a/.github/actions/smart-ci/action.yml b/.github/actions/smart-ci/action.yml index cd111d617ddc1b..ec65d2f1e9e82c 100644 --- a/.github/actions/smart-ci/action.yml +++ b/.github/actions/smart-ci/action.yml @@ -30,7 +30,6 @@ inputs: components_config_schema: description: "Path to the schema file for components configuration" required: false - default: ".github/actions/smart-ci/components_schema.yml" labeler_config: description: "Path to labeler configuration file" required: false @@ -66,7 +65,7 @@ runs: using: "composite" steps: - name: Wait for labeler to finish - uses: lewagon/wait-on-check-action@v1.3.1 + uses: lewagon/wait-on-check-action@ccfb013c15c8afb7bf2b7c028fb74dc5a068cccc # v1.3.4 if: ${{ github.event_name == 'pull_request' }} with: ref: ${{ github.event.pull_request.head.sha }} @@ -75,13 +74,13 @@ runs: wait-interval: 10 - name: checkout components file - uses: actions/checkout@v4 + uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 with: sparse-checkout: .github/components.yml sparse-checkout-cone-mode: false - name: Install Python dependencies - uses: py-actions/py-dependency-install@v4 + uses: py-actions/py-dependency-install@30aa0023464ed4b5b116bd9fbdab87acf01a484e # v4.1.0 with: path: "${{ github.action_path }}/requirements.txt" update-setuptools: "false" @@ -101,7 +100,7 @@ runs: -f "${{ inputs.ref_name }}" \ -p "${{ inputs.component_pattern }}" \ -c "${{ inputs.components_config }}" \ - -m "${{ inputs.components_config_schema }}" \ + -m "${{ inputs.components_config_schema || 
env.DEFAULT_CONFIG_SCHEMA }}" \ -l "${{ inputs.labeler_config }}" \ --enable_for_org "${{ inputs.enable_for_org }}" \ --skip-when-only-listed-labels-set "${{ inputs.skip_when_only_listed_labels_set }}" \ @@ -109,3 +108,4 @@ runs: shell: bash env: GITHUB_TOKEN: ${{ inputs.repo_token }} + DEFAULT_CONFIG_SCHEMA: "${{ github.action_path }}/components_schema.yml" diff --git a/.github/dockerfiles/docker_tag b/.github/dockerfiles/docker_tag index 56faa37d1da67f..5a4f7795ea4a44 100644 --- a/.github/dockerfiles/docker_tag +++ b/.github/dockerfiles/docker_tag @@ -1 +1 @@ -pr-26993 \ No newline at end of file +pr-27384 diff --git a/.github/workflows/android_x64.yml b/.github/workflows/android_x64.yml index a667a07da5bd3e..1cdb2023784979 100644 --- a/.github/workflows/android_x64.yml +++ b/.github/workflows/android_x64.yml @@ -135,6 +135,7 @@ jobs: -DCMAKE_C_COMPILER_LAUNCHER=${{ env.CMAKE_C_COMPILER_LAUNCHER }} \ -DENABLE_LTO=ON \ -DENABLE_PYTHON=OFF \ + -DENABLE_TESTS=ON \ -DOPENVINO_EXTRA_MODULES=${{ env.OPENVINO_GENAI_REPO }} \ -S ${OPENVINO_REPO} \ -B ${BUILD_DIR} diff --git a/.github/workflows/build_doc.yml b/.github/workflows/build_doc.yml index 6623f5ea182da1..8c78375e61769c 100644 --- a/.github/workflows/build_doc.yml +++ b/.github/workflows/build_doc.yml @@ -29,7 +29,7 @@ jobs: packages: graphviz texlive liblua5.2-0 libclang1-9 libclang-cpp9 version: 3.0 - - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 id: cp310 with: python-version: '3.10' @@ -41,7 +41,7 @@ jobs: - name: Install python dependencies run: | python3 -m pip install -r docs/requirements.txt - (cd docs/openvino_sphinx_theme && python3 setup.py install) + (cd docs/openvino_sphinx_theme && python3 -m pip install .) 
python3 -m pip install docs/openvino_custom_sphinx_sitemap - name: Download and install doxygen diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index fdb41226f4efb8..db5ba3de1a3c85 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -20,7 +20,7 @@ jobs: steps: - name: Setup python - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: '3.10.10' architecture: 'x64' diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index d87de4257e0270..9797414cde56c8 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -25,7 +25,56 @@ env: TARGET_BRANCH: ${{ github.base_ref || github.event.merge_group.base_ref || github.ref }} jobs: + + Smart_CI: + runs-on: ubuntu-latest + outputs: + affected_components: "${{ steps.smart_ci.outputs.affected_components }}" + changed_components: "${{ steps.smart_ci.outputs.changed_components }}" + skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" + steps: + - name: checkout action + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + sparse-checkout: .github/actions/smart-ci + + - name: Get affected components + id: smart_ci + uses: ./.github/actions/smart-ci + with: + repository: ${{ github.repository }} + pr: ${{ github.event.number }} + commit_sha: ${{ github.sha }} + ref_name: ${{ github.ref_name }} + component_pattern: "category: (.*)" + repo_token: ${{ secrets.GITHUB_TOKEN }} + skip_when_only_listed_labels_set: 'docs' + skip_when_only_listed_files_changed: '*.md,*.rst,*.png,*.jpg,*.svg' + + Docker: + needs: Smart_CI + runs-on: aks-linux-4-cores-16gb-docker-build + container: + image: openvinogithubactions.azurecr.io/docker_build:0.2 + volumes: + - /mount:/mount + outputs: + images: "${{ steps.handle_docker.outputs.images }}" + steps: + - name: Checkout + uses: 
actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - uses: ./.github/actions/handle_docker + id: handle_docker + with: + images: | + ov_build/ubuntu_20_04_x64 + registry: 'openvinogithubactions.azurecr.io' + dockerfiles_root_dir: '.github/dockerfiles' + changed_components: ${{ needs.smart_ci.outputs.changed_components }} + Build: + needs: Docker timeout-minutes: 150 defaults: run: @@ -33,7 +82,10 @@ jobs: runs-on: aks-linux-16-cores-32gb if: ${{ github.repository_owner == 'openvinotoolkit' }} container: - image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04 + image: ${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_20_04_x64 }} + volumes: + - /mount:/mount + options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING env: DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input CMAKE_BUILD_TYPE: 'Release' @@ -43,13 +95,16 @@ jobs: OPENVINO_CONTRIB_REPO: /__w/openvino/openvino/openvino_contrib BUILD_DIR: /__w/openvino/openvino/openvino_build COVERITY_TOOL_DIR: /__w/openvino/openvino/coverity_tool + CMAKE_CXX_COMPILER_LAUNCHER: sccache + CMAKE_C_COMPILER_LAUNCHER: sccache + SCCACHE_IGNORE_SERVER_IO_ERROR: 1 + SCCACHE_SERVER_PORT: 35555 + SCCACHE_ERROR_LOG: /__w/openvino/sccache_log.txt + SCCACHE_LOG: warn + SCCACHE_AZURE_KEY_PREFIX: coverity_lin + SCCACHE_CACHE_SIZE: 50G steps: - - name: Install git - run: | - apt-get update - apt-get install --assume-yes --no-install-recommends git ca-certificates - - name: Clone OpenVINO uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: @@ -86,7 +141,9 @@ jobs: # # Build # - + - name: Clean sccache stats + run: ${SCCACHE_PATH} --zero-stats + - name: CMake configure - OpenVINO run: | cmake \ @@ -108,12 +165,13 @@ jobs: - name: Cmake build - OpenVINO with Coverity run: ${COVERITY_TOOL_DIR}/cov-analysis*/bin/cov-build --dir ${BUILD_DIR}/cov-int cmake --build ${BUILD_DIR} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} + + - name: 
Show sccache stats + run: ${SCCACHE_PATH} --show-stats - name: Pack Artefacts - run: | - pushd ${BUILD_DIR} - tar -cvf - cov-int | pigz > openvino.tgz - popd + run: tar -cvf - cov-int | pigz > openvino.tgz + working-directory: ${{ env.BUILD_DIR }} - name: Submit artefacts run: | diff --git a/.github/workflows/job_gpu_tests.yml b/.github/workflows/job_gpu_tests.yml index b9862eac09cc05..195abbbd5fb0f9 100644 --- a/.github/workflows/job_gpu_tests.yml +++ b/.github/workflows/job_gpu_tests.yml @@ -74,7 +74,7 @@ jobs: DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input TZ: "Europe/London" # to prevent tzdata from waiting user input - name: Setup Python ${{ env.PYTHON_VERSION }} - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: ${{ env.PYTHON_VERSION }} diff --git a/.github/workflows/job_openvino_js.yml b/.github/workflows/job_openvino_js.yml index 6097d3e6f18bc4..ecb278fdb54ca3 100644 --- a/.github/workflows/job_openvino_js.yml +++ b/.github/workflows/job_openvino_js.yml @@ -52,7 +52,7 @@ jobs: - name: Setup Node ${{ env.NODE_VERSION }} if: runner.os != 'Linux' # Node is already installed in the Docker image - uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4 + uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0 with: node-version: ${{ env.NODE_VERSION }} diff --git a/.github/workflows/job_python_unit_tests.yml b/.github/workflows/job_python_unit_tests.yml index 1dd6ebbfc204d8..64be9ef4bbcc44 100644 --- a/.github/workflows/job_python_unit_tests.yml +++ b/.github/workflows/job_python_unit_tests.yml @@ -111,10 +111,7 @@ jobs: run: | # To enable pytest parallel features python3 -m pip install pytest-xdist[psutil] - # For torchvision to OpenVINO preprocessing converter - python3 -m pip install -r ${INSTALL_TEST_DIR}/python/preprocess/torchvision/requirements.txt - - # TODO: 
replace with Python API tests requirements + python3 -m pip install -r ${INSTALL_TEST_DIR}/bindings/python/requirements_test.txt python3 -m pip install -r ${INSTALL_TEST_DIR}/mo/requirements_dev.txt # @@ -158,6 +155,9 @@ jobs: - name: Install Python Layer tests dependencies run: | + # For torchvision to OpenVINO preprocessing converter + python3 -m pip install -r ${INSTALL_TEST_DIR}/python/preprocess/torchvision/requirements.txt + # layer test requirements python3 -m pip install -r ${LAYER_TESTS_INSTALL_DIR}/requirements.txt @@ -267,7 +267,7 @@ jobs: if: ${{ fromJSON(inputs.affected-components).Python_API.test }} run: | python3 -m pip uninstall -y numpy - python3 -m pip install "numpy>=2.0.0,<2.1.0" + python3 -m pip install "numpy>=2.0.0,<2.2.0" python3 -m pip install -r ${INSTALL_TEST_DIR}/bindings/python/requirements_test.txt # for 'template' extension export LD_LIBRARY_PATH=${INSTALL_TEST_DIR}:$LD_LIBRARY_PATH diff --git a/.github/workflows/job_tokenizers.yml b/.github/workflows/job_tokenizers.yml index 14243bda13531a..5c5e59aa3bec97 100644 --- a/.github/workflows/job_tokenizers.yml +++ b/.github/workflows/job_tokenizers.yml @@ -20,12 +20,15 @@ on: description: 'Components that are affected by changes in the commit defined by the Smart CI Action' type: string required: true + python-version: + description: 'Python version to setup. 
E.g., "3.11"' + type: string + required: true permissions: read-all env: PIP_CACHE_PATH: /mount/caches/pip/linux - PYTHON_VERSION: '3.11' TARGET_BRANCH: ${{ github.base_ref || github.event.merge_group.base_ref || github.ref }} jobs: @@ -63,10 +66,10 @@ jobs: if: runner.os == 'macOS' run: brew install pigz - - name: Setup Python ${{ env.PYTHON_VERSION }} + - name: Setup Python ${{ inputs.python-version }} uses: ./.github/actions/setup_python with: - version: ${{ env.PYTHON_VERSION }} + version: ${{ inputs.python-version }} pip-cache-path: ${{ runner.os == 'Linux' && env.PIP_CACHE_PATH || '' }} should-setup-pip-paths: ${{ runner.os == 'Linux' }} self-hosted-runner: ${{ runner.os == 'Linux' }} diff --git a/.github/workflows/linux_arm64.yml b/.github/workflows/linux_arm64.yml index 67cd4d0d1a5d84..0af30621a2a7fd 100644 --- a/.github/workflows/linux_arm64.yml +++ b/.github/workflows/linux_arm64.yml @@ -146,6 +146,7 @@ jobs: shell: bash container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"]}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' if: fromJSON(needs.smart_ci.outputs.affected_components).TOKENIZERS CXX_Unit_Tests: diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 7c47c1c635c2f8..5492ad40aa17b4 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -258,6 +258,7 @@ jobs: runner: 'macos-13' shell: bash affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' if: fromJSON(needs.smart_ci.outputs.affected_components).TOKENIZERS CXX_Unit_Tests: diff --git a/.github/workflows/mac_arm64.yml b/.github/workflows/mac_arm64.yml index 81cd229d1dd9f6..8100b74734ab17 100644 --- a/.github/workflows/mac_arm64.yml +++ b/.github/workflows/mac_arm64.yml @@ -258,6 +258,7 @@ jobs: runner: 'macos-13-xlarge' shell: bash affected-components: ${{ needs.smart_ci.outputs.affected_components }} 
+ python-version: '3.11' if: fromJSON(needs.smart_ci.outputs.affected_components).TOKENIZERS CXX_Unit_Tests: diff --git a/.github/workflows/mo.yml b/.github/workflows/mo.yml index 2405103755b552..f48986d4a0d304 100644 --- a/.github/workflows/mo.yml +++ b/.github/workflows/mo.yml @@ -27,7 +27,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Setup Python - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: '3.10' diff --git a/.github/workflows/ovc.yml b/.github/workflows/ovc.yml index 7d18643def3ce6..4d69563a741d3a 100644 --- a/.github/workflows/ovc.yml +++ b/.github/workflows/ovc.yml @@ -22,7 +22,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Setup Python - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: '3.10' diff --git a/.github/workflows/py_checks.yml b/.github/workflows/py_checks.yml index 13ddbaaa1ec41c..caed37eee89056 100644 --- a/.github/workflows/py_checks.yml +++ b/.github/workflows/py_checks.yml @@ -31,7 +31,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Setup Python - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: '3.9' diff --git a/.github/workflows/ubuntu_22.yml b/.github/workflows/ubuntu_22.yml index 753708d9b3ba51..92178fce7f5054 100644 --- a/.github/workflows/ubuntu_22.yml +++ b/.github/workflows/ubuntu_22.yml @@ -491,6 +491,7 @@ jobs: shell: bash container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_22_04_x64 }}", "volumes": ["/mount:/mount"]}' affected-components: ${{ needs.smart_ci.outputs.affected_components 
}} + python-version: '3.11' if: fromJSON(needs.smart_ci.outputs.affected_components).TOKENIZERS iGPU: diff --git a/.github/workflows/ubuntu_24.yml b/.github/workflows/ubuntu_24.yml index 9d9aba6739f22f..d874e06a189232 100644 --- a/.github/workflows/ubuntu_24.yml +++ b/.github/workflows/ubuntu_24.yml @@ -144,9 +144,31 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.12' + TensorFlow_Layer_Tests: + name: TensorFlow Layer Tests + needs: [ Docker, Build, Smart_CI, Openvino_tokenizers ] + uses: ./.github/workflows/job_tensorflow_layer_tests.yml + with: + runner: 'aks-linux-4-cores-16gb' + container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_24_04_x64 }}", "volumes": ["/mount:/mount"]}' + affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.12' + + Openvino_tokenizers: + name: OpenVINO tokenizers extension + needs: [ Build, Smart_CI, Docker ] + uses: ./.github/workflows/job_tokenizers.yml + with: + runner: 'aks-linux-4-cores-16gb' + shell: bash + container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_24_04_x64 }}", "volumes": ["/mount:/mount"]}' + affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.12' + if: fromJSON(needs.smart_ci.outputs.affected_components).TOKENIZERS + Overall_Status: name: ci/gha_overall_status_ubuntu_24 - needs: [Smart_CI, Build, Debian_Packages, Samples, Python_Unit_Tests] + needs: [Smart_CI, Build, Debian_Packages, Samples, Python_Unit_Tests, Pytorch_Layer_Tests, TensorFlow_Layer_Tests, Openvino_tokenizers] if: ${{ always() }} runs-on: ubuntu-latest steps: diff --git a/.github/workflows/windows_vs2019_release.yml b/.github/workflows/windows_vs2019_release.yml index 1a3e8753f15421..b9b8fa76d37c34 100644 --- a/.github/workflows/windows_vs2019_release.yml +++ b/.github/workflows/windows_vs2019_release.yml @@ -189,7 +189,7 @@ jobs: path: ${{ env.OPENVINO_JS_LIBS_DIR }} - 
name: Setup Node ${{ env.NODE_VERSION }} - uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4 + uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0 with: node-version: ${{ env.NODE_VERSION }} @@ -235,6 +235,7 @@ jobs: runner: 'aks-win-4-cores-8gb' shell: pwsh affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' if: fromJSON(needs.smart_ci.outputs.affected_components).TOKENIZERS Python_Unit_Tests: diff --git a/cmake/developer_package/frontends/frontends.cmake b/cmake/developer_package/frontends/frontends.cmake index d2aa0410476245..0815297a11a5eb 100644 --- a/cmake/developer_package/frontends/frontends.cmake +++ b/cmake/developer_package/frontends/frontends.cmake @@ -304,6 +304,9 @@ macro(ov_add_frontend) # then we need to mark it to be CXX ABI free ov_abi_free_target(${TARGET_NAME}) + # public target name + set_target_properties(${TARGET_NAME} PROPERTIES EXPORT_NAME frontend::${OV_FRONTEND_NAME}) + # installation if(NOT OV_FRONTEND_SKIP_INSTALL) @@ -351,9 +354,6 @@ macro(ov_add_frontend) COMPONENT ${dev_component} ${OV_CPACK_COMP_CORE_DEV_EXCLUDE_ALL} FILES_MATCHING PATTERN "*.hpp") - - # public target name - set_target_properties(${TARGET_NAME} PROPERTIES EXPORT_NAME frontend::${OV_FRONTEND_NAME}) endif() else() # skipped frontend has to be installed in static libraries case diff --git a/cmake/developer_package/packaging/common-libraries.cmake b/cmake/developer_package/packaging/common-libraries.cmake index 0ec054da853e2c..4ac0124d3089f0 100644 --- a/cmake/developer_package/packaging/common-libraries.cmake +++ b/cmake/developer_package/packaging/common-libraries.cmake @@ -30,7 +30,7 @@ macro(ov_common_libraries_cpack_set_dirs) ov_get_pyversion(pyversion) if(pyversion) - # should not be used in production; only by setup.py install + # should not be used in production; only by pip install set(OV_CPACK_PYTHONDIR lib/${pyversion}/site-packages) endif() @@ -94,7 +94,7 @@ 
macro(ov_define_component_include_rules) set(OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL EXCLUDE_FROM_ALL) set(OV_CPACK_COMP_BENCHMARK_APP_EXCLUDE_ALL ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL}) set(OV_CPACK_COMP_OVC_EXCLUDE_ALL ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL}) - # we don't pack artifacts of setup.py install, because it's called explicitly in conda / brew + # we don't pack artifacts of pip install, because it's called explicitly in conda / brew # or not used at all like in cases with conan / vcpkg set(OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE_EXCLUDE_ALL ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL}) # we don't need wheels in the distribution packages diff --git a/cmake/developer_package/packaging/debian/debian.cmake b/cmake/developer_package/packaging/debian/debian.cmake index 2b95fcfde5c145..a23d5290044e3d 100644 --- a/cmake/developer_package/packaging/debian/debian.cmake +++ b/cmake/developer_package/packaging/debian/debian.cmake @@ -95,12 +95,12 @@ macro(ov_define_component_include_rules) endif() # python if(ENABLE_PYTHON_PACKAGING) - # pack artifacts of setup.py install + # pack artifacts of pip install unset(OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE_EXCLUDE_ALL) else() set(OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE_EXCLUDE_ALL EXCLUDE_FROM_ALL) endif() - # we don't pack python components itself, we pack artifacts of setup.py install + # we don't pack python components itself, we pack artifacts of pip install set(OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL EXCLUDE_FROM_ALL) set(OV_CPACK_COMP_BENCHMARK_APP_EXCLUDE_ALL ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL}) set(OV_CPACK_COMP_OVC_EXCLUDE_ALL ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL}) diff --git a/cmake/developer_package/packaging/rpm/rpm.cmake b/cmake/developer_package/packaging/rpm/rpm.cmake index 45d9b0c0ca2121..bb4e7942d7640b 100644 --- a/cmake/developer_package/packaging/rpm/rpm.cmake +++ b/cmake/developer_package/packaging/rpm/rpm.cmake @@ -86,12 +86,12 @@ macro(ov_define_component_include_rules) 
endif() # python if(ENABLE_PYTHON_PACKAGING) - # pack artifacts of setup.py install + # pack artifacts of pip install unset(OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE_EXCLUDE_ALL) else() set(OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE_EXCLUDE_ALL EXCLUDE_FROM_ALL) endif() - # we don't pack python components itself, we pack artifacts of setup.py install + # we don't pack python components itself, we pack artifacts of pip install set(OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL EXCLUDE_FROM_ALL) set(OV_CPACK_COMP_BENCHMARK_APP_EXCLUDE_ALL ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL}) set(OV_CPACK_COMP_OVC_EXCLUDE_ALL ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL}) diff --git a/cmake/packaging/debian.cmake b/cmake/packaging/debian.cmake index 0eb020a5584f1c..59b312963c180d 100644 --- a/cmake/packaging/debian.cmake +++ b/cmake/packaging/debian.cmake @@ -98,6 +98,7 @@ macro(ov_cpack_settings) 2024.2.0 2024.3.0 2024.4.0 + 2024.5.0 ) ov_check_conflicts_versions(conflicting_versions) diff --git a/cmake/packaging/rpm.cmake b/cmake/packaging/rpm.cmake index cf915ee852417e..a4a63c35858bf9 100644 --- a/cmake/packaging/rpm.cmake +++ b/cmake/packaging/rpm.cmake @@ -86,6 +86,7 @@ macro(ov_cpack_settings) 2024.2.0 2024.3.0 2024.4.0 + 2024.5.0 ) ov_check_conflicts_versions(conflicting_versions) diff --git a/cmake/toolchains/mt.runtime.win32.toolchain.cmake b/cmake/toolchains/mt.runtime.win32.toolchain.cmake index 7dd4e1e7f96ded..b331d370bfe7bf 100644 --- a/cmake/toolchains/mt.runtime.win32.toolchain.cmake +++ b/cmake/toolchains/mt.runtime.win32.toolchain.cmake @@ -27,6 +27,11 @@ if(use_static_runtime) foreach(build_type "" "_DEBUG" "_MINSIZEREL" "_RELEASE" "_RELWITHDEBINFO") set(flag_var "CMAKE_${lang}_FLAGS${build_type}_INIT") string(REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") + if (build_type STREQUAL "_DEBUG") + set(${flag_var} "/MTd") + else() + set(${flag_var} "/MT") + endif() endforeach() endforeach() endif() diff --git a/cmake/toolchains/riscv64-071-thead-gnu.toolchain.cmake 
b/cmake/toolchains/riscv64-071-xuantie-gnu.toolchain.cmake similarity index 95% rename from cmake/toolchains/riscv64-071-thead-gnu.toolchain.cmake rename to cmake/toolchains/riscv64-071-xuantie-gnu.toolchain.cmake index f5e9e68aabedc6..5bc16de8df91e8 100644 --- a/cmake/toolchains/riscv64-071-thead-gnu.toolchain.cmake +++ b/cmake/toolchains/riscv64-071-xuantie-gnu.toolchain.cmake @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 # -# NOTE: use T-Head compiler: +# NOTE: use Xuantie compiler: # git clone https://github.com/XUANTIE-RV/xuantie-gnu-toolchain.git # ./configure --prefix=/opt/riscv # make linux @@ -22,10 +22,10 @@ set(CMAKE_SYSTEM_NAME Linux) set(CMAKE_SYSTEM_PROCESSOR riscv64) -set(RISCV64_THEAD ON) +set(RISCV64_XUANTIE ON) set(RISCV64_RVV0p7 ON) -set(RISCV_TOOLCHAIN_ROOT $ENV{RISCV_TOOLCHAIN_ROOT} CACHE PATH "Path to CLANG for RISC-V cross compiler build directory") +set(RISCV_TOOLCHAIN_ROOT $ENV{RISCV_TOOLCHAIN_ROOT} CACHE PATH "Path to GCC for RISC-V cross compiler build directory") set(CMAKE_SYSROOT "${RISCV_TOOLCHAIN_ROOT}/sysroot" CACHE PATH "RISC-V sysroot") set(CMAKE_C_COMPILER ${RISCV_TOOLCHAIN_ROOT}/bin/riscv64-unknown-linux-gnu-gcc) diff --git a/cmake/toolchains/riscv64-100-thead-gnu.toolchain.cmake b/cmake/toolchains/riscv64-100-xuantie-gnu.toolchain.cmake similarity index 95% rename from cmake/toolchains/riscv64-100-thead-gnu.toolchain.cmake rename to cmake/toolchains/riscv64-100-xuantie-gnu.toolchain.cmake index e00e30f975598f..0664b38a9ba68d 100644 --- a/cmake/toolchains/riscv64-100-thead-gnu.toolchain.cmake +++ b/cmake/toolchains/riscv64-100-xuantie-gnu.toolchain.cmake @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 # -# NOTE: use T-Head compiler: +# NOTE: use Xuantie compiler: # git clone https://github.com/XUANTIE-RV/xuantie-gnu-toolchain.git # ./configure --prefix=/opt/riscv # make linux @@ -22,10 +22,10 @@ set(CMAKE_SYSTEM_NAME Linux) set(CMAKE_SYSTEM_PROCESSOR riscv64) -set(RISCV64_THEAD ON) +set(RISCV64_XUANTIE ON) 
set(RISCV64_RVV1p0 ON) -set(RISCV_TOOLCHAIN_ROOT $ENV{RISCV_TOOLCHAIN_ROOT} CACHE PATH "Path to CLANG for RISC-V cross compiler build directory") +set(RISCV_TOOLCHAIN_ROOT $ENV{RISCV_TOOLCHAIN_ROOT} CACHE PATH "Path to GCC for RISC-V cross compiler build directory") set(CMAKE_SYSROOT "${RISCV_TOOLCHAIN_ROOT}/sysroot" CACHE PATH "RISC-V sysroot") set(CMAKE_C_COMPILER ${RISCV_TOOLCHAIN_ROOT}/bin/riscv64-unknown-linux-gnu-gcc) diff --git a/cmake/toolchains/riscv64-gnu.toolchain.cmake b/cmake/toolchains/riscv64-gnu.toolchain.cmake index 994b05f66b52f6..b58dcf169fc2da 100644 --- a/cmake/toolchains/riscv64-gnu.toolchain.cmake +++ b/cmake/toolchains/riscv64-gnu.toolchain.cmake @@ -2,12 +2,12 @@ # SPDX-License-Identifier: Apache-2.0 # -# NOTE: use with the following docker image https://github.com/Incarnation-p-lee/riscv-docker-emulator#llvm-clang-tool-chain +# NOTE: use with the following docker image https://github.com/Incarnation-p-lee/riscv-docker-emulator#gnu-toolchain set(CMAKE_SYSTEM_NAME Linux) set(CMAKE_SYSTEM_PROCESSOR riscv64) -set(RISCV_TOOLCHAIN_ROOT "/opt/riscv/gnu-toolchain/rv64-linux" CACHE PATH "Path to CLANG for RISC-V cross compiler build directory") +set(RISCV_TOOLCHAIN_ROOT "/opt/riscv/gnu-toolchain/rv64-linux" CACHE PATH "Path to GCC for RISC-V cross compiler build directory") set(CMAKE_SYSROOT "${RISCV_TOOLCHAIN_ROOT}/sysroot" CACHE PATH "RISC-V sysroot") set(CMAKE_C_COMPILER_TARGET riscv64-unknown-linux-gnu) @@ -26,9 +26,6 @@ set(CMAKE_OBJDUMP ${RISCV_TOOLCHAIN_ROOT}/bin/riscv64-unknown-linux-gnu-objdump) set(CMAKE_READELF ${RISCV_TOOLCHAIN_ROOT}/bin/riscv64-unknown-linux-gnu-readelf) set(PKG_CONFIG_EXECUTABLE "NOT-FOUND" CACHE PATH "Path to RISC-V pkg-config") -# Don't run the linker on compiler check -set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) - set(CMAKE_SHARED_LINKER_FLAGS_INIT "-L${CMAKE_SYSROOT}/lib") set(CMAKE_EXE_LINKER_FLAGS_INIT "-L${CMAKE_SYSROOT}/lib") set(CMAKE_MODULE_LINKER_FLAGS_INIT "-L${CMAKE_SYSROOT}/lib") diff --git 
a/cmake/toolchains/riscv64.linux.toolchain.cmake b/cmake/toolchains/riscv64.linux.toolchain.cmake new file mode 100644 index 00000000000000..cb088f5eca5052 --- /dev/null +++ b/cmake/toolchains/riscv64.linux.toolchain.cmake @@ -0,0 +1,13 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +# Install compiler on debian using: +# apt-get install -y gcc-riscv64-linux-gnu g++-riscv64-linux-gnu binutils-riscv64-linux-gnu + +set(CMAKE_SYSTEM_NAME Linux) +set(CMAKE_SYSTEM_PROCESSOR riscv64) + +set(CMAKE_C_COMPILER riscv64-linux-gnu-gcc) +set(CMAKE_CXX_COMPILER riscv64-linux-gnu-g++) +set(CMAKE_STRIP riscv64-linux-gnu-strip) diff --git a/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst b/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst index a49a9d8dba81d1..3f3d0064e8a4c6 100644 --- a/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst +++ b/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst @@ -15,23 +15,25 @@ deep learning models: | For their usage guides, see :doc:`Devices and Modes <../../openvino-workflow/running-inference/inference-devices-and-modes>`. | For a detailed list of devices, see :doc:`System Requirements <../release-notes-openvino/system-requirements>`. + Beside running inference with a specific device, OpenVINO offers the option of running automated inference with the following inference modes: | :doc:`Automatic Device Selection <../../openvino-workflow/running-inference/inference-devices-and-modes/auto-device-selection>`: -| automatically selects the best device available for the given task. It offers many - additional options and optimizations, including inference on multiple devices at the - same time. +| automatically selects the best device available for the given task. It offers many + additional options and optimizations, including inference on multiple devices at the + same time. 
+ | :doc:`Heterogeneous Inference <../../openvino-workflow/running-inference/inference-devices-and-modes/hetero-execution>`: -| enables splitting inference among several devices automatically, for example, if one device - doesn't support certain operations. +| enables splitting inference among several devices automatically, for example, if one device + doesn't support certain operations. | :doc:`Automatic Batching <../../openvino-workflow/running-inference/inference-devices-and-modes/automatic-batching>`: -| automatically groups inference requests to improve device utilization. +| automatically groups inference requests to improve device utilization. | :doc:`(LEGACY) Multi-device Inference <./../../documentation/legacy-features/multi-device>`: -| executes inference on multiple devices. Currently, this mode is considered a legacy - solution. Using Automatic Device Selection instead is advised. +| executes inference on multiple devices. Currently, this mode is considered a legacy + solution. Using Automatic Device Selection instead is advised. Feature Support and API Coverage @@ -74,13 +76,25 @@ Feature Support and API Coverage | HETERO | 61.22 % | 99.24 % | 86.05 % | +-------------------------+-----------+------------------+-------------------+ | || Percentage of API supported by the device, | -| || as of OpenVINO 2023.3, 08 Jan, 2024. | +| || as of OpenVINO 2024.4, 25 Oct, 2024. | +-------------------------+-----------+------------------+-------------------+ For setting up a relevant configuration, refer to the :doc:`Integrate with Customer Application <../../openvino-workflow/running-inference/integrate-openvino-with-your-application>` topic (step 3 "Configure input and output"). +.. 
dropdown:: Device support across OpenVINO 2024.4 distributions + + =============== ========== ====== =============== ======== ============ ========== ========== ========== + Device Archives PyPI APT/YUM/ZYPPER Conda Homebrew vcpkg Conan npm + =============== ========== ====== =============== ======== ============ ========== ========== ========== + CPU V V V V V V V V + GPU V V V V V V V V + NPU V\* V\* V\* n/a n/a n/a n/a V\* + =============== ========== ====== =============== ======== ============ ========== ========== ========== + + | \* **Of the Linux systems, versions 22.04 and 24.04 include drivers for NPU.** + | **For Windows, CPU inference on ARM64 is not supported.** .. note:: @@ -89,6 +103,7 @@ topic (step 3 "Configure input and output"). in your solutions, revert to the 2023.3 (LTS) version. With the OpenVINO™ 2023.0 release, support has been cancelled for: + - Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X - Intel® Vision Accelerator Design with Intel® Movidius™ diff --git a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst index 39b27d12c970fd..b8256af650e2f8 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst @@ -3,9 +3,11 @@ Most Efficient Large Language Models for AI PC This page is regularly updated to help you identify the best-performing LLMs on the Intel® Core™ Ultra processor family and AI PCs. +The current data is as of OpenVINO 2024.4, 24 Oct. 2024 -The tables below list key performance indicators for a selection of Large Language Models, -running on an Intel® Core™ Ultra 7-165H based system, on built-in GPUs.
+The tables below list the key performance indicators for a selection of Large Language Models, +running on an Intel® Core™ Ultra 7-165H, Intel® Core™ Ultra 7-265V, and Intel® Core™ Ultra +7-288V based system, on built-in GPUs. @@ -15,16 +17,13 @@ running on an Intel® Core™ Ultra 7-165H based system, on built-in GPUs. -.. tab-set:: - - .. tab-item:: OpenVINO - - .. csv-table:: - :class: modeldata stripe - :name: supportedModelsTableOv - :header-rows: 1 - :file: ../../_static/benchmarks_files/llm_models.csv +.. csv-table:: + :class: modeldata stripe + :name: supportedModelsTableOv + :header-rows: 1 + :file: ../../_static/benchmarks_files/llm_models.csv +| .. grid:: 1 1 2 2 :gutter: 4 @@ -34,18 +33,17 @@ running on an Intel® Core™ Ultra 7-165H based system, on built-in GPUs. All models listed here were tested with the following parameters: * Framework: PyTorch - * Model precision: INT4 * Beam: 1 * Batch size: 1 .. grid-item:: - .. button-link:: https://docs.openvino.ai/2024/_static/benchmarks_files/OV-2024.4-platform_list.pdf + .. button-link:: https://docs.openvino.ai/2024/_static/benchmarks_files/llm_models_platform_list_.pdf :color: primary :outline: :expand: - :material-regular:`download;1.5em` Get full system info [PDF] + :material-regular:`download;1.5em` Get system descriptions [PDF] .. 
button-link:: ../../_static/benchmarks_files/llm_models.csv :color: primary diff --git a/docs/articles_en/about-openvino/release-notes-openvino/system-requirements.rst b/docs/articles_en/about-openvino/release-notes-openvino/system-requirements.rst index 50ac3a3350658e..a12cacf8402953 100644 --- a/docs/articles_en/about-openvino/release-notes-openvino/system-requirements.rst +++ b/docs/articles_en/about-openvino/release-notes-openvino/system-requirements.rst @@ -114,7 +114,7 @@ Operating systems and developer environment Build environment components: - * Python 3.8-3.12 + * Python 3.9-3.12 * `Intel® HD Graphics Driver `__ required for inference on GPU * GNU Compiler Collection and CMake are needed for building from source: @@ -128,11 +128,16 @@ Operating systems and developer environment .. tab-item:: Windows 10 and 11 + OpenVINO Runtime requires certain C++ libraries to operate. To execute ready-made apps, + the libraries distributed by `Visual Studio redistributable package `__ + are suggested. For development and compilation of OpenVINO-integrated apps, the build + environment components are required instead. + Build environment components: - * `Microsoft Visual Studio 2019 `__ + * `Microsoft Visual Studio 2019 or later `__ * `CMake `__ 3.16 or higher - * `Python `__ 3.8-3.12 + * `Python `__ 3.9-3.12 * `Intel® HD Graphics Driver `__ required for inference on GPU @@ -144,7 +149,7 @@ Operating systems and developer environment * `Xcode `__ 10.3 * `CMake `__ 3.13 or higher - * `Python `__ 3.8-3.12 + * `Python `__ 3.9-3.12 .. tab-item:: DL framework versions: @@ -162,7 +167,193 @@ Operating systems and developer environment OpenVINO Python binaries are built with and redistribute oneTBB libraries. +OpenVINO Distributions +###################### + +Different OpenVINO distributions may support slightly different sets of features. +Read installation guides for particular distributions for more details. 
+Refer to the :doc:`OpenVINO Release Policy <../../../about-openvino/release-notes-openvino/release-policy>` +to learn more about the release types. + + +.. tab-set:: + + .. tab-item:: Archive + :name: archive-sysreq + + .. tab-set:: + + .. tab-item:: Linux + :name: archive-lnx-sysreq + + * `CMake 3.13 or higher, 64-bit `__ + * `Python 3.9 - 3.12, 64-bit `__ + * GCC: + + .. tab-set:: + + .. tab-item:: Ubuntu + :sync: ubuntu + + * GCC 9.3.0 (for Ubuntu 20.04), GCC 11.3.0 (for Ubuntu 22.04) or GCC 13.2.0 (for Ubuntu 24.04) + + .. tab-item:: RHEL 8 + :sync: rhel-8 + + * GCC 8.4.1 + + .. tab-item:: CentOS 7 + :sync: centos-7 + + * GCC 8.3.1 + + Use the following instructions to install it: + + Install GCC 8.3.1 via devtoolset-8 + + .. code-block:: sh + + sudo yum update -y && sudo yum install -y centos-release-scl epel-release + sudo yum install -y devtoolset-8 + + Enable devtoolset-8 and check current gcc version + + .. code-block:: sh + + source /opt/rh/devtoolset-8/enable + gcc -v + + .. tab-item:: macOS + :name: archive-mac-sysreq + + * `CMake 3.13 or higher `__ (choose "macOS 10.13 or later"). Add ``/Applications/CMake.app/Contents/bin`` to path (for default install). + * `Python 3.9 - 3.12 `__ (choose 3.9 - 3.12). Install and add to path. + * Apple Xcode Command Line Tools. In the terminal, run ``xcode-select --install`` from any directory + * (Optional) Apple Xcode IDE (not required for OpenVINO™, but useful for development) + + .. tab-item:: Windows + :name: archive-win-sysreq + + * `C++ libraries (included in Visual Studio redistributable) `__ (a core dependency for OpenVINO Runtime) + * `Microsoft Visual Studio 2019 or later `__ (for development and app compilation with OpenVINO) + * `CMake 3.14 or higher, 64-bit `__ (optional, only required for building sample applications) + * `Python 3.9 - 3.12, 64-bit `__ + + .. note:: + + To install Microsoft Visual Studio, follow the `Microsoft Visual Studio installation guide `__.
+ You can choose to download the Community version. During installation in the **Workloads** tab, choose **Desktop development with C++**. + + .. note:: + + You can either use `cmake.msi` which is the installation wizard or `cmake.zip` where you have to go into the `bin` folder and then manually add the path to environmental variables. + + .. important:: + + When installing Python, make sure you click the option **Add Python 3.x to PATH** to `add Python `__ to your `PATH` environment variable. + + .. tab-item:: APT + :sync: apt-sysreq + + .. tab-set:: + + .. tab-item:: Linux + :sync: linux + + * `CMake 3.13 or higher, 64-bit `__ + * GCC 9.3.0 (for Ubuntu 20.04), GCC 11.3.0 (for Ubuntu 22.04) or GCC 13.2.0 (for Ubuntu 24.04) + * `Python 3.9 - 3.12, 64-bit `__ + + .. tab-item:: Homebrew + :name: homebrew-sysreq + + .. tab-set:: + + .. tab-item:: Linux + :sync: linux + + * `Homebrew `_ + * `CMake 3.13 or higher, 64-bit `__ + * GCC 9.3.0 (for Ubuntu 20.04), GCC 11.3.0 (for Ubuntu 22.04) or GCC 13.2.0 (for Ubuntu 24.04) + * `Python 3.9 - 3.12, 64-bit `__ + + .. tab-item:: macOS + :sync: macos + + * `Homebrew `_ + * `CMake 3.13 or higher `__ (choose "macOS 10.13 or later"). Add ``/Applications/CMake.app/Contents/bin`` to path (for default installation). + * `Python 3.9 - 3.12 `__ . Install and add it to path. + * Apple Xcode Command Line Tools. In the terminal, run ``xcode-select --install`` from any directory to install it. + * (Optional) Apple Xcode IDE (not required for OpenVINO™, but useful for development) + + .. tab-item:: npm + :name: npm-sysreq + + .. tab-set:: + + .. tab-item:: Linux + :sync: linux + + All x86_64 / arm64 architectures are supported. + + * `Node.js version 21.0.0 and higher `__ + + .. tab-item:: macOS + :sync: macos + + All x86_64 / arm64 architectures are supported, however, only for CPU inference. + + * `Node.js version 21.0.0 and higher `__ + + .. tab-item:: Windows + :sync: Windows + + All x86_64 architectures are supported. 
Windows ARM is not supported. + + * `Node.js version 21.0.0 and higher `__ + + .. tab-item:: YUM + :name: yum-sysreq + + .. tab-set:: + + .. tab-item:: Linux + :sync: linux + + OpenVINO RPM packages are compatible with and can be run on the following operating systems: + + * RHEL 8.2 and higher + * Amazon Linux 2022 and 2023 + * Rocky Linux 8.7, 8.8 and 9.2-9.3 + * Alma Linux 8.7, 8.8 and 9.2-9.4 + * Oracle Linux 8.7, 8.8 and 9.2-9.4 + * Fedora 29 and higher up to 41 + * OpenEuler 20.03, 22.03, 23.03 and 24.03 + * Anolis OS 8.6 and 8.8 + * CentOS Stream 8 and 9 + + Software: + + * `CMake 3.13 or higher, 64-bit `_ + * GCC 8.4.1 + * `Python 3.9 - 3.12, 64-bit `_ + + .. tab-item:: ZYPPER + :name: zypper-sysreq + + .. tab-set:: + + .. tab-item:: Linux + :sync: linux + + OpenVINO RPM packages are compatible with and can be run on openSUSE Tumbleweed only. + + Software: + + * `CMake 3.13 or higher, 64-bit `_ + * GCC 8.2.0 + * `Python 3.9 - 3.12, 64-bit `_ The claims stated here may not apply to all use cases and setups. See -:doc:`Legal notices and terms of use <../additional-resources/terms-of-use>` for more information. \ No newline at end of file +:doc:`Legal notices and terms of use <../additional-resources/terms-of-use>` for more information. 
diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset1.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset1.rst index 55b29d30fa8502..1ce95152c20c90 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset1.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset1.rst @@ -66,7 +66,6 @@ Table of Contents * :doc:`LogicalXor <../operation-specs/logical/logical-xor-1>` * :doc:`LRN <../operation-specs/normalization/lrn-1>` * :doc:`LSTMCell <../operation-specs/sequence/lstm-cell-1>` -* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-1>` * :doc:`MatMul <../operation-specs/matrix/matmul-1>` * :doc:`MaxPool <../operation-specs/pooling/max-pool-1>` * :doc:`Maximum <../operation-specs/arithmetic/maximum-1>` @@ -122,5 +121,3 @@ Table of Contents * :doc:`Transpose <../operation-specs/movement/transpose-1>` * :doc:`Unsqueeze <../operation-specs/shape/unsqueeze-1>` * :doc:`VariadicSplit <../operation-specs/movement/variadic-split-1>` - - diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset10.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset10.rst index 04f151a3e26b84..56397ecd852305 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset10.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset10.rst @@ -107,7 +107,7 @@ Table of Contents * :doc:`Loop <../operation-specs/infrastructure/loop-5>` * :doc:`LRN <../operation-specs/normalization/lrn-1>` * :doc:`LSTMCell <../operation-specs/sequence/lstm-cell-1>` -* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-1>` +* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-5>` * :doc:`MatMul <../operation-specs/matrix/matmul-1>` * :doc:`MatrixNMS 
<../operation-specs/sort/matrix-non-max-suppression-8>` * :doc:`MaxPool <../operation-specs/pooling/max-pool-8>` @@ -192,4 +192,3 @@ Table of Contents * :doc:`Unique <../operation-specs/movement/unique-10>` * :doc:`Unsqueeze <../operation-specs/shape/unsqueeze-1>` * :doc:`VariadicSplit <../operation-specs/movement/variadic-split-1>` - diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset11.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset11.rst index 6ae5882e2e4fd7..8a918bde06ebb1 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset11.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset11.rst @@ -107,7 +107,7 @@ Table of Contents * :doc:`Loop <../operation-specs/infrastructure/loop-5>` * :doc:`LRN <../operation-specs/normalization/lrn-1>` * :doc:`LSTMCell <../operation-specs/sequence/lstm-cell-1>` -* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-1>` +* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-5>` * :doc:`MatMul <../operation-specs/matrix/matmul-1>` * :doc:`MatrixNMS <../operation-specs/sort/matrix-non-max-suppression-8>` * :doc:`MaxPool <../operation-specs/pooling/max-pool-8>` @@ -192,4 +192,3 @@ Table of Contents * :doc:`Unique <../operation-specs/movement/unique-10>` * :doc:`Unsqueeze <../operation-specs/shape/unsqueeze-1>` * :doc:`VariadicSplit <../operation-specs/movement/variadic-split-1>` - diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset12.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset12.rst index 6e6ddd61ba1e0e..06cfaa7f8f6a20 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset12.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset12.rst @@ -108,7 
+108,7 @@ Table of Contents * :doc:`Loop <../operation-specs/infrastructure/loop-5>` * :doc:`LRN <../operation-specs/normalization/lrn-1>` * :doc:`LSTMCell <../operation-specs/sequence/lstm-cell-1>` -* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-1>` +* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-5>` * :doc:`MatMul <../operation-specs/matrix/matmul-1>` * :doc:`MatrixNMS <../operation-specs/sort/matrix-non-max-suppression-8>` * :doc:`MaxPool <../operation-specs/pooling/max-pool-8>` @@ -193,4 +193,3 @@ Table of Contents * :doc:`Unique <../operation-specs/movement/unique-10>` * :doc:`Unsqueeze <../operation-specs/shape/unsqueeze-1>` * :doc:`VariadicSplit <../operation-specs/movement/variadic-split-1>` - diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset13.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset13.rst index 87396dfd542203..6bcea43cdeb103 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset13.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset13.rst @@ -113,7 +113,7 @@ Table of Contents * :doc:`Loop <../operation-specs/infrastructure/loop-5>` * :doc:`LRN <../operation-specs/normalization/lrn-1>` * :doc:`LSTMCell <../operation-specs/sequence/lstm-cell-1>` -* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-1>` +* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-5>` * :doc:`MatMul <../operation-specs/matrix/matmul-1>` * :doc:`MatrixNMS <../operation-specs/sort/matrix-non-max-suppression-8>` * :doc:`MaxPool <../operation-specs/pooling/max-pool-8>` @@ -201,4 +201,3 @@ Table of Contents * :doc:`Unique <../operation-specs/movement/unique-10>` * :doc:`Unsqueeze <../operation-specs/shape/unsqueeze-1>` * :doc:`VariadicSplit <../operation-specs/movement/variadic-split-1>` - diff --git 
a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset14.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset14.rst index 05131a907d15b4..eb967a6fc94e11 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset14.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset14.rst @@ -115,7 +115,7 @@ Table of Contents * :doc:`Loop <../operation-specs/infrastructure/loop-5>` * :doc:`LRN <../operation-specs/normalization/lrn-1>` * :doc:`LSTMCell <../operation-specs/sequence/lstm-cell-1>` -* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-1>` +* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-5>` * :doc:`MatMul <../operation-specs/matrix/matmul-1>` * :doc:`MatrixNMS <../operation-specs/sort/matrix-non-max-suppression-8>` * :doc:`MaxPool <../operation-specs/pooling/max-pool-14>` diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset15.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset15.rst index b32d7b71580177..12368033a1fa20 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset15.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset15.rst @@ -120,7 +120,7 @@ Table of Contents * :doc:`Loop <../operation-specs/infrastructure/loop-5>` * :doc:`LRN <../operation-specs/normalization/lrn-1>` * :doc:`LSTMCell <../operation-specs/sequence/lstm-cell-1>` -* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-1>` +* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-5>` * :doc:`MatMul <../operation-specs/matrix/matmul-1>` * :doc:`MatrixNMS <../operation-specs/sort/matrix-non-max-suppression-8>` * :doc:`MaxPool <../operation-specs/pooling/max-pool-14>` @@ -198,7 +198,7 @@ Table of Contents * :doc:`Split 
<../operation-specs/movement/split-1>` * :doc:`Sqrt <../operation-specs/arithmetic/sqrt-1>` * :doc:`SquaredDifference <../operation-specs/arithmetic/squared-difference-1>` -* :doc:`Squeeze <../operation-specs/shape/squeeze-1>` +* :doc:`Squeeze <../operation-specs/shape/squeeze-15>` * :doc:`STFT <../operation-specs/signals/stft-15>` * :doc:`StridedSlice <../operation-specs/movement/strided-slice-1>` * :doc:`StringTensorPack <../operation-specs/type/string-tensor-pack-15>` diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset2.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset2.rst index 5ea1c3bde54018..41170ad2036a25 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset2.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset2.rst @@ -68,7 +68,6 @@ Table of Contents * :doc:`LogicalXor <../operation-specs/logical/logical-xor-1>` * :doc:`LRN <../operation-specs/normalization/lrn-1>` * :doc:`LSTMCell <../operation-specs/sequence/lstm-cell-1>` -* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-1>` * :doc:`MatMul <../operation-specs/matrix/matmul-1>` * :doc:`MaxPool <../operation-specs/pooling/max-pool-1>` * :doc:`Maximum <../operation-specs/arithmetic/maximum-1>` @@ -128,5 +127,3 @@ Table of Contents * :doc:`Transpose <../operation-specs/movement/transpose-1>` * :doc:`Unsqueeze <../operation-specs/shape/unsqueeze-1>` * :doc:`VariadicSplit <../operation-specs/movement/variadic-split-1>` - - diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset3.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset3.rst index fcce876e6944df..9b2992db18cc36 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset3.rst +++ 
b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset3.rst @@ -76,7 +76,6 @@ Table of Contents * :doc:`LogicalXor <../operation-specs/logical/logical-xor-1>` * :doc:`LRN <../operation-specs/normalization/lrn-1>` * :doc:`LSTMCell <../operation-specs/sequence/lstm-cell-1>` -* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-1>` * :doc:`MatMul <../operation-specs/matrix/matmul-1>` * :doc:`MaxPool <../operation-specs/pooling/max-pool-1>` * :doc:`Maximum <../operation-specs/arithmetic/maximum-1>` @@ -144,4 +143,3 @@ Table of Contents * :doc:`Transpose <../operation-specs/movement/transpose-1>` * :doc:`Unsqueeze <../operation-specs/shape/unsqueeze-1>` * :doc:`VariadicSplit <../operation-specs/movement/variadic-split-1>` - diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset5.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset5.rst index 0c0ae150b1d21d..fdada28b6bdc06 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset5.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset5.rst @@ -86,7 +86,7 @@ Table of Contents * :doc:`Loop <../operation-specs/infrastructure/loop-5>` * :doc:`LRN <../operation-specs/normalization/lrn-1>` * :doc:`LSTMCell <../operation-specs/sequence/lstm-cell-1>` -* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-1>` +* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-5>` * :doc:`MatMul <../operation-specs/matrix/matmul-1>` * :doc:`MaxPool <../operation-specs/pooling/max-pool-1>` * :doc:`Maximum <../operation-specs/arithmetic/maximum-1>` @@ -162,5 +162,3 @@ Table of Contents * :doc:`Transpose <../operation-specs/movement/transpose-1>` * :doc:`Unsqueeze <../operation-specs/shape/unsqueeze-1>` * :doc:`VariadicSplit <../operation-specs/movement/variadic-split-1>` - - diff --git 
a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset6.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset6.rst index 8d626c71be2d89..d587370800aa9e 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset6.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset6.rst @@ -93,7 +93,7 @@ Table of Contents * :doc:`Loop <../operation-specs/infrastructure/loop-5>` * :doc:`LRN <../operation-specs/normalization/lrn-1>` * :doc:`LSTMCell <../operation-specs/sequence/lstm-cell-1>` -* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-1>` +* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-5>` * :doc:`MatMul <../operation-specs/matrix/matmul-1>` * :doc:`MaxPool <../operation-specs/pooling/max-pool-1>` * :doc:`Maximum <../operation-specs/arithmetic/maximum-1>` @@ -168,4 +168,3 @@ Table of Contents * :doc:`Transpose <../operation-specs/movement/transpose-1>` * :doc:`Unsqueeze <../operation-specs/shape/unsqueeze-1>` * :doc:`VariadicSplit <../operation-specs/movement/variadic-split-1>` - diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset7.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset7.rst index 7dd5d8fbba14d7..6de9bccf5921b7 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset7.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset7.rst @@ -96,7 +96,7 @@ Table of Contents * :doc:`Loop <../operation-specs/infrastructure/loop-5>` * :doc:`LRN <../operation-specs/normalization/lrn-1>` * :doc:`LSTMCell <../operation-specs/sequence/lstm-cell-1>` -* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-1>` +* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-5>` * :doc:`MatMul 
<../operation-specs/matrix/matmul-1>` * :doc:`MaxPool <../operation-specs/pooling/max-pool-1>` * :doc:`Maximum <../operation-specs/arithmetic/maximum-1>` @@ -172,4 +172,3 @@ Table of Contents * :doc:`Transpose <../operation-specs/movement/transpose-1>` * :doc:`Unsqueeze <../operation-specs/shape/unsqueeze-1>` * :doc:`VariadicSplit <../operation-specs/movement/variadic-split-1>` - diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset8.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset8.rst index 0bb5b336f9e2f2..f35299c9f4c97a 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset8.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset8.rst @@ -101,7 +101,7 @@ Table of Contents * :doc:`Loop <../operation-specs/infrastructure/loop-5>` * :doc:`LRN <../operation-specs/normalization/lrn-1>` * :doc:`LSTMCell <../operation-specs/sequence/lstm-cell-1>` -* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-1>` +* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-5>` * :doc:`MatMul <../operation-specs/matrix/matmul-1>` * :doc:`MatrixNMS <../operation-specs/sort/matrix-non-max-suppression-8>` * :doc:`MaxPool <../operation-specs/pooling/max-pool-8>` @@ -183,5 +183,3 @@ Table of Contents * :doc:`Transpose <../operation-specs/movement/transpose-1>` * :doc:`Unsqueeze <../operation-specs/shape/unsqueeze-1>` * :doc:`VariadicSplit <../operation-specs/movement/variadic-split-1>` - - diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset9.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset9.rst index ebb5dfdbdc6555..2421e4b9bc53c1 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset9.rst +++ 
b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset9.rst @@ -105,7 +105,7 @@ Table of Contents * :doc:`Loop <../operation-specs/infrastructure/loop-5>` * :doc:`LRN <../operation-specs/normalization/lrn-1>` * :doc:`LSTMCell <../operation-specs/sequence/lstm-cell-1>` -* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-1>` +* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-5>` * :doc:`MatMul <../operation-specs/matrix/matmul-1>` * :doc:`MatrixNMS <../operation-specs/sort/matrix-non-max-suppression-8>` * :doc:`MaxPool <../operation-specs/pooling/max-pool-8>` @@ -189,4 +189,3 @@ Table of Contents * :doc:`Transpose <../operation-specs/movement/transpose-1>` * :doc:`Unsqueeze <../operation-specs/shape/unsqueeze-1>` * :doc:`VariadicSplit <../operation-specs/movement/variadic-split-1>` - diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs.rst index 6ecbf2695699f9..9f7badf1a8d06a 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs.rst @@ -215,7 +215,7 @@ Operation Specifications Sin-1 Sinh-1 Slice-8 - SliceScatter + SliceScatter-15 SoftMax-1 SoftMax-8 SoftPlus-4 @@ -226,6 +226,7 @@ Operation Specifications Sqrt-1 SquaredDifference-1 Squeeze-1 + Squeeze-15 STFT-15 StridedSlice-1 StringTensorPack-15 diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/lstm-sequence-1.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/lstm-sequence-5.rst similarity index 98% rename from docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/lstm-sequence-1.rst rename to 
docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/lstm-sequence-5.rst index dcc21ca5321451..c00b4c819cc66a 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/lstm-sequence-1.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/lstm-sequence-5.rst @@ -3,10 +3,10 @@ LSTMSequence .. meta:: - :description: Learn about LSTMSequence-1 - a sequence processing operation, which + :description: Learn about LSTMSequence-5 - a sequence processing operation, which can be performed on seven required input tensors. -**Versioned name**: *LSTMSequence-1* +**Versioned name**: *LSTMSequence-5* **Category**: *Sequence processing* @@ -145,4 +145,3 @@ A single cell in the sequence is implemented in the same way as in :doc:`LSTM Ce - diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/shape/squeeze-15.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/shape/squeeze-15.rst new file mode 100644 index 00000000000000..1e112dce118e26 --- /dev/null +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/shape/squeeze-15.rst @@ -0,0 +1,174 @@ +Squeeze +======= + + +.. meta:: + :description: Learn about Squeeze-15 - a shape manipulation operation, which + can be performed on one required and one optional input tensor. + +**Versioned name**: *Squeeze-15* + +**Category**: *Shape manipulation* + +**Short description**: *Squeeze* removes dimensions equal to 1 from the first input tensor. + +**Detailed description**: *Squeeze* can be used with or without the second input tensor. + +* If only the first input is provided, every dimension that is equal to 1 will be removed from it. +* With the second input provided, each value is an index of a dimension from the first tensor that is to be removed. 
Specified dimension should be equal to 1, otherwise it will be ignored and copied as is. + Dimension indices can be specified directly, or by negative indices (counting dimensions from the end). + +.. note:: + + - If the index of the dimension to squeeze is provided as a constant input and it points to a dynamic dimension that might be `1`, and the *allow_axis_skip* attribute is ``false``, then the dimension is considered squeezable. Therefore, the rank of the output shape will be reduced, but not dynamic. If a dynamic rank is expected in such a case, the *allow_axis_skip* attribute needs to be set to ``true``. + - If the input with indices is empty or not provided, dynamic dimension compatible with `1` leads to dynamic rank of the output shape. + + +**Attributes**: + +* *allow_axis_skip* + + * **Description**: If true, shape inference results in a dynamic rank if selected axis has value 1 in its dimension range. + * **Range of values**: ``false`` or ``true`` + * **Type**: ``boolean`` + * **Required**: *no* + * **Default value**: ``false`` + +**Inputs**: + +* **1**: Multidimensional input tensor of type *T*. **Required.** + +* **2**: Scalar or 1D tensor of type *T_INT* with indices of dimensions to squeeze. Values could be negative (have to be from range ``[-R, R-1]``, where ``R`` is the rank of the first input). **Optional.** + +**Outputs**: + +* **1**: Tensor with squeezed values of type *T*. + +**Types** + +* *T*: any numeric type. + +* *T_INT*: any supported integer type. + +**Example** + +*Example 1: squeeze 4D tensor to a 2D tensor* + +.. code-block:: xml + :force: + + + + + + 1 + 3 + 1 + 2 + + + + + 2 + + + + + 3 + 2 + + + + +*Example 2: squeeze 1D tensor with 1 element to a 0D tensor (constant)* + +.. code-block:: xml + :force: + + + + + + 1 + + + + + 1 + + + + + + + + +*Example 3: squeeze 1D tensor with 1 dynamic shape element to a fully dynamic shape* + +..
code-block:: xml + :force: + + + + + + -1 + + + + + 1 + + + + + + + +*Example 4: squeeze 2D tensor with dynamic and static shape elements to a static shape output, according to the opset1 rules* + +.. code-block:: xml + :force: + + + + + + 2 + -1 + + + + + 1 + + + + + 2 + + + + +*Example 5: squeeze 2D tensor with dynamic and static shape elements to a dynamic shape output, according to the opset15 rules* + +.. code-block:: xml + :force: + + + + + + 2 + -1 + + + + + 1 + + + + + + diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/signals/stft-15.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/signals/stft-15.rst index 4a41df7214317c..bcc420f5db25c9 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/signals/stft-15.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/signals/stft-15.rst @@ -14,14 +14,14 @@ Short Time Fourier Transformation for real-valued input (STFT) **Short description**: *STFT* operation performs Short-Time Fourier Transform (real-to-complex). -**Detailed description**: *STFT* performs Short-Time Fourier Transform of real-valued batched input tensor of shape ``[batch, signal_size]``, and produces complex result represented by separate values for real and imaginary part. +**Detailed description**: *STFT* performs Short-Time Fourier Transform of real-valued input tensor of shape ``[signal_size]`` or ``[batch, signal_size]``, and produces complex result represented by separate values for real and imaginary part. **Attributes**: -* *transform_frames* +* *transpose_frames* - * **Description**: Flag to set output shape layout. If true the ``frames`` dimension is at out_shape[2], otherwise it is at out_shape[1]. + * **Description**: Flag to set output shape layout. If true the ``frames`` dimension is at out_shape[-2], otherwise it is at out_shape[-3]. 
* **Range of values**: * ``false`` - do not transpose output shape @@ -31,25 +31,25 @@ Short Time Fourier Transformation for real-valued input (STFT) **Inputs** -* **1**: ``signal`` - Tensor of type *T* and 2D shape [batch, signal_size] with signal data for the STFT. **Required.** -* **2**: ``window`` - Tensor of type *T* and 1D shape [window_length], specifying the window values for the signal slice multiplication. **Required.** -* **3**: ``frame_size`` - Scalar tensor of type *T_INT* describing the size of a single frame of the signal to be provided as input to FFT. **Required.** -* **4**: ``frame_step`` - Scalar tensor of type *T_INT* describing The distance (number of samples) between successive frames. **Required.** +* **1**: ``signal`` - Tensor of type *T* and 1D shape [signal_size] or 2D shape [batch, signal_size] with signal data for the STFT. **Required.** +* **2**: ``window`` - Tensor of type *T* and 1D shape [window_length], specifying the window values for the signal slice multiplication. **Required.** +* **3**: ``frame_size`` - Scalar tensor of type *T_INT* describing the size of a single frame of the signal to be provided as input to FFT. **Required.** +* **4**: ``frame_step`` - Scalar tensor of type *T_INT* describing the distance (number of samples) between successive frames. **Required.** **Outputs** -* **1**: The result of STFT operation, tensor of the same type as input ``signal`` tensor and shape: +* **1**: The result of STFT operation, tensor of the same type as input ``signal`` tensor and shape: - + When ``transform_frames == false`` the output shape is ``[batch, frames, fft_results, 2]`` - + When ``transform_frames == true`` the output shape is ``[batch, fft_results, frames, 2]`` + * When ``transpose_frames == false`` the output shape is ``[frames, fft_results, 2]`` for 1D signal input or ``[batch, frames, fft_results, 2]`` for 2D signal input. 
+ * When ``transpose_frames == true`` the output shape is ``[fft_results, frames, 2]`` for 1D signal input or ``[batch, fft_results, frames, 2]`` for 2D signal input. - where: + where: - + ``batch`` is a batch size dimension - + ``frames`` is a number calculated as ``(signal_shape[1] - frame_size) / frame_step) + 1`` - + ``fft_results`` is a number calculated as ``(frame_size / 2) + 1`` - + ``2`` is the last dimension is for complex value real and imaginary part + * ``batch`` is a batch size dimension + * ``frames`` is a number calculated as ``((signal_shape[-1] - frame_size) / frame_step) + 1`` + * ``fft_results`` is a number calculated as ``(frame_size / 2) + 1`` + * ``2`` is the last dimension, holding the real and imaginary parts of the complex value **Types** @@ -59,27 +59,109 @@ Short Time Fourier Transformation for real-valued input (STFT) * *T_INT*: ``int64`` or ``int32``. -**Example**: +**Examples**: + +*Example 1D signal, transpose_frames=false:* .. code-block:: xml :force: + + 56 + + + 7 + + + + + + 16 + 6 2 - 48 + + + + + +*Example 1D signal, transpose_frames=true:* + +.. code-block:: xml + :force: + + + + + + 56 - 8 + 7 - - + + - + + 6 + 16 2 - 9 - 9 + + + + +*Example 2D signal, transpose_frames=false:* + +.. code-block:: xml + :force: + + + + + + 3 + 56 + + + 7 + + + + + + 3 + 16 + 6 + 2 + + + + + +*Example 2D signal, transpose_frames=true:* + +.. 
code-block:: xml + :force: + + + + + + 3 + 56 + + + 7 + + + + + + 3 + 6 + 16 2 diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sort/search-sorted-15.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sort/search-sorted-15.rst index 81c592d3341a35..7a623a1e16739c 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sort/search-sorted-15.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sort/search-sorted-15.rst @@ -17,29 +17,32 @@ SearchSorted **Attributes** -* *right* +* *right_mode* - * **Description**: If False, set the first suitable index. If True, return the last suitable index for given value. Default is False. - * **Range of values**: true or false - * **Type**: boolean + * **Description**: flag to control whether output would contain leftmost or rightmost indices for given values. + * **Range of values**: + + * *true* - return the rightmost (last) suitable index for given value. + * *false* - return the leftmost (first) suitable index for given value. + * **Type**: ``boolean`` * **Default value**: false * **Required**: *no* **Inputs**: -* **1**: ``sorted`` - ND input tensor of type *T* - cannot be a scalar, containing monotonically increasing sequence on the innermost dimension. **Required.** +* **1**: ``sorted_sequence`` - ND input tensor of type *T* - cannot be a scalar, containing monotonically increasing sequence on the innermost dimension. **Required.** * **2**: ``values`` - ND input tensor of type *T*, containing the search values. If sorted sequence is 1D, then the values can have any shape, otherwise the rank should be equal to the rank of sorted input. **Required.** **Outputs**: -* **1**: Tensor of type *TOut*, with the same shape as second input tensor, containing the indices. 
+* **1**: Tensor of type *T_IND*, with the same shape as second input tensor ``values``, containing the indices. **Types** * *T*: any supported floating-point and integer type. -* *TOut*: int64. +* *T_IND*: ``int64``. **Example** @@ -47,7 +50,7 @@ SearchSorted :force: - + 7 @@ -63,7 +66,7 @@ SearchSorted - + 7 256 200 diff --git a/docs/articles_en/get-started/install-openvino.rst b/docs/articles_en/get-started/install-openvino.rst index 6dda915af93b69..7f26ab9ec72c9f 100644 --- a/docs/articles_en/get-started/install-openvino.rst +++ b/docs/articles_en/get-started/install-openvino.rst @@ -30,26 +30,6 @@ All currently supported versions are: * 2023.3 (LTS) * 2022.3 (LTS) -.. dropdown:: Distributions and Device Support - - Different OpenVINO distributions may support slightly different sets of features. - Read installation guides for particular distributions for more details. - Refer to the :doc:`OpenVINO Release Policy <../../../about-openvino/release-notes-openvino/release-policy>` - to learn more about the release types. - - .. dropdown:: Distribution Comparison for OpenVINO 2024.4 - - =============== ========== ====== =============== ======== ============ ========== ========== ========== - Device Archives PyPI APT/YUM/ZYPPER Conda Homebrew vcpkg Conan npm - =============== ========== ====== =============== ======== ============ ========== ========== ========== - CPU V V V V V V V V - GPU V V V V V V V V - NPU V\* V\* V\ * n/a n/a n/a n/a V\* - =============== ========== ====== =============== ======== ============ ========== ========== ========== - - | \* **Of the Linux systems, versions 22.04 and 24.04 include drivers for NPU.** - | **For Windows, CPU inference on ARM64 is not supported.** - .. 
dropdown:: Effortless GenAI integration with OpenVINO GenAI Flavor A new OpenVINO GenAI Flavor streamlines application development by providing diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-apt.rst b/docs/articles_en/get-started/install-openvino/install-openvino-apt.rst index 511ecc627d45ad..21a35a14047dca 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-apt.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-apt.rst @@ -14,28 +14,8 @@ Install Intel® Distribution of OpenVINO™ Toolkit for Linux Using APT Reposito * is dedicated to Linux users only * additionally includes code samples - -.. tab-set:: - - .. tab-item:: System Requirements - :sync: system-requirements - - | Full requirement listing is available in: - | :doc:`System Requirements Page <../../../about-openvino/release-notes-openvino/system-requirements>` - - .. tab-item:: Processor Notes - :sync: processor-notes - - | To see if your processor includes the integrated graphics technology and supports iGPU inference, refer to: - | `Product Specifications `__ - - .. tab-item:: Software Requirements - :sync: software-requirements - - * `CMake 3.13 or higher, 64-bit `__ - * GCC 7.5.0 (for Ubuntu 18.04), GCC 9.3.0 (for Ubuntu 20.04) or GCC 11.3.0 (for Ubuntu 22.04) - * `Python 3.9 - 3.12, 64-bit `__ - + Before installing OpenVINO, see the + :doc:`System Requirements page <../../../about-openvino/release-notes-openvino/system-requirements>`. 
Installing OpenVINO Runtime ####################################### diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-archive-linux.rst b/docs/articles_en/get-started/install-openvino/install-openvino-archive-linux.rst index 5b40422115179c..e777c06253a37a 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-archive-linux.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-archive-linux.rst @@ -30,58 +30,8 @@ Install OpenVINO™ Runtime on Linux from an Archive File RHEL8 x86_64 V V n/a =================== ===== ===== ===== -.. tab-set:: - - .. tab-item:: System Requirements - :sync: system-requirements - - | Full requirement listing is available in: - | :doc:`System Requirements Page <../../../about-openvino/release-notes-openvino/system-requirements>` - - .. tab-item:: Processor Notes - :sync: processor-notes - - | To see if your processor includes the integrated graphics technology and supports iGPU inference, refer to: - | `Product Specifications `__ - - .. tab-item:: Software - :sync: software - - * `CMake 3.13 or higher, 64-bit `__ - * `Python 3.9 - 3.12, 64-bit `__ - * GCC: - - .. tab-set:: - - .. tab-item:: Ubuntu 20.04 - :sync: ubuntu-20 - - * GCC 9.3.0 - - .. tab-item:: RHEL 8 - :sync: rhel-8 - - * GCC 8.4.1 - - .. tab-item:: CentOS 7 - :sync: centos-7 - - * GCC 8.3.1 - Use the following instructions to install it: - - Install GCC 8.3.1 via devtoolset-8 - - .. code-block:: sh - - sudo yum update -y && sudo yum install -y centos-release-scl epel-release - sudo yum install -y devtoolset-8 - - Enable devtoolset-8 and check current gcc version - - .. code-block:: sh - - source /opt/rh/devtoolset-8/enable - gcc -v + Before installing OpenVINO, see the + :doc:`System Requirements page <../../../about-openvino/release-notes-openvino/system-requirements>`. 
Installing OpenVINO Runtime diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-archive-macos.rst b/docs/articles_en/get-started/install-openvino/install-openvino-archive-macos.rst index 20f5df9b30a9d1..e9157a99e1c882 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-archive-macos.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-archive-macos.rst @@ -16,22 +16,8 @@ Install OpenVINO™ Runtime on macOS from an Archive File * is dedicated to macOS users (archives for other systems are also available) * is only supported for CPU Plugin - -.. tab-set:: - - .. tab-item:: System Requirements - :sync: system-requirements - - | Full requirement listing is available in: - | :doc:`System Requirements Page <../../../about-openvino/release-notes-openvino/system-requirements>` - - .. tab-item:: Software Requirements - :sync: software-requirements - - * `CMake 3.13 or higher `__ (choose "macOS 10.13 or later"). Add ``/Applications/CMake.app/Contents/bin`` to path (for default install). - * `Python 3.9 - 3.12 `__ (choose 3.9 - 3.12). Install and add to path. - * Apple Xcode Command Line Tools. In the terminal, run ``xcode-select --install`` from any directory - * (Optional) Apple Xcode IDE (not required for OpenVINO™, but useful for development) + Before installing OpenVINO, see the + :doc:`System Requirements page <../../../about-openvino/release-notes-openvino/system-requirements>`. 
Installing OpenVINO Runtime diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-archive-windows.rst b/docs/articles_en/get-started/install-openvino/install-openvino-archive-windows.rst index bcc03f37d74295..8f3efeeb720dc9 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-archive-windows.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-archive-windows.rst @@ -15,43 +15,8 @@ Install OpenVINO™ Runtime on Windows from an Archive File * additionally includes code samples * is dedicated to Windows users (archives for other systems are also available) - -System Requirements -#################### - -.. tab-set:: - - .. tab-item:: System Requirements - :sync: system-requirements - - | Full requirement listing is available in: - | :doc:`System Requirements Page <../../../about-openvino/release-notes-openvino/system-requirements>` - - .. tab-item:: Processor Notes - :sync: processor-notes - - | To see if your processor includes the integrated graphics technology and supports iGPU inference, refer to: - | `Product Specifications `__ - - .. tab-item:: Software - :sync: software - - * `Microsoft Visual Studio 2019 with MSBuild `__ or `Microsoft Visual Studio 2022 `__ - * `CMake 3.14 or higher, 64-bit `__ (optional, only required for building sample applications) - * `Python 3.9 - 3.12, 64-bit `__ - - .. note:: - - To install Microsoft Visual Studio 2019, follow the `Microsoft Visual Studio installation guide `__. You can choose to download the Community version. During installation in the **Workloads** tab, choose **Desktop development with C++**. - - .. note:: - - You can either use `cmake.msi` which is the installation wizard or `cmake.zip` where you have to go into the `bin` folder and then manually add the path to environmental variables. - - .. important:: - - When installing Python, make sure you click the option **Add Python 3.x to PATH** to `add Python `__ to your `PATH` environment variable. 
- + Before installing OpenVINO, see the + :doc:`System Requirements page <../../../about-openvino/release-notes-openvino/system-requirements>`. Installing OpenVINO Runtime diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-brew.rst b/docs/articles_en/get-started/install-openvino/install-openvino-brew.rst index f111c0f18e3fec..b1710f3bb358e8 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-brew.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-brew.rst @@ -14,43 +14,8 @@ Install OpenVINO™ Runtime via Homebrew * does not offer support for NPU inference * is dedicated to macOS (both arm64 and x86_64) and Linux (x86_64 only) users. - -.. tab-set:: - - .. tab-item:: System Requirements - :sync: system-requirements - - | Full requirement listing is available in: - | :doc:`System Requirements Page <../../../about-openvino/release-notes-openvino/system-requirements>` - - .. tab-item:: Processor Notes - :sync: processor-notes - - | To see if your processor includes the integrated graphics technology and supports iGPU inference, refer to: - | `Product Specifications `__ - - .. tab-item:: Software Requirements - :sync: software-requirements - - .. tab-set:: - - .. tab-item:: Linux - :sync: linux - - * `Homebrew `_ - * `CMake 3.13 or higher, 64-bit `__ - * GCC 7.5.0 (for Ubuntu 18.04), GCC 9.3.0 (for Ubuntu 20.04) or GCC 11.3.0 (for Ubuntu 22.04) - * `Python 3.9 - 3.12, 64-bit `__ - - .. tab-item:: macOS - :sync: macos - - * `Homebrew `_ - * `CMake 3.13 or higher `__ (choose "macOS 10.13 or later"). Add ``/Applications/CMake.app/Contents/bin`` to path (for default installation). - * `Python 3.9 - 3.12 `__ . Install and add it to path. - * Apple Xcode Command Line Tools. In the terminal, run ``xcode-select --install`` from any directory to install it. 
- * (Optional) Apple Xcode IDE (not required for OpenVINO™, but useful for development) - + Before installing OpenVINO, see the + :doc:`System Requirements page <../../../about-openvino/release-notes-openvino/system-requirements>`. Installing OpenVINO Runtime ########################### diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-conan.rst b/docs/articles_en/get-started/install-openvino/install-openvino-conan.rst index 4cf32c7fc474c2..06557003b3cbf6 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-conan.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-conan.rst @@ -15,26 +15,8 @@ Install OpenVINO™ Runtime from Conan Package Manager * is dedicated to users of all major OSes: Windows, Linux, and macOS (all x86_64 / arm64 architectures) - -.. tab-set:: - - .. tab-item:: System Requirements - :sync: system-requirements - - | Full requirement listing is available in: - | :doc:`System Requirements Page <../../../about-openvino/release-notes-openvino/system-requirements>` - - .. tab-item:: Processor Notes - :sync: processor-notes - - | To see if your processor includes the integrated graphics technology and supports iGPU inference, refer to: - | `Product Specifications `__ - - .. tab-item:: Software - :sync: software - - | There are many ways to work with Conan Package Manager. Before you proceed, learn more about it on the - | `Conan distribution page `__ + Before installing OpenVINO, see the + :doc:`System Requirements page <../../../about-openvino/release-notes-openvino/system-requirements>`. 
Installing OpenVINO Runtime with Conan Package Manager ############################################################ diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-conda.rst b/docs/articles_en/get-started/install-openvino/install-openvino-conda.rst index e551971fd98f92..d1392d3f46a513 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-conda.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-conda.rst @@ -16,28 +16,8 @@ Install OpenVINO™ Runtime from Conda Forge * is dedicated to users of all major OSes: Windows, Linux, and macOS (all x86_64 / arm64 architectures) -.. tab-set:: - - .. tab-item:: System Requirements - :sync: system-requirements - - | Full requirement listing is available in: - | :doc:`System Requirements Page <../../../about-openvino/release-notes-openvino/system-requirements>` - - - .. tab-item:: Processor Notes - :sync: processor-notes - - | To see if your processor includes the integrated graphics technology and supports iGPU inference, refer to: - | `Product Specifications `__ - - - .. tab-item:: Software - :sync: software - - | There are many ways to work with Conda. Before you proceed, learn more about it on the - | `Anaconda distribution page `__ - + Before installing OpenVINO, see the + :doc:`System Requirements page <../../../about-openvino/release-notes-openvino/system-requirements>`. 
Installing OpenVINO Runtime with Anaconda Package Manager ############################################################ diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-docker-linux.rst b/docs/articles_en/get-started/install-openvino/install-openvino-docker-linux.rst index 38795bd529c082..de7b2ed3ff622e 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-docker-linux.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-docker-linux.rst @@ -6,12 +6,7 @@ Install Intel® Distribution of OpenVINO™ Toolkit From a Docker Image manually to install OpenVINO™ Runtime on Linux and Windows operating systems. This guide presents information on how to use a pre-built Docker image or create a new image -manually, to install OpenVINO™ Runtime. The supported host operating systems for the Docker -base image are: - -- Linux -- Windows (WSL2) -- macOS (CPU exectuion only) +manually, to install OpenVINO™ Runtime. You can get started easily with pre-built and published docker images, which are available at: @@ -34,11 +29,25 @@ The Docker CI repository includes guides on how to `get started with docker images `__ and how to use `OpenVINO™ Toolkit containers with GPU accelerators. `__ -To start using Dockerfiles the following conditions must be met: +To start using Dockerfiles, install Docker Engine or a compatible container +engine on your system: + +.. tab-set:: + + .. tab-item:: Linux + :sync: linux + + * `Docker Desktop `__ + * `Docker Engine `__ + + .. tab-item:: Windows (WSL2) + :sync: win + + OpenVINO can be installed under :ref:`Windows Subsystem for Linux (WSL2) `. + + * `Docker Desktop `__ -- Linux OS or Windows (under :ref:`Windows Subsystem for Linux (WSL2) `) -- Installed docker engine or compatible container engine -- Permissions to run containers (sudo or docker group membership) +Also, verify you have permissions to run containers (sudo or docker group membership). .. 
note:: diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-npm.rst b/docs/articles_en/get-started/install-openvino/install-openvino-npm.rst index 7d811c69e5991c..5060ccfc654229 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-npm.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-npm.rst @@ -15,19 +15,8 @@ Install Intel® Distribution of OpenVINO™ Toolkit from npm Registry (all x86_64 / arm64 architectures) * macOS offers support only for CPU inference -.. tab-set:: - - .. tab-item:: System Requirements - :sync: system-requirements - - - Windows, Linux, macOS - - x86, ARM (Windows ARM not supported) - - .. tab-item:: Software Requirements - :sync: software-requirements - - `Node.js version 21.0.0 and higher `__ - + Before installing OpenVINO, see the + :doc:`System Requirements page <../../../about-openvino/release-notes-openvino/system-requirements>`. Installing OpenVINO Node.js ########################### diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-pip.rst b/docs/articles_en/get-started/install-openvino/install-openvino-pip.rst index c079f167761ada..6326513fa3cea1 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-pip.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-pip.rst @@ -16,26 +16,8 @@ Install Intel® Distribution of OpenVINO™ Toolkit from PyPI Repository (all x86_64 / arm64 architectures) * macOS offers support only for CPU inference - -.. tab-set:: - - .. tab-item:: System Requirements - :sync: system-requirements - - | Full requirement listing is available in: - | :doc:`System Requirements Page <../../../about-openvino/release-notes-openvino/system-requirements>` - | `PyPI OpenVINO page `__ - - - .. 
tab-item:: Processor Notes - :sync: processor-notes - - | To see if your processor includes the integrated graphics technology and supports iGPU - inference, refer to: - | `Product Specifications `__ - - - + Before installing OpenVINO, see the + :doc:`System Requirements page <../../../about-openvino/release-notes-openvino/system-requirements>`. Installing OpenVINO Runtime ########################### diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-vcpkg.rst b/docs/articles_en/get-started/install-openvino/install-openvino-vcpkg.rst index 747d4d873057f6..af9fe85528ca5d 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-vcpkg.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-vcpkg.rst @@ -15,26 +15,8 @@ Install OpenVINO™ Runtime via vcpkg * is dedicated to users of all major OSes: Windows, Linux, and macOS (all x86_64 / arm64 architectures) -.. tab-set:: - - .. tab-item:: System Requirements - :sync: system-requirements - - | Full requirement listing is available in: - | :doc:`System Requirements Page <../../../about-openvino/release-notes-openvino/system-requirements>` - - .. tab-item:: Processor Notes - :sync: processor-notes - - | To see if your processor includes the integrated graphics technology and supports iGPU inference, refer to: - | `Product Specifications `__ - - .. tab-item:: Software Requirements - :sync: software-requirements - - * `vcpkg `__ - - + Before installing OpenVINO, see the + :doc:`System Requirements page <../../../about-openvino/release-notes-openvino/system-requirements>`. 
Installing OpenVINO Runtime ########################### diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-yum.rst b/docs/articles_en/get-started/install-openvino/install-openvino-yum.rst index 6775495fcd2a30..970bb47a095d5b 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-yum.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-yum.rst @@ -15,41 +15,8 @@ Install OpenVINO™ Runtime on Linux From YUM Repository * is dedicated to Linux users only * additionally includes code samples -.. tab-set:: - - .. tab-item:: System Requirements - :sync: system-requirements - - | Full requirement listing is available in: - | :doc:`System Requirements Page <../../../about-openvino/release-notes-openvino/system-requirements>` - - .. note:: - - OpenVINO RPM packages are compatible with and can be run on the following operating systems: - - - RHEL 8.2 and higher - - Amazon Linux 2022 and 2023 - - Rocky Linux 8.7, 8.8 and 9.2-9.3 - - Alma Linux 8.7, 8.8 and 9.2-9.4 - - Oracle Linux 8.7, 8.8 and 9.2-9.4 - - Fedora 29 and higher up to 41 - - OpenEuler 20.03, 22.03, 23.03 and 24.03 - - Anolis OS 8.6 and 8.8 - - CentOS Stream 8 and 9 - - .. tab-item:: Processor Notes - :sync: processor-notes - - | To see if your processor includes the integrated graphics technology and supports iGPU inference, refer to: - | `Product Specifications `__ - - .. tab-item:: Software - :sync: software - - * `CMake 3.13 or higher, 64-bit `_ - * GCC 8.2.0 - * `Python 3.9 - 3.12, 64-bit `_ - + Before installing OpenVINO, see the + :doc:`System Requirements page <../../../about-openvino/release-notes-openvino/system-requirements>`. 
Install OpenVINO Runtime ######################## diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-zypper.rst b/docs/articles_en/get-started/install-openvino/install-openvino-zypper.rst index 8a1ceff7271187..127b26cac0590f 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-zypper.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-zypper.rst @@ -15,33 +15,8 @@ Install OpenVINO™ Runtime on Linux From ZYPPER Repository * is dedicated to Linux users only * additionally includes code samples -.. tab-set:: - - .. tab-item:: System Requirements - :sync: system-requirements - - | Full requirement listing is available in: - | :doc:`System Requirements Page <../../../about-openvino/release-notes-openvino/system-requirements>` - - .. note:: - - OpenVINO RPM packages are compatible with and can be run on the following operating systems: - - - openSUSE Tumbleweed - - .. tab-item:: Processor Notes - :sync: processor-notes - - | To see if your processor includes the integrated graphics technology and supports iGPU inference, refer to: - | `Product Specifications `__ - - .. tab-item:: Software - :sync: software - - * `CMake 3.13 or higher, 64-bit `_ - * GCC 8.2.0 - * `Python 3.9 - 3.12, 64-bit `_ - + Before installing OpenVINO, see the + :doc:`System Requirements page <../../../about-openvino/release-notes-openvino/system-requirements>`. Install OpenVINO Runtime ######################## diff --git a/docs/dev/build_riscv64.md b/docs/dev/build_riscv64.md index 13ab9e98d56958..19d6e8714b45fc 100644 --- a/docs/dev/build_riscv64.md +++ b/docs/dev/build_riscv64.md @@ -16,15 +16,23 @@ The software was validated on the following devices: - Python 3.10 for OpenVINO Runtime Python API ## How to build +Currently, there are three ways to build OpenVINO Runtime for 64-bit RISC-V platforms: + +1. **Recommended**. 
The build with vectorized (using RVV instructions) primitives for limited scope of operations from [`SHL`](https://github.com/XUANTIE-RV/csi-nn2) using [`xuantie-gnu-toolchain`](https://github.com/XUANTIE-RV/). This GNU Compiler Toolchain supports RVV 0.7.1, ratified RVV 1.0 and Xuantie-specific instruction sets. The vector intrinsics don't use the common prefix `__riscv_`. This method provides the best performance available at the moment. +2. The build without optimized primitives using [`riscv-gnu-toolchain`](https://github.com/riscv-collab/riscv-gnu-toolchain.git). This GNU Compiler Toolchain supports RVV 0.7.1 and ratified RVV 1.0. The vector intrinsics use the common prefix `__riscv_`. However, as mentioned earlier, this build method doesn't yet provide optimized primitives implemented using the RVV intrinsics. +3. The build without optimized primitives using installed Linux packages. The compilers in these packages don't support RVV intrinsics. + +### Steps + 0. Prerequisite: -- For target with RVV - build `xuantie-gnu-toolchain` and `qemu`: +- For target with vectorized primitives from `SHL` - build `xuantie-gnu-toolchain` and `qemu`: ```sh git clone https://github.com/XUANTIE-RV/xuantie-gnu-toolchain.git cd xuantie-gnu-toolchain ./configure --prefix= make linux build-qemu -j$(nproc) ``` -- For target without RVV - build `riscv-gnu-toolchain`: +- For target without optimized primitives using `riscv-gnu-toolchain`: ```sh git clone https://github.com/riscv-collab/riscv-gnu-toolchain.git cd riscv-gnu-toolchain @@ -32,6 +40,11 @@ The software was validated on the following devices: make linux build-qemu -j$(nproc) ``` > **NOTE**: The `build-qemu` target is optional, as it is used to build the `qemu` simulator. However, it is recommended to build the `qemu` simulator, since it is much more convenient to validate the software on your host than on your devices. More information can be seen [here](https://github.com/riscv-collab/riscv-gnu-toolchain). 
+- For target without optimized primitives using installed Linux packages: + ```sh + apt-get update + apt-get install -y gcc-riscv64-linux-gnu g++-riscv64-linux-gnu binutils-riscv64-linux-gnu + ``` 1. Clone OpenVINO repository and init submodules: ```sh @@ -50,8 +63,8 @@ The software was validated on the following devices: mkdir build && cd build ``` -4. To cross compile OpenVINO Runtime for RISC-V devices, run `cmake` with specified `CMAKE_TOOLCHAIN_FILE` and `RISCV_TOOLCHAIN_ROOT`. -- For target with RVV: +4. To cross compile OpenVINO Runtime for RISC-V devices, run `cmake` with specified `CMAKE_TOOLCHAIN_FILE` and `RISCV_TOOLCHAIN_ROOT` (the last one is needed only for build using GNU toolchain). +- For target with vectorized primitives from `SHL`: ```sh cmake .. \ -DCMAKE_BUILD_TYPE=Release \ @@ -59,8 +72,8 @@ The software was validated on the following devices: -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/ \ -DRISCV_TOOLCHAIN_ROOT= ``` - > **NOTE**: To build OpenVINO Runtime for different versions of RVV, you just need to specify corresponding toolchain files. For exmaple, you can replace `` with `riscv64-071-thead-gnu.toolchain.cmake` for RVV 0.7.1 and `riscv64-100-thead-gnu.toolchain.cmake` for RVV 1.0 respectively. -- For target without RVV: + > **NOTE**: To build OpenVINO Runtime for different versions of RVV, you just need to specify corresponding toolchain files. For example, you can replace `` with `riscv64-071-xuantie-gnu.toolchain.cmake` for RVV 0.7.1 and `riscv64-100-xuantie-gnu.toolchain.cmake` for RVV 1.0 respectively. +- For target without optimized primitives using `riscv-gnu-toolchain`: ```sh cmake .. \ -DCMAKE_BUILD_TYPE=Release \ @@ -69,7 +82,13 @@ The software was validated on the following devices: -DRISCV_TOOLCHAIN_ROOT=/opt/riscv ``` > **NOTE**: The `riscv-gnu-toolchain` is build as there are essential files used for cross compilation under `/opt/riscv/sysroot`. 
The latest stable versions of Clang or GCC both support compiling source code into RISC-V instructions, so it is acceptable to choose your preferable compilers by specifying `-DCMAKE_C_COMPILER` and `CMAKE_CXX_COMPILER`. But remember to add the key `-DCMAKE_SYSROOT=/opt/riscv/sysroot`, otherwise many fundamental headers and libs could not be found during cross compilation. - +- For target without optimized primitives using installed Linux packages: + ```sh + cmake .. \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX= \ + -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/riscv64.linux.toolchain.cmake + ``` > **NOTE**: By default OpenVINO is built with OpenMP support on RISC-V devices. Then run `make` to build the project: diff --git a/docs/dev/build_windows.md b/docs/dev/build_windows.md index 10049485202cca..4a9761f5364046 100644 --- a/docs/dev/build_windows.md +++ b/docs/dev/build_windows.md @@ -25,7 +25,7 @@ Supported configurations: ```sh git clone https://github.com/openvinotoolkit/openvino.git cd openvino - git submodule update --init + git submodule update --init --recursive ``` 2. Create build directory: diff --git a/docs/dev/ci/commit_signoff_policy.md b/docs/dev/ci/commit_signoff_policy.md new file mode 100644 index 00000000000000..0328d3c3ec308c --- /dev/null +++ b/docs/dev/ci/commit_signoff_policy.md @@ -0,0 +1,74 @@ +# How to sign-off commits + +We require a sign-off commit message in the following format on each commit in pull request. + +``` +This is a commit message. + +Signed-off-by: Author Name +``` + +## How to sign-off new commits + +In a local Git environment, the sign-off message can be added to a commit either manually (as a text) +or via the **-s** flag used with the “git commit” command, for example: + +`git commit -s -m "My commit message"` + +To avoid manually adding the flag for each commit, we recommend setting up a Git hook with the following steps: + +1. Navigate to the `/.git/hooks` folder. +2. 
Open the `prepare-commit-msg.sample` file and paste the following content: + +``` +#!/bin/sh + +COMMIT_MSG_FILE=$1 +COMMIT_SOURCE=$2 +SHA1=$3 + +NAME=$(git config user.name) +EMAIL=$(git config user.email) + +if [ -z "$NAME" ]; then + echo "empty git config user.name" + exit 1 +fi + +if [ -z "$EMAIL" ]; then + echo "empty git config user.email" + exit 1 +fi + +git interpret-trailers --if-exists doNothing --trailer \ + "Signed-off-by: $NAME <$EMAIL>" \ + --in-place "$1" +``` + +3. Save the file with the name `prepare-commit-msg` (remove the .sample extension). +4. Make the file executable (on Linux / Git Bash: `chmod +x /.git/hooks/prepare-commit-msg`). + +**Note**: For both sign-off approaches, ensure your user name and email address are configured in Git first: + +``` +git config user.name 'FIRST_NAME LAST_NAME' +git config user.email 'MY_EMAIL@example.com' +``` + +### Sign-off web-based commits + +To enable automatic sign-off of commits made via the GitHub web interface, make sure that the +[Require contributors to sign off on web-based commits](https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/managing-repository-settings/managing-the-commit-signoff-policy-for-your-repository#enabling-or-disabling-compulsory-commit-signoffs-for-your-repository) +setting is selected in the Settings menu of your OpenVINO repository fork. + +## How to sign-off older commits in the history + +If you forget to add the sign-off to your last commit, you can amend it and force-push to GitHub: + +``` +git commit --amend --signoff +``` + +To sign off on even older commits, use an interactive rebase, edit unsigned commits, and execute +`git commit --amend --signoff` for each. However, please note that if others have already started working based on +the commits in this branch, this will rewrite history and may cause issues for collaborators. 
diff --git a/docs/documentation_build_instructions.md b/docs/documentation_build_instructions.md index d9219454b86a19..a1412cfc3f358d 100644 --- a/docs/documentation_build_instructions.md +++ b/docs/documentation_build_instructions.md @@ -26,7 +26,7 @@ $ source env/bin/activate ``` 5. Install the sphinx theme ``` -(env) $ cd docs/openvino_sphinx_theme && python setup.py install && cd - +(env) $ python -m pip install docs/openvino_sphinx_theme `````` 6. Install the custom sphinx sitemap ``` diff --git a/docs/nbdoc/consts.py b/docs/nbdoc/consts.py index 75c8111bafcc6b..bfad4b042e5359 100644 --- a/docs/nbdoc/consts.py +++ b/docs/nbdoc/consts.py @@ -6,7 +6,7 @@ repo_owner = "openvinotoolkit" repo_name = "openvino_notebooks" repo_branch = "tree/main" -artifacts_link = "http://repository.toolbox.iotg.sclab.intel.com/projects/ov-notebook/0.1.0-latest/20241022220806/dist/rst_files/" +artifacts_link = "http://repository.toolbox.iotg.sclab.intel.com/projects/ov-notebook/0.1.0-latest/20241104220807/dist/rst_files/" blacklisted_extensions = ['.xml', '.bin'] notebooks_repo = "https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/" notebooks_binder = "https://mybinder.org/v2/gh/openvinotoolkit/openvino_notebooks/HEAD?filepath=" diff --git a/docs/notebooks/3D-pose-estimation-with-output.rst b/docs/notebooks/3D-pose-estimation-with-output.rst index e5e8e3813b5173..f39aa93b36851d 100644 --- a/docs/notebooks/3D-pose-estimation-with-output.rst +++ b/docs/notebooks/3D-pose-estimation-with-output.rst @@ -93,7 +93,7 @@ Lab instead.** .. code:: ipython3 - %pip install pythreejs "openvino-dev>=2024.0.0" "opencv-python" "torch" "onnx<1.16.2" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install pythreejs "openvino>=2024.4.0" "opencv-python" "torch" "tqdm" --extra-index-url https://download.pytorch.org/whl/cpu .. 
parsed-literal:: @@ -101,85 +101,75 @@ Lab instead.** Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cpu Collecting pythreejs Using cached pythreejs-2.4.2-py3-none-any.whl.metadata (5.4 kB) - Collecting openvino-dev>=2024.0.0 - Using cached openvino_dev-2024.4.0-16579-py3-none-any.whl.metadata (16 kB) + Collecting openvino>=2024.4.0 + Using cached openvino-2024.4.0-16579-cp38-cp38-manylinux2014_x86_64.whl.metadata (8.3 kB) Collecting opencv-python Using cached opencv_python-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB) Collecting torch Using cached https://download.pytorch.org/whl/cpu/torch-2.4.1%2Bcpu-cp38-cp38-linux_x86_64.whl (194.9 MB) - Collecting onnx<1.16.2 - Using cached onnx-1.16.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB) - Requirement already satisfied: ipywidgets>=7.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (8.1.5) + Collecting tqdm + Using cached tqdm-4.66.6-py3-none-any.whl.metadata (57 kB) + Requirement already satisfied: ipywidgets>=7.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (8.1.5) Collecting ipydatawidgets>=1.1.1 (from pythreejs) Using cached ipydatawidgets-4.3.5-py2.py3-none-any.whl.metadata (1.4 kB) Collecting numpy (from pythreejs) Using cached numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB) - Requirement already satisfied: traitlets in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (5.14.3) - Requirement already satisfied: defusedxml>=0.7.1 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (0.7.1) - Collecting networkx<=3.1.0 (from openvino-dev>=2024.0.0) - Using cached networkx-3.1-py3-none-any.whl.metadata (5.3 kB) - Collecting openvino-telemetry>=2023.2.1 (from openvino-dev>=2024.0.0) + Requirement already satisfied: traitlets in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (5.14.3) + Collecting openvino-telemetry>=2023.2.1 (from openvino>=2024.4.0) Using cached openvino_telemetry-2024.1.0-py3-none-any.whl.metadata (2.3 kB) - Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (24.1) - Requirement already satisfied: pyyaml>=5.4.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (6.0.2) - Requirement already satisfied: requests>=2.25.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (2.32.0) - Collecting openvino==2024.4.0 (from openvino-dev>=2024.0.0) - Using cached openvino-2024.4.0-16579-cp38-cp38-manylinux2014_x86_64.whl.metadata (8.3 kB) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2024.4.0) (24.1) Collecting filelock (from torch) Using cached filelock-3.16.1-py3-none-any.whl.metadata (2.9 kB) - Requirement already satisfied: 
typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (4.12.2) + Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (4.12.2) Collecting sympy (from torch) Using cached sympy-1.13.3-py3-none-any.whl.metadata (12 kB) - Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1.4) + Collecting networkx (from torch) + Using cached https://download.pytorch.org/whl/networkx-3.2.1-py3-none-any.whl (1.6 MB) + Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1.4) Collecting fsspec (from torch) Using cached fsspec-2024.10.0-py3-none-any.whl.metadata (11 kB) - Collecting protobuf>=3.20.2 (from onnx<1.16.2) - Using cached protobuf-5.28.2-cp38-abi3-manylinux2014_x86_64.whl.metadata (592 bytes) Collecting traittypes>=0.2.0 (from ipydatawidgets>=1.1.1->pythreejs) Using cached traittypes-0.2.1-py2.py3-none-any.whl.metadata (1.0 kB) - Requirement already satisfied: comm>=0.1.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (0.2.2) - Requirement already satisfied: ipython>=6.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (8.12.3) - Requirement already satisfied: widgetsnbextension~=4.0.12 
in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (4.0.13) - Requirement already satisfied: jupyterlab-widgets~=3.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (3.0.13) - Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (3.4.0) - Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (3.10) - Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (2.2.3) - Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (2024.8.30) - Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.5) + Requirement already satisfied: comm>=0.1.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (0.2.2) + Requirement 
already satisfied: ipython>=6.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (8.12.3) + Requirement already satisfied: widgetsnbextension~=4.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (4.0.13) + Requirement already satisfied: jupyterlab-widgets~=3.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (3.0.13) + Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.5) + INFO: pip is looking at multiple versions of networkx to determine which version is compatible with other requirements. This could take a while. 
+ Collecting networkx (from torch) + Using cached networkx-3.1-py3-none-any.whl.metadata (5.3 kB) Collecting mpmath<1.4,>=1.1.0 (from sympy->torch) Using cached https://download.pytorch.org/whl/mpmath-1.3.0-py3-none-any.whl (536 kB) - Requirement already satisfied: backcall in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.0) - Requirement already satisfied: decorator in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (5.1.1) - Requirement already satisfied: jedi>=0.16 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.19.1) - Requirement already satisfied: matplotlib-inline in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.1.7) - Requirement already satisfied: pickleshare in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.5) - Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (3.0.48) - Requirement already satisfied: pygments>=2.4.0 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.18.0) - Requirement already satisfied: stack-data in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.6.3) - Requirement already satisfied: pexpect>4.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (4.9.0) - Requirement already satisfied: parso<0.9.0,>=0.8.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.8.4) - Requirement already satisfied: ptyprocess>=0.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pexpect>4.3->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.0) - Requirement already satisfied: wcwidth in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.13) - Requirement already satisfied: executing>=1.2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.1.0) - Requirement already satisfied: asttokens>=2.1.0 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.4.1) - Requirement already satisfied: pure-eval in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.3) - Requirement already satisfied: six>=1.12.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from asttokens>=2.1.0->stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (1.16.0) + Requirement already satisfied: backcall in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.0) + Requirement already satisfied: decorator in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (5.1.1) + Requirement already satisfied: jedi>=0.16 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.19.1) + Requirement already satisfied: matplotlib-inline in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.1.7) + Requirement already satisfied: pickleshare in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.5) + Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (3.0.48) + Requirement already satisfied: pygments>=2.4.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.18.0) + Requirement already satisfied: stack-data in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.6.3) + Requirement already satisfied: pexpect>4.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (4.9.0) + Requirement already satisfied: parso<0.9.0,>=0.8.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.8.4) + Requirement already satisfied: ptyprocess>=0.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pexpect>4.3->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.0) + Requirement already satisfied: wcwidth in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.13) + Requirement already satisfied: executing>=1.2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.1.0) + Requirement already satisfied: asttokens>=2.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.4.1) + Requirement already satisfied: pure-eval in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.3) + Requirement already satisfied: six>=1.12.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from asttokens>=2.1.0->stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (1.16.0) Using cached pythreejs-2.4.2-py3-none-any.whl (3.4 MB) - Using cached openvino_dev-2024.4.0-16579-py3-none-any.whl (4.7 MB) Using cached openvino-2024.4.0-16579-cp38-cp38-manylinux2014_x86_64.whl (42.6 MB) Using cached opencv_python-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (62.5 MB) - Using cached onnx-1.16.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.9 MB) + Using cached tqdm-4.66.6-py3-none-any.whl (78 kB) Using cached ipydatawidgets-4.3.5-py2.py3-none-any.whl (271 kB) - Using cached networkx-3.1-py3-none-any.whl (2.1 MB) Using cached 
numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB) Using cached openvino_telemetry-2024.1.0-py3-none-any.whl (23 kB) - Using cached protobuf-5.28.2-cp38-abi3-manylinux2014_x86_64.whl (316 kB) Using cached filelock-3.16.1-py3-none-any.whl (16 kB) Using cached fsspec-2024.10.0-py3-none-any.whl (179 kB) + Using cached networkx-3.1-py3-none-any.whl (2.1 MB) Using cached sympy-1.13.3-py3-none-any.whl (6.2 MB) Using cached traittypes-0.2.1-py2.py3-none-any.whl (8.6 kB) - Installing collected packages: openvino-telemetry, mpmath, traittypes, sympy, protobuf, numpy, networkx, fsspec, filelock, torch, openvino, opencv-python, onnx, openvino-dev, ipydatawidgets, pythreejs - Successfully installed filelock-3.16.1 fsspec-2024.10.0 ipydatawidgets-4.3.5 mpmath-1.3.0 networkx-3.1 numpy-1.24.4 onnx-1.16.1 opencv-python-4.10.0.84 openvino-2024.4.0 openvino-dev-2024.4.0 openvino-telemetry-2024.1.0 protobuf-5.28.2 pythreejs-2.4.2 sympy-1.13.3 torch-2.4.1+cpu traittypes-0.2.1 + Installing collected packages: openvino-telemetry, mpmath, traittypes, tqdm, sympy, numpy, networkx, fsspec, filelock, torch, openvino, opencv-python, ipydatawidgets, pythreejs + Successfully installed filelock-3.16.1 fsspec-2024.10.0 ipydatawidgets-4.3.5 mpmath-1.3.0 networkx-3.1 numpy-1.24.4 opencv-python-4.10.0.84 openvino-2024.4.0 openvino-telemetry-2024.1.0 pythreejs-2.4.2 sympy-1.13.3 torch-2.4.1+cpu tqdm-4.66.6 traittypes-0.2.1 Note: you may need to restart the kernel to use updated packages. @@ -228,41 +218,31 @@ Download the model -We use ``omz_downloader``, which is a command line tool from the -``openvino-dev`` package. ``omz_downloader`` automatically creates a -directory structure and downloads the selected model. - .. 
code:: ipython3 - # directory where model will be downloaded - base_model_dir = "model" + from notebook_utils import download_file + import tarfile - # model name as named in Open Model Zoo - model_name = "human-pose-estimation-3d-0001" - # selected precision (FP32, FP16) - precision = "FP32" - BASE_MODEL_NAME = f"{base_model_dir}/public/{model_name}/{model_name}" - model_path = Path(BASE_MODEL_NAME).with_suffix(".pth") - onnx_path = Path(BASE_MODEL_NAME).with_suffix(".onnx") + # directory where model will be downloaded + base_model_dir = Path("model") + + download_file( + "https://storage.openvinotoolkit.org/repositories/open_model_zoo/public/2022.1/human-pose-estimation-3d-0001/human-pose-estimation-3d.tar.gz", + directory=base_model_dir, + ) - ir_model_path = Path(f"model/public/{model_name}/{precision}/{model_name}.xml") - model_weights_path = Path(f"model/public/{model_name}/{precision}/{model_name}.bin") + ckpt_file = base_model_dir / "human-pose-estimation-3d-0001.pth" - if not model_path.exists(): - download_command = f"omz_downloader " f"--name {model_name} " f"--output_dir {base_model_dir}" - ! $download_command + if not ckpt_file.exists(): + with tarfile.open(base_model_dir / "human-pose-estimation-3d.tar.gz") as f: + f.extractall(base_model_dir) + .. parsed-literal:: - ################|| Downloading human-pose-estimation-3d-0001 ||################ - - ========== Downloading model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.tar.gz - - - ========== Unpacking model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.tar.gz - + model/human-pose-estimation-3d.tar.gz: 0%| | 0.00/17.6M [00:00 1 or self.sliding_window is not None: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:808: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:861: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! encoder_states = () if output_hidden_states else None - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:813: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:866: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if output_hidden_states: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:836: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:889: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if output_hidden_states: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:839: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:892: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if not return_dict: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:935: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:988: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if not return_dict: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:1426: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:1486: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if not return_dict: @@ -351,13 +352,13 @@ suitable. This function repeats part of ``AmusedPipeline``. .. parsed-literal:: - /tmp/ipykernel_2578393/3779428577.py:34: TracerWarning: Converting a tensor to a Python list might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /tmp/ipykernel_498025/3779428577.py:34: TracerWarning: Converting a tensor to a Python list might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! shape=shape.tolist(), - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/autoencoders/vq_model.py:144: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/autoencoders/vq_model.py:144: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if not force_not_quantize: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:147: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:147: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if hidden_states.shape[0] >= 64: @@ -490,7 +491,7 @@ And insert wrappers instances in the pipeline: .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -696,7 +697,7 @@ model. .. parsed-literal:: - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, openvino @@ -707,7 +708,7 @@ model. .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -760,17 +761,17 @@ model. .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: 
RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) @@ -794,7 +795,7 @@ Demo generation with quantized pipeline .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. 
Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -878,7 +879,7 @@ a rough estimate of generation quality. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/utilities/prints.py:43: UserWarning: Metric `InceptionScore` will save all extracted features in buffer. For large datasets this may lead to large memory footprint. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/utilities/prints.py:43: UserWarning: Metric `InceptionScore` will save all extracted features in buffer. For large datasets this may lead to large memory footprint. warnings.warn(\*args, \*\*kwargs) # noqa: B028 @@ -890,7 +891,7 @@ a rough estimate of generation quality. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/image/inception.py:175: UserWarning: std(): degrees of freedom is <= 0. Correction should be strictly less than the reduction factor (input numel divided by output numel). (Triggered internally at ../aten/src/ATen/native/ReduceOps.cpp:1808.) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/image/inception.py:175: UserWarning: std(): degrees of freedom is <= 0. Correction should be strictly less than the reduction factor (input numel divided by output numel). (Triggered internally at ../aten/src/ATen/native/ReduceOps.cpp:1808.) return kl.mean(), kl.std() @@ -908,7 +909,7 @@ a rough estimate of generation quality. .. 
parsed-literal:: Quantized pipeline Inception Score: 11.0730562210083 - Quantization speed-up: 2.09x + Quantization speed-up: 2.08x Interactive inference diff --git a/docs/notebooks/async-api-with-output.rst b/docs/notebooks/async-api-with-output.rst index e8f5b80d429d81..9f9130a4fe0db2 100644 --- a/docs/notebooks/async-api-with-output.rst +++ b/docs/notebooks/async-api-with-output.rst @@ -61,13 +61,6 @@ Imports %pip install -q "openvino>=2023.1.0" %pip install -q opencv-python "matplotlib>=3.4" - -.. parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - - .. code:: ipython3 import cv2 @@ -119,14 +112,12 @@ the person in each frame of the video. ################|| Downloading person-detection-0202 ||################ - ========== Downloading model/intel/person-detection-0202/FP16/person-detection-0202.xml - + ========== Retrieving model/intel/person-detection-0202/FP16/person-detection-0202.xml from the cache - ========== Downloading model/intel/person-detection-0202/FP16/person-detection-0202.bin + ========== Retrieving model/intel/person-detection-0202/FP16/person-detection-0202.bin from the cache - Select inference device ~~~~~~~~~~~~~~~~~~~~~~~ @@ -348,8 +339,8 @@ Test performance in Sync Mode .. parsed-literal:: Source ended - average throuput in sync mode: 63.79 fps - + average throuput in sync mode: 55.59 fps + Async Mode ~~~~~~~~~~ @@ -487,8 +478,8 @@ Test the performance in Async Mode .. parsed-literal:: Source ended - average throuput in async mode: 106.83 fps - + average throuput in async mode: 75.17 fps + Compare the performance ~~~~~~~~~~~~~~~~~~~~~~~ @@ -630,5 +621,5 @@ Test the performance with ``AsyncInferQueue`` .. 
parsed-literal:: - average throughput in async mode with async infer queue: 145.36 fps - + average throughput in async mode with async infer queue: 103.81 fps + diff --git a/docs/notebooks/async-api-with-output_files/async-api-with-output_23_0.png b/docs/notebooks/async-api-with-output_files/async-api-with-output_23_0.png index f3492582efc67f..3694e16797b727 100644 --- a/docs/notebooks/async-api-with-output_files/async-api-with-output_23_0.png +++ b/docs/notebooks/async-api-with-output_files/async-api-with-output_23_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:84a9b6aab4a04feb319b3243644da6837b3b894122657a8f6639fa604e3b48dd -size 29468 +oid sha256:fd71c3c8d066b8a16264c215b773c6200e981634ac24d04db99b0288d1ea1cca +size 30409 diff --git a/docs/notebooks/auto-device-with-output.rst b/docs/notebooks/auto-device-with-output.rst index 28d894eb72b22e..2ebcbe7d80deb2 100644 --- a/docs/notebooks/auto-device-with-output.rst +++ b/docs/notebooks/auto-device-with-output.rst @@ -197,16 +197,16 @@ By default, ``compile_model`` API will select **AUTO** as .. 
parsed-literal:: - [22:37:15.4888]I[plugin.cpp:421][AUTO] device:CPU, config:LOG_LEVEL=LOG_INFO - [22:37:15.4888]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT=LATENCY - [22:37:15.4888]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT_NUM_REQUESTS=0 - [22:37:15.4888]I[plugin.cpp:421][AUTO] device:CPU, config:PERF_COUNT=NO - [22:37:15.4888]I[plugin.cpp:426][AUTO] device:CPU, priority:0 - [22:37:15.4888]I[schedule.cpp:17][AUTO] scheduler starting - [22:37:15.4888]I[auto_schedule.cpp:181][AUTO] select device:CPU - [22:37:15.5995]I[auto_schedule.cpp:346][AUTO] Device: [CPU]: Compile model took 110.638049 ms - [22:37:15.5996]I[auto_schedule.cpp:112][AUTO] device:CPU compiling model finished - [22:37:15.5997]I[plugin.cpp:454][AUTO] underlying hardware does not support hardware context + [22:41:57.1267]I[plugin.cpp:421][AUTO] device:CPU, config:LOG_LEVEL=LOG_INFO + [22:41:57.1268]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT=LATENCY + [22:41:57.1268]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT_NUM_REQUESTS=0 + [22:41:57.1268]I[plugin.cpp:421][AUTO] device:CPU, config:PERF_COUNT=NO + [22:41:57.1268]I[plugin.cpp:426][AUTO] device:CPU, priority:0 + [22:41:57.1268]I[schedule.cpp:17][AUTO] scheduler starting + [22:41:57.1269]I[auto_schedule.cpp:181][AUTO] select device:CPU + [22:41:57.2582]I[auto_schedule.cpp:346][AUTO] Device: [CPU]: Compile model took 131.300219 ms + [22:41:57.2583]I[auto_schedule.cpp:112][AUTO] device:CPU compiling model finished + [22:41:57.2584]I[plugin.cpp:454][AUTO] underlying hardware does not support hardware context Successfully compiled model without a device_name. @@ -219,8 +219,8 @@ By default, ``compile_model`` API will select **AUTO** as .. 
parsed-literal:: - Deleted compiled_model[22:37:15.6060]I[schedule.cpp:308][AUTO] scheduler ending - + Deleted compiled_model + [22:41:57.2639]I[schedule.cpp:308][AUTO] scheduler ending Explicitly pass AUTO as device_name to Core::compile_model API @@ -378,7 +378,7 @@ executed on CPU until GPU is ready. .. parsed-literal:: - Time to load model using AUTO device and get first inference: 0.15 seconds. + Time to load model using AUTO device and get first inference: 0.12 seconds. .. code:: ipython3 @@ -553,12 +553,12 @@ Loop for inference and update the FPS/Latency every Compiling Model for AUTO device with THROUGHPUT hint Start inference, 6 groups of FPS/latency will be measured over 10s intervals - throughput: 184.25fps, latency: 31.12ms, time interval: 10.02s - throughput: 184.19fps, latency: 31.80ms, time interval: 10.00s - throughput: 183.00fps, latency: 32.00ms, time interval: 10.01s - throughput: 183.37fps, latency: 31.91ms, time interval: 10.01s - throughput: 178.30fps, latency: 32.90ms, time interval: 10.01s - throughput: 182.80fps, latency: 32.08ms, time interval: 10.01s + throughput: 179.70fps, latency: 32.12ms, time interval: 10.00s + throughput: 183.61fps, latency: 31.86ms, time interval: 10.01s + throughput: 183.96fps, latency: 31.88ms, time interval: 10.01s + throughput: 183.98fps, latency: 31.91ms, time interval: 10.00s + throughput: 183.26fps, latency: 31.98ms, time interval: 10.01s + throughput: 183.40fps, latency: 32.01ms, time interval: 10.00s Done @@ -604,12 +604,12 @@ Loop for inference and update the FPS/Latency for each Compiling Model for AUTO Device with LATENCY hint Start inference, 6 groups fps/latency will be out with 10s interval - throughput: 139.34fps, latency: 6.64ms, time interval: 10.00s - throughput: 141.45fps, latency: 6.63ms, time interval: 10.00s - throughput: 141.42fps, latency: 6.63ms, time interval: 10.01s - throughput: 141.70fps, latency: 6.62ms, time interval: 10.01s - throughput: 130.57fps, latency: 7.22ms, time interval: 
10.00s - throughput: 141.61fps, latency: 6.62ms, time interval: 10.01s + throughput: 130.56fps, latency: 7.18ms, time interval: 10.00s + throughput: 142.51fps, latency: 6.61ms, time interval: 10.01s + throughput: 142.47fps, latency: 6.62ms, time interval: 10.00s + throughput: 142.46fps, latency: 6.61ms, time interval: 10.00s + throughput: 142.63fps, latency: 6.61ms, time interval: 10.00s + throughput: 142.73fps, latency: 6.60ms, time interval: 10.00s Done diff --git a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png index af14e6c0ac24c7..cc037738f18096 100644 --- a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png +++ b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:05bc2541cede3c32f2f9acff3f93d19572aab80a5bb7ac3e6c77bb397817bb54 -size 25899 +oid sha256:1bedd8ff3e65a23fb4af380958a261d0916d2e0134b9426652a2779bdc06d6de +size 26887 diff --git a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png index e12bb8a48eaa0e..21be57ac89d68d 100644 --- a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png +++ b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b1e2e56c28c18b5d31607518b846e78c58e76b2b59a9ee083d02e0d8f8ec2b52 -size 40077 +oid sha256:ed1ab24c30040707a36155169f4aaa91a5bff6cb48a2c5d10401ecbd87ca6f54 +size 40117 diff --git a/docs/notebooks/catvton-with-output.rst b/docs/notebooks/catvton-with-output.rst new file mode 100644 index 00000000000000..a7a9a04359f338 --- /dev/null +++ b/docs/notebooks/catvton-with-output.rst @@ -0,0 +1,360 @@ +Virtual Try-On with CatVTON and OpenVINO 
+======================================== + +Virtual try-on methods based on diffusion models achieve realistic +try-on effects but replicate the backbone network as a ReferenceNet or +leverage additional image encoders to process condition inputs, +resulting in high training and inference costs. `In this +work <https://arxiv.org/abs/2407.15886>`__, the authors rethink the necessity +of ReferenceNet and image encoders and innovate the interaction between +garment and person, proposing CatVTON, a simple and efficient virtual +try-on diffusion model. It facilitates the seamless transfer of in-shop +or worn garments of arbitrary categories to target persons by simply +concatenating them in spatial dimensions as inputs. The efficiency of +the model is demonstrated in three aspects: 1. Lightweight network. Only +the original diffusion modules are used, without additional network +modules. The text encoder and cross attentions for text injection in the +backbone are removed, further reducing the parameters by 167.02M. 2. +Parameter-efficient training. We identified the try-on relevant modules +through experiments and achieved high-quality try-on effects by training +only 49.57M parameters (∼5.51% of the backbone network’s parameters). 3. +Simplified inference. CatVTON eliminates all unnecessary conditions and +preprocessing steps, including pose estimation, human parsing, and text +input, requiring only garment reference, target person image, and mask +for the virtual try-on process. Extensive experiments demonstrate that +CatVTON achieves superior qualitative and quantitative results with +fewer prerequisites and trainable parameters than baseline methods. +Furthermore, CatVTON shows good generalization in in-the-wild scenarios +despite using open-source datasets with only 73K samples. + +Teaser image from `CatVTON +GitHub <https://github.com/Zheng-Chong/CatVTON>`__ |teaser| + +In this tutorial we consider how to convert and run this model using +OpenVINO. 
+ + +**Table of contents:** + + +- `Prerequisites <#prerequisites>`__ +- `Convert the model to OpenVINO + IR <#convert-the-model-to-openvino-ir>`__ +- `Compiling models <#compiling-models>`__ +- `Interactive demo <#interactive-demo>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +.. |teaser| image:: https://github.com/Zheng-Chong/CatVTON/blob/edited/resource/img/teaser.jpg?raw=true + +Prerequisites +------------- + + + +.. code:: ipython3 + + import platform + + + if platform.system() == "Darwin": + %pip install -q "numpy<2.0.0" + %pip install -q "openvino>=2024.4" + %pip install -q "torch>=2.1" "diffusers>=0.29.1" torchvision opencv_python --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q fvcore "pillow" "tqdm" "gradio>=4.36" "omegaconf==2.4.0.dev3" av pycocotools cloudpickle scipy accelerate "transformers>=4.27.3" + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + + +.. code:: ipython3 + + import requests + + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + + r = requests.get( + url="https://raw.githubusercontent.com/aleksandr-mokrov/openvino_notebooks/refs/heads/catvton/utils/cmd_helper.py", + ) + open("cmd_helper.py", "w").write(r.text) + + + + +.. parsed-literal:: + + 741 + + + +.. code:: ipython3 + + from cmd_helper import clone_repo + + + clone_repo("https://github.com/Zheng-Chong/CatVTON.git", "3b795364a4d2f3b5adb365f39cdea376d20bc53c") + + + + +.. 
parsed-literal:: + + PosixPath('CatVTON') + + + +Convert the model to OpenVINO IR +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +OpenVINO supports PyTorch models via conversion to OpenVINO Intermediate +Representation (IR). `OpenVINO model conversion +API `__ +should be used for these purposes. ``ov.convert_model`` function accepts +original PyTorch model instance and example input for tracing and +returns ``ov.Model`` representing this model in OpenVINO framework. +Converted model can be used for saving on disk using ``ov.save_model`` +function or directly loading on device using ``core.compile_model``. + +``ov_catvton_helper.py`` script contains helper functions for model +downloading and model conversion; please check its content if you are +interested in conversion details. + +To download checkpoints and load models, just call the helper function +``download_models``. It takes care of both. Functions +``convert_pipeline_models`` and ``convert_automasker_models`` will +convert models from pipeline and ``automasker`` to OpenVINO format. + +The original pipeline contains VAE encoder and decoder and UNET. +|CatVTON-overview| + +The ``automasker`` contains ``DensePose`` with +``detectron2.GeneralizedRCNN`` model and ``SCHP`` (``LIP`` and ``ATR`` +version). + +.. |CatVTON-overview| image:: https://github.com/user-attachments/assets/e35c8dab-1c54-47b1-a73b-2a62e6cdca7c + +.. 
code:: ipython3 + + from pathlib import Path + + from ov_catvton_helper import download_models, convert_pipeline_models, convert_automasker_models + + + MODEL_DIR = Path("models") + VAE_ENCODER_PATH = MODEL_DIR / "vae_encoder.xml" + VAE_DECODER_PATH = MODEL_DIR / "vae_decoder.xml" + UNET_PATH = MODEL_DIR / "unet.xml" + DENSEPOSE_PROCESSOR_PATH = MODEL_DIR / "densepose_processor.xml" + SCHP_PROCESSOR_ATR = MODEL_DIR / "schp_processor_atr.xml" + SCHP_PROCESSOR_LIP = MODEL_DIR / "schp_processor_lip.xml" + + + pipeline, mask_processor, automasker = download_models(MODEL_DIR) + convert_pipeline_models(pipeline, VAE_ENCODER_PATH, VAE_DECODER_PATH, UNET_PATH) + convert_automasker_models(automasker, DENSEPOSE_PROCESSOR_PATH, SCHP_PROCESSOR_ATR, SCHP_PROCESSOR_LIP) + + +.. parsed-literal:: + + Note: switching to '3b795364a4d2f3b5adb365f39cdea376d20bc53c'. + + You are in 'detached HEAD' state. You can look around, make experimental + changes and commit them, and you can discard any commits you make in this + state without impacting any branches by switching back to a branch. + + If you want to create a new branch to retain commits you create, you may + do so (now or later) by using -c with the switch command. Example: + + git switch -c + + Or undo this operation with: + + git switch - + + Turn off this advice by setting config variable advice.detachedHead to false + + HEAD is now at 3b79536 Update default base model path + + + +.. parsed-literal:: + + Fetching 10 files: 0%| | 0/10 [00:00= 64: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unets/unet_2d_condition.py:1111: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ if dim % default_overall_up_factor != 0: + + +Compiling models +---------------- + + + +Select device from dropdown list for running inference using OpenVINO. + +.. code:: ipython3 + + import openvino as ov + + from notebook_utils import device_widget + + + core = ov.Core() + + device = device_widget() + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + +``get_compiled_pipeline`` and ``get_compiled_automasker`` functions +defined in ``ov_catvton_helper.py`` provide a convenient way of getting +the pipeline and the ``automasker`` with compiled ov-models that are +compatible with the original interface. They accept the original pipeline +and ``automasker``, inference device and directories with converted +models as arguments. Under the hood we create callable wrapper classes +for compiled models to allow interaction with original pipelines. Note +that all of the wrapper classes return ``torch.Tensor``\ s instead of +``np.array``\ s. The wrapper instances are then inserted in the pipeline. + +.. code:: ipython3 + + from ov_catvton_helper import get_compiled_pipeline, get_compiled_automasker + + + pipeline = get_compiled_pipeline(pipeline, core, device, VAE_ENCODER_PATH, VAE_DECODER_PATH, UNET_PATH) + automasker = get_compiled_automasker(automasker, core, device, DENSEPOSE_PROCESSOR_PATH, SCHP_PROCESSOR_ATR, SCHP_PROCESSOR_LIP) + +Interactive inference +--------------------- + + + +Please select below whether you would like to use the quantized models +to launch the interactive demo. + +.. code:: ipython3 + + from gradio_helper import make_demo + + + output_dir = "output" + demo = make_demo(pipeline, mask_processor, automasker, output_dir) + try: + demo.launch(debug=False) + except Exception: + demo.launch(debug=False, share=True) + + +.. parsed-literal:: + + Running on local URL: http://127.0.0.1:7860 + + To create a public link, set `share=True` in `launch()`. 
+ + + + + + + diff --git a/docs/notebooks/clip-zero-shot-classification-with-output.rst b/docs/notebooks/clip-zero-shot-classification-with-output.rst index 3da831e6d9d0dd..fd572a83ffb834 100644 --- a/docs/notebooks/clip-zero-shot-classification-with-output.rst +++ b/docs/notebooks/clip-zero-shot-classification-with-output.rst @@ -729,7 +729,6 @@ up of the dynamic quantized models. Interactive demo ---------------- - Now, it is your turn! You can provide your own image and comma-separated list of labels for zero-shot classification. diff --git a/docs/notebooks/convert-to-openvino-with-output.rst b/docs/notebooks/convert-to-openvino-with-output.rst index 8ec32adef0e04a..2baaf0043e7f04 100644 --- a/docs/notebooks/convert-to-openvino-with-output.rst +++ b/docs/notebooks/convert-to-openvino-with-output.rst @@ -163,7 +163,7 @@ NLP model from Hugging Face and export it in ONNX format: .. code:: ipython3 from transformers import AutoModelForSequenceClassification, AutoTokenizer - from transformers.onnx import export, FeaturesManager + import torch ONNX_NLP_MODEL_PATH = MODEL_DIRECTORY_PATH / "distilbert.onnx" @@ -172,37 +172,27 @@ NLP model from Hugging Face and export it in ONNX format: # initialize tokenizer tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english") - # get model onnx config function for output feature format sequence-classification - model_kind, model_onnx_config = FeaturesManager.check_supported_model_or_raise(hf_model, feature="sequence-classification") - # fill onnx config based on pytorch model config - onnx_config = model_onnx_config(hf_model.config) - - # export to onnx format - export( - preprocessor=tokenizer, - model=hf_model, - config=onnx_config, - opset=onnx_config.default_onnx_opset, - output=ONNX_NLP_MODEL_PATH, - ) + if not ONNX_NLP_MODEL_PATH.exists(): + inputs = tokenizer("Hi, how are you?", return_tensors="pt") + input_names = list(inputs.keys()) + dynamic_axes = {input_name: {0: "batch_size", 1: 
"seq_length"} for input_name in input_names} + torch.onnx.export( + hf_model, args=dict(inputs), input_names=input_names, output_names=["logits"], dynamic_axes=dynamic_axes, f=ONNX_NLP_MODEL_PATH, opset_version=14 + ) + print(f"ONNX model exported to {ONNX_NLP_MODEL_PATH}") .. parsed-literal:: - 2024-10-22 22:40:21.522113: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-22 22:40:21.555890: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-04 22:48:30.842642: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-04 22:48:30.876775: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-22 22:40:22.084160: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/distilbert/modeling_distilbert.py:215: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. 
In any other case, this might cause the trace to be incorrect. - mask, torch.tensor(torch.finfo(scores.dtype).min) - - + 2024-11-04 22:48:31.539454: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: - (['input_ids', 'attention_mask'], ['logits']) - + ONNX model exported to model/distilbert.onnx Fetch @@ -670,7 +660,7 @@ frameworks conversion guides. .. parsed-literal:: - 2024-10-22 22:40:40.138322: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. + 2024-11-04 22:48:47.716205: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. Skipping registering GPU devices... 
diff --git a/docs/notebooks/convnext-classification-with-output.rst b/docs/notebooks/convnext-classification-with-output.rst index 1204ea2c17f106..6e1c039f7013c6 100644 --- a/docs/notebooks/convnext-classification-with-output.rst +++ b/docs/notebooks/convnext-classification-with-output.rst @@ -192,7 +192,7 @@ And print results Predicted Class: 281 Predicted Label: n02123045 tabby, tabby cat - Predicted Probability: 0.5351971983909607 + Predicted Probability: 0.4661690592765808 Convert the model to OpenVINO Intermediate representation format diff --git a/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst b/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst index 6f003fb71d75fb..30778bafc8e884 100644 --- a/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst +++ b/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst @@ -154,10 +154,10 @@ Imports .. parsed-literal:: - 2024-10-22 22:41:03.560708: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-22 22:41:03.596278: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-04 22:49:10.827255: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-04 22:49:10.861330: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-22 22:41:04.191138: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-04 22:49:11.454332: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -223,7 +223,7 @@ notebook `__. .. parsed-literal:: - /tmp/ipykernel_2583350/1592321960.py:3: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /tmp/ipykernel_503635/1592321960.py:3: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. 
Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. state_dict = torch.load(state_dict_file, map_location=torch.device("cpu")) @@ -444,7 +444,7 @@ this notebook. .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if x_e.shape[-i - 1] != x_0.shape[-i - 1]: @@ -526,18 +526,18 @@ Convert quantized model to OpenVINO IR model and save it. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:340: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:340: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! return self._level_low.item() - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:348: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:348: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! return self._level_high.item() - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if x_e.shape[-i - 1] != x_0.shape[-i - 1]: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: Tensor-likes are not close! - Mismatched elements: 249255 / 262144 (95.1%) - Greatest absolute difference: 3.8265769481658936 at index (0, 0, 126, 353) (up to 1e-05 allowed) - Greatest relative difference: 18364.59337143498 at index (0, 0, 305, 451) (up to 1e-05 allowed) + Mismatched elements: 245783 / 262144 (93.8%) + Greatest absolute difference: 3.1180567741394043 at index (0, 0, 474, 435) (up to 1e-05 allowed) + Greatest relative difference: 16087.83647354372 at index (0, 0, 37, 224) (up to 1e-05 allowed) _check_trace( @@ -663,7 +663,7 @@ be run in the notebook with ``! benchmark_app`` or [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. 
[Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 8.83 ms + [ INFO ] Read model took 8.85 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [1,1,512,512] @@ -677,7 +677,7 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.final_conv/aten::_convolution/Add) : f32 / [...] / [1,1,512,512] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 250.74 ms + [ INFO ] Compile model took 253.47 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -714,17 +714,17 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 51.21 ms + [ INFO ] First inference took 56.51 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 392 iterations - [ INFO ] Duration: 15027.48 ms + [ INFO ] Count: 406 iterations + [ INFO ] Duration: 15019.48 ms [ INFO ] Latency: - [ INFO ] Median: 34.96 ms - [ INFO ] Average: 38.10 ms - [ INFO ] Min: 34.49 ms + [ INFO ] Median: 35.01 ms + [ INFO ] Average: 36.77 ms + [ INFO ] Min: 34.63 ms [ INFO ] Max: 48.05 ms - [ INFO ] Throughput: 26.09 FPS + [ INFO ] Throughput: 27.03 FPS .. code:: ipython3 @@ -750,7 +750,7 @@ be run in the notebook with ``! benchmark_app`` or [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 11.01 ms + [ INFO ] Read model took 10.78 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] 
/ [1,1,512,512] @@ -764,7 +764,7 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.final_conv/aten::_convolution/Add) : f32 / [...] / [1,1,512,512] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 238.95 ms + [ INFO ] Compile model took 250.08 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model49 @@ -801,17 +801,17 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 30.03 ms + [ INFO ] First inference took 29.09 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 952 iterations - [ INFO ] Duration: 15015.31 ms + [ INFO ] Count: 938 iterations + [ INFO ] Duration: 15008.12 ms [ INFO ] Latency: - [ INFO ] Median: 15.55 ms - [ INFO ] Average: 15.57 ms - [ INFO ] Min: 15.23 ms - [ INFO ] Max: 17.26 ms - [ INFO ] Throughput: 63.40 FPS + [ INFO ] Median: 15.77 ms + [ INFO ] Average: 15.80 ms + [ INFO ] Min: 15.47 ms + [ INFO ] Max: 17.13 ms + [ INFO ] Throughput: 62.50 FPS Visually Compare Inference Results @@ -905,7 +905,7 @@ seed is displayed to enable reproducing specific runs of this cell. .. parsed-literal:: - Visualizing results with seed 1729629747 + Visualizing results with seed 1730757034 @@ -988,8 +988,8 @@ performs inference, and displays the results on the frames loaded in .. parsed-literal:: - Loaded model to AUTO in 0.18 seconds. - Total time for 68 frames: 2.32 seconds, fps:29.79 + Loaded model to AUTO in 0.15 seconds. 
+ Total time for 68 frames: 2.36 seconds, fps:29.25 References diff --git a/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png b/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png index f7c163c1c77604..5aa37909b71cf7 100644 --- a/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png +++ b/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5d329fc21292f69aeee8164a3f805f2e8e61369c42eab565ebafe555cf6d1a1c -size 381131 +oid sha256:894600de56af211d4cc3e64ee092b5a62d1b0158c51048d17accadddea0f046e +size 382725 diff --git a/docs/notebooks/ddcolor-image-colorization-with-output.rst b/docs/notebooks/ddcolor-image-colorization-with-output.rst index b2a76fd1a0ded8..409d2495e2fea6 100644 --- a/docs/notebooks/ddcolor-image-colorization-with-output.rst +++ b/docs/notebooks/ddcolor-image-colorization-with-output.rst @@ -105,12 +105,12 @@ Prerequisites .. parsed-literal:: Cloning into 'DDColor'... - remote: Enumerating objects: 233, done. - remote: Counting objects: 100% (76/76), done. - remote: Compressing objects: 100% (42/42), done. - remote: Total 233 (delta 54), reused 34 (delta 34), pack-reused 157 (from 1) - Receiving objects: 100% (233/233), 13.34 MiB | 641.00 KiB/s, done. - Resolving deltas: 100% (80/80), done. + remote: Enumerating objects: 241, done. + remote: Counting objects: 100% (84/84), done. + remote: Compressing objects: 100% (49/49), done. + remote: Total 241 (delta 57), reused 37 (delta 35), pack-reused 157 (from 1) + Receiving objects: 100% (241/241), 14.10 MiB | 21.95 MiB/s, done. + Resolving deltas: 100% (83/83), done. @@ -131,7 +131,7 @@ Prerequisites .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers warnings.warn(f"Importing from {__name__} is deprecated, please import via timm.layers", FutureWarning) @@ -412,10 +412,10 @@ Perform model quantization .. parsed-literal:: - 2024-10-22 22:45:07.339219: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-22 22:45:07.378241: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-04 22:52:53.152561: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-04 22:52:53.191342: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2024-10-22 22:45:07.784302: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-04 22:52:53.595160: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -523,7 +523,7 @@ Tool + @@ -307,13 +304,13 @@ loading on device using ``core.complie_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert H % patch_H == 0, f"Input image height {H} is not a multiple of patch height {patch_H}" - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert W % patch_W == 0, f"Input image width {W} is not a multiple of patch width: {patch_W}" - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if npatch == N and w == h: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dpt.py:147: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dpt.py:147: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! out = F.interpolate(out, (int(patch_h * 14), int(patch_w * 14)), mode="bilinear", align_corners=True) @@ -405,7 +402,7 @@ range. .. parsed-literal:: - + @@ -627,7 +624,7 @@ Run inference on video .. parsed-literal:: - Processed 60 frames in 13.44 seconds. Total FPS (including video processing): 4.46.Inference FPS: 10.42 + Processed 60 frames in 13.24 seconds. Total FPS (including video processing): 4.53.Inference FPS: 10.68 Video saved to 'output/Coco Walking in Berkeley_depth_anything.mp4'. @@ -654,7 +651,7 @@ Run inference on video .. parsed-literal:: Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -787,10 +784,10 @@ quantization code below may take some time. .. parsed-literal:: - 2024-10-22 22:53:01.689628: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
- 2024-10-22 22:53:01.723459: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-04 23:01:18.047102: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-04 23:01:18.080343: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-22 22:53:02.312695: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-04 23:01:18.654050: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -916,10 +913,10 @@ data. .. parsed-literal:: - Processed 60 frames in 12.93 seconds. Total FPS (including video processing): 4.64.Inference FPS: 12.78 + Processed 60 frames in 12.60 seconds. Total FPS (including video processing): 4.76.Inference FPS: 13.12 Video saved to 'output/Coco Walking in Berkeley_depth_anything_int8.mp4'. 
Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -999,9 +996,9 @@ Tool =4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (4.12.2) - Requirement already satisfied: sympy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (1.13.3) - Requirement already satisfied: networkx in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1) - Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1.4) - Requirement already satisfied: fsspec in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (2024.9.0) - Requirement already satisfied: numpy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchvision) (1.23.5) - Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchvision) (10.4.0) - Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.5) - Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch) (1.3.0) + Requirement already satisfied: torch in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2.4.1+cpu) + Requirement already satisfied: torchvision in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (0.19.1+cpu) + Requirement already satisfied: opencv-python in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.10.0.84) + Requirement already satisfied: wheel in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (0.44.0) + Requirement already satisfied: filelock in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.16.1) + Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (4.12.2) + Requirement already satisfied: sympy in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (1.13.3) + Requirement already satisfied: networkx in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1) + Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1.4) + Requirement already satisfied: fsspec in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (2024.9.0) + Requirement already satisfied: numpy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchvision) (1.23.5) + Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchvision) (10.4.0) + Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.5) + Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch) (1.3.0) Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cpu Collecting git+https://github.com/facebookresearch/detectron2.git - Cloning https://github.com/facebookresearch/detectron2.git to /tmp/pip-req-build-we1e_5gi + Cloning 
https://github.com/facebookresearch/detectron2.git to /tmp/pip-req-build-9ds1xx43 .. parsed-literal:: - Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/detectron2.git /tmp/pip-req-build-we1e_5gi + Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/detectron2.git /tmp/pip-req-build-9ds1xx43 .. parsed-literal:: @@ -125,88 +125,88 @@ Install required packages for running model Resolved https://github.com/facebookresearch/detectron2.git to commit 8d85329aed8506ea3672e3e208971345973ea761 Preparing metadata (setup.py): started Preparing metadata (setup.py): finished with status 'done' - Requirement already satisfied: Pillow>=7.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (10.4.0) - Requirement already satisfied: black in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (24.3.0) - Collecting cloudpickle (from detectron2==0.6) - Using cached cloudpickle-3.1.0-py3-none-any.whl.metadata (7.0 kB) - Collecting fvcore<0.1.6,>=0.1.5 (from detectron2==0.6) - Using cached fvcore-0.1.5.post20221221-py3-none-any.whl + Requirement already satisfied: Pillow>=7.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (10.4.0) + Requirement already satisfied: black in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (24.3.0) + Requirement already satisfied: cloudpickle in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (3.1.0) + Requirement already satisfied: fvcore<0.1.6,>=0.1.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (0.1.5.post20221221) Collecting hydra-core>=1.1 (from detectron2==0.6) Using cached hydra_core-1.3.2-py3-none-any.whl.metadata (5.5 kB) Collecting iopath<0.1.10,>=0.1.7 (from detectron2==0.6) - Using cached iopath-0.1.9-py3-none-any.whl.metadata (370 bytes) - Requirement already satisfied: matplotlib in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (3.7.5) - Requirement already satisfied: omegaconf<2.4,>=2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.3.0) - Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (24.1) - Collecting pycocotools>=2.0.2 (from detectron2==0.6) - Using cached pycocotools-2.0.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.1 kB) - Requirement already satisfied: tabulate in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (0.9.0) - Requirement already satisfied: tensorboard in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.12.3) - Requirement 
already satisfied: termcolor>=1.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.4.0) - Requirement already satisfied: tqdm>4.29.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (4.66.5) - Collecting yacs>=0.1.8 (from detectron2==0.6) - Using cached yacs-0.1.8-py3-none-any.whl.metadata (639 bytes) - Requirement already satisfied: numpy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from fvcore<0.1.6,>=0.1.5->detectron2==0.6) (1.23.5) - Requirement already satisfied: pyyaml>=5.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from fvcore<0.1.6,>=0.1.5->detectron2==0.6) (6.0.2) - Requirement already satisfied: antlr4-python3-runtime==4.9.* in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.1->detectron2==0.6) (4.9.3) - Requirement already satisfied: importlib-resources in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.1->detectron2==0.6) (6.4.5) - Collecting portalocker (from iopath<0.1.10,>=0.1.7->detectron2==0.6) - Using cached portalocker-2.10.1-py3-none-any.whl.metadata (8.5 kB) - Requirement already satisfied: contourpy>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (1.1.1) - Requirement already 
satisfied: cycler>=0.10 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (0.12.1) - Requirement already satisfied: fonttools>=4.22.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (4.54.1) - Requirement already satisfied: kiwisolver>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (1.4.7) - Requirement already satisfied: pyparsing>=2.3.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (3.1.4) - Requirement already satisfied: python-dateutil>=2.7 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (2.9.0.post0) - Requirement already satisfied: click>=8.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (8.1.7) - Requirement already satisfied: mypy-extensions>=0.4.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (1.0.0) - Requirement already satisfied: pathspec>=0.9.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (0.12.1) - Requirement already satisfied: platformdirs>=2 
in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (4.3.6) - Requirement already satisfied: tomli>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (2.0.2) - Requirement already satisfied: typing-extensions>=4.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (4.12.2) - Requirement already satisfied: absl-py>=0.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.4.0) - Requirement already satisfied: grpcio>=1.48.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.67.0) - Requirement already satisfied: google-auth<3,>=1.6.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (2.35.0) - Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.0.0) - Requirement already satisfied: markdown>=2.6.8 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.7) - Requirement already satisfied: protobuf>=3.19.6 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.20.3) - Requirement already satisfied: requests<3,>=2.21.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (2.32.3) - Requirement already satisfied: setuptools>=41.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (44.0.0) - Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (0.7.2) - Requirement already satisfied: werkzeug>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.0.4) - Requirement already satisfied: wheel>=0.26 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (0.44.0) - Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (5.5.0) - Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from 
google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (0.4.1) - Requirement already satisfied: rsa<5,>=3.1.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (4.9) - Requirement already satisfied: requests-oauthlib>=0.7.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard->detectron2==0.6) (2.0.0) - Requirement already satisfied: zipp>=3.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from importlib-resources->hydra-core>=1.1->detectron2==0.6) (3.20.2) - Requirement already satisfied: importlib-metadata>=4.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from markdown>=2.6.8->tensorboard->detectron2==0.6) (8.5.0) - Requirement already satisfied: six>=1.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from python-dateutil>=2.7->matplotlib->detectron2==0.6) (1.16.0) - Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (3.4.0) - Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (3.10) - Requirement already satisfied: 
urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (2.2.3) - Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (2024.8.30) - Requirement already satisfied: MarkupSafe>=2.1.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from werkzeug>=1.0.1->tensorboard->detectron2==0.6) (2.1.5) - Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (0.6.1) - Requirement already satisfied: oauthlib>=3.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard->detectron2==0.6) (3.2.2) + Using cached https://download.pytorch.org/whl/iopath-0.1.9-py3-none-any.whl (27 kB) + Requirement already satisfied: matplotlib in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (3.7.5) + Collecting omegaconf<2.4,>=2.1 (from detectron2==0.6) + Using cached omegaconf-2.3.0-py3-none-any.whl.metadata (3.9 kB) + Requirement already satisfied: packaging in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (24.1) + Requirement already satisfied: pycocotools>=2.0.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.0.7) + Requirement already satisfied: tabulate in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (0.9.0) + Requirement already satisfied: tensorboard in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.12.3) + Requirement already satisfied: termcolor>=1.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.4.0) + Requirement already satisfied: tqdm>4.29.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (4.66.6) + Requirement already satisfied: yacs>=0.1.8 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (0.1.8) + Requirement already satisfied: numpy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from fvcore<0.1.6,>=0.1.5->detectron2==0.6) (1.23.5) + Requirement already satisfied: pyyaml>=5.1 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from fvcore<0.1.6,>=0.1.5->detectron2==0.6) (6.0.2) + Requirement already satisfied: antlr4-python3-runtime==4.9.* in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.1->detectron2==0.6) (4.9.3) + Requirement already satisfied: importlib-resources in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.1->detectron2==0.6) (6.4.5) + Requirement already satisfied: portalocker in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from iopath<0.1.10,>=0.1.7->detectron2==0.6) (2.10.1) + Requirement already satisfied: contourpy>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (1.1.1) + Requirement already satisfied: cycler>=0.10 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (0.12.1) + Requirement already satisfied: fonttools>=4.22.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (4.54.1) + Requirement already satisfied: kiwisolver>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (1.4.7) + Requirement already satisfied: 
pyparsing>=2.3.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (3.1.4) + Requirement already satisfied: python-dateutil>=2.7 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (2.9.0.post0) + Requirement already satisfied: click>=8.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (8.1.7) + Requirement already satisfied: mypy-extensions>=0.4.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (1.0.0) + Requirement already satisfied: pathspec>=0.9.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (0.12.1) + Requirement already satisfied: platformdirs>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (4.3.6) + Requirement already satisfied: tomli>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (2.0.2) + Requirement already satisfied: typing-extensions>=4.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (4.12.2) + Requirement already satisfied: absl-py>=0.4 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.4.0) + Requirement already satisfied: grpcio>=1.48.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.67.1) + Requirement already satisfied: google-auth<3,>=1.6.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (2.35.0) + Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.0.0) + Requirement already satisfied: markdown>=2.6.8 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.7) + Requirement already satisfied: protobuf>=3.19.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.20.3) + Requirement already satisfied: requests<3,>=2.21.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (2.32.3) + Requirement already satisfied: setuptools>=41.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (44.0.0) + Requirement already satisfied: 
tensorboard-data-server<0.8.0,>=0.7.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (0.7.2) + Requirement already satisfied: werkzeug>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.0.6) + Requirement already satisfied: wheel>=0.26 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (0.44.0) + Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (5.5.0) + Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (0.4.1) + Requirement already satisfied: rsa<5,>=3.1.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (4.9) + Requirement already satisfied: requests-oauthlib>=0.7.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard->detectron2==0.6) (2.0.0) + Requirement already satisfied: zipp>=3.1.0 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from importlib-resources->hydra-core>=1.1->detectron2==0.6) (3.20.2) + Requirement already satisfied: importlib-metadata>=4.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from markdown>=2.6.8->tensorboard->detectron2==0.6) (8.5.0) + Requirement already satisfied: six>=1.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from python-dateutil>=2.7->matplotlib->detectron2==0.6) (1.16.0) + Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (3.4.0) + Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (3.10) + Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (2.2.3) + Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (2024.8.30) + Requirement already satisfied: MarkupSafe>=2.1.1 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from werkzeug>=1.0.1->tensorboard->detectron2==0.6) (2.1.5) + Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (0.6.1) + Requirement already satisfied: oauthlib>=3.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard->detectron2==0.6) (3.2.2) Using cached hydra_core-1.3.2-py3-none-any.whl (154 kB) - Using cached iopath-0.1.9-py3-none-any.whl (27 kB) - Using cached pycocotools-2.0.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (439 kB) - Using cached yacs-0.1.8-py3-none-any.whl (14 kB) - Using cached cloudpickle-3.1.0-py3-none-any.whl (22 kB) - Using cached portalocker-2.10.1-py3-none-any.whl (18 kB) + Using cached omegaconf-2.3.0-py3-none-any.whl (79 kB) Building wheels for collected packages: detectron2 Building wheel for detectron2 (setup.py): started Building wheel for detectron2 (setup.py): finished with status 'done' - Created wheel for detectron2: filename=detectron2-0.6-cp38-cp38-linux_x86_64.whl size=8313552 sha256=23ceb6e5b734ecc530172b613be139d732deaa2e962d5a8bc940e6b23a85309d - Stored in directory: /tmp/pip-ephem-wheel-cache-65iaghs7/wheels/19/ac/65/e48e5e4ec2702274d927c5a6efb75709b24014371d3bb778f2 + Created wheel for detectron2: filename=detectron2-0.6-cp38-cp38-linux_x86_64.whl size=8313237 sha256=7cd84a15a89de76a7ab5b648f2fb7ebff63b7e43ffc90c7f19a568d16858de8a + Stored in directory: 
/tmp/pip-ephem-wheel-cache-uvptv5zg/wheels/19/ac/65/e48e5e4ec2702274d927c5a6efb75709b24014371d3bb778f2 Successfully built detectron2 - Installing collected packages: yacs, portalocker, cloudpickle, iopath, hydra-core, pycocotools, fvcore, detectron2 - Successfully installed cloudpickle-3.1.0 detectron2-0.6 fvcore-0.1.5.post20221221 hydra-core-1.3.2 iopath-0.1.9 portalocker-2.10.1 pycocotools-2.0.7 yacs-0.1.8 - Requirement already satisfied: openvino>=2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2024.4.0) - Requirement already satisfied: numpy<2.1.0,>=1.16.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (1.23.5) - Requirement already satisfied: openvino-telemetry>=2023.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (2024.1.0) - Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (24.1) + Installing collected packages: omegaconf, iopath, hydra-core, detectron2 + Attempting uninstall: omegaconf + Found existing installation: omegaconf 2.4.0.dev3 + Uninstalling omegaconf-2.4.0.dev3: + Successfully uninstalled omegaconf-2.4.0.dev3 + Attempting uninstall: iopath + Found existing installation: iopath 0.1.10 + Uninstalling iopath-0.1.10: + Successfully uninstalled iopath-0.1.10 + Successfully installed detectron2-0.6 hydra-core-1.3.2 iopath-0.1.9 omegaconf-2.3.0 + Requirement already satisfied: openvino>=2023.1.0 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2024.4.0) + Requirement already satisfied: numpy<2.1.0,>=1.16.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (1.23.5) + Requirement already satisfied: openvino-telemetry>=2023.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (2024.1.0) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (24.1) Define helpers for PyTorch model initialization and conversion diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg index 6160562199f757..f5b1d98eea3213 100644 --- a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b6475f9155bca7152327255fb36bac32dea6f10aa834b6a91d46d4ecc718be0f -size 58279 +oid sha256:0df4e94924f81aab66086702d85a461f463078f0d06f67b1fe5d46ad8480aa91 +size 58652 diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png index 77ac4e5e0c1fd8..f676b44edd1d9a 100644 --- 
a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b98f4e2e56bb62e6d7c68e536d05695f16b412d4bb8a502f5ced3c466716fe91 -size 508620 +oid sha256:b5a857cd060d740290ccc65aec47252aad9f41c665dc2808195c3185248977e8 +size 509376 diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg index 98087b7c1f10f2..67719cdcbd66b0 100644 --- a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:02cb8226997c872abaee3c70c1ab90c8e6ac078adb81ec6d0721ece6049e7af3 -size 54825 +oid sha256:ddc40900fddf1a115903c4e200899306060114348bf2ca82fbb4d7d92a885b09 +size 53897 diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png index e317a3a3005ef6..af63ef41697b47 100644 --- a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:940552cfe8d62878a7b2fb827529df2ef6ef4fa135f82f578362142650e0d751 -size 457741 +oid sha256:d1276209027e5aac72e4bb6f39f4494d2a807ee4bd85054a1285b0832e4515b9 +size 460797 diff --git a/docs/notebooks/distil-whisper-asr-with-output.rst b/docs/notebooks/distil-whisper-asr-with-output.rst index 
2cdecfe17a19d4..53950226a210c7 100644 --- a/docs/notebooks/distil-whisper-asr-with-output.rst +++ b/docs/notebooks/distil-whisper-asr-with-output.rst @@ -133,7 +133,8 @@ using tokenizer. "distil-whisper/distil-small.en", ], "Whisper": [ - "openai/whisper-large-v3-turbo" "openai/whisper-large-v3", + "openai/whisper-large-v3-turbo", + "openai/whisper-large-v3", "openai/whisper-large-v2", "openai/whisper-large", "openai/whisper-medium", diff --git a/docs/notebooks/distilbert-sequence-classification-with-output.rst b/docs/notebooks/distilbert-sequence-classification-with-output.rst index 5f069551357b6d..862079f68aeeb7 100644 --- a/docs/notebooks/distilbert-sequence-classification-with-output.rst +++ b/docs/notebooks/distilbert-sequence-classification-with-output.rst @@ -47,31 +47,31 @@ Imports .. parsed-literal:: Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cpu - Requirement already satisfied: openvino>=2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2024.4.0) - Requirement already satisfied: transformers in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.45.2) - Requirement already satisfied: torch>=2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2.4.1+cpu) - Requirement already satisfied: tqdm in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.66.5) - Requirement already satisfied: numpy<2.1.0,>=1.16.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (1.23.5) - Requirement 
already satisfied: openvino-telemetry>=2023.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (2024.1.0) - Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (24.1) - Requirement already satisfied: filelock in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (3.16.1) - Requirement already satisfied: huggingface-hub<1.0,>=0.23.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.26.1) - Requirement already satisfied: pyyaml>=5.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (6.0.2) - Requirement already satisfied: regex!=2019.12.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (2024.9.11) - Requirement already satisfied: requests in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (2.32.3) - Requirement already satisfied: safetensors>=0.4.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.4.5) - Requirement already satisfied: tokenizers<0.21,>=0.20 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.20.1) - Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (4.12.2) - Requirement already satisfied: sympy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (1.13.3) - Requirement already satisfied: networkx in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.1) - Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.1.4) - Requirement already satisfied: fsspec in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (2024.9.0) - Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch>=2.1) (2.1.5) - Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (3.4.0) - Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from 
requests->transformers) (3.10) - Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2.2.3) - Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2024.8.30) - Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch>=2.1) (1.3.0) + Requirement already satisfied: openvino>=2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2024.4.0) + Requirement already satisfied: transformers in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.46.1) + Requirement already satisfied: torch>=2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2.4.1+cpu) + Requirement already satisfied: tqdm in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.66.6) + Requirement already satisfied: numpy<2.1.0,>=1.16.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (1.23.5) + Requirement already satisfied: openvino-telemetry>=2023.2.1 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (2024.1.0) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (24.1) + Requirement already satisfied: filelock in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (3.16.1) + Requirement already satisfied: huggingface-hub<1.0,>=0.23.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.26.2) + Requirement already satisfied: pyyaml>=5.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (6.0.2) + Requirement already satisfied: regex!=2019.12.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (2024.9.11) + Requirement already satisfied: requests in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (2.32.3) + Requirement already satisfied: safetensors>=0.4.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.4.5) + Requirement already satisfied: tokenizers<0.21,>=0.20 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.20.2) + Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (4.12.2) + Requirement already satisfied: sympy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (1.13.3) + Requirement already satisfied: networkx in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.1) + Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.1.4) + Requirement already satisfied: fsspec in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (2024.9.0) + Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch>=2.1) (2.1.5) + Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (3.4.0) + Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from 
requests->transformers) (3.10) + Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2.2.3) + Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2024.8.30) + Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch>=2.1) (1.3.0) Note: you may need to restart the kernel to use updated packages. @@ -110,6 +110,15 @@ model from Hugging Face. checkpoint = "distilbert-base-uncased-finetuned-sst-2-english" model = AutoModelForSequenceClassification.from_pretrained(pretrained_model_name_or_path=checkpoint) + +.. parsed-literal:: + + 2024-11-04 23:18:47.102633: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-04 23:18:47.135966: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2024-11-04 23:18:47.793551: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + + Initializing the Tokenizer -------------------------- @@ -166,10 +175,15 @@ optimal execution on end-point target devices. .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4779: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. + + +.. parsed-literal:: + + [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/distilbert/modeling_distilbert.py:215: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. - mask, torch.tensor(torch.finfo(scores.dtype).min) + `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. 
OpenVINO™ Runtime uses the `Infer diff --git a/docs/notebooks/dolly-2-instruction-following-with-output.rst b/docs/notebooks/dolly-2-instruction-following-with-output.rst index 9f6857b608d962..3b276a1725f0aa 100644 --- a/docs/notebooks/dolly-2-instruction-following-with-output.rst +++ b/docs/notebooks/dolly-2-instruction-following-with-output.rst @@ -136,6 +136,8 @@ documentation `__. .. code:: ipython3 import os + from pathlib import Path + import requests os.environ["GIT_CLONE_PROTECTION_ACTIVE"] = "false" @@ -144,12 +146,17 @@ documentation `__. %pip install -q "diffusers>=0.16.1" "transformers>=4.33.0" "torch>=2.1" "nncf>=2.10.0" "onnx<1.16.2" "gradio>=4.19" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q "git+https://github.com/huggingface/optimum-intel.git" - import requests - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - open("notebook_utils.py", "w").write(r.text) + utility_files = ["notebook_utils.py", "cmd_helper.py"] + + for utility in utility_files: + local_path = Path(utility) + if not local_path.exists(): + r = requests.get( + url=f"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/{local_path.name}", + ) + with local_path.open("w") as f: + f.write(r.text) Convert model using Optimum-CLI tool ------------------------------------ @@ -227,7 +234,7 @@ sacrifice of the model size and inference latency. .. code:: ipython3 - from IPython.display import Markdown, display + from IPython.display import display import ipywidgets as widgets prepare_int4_model = widgets.Checkbox( @@ -272,6 +279,7 @@ sacrifice of the model size and inference latency. .. code:: ipython3 from pathlib import Path + from cmd_helper import optimum_cli model_id = "databricks/dolly-v2-3b" model_path = Path("dolly-v2-3b") @@ -284,36 +292,19 @@ sacrifice of the model size and inference latency. 
def convert_to_fp16(): if (fp16_model_dir / "openvino_model.xml").exists(): return - fp16_model_dir.mkdir(parents=True, exist_ok=True) - export_command_base = "optimum-cli export openvino --model {} --task text-generation-with-past --weight-format fp16".format(model_id) - export_command = export_command_base + " " + str(fp16_model_dir) - display(Markdown("**Export command:**")) - display(Markdown(f"`{export_command}`")) - ! $export_command + optimum_cli(model_id, fp16_model_dir, additional_args={"weight-format": "fp16"}) def convert_to_int8(): if (int8_model_dir / "openvino_model.xml").exists(): return - int8_model_dir.mkdir(parents=True, exist_ok=True) - export_command_base = "optimum-cli export openvino --model {} --task text-generation-with-past --weight-format int8".format(model_id) - export_command = export_command_base + " " + str(int8_model_dir) - display(Markdown("**Export command:**")) - display(Markdown(f"`{export_command}`")) - ! $export_command + optimum_cli(model_id, int8_model_dir, additional_args={"weight-format": "int8"}) def convert_to_int4(): if (int4_model_dir / "openvino_model.xml").exists(): return - int4_model_dir.mkdir(parents=True, exist_ok=True) - export_command_base = "optimum-cli export openvino --model {} --task text-generation-with-past --weight-format int4 --ratio 1.0 --group-size 128".format( - model_id - ) - export_command = export_command_base + " " + str(int4_model_dir) - display(Markdown("**Export command:**")) - display(Markdown(f"`{export_command}`")) - ! 
$export_command + optimum_cli(model_id, int4_model_dir, additional_args={"weight-format": "int4"}) if prepare_fp16_model.value: diff --git a/docs/notebooks/dynamicrafter-animating-images-with-output.rst b/docs/notebooks/dynamicrafter-animating-images-with-output.rst index 584c3442c94af2..992c346194e31c 100644 --- a/docs/notebooks/dynamicrafter-animating-images-with-output.rst +++ b/docs/notebooks/dynamicrafter-animating-images-with-output.rst @@ -189,10 +189,10 @@ Prerequisites remote: Counting objects: 100% (153/153), done. remote: Compressing objects: 100% (99/99), done. remote: Total 335 (delta 97), reused 54 (delta 54), pack-reused 182 (from 1) - Receiving objects: 100% (335/335), 72.41 MiB | 22.40 MiB/s, done. + Receiving objects: 100% (335/335), 72.41 MiB | 20.85 MiB/s, done. Resolving deltas: 100% (123/123), done. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images @@ -282,7 +282,7 @@ We will use model for 256x256 resolution as example. Also, models for .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/huggingface_hub/file_download.py:834: UserWarning: `local_dir_use_symlinks` parameter is deprecated and will be ignored. The process to download files to a local folder has been updated and do not rely on symlinks anymore. 
You only need to pass a destination folder as`local_dir`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/huggingface_hub/file_download.py:834: UserWarning: `local_dir_use_symlinks` parameter is deprecated and will be ignored. The process to download files to a local folder has been updated and do not rely on symlinks anymore. You only need to pass a destination folder as`local_dir`. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder. warnings.warn( @@ -300,8 +300,12 @@ We will use model for 256x256 resolution as example. Also, models for .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers warnings.warn(f"Importing from {__name__} is deprecated, please import via timm.layers", FutureWarning) + 2024-11-04 23:23:38.980054: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-04 23:23:39.013901: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
+ To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2024-11-04 23:23:39.616188: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -384,6 +388,17 @@ Convert CLIP text encoder del cond_stage_model gc.collect(); + +.. parsed-literal:: + + WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. + + +.. parsed-literal:: + + WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. + + Convert CLIP image encoder ~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -409,43 +424,43 @@ resolutions. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/utils/image.py:226: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/utils/image.py:226: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if input.numel() == 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:573: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:573: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if size == input_size: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:579: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:579: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
antialias = antialias and (max(factors) > 1) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:581: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:581: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if antialias: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:584: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:584: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
sigmas = (max((factors[0] - 1.0) / 2.0, 0.001), max((factors[1] - 1.0) / 2.0, 0.001)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:589: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:589: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! ks = int(max(2.0 * 2 * sigmas[0], 3)), int(max(2.0 * 2 * sigmas[1], 3)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:589: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:589: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
ks = int(max(2.0 * 2 * sigmas[0], 3)), int(max(2.0 * 2 * sigmas[1], 3)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/gaussian.py:55: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/gaussian.py:55: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. sigma = tensor([sigma], device=input.device, dtype=input.dtype) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/gaussian.py:55: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/gaussian.py:55: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! sigma = tensor([sigma], device=input.device, dtype=input.dtype) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/core/check.py:78: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/core/check.py:78: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if x_shape_to_check[i] != dim: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/kernels.py:92: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/kernels.py:92: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. 
In any other case, this might cause the trace to be incorrect. mean = tensor([[mean]], device=sigma.device, dtype=sigma.dtype) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:101: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:101: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if len(mean.shape) == 0 or mean.shape[0] == 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if len(std.shape) == 0 or std.shape[0] == 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:107: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:107: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if mean.shape and mean.shape[0] != 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:108: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:108: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if mean.shape[0] != data.shape[1] and mean.shape[:2] != data.shape[:2]: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:112: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:112: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if std.shape and std.shape[0] != 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:113: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:113: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if std.shape[0] != data.shape[1] and std.shape[:2] != data.shape[:2]: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:116: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:116: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. mean = torch.as_tensor(mean, device=data.device, dtype=data.dtype) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:117: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:117: TracerWarning: torch.as_tensor results are registered as constants in the trace. 
You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. std = torch.as_tensor(std, device=data.device, dtype=data.dtype) @@ -474,7 +489,7 @@ Convert AE encoder .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/ae_modules.py:67: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/ae_modules.py:67: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! w_ = w_ * (int(c)**(-0.5)) @@ -518,15 +533,15 @@ Convert Diffusion U-Net model .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:556: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:556: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if l_context == 77 + t*16: ## !!! HARD CODE here - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:205: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:205: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if batch_size: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:232: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:232: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if self.use_temporal_conv and batch_size: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:76: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:76: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert x.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:99: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:99: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert x.shape[1] == self.channels @@ -913,14 +928,14 @@ Run OpenVINO pipeline inference .. parsed-literal:: Seed set to 234 - /tmp/ipykernel_2590985/2451984876.py:25: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.) + /tmp/ipykernel_511478/2451984876.py:25: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.) img_tensor = torch.from_numpy(image).permute(2, 0, 1).float().to(model.device) .. parsed-literal:: - start: man fishing in a boat at sunset 2024-10-22 23:16:18 - Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 196.25 seconds + start: man fishing in a boat at sunset 2024-11-04 23:26:56 + Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 206.55 seconds .. 
code:: ipython3 @@ -1170,14 +1185,6 @@ quantization time. INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino -.. parsed-literal:: - - 2024-10-22 23:45:51.693284: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-22 23:45:51.735392: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-22 23:45:52.354791: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - .. parsed-literal:: @@ -1357,8 +1364,8 @@ Let’s run the optimized pipeline .. parsed-literal:: - start: man fishing in a boat at sunset 2024-10-23 00:47:13 - Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 98.40 seconds + start: man fishing in a boat at sunset 2024-11-05 00:58:08 + Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 97.78 seconds .. code:: ipython3 @@ -1463,9 +1470,9 @@ models, we use median inference time on calibration subset. .. parsed-literal:: - FP32 latency: 195.358 - INT8 latency: 97.265 - Performance speed up: 2.009 + FP32 latency: 201.526 + INT8 latency: 96.036 + Performance speed up: 2.098 Interactive inference diff --git a/docs/notebooks/efficient-sam-with-output.rst b/docs/notebooks/efficient-sam-with-output.rst index 8d725c4594afc6..b50b82341f4af8 100644 --- a/docs/notebooks/efficient-sam-with-output.rst +++ b/docs/notebooks/efficient-sam-with-output.rst @@ -108,9 +108,9 @@ Prerequisites remote: Counting objects: 100% (85/85), done. remote: Compressing objects: 100% (33/33), done. 
remote: Total 424 (delta 76), reused 52 (delta 52), pack-reused 339 (from 1) - Receiving objects: 100% (424/424), 262.14 MiB | 24.94 MiB/s, done. + Receiving objects: 100% (424/424), 262.14 MiB | 23.37 MiB/s, done. Resolving deltas: 100% (246/246), done. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM .. code:: ipython3 @@ -377,23 +377,23 @@ disk using ``openvino.save_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:220: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:220: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if ( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:241: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:241: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert ( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:163: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:163: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! size = int(math.sqrt(xy_num)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert size * size == xy_num - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:166: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:166: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if size != h or size != w: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:251: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:251: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert x.shape[2] == num_patches - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if num_pts > self.decoder_max_num_input_points: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:92: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:92: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! elif num_pts < self.decoder_max_num_input_points: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:126: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:126: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if output_w > 0 and output_h > 0: @@ -640,10 +640,10 @@ architecture type, we should specify ``transformer`` in ``model_type``. .. parsed-literal:: - 2024-10-23 01:04:24.934254: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
- 2024-10-23 01:04:24.966235: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 01:15:40.935673: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 01:15:40.968460: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-23 01:04:25.612813: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 01:15:41.606156: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -810,7 +810,7 @@ models, we use ``bencmark_app``. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 30.59 ms + [ INFO ] Read model took 30.24 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] batched_images (node: batched_images) : f32 / [...] / [?,?,?,?] @@ -830,7 +830,7 @@ models, we use ``bencmark_app``. [ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_3) : f32 / [...] / [?,?,?,?,?] [ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_2) : f32 / [...] / [?,?,?] 
[Step 7/11] Loading the model to the device - [ INFO ] Compile model took 1397.68 ms + [ INFO ] Compile model took 1388.43 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -871,17 +871,17 @@ models, we use ``bencmark_app``. [ INFO ] Fill input 'batched_point_labels' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in full mode (inputs filling are included in measurement loop). - [ INFO ] First inference took 822.58 ms + [ INFO ] First inference took 798.46 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 51 iterations - [ INFO ] Duration: 16272.65 ms + [ INFO ] Count: 49 iterations + [ INFO ] Duration: 16827.30 ms [ INFO ] Latency: - [ INFO ] Median: 1835.79 ms - [ INFO ] Average: 1847.61 ms - [ INFO ] Min: 1271.60 ms - [ INFO ] Max: 2300.40 ms - [ INFO ] Throughput: 3.13 FPS + [ INFO ] Median: 2025.54 ms + [ INFO ] Average: 1991.09 ms + [ INFO ] Min: 816.09 ms + [ INFO ] Max: 2176.67 ms + [ INFO ] Throughput: 2.91 FPS .. code:: ipython3 @@ -907,7 +907,7 @@ models, we use ``bencmark_app``. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 43.92 ms + [ INFO ] Read model took 43.95 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] batched_images (node: batched_images) : f32 / [...] / [?,?,?,?] @@ -927,7 +927,7 @@ models, we use ``bencmark_app``. [ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_3) : f32 / [...] / [?,?,?,?,?] [ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_2) : f32 / [...] / [?,?,?] 
[Step 7/11] Loading the model to the device - [ INFO ] Compile model took 1623.93 ms + [ INFO ] Compile model took 1607.96 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -968,17 +968,17 @@ models, we use ``bencmark_app``. [ INFO ] Fill input 'batched_point_labels' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in full mode (inputs filling are included in measurement loop). - [ INFO ] First inference took 578.24 ms + [ INFO ] First inference took 596.94 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] [ INFO ] Count: 55 iterations - [ INFO ] Duration: 15797.98 ms + [ INFO ] Duration: 15959.69 ms [ INFO ] Latency: - [ INFO ] Median: 1695.87 ms - [ INFO ] Average: 1683.49 ms - [ INFO ] Min: 550.40 ms - [ INFO ] Max: 1833.79 ms - [ INFO ] Throughput: 3.48 FPS + [ INFO ] Median: 1701.74 ms + [ INFO ] Average: 1692.86 ms + [ INFO ] Min: 653.76 ms + [ INFO ] Max: 1817.85 ms + [ INFO ] Throughput: 3.45 FPS Interactive segmentation demo @@ -1308,7 +1308,7 @@ Interactive segmentation demo .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam Running on local URL: http://127.0.0.1:7860 To create a public link, set `share=True` in `launch()`. 
diff --git a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png index c6c2e7d72e694a..9f65fa9db4554a 100644 --- a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png +++ b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ee7d364aeabe9a2787785e4d0ee5bb8951f530f90a0a625c9f31047bdc157b59 -size 1261144 +oid sha256:9368b1fbd458d1e022a768f24e689af0fd6e5dacc98a920f45d3fc0f63062567 +size 1259373 diff --git a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png index ff03bff6360a90..7c0716600906a1 100644 --- a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png +++ b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:40a049edc89f5339af256b80e0fc99740d9e3b6d3159e6e9d09e78683d24e0fe -size 1261722 +oid sha256:22f0e5bfd74e7426218d2bd007f9219433556530ddb10f33b9706398eb7cd370 +size 1263404 diff --git a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png index 766db096877cba..0a717e2c9aa38d 100644 --- a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png +++ b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f4fcf4eecf855fa1a9ca72c0dbf5c070e11a4a520873971202b592bc65dd2ccc -size 1261487 +oid sha256:d1863ccc9483f6cbd60768b311d104ee68692c3a7181e06da4bc751b52cf0ca1 +size 1262535 diff --git a/docs/notebooks/encodec-audio-compression-with-output.rst 
b/docs/notebooks/encodec-audio-compression-with-output.rst index 394c6e5b92e9bf..7f0e153ffa4a55 100644 --- a/docs/notebooks/encodec-audio-compression-with-output.rst +++ b/docs/notebooks/encodec-audio-compression-with-output.rst @@ -142,7 +142,7 @@ bandwidth. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. WeightNorm.apply(module, name, dim) @@ -302,7 +302,7 @@ similar as possible to the original. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. WeightNorm.apply(module, name, dim) @@ -402,13 +402,13 @@ with ``ov.save_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:60: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:60: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! ideal_length = (math.ceil(n_frames) - 1) * stride + (kernel_size - padding_total) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert padding_left >= 0 and padding_right >= 0, (padding_left, padding_right) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:87: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:87: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! max_pad = max(padding_left, padding_right) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:89: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:89: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if length <= max_pad: @@ -428,11 +428,11 @@ with ``ov.save_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:358: TracerWarning: torch.tensor results are registered as constants in the trace. 
You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:358: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. quantized_out = torch.tensor(0.0, device=q_indices.device) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:359: TracerWarning: Iterating over a tensor might cause the trace to be incorrect. Passing a tensor of different shape won't change the number of iterations executed (and might lead to errors or silently give incorrect results). + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:359: TracerWarning: Iterating over a tensor might cause the trace to be incorrect. Passing a tensor of different shape won't change the number of iterations executed (and might lead to errors or silently give incorrect results). for i, indices in enumerate(q_indices): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert (padding_left + padding_right) <= x.shape[-1] diff --git a/docs/notebooks/fast-segment-anything-with-output.rst b/docs/notebooks/fast-segment-anything-with-output.rst index c29a58c33e3490..e0f20e0f79974b 100644 --- a/docs/notebooks/fast-segment-anything-with-output.rst +++ b/docs/notebooks/fast-segment-anything-with-output.rst @@ -158,7 +158,7 @@ model and generate a segmentation map. .. parsed-literal:: - 100%|██████████| 138M/138M [00:02<00:00, 56.5MB/s] + 100%|██████████| 138M/138M [00:02<00:00, 67.7MB/s] @@ -170,8 +170,8 @@ model and generate a segmentation map. .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 768x1024 37 objects, 662.7ms - Speed: 3.8ms preprocess, 662.7ms inference, 766.0ms postprocess per image at shape (1, 3, 768, 1024) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 768x1024 37 objects, 728.3ms + Speed: 3.1ms preprocess, 728.3ms inference, 768.2ms postprocess per image at shape (1, 3, 768, 1024) The model returns segmentation maps for all the objects on the image. @@ -216,8 +216,8 @@ tracing. The FastSAM model itself is based on YOLOv8 model. 
OpenVINO: starting export with openvino 2024.4.0-16579-c3152d32c9c-releases/2024/4... OpenVINO: export success ✅ 6.2s, saved as 'FastSAM-x_openvino_model/' (276.1 MB) - Export complete (9.2s) - Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything + Export complete (9.1s) + Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything Predict: yolo predict task=segment model=FastSAM-x_openvino_model imgsz=1024 Validate: yolo val task=segment model=FastSAM-x_openvino_model imgsz=1024 data=ultralytics/datasets/sa.yaml Visualize: https://netron.app @@ -321,8 +321,8 @@ pipeline. .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 1024x1024 42 objects, 558.8ms - Speed: 5.7ms preprocess, 558.8ms inference, 34.6ms postprocess per image at shape (1, 3, 1024, 1024) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 1024x1024 42 objects, 504.9ms + Speed: 5.8ms preprocess, 504.9ms inference, 31.6ms postprocess per image at shape (1, 3, 1024, 1024) One can observe the converted model outputs in the next cell, they is @@ -643,9 +643,9 @@ calibration dataset to measure the performance. .. parsed-literal:: - Segmented in 23 seconds - Resulting in 5.57 fps - That is 3.0 times faster! + Segmented in 22 seconds + Resulting in 5.82 fps + That is 3.14 times faster! 
Try out the converted pipeline diff --git a/docs/notebooks/florence2-with-output.rst b/docs/notebooks/florence2-with-output.rst index 4cca2d85a2bd6c..e929a95fb182c1 100644 --- a/docs/notebooks/florence2-with-output.rst +++ b/docs/notebooks/florence2-with-output.rst @@ -100,10 +100,10 @@ available model. By default, we will use .. parsed-literal:: - 2024-10-23 01:17:38.434215: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-23 01:17:38.467940: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 01:28:54.034484: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 01:28:54.069316: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-23 01:17:39.118965: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 01:28:54.728430: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -193,49 +193,49 @@ pipeline. .. 
parsed-literal:: - SECURITY.md: 0%| | 0.00/2.66k [00:00 1 or self.sliding_window is not None: /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/chkpt/modeling_florence2.py:1205: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! is_causal = True if self.is_causal and attention_mask is None and tgt_len > 1 else False diff --git a/docs/notebooks/florence2-with-output_files/florence2-with-output_18_0.png b/docs/notebooks/florence2-with-output_files/florence2-with-output_18_0.png index a1484a3eb4b6d0..37d11a47fd30c9 100644 --- a/docs/notebooks/florence2-with-output_files/florence2-with-output_18_0.png +++ b/docs/notebooks/florence2-with-output_files/florence2-with-output_18_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f5808e660c685e4518f0a83232c2b9c9e88263e05ecd05891f769d263df4c642 -size 259656 +oid sha256:d85b3df68708172ed849a9e182bdec6a94f0174643833bd8cc7184ac0d090fae +size 259636 diff --git a/docs/notebooks/flux.1-image-generation-with-output.rst b/docs/notebooks/flux.1-image-generation-with-output.rst index 62549bd074d4a6..fe0c47899c3601 100644 --- a/docs/notebooks/flux.1-image-generation-with-output.rst +++ b/docs/notebooks/flux.1-image-generation-with-output.rst @@ -26,7 +26,11 @@ using OpenVINO. - `Prerequisites <#prerequisites>`__ - `Select model <#select-model>`__ - `Convert model with OpenVINO <#convert-model-with-openvino>`__ -- `Compress model weights <#compress-model-weights>`__ + + - `Convert model using Optimum + Intel <#convert-model-using-optimum-intel>`__ + - `Compress model weights <#compress-model-weights>`__ + - `Run OpenVINO model inference <#run-openvino-model-inference>`__ - `Interactive demo <#interactive-demo>`__ @@ -47,8 +51,9 @@ Prerequisites .. 
code:: ipython3 - %pip install -q "gradio>=4.19" "torch>=2.1" "transformers" "nncf>=2.12.0" "diffusers>=0.30.0" "opencv-python" "pillow" "peft>=0.7.0" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "gradio>=4.19" "torch>=2.1" "transformers" "nncf>=2.12.0" "diffusers>=0.31.0" "opencv-python" "pillow" "peft>=0.7.0" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q "sentencepiece" "protobuf" + %pip install -q "git+https://github.com/huggingface/optimum-intel.git" %pip install -qU "openvino>=2024.4.0" .. code:: ipython3 @@ -56,9 +61,9 @@ Prerequisites import requests from pathlib import Path - if not Path("flux_helper.py").exists(): - r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/flux.1-image-generation/flux_helper.py") - open("flux_helper.py", "w").write(r.text) + if not Path("cmd_helper.py").exists(): + r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py") + open("cmd_helper.py", "w").write(r.text) if not Path("gradio_helper.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/flux.1-image-generation/gradio_helper.py") @@ -100,25 +105,19 @@ FLUX.1-dev version using widget bellow. .. code:: ipython3 - from flux_helper import get_model_selector + import ipywidgets as widgets - model_selector = get_model_selector() - model_selector - - -.. parsed-literal:: - - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + model_ids = ["black-forest-labs/FLUX.1-schnell", "black-forest-labs/FLUX.1-dev"] + + model_selector = widgets.Dropdown( + options=model_ids, + default=model_ids[0], + description="Model:", + ) - -.. parsed-literal:: - - 2024-08-13 17:30:13.543036: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. 
You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-08-13 17:30:13.544738: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used. - 2024-08-13 17:30:13.579013: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-08-13 17:30:14.449873: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + model_selector + @@ -169,40 +168,44 @@ The pipeline consists of four important parts: - Transformer for step-by-step denoising latent image representation. - Autoencoder (VAE) for decoding latent space to image. -We will use ``convert_flux`` helper function defined in -`flux_helper.py `__ that create original PyTorch model -and convert each part of pipeline using ``ov.convert_model``. - -.. code:: ipython3 +Convert model using Optimum Intel +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - from flux_helper import convert_flux - - # uncomment the line to see model conversion code - # ??convert_flux -.. code:: ipython3 - model_dir = convert_flux(model_selector.value) +For convenience, we will use OpenVINO integration with HuggingFace +Optimum. `Optimum +Intel `__ is the +interface between the Transformers and Diffusers libraries and the +different tools and libraries provided by Intel to accelerate end-to-end +pipelines on Intel architectures. +Among other use cases, Optimum Intel provides a simple interface to +optimize your Transformers and Diffusers models, convert them to the +OpenVINO Intermediate Representation (IR) format and run inference using +OpenVINO Runtime. 
``optimum-cli`` provides command line interface for +model conversion and optimization. -.. parsed-literal:: +General command format: - ✅ black-forest-labs/FLUX.1-schnell model already converted and can be found in FLUX.1-schnell - +.. code:: bash -.. code:: ipython3 + optimum-cli export openvino --model --task - from flux_helper import TRANSFORMER_PATH, VAE_DECODER_PATH, TEXT_ENCODER_PATH, TEXT_ENCODER_2_PATH - - model_dict = { - "transformer": model_dir / TRANSFORMER_PATH, - "text_encoder": model_dir / TEXT_ENCODER_PATH, - "text_encoder_2": model_dir / TEXT_ENCODER_2_PATH, - "vae": model_dir / VAE_DECODER_PATH, - } +where task is task to export the model for, if not specified, the task +will be auto-inferred based on the model. You can find a mapping between +tasks and model classes in Optimum TaskManager +`documentation `__. +Additionally, you can specify weights compression using +``--weight-format`` argument with one of following options: ``fp32``, +``fp16``, ``int8`` and ``int4``. For int8 and int4 +`nncf `__ will be used for +weight compression. More details about model export provided in `Optimum +Intel +documentation `__. Compress model weights ----------------------- +~~~~~~~~~~~~~~~~~~~~~~ @@ -215,14 +218,16 @@ where the size of weights is relatively larger than the size of activations, for example, Large Language Models (LLM). Compared to INT8 compression, INT4 compression improves performance even more, but introduces a minor drop in prediction quality. We will use -`NNCF `__ for weight -compression. +`NNCF `__ integration to +``optimum-cli`` tool for weight compression. .. code:: ipython3 - from flux_helper import weight_compression_widget - - to_compress = weight_compression_widget() + to_compress = widgets.Checkbox( + value=True, + description="Weight compression", + disabled=False, + ) to_compress @@ -237,112 +242,98 @@ compression. .. 
code:: ipython3 - import nncf - import openvino as ov - import gc + from pathlib import Path - compression_args = {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "ratio": 1.0} + model_id = model_selector.value - int4_model_dict = {} + model_base_dir = Path(model_id.split("/")[-1]) + additional_args = {} if to_compress.value: - core = ov.Core() - - for model_name, model_path in model_dict.items(): - int4_path = model_path.parent / (model_path.stem + "_int4.xml") - if not int4_path.exists(): - print(f"⌛ {model_path.stem} compression started") - print( - f"Compression parameters:\n\tmode = {compression_args['mode']}\n\tratio = {compression_args['ratio']}\n\tgroup_size = {compression_args['group_size']}" - ) - model = core.read_model(model_path) - compressed_model = nncf.compress_weights(model, **compression_args) - ov.save_model(compressed_model, int4_path) - print(f"✅ {model_path.stem} compression finished") - del compressed_model - del model - gc.collect() - print(f"Compressed {model_path.stem} can be found in {int4_path}") - int4_model_dict[model_name] = int4_path + model_dir = model_base_dir / "INT4" + additional_args.update({"weight-format": "int4", "group-size": "64", "ratio": "1.0"}) + else: + model_dir = model_base_dir / "FP16" + additional_args.update({"weight-format": "fp16"}) +.. code:: ipython3 -.. 
parsed-literal:: - - Compressed transformer can be found in FLUX.1-schnell/transformer/transformer_int4.xml - Compressed text_encoder can be found in FLUX.1-schnell/text_encoder/text_encoder_int4.xml - Compressed text_encoder_2 can be found in FLUX.1-schnell/text_encoder_2/text_encoder_2_int4.xml - Compressed vae_decoder can be found in FLUX.1-schnell/vae/vae_decoder_int4.xml + from cmd_helper import optimum_cli + if not model_dir.exists(): + optimum_cli(model_id, model_dir, additional_args=additional_args) Run OpenVINO model inference ---------------------------- -``OVFluxPipeline`` class defined in ``flux_helper.py`` provides -convenient way for running model. It accepts directory with converted -model and inference device as arguments. +``OVDiffusionPipeline`` from Optimum Intel provides ready-to-use +interface for running Diffusers models using OpenVINO. It supports +various models including Stable Diffusion, Stable Diffusion XL, LCM, +Stable Diffusion v3 and Flux. Similar to original Diffusers pipeline, +for initialization, we should use ``from_pretrained`` method providing +model id from HuggingFace hub or local directory (both original PyTorch +and OpenVINO models formats supported, in the first case model class +additionally will trigger model conversion). .. code:: ipython3 - from flux_helper import get_pipeline_selection_option + from notebook_utils import device_widget - use_compressed = get_pipeline_selection_option(int4_model_dict) - use_compressed + device = device_widget(default="CPU", exclude=["NPU"]) + device .. parsed-literal:: - Checkbox(value=True, description='Use compressed models') + Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU') .. code:: ipython3 - from flux_helper import OVFluxPipeline, init_pipeline # noqa: F401 + import ipywidgets as widgets - # uncomment the line to see model pipeline - # ??OVFluxPipeline - -.. 
code:: ipython3 - - from notebook_utils import device_widget + model_available = (model_base_dir / "INT4").is_dir() + use_quantized_models = widgets.Checkbox( + value=model_available, + description="Use compressed models", + disabled=not model_available, + ) - device = device_widget(default="CPU", exclude=["NPU"]) - device + use_quantized_models .. parsed-literal:: - Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + Checkbox(value=True, description='Use compressed models') .. code:: ipython3 - ov_pipe = init_pipeline(model_dir, model_dict if not use_compressed.value else int4_model_dict, device.value) - - -.. parsed-literal:: - - Models compilation - ✅ transformer - Done! - ✅ text_encoder - Done! - ✅ text_encoder_2 - Done! + from optimum.intel.openvino import OVDiffusionPipeline - -.. parsed-literal:: - - You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers + model_dir = model_base_dir / "INT4" if use_quantized_models.value else model_base_dir / "FP16" + ov_pipe = OVDiffusionPipeline.from_pretrained(model_dir, device=device.value) + .. parsed-literal:: - ✅ vae - Done! + 2024-10-28 18:12:30.714636: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
+ 2024-10-28 18:12:30.727116: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered + WARNING: All log messages before absl::InitializeLog() is called are written to STDERR + E0000 00:00:1730124750.741387 52454 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered + E0000 00:00:1730124750.745955 52454 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered + 2024-10-28 18:12:30.761443: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers .. code:: ipython3 @@ -365,7 +356,7 @@ model and inference device as arguments. -.. image:: flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.png +.. 
image:: flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_16_1.png diff --git a/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_16_1.jpg b/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_16_1.jpg new file mode 100644 index 00000000000000..6ad8e593ef5115 --- /dev/null +++ b/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_16_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5caeca2e3ae8e99146bb786321cbad04584523b693e3cbdba2a5f8c7f0dbb096 +size 13490 diff --git a/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_16_1.png b/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_16_1.png new file mode 100644 index 00000000000000..7cb1e2903a902f --- /dev/null +++ b/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_16_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0de23e409e068f39c1e0e568b921174c80b0c42e29a1473eeeba54a91b88aaef +size 113356 diff --git a/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.jpg b/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.jpg deleted file mode 100644 index 7223074e225dd8..00000000000000 --- a/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8923d4a7ce04ed66bb58c28ab4450594a0340b97a15245fc594e669d33b574ef -size 14369 diff --git a/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.png b/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.png deleted file mode 100644 index 
83b60f80d75c13..00000000000000 --- a/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fab1fa5bf3f71d6bb6f505658da6765d37295b7b9618560d5f7c0c344e2a3896 -size 116250 diff --git a/docs/notebooks/freevc-voice-conversion-with-output.rst b/docs/notebooks/freevc-voice-conversion-with-output.rst index a3c914a44b36a7..fe2ac780f5cca6 100644 --- a/docs/notebooks/freevc-voice-conversion-with-output.rst +++ b/docs/notebooks/freevc-voice-conversion-with-output.rst @@ -104,7 +104,7 @@ Check if FreeVC is installed and append its path to ``sys.path`` remote: Counting objects: 100% (74/74), done. remote: Compressing objects: 100% (47/47), done. remote: Total 131 (delta 43), reused 27 (delta 27), pack-reused 57 (from 1) - Receiving objects: 100% (131/131), 15.28 MiB | 3.81 MiB/s, done. + Receiving objects: 100% (131/131), 15.28 MiB | 17.50 MiB/s, done. Resolving deltas: 100% (43/43), done. @@ -134,8 +134,8 @@ Check if FreeVC is installed and append its path to ``sys.path`` Downloading... From: https://drive.google.com/uc?id=12-cB34qCTvByWT-QtOcZaqwwO21FLSqU&confirm=t&uuid=a703c43c-ccce-436c-8799-c11b88e9e7e4 - To: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/WavLM-Large.pt - 100%|██████████| 1.26G/1.26G [00:26<00:00, 48.4MB/s] + To: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/WavLM-Large.pt + 100%|██████████| 1.26G/1.26G [00:32<00:00, 38.5MB/s] .. code:: ipython3 @@ -239,7 +239,7 @@ Models initialization .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm. warnings.warn("torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.") @@ -288,7 +288,7 @@ Inference .. parsed-literal:: - 2it [00:03, 1.72s/it] + 2it [00:04, 2.03s/it] Result audio files should be available in ‘outputs/freevc’ @@ -360,13 +360,13 @@ Converting to OpenVINO’s IR format. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:495: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:495: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
assert embed_dim == self.embed_dim - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:496: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:496: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert list(query.size()) == [tgt_len, bsz, embed_dim] - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:500: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:500: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
assert key_bsz == bsz - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:502: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:502: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert src_len, bsz == value.shape[:2] @@ -581,12 +581,12 @@ function to OpenVINO IR format. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1102: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1102: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: Tensor-likes are not close! 
- Mismatched elements: 25909 / 25920 (100.0%) - Greatest absolute difference: 0.713585376739502 at index (0, 0, 4783) (up to 1e-05 allowed) - Greatest relative difference: 22806.258007743752 at index (0, 0, 19024) (up to 1e-05 allowed) + Mismatched elements: 25915 / 25920 (100.0%) + Greatest absolute difference: 1.3485908806324005 at index (0, 0, 24258) (up to 1e-05 allowed) + Greatest relative difference: 8204.075456053068 at index (0, 0, 5777) (up to 1e-05 allowed) _check_trace( @@ -645,7 +645,7 @@ And now we can check inference using only IR models. .. parsed-literal:: - 2it [00:01, 1.30it/s] + 2it [00:01, 1.31it/s] Result audio files should be available in ‘outputs/freevc’ and you can @@ -707,7 +707,7 @@ Result audio: diff --git a/docs/notebooks/gpu-device-with-output.rst b/docs/notebooks/gpu-device-with-output.rst index 2b35846b766bd1..732cc297aa9531 100644 --- a/docs/notebooks/gpu-device-with-output.rst +++ b/docs/notebooks/gpu-device-with-output.rst @@ -21,13 +21,7 @@ Working with GPUs in OpenVINO™ - `Compiling a Model on GPU <#compiling-a-model-on-gpu>`__ - - `Download and Convert a Model <#download-and-convert-a-model>`__ - - - `Download and unpack the - Model <#download-and-unpack-the-model>`__ - - `Convert the Model to OpenVINO IR - format <#convert-the-model-to-openvino-ir-format>`__ - + - `Download a Model <#download-a-model>`__ - `Compile with Default Configuration <#compile-with-default-configuration>`__ - `Reduce Compile Time through Model @@ -119,10 +113,7 @@ Install required packages .. 
code:: ipython3 - %pip install -q "openvino-dev>=2024.0.0" "opencv-python" "tqdm" - %pip install -q "tensorflow-macos>=2.5; sys_platform == 'darwin' and platform_machine == 'arm64' and python_version > '3.8'" # macOS M1 and M2 - %pip install -q "tensorflow>=2.5; sys_platform == 'darwin' and platform_machine != 'arm64' and python_version > '3.8'" # macOS x86 - %pip install -q "tensorflow>=2.5; sys_platform != 'darwin' and python_version > '3.8'" + %pip install -q "openvino>=2024.4.0" "opencv-python" "tqdm" "huggingface_hub" Checking GPUs with Query Device ------------------------------- @@ -305,8 +296,8 @@ properties. We can easily use one for compiling and running models with OpenVINO `GPU plugin `__. -Download and Convert a Model -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Download a Model +~~~~~~~~~~~~~~~~ @@ -317,19 +308,9 @@ was trained on `Common Objects in Context categories of object. For details, see the `paper `__. -Download and unpack the Model -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - - -Use the ``download_file`` function from the ``notebook_utils`` to -download an archive with the model. It automatically creates a directory -structure and downloads the selected model. This step is skipped if the -package is already downloaded. - .. code:: ipython3 - import tarfile + import huggingface_hub as hf_hub from pathlib import Path # Fetch `notebook_utils` module @@ -340,95 +321,18 @@ package is already downloaded. ) open("notebook_utils.py", "w").write(r.text) - from notebook_utils import download_file # A directory where the model will be downloaded. 
base_model_dir = Path("./model").expanduser() - model_name = "ssdlite_mobilenet_v2" - archive_name = Path(f"{model_name}_coco_2018_05_09.tar.gz") - - # Download the archive - downloaded_model_path = base_model_dir / archive_name - if not downloaded_model_path.exists(): - model_url = f"http://download.tensorflow.org/models/object_detection/{archive_name}" - download_file(model_url, downloaded_model_path.name, downloaded_model_path.parent) - - # Unpack the model - tf_model_path = base_model_dir / archive_name.with_suffix("").stem / "frozen_inference_graph.pb" - if not tf_model_path.exists(): - with tarfile.open(downloaded_model_path) as file: - file.extractall(base_model_dir) - - - -.. parsed-literal:: - - model/ssdlite_mobilenet_v2_coco_2018_05_09.tar.gz: 0%| | 0.00/48.7M [00:00`__. - -.. code:: ipython3 - - from openvino.tools.mo.front import tf as ov_tf_front - - precision = "FP16" + model_name = "ssdlite_mobilenet_v2_fp16" - # The output path for the conversion. - model_path = base_model_dir / "ir_model" / f"{model_name}_{precision.lower()}.xml" + ov_model_path = base_model_dir / model_name / f"{model_name}.xml" - trans_config_path = Path(ov_tf_front.__file__).parent / "ssd_v2_support.json" - pipeline_config = base_model_dir / archive_name.with_suffix("").stem / "pipeline.config" + if not (ov_model_path).exists(): + hf_hub.snapshot_download("katuni4ka/ssdlite_mobilenet_v2_fp16", local_dir=base_model_dir) - model = None - if not model_path.exists(): - model = ov.tools.mo.convert_model( - input_model=tf_model_path, - input_shape=[1, 300, 300, 3], - layout="NHWC", - transformations_config=trans_config_path, - tensorflow_object_detection_api_pipeline_config=pipeline_config, - reverse_input_channels=True, - ) - ov.save_model(model, model_path, compress_to_fp16=(precision == "FP16")) - print("IR model saved to {}".format(model_path)) - else: - print("Read IR model from {}".format(model_path)) - model = core.read_model(model_path) - - -.. 
parsed-literal:: - - [ WARNING ] The Preprocessor block has been removed. Only nodes performing mean value subtraction and scaling (if applicable) are kept. - - -.. parsed-literal:: - - IR model saved to model/ir_model/ssdlite_mobilenet_v2_fp16.xml - + model = core.read_model(ov_model_path) Compile with Default Configuration ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -484,7 +388,7 @@ following: core.set_property({props.cache_dir(): cache_folder}) # Compile the model as before - model = core.read_model(model=model_path) + model = core.read_model(ov_model_path) compiled_model = core.compile_model(model, device) print(f"Cache enabled (first time) - compile time: {time.time() - start}s") @@ -502,13 +406,13 @@ compile times with caching enabled and disabled as follows: start = time.time() core = ov.Core() core.set_property({props.cache_dir(): "cache"}) - model = core.read_model(model=model_path) + model = core.read_model(model=ov_model_path) compiled_model = core.compile_model(model, device) print(f"Cache enabled - compile time: {time.time() - start}s") start = time.time() core = ov.Core() - model = core.read_model(model=model_path) + model = core.read_model(ov_model_path) compiled_model = core.compile_model(model, device) print(f"Cache disabled - compile time: {time.time() - start}s") diff --git a/docs/notebooks/grounded-segment-anything-with-output.rst b/docs/notebooks/grounded-segment-anything-with-output.rst index 51522e791e04a4..232629422b14e0 100644 --- a/docs/notebooks/grounded-segment-anything-with-output.rst +++ b/docs/notebooks/grounded-segment-anything-with-output.rst @@ -124,16 +124,16 @@ segmentation you can select vanilla ``SAM``. Cloning into 'GroundingDINO'... remote: Enumerating objects: 379, done. remote: Counting objects: 100% (190/190), done. - remote: Compressing objects: 100% (81/81), done. - remote: Total 379 (delta 135), reused 109 (delta 109), pack-reused 189 (from 1) - Receiving objects: 100% (379/379), 14.03 MiB | 18.28 MiB/s, done. 
+ remote: Compressing objects: 100% (79/79), done. + remote: Total 379 (delta 136), reused 111 (delta 111), pack-reused 189 (from 1) + Receiving objects: 100% (379/379), 14.03 MiB | 20.95 MiB/s, done. Resolving deltas: 100% (194/194), done. Cloning into 'EfficientSAM'... remote: Enumerating objects: 424, done. remote: Counting objects: 100% (85/85), done. remote: Compressing objects: 100% (33/33), done. remote: Total 424 (delta 76), reused 52 (delta 52), pack-reused 339 (from 1) - Receiving objects: 100% (424/424), 262.14 MiB | 30.07 MiB/s, done. + Receiving objects: 100% (424/424), 262.14 MiB | 24.44 MiB/s, done. Resolving deltas: 100% (246/246), done. @@ -222,6 +222,10 @@ GroundingDINO imports .. parsed-literal:: + 2024-11-05 01:34:53.765709: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 01:34:53.988314: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2024-11-05 01:34:54.760718: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers UserWarning: Failed to load custom C++ ops. Running on CPU mode Only! @@ -362,6 +366,30 @@ Convert GroundingDINO to OpenVINO IR format TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + + +.. parsed-literal:: + + output layer_id 0 is nan + num_nan 230400, num_inf 0 + output layer_id 1 is nan + num_nan 230400, num_inf 0 + output layer_id 2 is nan + num_nan 230400, num_inf 0 + output layer_id 3 is nan + num_nan 230400, num_inf 0 + output layer_id 4 is nan + num_nan 230400, num_inf 0 + output layer_id 5 is nan + num_nan 230400, num_inf 0 + WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. + + +.. parsed-literal:: + + [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. Run OpenVINO optimized GroundingDINO @@ -504,15 +532,6 @@ class, but the inference will be done using OpenVINO optimized model. 
boxes_filt, pred_phrases, logits_filt = get_ov_grounding_output(ov_compiled_grounded_dino, pil_image, classes_prompt, BOX_THRESHOLD, TEXT_THRESHOLD) - -.. parsed-literal:: - - 2024-10-23 01:22:04.926963: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-23 01:22:04.966234: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-23 01:22:05.582957: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - Convert predicted boxes to supervision box detections format .. code:: ipython3 @@ -573,11 +592,6 @@ segmentation. First of all let’s convert ``SAM`` model to OpenVINO IR. ov_efficient_sam = core.read_model(ov_efficient_sam_path) -.. parsed-literal:: - - WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. - - .. parsed-literal:: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! diff --git a/docs/notebooks/hello-segmentation-with-output.rst b/docs/notebooks/hello-segmentation-with-output.rst index e490cf48533277..6ddc0e3b0aa78b 100644 --- a/docs/notebooks/hello-segmentation-with-output.rst +++ b/docs/notebooks/hello-segmentation-with-output.rst @@ -188,7 +188,7 @@ is provided. .. 
parsed-literal:: - + @@ -215,7 +215,7 @@ Do Inference .. parsed-literal:: - + diff --git a/docs/notebooks/hugging-face-hub-with-output.rst b/docs/notebooks/hugging-face-hub-with-output.rst index e17d63a031b445..a92f8cd18fba31 100644 --- a/docs/notebooks/hugging-face-hub-with-output.rst +++ b/docs/notebooks/hugging-face-hub-with-output.rst @@ -132,6 +132,10 @@ tutorials `__. To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) - 2024-10-23 01:23:29.312809: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-23 01:23:29.347879: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-23 01:23:29.943155: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT Initialize and Convert the Model Automatically using OVModel class @@ -377,11 +384,6 @@ inference run. - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). -.. parsed-literal:: - - WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. 
- - Convert model using Optimum CLI interface ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -434,7 +436,7 @@ Full list of supported arguments available via ``--help`` .. parsed-literal:: - 2024-10-23 01:23:44.450300: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 01:37:12.161579: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT usage: optimum-cli export openvino [-h] -m MODEL [--task TASK] [--framework {pt,tf}] [--trust-remote-code] [--weight-format {fp32,fp16,int8,int4,mxfp4}] @@ -465,20 +467,20 @@ Full list of supported arguments available via ``--help`` --task TASK The task to export the model for. If not specified, the task will be auto-inferred based on the model. Available tasks depend on the model, but are among: - ['audio-xvector', 'text-to-audio', 'fill-mask', - 'feature-extraction', 'text-generation', 'zero-shot- - image-classification', 'text-to-image', 'text2text- - generation', 'zero-shot-object-detection', 'automatic- - speech-recognition', 'text-classification', 'semantic- - segmentation', 'masked-im', 'image-to-text', - 'sentence-similarity', 'object-detection', - 'inpainting', 'audio-frame-classification', 'image-to- - image', 'token-classification', 'question-answering', - 'audio-classification', 'image-segmentation', 'image- - classification', 'mask-generation', 'multiple-choice', - 'depth-estimation']. For decoder models, use `xxx- - with-past` to export the model using past key values - in the decoder. 
+ ['mask-generation', 'image-classification', 'fill- + mask', 'audio-xvector', 'audio-frame-classification', + 'sentence-similarity', 'multiple-choice', 'automatic- + speech-recognition', 'text-to-image', 'token- + classification', 'image-to-text', 'image- + segmentation', 'question-answering', 'depth- + estimation', 'semantic-segmentation', 'feature- + extraction', 'text-generation', 'zero-shot-object- + detection', 'text-to-audio', 'zero-shot-image- + classification', 'object-detection', 'text2text- + generation', 'audio-classification', 'image-to-image', + 'masked-im', 'inpainting', 'text-classification']. For + decoder models, use `xxx-with-past` to export the + model using past key values in the decoder. --framework {pt,tf} The framework to use for the export. If not provided, will attempt to use the local checkpoint's original framework or what is available in the environment. @@ -583,10 +585,11 @@ compression: .. parsed-literal:: - 2024-10-23 01:23:49.987001: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 01:37:17.680673: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight'] - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). 
+ `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. Tokenizer won't be converted. diff --git a/docs/notebooks/image-classification-quantization-with-output.rst b/docs/notebooks/image-classification-quantization-with-output.rst index b5b33fdfc852e7..7bf7172f720588 100644 --- a/docs/notebooks/image-classification-quantization-with-output.rst +++ b/docs/notebooks/image-classification-quantization-with-output.rst @@ -102,9 +102,9 @@ Model preparation stage has the following steps: Cloning into 'pytorch-cifar-models'... remote: Enumerating objects: 282, done. remote: Counting objects: 100% (281/281), done. - remote: Compressing objects: 100% (96/96), done. - remote: Total 282 (delta 135), reused 269 (delta 128), pack-reused 1 (from 1) - Receiving objects: 100% (282/282), 9.22 MiB | 24.58 MiB/s, done. + remote: Compressing objects: 100% (94/94), done. + remote: Total 282 (delta 135), reused 275 (delta 130), pack-reused 1 (from 1) + Receiving objects: 100% (282/282), 9.22 MiB | 9.58 MiB/s, done. Resolving deltas: 100% (135/135), done. @@ -176,7 +176,7 @@ Preprocessing for model obtained from training .. parsed-literal:: - 100%|██████████| 170498071/170498071 [00:07<00:00, 23490143.19it/s] + 100%|██████████| 170498071/170498071 [00:07<00:00, 22536051.32it/s] .. parsed-literal:: @@ -248,10 +248,10 @@ about supported parameters can be found on this .. parsed-literal:: - 2024-10-23 01:24:29.479834: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-23 01:24:29.511386: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
+ 2024-11-05 01:37:57.500572: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 01:37:57.532367: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-23 01:24:30.071901: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 01:37:58.074631: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -419,7 +419,7 @@ Tool `__ - `Interactive demo <#interactive-demo>`__ + Installation Instructions ~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/notebooks/internvl2-with-output.rst b/docs/notebooks/internvl2-with-output.rst index 602bfe84590494..ed67209a0303eb 100644 --- a/docs/notebooks/internvl2-with-output.rst +++ b/docs/notebooks/internvl2-with-output.rst @@ -279,7 +279,7 @@ documentation self.max_seq_len_cached: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:324: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:324: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:339: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:339: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim): @@ -531,7 +531,7 @@ Let’s check model capabilities in answering questions about image: .. parsed-literal:: - The image depicts a red panda, a large feline with a distinctive, reddish-brown coat and white face and chest. It is peeking over what appears to be a wooden platform or platform made for panda viewing in captivity. 
The background is filled with greenery, indicating that the photo was likely taken in a conservatory or wildlife park where penguins or seabirds are displayed. + The image shows a red panda lying on its side, partially wrapped in a wooden structure, possibly a container or log. The red panda appears to be looking at the camera with large, expressive eyes, displaying an endearing and lively appearance. The background consists of a portion of the red panda's habitat environment, which appears to be a tree and some greenery. Interactive demo ---------------- diff --git a/docs/notebooks/jina-clip-with-output.rst b/docs/notebooks/jina-clip-with-output.rst index 466e3be5d03fd1..1cdb2e1d286245 100644 --- a/docs/notebooks/jina-clip-with-output.rst +++ b/docs/notebooks/jina-clip-with-output.rst @@ -106,11 +106,11 @@ weights, using ``from_pretrained`` method. .. parsed-literal:: - 2024-10-23 01:28:42.177313: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-23 01:28:42.211816: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 01:41:58.578137: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 01:41:58.612620: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2024-10-23 01:28:42.875342: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers + 2024-11-05 01:41:59.276782: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers warnings.warn(f"Importing from {__name__} is deprecated, please import via timm.layers", FutureWarning) @@ -301,7 +301,7 @@ loading on device using ``core.complie_model``. .. parsed-literal:: WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. 
Please use `model.hf_quantizer.is_trainable` instead warnings.warn( /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/jinaai/jina-bert-flash-implementation/b78d1595de294f13ffe7b19d6cd63892a6e4e7a4/mha.py:333: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! softmax_scale = self.softmax_scale or 1.0 / math.sqrt(q.shape[-1]) @@ -820,8 +820,8 @@ approximately estimate the speed up of the dynamic quantized models. .. parsed-literal:: - Performance speed up for text model: 1.560 - Performance speed up for vision model: 1.435 + Performance speed up for text model: 1.978 + Performance speed up for vision model: 1.428 Gradio demo diff --git a/docs/notebooks/knowledge-graphs-conve-with-output.rst b/docs/notebooks/knowledge-graphs-conve-with-output.rst index b9e4f56ebb05af..de9115fd9ab4a8 100644 --- a/docs/notebooks/knowledge-graphs-conve-with-output.rst +++ b/docs/notebooks/knowledge-graphs-conve-with-output.rst @@ -233,7 +233,7 @@ Download Model Checkpoint .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/knowledge-graphs-conve/models/conve.pt') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/knowledge-graphs-conve/models/conve.pt') @@ -395,7 +395,7 @@ typical to use metrics such as Mean Reciprocal Rank, Hits@10 etc. .. parsed-literal:: - Average time taken for inference: 0.6894171237945557 ms + Average time taken for inference: 0.7430613040924072 ms Mean accuracy of the model on the test dataset: 0.875 @@ -534,7 +534,7 @@ select device from dropdown list for running inference using OpenVINO .. 
parsed-literal:: - Average time taken for inference: 0.8764564990997314 ms + Average time taken for inference: 1.0752081871032715 ms Mean accuracy of the model on the test dataset: 0.10416666666666667 @@ -553,7 +553,7 @@ Determine the platform specific speedup obtained through OpenVINO graph optimiza .. parsed-literal:: - Speedup with OpenVINO optimizations: 0.79 X + Speedup with OpenVINO optimizations: 0.69 X Benchmark the converted OpenVINO model using benchmark app @@ -598,7 +598,7 @@ inference can also be obtained by looking at the benchmark app results. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 4.34 ms + [ INFO ] Read model took 4.36 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] e1 (node: e1) : i64 / [...] / [] @@ -614,7 +614,7 @@ inference can also be obtained by looking at the benchmark app results. [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: aten::softmax/Softmax) : f32 / [...] / [1,271] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 72.42 ms + [ INFO ] Compile model took 54.92 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -653,17 +653,17 @@ inference can also be obtained by looking at the benchmark app results. [ INFO ] Fill input 'rel' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 10000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 1.79 ms + [ INFO ] First inference took 1.66 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 94740 iterations - [ INFO ] Duration: 10002.15 ms + [ INFO ] Count: 94596 iterations + [ INFO ] Duration: 10001.69 ms [ INFO ] Latency: - [ INFO ] Median: 1.08 ms - [ INFO ] Average: 1.08 ms - [ INFO ] Min: 0.74 ms - [ INFO ] Max: 9.80 ms - [ INFO ] Throughput: 9471.96 FPS + [ INFO ] Median: 1.07 ms + [ INFO ] Average: 1.09 ms + [ INFO ] Min: 0.79 ms + [ INFO ] Max: 8.57 ms + [ INFO ] Throughput: 9458.00 FPS Conclusions diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output.rst b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output.rst index c18464f77585b2..b6a7a971fef3be 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output.rst +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output.rst @@ -85,7 +85,7 @@ Install requirements .. parsed-literal:: - Requirement already satisfied: pip in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (24.2) + Requirement already satisfied: pip in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (24.3.1) Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. @@ -152,10 +152,10 @@ example `__ .. parsed-literal:: - 2024-10-23 01:31:37.373532: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
- 2024-10-23 01:31:37.408388: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 01:44:54.753766: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 01:44:54.788691: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-23 01:31:37.931462: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 01:44:55.309895: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -374,11 +374,11 @@ Vision model accept ``pixel_values`` and returns ``image_embeds``. .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. 
Please use `model.hf_quantizer.is_trainable` instead warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:465: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:465: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:505: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:505: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): @@ -406,7 +406,7 @@ Convert Image To Text Projection model .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) if a.grad is not None: @@ -541,13 +541,13 @@ generated text by ``AutoProcessor``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:804: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:804: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if max_pos > self.weights.size(0): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1113: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1113: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input_shape[-1] > 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:920: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:920: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attention_mask.size() != (batch_size, 1, seq_length, src_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1206: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1206: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: @@ -1389,9 +1389,9 @@ pipelines, we use mean inference time on 7 samples. .. 
parsed-literal:: - FP32 pipeline: 2.696 seconds - Optimized pipeline: 1.137 seconds - Performance speed-up: 2.372 + FP32 pipeline: 2.746 seconds + Optimized pipeline: 1.140 seconds + Performance speed-up: 2.409 Interactive inference diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg index 9673054cc1b8e0..2310cb001b0c6b 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c45f05bcfa118a3d7e9024cf83963a4bb5504cb97f669c0c8261c469bab6dc75 -size 121985 +oid sha256:9ca596f09c0f6c0dafa4aca0fbe7974941301cfcbc6bcb3a8c4255774c347d0b +size 123320 diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png index a5be9e2155f6cc..91289c35d7c60c 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2fc77a26e4c6d30ca9d21fe47ddc25624662d566f6c98ac1c160637fbcb2363b -size 1151151 +oid sha256:56d06f7d654939feda627f67196b813de9b38a718acba9f5daed59a43314829f +size 1150807 diff --git 
a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png index 0da4a480d88cc3..d98f56141b1252 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9cfd18faba3aef57349c70b157fabd4dc2827a8bc6074252e717a67a9f9d488a -size 1148752 +oid sha256:0d7f8506e5f1bd369debee273b45c601d05901af4937d8cc976f985cd4a81fed +size 1149292 diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg index e992bdf66a9da7..b53344f52b7396 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f70d76f01ee01d54713ed161de8d09713816b4f3fd7df99a5ff96c4c5e0b1bf0 -size 124539 +oid sha256:edd5a47baf47ae90532b47bc5ee05e8503b7d1deda59d956a354688ed949c8b5 +size 121605 diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png index ec9e6a6277d4f0..2edc9a038ff8c3 
100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f4dc2d892ef69df259eff4b46527d5e4382457b9b381a1e3b35b22d1d4312da4 -size 1150974 +oid sha256:aa184084b598dac717e99fe9677f1fe9dd4f6b85ec123c075d4109c75b134841 +size 1150675 diff --git a/docs/notebooks/language-quantize-bert-with-output.rst b/docs/notebooks/language-quantize-bert-with-output.rst index 64f49c2b314811..21ecfe511f1b76 100644 --- a/docs/notebooks/language-quantize-bert-with-output.rst +++ b/docs/notebooks/language-quantize-bert-with-output.rst @@ -101,10 +101,10 @@ Imports .. parsed-literal:: - 2024-10-23 01:38:12.900514: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-23 01:38:12.934654: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 01:51:49.197259: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 01:51:49.231710: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2024-10-23 01:38:13.484434: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 01:51:49.783615: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -211,7 +211,7 @@ PyTorch model formats are supported: .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( @@ -247,7 +247,7 @@ tokenizer from HuggingFace. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. 
For more details check this issue: https://github.com/huggingface/transformers/issues/31884 warnings.warn( @@ -505,9 +505,9 @@ Frames Per Second (FPS) for images. .. parsed-literal:: - PyTorch model on CPU: 0.068 seconds per sentence, SPS: 14.67 - IR FP32 model in OpenVINO Runtime/AUTO: 0.020 seconds per sentence, SPS: 49.46 - OpenVINO IR INT8 model in OpenVINO Runtime/AUTO: 0.009 seconds per sentence, SPS: 108.19 + PyTorch model on CPU: 0.068 seconds per sentence, SPS: 14.68 + IR FP32 model in OpenVINO Runtime/AUTO: 0.020 seconds per sentence, SPS: 49.24 + OpenVINO IR INT8 model in OpenVINO Runtime/AUTO: 0.009 seconds per sentence, SPS: 108.47 Finally, measure the inference performance of OpenVINO ``FP32`` and @@ -548,7 +548,7 @@ in OpenVINO. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 18.86 ms + [ INFO ] Read model took 19.11 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,?] @@ -559,7 +559,7 @@ in OpenVINO. [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'input_ids': [1,128], '63': [1,128], 'token_type_ids': [1,128] - [ INFO ] Reshape model took 5.46 ms + [ INFO ] Reshape model took 5.55 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,128] @@ -568,7 +568,7 @@ in OpenVINO. [ INFO ] Model outputs: [ INFO ] logits (node: __module.classifier/aten::linear/Add) : f32 / [...] / [1,2] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 373.39 ms + [ INFO ] Compile model took 344.20 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -609,17 +609,17 @@ in OpenVINO. 
[ INFO ] Fill input 'token_type_ids' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 120000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 24.05 ms + [ INFO ] First inference took 22.90 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 5730 iterations - [ INFO ] Duration: 120000.60 ms + [ INFO ] Count: 6485 iterations + [ INFO ] Duration: 120011.48 ms [ INFO ] Latency: - [ INFO ] Median: 20.64 ms - [ INFO ] Average: 20.84 ms - [ INFO ] Min: 19.84 ms - [ INFO ] Max: 31.70 ms - [ INFO ] Throughput: 47.75 FPS + [ INFO ] Median: 18.09 ms + [ INFO ] Average: 18.41 ms + [ INFO ] Min: 17.32 ms + [ INFO ] Max: 26.49 ms + [ INFO ] Throughput: 54.04 FPS .. code:: ipython3 @@ -646,27 +646,27 @@ in OpenVINO. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 25.60 ms + [ INFO ] Read model took 24.93 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,?] - [ INFO ] 63 , attention_mask (node: attention_mask) : i64 / [...] / [1,?] + [ INFO ] attention_mask , 63 (node: attention_mask) : i64 / [...] / [1,?] [ INFO ] token_type_ids (node: token_type_ids) : i64 / [...] / [1,?] [ INFO ] Model outputs: [ INFO ] logits (node: __module.classifier/aten::linear/Add) : f32 / [...] / [1,2] [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'input_ids': [1,128], '63': [1,128], 'token_type_ids': [1,128] - [ INFO ] Reshape model took 7.46 ms + [ INFO ] Reshape model took 7.14 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] input_ids (node: input_ids) : i64 / [...] 
/ [1,128] - [ INFO ] 63 , attention_mask (node: attention_mask) : i64 / [...] / [1,128] + [ INFO ] attention_mask , 63 (node: attention_mask) : i64 / [...] / [1,128] [ INFO ] token_type_ids (node: token_type_ids) : i64 / [...] / [1,128] [ INFO ] Model outputs: [ INFO ] logits (node: __module.classifier/aten::linear/Add) : f32 / [...] / [1,2] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 1067.96 ms + [ INFO ] Compile model took 1080.21 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -707,15 +707,15 @@ in OpenVINO. [ INFO ] Fill input 'token_type_ids' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 120000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 17.45 ms + [ INFO ] First inference took 16.00 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 13316 iterations - [ INFO ] Duration: 120006.19 ms + [ INFO ] Count: 13181 iterations + [ INFO ] Duration: 120003.10 ms [ INFO ] Latency: - [ INFO ] Median: 8.92 ms - [ INFO ] Average: 8.92 ms - [ INFO ] Min: 7.66 ms - [ INFO ] Max: 14.16 ms - [ INFO ] Throughput: 110.96 FPS + [ INFO ] Median: 8.93 ms + [ INFO ] Average: 9.01 ms + [ INFO ] Min: 7.68 ms + [ INFO ] Max: 12.00 ms + [ INFO ] Throughput: 109.84 FPS diff --git a/docs/notebooks/latent-consistency-models-optimum-demo-with-output.rst b/docs/notebooks/latent-consistency-models-optimum-demo-with-output.rst index 3707d3f30d5f09..a0bce9d85c7196 100644 --- a/docs/notebooks/latent-consistency-models-optimum-demo-with-output.rst +++ b/docs/notebooks/latent-consistency-models-optimum-demo-with-output.rst @@ -9,7 +9,7 @@ and time required to generate an image for provided prompt. The notebook can be also used on other Intel hardware with minimal or no modifications. -|image0| +.. 
image:: https://github.com/openvinotoolkit/openvino_notebooks/assets/10940214/1858dae4-72fd-401e-b055-66d503d82446 Optimum Intel is an interface from Hugging Face between both diffusers and transformers libraries and various tools provided by Intel to @@ -48,8 +48,6 @@ need a Jupyter server to start. For details, please refer to `Installation Guide `__. -.. |image0| image:: https://github.com/openvinotoolkit/openvino_notebooks/assets/10940214/1858dae4-72fd-401e-b055-66d503d82446 - Prerequisites ~~~~~~~~~~~~~ @@ -67,9 +65,7 @@ Install required packages .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - + .. code:: ipython3 @@ -112,8 +108,12 @@ this .. parsed-literal:: - CPU: Intel(R) Core(TM) i9-10920X CPU @ 3.50GHz - + CPU: Intel(R) Core(TM) Ultra 7 155H + GNA.GNA_SW: GNA_SW + GNA.GNA_HW: GNA_HW + GPU: Intel(R) Arc(TM) Graphics (iGPU) + NPU: Intel(R) AI Boost + Using full precision model in CPU with ``LatentConsistencyModelPipeline`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -132,14 +132,6 @@ https://huggingface.co/docs/diffusers/en/api/pipelines/latent_consistency_models pipeline = LatentConsistencyModelPipeline.from_pretrained("SimianLuo/LCM_Dreamshaper_v7") -.. parsed-literal:: - - 2024-10-23 01:44:02.155955: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-23 01:44:02.191160: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
- To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-23 01:44:02.863862: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - .. parsed-literal:: @@ -172,6 +164,15 @@ https://huggingface.co/docs/diffusers/en/api/pipelines/latent_consistency_models del pipeline gc.collect(); + + + +.. parsed-literal:: + + 345 + + + Select inference device for text-to-image generation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -190,15 +191,6 @@ Select inference device for text-to-image generation device - - - -.. parsed-literal:: - - Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') - - - Running inference using Optimum Intel ``OVLatentConsistencyModelPipeline`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -228,74 +220,6 @@ and there is no need to do it manually ov_pipeline.reshape(batch_size=1, height=512, width=512, num_images_per_prompt=1) - -.. parsed-literal:: - - Keyword arguments {'subfolder': '', 'trust_remote_code': False} are not expected by LatentConsistencyModelPipeline and will be ignored. - - - -.. parsed-literal:: - - Loading pipeline components...: 0%| | 0/7 [00:00 by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 . - - - - -.. 
parsed-literal:: - - OVLatentConsistencyModelPipeline { - "_class_name": "OVLatentConsistencyModelPipeline", - "_diffusers_version": "0.31.0", - "_name_or_path": "/tmp/tmpz3tx8cvr", - "feature_extractor": [ - "transformers", - "CLIPImageProcessor" - ], - "image_encoder": [ - null, - null - ], - "requires_safety_checker": true, - "safety_checker": [ - null, - null - ], - "scheduler": [ - "diffusers", - "LCMScheduler" - ], - "text_encoder": [ - "optimum.intel.openvino.modeling_diffusion", - "OVModelTextEncoder" - ], - "tokenizer": [ - "transformers", - "CLIPTokenizer" - ], - "unet": [ - "optimum.intel.openvino.modeling_diffusion", - "OVModelUnet" - ], - "vae": [ - "optimum.intel.openvino.modeling_diffusion", - "OVModelVae" - ] - } - - - .. code:: ipython3 ov_pipeline.to(device.value) diff --git a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.jpg b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.jpg index a19eb369148e10..25ee04df1c536d 100644 --- a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.jpg +++ b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8ccbcf6c99b8d75b1c683375852111ce17983fbea50bd0b0bb294d06bbb317fb -size 38768 +oid sha256:7167bdd42f946c71dcff8f7c7f35e48df5434ed2c54b56505da9c61d25a0d740 +size 79839 diff --git a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.png b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.png index d27692124393df..d89126405ceff7 100644 --- 
a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.png +++ b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b936a832e23c30affb1988828f17a16e988b42413ff7dfe4381fb723e068ed97 -size 456597 +oid sha256:7f216ac91b5737006142f8426f0716c9ab8e3ce5ddea222987c69e8a4f11ed3e +size 1034680 diff --git a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.jpg b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.jpg index e320b91964889a..f0f1cd042ef9b5 100644 --- a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.jpg +++ b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0ab792ded8330c36072ec13fcfb0e0896ce3014413145167fd7a7c5b570919a4 -size 37604 +oid sha256:900339d0d0c9d4b8e25a5b72a159ca2c966f7e51dc112c30a7ac8636a01b1dd4 +size 81729 diff --git a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.png b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.png index e7a5d16aa88eb2..e083c8da654356 100644 --- a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.png +++ b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 
-oid sha256:f440a9e7cb3d6a9f6609165516be7e5025e26752004ca56baf570f49840af03c -size 470802 +oid sha256:89e0bc6c3f710c16cb1f81011848f41ad350e43ef9dc77961353897aa1d5b6b6 +size 1042433 diff --git a/docs/notebooks/llava-multimodal-chatbot-genai-with-output.rst b/docs/notebooks/llava-multimodal-chatbot-genai-with-output.rst index 435e7eaf62c53b..d035645fb27291 100644 --- a/docs/notebooks/llava-multimodal-chatbot-genai-with-output.rst +++ b/docs/notebooks/llava-multimodal-chatbot-genai-with-output.rst @@ -117,10 +117,25 @@ Install required dependencies .. code:: ipython3 + from pathlib import Path + import requests + %pip install -q "torch>=2.1.0" "torchvision" "torchaudio" --index-url https://download.pytorch.org/whl/cpu %pip install -q "git+https://github.com/eaidova/optimum-intel.git@ea/minicpmv" %pip install -q "nncf>=2.13.0" "sentencepiece" "tokenizers>=0.12.1" "transformers>=4.45.0" "gradio>=4.36" - %pip install -q -U --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly openvino_tokenizers openvino openvino-genai + %pip install -q -U --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly openvino-tokenizers openvino openvino-genai + + + utility_files = ["notebook_utils.py", "cmd_helper.py"] + + for utility in utility_files: + local_path = Path(utility) + if not local_path.exists(): + r = requests.get( + url=f"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/{local_path.name}", + ) + with local_path.open("w") as f: + f.write(r.text) Convert and Optimize Model -------------------------- @@ -172,13 +187,13 @@ documentation =2.1.0" "torchvision" "torchaudio" --index-url https://download.pytorch.org/whl/cpu %pip install -q "git+https://github.com/eaidova/optimum-intel.git@ea/minicpmv" %pip install -q "nncf>=2.13.0" "sentencepiece" "tokenizers>=0.12.1" "transformers>=4.45.0" "gradio>=4.36" - %pip install -q -U --pre --extra-index-url 
https://storage.openvinotoolkit.org/simple/wheels/nightly openvino_tokenizers openvino openvino-genai + %pip install -q -U --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly openvino-tokenizers openvino openvino-genai + + utility_files = ["notebook_utils.py", "cmd_helper.py"] + + for utility in utility_files: + local_path = Path(utility) + if not local_path.exists(): + r = requests.get( + url=f"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/{local_path.name}", + ) + with local_path.open("w") as f: + f.write(r.text) Convert and Optimize Model -------------------------- @@ -172,13 +186,13 @@ documentation `__, -`blog `__, -`GitHub `__, and -`Documentation `__. +``Qwen2.5`` as LLM in agent pipeline. + +* **qwen2.5-3b-instruct/qwen2.5-7b-instruct/qwen2.5-14b-instruct** - + Qwen2.5 is the latest series of Qwen large language models. Comparing + with Qwen2, Qwen2.5 series brings significant improvements in coding, + mathematics and general knowledge skills. Additionally, it brings + long-context and multiple languages support including Chinese, English, + French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, + Vietnamese, Thai, Arabic, and more. For more details, please refer to + `model_card `__, + `blog `__, + `GitHub `__, and + `Documentation `__. .. code:: ipython3 import ipywidgets as widgets - + llm_model_ids = ["Qwen/Qwen2.5-3B-Instruct", "Qwen/Qwen2.5-7B-Instruct", "Qwen/qwen2.5-14b-instruct"] - + llm_model_id = widgets.Dropdown( options=llm_model_ids, value=llm_model_ids[0], description="Model:", disabled=False, ) - + llm_model_id @@ -146,9 +147,9 @@ Vietnamese, Thai, Arabic, and more. For more details, please refer to .. 
code:: ipython3 from pathlib import Path - + llm_model_path = llm_model_id.value.split("/")[-1] - + if not Path(llm_model_path).exists(): !optimum-cli export openvino --model {llm_model_id.value} --task text-generation-with-past --trust-remote-code --weight-format int4 --group-size 128 --ratio 1.0 --sym {llm_model_path} @@ -160,9 +161,9 @@ Select inference device for LLM .. code:: ipython3 from notebook_utils import device_widget - + llm_device = device_widget("CPU", exclude=["NPU"]) - + llm_device @@ -226,15 +227,15 @@ guide `__ import openvino.properties as props import openvino.properties.hint as hints import openvino.properties.streams as streams - + import json import json5 import torch - + tokenizer = AutoTokenizer.from_pretrained(llm_model_path, trust_remote_code=True) - + ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): "1", props.cache_dir(): ""} - + llm = OVModelForCausalLM.from_pretrained( llm_model_path, device=llm_device.value, @@ -242,7 +243,7 @@ guide `__ config=AutoConfig.from_pretrained(llm_model_path, trust_remote_code=True), trust_remote_code=True, ) - + llm.generation_config.top_k = 1 llm.generation_config.max_length = 2000 @@ -260,31 +261,31 @@ received from tool calling.. class StopSequenceCriteria(StoppingCriteria): """ This class can be used to stop generation whenever a sequence of tokens is encountered. - + Args: stop_sequences (`str` or `List[str]`): The sequence (or list of sequences) on which to stop execution. tokenizer: The tokenizer used to decode the model outputs. 
""" - + def __init__(self, stop_sequences, tokenizer): if isinstance(stop_sequences, str): stop_sequences = [stop_sequences] self.stop_sequences = stop_sequences self.tokenizer = tokenizer - + def __call__(self, input_ids, scores, **kwargs) -> bool: decoded_output = self.tokenizer.decode(input_ids.tolist()[0]) return any(decoded_output.endswith(stop_sequence) for stop_sequence in self.stop_sequences) - - + + def text_completion(prompt: str, stop_words) -> str: im_end = "<|im_end|>" if im_end not in stop_words: stop_words = stop_words + [im_end] streamer = TextStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True) - + stopping_criteria = StoppingCriteriaList([StopSequenceCriteria(stop_words, tokenizer)]) input_ids = torch.tensor([tokenizer.encode(prompt)]) generate_kwargs = dict( @@ -297,7 +298,7 @@ received from tool calling.. output = tokenizer.decode(output, errors="ignore") assert output.startswith(prompt) output = output[len(prompt) :].replace("<|endoftext|>", "").replace(im_end, "") - + for stop_str in stop_words: idx = output.find(stop_str) if idx != -1: @@ -339,13 +340,13 @@ parameter should be a sequence of messages that contains the .. code:: ipython3 TOOL_DESC = """{name_for_model}: Call this tool to interact with the {name_for_human} API. What is the {name_for_human} API useful for? {description_for_model} Parameters: {parameters}""" - + PROMPT_REACT = """Answer the following questions as best you can. You have access to the following APIs: - + {tools_text} - + Use the following format: - + Question: the input question you must answer Thought: you should always think about what to do Action: the action to take, should be one of [{tools_name_text}] @@ -354,9 +355,9 @@ parameter should be a sequence of messages that contains the ... (this Thought/Action/Action Input/Observation can be repeated zero or more times) Thought: I now know the final answer Final Answer: the final answer to the original input question - + Begin! 
- + Question: {query}""" Meanwhile we have to create function for consolidate the tools @@ -381,9 +382,9 @@ information and conversation history into the prompt template. raise NotImplementedError tools_text.append(tool) tools_text = "\n\n".join(tools_text) - + tools_name_text = ", ".join([tool_info["name_for_model"] for tool_info in list_of_tool_info]) - + messages = [{"role": "system", "content": "You are a helpful assistant."}] for i, (query, response) in enumerate(chat_history): if list_of_tool_info: @@ -397,9 +398,9 @@ information and conversation history into the prompt template. messages.append({"role": "user", "content": query}) if response: messages.append({"role": "assistant", "content": response}) - + prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False, return_tensors="pt") - + return prompt Create parser @@ -493,7 +494,7 @@ execute them according to the output of LLM. return str(ret) elif tool_name == "image_gen": import urllib.parse - + tool_args = tool_args.replace("(", "").replace(")", "") prompt = json5.loads(tool_args)["prompt"] prompt = urllib.parse.quote(prompt) @@ -503,11 +504,11 @@ execute them according to the output of LLM. ) else: raise NotImplementedError - - + + def llm_with_tool(prompt: str, history, list_of_tool_info=()): chat_history = [(x["user"], x["bot"]) for x in history] + [(prompt, "")] - + planning_prompt = build_input_text(chat_history, list_of_tool_info) text = "" while True: @@ -522,7 +523,7 @@ execute them according to the output of LLM. 
else: text += output break - + new_history = [] new_history.extend(history) new_history.append({"user": prompt, "bot": text}) @@ -537,7 +538,7 @@ Run agent history = [] query = "get the weather in London, and create a picture of Big Ben based on the weather information" - + response, history = llm_with_tool(prompt=query, history=history, list_of_tool_info=tools) diff --git a/docs/notebooks/meter-reader-with-output.rst b/docs/notebooks/meter-reader-with-output.rst index 7f539abf025ebb..c1317625880917 100644 --- a/docs/notebooks/meter-reader-with-output.rst +++ b/docs/notebooks/meter-reader-with-output.rst @@ -637,7 +637,7 @@ bounds of input batch size. .. parsed-literal:: - + diff --git a/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst b/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst index f6a22b6f160760..98f1217902a587 100644 --- a/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst +++ b/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst @@ -61,9 +61,15 @@ Prerequisites .. parsed-literal:: - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - + WARNING: Error parsing dependencies of torchsde: .* suffix can only be used with `==` or `!=` operators + numpy (>=1.19.*) ; python_version >= "3.7" + ~~~~~~~^ + Note: you may need to restart the kernel to use updated packages. + WARNING: Error parsing dependencies of torchsde: .* suffix can only be used with `==` or `!=` operators + numpy (>=1.19.*) ; python_version >= "3.7" + ~~~~~~~^ + Note: you may need to restart the kernel to use updated packages. + .. code:: ipython3 @@ -107,7 +113,16 @@ details.
+.. raw:: html + + + Click here for more detailed explanation of conversion steps + +.. raw:: html + + + MiniCPM-V2.6 is autoregressive transformer generative model, it means that each next model step depends from model output from previous step. The generation approach is based on the assumption that the probability @@ -205,11 +220,12 @@ Let’s convert each model part. .. parsed-literal:: - 2024-10-23 01:47:25.606377: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-23 01:47:25.640217: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-07 09:57:53.402018: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-07 09:57:53.403877: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used. + 2024-10-07 09:57:53.440490: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-23 01:47:26.161344: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - + 2024-10-07 09:57:54.270302: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + .. code:: ipython3 @@ -222,157 +238,7 @@ Let’s convert each model part. 
⌛ openbmb/MiniCPM-V-2_6 conversion started. Be patient, it may takes some time. ⌛ Load Original model - - - -.. parsed-literal:: - - Fetching 24 files: 0%| | 0/24 [00:00 self.max_seq_len_cached: - + .. parsed-literal:: - ✅ Language model successfully converted - ⌛ Convert Image embedding model ✅ Image embedding model successfully converted - ⌛ Convert Resamler model - - -.. parsed-literal:: - - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/ckpt/resampler.py:421: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - assert embed_dim == embed_dim_to_check, \ - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/ckpt/resampler.py:428: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - assert head_dim * num_heads == embed_dim, f"embed_dim {embed_dim} not divisible by num_heads {num_heads}" - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/ckpt/resampler.py:434: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - assert key.shape == value.shape, f"key shape {key.shape} does not match value shape {value.shape}" - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/ckpt/resampler.py:520: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - assert key_padding_mask.shape == (bsz, src_len), \ - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/ckpt/resampler.py:539: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - q_scaled = q / math.sqrt(E) - - -.. parsed-literal:: - - ✅ Resampler model successfully converted ✅ openbmb/MiniCPM-V-2_6 model sucessfully converted. You can find results in MiniCPM-V-2_6 - + Compress Language Model Weights to 4 bits ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -459,12 +288,21 @@ applied using `NNCF `__.
-Click here for more details about weight compression Weight compression -aims to reduce the memory footprint of a model. It can also lead to -significant performance improvement for large memory-bound models, such -as Large Language Models (LLMs). LLMs and other models, which require -extensive memory to store the weights during inference, can benefit from -weight compression in the following ways: +.. raw:: html + + + +Click here for more details about weight compression + +.. raw:: html + + + +Weight compression aims to reduce the memory footprint of a model. It +can also lead to significant performance improvement for large +memory-bound models, such as Large Language Models (LLMs). LLMs and +other models, which require extensive memory to store the weights during +inference, can benefit from weight compression in the following ways: - enabling the inference of exceptionally large models that cannot be accommodated in the memory of the device; @@ -547,33 +385,14 @@ documentation `__ - `Select model <#select-model>`__ @@ -62,9 +63,9 @@ Prerequisites .. code:: ipython3 from pathlib import Path - + repo_dir = Path("./ml-mobileclip") - + if not repo_dir.exists(): !git clone https://github.com/apple/ml-mobileclip.git @@ -76,15 +77,15 @@ Prerequisites remote: Counting objects: 100% (95/95), done. remote: Compressing objects: 100% (66/66), done. remote: Total 95 (delta 38), reused 85 (delta 28), pack-reused 0 (from 0) - Unpacking objects: 100% (95/95), 469.11 KiB | 3.91 MiB/s, done. + Unpacking objects: 100% (95/95), 469.11 KiB | 3.13 MiB/s, done. .. 
code:: ipython3 %pip install -q "./ml-mobileclip" --no-deps - + %pip install -q "clip-benchmark>=1.4.0" "datasets>=2.8.0" "open-clip-torch>=2.20.0" "timm>=0.9.5" "torch>=1.13.1" "torchvision>=0.14.1" --extra-index-url https://download.pytorch.org/whl/cpu - + %pip install -q "openvino>=2024.0.0" "gradio>=4.19" "matplotlib" "Pillow" "altair" "pandas" "opencv-python" "tqdm" "matplotlib>=3.4" @@ -141,9 +142,9 @@ comparison purposes, you can select different models among: .. code:: ipython3 import ipywidgets as widgets - + model_dir = Path("checkpoints") - + supported_models = { "MobileCLIP": { "mobileclip_s0": { @@ -184,17 +185,17 @@ comparison purposes, you can select different models among: "image_size": 224, }, "clip-vit-b-16": { - "image_name": "ViT-B-16", + "model_name": "ViT-B-16", "pretrained": "openai", "image_size": 224, }, "clip-vit-l-14": { - "image_name": "ViT-L-14", + "model_name": "ViT-L-14", "pretrained": "datacomp_xl_s13b_b90k", "image_size": 224, }, "clip-vit-h-14": { - "image_name": "ViT-H-14", + "model_name": "ViT-H-14", "pretrained": "laion2b_s32b_b79k", "image_size": 224, }, @@ -212,8 +213,8 @@ comparison purposes, you can select different models among: }, }, } - - + + model_type = widgets.Dropdown(options=supported_models.keys(), default="MobileCLIP", description="Model type:") model_type @@ -229,13 +230,13 @@ comparison purposes, you can select different models among: .. code:: ipython3 available_models = supported_models[model_type.value] - + model_checkpoint = widgets.Dropdown( options=available_models.keys(), default=list(available_models), description="Model:", ) - + model_checkpoint @@ -250,15 +251,15 @@ comparison purposes, you can select different models among: .. 
code:: ipython3 import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) - + open("notebook_utils.py", "w").write(r.text) - + from notebook_utils import download_file, device_widget - + model_config = available_models[model_checkpoint.value] Run model inference @@ -293,8 +294,8 @@ Prepare image gallery import matplotlib.pyplot as plt import numpy as np from PIL import Image - - + + def visualize_result(images: List, query: str = "", selected: List[int] = None): """ Utility function for visualization classification results @@ -322,8 +323,8 @@ Prepare image gallery mask = np.ones_like(np.array(images[idx])) a.imshow(mask, "jet", interpolation="none", alpha=0.75) return fig - - + + images_urls = [ "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/282ce53e-912d-41aa-ab48-2a001c022d74", "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/9bb40168-82b5-4b11-ada6-d8df104c736c", @@ -333,17 +334,17 @@ Prepare image gallery image_names = ["red_panda.png", "cat.png", "raccoon.png", "dog.png"] sample_path = Path("data") sample_path.mkdir(parents=True, exist_ok=True) - + images = [] for image_name, image_url in zip(image_names, images_urls): image_path = sample_path / image_name if not image_path.exists(): download_file(image_url, filename=image_name, directory=sample_path) images.append(Image.open(image_path).convert("RGB").resize((640, 420))) - + input_labels = ["cat"] text_descriptions = [f"This is a photo of a {label}" for label in input_labels] - + visualize_result(images, "image gallery"); @@ -390,7 +391,7 @@ preprocessing utilities from PIL import Image import mobileclip import open_clip - + # instantiate model model_name = model_config["model_name"] pretrained = model_config["pretrained"] @@ -407,7 +408,7 @@ preprocessing utilities .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers warnings.warn(f"Importing from {__name__} is deprecated, please import via timm.layers", FutureWarning) @@ -426,8 +427,8 @@ Perform search image_tensor = torch.stack([preprocess(image) for image in images]) text = tokenizer(text_descriptions) - - + + with torch.no_grad(): # calculate image embeddings image_encoding_start = time.perf_counter() @@ -439,22 +440,22 @@ Perform search text_features = model.encode_text(text) text_encoding_end = time.perf_counter() print(f"Text encoding took {text_encoding_end - text_encoding_start:.3} ms") - + # normalize embeddings image_features /= image_features.norm(dim=-1, keepdim=True) text_features /= text_features.norm(dim=-1, keepdim=True) - + # calcualte similarity score image_probs = (100.0 * text_features @ image_features.T).softmax(dim=-1) selected_image = [torch.argmax(image_probs).item()] - + visualize_result(images, input_labels[0], selected_image); .. parsed-literal:: - Image encoding took 0.136 ms - Text encoding took 0.0123 ms + Image encoding took 0.114 ms + Text encoding took 0.0113 ms @@ -481,8 +482,8 @@ be used separately. Let’s convert each part to OpenVINO. import types import torch.nn.functional as F - - + + def se_block_forward(self, inputs): """Apply forward pass.""" b, c, h, w = inputs.size() @@ -498,12 +499,12 @@ be used separately. Let’s convert each part to OpenVINO. 
import openvino as ov import gc - + ov_models_dir = Path("ov_models") ov_models_dir.mkdir(exist_ok=True) - + image_encoder_path = ov_models_dir / f"{model_checkpoint.value}_im_encoder.xml" - + if not image_encoder_path.exists(): if "mobileclip_s" in model_name: model.image_encoder.model.conv_exp.se.forward = types.MethodType(se_block_forward, model.image_encoder.model.conv_exp.se) @@ -516,23 +517,23 @@ be used separately. Let’s convert each part to OpenVINO. ov.save_model(ov_image_encoder, image_encoder_path) del ov_image_encoder gc.collect() - + text_encoder_path = ov_models_dir / f"{model_checkpoint.value}_text_encoder.xml" - + if not text_encoder_path.exists(): model.forward = model.encode_text ov_text_encoder = ov.convert_model(model, example_input=text, input=[-1, text.shape[1]]) ov.save_model(ov_text_encoder, text_encoder_path) del ov_text_encoder gc.collect() - + del model gc.collect(); .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/mobileclip/modules/common/transformer.py:125: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/mobileclip/modules/common/transformer.py:125: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if seq_len != self.num_embeddings: @@ -549,9 +550,9 @@ Select device for image encoder .. 
code:: ipython3 core = ov.Core() - + device = device_widget() - + device @@ -608,17 +609,17 @@ Perform search print(f"Text encoding took {text_encoding_end - text_encoding_start:.3} ms") image_features /= image_features.norm(dim=-1, keepdim=True) text_features /= text_features.norm(dim=-1, keepdim=True) - + image_probs = (100.0 * text_features @ image_features.T).softmax(dim=-1) selected_image = [torch.argmax(image_probs).item()] - + visualize_result(images, input_labels[0], selected_image); .. parsed-literal:: - Image encoding took 0.0297 ms - Text encoding took 0.00513 ms + Image encoding took 0.0294 ms + Text encoding took 0.00498 ms @@ -654,14 +655,14 @@ models can require different optimal threshold for search. ) from open_clip.transform import image_transform from typing import Optional - - + + current_device = device.value current_model = image_encoder_path.name.split("_im_encoder")[0] - + available_converted_models = [model_file.name.split("_im_encoder")[0] for model_file in ov_models_dir.glob("*_im_encoder.xml")] available_devices = list(core.available_devices) + ["AUTO"] - + download_file( "https://storage.openvinotoolkit.org/data/test_data/videos/car-detection.mp4", directory=sample_path, @@ -671,8 +672,8 @@ models can require different optimal threshold for search. directory=sample_path, filename="coco.mp4", ) - - + + def get_preprocess_and_tokenizer(model_name): if "mobileclip" in model_name: resolution = supported_models["MobileCLIP"][model_name]["image_size"] @@ -693,10 +694,10 @@ models can require different optimal threshold for search. resize_size = model_configs[model_name]["image_size"] preprocess = image_transform((resize_size, resize_size), is_train=False, resize_mode="longest") tokenizer = open_clip.get_tokenizer(model_configs[model_name]["model_name"]) - + return preprocess, tokenizer - - + + def run( path: str, text_search: str, @@ -715,7 +716,7 @@ models can require different optimal threshold for search. 
global tokenizer global ov_compiled_image_encoder global ov_compiled_text_encoder - + if current_model != model_name or device != current_device: ov_compiled_image_encoder = core.compile_model(ov_models_dir / f"{model_name}_im_encoder.xml", device) ov_compiled_text_encoder = core.compile_model(ov_models_dir / f"{model_name}_text_encoder.xml", device) @@ -725,7 +726,7 @@ models can require different optimal threshold for search. # Load video dataset = LoadVideo(path, transforms=preprocess, vid_stride=stride) dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=0) - + # Get image query features if image_search: image = preprocess(image_search).unsqueeze(0) @@ -745,11 +746,11 @@ models can require different optimal threshold for search. for image, orig, frame, timestamp in dataloader: with torch.no_grad(): image_features = torch.from_numpy(ov_compiled_image_encoder(image)[0]) - + image_features /= image_features.norm(dim=-1, keepdim=True) probs = query_features.cpu().numpy() @ image_features.cpu().numpy().T probs = probs[0] - + # Save frame similarity values df = pd.DataFrame( { @@ -759,15 +760,15 @@ models can require different optimal threshold for search. } ) res = pd.concat([res, df]) - + # Check if frame is over threshold for i, p in enumerate(probs): if p > thresh: matches.append(to_pil_image(orig[i])) matches_probs.append(p) - + print(f"Frames: {frame.tolist()} - Probs: {probs}") - + # Create plot of similarity values lines = ( alt.Chart(res) @@ -778,16 +779,16 @@ models can require different optimal threshold for search. 
) ).properties(width=600) rule = alt.Chart().mark_rule(strokeDash=[6, 3], size=2).encode(y=alt.datum(thresh)) - + selected_frames = np.argsort(-1 * np.array(matches_probs))[:20] matched_sorted_frames = [matches[idx] for idx in selected_frames] - + return ( lines + rule, matched_sorted_frames, ) # Only return up to 20 images to not crash the UI - - + + class LoadVideo(Dataset): def __init__(self, path, transforms, vid_stride=1): self.transforms = transforms @@ -795,27 +796,27 @@ models can require different optimal threshold for search. self.cur_frame = 0 self.cap = cv2.VideoCapture(path) self.total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride) - + def __getitem__(self, _): # Read video # Skip over frames for _ in range(self.vid_stride): self.cap.grab() self.cur_frame += 1 - + # Read frame _, img = self.cap.retrieve() timestamp = self.cap.get(cv2.CAP_PROP_POS_MSEC) - + # Convert to PIL img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img = Image.fromarray(np.uint8(img)) - + # Apply transforms img_t = self.transforms(img) - + return img_t, to_tensor(img), self.cur_frame, timestamp - + def __len__(self): return self.total_frames @@ -837,15 +838,15 @@ models can require different optimal threshold for search. if not Path("gradio_helper.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/mobileclip-video-search/gradio_helper.py") open("gradio_helper.py", "w").write(r.text) - + from gradio_helper import make_demo, Option - + demo = make_demo( run=run, model_option=Option(choices=available_converted_models, value=model_checkpoint.value), device_option=Option(choices=available_devices, value=device.value), ) - + try: demo.launch(debug=False) except Exception: @@ -858,7 +859,7 @@ models can require different optimal threshold for search. .. parsed-literal:: Running on local URL: http://127.0.0.1:7860 - + To create a public link, set `share=True` in `launch()`. 
diff --git a/docs/notebooks/mobilevlm-language-assistant-with-output.rst b/docs/notebooks/mobilevlm-language-assistant-with-output.rst index 02efe16d9c0f4a..5902537e3026a5 100644 --- a/docs/notebooks/mobilevlm-language-assistant-with-output.rst +++ b/docs/notebooks/mobilevlm-language-assistant-with-output.rst @@ -119,13 +119,13 @@ Import required packages .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( - 2024-10-23 01:57:03.532418: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-23 01:57:03.567584: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 02:02:06.143728: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
+ 2024-11-05 02:02:06.177889: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-23 01:57:04.078609: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + 2024-11-05 02:02:06.679118: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( @@ -342,15 +342,15 @@ compression instead of INT8 weight compression. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:595: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:595: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input_shape[-1] > 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:119: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:119: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if seq_len > self.max_seq_len_cached: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:348: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:348: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:355: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:355: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attention_mask.size() != (bsz, 1, q_len, kv_seq_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:365: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:365: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim): @@ -416,7 +416,7 @@ compression instead of INT8 weight compression. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) 
if a.grad is not None: diff --git a/docs/notebooks/music-generation-with-output.rst b/docs/notebooks/music-generation-with-output.rst index 566aa1c87a941c..4adc89b9ff79e7 100644 --- a/docs/notebooks/music-generation-with-output.rst +++ b/docs/notebooks/music-generation-with-output.rst @@ -124,13 +124,13 @@ Imports .. parsed-literal:: - 2024-10-23 01:59:20.725760: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-23 01:59:20.759494: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 02:04:23.419260: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 02:04:23.453089: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-23 01:59:21.367845: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. 
+ 2024-11-05 02:04:24.059462: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( @@ -170,11 +170,11 @@ generate a text-conditioned music sample. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/huggingface_hub/file_download.py:797: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/huggingface_hub/file_download.py:797: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`. warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm. warnings.warn("torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.") @@ -229,7 +229,7 @@ vocabulary. It helps the model understand the context of a sentence. @@ -655,7 +655,7 @@ We can now infer the pipeline backed by OpenVINO models. 
diff --git a/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst b/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst index 082b6613456e28..337458e35bbf0c 100644 --- a/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst +++ b/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst @@ -53,7 +53,7 @@ Prerequisites %pip install -q "torch>=2.1" "transformers>=4.40" "accelerate" "pillow" "gradio>=4.26" "tqdm" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q "nncf>=2.13" - %pip install -q -U --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly openvino_tokenizers[transformers] "openvino>=2024.4.0" + %pip install -q -U --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly "openvino-tokenizers[transformers]" "openvino>=2024.4.0" %pip install -q "git+https://github.com/eaidova/optimum-intel.git@ea/minicpmv" @@ -64,9 +64,10 @@ Prerequisites Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - openvino-dev 2024.4.0 requires openvino==2024.4.0, but you have openvino 2024.5.0.dev20241014 which is incompatible. openvino-genai 2024.4.0.0 requires openvino_tokenizers~=2024.4.0.0.dev, but you have openvino-tokenizers 2024.5.0.0.dev20241022 which is incompatible. Note: you may need to restart the kernel to use updated packages. + ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. + mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. Note: you may need to restart the kernel to use updated packages. @@ -145,13 +146,13 @@ Download PyTorch model .. 
parsed-literal:: - example_1.png: 0%| | 0.00/200k [00:00 1 or self.sliding_window is not None) and self.is_causal: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/exporters/onnx/model_patcher.py:307: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/exporters/onnx/model_patcher.py:306: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/qnguyen3/nanoLLaVA/1ea99cffcf50a27c5f06fe5d22a07046aba0bffe/modeling_llava_qwen2.py:939: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/qnguyen3/nanoLLaVA/13d60cec183a86755afed64da495fcc2c382ea80/modeling_llava_qwen2.py:939: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if seq_len > self.max_seq_len_cached: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:432: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:432: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/qnguyen3/nanoLLaVA/1ea99cffcf50a27c5f06fe5d22a07046aba0bffe/modeling_llava_qwen2.py:1499: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/qnguyen3/nanoLLaVA/13d60cec183a86755afed64da495fcc2c382ea80/modeling_llava_qwen2.py:1499: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attention_mask.size() != (bsz, 1, q_len, kv_seq_len): - Unexpectedly found already patched module while applying ModuleExtension during PyTorch model conversion. 
Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. OpenVINO and OpenVINO Tokenizers versions are not binary compatible. OpenVINO version: 2024.5.0-16993 OpenVINO Tokenizers version: 2024.5.0.0 @@ -579,18 +250,18 @@ documentation + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/bin/optimum-cli", line 10, in sys.exit(main()) - File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/commands/optimum_cli.py", line 208, in main + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/commands/optimum_cli.py", line 208, in main service.run() - File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/commands/export/openvino.py", line 349, in run + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/commands/export/openvino.py", line 349, in run main_export( - File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/exporters/openvino/__main__.py", line 416, in main_export + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/exporters/openvino/__main__.py", line 416, in main_export core = Core() - File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino_tokenizers/__init__.py", 
line 53, in new_core_init + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino_tokenizers/__init__.py", line 53, in new_core_init self.add_extension(str(_ext_path)) # Core.add_extension doesn't support Path object RuntimeError: Exception from src/inference/src/cpp/core.cpp:158: - Cannot add extension. Cannot find entry point to the extension library. This error happened: Cannot load library '/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino_tokenizers/lib/libopenvino_tokenizers.so': /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino_tokenizers/lib/libopenvino_tokenizers.so: undefined symbol: _ZNK2ov4Node17can_constant_foldERKSt6vectorINS_6OutputIS0_EESaIS3_EE + Cannot add extension. Cannot find entry point to the extension library. This error happened: Cannot load library '/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino_tokenizers/lib/libopenvino_tokenizers.so': /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino_tokenizers/lib/libopenvino_tokenizers.so: undefined symbol: _ZNK2ov4Node17can_constant_foldERKSt6vectorINS_6OutputIS0_EESaIS3_EE @@ -709,12 +380,12 @@ image encoder model. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/quantization/quantize_model.py:432: FutureWarning: `CompressWeightsMode.INT8` is deprecated. Please, use `CompressWeightsMode.INT8_ASYM` as value instead. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/quantization/quantize_model.py:432: FutureWarning: `CompressWeightsMode.INT8` is deprecated. Please, use `CompressWeightsMode.INT8_ASYM` as value instead. warning_deprecated( - 2024-10-23 02:04:33.280788: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-23 02:04:33.314985: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 02:09:38.791476: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 02:09:38.825207: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-23 02:04:33.946816: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 02:09:39.427301: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -915,8 +586,8 @@ can use the same tokenizer and image processor that provided with model. Question: Describe this image in detail Answer: - The image portrays a charming and playful scene featuring a white lama. 
This adorable creature has a playful expression, with its eyes sparkling with joy and its nose in a playful smile. It is adorned with cute, tiny eyes that add a playful touch to its face. The lama's ears are also quite noticeable, with one of them sporting a tiny pink button. The lama's body is covered in fluffy, white fur, and it has its hind legs visible, adding a sense of movement to the image. - The lama is surrounded by a vivid display of fire. The flames are bright and lively, with some areas appearing more intense + The image features a small, adorable white lamb standing amidst a fire. The lamb's fur is fluffy and white, and it is adorned with tiny black eyes that are bright and lively. The lamb's face is cute, with a small black nose and a small mouth. It seems like the lamb is looking straight at the camera, making it appear even more adorable. + The lamb's right ear is visible, and it is white and pink. The lamb's right eye is also black and pink. The lamb's face is quite detailed, with the nose and mouth visible. There are also details like the lamb's right foot, which is white Interactive demo diff --git a/docs/notebooks/object-detection-with-output.rst b/docs/notebooks/object-detection-with-output.rst index b1db0093158982..a34f72f5d8ff1e 100644 --- a/docs/notebooks/object-detection-with-output.rst +++ b/docs/notebooks/object-detection-with-output.rst @@ -2,12 +2,10 @@ Live Object Detection with OpenVINO™ ==================================== This notebook demonstrates live object detection with OpenVINO, using -the `SSDLite -MobileNetV2 `__ -from `Open Model -Zoo `__. Final part -of this notebook shows live inference results from a webcam. -Additionally, you can also upload a video file. +the `Ultralytics +YOLOv8 `__. Final part of +this notebook shows live inference results from a webcam. Additionally, +you can also upload a video file. **NOTE**: To use this notebook with a webcam, you need to run the notebook on a computer with a webcam. 
If you run the notebook on a @@ -25,13 +23,12 @@ Additionally, you can also upload a video file. - `The Model <#the-model>`__ - - `Download the Model <#download-the-model>`__ - - `Convert the Model <#convert-the-model>`__ + - `Download and convert the + Model <#download-and-convert-the-model>`__ - `Load the Model <#load-the-model>`__ - `Processing <#processing>`__ - - `Process Results <#process-results>`__ - `Main Processing Function <#main-processing-function>`__ - `Run <#run>`__ @@ -62,8 +59,8 @@ Install requirements .. code:: ipython3 - %pip install -q "openvino-dev>=2024.0.0" - %pip install -q tensorflow + %pip install -q "openvino>=2024.4.0" + %pip install -q "ultralytics==8.3.0" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q opencv-python requests tqdm # Fetch `notebook_utils` module @@ -79,10 +76,6 @@ Install requirements .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - magika 0.5.1 requires numpy<2.0,>=1.24; python_version >= "3.8" and python_version < "3.9", but you have numpy 1.23.5 which is incompatible. - mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. - supervision 0.24.0 requires numpy<1.23.3,>=1.21.2; python_full_version <= "3.10.0", but you have numpy 1.23.5 which is incompatible. Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. @@ -102,17 +95,10 @@ Imports .. 
code:: ipython3 - import collections - import tarfile - import time from pathlib import Path - - import cv2 - import numpy as np - from IPython import display + import gc import openvino as ov - from openvino.tools.mo.front import tf as ov_tf_front - from openvino.tools import mo + from ultralytics import YOLO import notebook_utils as utils @@ -121,92 +107,53 @@ The Model -Download the Model -~~~~~~~~~~~~~~~~~~ - +Download and convert the Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Use the ``download_file``, a function from the ``notebook_utils`` file. -It automatically creates a directory structure and downloads the -selected model. This step is skipped if the package is already -downloaded and unpacked. The chosen model comes from the public -directory, which means it must be converted into OpenVINO Intermediate -Representation (OpenVINO IR). - - **NOTE**: Using a model other than ``ssdlite_mobilenet_v2`` may - require different conversion parameters as well as pre- and - post-processing. .. code:: ipython3 # A directory where the model will be downloaded. 
- base_model_dir = Path("model") - - # The name of the model from Open Model Zoo - model_name = "ssdlite_mobilenet_v2" - archive_name = Path(f"{model_name}_coco_2018_05_09.tar.gz") - model_url = f"https://storage.openvinotoolkit.org/repositories/open_model_zoo/public/2022.1/{model_name}/{archive_name}" + # The name of the model + model_name = "yolov8n" - # Download the archive - downloaded_model_path = base_model_dir / archive_name - if not downloaded_model_path.exists(): - utils.download_file(model_url, downloaded_model_path.name, downloaded_model_path.parent) + det_model_path = Path(f"{model_name}_openvino_model/{model_name}.xml") - # Unpack the model - tf_model_path = base_model_dir / archive_name.with_suffix("").stem / "frozen_inference_graph.pb" - if not tf_model_path.exists(): - with tarfile.open(downloaded_model_path) as file: - file.extractall(base_model_dir) - + # export model to OpenVINO format using Ultralytics API + if not det_model_path.exists(): + pt_model = YOLO(f"{model_name}.pt") + pt_model.export(format="openvino", dynamic=True, half=True) + del pt_model + gc.collect() .. parsed-literal:: - model/ssdlite_mobilenet_v2_coco_2018_05_09.tar.gz: 0%| | 0.00/48.7M [00:00`__ -(``mo.convert_model`` function). If the model has been already -converted, this step is skipped. - -.. code:: ipython3 - - precision = "FP16" - # The output path for the conversion. 
- converted_model_path = Path("model") / f"{model_name}_{precision.lower()}.xml" - - # Convert it to IR if not previously converted - trans_config_path = Path(ov_tf_front.__file__).parent / "ssd_v2_support.json" - if not converted_model_path.exists(): - ov_model = mo.convert_model( - tf_model_path, - compress_to_fp16=(precision == "FP16"), - transformations_config=trans_config_path, - tensorflow_object_detection_api_pipeline_config=tf_model_path.parent / "pipeline.config", - reverse_input_channels=True, - ) - ov.save_model(ov_model, converted_model_path) - del ov_model + Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'... .. parsed-literal:: - [ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release. - In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). OVC represents a lightweight alternative of MO and provides simplified model conversion API. - Find more information about transition from MO to OVC at https://docs.openvino.ai/2023.2/openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition.html + 100%|██████████| 6.25M/6.25M [00:00<00:00, 25.9MB/s] .. parsed-literal:: - [ WARNING ] The Preprocessor block has been removed. Only nodes performing mean value subtraction and scaling (if applicable) are kept. + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.2.2+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + YOLOv8n summary (fused): 168 layers, 3,151,904 parameters, 0 gradients, 8.7 GFLOPs + + PyTorch: starting from 'yolov8n.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (6.2 MB) + + OpenVINO: starting export with openvino 2024.4.0-16579-c3152d32c9c-releases/2024/4... 
+ OpenVINO: export success ✅ 1.3s, saved as 'yolov8n_openvino_model/' (6.4 MB) + + Export complete (1.5s) + Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/object-detection-webcam + Predict: yolo predict task=detect model=yolov8n_openvino_model imgsz=640 half + Validate: yolo val task=detect model=yolov8n_openvino_model imgsz=640 data=coco.yaml half + Visualize: https://netron.app Load the Model @@ -241,208 +188,44 @@ best performance. For that purpose, just use ``AUTO``. .. code:: ipython3 - # Read the network and corresponding weights from a file. - model = core.read_model(model=converted_model_path) - # Compile the model for CPU (you can choose manually CPU, GPU etc.) - # or let the engine choose the best available device (AUTO). - compiled_model = core.compile_model(model=model, device_name=device.value) - - # Get the input and output nodes. - input_layer = compiled_model.input(0) - output_layer = compiled_model.output(0) - - # Get the input size. - height, width = list(input_layer.shape)[1:3] - -Input and output layers have the names of the input node and output node -respectively. In the case of SSDLite MobileNetV2, there is 1 input and 1 -output. - -.. code:: ipython3 - - input_layer.any_name, output_layer.any_name - - - - -.. parsed-literal:: - - ('image_tensor:0', 'detection_boxes:0') - - - -Processing ----------- - - - -Process Results -~~~~~~~~~~~~~~~ - - - -First, list all available classes and create colors for them. Then, in -the post-process stage, transform boxes with normalized coordinates -``[0, 1]`` into boxes with pixel coordinates ``[0, image_size_in_px]``. -Afterward, use `non-maximum -suppression `__ -to reject overlapping detections and those below the probability -threshold (0.5). Finally, draw boxes and labels inside them. - -.. 
code:: ipython3 - - # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/ - classes = [ - "background", - "person", - "bicycle", - "car", - "motorcycle", - "airplane", - "bus", - "train", - "truck", - "boat", - "traffic light", - "fire hydrant", - "street sign", - "stop sign", - "parking meter", - "bench", - "bird", - "cat", - "dog", - "horse", - "sheep", - "cow", - "elephant", - "bear", - "zebra", - "giraffe", - "hat", - "backpack", - "umbrella", - "shoe", - "eye glasses", - "handbag", - "tie", - "suitcase", - "frisbee", - "skis", - "snowboard", - "sports ball", - "kite", - "baseball bat", - "baseball glove", - "skateboard", - "surfboard", - "tennis racket", - "bottle", - "plate", - "wine glass", - "cup", - "fork", - "knife", - "spoon", - "bowl", - "banana", - "apple", - "sandwich", - "orange", - "broccoli", - "carrot", - "hot dog", - "pizza", - "donut", - "cake", - "chair", - "couch", - "potted plant", - "bed", - "mirror", - "dining table", - "window", - "desk", - "toilet", - "door", - "tv", - "laptop", - "mouse", - "remote", - "keyboard", - "cell phone", - "microwave", - "oven", - "toaster", - "sink", - "refrigerator", - "blender", - "book", - "clock", - "vase", - "scissors", - "teddy bear", - "hair drier", - "toothbrush", - "hair brush", - ] - - # Colors for the classes above (Rainbow Color Map). - colors = cv2.applyColorMap( - src=np.arange(0, 255, 255 / len(classes), dtype=np.float32).astype(np.uint8), - colormap=cv2.COLORMAP_RAINBOW, - ).squeeze() + core = ov.Core() - def process_results(frame, results, thresh=0.6): - # The size of the original frame. - h, w = frame.shape[:2] - # The 'results' variable is a [1, 1, 100, 7] tensor. - results = results.squeeze() - boxes = [] - labels = [] - scores = [] - for _, label, score, xmin, ymin, xmax, ymax in results: - # Create a box with pixels coordinates from the box with normalized coordinates [0,1]. 
- boxes.append(tuple(map(int, (xmin * w, ymin * h, (xmax - xmin) * w, (ymax - ymin) * h)))) - labels.append(int(label)) - scores.append(float(score)) + def load_model(det_model_path, device): + compiled_model = compile_model(det_model_path, device) + det_model = YOLO(det_model_path.parent, task="detect") - # Apply non-maximum suppression to get rid of many overlapping entities. - # See https://paperswithcode.com/method/non-maximum-suppression - # This algorithm returns indices of objects to keep. - indices = cv2.dnn.NMSBoxes(bboxes=boxes, scores=scores, score_threshold=thresh, nms_threshold=0.6) + if det_model.predictor is None: + custom = {"conf": 0.25, "batch": 1, "save": False, "mode": "predict"} # method defaults + args = {**det_model.overrides, **custom} + det_model.predictor = det_model._smart_load("predictor")(overrides=args, _callbacks=det_model.callbacks) + det_model.predictor.setup_model(model=det_model.model) - # If there are no boxes. - if len(indices) == 0: - return [] + det_model.predictor.model.ov_compiled_model = compiled_model + return det_model - # Filter detected objects. - return [(labels[idx], scores[idx], boxes[idx]) for idx in indices.flatten()] + def compile_model(det_model_path, device): + det_ov_model = core.read_model(det_model_path) - def draw_boxes(frame, boxes): - for label, score, box in boxes: - # Choose color for the label. - color = tuple(map(int, colors[label])) - # Draw a box. - x2 = box[0] + box[2] - y2 = box[1] + box[3] - cv2.rectangle(img=frame, pt1=box[:2], pt2=(x2, y2), color=color, thickness=3) + ov_config = {} + if device != "CPU": + det_ov_model.reshape({0: [1, 3, 640, 640]}) + if "GPU" in device or ("AUTO" in device and "GPU" in core.available_devices): + ov_config = {"GPU_DISABLE_WINOGRAD_CONVOLUTION": "YES"} + det_compiled_model = core.compile_model(det_ov_model, device, ov_config) + return det_compiled_model - # Draw a label name inside the box. 
- cv2.putText( - img=frame, - text=f"{classes[label]} {score:.2f}", - org=(box[0] + 10, box[1] + 30), - fontFace=cv2.FONT_HERSHEY_COMPLEX, - fontScale=frame.shape[1] / 1000, - color=color, - thickness=1, - lineType=cv2.LINE_AA, - ) - return frame + det_model = load_model(det_model_path, device.value) + + +.. parsed-literal:: + + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.2.2+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Loading yolov8n_openvino_model for OpenVINO inference... + Using OpenVINO LATENCY mode for batch=1 inference... + Main Processing Function ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -454,8 +237,18 @@ file. .. code:: ipython3 + from IPython import display + import cv2 + import numpy as np + + # Main processing function to run object detection. - def run_object_detection(source=0, flip=False, use_popup=False, skip_first_frames=0): + def run_object_detection( + source=0, + flip=False, + use_popup=False, + skip_first_frames=0, + ): player = None try: # Create a video player to play with target fps. @@ -466,7 +259,6 @@ file. title = "Press ESC to Exit" cv2.namedWindow(winname=title, flags=cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE) - processing_times = collections.deque() while True: # Grab the frame. frame = player.next() @@ -483,43 +275,10 @@ file. fy=scale, interpolation=cv2.INTER_AREA, ) - - # Resize the image and change dims to fit neural network input. - input_img = cv2.resize(src=frame, dsize=(width, height), interpolation=cv2.INTER_AREA) - # Create a batch of images (size = 1). - input_img = input_img[np.newaxis, ...] - - # Measure processing time. - - start_time = time.time() # Get the results. - results = compiled_model([input_img])[output_layer] - stop_time = time.time() - # Get poses from network results. - boxes = process_results(frame=frame, results=results) - - # Draw boxes on a frame. - frame = draw_boxes(frame=frame, boxes=boxes) - - processing_times.append(stop_time - start_time) - # Use processing times from last 200 frames. 
- if len(processing_times) > 200: - processing_times.popleft() - - _, f_width = frame.shape[:2] - # Mean processing time [ms]. - processing_time = np.mean(processing_times) * 1000 - fps = 1000 / processing_time - cv2.putText( - img=frame, - text=f"Inference time: {processing_time:.1f}ms ({fps:.1f} FPS)", - org=(20, 40), - fontFace=cv2.FONT_HERSHEY_COMPLEX, - fontScale=f_width / 1000, - color=(0, 0, 255), - thickness=1, - lineType=cv2.LINE_AA, - ) + input_image = np.array(frame) + detections = det_model(input_image, verbose=False) + frame = detections[0].plot() # Use this workaround if there is flickering. if use_popup: @@ -592,22 +351,10 @@ Run the object detection: -.. image:: object-detection-with-output_files/object-detection-with-output_19_0.png +.. image:: object-detection-with-output_files/object-detection-with-output_13_0.png .. parsed-literal:: Source ended - -References ----------- - - - -1. `SSDLite - MobileNetV2 `__ -2. `Open Model - Zoo `__ -3. `Non-Maximum - Suppression `__ diff --git a/docs/notebooks/object-detection-with-output_files/object-detection-with-output_13_0.png b/docs/notebooks/object-detection-with-output_files/object-detection-with-output_13_0.png new file mode 100644 index 00000000000000..1c548a0445636e --- /dev/null +++ b/docs/notebooks/object-detection-with-output_files/object-detection-with-output_13_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3fca049d156843f0976e9386ece07d1846e92efcacc5c559de140e7733b67b3 +size 171752 diff --git a/docs/notebooks/object-detection-with-output_files/object-detection-with-output_19_0.png b/docs/notebooks/object-detection-with-output_files/object-detection-with-output_19_0.png deleted file mode 100644 index fb2b3b2abdf69f..00000000000000 --- a/docs/notebooks/object-detection-with-output_files/object-detection-with-output_19_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:1d300cd81ce6f44a5ee2ded6f14b739b0a8ecd8d5d2e487743cd085b92b3533b -size 175076 diff --git a/docs/notebooks/openvino-api-with-output.rst b/docs/notebooks/openvino-api-with-output.rst index a837b88ac6976b..b2b4c8c0f04fdd 100644 --- a/docs/notebooks/openvino-api-with-output.rst +++ b/docs/notebooks/openvino-api-with-output.rst @@ -201,7 +201,7 @@ notebooks. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') @@ -250,7 +250,7 @@ points to the filename of an ONNX model. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/segmentation.onnx') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/segmentation.onnx') @@ -310,7 +310,7 @@ without any conversion step. Pass the filename with extension to .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/inference.pdiparams') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/inference.pdiparams') @@ -354,7 +354,7 @@ TensorFlow models saved in frozen graph format can also be passed to .. 
parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.pb') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.pb') @@ -497,7 +497,7 @@ Information about the inputs and outputs of the model are in .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') @@ -703,7 +703,7 @@ produced data as values. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') @@ -892,7 +892,7 @@ input shape. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/segmentation.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/segmentation.bin') @@ -1044,7 +1044,7 @@ the cache. .. 
parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') diff --git a/docs/notebooks/openvoice-with-output.rst b/docs/notebooks/openvoice-with-output.rst index b73dd8059aa65b..2ee11fcded84dc 100644 --- a/docs/notebooks/openvoice-with-output.rst +++ b/docs/notebooks/openvoice-with-output.rst @@ -100,10 +100,11 @@ Clone repository and install requirements Cloning into 'OpenVoice'... remote: Enumerating objects: 438, done. remote: Total 438 (delta 0), reused 0 (delta 0), pack-reused 438 (from 1) - Receiving objects: 100% (438/438), 3.84 MiB | 19.30 MiB/s, done. + Receiving objects: 100% (438/438), 3.84 MiB | 21.51 MiB/s, done. Resolving deltas: 100% (207/207), done. ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. + tensorflow 2.12.0 requires numpy<1.24,>=1.22, but you have numpy 1.24.4 which is incompatible. torchvision 0.17.2+cpu requires torch==2.2.2, but you have torch 2.4.1 which is incompatible. Note: you may need to restart the kernel to use updated packages. @@ -242,9 +243,9 @@ True .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. WeightNorm.apply(module, name, dim) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. 
This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. checkpoint_dict = torch.load(ckpt_path, map_location=torch.device(self.device)) @@ -258,9 +259,9 @@ True .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/wavmark/__init__.py:16: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/wavmark/__init__.py:16: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. 
It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. checkpoint = torch.load(resume_path, map_location=torch.device('cpu')) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. checkpoint_dict = torch.load(ckpt_path, map_location=torch.device(self.device)) @@ -410,40 +411,40 @@ documentation 0 No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:283: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:283: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert ( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:346: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:346: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! pad_length = max(length - (self.window_size + 1), 0) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:347: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:347: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! slice_start_position = max((self.window_size + 1) - length, 0) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:349: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:349: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if pad_length > 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:114: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:114: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if torch.min(inputs) < left or torch.max(inputs) > right: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:119: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:119: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if min_bin_width * num_bins > 1.0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:121: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:121: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if min_bin_height * num_bins > 1.0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:171: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:171: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert (discriminant >= 0).all() - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Trace had nondeterministic nodes. Did you forget call .eval() on your model? 
Nodes: - %3293 : Float(1, 2, 43, strides=[86, 43, 1], requires_grad=0, device=cpu) = aten::randn(%3288, %3289, %3290, %3291, %3292) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86:0 - %5559 : Float(1, 192, 152, strides=[29184, 1, 192], requires_grad=0, device=cpu) = aten::randn_like(%m_p, %5554, %5555, %5556, %5557, %5558) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86:0 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Trace had nondeterministic nodes. Did you forget call .eval() on your model? Nodes: + %3293 : Float(1, 2, 43, strides=[86, 43, 1], requires_grad=0, device=cpu) = aten::randn(%3288, %3289, %3290, %3291, %3292) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86:0 + %5559 : Float(1, 192, 153, strides=[29376, 1, 192], requires_grad=0, device=cpu) = aten::randn_like(%m_p, %5554, %5555, %5556, %5557, %5558) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86:0 This may cause errors in trace checking. To disable trace checking, pass check_trace=False to torch.jit.trace() _check_trace( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. 
of the traced function does not match the corresponding output of the Python function. Detailed error: - The values for attribute 'shape' do not match: torch.Size([1, 1, 38656]) != torch.Size([1, 1, 39680]). + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: + The values for attribute 'shape' do not match: torch.Size([1, 1, 39424]) != torch.Size([1, 1, 38656]). _check_trace( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 2. of the traced function does not match the corresponding output of the Python function. Detailed error: - The values for attribute 'shape' do not match: torch.Size([1, 1, 151, 43]) != torch.Size([1, 1, 155, 43]). + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 2. of the traced function does not match the corresponding output of the Python function. Detailed error: + The values for attribute 'shape' do not match: torch.Size([1, 1, 154, 43]) != torch.Size([1, 1, 151, 43]). _check_trace( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 3. of the traced function does not match the corresponding output of the Python function. Detailed error: - The values for attribute 'shape' do not match: torch.Size([1, 1, 151]) != torch.Size([1, 1, 155]). 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 3. of the traced function does not match the corresponding output of the Python function. Detailed error: + The values for attribute 'shape' do not match: torch.Size([1, 1, 154]) != torch.Size([1, 1, 151]). _check_trace( - 2024-10-23 02:08:53.401718: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 02:13:33.268258: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -476,16 +477,16 @@ documentation )`. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:836.) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/modules/module.py:1562: UserWarning: A window was not provided. A rectangular window will be applied,which is known to cause spectral leakage. Other windows such as torch.hann_window or torch.hamming_window can are recommended to reduce spectral leakage.To suppress this warning and use a rectangular window, explicitly set `window=torch.ones(n_fft, device=)`. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:836.) return forward_call(\*args, \*\*kwargs) @@ -713,7 +714,7 @@ Load speaker embeddings .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/functional.py:666: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/functional.py:666: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error. Note: you can still call torch.view_as_real on the complex output to recover the old return format. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:873.) return _VF.stft(input, n_fft, hop_length, win_length, window, # type: ignore[attr-defined] @@ -868,7 +869,7 @@ And finally, run voice tone conversion with OpenVINO optimized model @@ -886,7 +887,7 @@ And finally, run voice tone conversion with OpenVINO optimized model @@ -1075,7 +1076,7 @@ voice tone conversion online. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/gradio/components/dropdown.py:100: UserWarning: The `max_choices` parameter is ignored when `multiselect` is False. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/gradio/components/dropdown.py:100: UserWarning: The `max_choices` parameter is ignored when `multiselect` is False. warnings.warn( diff --git a/docs/notebooks/optical-character-recognition-with-output.rst b/docs/notebooks/optical-character-recognition-with-output.rst index 9efa8e8e7b87f2..7dae2290312e68 100644 --- a/docs/notebooks/optical-character-recognition-with-output.rst +++ b/docs/notebooks/optical-character-recognition-with-output.rst @@ -9,16 +9,9 @@ which shows only text detection. The `horizontal-text-detection-0001 `__ and -`text-recognition-resnet `__ +`text-recognition-0014 `__ models are used together for text detection and then text recognition. 
-In this tutorial, Open Model Zoo tools including Model Downloader, Model -Converter and Info Dumper are used to download and convert the models -from `Open Model -Zoo `__. For more -information, refer to the -`model-tools `__ tutorial. - **Table of contents:** @@ -26,7 +19,6 @@ information, refer to the - `Imports <#imports>`__ - `Settings <#settings>`__ - `Download Models <#download-models>`__ -- `Convert Models <#convert-models>`__ - `Select inference device <#select-inference-device>`__ - `Object Detection <#object-detection>`__ @@ -61,15 +53,12 @@ Guide =2024.0.0" "onnx<1.16.2" torch torchvision pillow opencv-python "matplotlib>=3.4" --extra-index-url https://download.pytorch.org/whl/cpu + # Install openvino package + %pip install -q "openvino>=2024.4.0" pillow opencv-python "matplotlib>=3.4" .. parsed-literal:: - ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. - torchaudio 2.4.1+cpu requires torch==2.4.1, but you have torch 2.2.2+cpu which is incompatible. Note: you may need to restart the kernel to use updated packages. @@ -111,7 +100,7 @@ Settings model_dir = Path("model") precision = "FP16" detection_model = "horizontal-text-detection-0001" - recognition_model = "text-recognition-resnet-fc" + recognition_model = "text-recognition-0014" model_dir.mkdir(exist_ok=True) @@ -120,274 +109,35 @@ Download Models -The next cells will run Model Downloader to download the detection and -recognition models. If the models have been downloaded before, they will -not be downloaded again. +The next cells will download the detection and recognition models. If +the models have been downloaded before, they will not be downloaded +again. .. 
code:: ipython3 - download_command = ( - f"omz_downloader --name {detection_model},{recognition_model} --output_dir {model_dir} --cache_dir {model_dir} --precision {precision} --num_attempts 5" - ) - display(Markdown(f"Download command: `{download_command}`")) - display(Markdown(f"Downloading {detection_model}, {recognition_model}...")) - !$download_command - display(Markdown(f"Finished downloading {detection_model}, {recognition_model}.")) - - detection_model_path = (model_dir / "intel/horizontal-text-detection-0001" / precision / detection_model).with_suffix(".xml") - recognition_model_path = (model_dir / "public/text-recognition-resnet-fc" / precision / recognition_model).with_suffix(".xml") - - - -Download command: -``omz_downloader --name horizontal-text-detection-0001,text-recognition-resnet-fc --output_dir model --cache_dir model --precision FP16 --num_attempts 5`` - - - -Downloading horizontal-text-detection-0001, text-recognition-resnet-fc… - - -.. parsed-literal:: - - ################|| Downloading horizontal-text-detection-0001 ||################ - - ========== Downloading model/intel/horizontal-text-detection-0001/FP16/horizontal-text-detection-0001.xml - - - ========== Downloading model/intel/horizontal-text-detection-0001/FP16/horizontal-text-detection-0001.bin - - - ################|| Downloading text-recognition-resnet-fc ||################ - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/__init__.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/builder.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/model.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/weight_init.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/registry.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/heads/__init__.py - - - ========== Downloading 
model/public/text-recognition-resnet-fc/vedastr/models/heads/builder.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/heads/fc_head.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/heads/registry.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/__init__.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/builder.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/registry.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/body.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/component.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/sequences/__init__.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/sequences/builder.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/sequences/registry.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/__init__.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/builder.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/decoders/__init__.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/decoders/builder.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/decoders/registry.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/decoders/bricks/__init__.py - - - ========== Downloading 
model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/decoders/bricks/bricks.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/decoders/bricks/builder.py - + from notebook_utils import download_ir_model - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/decoders/bricks/registry.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/encoders/__init__.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/encoders/builder.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/encoders/backbones/__init__.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/encoders/backbones/builder.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/encoders/backbones/registry.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/encoders/backbones/resnet.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/encoders/enhance_modules/__init__.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/encoders/enhance_modules/builder.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/encoders/enhance_modules/registry.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/utils/__init__.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/utils/builder.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/utils/conv_module.py - 
- - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/utils/fc_module.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/utils/norm.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/utils/registry.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/utils/__init__.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/utils/common.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/utils/registry.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/utils/config.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/configs/resnet_fc.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/ckpt/resnet_fc.pth - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/addict-2.4.0-py3-none-any.whl + detection_model_url = f"https://storage.openvinotoolkit.org/repositories/open_model_zoo/2023.0/models_bin/1/{detection_model}/{precision}/{detection_model}.xml" + recognition_model_url = ( + f"https://storage.openvinotoolkit.org/repositories/open_model_zoo/2023.0/models_bin/1/{recognition_model}/{precision}/{recognition_model}.xml" + ) - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/models/heads/__init__.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/models/bodies/__init__.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/models/bodies/sequences/__init__.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/models/bodies/component.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/decoders/__init__.py - ========== Replacing text in 
model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/decoders/bricks/__init__.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/encoders/backbones/__init__.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/encoders/enhance_modules/__init__.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/models/utils/__init__.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/utils/__init__.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/utils/config.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/utils/config.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/utils/config.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/utils/config.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/utils/config.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/encoders/backbones/resnet.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/encoders/backbones/resnet.py - ========== Unpacking model/public/text-recognition-resnet-fc/vedastr/addict-2.4.0-py3-none-any.whl + detection_model_path = download_ir_model(detection_model_url, model_dir / detection_model / precision) + recognition_model_path = download_ir_model(recognition_model_url, model_dir / recognition_model / precision) -Finished downloading horizontal-text-detection-0001, -text-recognition-resnet-fc. - - -.. code:: ipython3 - - ### The text-recognition-resnet-fc model consists of many files. All filenames are printed in - ### the output of Model Downloader. Uncomment the next two lines to show this output. 
- - # for line in download_result: - # print(line) - -Convert Models --------------- - - - -The downloaded detection model is an Intel model, which is already in -OpenVINO Intermediate Representation (OpenVINO IR) format. The text -recognition model is a public model which needs to be converted to -OpenVINO IR. Since this model was downloaded from Open Model Zoo, use -Model Converter to convert the model to OpenVINO IR format. - -The output of Model Converter will be displayed. When the conversion is -successful, the last lines of output will include -``[ SUCCESS ] Generated IR version 11 model.`` - -.. code:: ipython3 - - convert_command = f"omz_converter --name {recognition_model} --precisions {precision} --download_dir {model_dir} --output_dir {model_dir}" - display(Markdown(f"Convert command: `{convert_command}`")) - display(Markdown(f"Converting {recognition_model}...")) - ! $convert_command - - - -Convert command: -``omz_converter --name text-recognition-resnet-fc --precisions FP16 --download_dir model --output_dir model`` - +.. parsed-literal:: + model/horizontal-text-detection-0001/FP16/horizontal-text-detection-0001.bin: 0%| | 0.00/3.70M [00:… -Converting text-recognition-resnet-fc… .. 
parsed-literal:: - ========== Converting text-recognition-resnet-fc to ONNX - Conversion to ONNX command: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/omz_tools/internal_scripts/pytorch_to_onnx.py --model-path=/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/omz_tools/models/public/text-recognition-resnet-fc --model-path=model/public/text-recognition-resnet-fc --model-name=get_model --import-module=model '--model-param=file_config=r"model/public/text-recognition-resnet-fc/vedastr/configs/resnet_fc.py"' '--model-param=weights=r"model/public/text-recognition-resnet-fc/vedastr/ckpt/resnet_fc.pth"' --input-shape=1,1,32,100 --input-names=input --output-names=output --output-file=model/public/text-recognition-resnet-fc/resnet_fc.onnx - - ONNX check passed successfully. - - ========== Converting text-recognition-resnet-fc to IR (FP16) - Conversion command: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/bin/mo --framework=onnx --output_dir=model/public/text-recognition-resnet-fc/FP16 --model_name=text-recognition-resnet-fc --input=input '--mean_values=input[127.5]' '--scale_values=input[127.5]' --output=output --input_model=model/public/text-recognition-resnet-fc/resnet_fc.onnx '--layout=input(NCHW)' '--input_shape=[1, 1, 32, 100]' --compress_to_fp16=True - - [ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release. 
- In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). OVC represents a lightweight alternative of MO and provides simplified model conversion API. - Find more information about transition from MO to OVC at https://docs.openvino.ai/2023.2/openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition.html - [ INFO ] Generated IR will be compressed to FP16. If you get lower accuracy, please consider disabling compression explicitly by adding argument --compress_to_fp16=False. - Find more information about compression to FP16 at https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_FP16_Compression.html - [ SUCCESS ] Generated IR version 11 model. - [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/optical-character-recognition/model/public/text-recognition-resnet-fc/FP16/text-recognition-resnet-fc.xml - [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/optical-character-recognition/model/public/text-recognition-resnet-fc/FP16/text-recognition-resnet-fc.bin - + model/text-recognition-0014/FP16/text-recognition-0014.bin: 0%| | 0.00/17.4M [00:00 + @@ -384,7 +384,7 @@ may be specified is input data .. parsed-literal:: - + @@ -422,7 +422,7 @@ then such conversion will be added explicitly. .. parsed-literal:: - + @@ -636,6 +636,6 @@ Compare performance .. 
parsed-literal:: - IR model in OpenVINO Runtime/CPU with manual image preprocessing: 0.0154 seconds per image, FPS: 65.10 - IR model in OpenVINO Runtime/CPU with preprocessing API: 0.0142 seconds per image, FPS: 70.53 + IR model in OpenVINO Runtime/CPU with manual image preprocessing: 0.0153 seconds per image, FPS: 65.39 + IR model in OpenVINO Runtime/CPU with preprocessing API: 0.0166 seconds per image, FPS: 60.23 diff --git a/docs/notebooks/paddle-ocr-webcam-with-output.rst b/docs/notebooks/paddle-ocr-webcam-with-output.rst index e5c426d1920f33..9f7510cd5efe96 100644 --- a/docs/notebooks/paddle-ocr-webcam-with-output.rst +++ b/docs/notebooks/paddle-ocr-webcam-with-output.rst @@ -75,6 +75,8 @@ Guide =1.22, but you have numpy 1.24.4 which is incompatible. Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. @@ -207,7 +209,7 @@ Download the Model for Text **Detection** .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-no… + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-no… .. parsed-literal:: @@ -253,7 +255,7 @@ Download the Model for Text **Recognition** .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-no… + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-no… .. 
parsed-literal:: diff --git a/docs/notebooks/paddle-ocr-webcam-with-output_files/paddle-ocr-webcam-with-output_30_0.png b/docs/notebooks/paddle-ocr-webcam-with-output_files/paddle-ocr-webcam-with-output_30_0.png index 43d5bfb39ed4e8..38a0d5d593351b 100644 --- a/docs/notebooks/paddle-ocr-webcam-with-output_files/paddle-ocr-webcam-with-output_30_0.png +++ b/docs/notebooks/paddle-ocr-webcam-with-output_files/paddle-ocr-webcam-with-output_30_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e5f67b60f77e15fb0b7908ada4ff64ab506accc23f579c81193175e5044f8f3b -size 593679 +oid sha256:ac7efd85f2c50b0a189dbf00c0cd2252f362e6469cd014d8f255c53172152c3d +size 591373 diff --git a/docs/notebooks/paddle-to-openvino-classification-with-output.rst b/docs/notebooks/paddle-to-openvino-classification-with-output.rst index 576878a578a323..25feb9293ee93a 100644 --- a/docs/notebooks/paddle-to-openvino-classification-with-output.rst +++ b/docs/notebooks/paddle-to-openvino-classification-with-output.rst @@ -89,11 +89,11 @@ Imports .. parsed-literal:: - --2024-10-23 02:12:15-- http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb + --2024-11-05 02:15:59-- http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb Resolving proxy-dmz.intel.com (proxy-dmz.intel.com)... 10.241.208.166 Connecting to proxy-dmz.intel.com (proxy-dmz.intel.com)|10.241.208.166|:911... connected. Proxy request sent, awaiting response... 404 Not Found - 2024-10-23 02:12:15 ERROR 404: Not Found. + 2024-11-05 02:16:00 ERROR 404: Not Found. dpkg: error: cannot access archive 'libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb': No such file or directory @@ -124,8 +124,8 @@ Imports .. parsed-literal:: - 2024-10-23 02:12:17 INFO: Loading faiss with AVX512 support. - 2024-10-23 02:12:17 INFO: Successfully loaded faiss with AVX512 support. + 2024-11-05 02:16:02 INFO: Loading faiss with AVX512 support. 
+ 2024-11-05 02:16:02 INFO: Successfully loaded faiss with AVX512 support. Settings @@ -209,7 +209,7 @@ inference on that image, and then show the top three prediction results. .. parsed-literal:: - [2024/10/23 02:12:43] ppcls WARNING: The current running environment does not support the use of GPU. CPU has been used instead. + [2024/11/05 02:16:41] ppcls WARNING: The current running environment does not support the use of GPU. CPU has been used instead. Labrador retriever, 0.75138 German short-haired pointer, 0.02373 Great Dane, 0.01848 @@ -275,7 +275,7 @@ clipping values. .. parsed-literal:: - 2024-10-23 02:12:44 WARNING: Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). + 2024-11-05 02:16:42 WARNING: Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). .. parsed-literal:: @@ -287,7 +287,7 @@ clipping values. .. parsed-literal:: - + @@ -462,7 +462,7 @@ Note that many optimizations are possible to improve the performance. .. parsed-literal:: - PaddlePaddle model on CPU: 0.0073 seconds per image, FPS: 137.19 + PaddlePaddle model on CPU: 0.0074 seconds per image, FPS: 134.37 PaddlePaddle result: Labrador retriever, 0.75138 @@ -523,7 +523,7 @@ select device from dropdown list for running inference using OpenVINO .. 
parsed-literal:: - OpenVINO IR model in OpenVINO Runtime (AUTO): 0.0026 seconds per image, FPS: 382.97 + OpenVINO IR model in OpenVINO Runtime (AUTO): 0.0027 seconds per image, FPS: 373.31 OpenVINO result: Labrador retriever, 0.74909 diff --git a/docs/notebooks/parler-tts-text-to-speech-with-output.rst b/docs/notebooks/parler-tts-text-to-speech-with-output.rst index 3bbb67d2232c99..323959aa17e8ef 100644 --- a/docs/notebooks/parler-tts-text-to-speech-with-output.rst +++ b/docs/notebooks/parler-tts-text-to-speech-with-output.rst @@ -9,7 +9,7 @@ with synthetic annotations `__ by Dan Lyth and Simon King, from Stability AI and Edinburgh University respectively. -|image0| +.. image:: https://images.squarespace-cdn.com/content/v1/657816dfbefe0533e8a69d9a/30c96e25-acc5-4019-acdd-648da6142c4c/architecture_v3.png?format=2500w Text-to-speech models trained on large-scale datasets have demonstrated impressive in-context learning capabilities and naturalness. However, @@ -53,8 +53,6 @@ need a Jupyter server to start. For details, please refer to `Installation Guide `__. -.. |image0| image:: https://images.squarespace-cdn.com/content/v1/657816dfbefe0533e8a69d9a/30c96e25-acc5-4019-acdd-648da6142c4c/architecture_v3.png?format=2500w - Prerequisites ------------- @@ -69,23 +67,6 @@ Prerequisites %pip install -q "openvino>=2024.2.0" %pip install -q git+https://github.com/huggingface/parler-tts.git "gradio>=4.19" transformers "torch>=2.2" --extra-index-url https://download.pytorch.org/whl/cpu - -.. parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - googleapis-common-protos 1.65.0 requires protobuf!=3.20.0,!=3.20.1,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0.dev0,>=3.20.2, but you have protobuf 3.19.6 which is incompatible. 
- mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. - onnx 1.16.1 requires protobuf>=3.20.2, but you have protobuf 3.19.6 which is incompatible. - paddlepaddle 2.6.2 requires protobuf>=3.20.2; platform_system != "Windows", but you have protobuf 3.19.6 which is incompatible. - tensorflow 2.12.0 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 3.19.6 which is incompatible. - tensorflow-datasets 4.9.2 requires protobuf>=3.20, but you have protobuf 3.19.6 which is incompatible. - tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 3.19.6 which is incompatible. - torchvision 0.17.2+cpu requires torch==2.2.2, but you have torch 2.4.1+cpu which is incompatible. - visualdl 2.5.3 requires protobuf>=3.20.0, but you have protobuf 3.19.6 which is incompatible. - Note: you may need to restart the kernel to use updated packages. - - Load the original model and inference ------------------------------------- @@ -114,19 +95,6 @@ Load the original model and inference audio_arr = generation.cpu().numpy().squeeze() sf.write("parler_tts_out.wav", audio_arr, model.config.sampling_rate) - -.. parsed-literal:: - - 2024-10-23 02:13:22.641328: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-23 02:13:22.675982: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- Flash attention 2 is not installed - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. - WeightNorm.apply(module, name, dim) - You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers - The attention mask is not set and cannot be inferred from input because pad token is same as eos token.As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. - - .. code:: ipython3 import IPython.display as ipd @@ -140,10 +108,10 @@ Load the original model and inference - + @@ -191,13 +159,6 @@ and Decoder (``ParlerTTSDecoder``). Lets convert them one by one. text_encoder_ov_model = convert(model.text_encoder, TEXT_ENCODER_OV_PATH, example_input) - -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4664: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead - warnings.warn( - - The Decoder Model performs in generation pipeline and we can separate it into two stage. In the first stage the model generates ``past_key_values`` into output for the second stage. In the second @@ -232,17 +193,6 @@ stage the model produces tokens during several runs. decoder_1_ov_model = convert(DecoderStage1Wrapper(model.decoder.model.decoder), DECODER_STAGE_1_OV_PATH, example_input) - -.. 
parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/parler_tts/modeling_parler_tts.py:253: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if seq_len > self.weights.size(0): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/parler_tts/modeling_parler_tts.py:1599: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if sequence_length != 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/parler_tts/modeling_parler_tts.py:802: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if attn_output.size() != (bsz, self.num_heads, tgt_len, self.head_dim): - - .. code:: ipython3 DECODER_STAGE_2_OV_PATH = Path("models/decoder_stage_2_ir.xml") @@ -308,7 +258,7 @@ Select device from dropdown list for running inference using OpenVINO. .. parsed-literal:: - Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + Dropdown(description='Device:', index=4, options=('CPU', 'GPU.0', 'GPU.1', 'GPU.2', 'AUTO'), value='AUTO') @@ -410,10 +360,10 @@ and run inference. 
- + @@ -456,27 +406,13 @@ Interactive inference demo = make_demo(fn=infer) try: - demo.queue().launch(debug=False) + demo.queue().launch(debug=True) except Exception: - demo.queue().launch(share=True, debug=False) + demo.queue().launch(share=True, debug=True) # if you are launching remotely, specify server_name and server_port # demo.launch(server_name='your server name', server_port='server port in int') # Read more in the docs: https://gradio.app/docs/ - -.. parsed-literal:: - - Running on local URL: http://127.0.0.1:7860 - - To create a public link, set `share=True` in `launch()`. - - - - - - - - .. code:: ipython3 # please uncomment and run this cell for stopping gradio interface diff --git a/docs/notebooks/person-tracking-with-output.rst b/docs/notebooks/person-tracking-with-output.rst index 2b07305615f09e..653a9b376edf7e 100644 --- a/docs/notebooks/person-tracking-with-output.rst +++ b/docs/notebooks/person-tracking-with-output.rst @@ -128,7 +128,7 @@ Guide =2024.0.0" + %pip install -q "openvino>=2024.0.0" %pip install -q opencv-python requests scipy tqdm "matplotlib>=3.4" @@ -189,16 +189,6 @@ We will use pre-trained models from OpenVINO’s `Open Model Zoo `__ to start the test. -Use ``omz_downloader``, which is a command-line tool from the -``openvino-dev`` package. It automatically creates a directory structure -and downloads the selected model. This step is skipped if the model is -already downloaded. The selected model comes from the public directory, -which means it must be converted into OpenVINO Intermediate -Representation (OpenVINO IR). - - **NOTE**: Using a model outside the list can require different pre- - and post-processing. - In this case, `person detection model `__ is deployed to detect the person in each frame of the video, and @@ -207,60 +197,39 @@ model `__, -``person-reidentification-retail-xxx`` from `Reidentification Models -list `__), -replace the name of the model in the code below. - .. 
code:: ipython3 + from notebook_utils import download_ir_model + # A directory where the model will be downloaded. base_model_dir = "model" precision = "FP16" # The name of the model from Open Model Zoo detection_model_name = "person-detection-0202" - download_command = ( - f"omz_downloader " f"--name {detection_model_name} " f"--precisions {precision} " f"--output_dir {base_model_dir} " f"--cache_dir {base_model_dir}" - ) - ! $download_command - detection_model_path = f"model/intel/{detection_model_name}/{precision}/{detection_model_name}.xml" + download_det_model_url = ( + f"https://storage.openvinotoolkit.org/repositories/open_model_zoo/2023.0/models_bin/1/{detection_model_name}/{precision}/{detection_model_name}.xml" + ) + detection_model_path = download_ir_model(download_det_model_url, Path(base_model_dir) / detection_model_name / precision) reidentification_model_name = "person-reidentification-retail-0287" + download_reid_model_url = f"https://storage.openvinotoolkit.org/repositories/open_model_zoo/2023.0/models_bin/1/{reidentification_model_name}/{precision}/{reidentification_model_name}.xml" - download_command = ( - f"omz_downloader " f"--name {reidentification_model_name} " f"--precisions {precision} " f"--output_dir {base_model_dir} " f"--cache_dir {base_model_dir}" - ) - ! $download_command + reidentification_model_path = download_ir_model(download_reid_model_url, Path(base_model_dir) / reidentification_model_name / precision) - reidentification_model_path = f"model/intel/{reidentification_model_name}/{precision}/{reidentification_model_name}.xml" .. 
parsed-literal:: - ################|| Downloading person-detection-0202 ||################ - - ========== Downloading model/intel/person-detection-0202/FP16/person-detection-0202.xml + model/person-detection-0202/FP16/person-detection-0202.bin: 0%| | 0.00/3.47M [00:00`__ #### -Table of contents: +precision using `NNCF `__ + +**Table of contents:** - `Prerequisites <#prerequisites>`__ - `Select Model <#select-model>`__ @@ -56,11 +57,6 @@ install required packages and setup helper functions. .. parsed-literal:: - ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - descript-audiotools 0.7.2 requires protobuf<3.20,>=3.9.2, but you have protobuf 5.28.2 which is incompatible. - open-clip-torch 2.22.0 requires protobuf<4, but you have protobuf 5.28.2 which is incompatible. - tensorflow 2.12.0 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 5.28.2 which is incompatible. - tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 5.28.2 which is incompatible. Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. @@ -264,10 +260,10 @@ documentation 1 or self.sliding_window is not None) and self.is_causal: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:444: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! seq_len = seq_len or torch.max(position_ids) + 1 /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:445: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if seq_len > self.original_max_position_embeddings: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. op1 = operator(\*args, \*\*kwargs) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:443: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:683: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len): /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:690: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if attention_mask.size() != (bsz, 1, q_len, kv_seq_len): /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:702: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) if a.grad is not None: @@ -505,12 +516,6 @@ arguments. 
For running model we will use ``generate`` method. generate_ids = model.generate(**inputs, eos_token_id=processor.tokenizer.eos_token_id, **generation_args) -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/auto/image_processing_auto.py:513: FutureWarning: The image_processor_class argument is deprecated and will be removed in v4.42. Please use `slow_image_processor_class`, or `fast_image_processor_class` instead - warnings.warn( - - .. parsed-literal:: Answer: diff --git a/docs/notebooks/photo-maker-with-output.rst b/docs/notebooks/photo-maker-with-output.rst index fe49eb9217065e..1685e9c05ea9f1 100644 --- a/docs/notebooks/photo-maker-with-output.rst +++ b/docs/notebooks/photo-maker-with-output.rst @@ -94,13 +94,13 @@ Clone PhotoMaker repository .. parsed-literal:: Cloning into 'PhotoMaker'... - remote: Enumerating objects: 303, done. - remote: Counting objects: 100% (148/148), done. - remote: Compressing objects: 100% (95/95), done. - remote: Total 303 (delta 130), reused 53 (delta 53), pack-reused 155 (from 1) - Receiving objects: 100% (303/303), 10.23 MiB | 29.77 MiB/s, done. - Resolving deltas: 100% (162/162), done. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/photo-maker/PhotoMaker + remote: Enumerating objects: 306, done. + remote: Counting objects: 100% (151/151), done. + remote: Compressing objects: 100% (98/98), done. + remote: Total 306 (delta 132), reused 53 (delta 53), pack-reused 155 (from 1) + Receiving objects: 100% (306/306), 10.24 MiB | 23.03 MiB/s, done. + Resolving deltas: 100% (164/164), done. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/photo-maker/PhotoMaker Note: switching to '1e78aa6514c11a84ef1be27b56c7c72d6c70f8fc'. 
You are in 'detached HEAD' state. You can look around, make experimental @@ -119,7 +119,7 @@ Clone PhotoMaker repository Turn off this advice by setting config variable advice.detachedHead to false HEAD is now at 1e78aa6 Update README.md - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/photo-maker + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/photo-maker Install required packages @@ -133,11 +133,9 @@ Install required packages .. parsed-literal:: ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - descript-audiotools 0.7.2 requires protobuf<3.20,>=3.9.2, but you have protobuf 3.20.3 which is incompatible. - mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. paddleclas 2.5.2 requires gast==0.3.3, but you have gast 0.4.0 which is incompatible. paddleclas 2.5.2 requires opencv-python==4.6.0.66, but you have opencv-python 4.10.0.84 which is incompatible. - supervision 0.24.0 requires numpy<1.23.3,>=1.21.2; python_full_version <= "3.10.0", but you have numpy 1.24.4 which is incompatible. + parler-tts 0.2.1 requires protobuf>=4.0.0, but you have protobuf 3.20.3 which is incompatible. tensorflow 2.12.0 requires numpy<1.24,>=1.22, but you have numpy 1.24.4 which is incompatible. Note: you may need to restart the kernel to use updated packages. @@ -199,10 +197,10 @@ PhotoMaker to generate the original PhotoMaker pipeline. .. parsed-literal:: - 2024-10-23 02:19:25.748160: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
- 2024-10-23 02:19:25.783265: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 02:22:09.727876: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 02:22:09.761823: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-23 02:19:26.449413: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 02:22:10.482979: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. code:: ipython3 @@ -391,9 +389,12 @@ output(text embeddings) which will be the input for U-Net model. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4664: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. 
Please use `model.hf_quantizer.is_trainable` instead warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/photo-maker/PhotoMaker/photomaker/model.py:84: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:243: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if not interpolate_pos_encoding and (height != self.image_size or width != self.image_size): + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/photo-maker/PhotoMaker/photomaker/model.py:84: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert class_tokens_mask.sum() == stacked_id_embeds.shape[0], f"{class_tokens_mask.sum()} != {stacked_id_embeds.shape[0]}" @@ -468,9 +469,9 @@ sequence of latent text embeddings. .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:86: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:88: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input_shape[-1] > 1 or self.sliding_window is not None: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if past_key_values_length > 0: @@ -574,15 +575,15 @@ original Stable Diffusion XL model. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unets/unet_2d_condition.py:1103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unets/unet_2d_condition.py:1103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if dim % default_overall_up_factor != 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:136: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:136: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:145: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:145: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if hidden_states.shape[0] >= 64: diff --git a/docs/notebooks/pixart-with-output.rst b/docs/notebooks/pixart-with-output.rst index ef008d1d82ee52..c1c9a4b4e8ec57 100644 --- a/docs/notebooks/pixart-with-output.rst +++ b/docs/notebooks/pixart-with-output.rst @@ -118,10 +118,10 @@ directly in latent space, achieving super fast inference with few steps. .. parsed-literal:: - 2024-10-23 02:27:23.824587: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-23 02:27:23.860019: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 02:30:04.644117: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. 
You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 02:30:04.680089: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-23 02:27:24.531762: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 02:30:05.360275: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -130,17 +130,17 @@ directly in latent space, achieving super fast inference with few steps. Loading pipeline components...: 0%| | 0/5 [00:00. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 +.. parsed-literal:: + Loading checkpoint shards: 0%| | 0/4 [00:00. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 + Some weights of the model checkpoint were not used when initializing PixArtTransformer2DModel: + ['caption_projection.y_embedding'] @@ -229,8 +229,9 @@ Convert text encoder .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. 
The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4664: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( + `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. Convert transformer @@ -271,11 +272,11 @@ Convert transformer .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/embeddings.py:219: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/embeddings.py:219: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if self.height != height or self.width != width: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/attention_processor.py:682: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/attention_processor.py:682: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if current_length != target_length: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/attention_processor.py:697: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/attention_processor.py:697: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if attention_mask.shape[0] < batch_size * head_size: @@ -300,9 +301,9 @@ Convert VAE decoder .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if hidden_states.shape[0] >= 64: @@ -448,7 +449,7 @@ And insert wrappers instances in the pipeline: .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -563,7 +564,7 @@ To collect intermediate model inputs for calibration we should customize .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. 
Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -1621,16 +1622,16 @@ pipelines. Loading pipeline components...: 0%| | 0/5 [00:00=4.43.0, but you have transformers 4.45.2 which is incompatible. Note: you may need to restart the kernel to use updated packages. @@ -156,21 +153,21 @@ documentation 0.19. Please downgrade to tokenizers version <= 0.19 to export tokenizers to OpenVINO. + Exporting tokenizers to OpenVINO is not supported for tokenizers version > 0.19 and openvino version <= 2024.4. Please downgrade to tokenizers version <= 0.19 to export tokenizers to OpenVINO. INFO:nncf:Statistics of the bitwidth distribution: ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ @@ -488,7 +485,7 @@ documentation target_length: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:443: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors - Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32) .. parsed-literal:: diff --git a/docs/notebooks/qwen2-vl-with-output.rst b/docs/notebooks/qwen2-vl-with-output.rst index 0ff882bb537ef2..a6f49a58ff0f25 100644 --- a/docs/notebooks/qwen2-vl-with-output.rst +++ b/docs/notebooks/qwen2-vl-with-output.rst @@ -139,10 +139,10 @@ using widget bellow: .. 
parsed-literal:: - 2024-10-23 04:32:38.810057: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-23 04:32:38.845114: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 04:37:28.225170: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 04:37:28.260034: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-23 04:32:39.399370: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 04:37:28.814599: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -335,26 +335,44 @@ documentation target_length: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:443: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors .. 
parsed-literal:: + ✅ Language model successfully converted ⌛ Weights compression with int4_asym mode started INFO:nncf:Statistics of the bitwidth distribution: ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 1% (1 / 130) │ 0% (0 / 129) │ + │ 8 │ 15% (1 / 197) │ 0% (0 / 196) │ ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ 4 │ 99% (129 / 130) │ 100% (129 / 129) │ + │ 4 │ 85% (196 / 197) │ 100% (196 / 196) │ ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -374,31 +392,15 @@ documentation =3.9.2, but you have protobuf 3.20.3 which is incompatible. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.19.1+cpu which is incompatible. - parler-tts 0.2 requires transformers<=4.43.3,>=4.43.0, but you have transformers 4.45.2 which is incompatible. + parler-tts 0.2.1 requires protobuf>=4.0.0, but you have protobuf 3.20.3 which is incompatible. Note: you may need to restart the kernel to use updated packages. 
Collecting iopath>=0.1.10 Using cached iopath-0.1.10-py3-none-any.whl - Requirement already satisfied: pillow>=9.4.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (10.4.0) - Requirement already satisfied: hydra-core>=1.3.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (1.3.2) - Requirement already satisfied: tqdm in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from iopath>=0.1.10) (4.66.5) - Requirement already satisfied: typing-extensions in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from iopath>=0.1.10) (4.12.2) - Requirement already satisfied: portalocker in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from iopath>=0.1.10) (2.10.1) - Requirement already satisfied: omegaconf<2.4,>=2.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.3.2) (2.3.0) - Requirement already satisfied: antlr4-python3-runtime==4.9.* in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.3.2) (4.9.3) - Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.3.2) (24.1) - Requirement already satisfied: importlib-resources in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.3.2) (6.4.5) - Requirement already satisfied: PyYAML>=5.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from omegaconf<2.4,>=2.2->hydra-core>=1.3.2) (6.0.2) - Requirement already satisfied: zipp>=3.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from importlib-resources->hydra-core>=1.3.2) (3.20.2) + Requirement already satisfied: pillow>=9.4.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (10.4.0) + Requirement already satisfied: hydra-core>=1.3.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (1.3.2) + Requirement already satisfied: tqdm in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from iopath>=0.1.10) (4.66.6) + Requirement already satisfied: typing-extensions in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from iopath>=0.1.10) (4.12.2) + Requirement already satisfied: portalocker in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from iopath>=0.1.10) (2.10.1) + Requirement already satisfied: omegaconf<2.4,>=2.2 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.3.2) (2.3.0) + Requirement already satisfied: antlr4-python3-runtime==4.9.* in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.3.2) (4.9.3) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.3.2) (24.1) + Requirement already satisfied: importlib-resources in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.3.2) (6.4.5) + Requirement already satisfied: PyYAML>=5.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from omegaconf<2.4,>=2.2->hydra-core>=1.3.2) (6.0.2) + Requirement already satisfied: zipp>=3.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from importlib-resources->hydra-core>=1.3.2) (3.20.2) Installing collected packages: iopath Attempting uninstall: iopath Found existing installation: iopath 0.1.9 @@ -204,10 +203,10 @@ Clone and install segment-anything-2 .. 
parsed-literal:: env: SAM2_BUILD_CUDA=0 - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/sam2-image-segmentation/sam2 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/sam2-image-segmentation/sam2 ERROR: Package 'sam-2' requires a different Python: 3.8.10 not in '>=3.10.0' Note: you may need to restart the kernel to use updated packages. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/sam2-image-segmentation + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/sam2-image-segmentation .. code:: ipython3 diff --git a/docs/notebooks/siglip-zero-shot-image-classification-with-output.rst b/docs/notebooks/siglip-zero-shot-image-classification-with-output.rst index 298a30bf7b6d7a..50aa71af0082b8 100644 --- a/docs/notebooks/siglip-zero-shot-image-classification-with-output.rst +++ b/docs/notebooks/siglip-zero-shot-image-classification-with-output.rst @@ -126,10 +126,10 @@ tokenizer and preparing the images. .. parsed-literal:: - 2024-10-23 04:42:12.224179: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-23 04:42:12.258186: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 04:45:37.694278: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 04:45:37.728953: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-23 04:42:12.920282: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 04:45:38.384577: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT Run PyTorch model inference @@ -265,8 +265,9 @@ object ready to load on the device and start making predictions. .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4779: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( + `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. Run OpenVINO model @@ -610,7 +611,7 @@ model are similar to the PyTorch model. .. 
parsed-literal:: - [{'dog': 0.99}, {'horse': 0.0}, {'cat': 0.0}, {'wolf': 0.0}, {'frog': 0.0}] + [{'dog': 0.99}, {'horse': 0.0}, {'cat': 0.0}, {'wolf': 0.0}, {'tiger': 0.0}] @@ -685,7 +686,7 @@ approximately estimate the speed up of the dynamic quantized models. .. parsed-literal:: - Performance speed up: 2.019 + Performance speed up: 2.491 Interactive inference diff --git a/docs/notebooks/siglip-zero-shot-image-classification-with-output_files/siglip-zero-shot-image-classification-with-output_24_1.png b/docs/notebooks/siglip-zero-shot-image-classification-with-output_files/siglip-zero-shot-image-classification-with-output_24_1.png index 76d6a6178cf64e..90392474870369 100644 --- a/docs/notebooks/siglip-zero-shot-image-classification-with-output_files/siglip-zero-shot-image-classification-with-output_24_1.png +++ b/docs/notebooks/siglip-zero-shot-image-classification-with-output_files/siglip-zero-shot-image-classification-with-output_24_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2f3af06f927b6f5e643d37869bbe32651d9a0f23270d5f3426cebbc2f02765cb -size 580998 +oid sha256:1e64834a97582f6009d2c6f15fd109cba0bdc0d9577d3a7f0f31a4d9f4519d44 +size 581000 diff --git a/docs/notebooks/speculative-sampling-with-output.rst b/docs/notebooks/speculative-sampling-with-output.rst index 127f61ab35085c..a88201eb73453c 100644 --- a/docs/notebooks/speculative-sampling-with-output.rst +++ b/docs/notebooks/speculative-sampling-with-output.rst @@ -1,5 +1,5 @@ -Text Generation via Speculative Sampling, KV Caching, and OpenVINO™ -=================================================================== +Text Generation via Speculative Decoding and OpenVINO™ +====================================================== As model sizes grow, Generative AI implementations require significant inference resources. 
This not only increases the cost per generation @@ -18,42 +18,49 @@ summary, inference optimizations for text generation are essential to reduce costs and power consumption, while also improving the accuracy and speed of text generation. -Another necessary condition is that the optimizations are compatible -with each other. That is, implementing a certain optimization should not -preclude other optimizations. There are several levels of optimizations -that can provide significant speedup without “bumping into each other” -in a way that will compromise overall efficiency. +Speculative decoding (or +`assisted-generation `__) +is a recent technique that speeds up token generation by using an +additional smaller draft model alongside the main model. -For details on this method, please refer to the paper by Chen et al, -http://arxiv.org/abs/2302.01318. Additionally, there’s an interesting -proof of correctness of speculative sampling (showing that the original -distribution is preserved) by Leviathan et al, -http://arxiv.org/abs/2211.17192 +Speculative decoding works in the following way. The draft model predicts +the next K tokens one by one in an autoregressive manner, while the main +model validates these predictions and corrects them if necessary. We go +through each predicted token, and if a difference is detected between +the draft and main model, we stop and keep the last token predicted by +the main model. Then the draft model gets the latest main prediction and +again tries to predict the next K tokens, repeating the cycle. -Our blog article describing this implementation with OpenVino is -available at openvino.ai +This approach reduces the need for multiple inference requests to the main +model, enhancing performance. For instance, in more predictable parts of +text generation, the draft model can, in best-case scenarios, generate +the next K tokens that exactly match the target.
In that case they are +validated in a single inference request to the main model (which is +bigger, more accurate but slower) instead of running K subsequent +requests. More details can be found in the original +`paper `__. + +|image0| + +In this tutorial we consider how to apply Speculative decoding using +OpenVINO GenAI. **Table of contents:** - `Prerequisites <#prerequisites>`__ +- `Prepare models <#prepare-models>`__ - `Select inference device <#select-inference-device>`__ -- `Create autoregressive and speculative forms of sampling with KV - Cache - support <#create-autoregressive-and-speculative-forms-of-sampling-with-kv-cache-support>`__ - - - `Setup imports <#setup-imports>`__ - - `Prepare autoregressive - sampling <#prepare-autoregressive-sampling>`__ - - `Prepare speculative sampling <#prepare-speculative-sampling>`__ - -- `Main generation function <#main-generation-function>`__ +- `Run target model without speculative + decoding <#run-target-model-without-speculative-decoding>`__ +- `Run Speculative decoding + pipeline <#run-speculative-decoding-pipeline>`__ - - `Download and Convert Model <#download-and-convert-model>`__ ### - Installation Instructions +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ This is a self-contained example that relies solely on its own code. @@ -62,29 +69,64 @@ need a Jupyter server to start. For details, please refer to `Installation Guide `__. +.. |image0| image:: https://github.com/user-attachments/assets/eb999dea-d98b-42bb-835e-28d3054e1a84 + Prerequisites ------------- -First, we should install the `Hugging Face -Optimum `__ library -accelerated by OpenVINO integration. The Hugging Face Optimum Intel API -is a high-level API that enables us to convert and quantize models from -the Hugging Face Transformers library to the OpenVINO™ IR format. For -more details, refer to the `Hugging Face Optimum Intel -documentation `__. +First, we should install the `OpenVINO +GenAI `__ for running +model inference. 
+ +|image01| + +OpenVINO™ GenAI is a library of the most popular Generative AI model +pipelines, optimized execution methods, and samples that run on top of +highly performant `OpenVINO +Runtime `__. + +This library is friendly to PC and laptop execution, and optimized for +resource consumption. It requires no external dependencies to run +generative models as it already includes all the core functionality +(e.g. tokenization via openvino-tokenizers). -We will also need to install transformers (HuggingFace) and some other -useful modules. +.. |image01| image:: https://media.githubusercontent.com/media/openvinotoolkit/openvino.genai/refs/heads/master/src/docs/openvino_genai.svg .. code:: ipython3 - %pip install -Uq pip - %pip uninstall -q -y optimum optimum-intel - %pip install --pre -Uq "openvino>=2024.2.0" openvino-tokenizers[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - %pip install -q --upgrade transformers "torch>=2.1" "torchvision" "gradio>=4.19" accelerate "onnx<1.16.2" ipywidgets --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "git+https://github.com/huggingface/optimum-intel.git" + %pip install --pre -Uq "openvino>=2024.4.0" openvino-tokenizers openvino-genai huggingface_hub --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + +Prepare models +-------------- + + + +As an example, we will use already converted LLMs from the `OpenVINO +collection `__, +but if you want to run your own models, you should convert them using the +`Hugging Face +Optimum `__ +library accelerated by OpenVINO integration. More details about model +preparation can be found in the `OpenVINO LLM inference +guide `__. + +..
code:: ipython3 + + from pathlib import Path + import huggingface_hub as hf_hub + + draft_model_id = "OpenVINO/dolly-v2-3b-int4-ov" + target_model_id = "OpenVINO/dolly-v2-7b-int8-ov" + + draft_model_path = Path(draft_model_id.split("/")[-1]) + target_model_path = Path(target_model_id.split("/")[-1]) + + if not draft_model_path.exists(): + hf_hub.snapshot_download(draft_model_id, local_dir=draft_model_path) + if not target_model_path.exists(): + hf_hub.snapshot_download(target_model_id, local_dir=target_model_path) Select inference device ~~~~~~~~~~~~~~~~~~~~~~~ @@ -97,336 +139,184 @@ OpenVINO. .. code:: ipython3 import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) open("notebook_utils.py", "w").write(r.text) - + from notebook_utils import device_widget - - device = device_widget() - - device + device = device_widget(default="CPU", exclude=["NPU", "AUTO"]) + device -.. parsed-literal:: - Dropdown(description='Device:', options=('CPU', 'GPU.0', 'GPU.1', 'AUTO'), value='CPU') +.. parsed-literal:: + Dropdown(description='Device:', options=('CPU',), value='CPU') -Create autoregressive and speculative forms of sampling with KV Cache support ------------------------------------------------------------------------------ +Run target model without speculative decoding +--------------------------------------------- -Text generation is often done in an autoregressive fashion. We will all -support a KV cache (aka Past Value Cache) in the code. Note that we are -using greedy sampling. We do not adjust other text generation parameters -(e.g. temperature) so keep this illustration of speculative sampling as -simple and understandable as possible. -Setup imports -~~~~~~~~~~~~~ +OpenVINO GenAI provides easy-to-use API for running text generation. +Firstly we will create pipeline with ``LLMPipeline``. ``LLMPipeline`` is +the main object used for decoding. 
You can construct it straight away +from the folder with the converted model. It will automatically load the +``main model``, ``tokenizer``, ``detokenizer`` and default +``generation configuration``. After that we will configure parameters +for decoding. Then we just run the ``generate`` method and get the output in +text format. We do not need to encode the input prompt according to the model's +expected template or write post-processing code for the logits decoder; +all of this is handled by LLMPipeline. +To obtain intermediate generation results without waiting until +generation is finished, we will write a streamer function. .. code:: ipython3 + import openvino_genai import time - import numpy as np - import openvino as ov -Prepare autoregressive sampling -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + pipe = openvino_genai.LLMPipeline(target_model_path, device.value) + config = openvino_genai.GenerationConfig() + config.max_new_tokens = 100 -.. code:: ipython3 + def streamer(subword): + print(subword, end="", flush=True) + # Return flag corresponds whether generation should be stopped. + # False means continue generation.
+ return False - def autoregressive_sampling_with_pkv(input, model, N=30): - input_ids, attention_mask = input.input_ids, input.attention_mask - seq_len = input_ids.shape[-1] - position_ids = np.arange(0, seq_len, dtype=np.int64).reshape([-1, seq_len]) - - # in all subsequent inferences we feed tokens one by one, - # but for the first one we feed the whole encoded prompt - request = model.create_infer_request() - request.infer((input_ids, attention_mask, position_ids, np.array([0]))) - next_token = np.argmax(request.results["logits"][:, -1]).reshape([1]) - - all_tokens = [] - all_tokens.extend(input_ids[0]) - all_tokens.append(next_token[0]) - - while seq_len < N: - input_ids = next_token.reshape([1, 1]) - attention_mask = np.concatenate((attention_mask, np.array([1]).reshape([1, 1])), axis=1) - position_ids = np.array([attention_mask.shape[1]]).reshape([1, 1]) - - request.infer((input_ids, attention_mask, position_ids, np.array([0]))) - next_token = np.argmax(request.results["logits"][:, -1]) - all_tokens.append(next_token) - seq_len += 1 - - return all_tokens - -Prepare speculative sampling -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -- Step 1: With speculative sampling, we first generate K samples from - the draft model (in an autoregressive manner). -- Step 2: These are now candidates to examine using the main model - (step 2) using a batch size of K. -- Step 3: We go through each K predicted tokens, and if tokens differ, - we stop and keep the last token predicted by the main model. -- Step 4: We update KV-cache dropping keys & values for differing - tokens and repeat Step 1. -.. 
code:: ipython3 + start_time = time.perf_counter() + pipe.generate(["Sun is yellow because"], config, streamer=streamer) + end_time = time.perf_counter() - def update_state(request, seq_len): - for state in request.query_state(): - old_seq_len = state.state.shape[2] - if seq_len >= old_seq_len: - continue - # After the inference request, key/values have shape [BATCH_SIZE, seq_len + K, vocab_size]. - # Increment the sequence length by the number of matched tokens, and - # trim the KV cache to match the new sequence length. - state.state = ov.Tensor(state.state.data[:, :, :seq_len]) - - - def speculative_sampling_with_pkv(input, draft_model, main_model, K, N=30, **kwargs): - input_ids, attention_mask = input.input_ids, input.attention_mask - # seq_len number of key/values or number of already processed input tokens - seq_len = input_ids.shape[-1] - position_ids = np.arange(0, seq_len, dtype=np.int64).reshape([-1, seq_len]) - - draft_request = draft_model.create_infer_request() - draft_request.infer((input_ids, attention_mask, position_ids, np.array([0]))) - - main_request = main_model.create_infer_request() - main_request.infer((input_ids, attention_mask, position_ids, np.array([0]))) - first_token = np.argmax(main_request.results["logits"][:, -1]).reshape([1]) - - all_tokens = [] - all_tokens.extend(input_ids[0]) - all_tokens.append(first_token[0]) - - accum_draft_tokens = [] - while seq_len < N: - next_token = first_token - for i in range(K): - input_ids = next_token.reshape([1, 1]) - attention_mask = np.concatenate((attention_mask, np.array([1]).reshape([1, 1])), axis=1) - position_ids = np.array([attention_mask.shape[1]]).reshape([1, 1]) - - draft_request.infer((input_ids, attention_mask, position_ids, np.array([0]))) - next_token = np.argmax(draft_request.results["logits"][:, -1]) - accum_draft_tokens.append(next_token) - - # main model will give also K out tokens - # feed the same first token to the main model and do not give the last token generated by the 
draft - input_ids = np.concatenate((first_token.reshape([1]), accum_draft_tokens[:-1])).reshape([1, -1]) - attention_mask = np.ones((1, seq_len + K)) - position_ids = np.arange(seq_len, seq_len + K, dtype=np.int64).reshape([1, -1]) - - main_request.infer((input_ids, attention_mask, position_ids, np.array([0]))) - next_tokens = np.argmax(main_request.results["logits"], axis=-1)[0] - - # if disagrees from the very beggining then context will be expanded only for one element - # all elements match then context will be expanded to K elements - for disagree_idx, (t1, t2) in enumerate(zip(accum_draft_tokens, next_tokens)): - if t1 != t2: - break - - first_token = next_tokens[disagree_idx] - all_tokens.extend(next_tokens[: disagree_idx + 1]) - seq_len += disagree_idx + 1 - - # cut key/values depending on the position where disagreement starts - update_state(draft_request, seq_len) - update_state(main_request, seq_len) - - attention_mask = np.ones((1, seq_len)) - accum_draft_tokens = [] - all_tokens.extend(accum_draft_tokens) - return all_tokens - -Main generation function ------------------------- - - - -Download and Convert Model -~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -Optimum Intel can be used to load optimized models from the `Hugging -Face Hub `__ and -create pipelines to run an inference with OpenVINO Runtime using Hugging -Face APIs. For speculative decoding we need to manually update states, -therefore we will use directly openvino inference api, and optimum only -for model conversion. >To download Llama-2-7b-chat-hf, you will need to -accept license agreement. You must be a registered user in Hugging -Face Hub. Please visit HuggingFace model -`card `__, -carefully read terms of usage and click accept button. You will need to -use an access token for the code below to run. For more information on -access tokens, refer to this section of the documentation. -.. code:: ipython3 +.. 
parsed-literal:: - from pathlib import Path - - main_model_id = "meta-llama/Llama-2-7b-chat-hf" - main_model_path = Path("Llama-2-7b-chat-hf") - draft_model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" - draft_model_path = Path("TinyLlama-1.1B-Chat-v1.0") - - from transformers import AutoTokenizer - - main_tokenizer = AutoTokenizer.from_pretrained(main_model_id) - draft_tokenizer = AutoTokenizer.from_pretrained(draft_model_id) + it is made of gas. The gas is heated to a high temperature and then cooled. The gas is yellow because it has a band of light called the "Bondeson Pendulum Effect." The Bondeson Pendulum Effect is caused by the light waves bouncing off of the gas molecules. The light waves bounce off of the gas molecules in different ways, some of the light waves get scattered, and some of the light waves get reflected. The light waves that get scattered and reflected combine to .. code:: ipython3 - # In order for speculative sampling to work, both main and draft tokenizers should be the same. - token_test_txt = "text to ensure tokenizers work the same, as of 2024" - tokens_1 = draft_tokenizer(token_test_txt, return_tensors="pt").input_ids - tokens_2 = main_tokenizer(token_test_txt, return_tensors="pt").input_ids - - assert all((tokens_1 - tokens_2)[0] == 0) + import gc -.. code:: ipython3 + print(f"Generation time: {end_time - start_time:.2f}s") + del pipe + gc.collect(); - if not main_model_path.exists(): - !optimum-cli export openvino --model $main_model_id --weight-format fp16 {main_model_path} - if not draft_model_path.exists(): - !optimum-cli export openvino --model $draft_model_id --weight-format fp16 {draft_model_path} -Infer directly using OpenVINO Inference Pipeline +.. parsed-literal:: -.. 
code:: ipython3 + Generation time: 18.44s - core = ov.Core() - draft_ov_model = core.read_model(draft_model_path / "openvino_model.xml") - draft_model = core.compile_model(draft_ov_model, device_name=device.value) - - main_ov_model = core.read_model(main_model_path / "openvino_model.xml") - main_model = core.compile_model(main_ov_model, device_name=device.value) -.. code:: ipython3 +Run Speculative decoding pipeline +--------------------------------- - def main( - prompt: str, - n_tokens_to_generate: int = 75, - K: int = 5, - seed: int = 5555, - ): - # seed numpy rng - np.random.seed(seed) - tokenized = main_tokenizer(prompt, return_tensors="pt") - - def run_autoregressive_sampling_fn(decode_fn, tokenized, **kwargs): - start = time.perf_counter() - output_ids = decode_fn(tokenized, **kwargs) - text = main_tokenizer.decode(output_ids, skip_special_tokens=True) - elapsed_time = time.perf_counter() - start - return text, elapsed_time - - def run_speculative_sampling_fn(decode_fn, input_ids, **kwargs): - start = time.perf_counter() - output_ids = decode_fn(input_ids, **kwargs) - text = main_tokenizer.decode(output_ids, skip_special_tokens=True) - elapsed_time = time.perf_counter() - start - return text, elapsed_time - - autoregressive_text, autoregressive_time = run_autoregressive_sampling_fn( - autoregressive_sampling_with_pkv, - tokenized, - model=main_model, - N=n_tokens_to_generate, - ) - - speculative_text, speculative_time = run_speculative_sampling_fn( - speculative_sampling_with_pkv, - tokenized, - main_model=main_model, - draft_model=draft_model, - N=n_tokens_to_generate, - K=K, - ) - - # Format results for output in gradio - out = "\n" + "Autoregressive Decode" + "\n" + "---------------------" + "\n" - out = out + f"Time = {autoregressive_time:.2f}s" + "\n" + f"Text = {autoregressive_text}" + "\n" - out = out + "\n" + "Speculative Decode" + "\n" + "------------------" + "\n" - out = out + f"Time = {speculative_time:.2f}s" + "\n" + f"Text = {speculative_text}" 
- return out + + +To enable Speculative decoding in ``LLMPipeline``, we should +additionally provide the ``draft_model`` structure and +``SchedulerConfig`` for resource management. + +|image011| + +As shown in the figure above, speculative decoding works by splitting +the generative process into two stages. In the first stage, a fast, but +less accurate draft model (AKA assistant) autoregressively generates a +sequence of tokens. In the second stage, a large, but more accurate +target model conducts parallelized verification over the generated draft +tokens. This process allows the target model to produce multiple tokens +in a single forward pass and thus accelerate autoregressive decoding. +The success of speculative decoding largely hinges on the speculation +lookahead (SL), i.e. the number of tokens produced by the draft model in +each iteration. The straightforward method, based on `Leviathan et +al. `__, uses a static value of the +speculation lookahead and involves generating a constant number of +candidate tokens at each speculative iteration. You can adjust the +number of candidates using the ``num_assistant_tokens`` parameter in the +generation config. If the ``assistant_confidence_threshold`` parameter is +used instead (see the dynamic approach below), the assistant model +stops the current token generation iteration when its confidence in its +prediction for the current token is lower than this threshold, even if +the number of ``num_assistant_tokens`` is not yet reached. + +.. |image011| image:: https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/dynamic_speculation_lookahead/spec_dec_diagram.png ..
code:: ipython3 - res = main("Alan Turing was a", n_tokens_to_generate=100) - print(res) + scheduler_config = openvino_genai.SchedulerConfig() + # cache params + scheduler_config.cache_size = 2 + scheduler_config.block_size = 16 if "GPU" in device.value else 32 + + draft_model = openvino_genai.draft_model(draft_model_path, device.value) + + pipe = openvino_genai.LLMPipeline(target_model_path, device.value, draft_model=draft_model, scheduler_config=scheduler_config) + + config = openvino_genai.GenerationConfig() + config.max_new_tokens = 100 + config.num_assistant_tokens = 5 + start_time = time.perf_counter() + result = pipe.generate(["Sun is yellow because"], config, streamer=streamer) + end_time = time.perf_counter() .. parsed-literal:: - 2024-04-17 10:21:41.642283: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-04-17 10:21:41.644834: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used. - 2024-04-17 10:21:41.677055: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered - 2024-04-17 10:21:41.677093: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered - 2024-04-17 10:21:41.677119: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered - 2024-04-17 10:21:41.683198: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used. 
- 2024-04-17 10:21:41.683977: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-04-17 10:21:42.477656: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + it is made of gas. The gas is heated to a high temperature and then cooled. The gas changes from a hot gas to a cold gas and then from a cold gas to a hot gas. The gas is very hot when it changes from a hot gas to a cold gas and very cold when it changes from a cold gas to a hot gas. When the gas changes from a hot gas to a cold gas it becomes yellow. When the gas changes from a cold gas to a hot gas it + +.. code:: ipython3 + + print(f"Generation time: {end_time - start_time:.2f}s") .. parsed-literal:: - - Autoregressive Decode - --------------------- - Time = 44.39s - Text = Alan Turing was a British mathematician, computer scientist, and codebreaker who played a pivotal role in cracking the German Enigma code during World War II. He was also a pioneer in the field of artificial intelligence and made significant contributions to the development of computer science. - - Turing was born on June 23, 1912, in London, England. He was educated at Cambridge University, where he earned a degree in mathematics in - - Speculative Decode - ------------------ - Time = 22.96s - Text = Alan Turing was a British mathematician, computer scientist, and codebreaker who played a pivotal role in cracking the German Enigma code during World War II. He was also a pioneer in the field of artificial intelligence and made significant contributions to the development of computer science. - - Turing was born on June 23, 1912, in London, England. 
He was educated at Cambridge University, where he earned a degree in mathematics in 1 + Generation time: 15.62s +An alternative approach, Dynamic Speculative Decoding, described in the +`paper `__, is based on heuristics and +adjusts the number of candidate tokens for the next iteration based on +the acceptance rate of the current iteration. If all speculative tokens +are correct, the number of candidate tokens increases; otherwise, it +decreases. To adjust the number of tokens, the +``assistant_confidence_threshold`` parameter should be used. If the +assistant model’s confidence in its prediction for the current token is +lower than this threshold, the assistant model stops the current token +generation iteration, even if the number of ``num_assistant_tokens`` is +not yet reached. You can find more details in this `blog +post `__. + .. code:: ipython3 - if not Path("gradio_helper.py").exists(): - r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/speculative-sampling/gradio_helper.py") - open("gradio_helper.py", "w").write(r.text) - - from gradio_helper import make_demo - - demo = make_demo(fn=main) - - try: - demo.launch(debug=False) - except Exception: - demo.launch(share=True, debug=False) - # If you are launching remotely, specify server_name and server_port - # EXAMPLE: `demo.launch(server_name='your server name', server_port='server port in int')` - # To learn more please refer to the Gradio docs: https://gradio.app/docs/ + config = openvino_genai.GenerationConfig() + config.max_new_tokens = 100 + config.assistant_confidence_threshold = 0.05 + start_time = time.perf_counter() + result = pipe.generate(["Sun is yellow because"], config, streamer) + end_time = time.perf_counter() + + +.. parsed-literal:: + + it is made of gas. The gas is heated to a high temperature and then cooled. The gas changes from a hot gas to a cold gas and then from a cold gas to a hot gas.
The gas is very hot when it changes from a hot gas to a cold gas and very cold when it changes from a cold gas to a hot gas. The gas is very light and can float in the air. When the gas cools it becomes a liquid. The Sun is a huge sphere of .. code:: ipython3 - # please uncomment and run this cell for stopping gradio interface - # demo.close() + print(f"Generation time: {end_time - start_time:.2f}s") + + +.. parsed-literal:: + + Generation time: 17.97s + diff --git a/docs/notebooks/speech-recognition-quantization-wav2vec2-with-output.rst b/docs/notebooks/speech-recognition-quantization-wav2vec2-with-output.rst index be35e6d00f9293..b4a9cf92289306 100644 --- a/docs/notebooks/speech-recognition-quantization-wav2vec2-with-output.rst +++ b/docs/notebooks/speech-recognition-quantization-wav2vec2-with-output.rst @@ -57,47 +57,47 @@ Guide =0.11.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (1.5.0) - Requirement already satisfied: torch>=2.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2.4.1+cpu) - Requirement already satisfied: filelock in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (3.16.1) - Requirement already satisfied: numpy>=1.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (1.24.4) - Requirement already satisfied: pyarrow>=15.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (17.0.0) - Requirement already satisfied: dill<0.3.9,>=0.3.0 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (0.3.8) - Requirement already satisfied: pandas in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (2.0.3) - Requirement already satisfied: requests>=2.32.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (2.32.3) - Requirement already satisfied: tqdm>=4.66.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (4.66.5) - Requirement already satisfied: xxhash in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (3.5.0) - Requirement already satisfied: multiprocess<0.70.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (0.70.16) - Requirement already satisfied: fsspec<=2024.9.0,>=2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets) (2024.9.0) - Requirement already satisfied: aiohttp in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (3.10.10) - Requirement already satisfied: huggingface-hub>=0.23.0 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (0.26.1) - Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (24.1) - Requirement already satisfied: pyyaml>=5.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (6.0.2) - Requirement already satisfied: lightning-utilities>=0.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchmetrics>=0.11.0) (0.11.8) - Requirement already satisfied: typing-extensions in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchmetrics>=0.11.0) (4.12.2) - Requirement already satisfied: sympy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1.0) (1.13.3) - Requirement already satisfied: networkx in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1.0) (3.1) - Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1.0) (3.1.4) - Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from 
aiohttp->datasets) (2.4.3) - Requirement already satisfied: aiosignal>=1.1.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from aiohttp->datasets) (1.3.1) - Requirement already satisfied: attrs>=17.3.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from aiohttp->datasets) (24.2.0) - Requirement already satisfied: frozenlist>=1.1.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from aiohttp->datasets) (1.4.1) - Requirement already satisfied: multidict<7.0,>=4.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from aiohttp->datasets) (6.1.0) - Requirement already satisfied: yarl<2.0,>=1.12.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from aiohttp->datasets) (1.15.2) - Requirement already satisfied: async-timeout<5.0,>=4.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from aiohttp->datasets) (4.0.3) - Requirement already satisfied: setuptools in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from lightning-utilities>=0.8.0->torchmetrics>=0.11.0) (75.2.0) - Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.32.2->datasets) (3.4.0) - Requirement already 
satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.32.2->datasets) (3.10) - Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.32.2->datasets) (2.2.3) - Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.32.2->datasets) (2024.8.30) - Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch>=2.1.0) (2.1.5) - Requirement already satisfied: python-dateutil>=2.8.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pandas->datasets) (2.9.0.post0) - Requirement already satisfied: pytz>=2020.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pandas->datasets) (2024.2) - Requirement already satisfied: tzdata>=2022.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pandas->datasets) (2024.2) - Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch>=2.1.0) (1.3.0) - Requirement already satisfied: six>=1.5 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0) - Requirement already satisfied: propcache>=0.2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from yarl<2.0,>=1.12.0->aiohttp->datasets) (0.2.0) + Requirement already satisfied: datasets in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (3.1.0) + Requirement already satisfied: torchmetrics>=0.11.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (1.5.1) + Requirement already satisfied: torch>=2.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2.4.1+cpu) + Requirement already satisfied: filelock in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (3.16.1) + Requirement already satisfied: numpy>=1.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (1.24.4) + Requirement already satisfied: pyarrow>=15.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (17.0.0) + Requirement already satisfied: dill<0.3.9,>=0.3.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (0.3.8) 
+ Requirement already satisfied: pandas in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (2.0.3) + Requirement already satisfied: requests>=2.32.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (2.32.3) + Requirement already satisfied: tqdm>=4.66.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (4.66.6) + Requirement already satisfied: xxhash in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (3.5.0) + Requirement already satisfied: multiprocess<0.70.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (0.70.16) + Requirement already satisfied: fsspec<=2024.9.0,>=2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets) (2024.9.0) + Requirement already satisfied: aiohttp in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (3.10.10) + Requirement already satisfied: huggingface-hub>=0.23.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (0.26.2) + Requirement already satisfied: packaging in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (24.1) + Requirement already satisfied: pyyaml>=5.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (6.0.2) + Requirement already satisfied: lightning-utilities>=0.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchmetrics>=0.11.0) (0.11.8) + Requirement already satisfied: typing-extensions in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchmetrics>=0.11.0) (4.12.2) + Requirement already satisfied: sympy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1.0) (1.13.3) + Requirement already satisfied: networkx in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1.0) (3.1) + Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1.0) (3.1.4) + Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from aiohttp->datasets) (2.4.3) + Requirement already satisfied: aiosignal>=1.1.2 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from aiohttp->datasets) (1.3.1) + Requirement already satisfied: attrs>=17.3.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from aiohttp->datasets) (24.2.0) + Requirement already satisfied: frozenlist>=1.1.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from aiohttp->datasets) (1.5.0) + Requirement already satisfied: multidict<7.0,>=4.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from aiohttp->datasets) (6.1.0) + Requirement already satisfied: yarl<2.0,>=1.12.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from aiohttp->datasets) (1.15.2) + Requirement already satisfied: async-timeout<5.0,>=4.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from aiohttp->datasets) (4.0.3) + Requirement already satisfied: setuptools in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from lightning-utilities>=0.8.0->torchmetrics>=0.11.0) (75.3.0) + Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.32.2->datasets) (3.4.0) + Requirement already satisfied: idna<4,>=2.5 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.32.2->datasets) (3.10) + Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.32.2->datasets) (2.2.3) + Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.32.2->datasets) (2024.8.30) + Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch>=2.1.0) (2.1.5) + Requirement already satisfied: python-dateutil>=2.8.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pandas->datasets) (2.9.0.post0) + Requirement already satisfied: pytz>=2020.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pandas->datasets) (2024.2) + Requirement already satisfied: tzdata>=2022.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pandas->datasets) (2024.2) + Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch>=2.1.0) (1.3.0) + Requirement already satisfied: six>=1.5 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0) + Requirement already satisfied: propcache>=0.2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from yarl<2.0,>=1.12.0->aiohttp->datasets) (0.2.0) Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. @@ -116,6 +116,15 @@ Imports from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor + +.. parsed-literal:: + + 2024-11-05 04:52:12.108210: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 04:52:12.142263: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2024-11-05 04:52:12.803683: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + + Settings -------- @@ -163,9 +172,16 @@ IR). .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4779: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + WARNING:tensorflow:Please fix your imports. 
Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. + + +.. parsed-literal:: + + [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py:871: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py:872: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_output.size() != (bsz, self.num_heads, tgt_len, self.head_dim): @@ -272,14 +288,6 @@ steps: INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino -.. 
parsed-literal:: - - 2024-10-23 04:48:23.521146: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-23 04:48:23.553631: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-23 04:48:24.182318: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - .. parsed-literal:: @@ -500,7 +508,7 @@ quantized model. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/utilities/prints.py:62: FutureWarning: Importing `WordErrorRate` from `torchmetrics` was deprecated and will be removed in 2.0. Import `WordErrorRate` from `torchmetrics.text` instead. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/utilities/prints.py:62: FutureWarning: Importing `WordErrorRate` from `torchmetrics` was deprecated and will be removed in 2.0. Import `WordErrorRate` from `torchmetrics.text` instead. _future_warning( @@ -570,23 +578,23 @@ models. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. 
[Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 17.90 ms + [ INFO ] Read model took 18.07 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: - [ INFO ] input_values , 45 (node: input_values) : f32 / [...] / [?,?] + [ INFO ] 45 , input_values (node: input_values) : f32 / [...] / [?,?] [ INFO ] Model outputs: [ INFO ] logits (node: __module.lm_head/aten::linear/Add) : f32 / [...] / [?,1..,32] [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: '45': [1,30480] - [ INFO ] Reshape model took 4.29 ms + [ INFO ] Reshape model took 4.26 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: - [ INFO ] input_values , 45 (node: input_values) : f32 / [...] / [1,30480] + [ INFO ] 45 , input_values (node: input_values) : f32 / [...] / [1,30480] [ INFO ] Model outputs: [ INFO ] logits (node: __module.lm_head/aten::linear/Add) : f32 / [...] / [1,95,32] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 493.63 ms + [ INFO ] Compile model took 495.05 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -623,17 +631,17 @@ models. [ INFO ] Fill input '45' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 120000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 69.32 ms + [ INFO ] First inference took 69.60 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 5400 iterations - [ INFO ] Duration: 120141.91 ms + [ INFO ] Count: 5406 iterations + [ INFO ] Duration: 120228.72 ms [ INFO ] Latency: - [ INFO ] Median: 131.56 ms - [ INFO ] Average: 133.32 ms - [ INFO ] Min: 88.10 ms - [ INFO ] Max: 314.58 ms - [ INFO ] Throughput: 44.95 FPS + [ INFO ] Median: 131.74 ms + [ INFO ] Average: 133.26 ms + [ INFO ] Min: 107.29 ms + [ INFO ] Max: 340.52 ms + [ INFO ] Throughput: 44.96 FPS .. code:: ipython3 @@ -660,7 +668,7 @@ models. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 24.51 ms + [ INFO ] Read model took 23.99 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] 45 , input_values (node: input_values) : f32 / [...] / [?,?] @@ -669,14 +677,14 @@ models. [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: '45': [1,30480] - [ INFO ] Reshape model took 5.99 ms + [ INFO ] Reshape model took 6.04 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] 45 , input_values (node: input_values) : f32 / [...] / [1,30480] [ INFO ] Model outputs: [ INFO ] logits (node: __module.lm_head/aten::linear/Add) : f32 / [...] / [1,95,32] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 1234.19 ms + [ INFO ] Compile model took 1203.24 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -713,15 +721,15 @@ models. 
[ INFO ] Fill input '45' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 120000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 56.91 ms + [ INFO ] First inference took 53.37 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 7974 iterations - [ INFO ] Duration: 120125.59 ms + [ INFO ] Count: 8004 iterations + [ INFO ] Duration: 120086.07 ms [ INFO ] Latency: - [ INFO ] Median: 88.71 ms - [ INFO ] Average: 90.24 ms - [ INFO ] Min: 68.79 ms - [ INFO ] Max: 212.19 ms - [ INFO ] Throughput: 66.38 FPS + [ INFO ] Median: 88.68 ms + [ INFO ] Average: 89.86 ms + [ INFO ] Min: 68.86 ms + [ INFO ] Max: 235.63 ms + [ INFO ] Throughput: 66.65 FPS diff --git a/docs/notebooks/speechbrain-emotion-recognition-with-output.rst b/docs/notebooks/speechbrain-emotion-recognition-with-output.rst index 55e3b31765b191..42783b7d598b01 100644 --- a/docs/notebooks/speechbrain-emotion-recognition-with-output.rst +++ b/docs/notebooks/speechbrain-emotion-recognition-with-output.rst @@ -53,25 +53,6 @@ Installations %pip install -q "transformers>=4.30.0" "huggingface_hub>=0.8.0" "SoundFile" %pip install -q "openvino>=2024.1.0" - -.. parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - altair 5.4.1 requires typing-extensions>=4.10.0; python_version < "3.13", but you have typing-extensions 4.9.0 which is incompatible. - descript-audiotools 0.7.2 requires protobuf<3.20,>=3.9.2, but you have protobuf 3.20.3 which is incompatible. - detectron2 0.6 requires iopath<0.1.10,>=0.1.7, but you have iopath 0.1.10 which is incompatible. 
- mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.19.1+cpu which is incompatible. - modelscope-studio 0.5.0 requires gradio<5.0,>=4.0, but you have gradio 3.43.1 which is incompatible. - openvino-dev 2024.4.0 requires openvino==2024.4.0, but you have openvino 2024.5.0.dev20241014 which is incompatible. - parler-tts 0.2 requires transformers<=4.43.3,>=4.43.0, but you have transformers 4.45.2 which is incompatible. - tensorflow 2.12.0 requires numpy<1.24,>=1.22, but you have numpy 1.24.4 which is incompatible. - typeguard 4.3.0 requires typing-extensions>=4.10.0, but you have typing-extensions 4.9.0 which is incompatible. - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - - Imports ~~~~~~~ @@ -85,6 +66,12 @@ Imports import openvino as ov + +.. parsed-literal:: + + torchvision is not available - cannot save figures + + Prepare base model ~~~~~~~~~~~~~~~~~~ @@ -116,42 +103,6 @@ SpeechBrain codebase. source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP", pymodule_file="custom_interface.py", classname="CustomEncoderWav2vec2Classifier" ) - - -.. parsed-literal:: - - config.json: 0%| | 0.00/1.84k [00:00=2.2" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q "stable-audio-tools" "nncf>=2.12.0" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "openvino>=2024.3.0" + if platform.system() == "Darwin": + %pip install -q "numpy>=1.26,<2.0.0" "pandas>2.0.2" + else: + %pip install -q "numpy>=1.26" "pandas>2.0.2" + %pip install -q "openvino>=2024.4.0" Load the original model and inference ------------------------------------- @@ -108,15 +114,28 @@ Load the original model and inference model, model_config = get_pretrained_model("stabilityai/stable-audio-open-1.0") + +.. 
parsed-literal:: + + model_config.json: 0%| | 0.00/4.17k [00:00=t0 but got ta=0.29999998211860657 and t0=0.3. + /home/ea/work/py311/lib/python3.11/site-packages/torchsde/_brownian/brownian_interval.py:599: UserWarning: Should have ta>=t0 but got ta=0.29999998211860657 and t0=0.3. warnings.warn(f"Should have ta>=t0 but got ta={ta} and t0={self._start}.") @@ -210,7 +235,7 @@ Load the original model and inference @@ -287,7 +312,7 @@ documentation =t0 but got ta=0.29999998211860657 and t0=0.3. + /home/ea/work/py311/lib/python3.11/site-packages/torchsde/_brownian/brownian_interval.py:599: UserWarning: Should have ta>=t0 but got ta=0.29999998211860657 and t0=0.3. warnings.warn(f"Should have ta>=t0 but got ta={ta} and t0={self._start}.") @@ -679,7 +685,7 @@ and run inference. @@ -736,8 +742,3 @@ Interactive inference # If you are launching remotely, specify server_name and server_port # EXAMPLE: `demo.launch(server_name='your server name', server_port='server port in int')` # To learn more please refer to the Gradio docs: https://gradio.app/docs/ - -.. code:: ipython3 - - # please uncomment and run this cell for stopping gradio interface - # demo.close() diff --git a/docs/notebooks/stable-cascade-image-generation-with-output.rst b/docs/notebooks/stable-cascade-image-generation-with-output.rst index 969ed4e4dbe336..8bd358958e47b6 100644 --- a/docs/notebooks/stable-cascade-image-generation-with-output.rst +++ b/docs/notebooks/stable-cascade-image-generation-with-output.rst @@ -56,13 +56,8 @@ Prerequisites .. parsed-literal:: ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - descript-audiotools 0.7.2 requires protobuf<3.20,>=3.9.2, but you have protobuf 5.28.3 which is incompatible. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.19.1+cpu which is incompatible. 
- open-clip-torch 2.22.0 requires protobuf<4, but you have protobuf 5.28.3 which is incompatible. s3fs 2024.10.0 requires fsspec==2024.10.0.*, but you have fsspec 2024.9.0 which is incompatible. - tensorflow 2.12.0 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 5.28.3 which is incompatible. - tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 5.28.3 which is incompatible. - wandb 0.15.4 requires protobuf!=4.21.0,<5,>=3.12.0; python_version < "3.9" and sys_platform == "linux", but you have protobuf 5.28.3 which is incompatible. Note: you may need to restart the kernel to use updated packages. @@ -85,10 +80,10 @@ Load and run the original pipeline .. parsed-literal:: - 2024-10-23 04:56:18.757348: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-23 04:56:18.791430: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 05:00:13.157963: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 05:00:13.192172: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2024-10-23 04:56:19.346484: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 05:00:13.880563: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -257,11 +252,12 @@ here, we always use fixed shapes in conversion by using an .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4779: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:88: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:88: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input_shape[-1] > 1 or self.sliding_window is not None: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: @@ -310,7 +306,7 @@ here, we always use fixed shapes in conversion by using an .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unets/unet_stable_cascade.py:548: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unets/unet_stable_cascade.py:548: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if skip is not None and (x.size(-1) != skip.size(-1) or x.size(-2) != skip.size(-2)): diff --git a/docs/notebooks/stable-cascade-image-generation-with-output_files/stable-cascade-image-generation-with-output_29_2.jpg b/docs/notebooks/stable-cascade-image-generation-with-output_files/stable-cascade-image-generation-with-output_29_2.jpg index ff7c5a51fa8623..240c88c559da36 100644 --- a/docs/notebooks/stable-cascade-image-generation-with-output_files/stable-cascade-image-generation-with-output_29_2.jpg +++ b/docs/notebooks/stable-cascade-image-generation-with-output_files/stable-cascade-image-generation-with-output_29_2.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8ea7494f5ec5eee5a399e31abce1aafc5b2df1e446414a4c8ece593b465608e2 -size 81605 +oid sha256:8f0845e937cadada4b26550b58858292fd42eebd18367b454b1401143e263338 +size 85254 diff --git a/docs/notebooks/stable-cascade-image-generation-with-output_files/stable-cascade-image-generation-with-output_29_2.png b/docs/notebooks/stable-cascade-image-generation-with-output_files/stable-cascade-image-generation-with-output_29_2.png index 81e228f915393f..349c8fa79af81f 100644 --- a/docs/notebooks/stable-cascade-image-generation-with-output_files/stable-cascade-image-generation-with-output_29_2.png +++ b/docs/notebooks/stable-cascade-image-generation-with-output_files/stable-cascade-image-generation-with-output_29_2.png @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:33518ccf125cee029c73380add209aae63208485ac340655ac3123d0e19c0426 -size 1629985 +oid sha256:d44cddd44f25458a49a6bf4a70431bbd39eecdc67d015c2c88aafa644f7c8ff9 +size 1644717 diff --git a/docs/notebooks/stable-diffusion-ip-adapter-with-output.rst b/docs/notebooks/stable-diffusion-ip-adapter-with-output.rst index 9f84e3178267ff..15c490942998d6 100644 --- a/docs/notebooks/stable-diffusion-ip-adapter-with-output.rst +++ b/docs/notebooks/stable-diffusion-ip-adapter-with-output.rst @@ -75,8 +75,6 @@ Prerequisites ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. botocore 1.35.36 requires urllib3<1.27,>=1.25.4; python_version < "3.10", but you have urllib3 2.2.3 which is incompatible. - tensorflow 2.12.0 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 5.28.3 which is incompatible. - wandb 0.15.4 requires protobuf!=4.21.0,<5,>=3.12.0; python_version < "3.9" and sys_platform == "linux", but you have protobuf 5.28.3 which is incompatible. Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. @@ -197,10 +195,10 @@ Additionally, LCM requires using LCMScheduler for efficient generation. .. parsed-literal:: - 2024-10-23 05:07:50.575524: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-23 05:07:50.610025: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
+ 2024-11-05 05:11:37.931642: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 05:11:37.965522: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-23 05:07:51.160950: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 05:11:38.628517: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -211,7 +209,7 @@ Additionally, LCM requires using LCMScheduler for efficient generation. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/feature_extraction_clip.py:28: FutureWarning: The class CLIPFeatureExtractor is deprecated and will be removed in version 5 of Transformers. Please use CLIPImageProcessor instead. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/feature_extraction_clip.py:28: FutureWarning: The class CLIPFeatureExtractor is deprecated and will be removed in version 5 of Transformers. Please use CLIPImageProcessor instead. warnings.warn( @@ -293,8 +291,11 @@ extractor as input and returns image embeddings. .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. 
- /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4779: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( + `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:243: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if not interpolate_pos_encoding and (height != self.image_size or width != self.image_size): U-net @@ -355,17 +356,17 @@ Model predicts the ``sample`` state for the next step. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unets/unet_2d_condition.py:1111: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unets/unet_2d_condition.py:1111: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if dim % default_overall_up_factor != 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/embeddings.py:1801: FutureWarning: You have passed a tensor as `image_embeds`.This is deprecated and will be removed in a future release. Please make sure to update your script to pass `image_embeds` as a list of tensors to suppress this warning. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/embeddings.py:1801: FutureWarning: You have passed a tensor as `image_embeds`.This is deprecated and will be removed in a future release. Please make sure to update your script to pass `image_embeds` as a list of tensors to suppress this warning. deprecate("image_embeds not a list", "1.0.0", deprecation_message, standard_warn=False) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:136: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:136: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:145: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:145: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:147: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:147: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if hidden_states.shape[0] >= 64: @@ -443,16 +444,16 @@ image in pipeline, we can discuss it in inference examples. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Trace had nondeterministic nodes. Did you forget call .eval() on your model? 
Nodes: - %2506 : Float(1, 4, 64, 64, strides=[16384, 4096, 64, 1], requires_grad=0, device=cpu) = aten::randn(%2500, %2501, %2502, %2503, %2504, %2505) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/torch_utils.py:81:0 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Trace had nondeterministic nodes. Did you forget call .eval() on your model? Nodes: + %2506 : Float(1, 4, 64, 64, strides=[16384, 4096, 64, 1], requires_grad=0, device=cpu) = aten::randn(%2500, %2501, %2502, %2503, %2504, %2505) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/torch_utils.py:81:0 This may cause errors in trace checking. To disable trace checking, pass check_trace=False to torch.jit.trace() _check_trace( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: Tensor-likes are not close! 
- Mismatched elements: 10395 / 16384 (63.4%) - Greatest absolute difference: 0.0018606185913085938 at index (0, 1, 63, 63) (up to 1e-05 allowed) - Greatest relative difference: 0.005050937208143127 at index (0, 3, 63, 59) (up to 1e-05 allowed) + Mismatched elements: 10398 / 16384 (63.5%) + Greatest absolute difference: 0.0014954805374145508 at index (0, 1, 0, 63) (up to 1e-05 allowed) + Greatest relative difference: 0.004146566421115057 at index (0, 3, 63, 59) (up to 1e-05 allowed) _check_trace( @@ -498,9 +499,9 @@ hidden states. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:88: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:88: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input_shape[-1] > 1 or self.sliding_window is not None: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: diff --git a/docs/notebooks/stable-diffusion-ip-adapter-with-output_files/stable-diffusion-ip-adapter-with-output_22_1.png b/docs/notebooks/stable-diffusion-ip-adapter-with-output_files/stable-diffusion-ip-adapter-with-output_22_1.png index 9e41d1a02b25ee..b202db52895198 100644 --- a/docs/notebooks/stable-diffusion-ip-adapter-with-output_files/stable-diffusion-ip-adapter-with-output_22_1.png +++ b/docs/notebooks/stable-diffusion-ip-adapter-with-output_files/stable-diffusion-ip-adapter-with-output_22_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0d5fa26a0d01a01557d2baf61229065adafcf06e97d796f49cad83cfeef8c42e -size 964520 +oid sha256:f9454f039912f132f1c278b4d58fddc6b737c8a6d56ddbc5358142ad1e9f987c +size 980237 diff --git a/docs/notebooks/stable-diffusion-ip-adapter-with-output_files/stable-diffusion-ip-adapter-with-output_25_0.png b/docs/notebooks/stable-diffusion-ip-adapter-with-output_files/stable-diffusion-ip-adapter-with-output_25_0.png index 45f0dc6e8e9e10..299ec38e04f457 100644 --- a/docs/notebooks/stable-diffusion-ip-adapter-with-output_files/stable-diffusion-ip-adapter-with-output_25_0.png +++ b/docs/notebooks/stable-diffusion-ip-adapter-with-output_files/stable-diffusion-ip-adapter-with-output_25_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3b0f7041fd5c84ecde36f1036e957a5f64eb12507270cd300392042c8a703f6d -size 961384 +oid 
sha256:8e9b89ad55b5255e86b4a7e7d40984fa3e8ca7d64dc5cff8ca1367dcc0de5a72 +size 927436 diff --git a/docs/notebooks/stable-diffusion-ip-adapter-with-output_files/stable-diffusion-ip-adapter-with-output_28_0.png b/docs/notebooks/stable-diffusion-ip-adapter-with-output_files/stable-diffusion-ip-adapter-with-output_28_0.png index 8b9cea0390f80a..9f1e4c4d220755 100644 --- a/docs/notebooks/stable-diffusion-ip-adapter-with-output_files/stable-diffusion-ip-adapter-with-output_28_0.png +++ b/docs/notebooks/stable-diffusion-ip-adapter-with-output_files/stable-diffusion-ip-adapter-with-output_28_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e434bcc6a4403efc3f9272a49882d42ea354dae2de6231c7d71458ea79fd6de9 -size 593289 +oid sha256:46120a84ef8334f29241e07bd43c8313977cd2318ce36a951f1f8ed3c626ba84 +size 598424 diff --git a/docs/notebooks/stable-diffusion-torchdynamo-backend-with-output.rst b/docs/notebooks/stable-diffusion-torchdynamo-backend-with-output.rst index e03a4ab614c769..151a5a5c30588d 100644 --- a/docs/notebooks/stable-diffusion-torchdynamo-backend-with-output.rst +++ b/docs/notebooks/stable-diffusion-torchdynamo-backend-with-output.rst @@ -117,8 +117,7 @@ torch.compile it goes through the following steps: 1. Graph acquisition - the model is rewritten as blocks of subgraphs that are either: - compiled by TorchDynamo and “flattened”, - - falling back to the eager-mode, due to unsupported Python constructs (like control-flow - code). + - falling back to the eager-mode, due to unsupported Python constructs (like control-flow code). 2. Graph lowering - all PyTorch operations are decomposed into their constituent kernels specific to the chosen backend. 
diff --git a/docs/notebooks/style-transfer-with-output.rst b/docs/notebooks/style-transfer-with-output.rst index dbbe6395574657..9b902aeb79f7a7 100644 --- a/docs/notebooks/style-transfer-with-output.rst +++ b/docs/notebooks/style-transfer-with-output.rst @@ -193,7 +193,7 @@ OpenVINO Intermediate Representation (IR) with ``FP16`` precision. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/style-transfer-webcam/model/mosaic-9.onnx') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/style-transfer-webcam/model/mosaic-9.onnx') diff --git a/docs/notebooks/style-transfer-with-output_files/style-transfer-with-output_25_0.png b/docs/notebooks/style-transfer-with-output_files/style-transfer-with-output_25_0.png index d9cfe3e6360a7a..69ebcfdd60f395 100644 --- a/docs/notebooks/style-transfer-with-output_files/style-transfer-with-output_25_0.png +++ b/docs/notebooks/style-transfer-with-output_files/style-transfer-with-output_25_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0565650551a7561fb5450ec915c747ffad759c8cba258eec1d1cd2d30f85c812 -size 81718 +oid sha256:0aa8c4fc4dd201212c4b63e6436c2a5e9cd6f7994271244bfabe91a5cef7e675 +size 81862 diff --git a/docs/notebooks/table-question-answering-with-output.rst b/docs/notebooks/table-question-answering-with-output.rst index 88edfa22333b86..e1cad24a567f4b 100644 --- a/docs/notebooks/table-question-answering-with-output.rst +++ b/docs/notebooks/table-question-answering-with-output.rst @@ -80,10 +80,10 @@ Prerequisites .. parsed-literal:: - 2024-10-23 05:11:15.997414: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-23 05:11:16.031769: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 05:15:04.432298: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 05:15:04.467903: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-23 05:11:16.587915: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 05:15:05.158075: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT Use ``TapasForQuestionAnswering.from_pretrained`` to download a @@ -219,57 +219,58 @@ function to serialize the result of conversion. .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4779: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. 
Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1571: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1571: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. self.indices = torch.as_tensor(indices) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1572: TracerWarning: torch.as_tensor results are registered as constants in the trace. 
You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1572: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. self.num_segments = torch.as_tensor(num_segments, device=indices.device) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1674: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1674: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. 
batch_size = torch.prod(torch.tensor(list(index.batch_shape()))) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1750: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1750: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. [torch.as_tensor([-1], dtype=torch.long), torch.as_tensor(vector_shape, dtype=torch.long)], dim=0 - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1753: TracerWarning: Converting a tensor to a Python list might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1753: TracerWarning: Converting a tensor to a Python list might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! flat_values = values.reshape(flattened_shape.tolist()) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1755: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1755: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! out = torch.zeros(int(flat_index.num_segments), dtype=torch.float, device=flat_values.device) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1763: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1763: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. torch.as_tensor(index.batch_shape(), dtype=torch.long), - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1764: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1764: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. torch.as_tensor([index.num_segments], dtype=torch.long), - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1765: TracerWarning: torch.as_tensor results are registered as constants in the trace. 
You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1765: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. torch.as_tensor(vector_shape, dtype=torch.long), - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1770: TracerWarning: Converting a tensor to a Python list might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1770: TracerWarning: Converting a tensor to a Python list might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
output_values = segment_means.clone().view(new_shape.tolist()).to(values.dtype) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1701: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1701: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. batch_shape = torch.as_tensor( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1705: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1705: TracerWarning: torch.as_tensor results are registered as constants in the trace. 
You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. num_segments = torch.as_tensor(num_segments) # create a rank 0 tensor (scalar) containing num_segments (e.g. 64) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1716: TracerWarning: Converting a tensor to a Python list might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1716: TracerWarning: Converting a tensor to a Python list might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! new_shape = [int(x) for x in new_tensor.tolist()] - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1719: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1719: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. multiples = torch.cat([batch_shape, torch.as_tensor([1])], dim=0) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1720: TracerWarning: Converting a tensor to a Python list might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1720: TracerWarning: Converting a tensor to a Python list might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! indices = indices.repeat(multiples.tolist()) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:282: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. 
In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:282: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. torch.as_tensor(self.config.max_position_embeddings - 1, device=device), position - first_position - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1231: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1231: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. 
indices=torch.min(row_ids, torch.as_tensor(self.config.max_num_rows - 1, device=row_ids.device)), - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1236: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1236: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. indices=torch.min(column_ids, torch.as_tensor(self.config.max_num_columns - 1, device=column_ids.device)), - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1928: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1928: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. column_logits += CLOSE_ENOUGH_TO_LOG_ZERO * torch.as_tensor( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1933: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1933: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. column_logits += CLOSE_ENOUGH_TO_LOG_ZERO * torch.as_tensor( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1969: TracerWarning: torch.as_tensor results are registered as constants in the trace. 
You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1969: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. labels_per_column, _ = reduce_sum(torch.as_tensor(labels, dtype=torch.float32, device=labels.device), col_index) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1992: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1992: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. 
torch.as_tensor(labels, dtype=torch.long, device=labels.device), cell_index - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1999: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:1999: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. column_mask = torch.as_tensor( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:2024: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:2024: TracerWarning: torch.as_tensor results are registered as constants in the trace. 
You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. selected_column_id = torch.as_tensor( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:2029: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/tapas/modeling_tapas.py:2029: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. selected_column_mask = torch.as_tensor( diff --git a/docs/notebooks/tensorflow-classification-to-openvino-with-output.rst b/docs/notebooks/tensorflow-classification-to-openvino-with-output.rst index b2efda5f0aa31a..45eb4a96573be1 100644 --- a/docs/notebooks/tensorflow-classification-to-openvino-with-output.rst +++ b/docs/notebooks/tensorflow-classification-to-openvino-with-output.rst @@ -338,5 +338,5 @@ performance. .. 
parsed-literal:: - IR model in OpenVINO Runtime/CPU: 0.0010 seconds per image, FPS: 995.15 + IR model in OpenVINO Runtime/CPU: 0.0010 seconds per image, FPS: 959.64 diff --git a/docs/notebooks/tensorflow-instance-segmentation-to-openvino-with-output.rst b/docs/notebooks/tensorflow-instance-segmentation-to-openvino-with-output.rst index 0c0357fa707f9c..2410d1657c9b1c 100644 --- a/docs/notebooks/tensorflow-instance-segmentation-to-openvino-with-output.rst +++ b/docs/notebooks/tensorflow-instance-segmentation-to-openvino-with-output.rst @@ -391,7 +391,7 @@ Read the image, resize and convert it to the input shape of the network: .. parsed-literal:: - + diff --git a/docs/notebooks/tensorflow-instance-segmentation-to-openvino-with-output_files/tensorflow-instance-segmentation-to-openvino-with-output_39_0.png b/docs/notebooks/tensorflow-instance-segmentation-to-openvino-with-output_files/tensorflow-instance-segmentation-to-openvino-with-output_39_0.png index d91e735a1d6467..4940e220329fbf 100644 --- a/docs/notebooks/tensorflow-instance-segmentation-to-openvino-with-output_files/tensorflow-instance-segmentation-to-openvino-with-output_39_0.png +++ b/docs/notebooks/tensorflow-instance-segmentation-to-openvino-with-output_files/tensorflow-instance-segmentation-to-openvino-with-output_39_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8492fcb14f954229b35321f809f600bc9c9fb445cf44cf1cab1dbae5473e2d78 -size 391367 +oid sha256:69ea6c54eefea173c120ff5666caa829b5a008727ebd3a83f7e10d1986de07bd +size 394182 diff --git a/docs/notebooks/tensorflow-object-detection-to-openvino-with-output.rst b/docs/notebooks/tensorflow-object-detection-to-openvino-with-output.rst index 970ff5dd5278ae..11dc1675dcfeb4 100644 --- a/docs/notebooks/tensorflow-object-detection-to-openvino-with-output.rst +++ b/docs/notebooks/tensorflow-object-detection-to-openvino-with-output.rst @@ -189,7 +189,7 @@ from TensorFlow Hub: .. 
parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/tensorflow-object-detection-to-openvino/od-model/tf/faster_rcnn_resnet50_v1_640x640.tar.gz') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/tensorflow-object-detection-to-openvino/od-model/tf/faster_rcnn_resnet50_v1_640x640.tar.gz') @@ -368,7 +368,7 @@ Load and save an image: .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/tensorflow-object-detection-to-openvino/data/coco_bike.jpg') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/tensorflow-object-detection-to-openvino/data/coco_bike.jpg') @@ -396,7 +396,7 @@ Read the image, resize and convert it to the input shape of the network: .. parsed-literal:: - + @@ -697,7 +697,7 @@ Zoo `__: .. 
parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/tensorflow-object-detection-to-openvino/data/coco_91cl.txt') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/tensorflow-object-detection-to-openvino/data/coco_91cl.txt') diff --git a/docs/notebooks/tensorflow-object-detection-to-openvino-with-output_files/tensorflow-object-detection-to-openvino-with-output_38_0.png b/docs/notebooks/tensorflow-object-detection-to-openvino-with-output_files/tensorflow-object-detection-to-openvino-with-output_38_0.png index e3868b416ee4e5..41c96a6eeb7d0e 100644 --- a/docs/notebooks/tensorflow-object-detection-to-openvino-with-output_files/tensorflow-object-detection-to-openvino-with-output_38_0.png +++ b/docs/notebooks/tensorflow-object-detection-to-openvino-with-output_files/tensorflow-object-detection-to-openvino-with-output_38_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dfdccf9cf18ab90165dbca452027d6afdd51d2f355b8cdc45d7431f9e88c5782 -size 392207 +oid sha256:63da618f6999e82c3f9e461cb912450186fbb00bc247ccd311a0dfd0bbc825b3 +size 390893 diff --git a/docs/notebooks/tensorflow-quantization-aware-training-with-output.rst b/docs/notebooks/tensorflow-quantization-aware-training-with-output.rst index 8fe9423b3dcc53..4a4285ca97cc9a 100644 --- a/docs/notebooks/tensorflow-quantization-aware-training-with-output.rst +++ b/docs/notebooks/tensorflow-quantization-aware-training-with-output.rst @@ -127,10 +127,10 @@ models will be stored. .. parsed-literal:: - 2024-10-23 05:14:10.399928: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
- 2024-10-23 05:14:10.434579: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 05:17:59.724629: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 05:17:59.759583: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-23 05:14:11.041876: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 05:18:00.367471: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -140,7 +140,7 @@ models will be stored. Downloading data from https://storage.openvinotoolkit.org/repositories/nncf/openvino_notebook_ckpts/305_resnet18_imagenette_fp32_v1.h5 134604992/134604992 [==============================] - 2s 0us/step Absolute path where the model weights are saved: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/tensorflow-quantization-aware-training/model/ResNet-18_fp32.h5 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/tensorflow-quantization-aware-training/model/ResNet-18_fp32.h5 Dataset Preprocessing @@ -175,13 +175,13 @@ Download and prepare Imagenette 160px dataset. .. 
parsed-literal:: - 2024-10-23 05:14:16.300917: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. + 2024-11-05 05:18:05.378970: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. Skipping registering GPU devices... - 2024-10-23 05:14:16.433616: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_2' with dtype string and shape [1] - [[{{node Placeholder/_2}}]] - 2024-10-23 05:14:16.434383: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [1] - [[{{node Placeholder/_0}}]] - 2024-10-23 05:14:16.493317: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead. 
+ 2024-11-05 05:18:05.496251: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_3' with dtype int64 and shape [1] + [[{{node Placeholder/_3}}]] + 2024-11-05 05:18:05.496608: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype string and shape [1] + [[{{node Placeholder/_1}}]] + 2024-11-05 05:18:05.549189: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead. @@ -325,15 +325,15 @@ model and a training pipeline. .. 
parsed-literal:: - 2024-10-23 05:14:17.636634: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_2' with dtype string and shape [1] - [[{{node Placeholder/_2}}]] - 2024-10-23 05:14:17.637423: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_3' with dtype int64 and shape [1] - [[{{node Placeholder/_3}}]] + 2024-11-05 05:18:06.706690: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int64 and shape [1] + [[{{node Placeholder/_4}}]] + 2024-11-05 05:18:06.707393: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [1] + [[{{node Placeholder/_0}}]] .. parsed-literal:: - 4/4 [==============================] - 1s 250ms/sample - loss: 0.9807 - acc@1: 0.8220 + 4/4 [==============================] - 1s 241ms/sample - loss: 0.9807 - acc@1: 0.8220 Accuracy of FP32 model: 0.822 @@ -379,13 +379,13 @@ scenario and requires only 3 modifications. .. 
parsed-literal:: - 2024-10-23 05:14:20.291889: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_2' with dtype string and shape [1] + 2024-11-05 05:18:09.325779: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_2' with dtype string and shape [1] [[{{node Placeholder/_2}}]] - 2024-10-23 05:14:20.292274: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype string and shape [1] - [[{{node Placeholder/_1}}]] - 2024-10-23 05:14:21.242391: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead. - 2024-10-23 05:14:21.895899: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead. 
- 2024-10-23 05:14:30.320766: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead. + 2024-11-05 05:18:09.326170: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [1] + [[{{node Placeholder/_0}}]] + 2024-11-05 05:18:10.310403: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead. + 2024-11-05 05:18:10.931815: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead. + 2024-11-05 05:18:19.063128: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. 
You should use `dataset.take(k).cache().repeat()` instead. Evaluate the new model on the validation set after initialization of @@ -411,7 +411,7 @@ demonstrated here. .. parsed-literal:: - 4/4 [==============================] - 1s 306ms/sample - loss: 0.9766 - acc@1: 0.8120 + 4/4 [==============================] - 1s 305ms/sample - loss: 0.9766 - acc@1: 0.8120 Fine-tune the Compressed Model @@ -446,10 +446,10 @@ training pipeline are required. Here is a simple example. Accuracy of INT8 model after initialization: 0.812 Epoch 1/2 - 101/101 [==============================] - 49s 418ms/step - loss: 0.7134 - acc@1: 0.9299 + 101/101 [==============================] - 49s 416ms/step - loss: 0.7134 - acc@1: 0.9299 Epoch 2/2 - 101/101 [==============================] - 42s 415ms/step - loss: 0.6807 - acc@1: 0.9489 - 4/4 [==============================] - 1s 144ms/sample - loss: 0.9760 - acc@1: 0.8160 + 101/101 [==============================] - 42s 414ms/step - loss: 0.6807 - acc@1: 0.9489 + 4/4 [==============================] - 1s 141ms/sample - loss: 0.9760 - acc@1: 0.8160 Accuracy of INT8 model after fine-tuning: 0.816 @@ -559,10 +559,10 @@ Please select a benchmarking device using the dropdown list: .. parsed-literal:: Benchmark FP32 model (IR) - [ INFO ] Throughput: 2786.08 FPS + [ INFO ] Throughput: 2758.24 FPS Benchmark INT8 model (IR) - [ INFO ] Throughput: 10924.61 FPS + [ INFO ] Throughput: 11317.45 FPS Show Device Information for reference. diff --git a/docs/notebooks/tflite-selfie-segmentation-with-output.rst b/docs/notebooks/tflite-selfie-segmentation-with-output.rst index 9acc434aa435f6..cc8a203e4f15a9 100644 --- a/docs/notebooks/tflite-selfie-segmentation-with-output.rst +++ b/docs/notebooks/tflite-selfie-segmentation-with-output.rst @@ -143,7 +143,7 @@ Download pretrained model and test image .. 
parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/tflite-selfie-segmentation/selfie_multiclass_256x256.tflite') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/tflite-selfie-segmentation/selfie_multiclass_256x256.tflite') diff --git a/docs/notebooks/tflite-selfie-segmentation-with-output_files/tflite-selfie-segmentation-with-output_33_0.png b/docs/notebooks/tflite-selfie-segmentation-with-output_files/tflite-selfie-segmentation-with-output_33_0.png index d0b2b849bf51c2..882a9d712dd7b0 100644 --- a/docs/notebooks/tflite-selfie-segmentation-with-output_files/tflite-selfie-segmentation-with-output_33_0.png +++ b/docs/notebooks/tflite-selfie-segmentation-with-output_files/tflite-selfie-segmentation-with-output_33_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e2963c9b4c7dacdca6b1bbe9ea3ef832a19ba962c0a9980363722083e66b7358 -size 14233 +oid sha256:2cda2d6f5f72e6e7c24898ead882b6b26b696d1d1c61ff589d08fd55d98357ef +size 14403 diff --git a/docs/notebooks/tflite-to-openvino-with-output.rst b/docs/notebooks/tflite-to-openvino-with-output.rst index dcd7a98897f334..a70e10853c0dc7 100644 --- a/docs/notebooks/tflite-to-openvino-with-output.rst +++ b/docs/notebooks/tflite-to-openvino-with-output.rst @@ -274,7 +274,7 @@ GPU. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 9.48 ms + [ INFO ] Read model took 10.03 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] images (node: images) : f32 / [...] / [1,224,224,3] @@ -288,7 +288,7 @@ GPU. [ INFO ] Model outputs: [ INFO ] Softmax (node: 61) : f32 / [...] 
/ [1,1000] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 157.24 ms + [ INFO ] Compile model took 165.61 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: TensorFlow_Lite_Frontend_IR @@ -325,15 +325,15 @@ GPU. [ INFO ] Fill input 'images' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 7.14 ms + [ INFO ] First inference took 6.93 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 17436 iterations - [ INFO ] Duration: 15008.41 ms + [ INFO ] Count: 14982 iterations + [ INFO ] Duration: 15006.90 ms [ INFO ] Latency: - [ INFO ] Median: 4.99 ms - [ INFO ] Average: 5.03 ms - [ INFO ] Min: 2.84 ms - [ INFO ] Max: 24.60 ms - [ INFO ] Throughput: 1161.75 FPS + [ INFO ] Median: 5.47 ms + [ INFO ] Average: 5.91 ms + [ INFO ] Min: 3.02 ms + [ INFO ] Max: 34.80 ms + [ INFO ] Throughput: 998.34 FPS diff --git a/docs/notebooks/typo-detector-with-output.rst b/docs/notebooks/typo-detector-with-output.rst index 5c03dc870f8a69..94431f153ae72d 100644 --- a/docs/notebooks/typo-detector-with-output.rst +++ b/docs/notebooks/typo-detector-with-output.rst @@ -108,10 +108,10 @@ Imports .. parsed-literal:: - 2024-10-23 05:18:31.129817: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-23 05:18:31.163652: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
+ 2024-11-05 05:23:42.228206: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 05:23:42.262729: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-23 05:18:31.823748: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 05:23:42.928779: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT Methods @@ -255,8 +255,7 @@ your model. .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. - op1 = operator(\*args, \*\*kwargs) + `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. Load the tokenizer @@ -372,7 +371,7 @@ Let’s run a demo using the Hugging Face Optimum API. [Input]: I have been stuying for my math exam all week, but I'm stil not very confidet that I will pass it, because there are so many formuals to remeber. 
[Detected]: I have been stuying for my math exam all week, but I'm stil not very confidet that I will pass it, because there are so many formuals to remeber. ---------------------------------------------------------------------------------------------------------------------------------- - Time elapsed: 0.14796948432922363 + Time elapsed: 0.14881014823913574 2. Converting the model to OpenVINO IR @@ -618,5 +617,5 @@ Let’s run a demo using the converted OpenVINO IR model. [Input]: I have been stuying for my math exam all week, but I'm stil not very confidet that I will pass it, because there are so many formuals to remeber. [Detected]: I have been stuying for my math exam all week, but I'm stil not very confidet that I will pass it, because there are so many formuals to remeber. ---------------------------------------------------------------------------------------------------------------------------------- - Time elapsed: 0.10040116310119629 + Time elapsed: 0.10274600982666016 diff --git a/docs/notebooks/vision-background-removal-with-output.rst b/docs/notebooks/vision-background-removal-with-output.rst index 63e02d9c787aaf..4e79038a213347 100644 --- a/docs/notebooks/vision-background-removal-with-output.rst +++ b/docs/notebooks/vision-background-removal-with-output.rst @@ -195,7 +195,7 @@ next cell loads the model and the pre-trained weights. Downloading... From: https://drive.google.com/uc?id=1W8E4FHIlTVstfRkYmNOjbr0VDXTZm0jD To: <_io.BufferedWriter name='model/u2net_lite/u2net_lite.pth'> - 100%|██████████| 4.68M/4.68M [00:00<00:00, 34.0MB/s] + 100%|██████████| 4.68M/4.68M [00:00<00:00, 33.7MB/s] .. parsed-literal:: @@ -225,7 +225,7 @@ next cell loads the model and the pre-trained weights. .. parsed-literal:: - /tmp/ipykernel_2664614/1036642300.py:7: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. 
It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /tmp/ipykernel_586189/1036642300.py:7: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. net.load_state_dict(state_dict=torch.load(model_path, map_location="cpu")) @@ -252,7 +252,7 @@ OpenVINO IR format. Executing the following command may take a while. .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/vision-background-removal/model/u2net.py:23: UserWarning: `nn.functional.upsample` is deprecated. Use `nn.functional.interpolate` instead. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/vision-background-removal/model/u2net.py:23: UserWarning: `nn.functional.upsample` is deprecated. Use `nn.functional.interpolate` instead. src = F.upsample(src,size=tar.shape[2:],mode='bilinear') @@ -341,7 +341,7 @@ Load the OpenVINO IR model to OpenVINO Runtime and do inference. .. parsed-literal:: - Inference finished. Inference time: 0.107 seconds, FPS: 9.31. + Inference finished. Inference time: 0.106 seconds, FPS: 9.42. Visualize Results diff --git a/docs/notebooks/vision-monodepth-with-output.rst b/docs/notebooks/vision-monodepth-with-output.rst index ea81314fe49df7..dfb20643e991e7 100644 --- a/docs/notebooks/vision-monodepth-with-output.rst +++ b/docs/notebooks/vision-monodepth-with-output.rst @@ -309,7 +309,7 @@ original image shape. .. parsed-literal:: - /tmp/ipykernel_2664839/2076527990.py:15: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. + /tmp/ipykernel_586416/2076527990.py:15: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. cmap = matplotlib.cm.get_cmap(colormap) @@ -508,7 +508,7 @@ Do Inference on a Video and Create Monodepth Video .. parsed-literal:: - Processed 60 frames in 24.30 seconds. Total FPS (including video processing): 2.47.Inference FPS: 46.34 + Processed 60 frames in 25.55 seconds. 
Total FPS (including video processing): 2.35.Inference FPS: 48.61 Monodepth Video saved to 'output/Coco%20Walking%20in%20Berkeley_monodepth.mp4'. @@ -535,7 +535,7 @@ Display Monodepth Video .. parsed-literal:: Showing monodepth video saved at - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/vision-monodepth/output/Coco%20Walking%20in%20Berkeley_monodepth.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/vision-monodepth/output/Coco%20Walking%20in%20Berkeley_monodepth.mp4 If you cannot see the video in your browser, please click on the following link to download the video diff --git a/docs/notebooks/wav2lip-with-output.rst b/docs/notebooks/wav2lip-with-output.rst index 3f6b510cf00374..c70015d2614d8d 100644 --- a/docs/notebooks/wav2lip-with-output.rst +++ b/docs/notebooks/wav2lip-with-output.rst @@ -140,7 +140,7 @@ Download example files. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/wav2lip/data_video_sun_5s.mp4') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/wav2lip/data_video_sun_5s.mp4') @@ -177,7 +177,7 @@ and will convert both model in OpenVINO format. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/wav2lip/ov_wav2lip_helper.py:43: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). 
In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/wav2lip/ov_wav2lip_helper.py:43: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. model_weights = torch.load(path_to_detector) @@ -200,7 +200,7 @@ and will convert both model in OpenVINO format. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/wav2lip/ov_wav2lip_helper.py:16: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. 
It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/wav2lip/ov_wav2lip_helper.py:16: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. checkpoint = torch.load(checkpoint_path, map_location=lambda storage, loc: storage) @@ -263,7 +263,7 @@ python API and converted OpenVINO models. .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/wav2lip/Wav2Lip/audio.py:100: FutureWarning: Pass sr=16000, n_fft=800 as keyword args. From version 0.10 passing these as positional arguments will result in an error + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/wav2lip/Wav2Lip/audio.py:100: FutureWarning: Pass sr=16000, n_fft=800 as keyword args. From version 0.10 passing these as positional arguments will result in an error return librosa.filters.mel(hp.sample_rate, hp.n_fft, n_mels=hp.num_mels, @@ -286,14 +286,14 @@ python API and converted OpenVINO models. 0%| | 0/8 [00:00 #0:0 (mpeg4 (native) -> h264 (libx264)) Stream #0:0 -> #0:1 (pcm_s16le (native) -> aac (native)) Press [q] to stop, [?] for help - [libx264 @ 0x56537dc66840] -qscale is ignored, -crf is recommended. - [libx264 @ 0x56537dc66840] using SAR=1/1 - [libx264 @ 0x56537dc66840] using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX FMA3 BMI2 AVX2 AVX512 - [libx264 @ 0x56537dc66840] profile High, level 3.1 - [libx264 @ 0x56537dc66840] 264 - core 155 r2917 0a84d98 - H.264/MPEG-4 AVC codec - Copyleft 2003-2018 - http://www.videolan.org/x264.html - options: cabac=1 ref=3 deblock=1:0:0 analyse=0x3:0x113 me=hex subme=7 psy=1 psy_rd=1.00:0.00 mixed_ref=1 me_range=16 chroma_me=1 trellis=1 8x8dct=1 cqm=0 deadzone=21,11 fast_pskip=1 chroma_qp_offset=-2 threads=24 lookahead_threads=4 sliced_threads=0 nr=0 decimate=1 interlaced=0 bluray_compat=0 constrained_intra=0 bframes=3 b_pyramid=2 b_adapt=1 b_bias=0 direct=1 weightb=1 open_gop=0 weightp=2 keyint=250 keyint_min=25 scenecut=40 intra_refresh=0 rc_lookahead=40 rc=crf mbtree=1 crf=23.0 qcomp=0.60 qpmin=0 qpmax=69 qpstep=4 ip_ratio=1.40 aq=1:1.00 + [libx264 @ 0x55edc3fbd840] -qscale is ignored, -crf is recommended. 
+ [libx264 @ 0x55edc3fbd840] using SAR=1/1 + [libx264 @ 0x55edc3fbd840] using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX FMA3 BMI2 AVX2 AVX512 + [libx264 @ 0x55edc3fbd840] profile High, level 3.1 + [libx264 @ 0x55edc3fbd840] 264 - core 155 r2917 0a84d98 - H.264/MPEG-4 AVC codec - Copyleft 2003-2018 - http://www.videolan.org/x264.html - options: cabac=1 ref=3 deblock=1:0:0 analyse=0x3:0x113 me=hex subme=7 psy=1 psy_rd=1.00:0.00 mixed_ref=1 me_range=16 chroma_me=1 trellis=1 8x8dct=1 cqm=0 deadzone=21,11 fast_pskip=1 chroma_qp_offset=-2 threads=24 lookahead_threads=4 sliced_threads=0 nr=0 decimate=1 interlaced=0 bluray_compat=0 constrained_intra=0 bframes=3 b_pyramid=2 b_adapt=1 b_bias=0 direct=1 weightb=1 open_gop=0 weightp=2 keyint=250 keyint_min=25 scenecut=40 intra_refresh=0 rc_lookahead=40 rc=crf mbtree=1 crf=23.0 qcomp=0.60 qpmin=0 qpmax=69 qpstep=4 ip_ratio=1.40 aq=1:1.00 Output #0, mp4, to 'results/result_voice.mp4': Metadata: encoder : Lavf58.29.100 @@ -349,25 +349,25 @@ python API and converted OpenVINO models. 
encoder : Lavc58.54.100 aac frame= 123 fps=0.0 q=-1.0 Lsize= 621kB time=00:00:05.06 bitrate=1005.8kbits/s speed=10.8x video:573kB audio:43kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.827166% - [libx264 @ 0x56537dc66840] frame I:1 Avg QP:22.24 size: 31028 - [libx264 @ 0x56537dc66840] frame P:75 Avg QP:22.01 size: 6954 - [libx264 @ 0x56537dc66840] frame B:47 Avg QP:25.58 size: 718 - [libx264 @ 0x56537dc66840] consecutive B-frames: 38.2% 27.6% 14.6% 19.5% - [libx264 @ 0x56537dc66840] mb I I16..4: 14.0% 83.9% 2.1% - [libx264 @ 0x56537dc66840] mb P I16..4: 1.3% 3.3% 0.1% P16..4: 37.8% 8.2% 6.4% 0.0% 0.0% skip:43.0% - [libx264 @ 0x56537dc66840] mb B I16..4: 0.2% 0.7% 0.0% B16..8: 27.9% 0.4% 0.1% direct: 0.2% skip:70.6% L0:43.9% L1:54.2% BI: 1.9% - [libx264 @ 0x56537dc66840] 8x8 transform intra:73.3% inter:77.1% - [libx264 @ 0x56537dc66840] coded y,uvDC,uvAC intra: 56.9% 72.4% 8.1% inter: 11.4% 13.0% 0.2% - [libx264 @ 0x56537dc66840] i16 v,h,dc,p: 20% 23% 9% 48% - [libx264 @ 0x56537dc66840] i8 v,h,dc,ddl,ddr,vr,hd,vl,hu: 25% 23% 36% 3% 3% 2% 2% 3% 3% - [libx264 @ 0x56537dc66840] i4 v,h,dc,ddl,ddr,vr,hd,vl,hu: 39% 14% 14% 4% 6% 7% 4% 9% 3% - [libx264 @ 0x56537dc66840] i8c dc,h,v,p: 42% 25% 29% 4% - [libx264 @ 0x56537dc66840] Weighted P-Frames: Y:0.0% UV:0.0% - [libx264 @ 0x56537dc66840] ref P L0: 74.2% 10.4% 11.1% 4.3% - [libx264 @ 0x56537dc66840] ref B L0: 86.1% 11.2% 2.8% - [libx264 @ 0x56537dc66840] ref B L1: 98.3% 1.7% - [libx264 @ 0x56537dc66840] kb/s:953.36 - [aac @ 0x56537dc68140] Qavg: 121.673 + [libx264 @ 0x55edc3fbd840] frame I:1 Avg QP:22.24 size: 31028 + [libx264 @ 0x55edc3fbd840] frame P:75 Avg QP:22.01 size: 6954 + [libx264 @ 0x55edc3fbd840] frame B:47 Avg QP:25.58 size: 718 + [libx264 @ 0x55edc3fbd840] consecutive B-frames: 38.2% 27.6% 14.6% 19.5% + [libx264 @ 0x55edc3fbd840] mb I I16..4: 14.0% 83.9% 2.1% + [libx264 @ 0x55edc3fbd840] mb P I16..4: 1.3% 3.3% 0.1% P16..4: 37.8% 8.2% 6.4% 0.0% 0.0% skip:43.0% + [libx264 @ 
0x55edc3fbd840] mb B I16..4: 0.2% 0.7% 0.0% B16..8: 27.9% 0.4% 0.1% direct: 0.2% skip:70.6% L0:43.9% L1:54.2% BI: 1.9% + [libx264 @ 0x55edc3fbd840] 8x8 transform intra:73.3% inter:77.1% + [libx264 @ 0x55edc3fbd840] coded y,uvDC,uvAC intra: 56.9% 72.4% 8.1% inter: 11.4% 13.0% 0.2% + [libx264 @ 0x55edc3fbd840] i16 v,h,dc,p: 20% 23% 9% 48% + [libx264 @ 0x55edc3fbd840] i8 v,h,dc,ddl,ddr,vr,hd,vl,hu: 25% 23% 36% 3% 3% 2% 2% 3% 3% + [libx264 @ 0x55edc3fbd840] i4 v,h,dc,ddl,ddr,vr,hd,vl,hu: 39% 14% 14% 4% 6% 7% 4% 9% 3% + [libx264 @ 0x55edc3fbd840] i8c dc,h,v,p: 42% 25% 29% 4% + [libx264 @ 0x55edc3fbd840] Weighted P-Frames: Y:0.0% UV:0.0% + [libx264 @ 0x55edc3fbd840] ref P L0: 74.2% 10.4% 11.1% 4.3% + [libx264 @ 0x55edc3fbd840] ref B L0: 86.1% 11.2% 2.8% + [libx264 @ 0x55edc3fbd840] ref B L1: 98.3% 1.7% + [libx264 @ 0x55edc3fbd840] kb/s:953.36 + [aac @ 0x55edc3fbf140] Qavg: 121.673 diff --git a/docs/notebooks/whisper-asr-genai-with-output.rst b/docs/notebooks/whisper-asr-genai-with-output.rst index 45cc8291b8acc2..3f4339e01b3acc 100644 --- a/docs/notebooks/whisper-asr-genai-with-output.rst +++ b/docs/notebooks/whisper-asr-genai-with-output.rst @@ -75,8 +75,8 @@ Prerequisites .. 
code:: ipython3 - %pip install -q "transformers>=4.35" "torch>=2.3" "torchvision>=0.18.1" "onnx>=1.16.1" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "git+https://github.com/huggingface/optimum-intel.git" + %pip install -q "torch>=2.3" "torchvision>=0.18.1" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "transformers>=4.45" "git+https://github.com/huggingface/optimum-intel.git" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q --pre -U "openvino" "openvino-tokenizers" "openvino-genai" --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly %pip install -q datasets "gradio>=4.0" "soundfile>=0.12" "librosa" "python-ffmpeg<=1.0.16" %pip install -q "nncf>=2.13.0" "jiwer" @@ -91,6 +91,12 @@ Prerequisites url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) open("notebook_utils.py", "w").write(r.text) + + if not Path("cmd_helper.py").exists(): + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py", + ) + open("cmd_helper.py", "w").write(r.text) Load PyTorch model ------------------ @@ -266,7 +272,7 @@ Let’s check how to work the ``transcribe`` task. .. parsed-literal:: - /home/labuser/work/notebook/genai_whisper/lib/python3.10/site-packages/transformers/models/whisper/generation_whisper.py:496: FutureWarning: The input name `inputs` is deprecated. Please make sure to use `input_features` instead. + /home/labuser/work/notebook/whisper_new/lib/python3.10/site-packages/transformers/models/whisper/generation_whisper.py:496: FutureWarning: The input name `inputs` is deprecated. Please make sure to use `input_features` instead. warnings.warn( @@ -349,8 +355,8 @@ be found in the `paper `__. Result: The blog is our tool that is prefilled to encourage collaboration and develop the learning of the students and to attract a normal school class. 
-Download and convert model to OpenVINO IR via Optimum Intel CLI ---------------------------------------------------------------- +Convert model to OpenVINO IR via Optimum Intel CLI +-------------------------------------------------- @@ -381,20 +387,13 @@ documentation `__. import logging import nncf - import os - from IPython.display import display, Markdown + from cmd_helper import optimum_cli nncf.set_log_level(logging.ERROR) model_path = Path(model_id.value.split("/")[1]) - export_command = f"optimum-cli export openvino --model {model_id.value} --library transformers --task automatic-speech-recognition-with-past --framework pt {str(model_path)}" - - display(Markdown("**Export command:**")) - display(Markdown(f"`{export_command}`")) - - exit_code = os.system(export_command) - if exit_code != 0: - raise Exception("Failed to load and convert model!") + optimum_cli(model_id.value, model_path) + print(f"✅ {model_id.value} model converted and can be found in {model_path}") Run inference OpenVINO model with WhisperPipeline ------------------------------------------------- @@ -604,9 +603,9 @@ Compare performance PyTorch vs OpenVINO .. parsed-literal:: - Mean torch openai/whisper-tiny generation time: 0.624s - Mean openvino openai/whisper-tiny generation time: 0.344s - Performance openai/whisper-tiny openvino speedup: 1.814 + Mean torch openai/whisper-tiny generation time: 0.564s + Mean openvino openai/whisper-tiny generation time: 0.311s + Performance openai/whisper-tiny openvino speedup: 1.815 Quantization @@ -934,7 +933,7 @@ for Word Error Rate. .. parsed-literal:: - Whole pipeline performance speedup: 1.499 + Whole pipeline performance speedup: 1.350 Whisper transcription word accuracy. Original model: 82.88%. Quantized model: 84.13%. Accuracy drop: -1.25%. @@ -960,7 +959,7 @@ upload button) or record using your microphone. 
pipe = ov_quantized_pipe if to_quantize.value else ov_pipe - gr_pipeline = GradioPipeline(pipe, multilingual=(not model_id.value.endswith(".en")), quantized=to_quantize.value) + gr_pipeline = GradioPipeline(pipe, model_id.value, quantized=to_quantize.value) demo = make_demo(gr_pipeline) diff --git a/docs/notebooks/whisper-subtitles-generation-with-output.rst b/docs/notebooks/whisper-subtitles-generation-with-output.rst index 8bfbfa8a6e86a3..b1e16ffa40c626 100644 --- a/docs/notebooks/whisper-subtitles-generation-with-output.rst +++ b/docs/notebooks/whisper-subtitles-generation-with-output.rst @@ -80,7 +80,7 @@ Install dependencies. %pip install -q "nncf>=2.13.0" %pip install -q --pre -U "openvino" "openvino-tokenizers" "openvino-genai" --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - %pip install -q "python-ffmpeg<=1.0.16" "ffmpeg" "moviepy" "onnx!=1.16.2" "git+https://github.com/huggingface/optimum-intel.git" "torch>=2.1" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "python-ffmpeg<=1.0.16" "ffmpeg" "moviepy" "transformers>=4.45" "git+https://github.com/huggingface/optimum-intel.git" "torch>=2.1" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q -U "yt_dlp>=2024.8.6" soundfile librosa jiwer %pip install -q "gradio>=4.19" @@ -94,6 +94,12 @@ Install dependencies. url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) open("notebook_utils.py", "w").write(r.text) + + if not Path("cmd_helper.py").exists(): + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py", + ) + open("cmd_helper.py", "w").write(r.text) Instantiate model ----------------- @@ -184,10 +190,12 @@ documentation `__. .. 
code:: ipython3 + from cmd_helper import optimum_cli + model_dir = model_id.value.split("/")[-1] if not Path(model_dir).exists(): - !optimum-cli export openvino -m {model_id.value} {model_dir} --weight-format fp16 + optimum_cli(model_id.value, model_dir) Prepare inference pipeline -------------------------- @@ -218,13 +226,6 @@ select device from dropdown list for running inference using OpenVINO .. code:: ipython3 - import requests - - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - open("notebook_utils.py", "w").write(r.text) - from notebook_utils import device_widget device = device_widget(default="CPU", exclude=["NPU"]) diff --git a/docs/notebooks/wuerstchen-image-generation-with-output.rst b/docs/notebooks/wuerstchen-image-generation-with-output.rst index 342ddbafdc7edb..237bfb8b4bd943 100644 --- a/docs/notebooks/wuerstchen-image-generation-with-output.rst +++ b/docs/notebooks/wuerstchen-image-generation-with-output.rst @@ -229,7 +229,8 @@ parameter to generate a less memory-demanding model. Text encoder model has 2 inputs: -- ``input_ids``: vector of tokenized input sentence. Default tokenizer vector length is 77. +- ``input_ids``: vector of tokenized + input sentence. Default tokenizer vector length is 77. - ``attention_mask``: vector of same length as ``input_ids`` describing the attention mask. @@ -285,6 +286,7 @@ Decoder pipeline Decoder pipeline consists of 3 parts: decoder, text encoder and VQGAN. Decoder model is the WuerstchenDiffNeXt UNet decoder. 
Inputs are: + - ``x``: sample - ``r``: timestep - ``effnet``: interpolation block diff --git a/docs/notebooks/yolov11-instance-segmentation-with-output.rst b/docs/notebooks/yolov11-instance-segmentation-with-output.rst index 9d9b5e6bd15c0e..9cbca9fe4f1c37 100644 --- a/docs/notebooks/yolov11-instance-segmentation-with-output.rst +++ b/docs/notebooks/yolov11-instance-segmentation-with-output.rst @@ -143,7 +143,7 @@ Import required utility functions. The lower cell will download the .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg') @@ -216,14 +216,14 @@ Let us consider the examples: .. parsed-literal:: - 100%|██████████| 5.90M/5.90M [00:00<00:00, 25.1MB/s] + 100%|██████████| 5.90M/5.90M [00:00<00:00, 25.2MB/s] .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 480x640 3 bicycles, 2 cars, 1 motorcycle, 1 dog, 66.4ms - Speed: 1.8ms preprocess, 66.4ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 480x640 3 bicycles, 2 cars, 1 motorcycle, 1 dog, 69.6ms + Speed: 2.0ms preprocess, 69.6ms inference, 4.8ms postprocess per image at shape (1, 3, 480, 640) @@ -260,7 +260,7 @@ preserve dynamic shapes in the model. 
OpenVINO: export success ✅ 2.0s, saved as 'yolo11n-seg_openvino_model/' (6.0 MB) Export complete (2.2s) - Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization + Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization Predict: yolo predict task=segment model=yolo11n-seg_openvino_model imgsz=640 half Validate: yolo val task=segment model=yolo11n-seg_openvino_model imgsz=640 data=/ultralytics/ultralytics/cfg/datasets/coco.yaml half Visualize: https://netron.app @@ -345,8 +345,8 @@ Test on single image .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 3 bicycles, 2 cars, 1 dog, 23.2ms - Speed: 3.6ms preprocess, 23.2ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 3 bicycles, 2 cars, 1 dog, 23.2ms + Speed: 1.8ms preprocess, 23.2ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 640) @@ -668,8 +668,8 @@ on the image. .. 
parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 2 bicycles, 2 cars, 1 dog, 17.7ms - Speed: 2.1ms preprocess, 17.7ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 2 bicycles, 2 cars, 1 dog, 11.2ms + Speed: 1.9ms preprocess, 11.2ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 640) @@ -728,7 +728,7 @@ models. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 20.05 ms + [ INFO ] Read model took 19.61 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [?,3,?,?] @@ -738,7 +738,7 @@ models. [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'x': [1,3,640,640] - [ INFO ] Reshape model took 8.82 ms + [ INFO ] Reshape model took 8.69 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] x (node: x) : u8 / [N,C,H,W] / [1,3,640,640] @@ -746,7 +746,7 @@ models. [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_8) : f32 / [...] / [1,116,8400] [ INFO ] input.255 (node: __module.model.23.cv4.2.1.act/aten::silu_/Swish_46) : f32 / [...] / [1,32,160,160] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 387.38 ms + [ INFO ] Compile model took 390.55 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -783,17 +783,17 @@ models. 
[ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 36.42 ms + [ INFO ] First inference took 36.99 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 1788 iterations - [ INFO ] Duration: 15050.22 ms + [ INFO ] Count: 1806 iterations + [ INFO ] Duration: 15074.73 ms [ INFO ] Latency: - [ INFO ] Median: 49.80 ms - [ INFO ] Average: 50.35 ms - [ INFO ] Min: 33.27 ms - [ INFO ] Max: 104.15 ms - [ INFO ] Throughput: 118.80 FPS + [ INFO ] Median: 49.77 ms + [ INFO ] Average: 49.90 ms + [ INFO ] Min: 24.69 ms + [ INFO ] Max: 66.18 ms + [ INFO ] Throughput: 119.80 FPS .. code:: ipython3 @@ -819,7 +819,7 @@ models. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 29.13 ms + [ INFO ] Read model took 29.59 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [1,3,640,640] @@ -837,7 +837,7 @@ models. [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_8) : f32 / [...] / [1,116,8400] [ INFO ] input.255 (node: __module.model.23.cv4.2.1.act/aten::silu_/Swish_46) : f32 / [...] / [1,32,160,160] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 594.13 ms + [ INFO ] Compile model took 592.55 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -874,17 +874,17 @@ models. 
[ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 27.63 ms + [ INFO ] First inference took 24.82 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 3714 iterations - [ INFO ] Duration: 15026.92 ms + [ INFO ] Count: 3702 iterations + [ INFO ] Duration: 15040.97 ms [ INFO ] Latency: - [ INFO ] Median: 23.95 ms - [ INFO ] Average: 24.14 ms - [ INFO ] Min: 17.70 ms - [ INFO ] Max: 39.05 ms - [ INFO ] Throughput: 247.16 FPS + [ INFO ] Median: 24.15 ms + [ INFO ] Average: 24.25 ms + [ INFO ] Min: 20.10 ms + [ INFO ] Max: 43.05 ms + [ INFO ] Throughput: 246.13 FPS Other ways to optimize model diff --git a/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_46_0.png b/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_46_0.png index 011e13859e8a3e..737d0af981d026 100644 --- a/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_46_0.png +++ b/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_46_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e393b8edd8a20af1aaab6b69932bf0e715bf4f9c35814dd766dfa74ae27f2ae4 -size 493271 +oid sha256:f7d6739facd4204cf94a8bb5d471ea56a2ce3eab4dcd1e3807ba40c8e43baa44 +size 502582 diff --git a/docs/notebooks/yolov11-keypoint-detection-with-output.rst b/docs/notebooks/yolov11-keypoint-detection-with-output.rst index 97c549e6751dd0..cf94af9ab1e794 100644 --- a/docs/notebooks/yolov11-keypoint-detection-with-output.rst +++ b/docs/notebooks/yolov11-keypoint-detection-with-output.rst @@ -143,7 +143,7 @@ Import required utility 
functions. The lower cell will download the .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg') @@ -214,14 +214,14 @@ Let us consider the examples: .. parsed-literal:: - 100%|██████████| 5.97M/5.97M [00:00<00:00, 25.3MB/s] + 100%|██████████| 5.97M/5.97M [00:00<00:00, 21.1MB/s] .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg: 480x640 1 person, 59.6ms - Speed: 2.1ms preprocess, 59.6ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg: 480x640 1 person, 62.6ms + Speed: 1.8ms preprocess, 62.6ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640) @@ -258,7 +258,7 @@ preserve dynamic shapes in the model. 
OpenVINO: export success ✅ 2.0s, saved as 'yolo11n-pose_openvino_model/' (6.0 MB) Export complete (2.1s) - Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization + Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization Predict: yolo predict task=pose model=yolo11n-pose_openvino_model imgsz=640 half Validate: yolo val task=pose model=yolo11n-pose_openvino_model imgsz=640 data=/ultralytics/ultralytics/cfg/datasets/coco-pose.yaml half Visualize: https://netron.app @@ -338,8 +338,8 @@ ready to check model prediction. Loading yolo11n-pose_openvino_model for OpenVINO inference... Using OpenVINO LATENCY mode for batch=1 inference... - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg: 640x640 1 person, 19.9ms - Speed: 2.3ms preprocess, 19.9ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg: 640x640 1 person, 20.1ms + Speed: 2.8ms preprocess, 20.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640) @@ -686,8 +686,8 @@ on the image. Loading yolo11n-pose_openvino_model for OpenVINO inference... Using OpenVINO LATENCY mode for batch=1 inference... 
- image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg: 640x640 1 person, 28.0ms - Speed: 2.0ms preprocess, 28.0ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg: 640x640 1 person, 31.4ms + Speed: 2.1ms preprocess, 31.4ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640) @@ -748,7 +748,7 @@ models. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 19.55 ms + [ INFO ] Read model took 19.20 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [?,3,?,?] @@ -764,7 +764,7 @@ models. [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_9) : f32 / [...] / [1,56,8400] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 329.35 ms + [ INFO ] Compile model took 338.50 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -801,17 +801,17 @@ models. [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 120000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 33.67 ms + [ INFO ] First inference took 33.80 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 17280 iterations - [ INFO ] Duration: 120049.75 ms + [ INFO ] Count: 17082 iterations + [ INFO ] Duration: 120040.01 ms [ INFO ] Latency: - [ INFO ] Median: 40.74 ms - [ INFO ] Average: 41.55 ms - [ INFO ] Min: 24.20 ms - [ INFO ] Max: 98.48 ms - [ INFO ] Throughput: 143.94 FPS + [ INFO ] Median: 40.53 ms + [ INFO ] Average: 42.03 ms + [ INFO ] Min: 30.12 ms + [ INFO ] Max: 171.39 ms + [ INFO ] Throughput: 142.30 FPS .. code:: ipython3 @@ -838,7 +838,7 @@ models. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 28.39 ms + [ INFO ] Read model took 28.43 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [1,3,640,640] @@ -854,7 +854,7 @@ models. [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_9) : f32 / [...] / [1,56,8400] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 556.59 ms + [ INFO ] Compile model took 573.24 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -891,17 +891,17 @@ models. [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 30.44 ms + [ INFO ] First inference took 25.90 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 5160 iterations - [ INFO ] Duration: 15031.11 ms + [ INFO ] Count: 5124 iterations + [ INFO ] Duration: 15036.37 ms [ INFO ] Latency: - [ INFO ] Median: 34.62 ms - [ INFO ] Average: 34.76 ms - [ INFO ] Min: 25.46 ms - [ INFO ] Max: 51.89 ms - [ INFO ] Throughput: 343.29 FPS + [ INFO ] Median: 34.72 ms + [ INFO ] Average: 35.02 ms + [ INFO ] Min: 25.74 ms + [ INFO ] Max: 52.00 ms + [ INFO ] Throughput: 340.77 FPS Compare accuracy of the Original and Quantized Models diff --git a/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_43_0.png b/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_43_0.png index f4e1de3947dc95..24dd29e6b7751f 100644 --- a/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_43_0.png +++ b/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_43_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:44dd0441baec6d73d3cc7552c26d2878c806514edacb3da332031733686c654b -size 507432 +oid sha256:498642bae277603810b461dcd81b4e67eb3cce1e0f1099f346b8d832dde4720f +size 504317 diff --git a/docs/notebooks/yolov11-object-detection-with-output.rst b/docs/notebooks/yolov11-object-detection-with-output.rst index d987f4148e7265..b446d6d3c1d41e 100644 --- a/docs/notebooks/yolov11-object-detection-with-output.rst +++ b/docs/notebooks/yolov11-object-detection-with-output.rst @@ -141,7 +141,7 @@ Import required utility functions. The lower cell will download the .. 
parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg') @@ -207,14 +207,14 @@ Let us consider the examples: .. parsed-literal:: - 100%|██████████| 5.35M/5.35M [00:00<00:00, 23.2MB/s] + 100%|██████████| 5.35M/5.35M [00:00<00:00, 24.0MB/s] .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 480x640 2 bicycles, 2 cars, 2 dogs, 78.0ms - Speed: 2.3ms preprocess, 78.0ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 480x640 2 bicycles, 2 cars, 2 dogs, 80.3ms + Speed: 2.2ms preprocess, 80.3ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640) @@ -250,8 +250,8 @@ preserve dynamic shapes in the model. OpenVINO: starting export with openvino 2024.5.0-16993-9c432a3641a... 
OpenVINO: export success ✅ 1.8s, saved as 'yolo11n_openvino_model/' (5.4 MB) - Export complete (1.9s) - Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization + Export complete (2.0s) + Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization Predict: yolo predict task=detect model=yolo11n_openvino_model imgsz=640 half Validate: yolo val task=detect model=yolo11n_openvino_model imgsz=640 data=/usr/src/ultralytics/ultralytics/cfg/datasets/coco.yaml half Visualize: https://netron.app @@ -330,8 +330,8 @@ ready to check model prediction for object detection. Loading yolo11n_openvino_model for OpenVINO inference... Using OpenVINO LATENCY mode for batch=1 inference... - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 1 bicycle, 2 cars, 1 dog, 18.7ms - Speed: 2.0ms preprocess, 18.7ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 1 bicycle, 2 cars, 1 dog, 19.9ms + Speed: 1.8ms preprocess, 19.9ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640) @@ -645,8 +645,8 @@ on the image. .. 
parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 1 bicycle, 2 cars, 1 dog, 19.7ms - Speed: 1.8ms preprocess, 19.7ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 1 bicycle, 2 cars, 1 dog, 18.1ms + Speed: 1.8ms preprocess, 18.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640) @@ -708,7 +708,7 @@ models. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 18.73 ms + [ INFO ] Read model took 18.52 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [?,3,?,?] @@ -724,7 +724,7 @@ models. [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_7) : f32 / [...] / [1,84,8400] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 316.96 ms + [ INFO ] Compile model took 317.51 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -761,17 +761,17 @@ models. [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 120000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 30.27 ms + [ INFO ] First inference took 30.05 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 18480 iterations - [ INFO ] Duration: 120044.94 ms + [ INFO ] Count: 18396 iterations + [ INFO ] Duration: 120062.14 ms [ INFO ] Latency: - [ INFO ] Median: 38.09 ms - [ INFO ] Average: 38.84 ms - [ INFO ] Min: 20.45 ms - [ INFO ] Max: 97.13 ms - [ INFO ] Throughput: 153.94 FPS + [ INFO ] Median: 38.20 ms + [ INFO ] Average: 39.02 ms + [ INFO ] Min: 22.85 ms + [ INFO ] Max: 99.30 ms + [ INFO ] Throughput: 153.22 FPS .. code:: ipython3 @@ -798,7 +798,7 @@ models. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 26.48 ms + [ INFO ] Read model took 26.57 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [1,3,640,640] @@ -814,7 +814,7 @@ models. [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_7) : f32 / [...] / [1,84,8400] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 556.87 ms + [ INFO ] Compile model took 533.02 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -851,17 +851,17 @@ models. [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 31.34 ms + [ INFO ] First inference took 29.56 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 5208 iterations - [ INFO ] Duration: 15031.88 ms + [ INFO ] Count: 5148 iterations + [ INFO ] Duration: 15046.93 ms [ INFO ] Latency: - [ INFO ] Median: 34.21 ms - [ INFO ] Average: 34.44 ms - [ INFO ] Min: 18.94 ms - [ INFO ] Max: 52.48 ms - [ INFO ] Throughput: 346.46 FPS + [ INFO ] Median: 34.51 ms + [ INFO ] Average: 34.87 ms + [ INFO ] Min: 24.28 ms + [ INFO ] Max: 69.01 ms + [ INFO ] Throughput: 342.13 FPS Next steps diff --git a/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_43_0.png b/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_43_0.png index 02e6e38bb12a7b..d5c2464840ad65 100644 --- a/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_43_0.png +++ b/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_43_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0904f4ceeae66d4c516656b23389b16d93bc601214a4c9b8d1ee30f7b5366526 -size 572206 +oid sha256:4e54004e6e28473d61aacba64acb407291de3581ae9a0b792a3d8f96bd394f08 +size 572509 diff --git a/docs/notebooks/yolov8-object-detection-with-output.rst b/docs/notebooks/yolov8-object-detection-with-output.rst index 6b7dacd839a587..16648e4559f618 100644 --- a/docs/notebooks/yolov8-object-detection-with-output.rst +++ b/docs/notebooks/yolov8-object-detection-with-output.rst @@ -121,7 +121,7 @@ Install necessary packages. .. 
code:: ipython3 %pip install -q "openvino>=2024.0.0" "nncf>=2.9.0" - %pip install -q "torch>=2.1" "torchvision>=0.16" "ultralytics==8.3.0" onnx tqdm opencv-python --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "torch>=2.1" "torchvision>=0.16" "ultralytics==8.2.24" onnx tqdm opencv-python --extra-index-url https://download.pytorch.org/whl/cpu Import required utility functions. The lower cell will download the ``notebook_utils`` Python module from GitHub. @@ -161,7 +161,7 @@ Import required utility functions. The lower cell will download the .. parsed-literal:: - PosixPath('/home/ea/work/openvino_notebooks_new_clone/openvino_notebooks/notebooks/yolov8-optimization/data/coco_bike.jpg') + PosixPath('/home/akash/intel/openvino_notebooks/notebooks/yolov8-optimization/data/coco_bike.jpg') @@ -194,10 +194,9 @@ Let us consider the examples: from PIL import Image from ultralytics import YOLO - DET_MODEL_NAME = "yolo11n" + DET_MODEL_NAME = "yolov8n" - det_model = YOLO(f"{DET_MODEL_NAME}.pt") - det_model.to("cpu") + det_model = YOLO(models_dir / f"{DET_MODEL_NAME}.pt") label_map = det_model.model.names res = det_model(IMAGE_PATH) @@ -206,14 +205,24 @@ Let us consider the examples: .. parsed-literal:: + Downloading https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n.pt to 'models/yolov8n.pt'... - image 1/1 /home/ea/work/openvino_notebooks_new_clone/openvino_notebooks/notebooks/yolov8-optimization/data/coco_bike.jpg: 480x640 2 bicycles, 2 cars, 2 dogs, 101.6ms - Speed: 3.1ms preprocess, 101.6ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 640) +.. parsed-literal:: + + 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 6.25M/6.25M [00:02<00:00, 2.29MB/s] + + +.. 
parsed-literal:: + image 1/1 /home/akash/intel/openvino_notebooks/notebooks/yolov8-optimization/data/coco_bike.jpg: 480x640 2 bicycles, 2 cars, 1 dog, 82.9ms + Speed: 2.4ms preprocess, 82.9ms inference, 475.6ms postprocess per image at shape (1, 3, 480, 640) -.. image:: yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_1.png + + + +.. image:: yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_3.png @@ -230,10 +239,27 @@ preserve dynamic shapes in the model. .. code:: ipython3 # object detection model - det_model_path = Path(f"{DET_MODEL_NAME}_openvino_model/{DET_MODEL_NAME}.xml") + det_model_path = models_dir / f"{DET_MODEL_NAME}_openvino_model/{DET_MODEL_NAME}.xml" if not det_model_path.exists(): det_model.export(format="openvino", dynamic=True, half=True) + +.. parsed-literal:: + + Ultralytics YOLOv8.2.24 🚀 Python-3.8.10 torch-2.1.0+cu121 CPU (Intel Core(TM) i9-10980XE 3.00GHz) + + PyTorch: starting from 'models/yolov8n.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (6.2 MB) + + OpenVINO: starting export with openvino 2024.3.0-16041-1e3b88e4e3f-releases/2024/3... + OpenVINO: export success ✅ 1.7s, saved as 'models/yolov8n_openvino_model/' (6.4 MB) + + Export complete (3.1s) + Results saved to /home/akash/intel/openvino_notebooks/notebooks/yolov8-optimization/models + Predict: yolo predict task=detect model=models/yolov8n_openvino_model imgsz=640 half + Validate: yolo val task=detect model=models/yolov8n_openvino_model imgsz=640 data=coco.yaml half + Visualize: https://netron.app + + Verify model inference ~~~~~~~~~~~~~~~~~~~~~~ @@ -297,7 +323,6 @@ ready to check model prediction for object detection. det_model.predictor.inference = infer det_model.predictor.model.pt = False - det_model res = det_model(IMAGE_PATH) Image.fromarray(res[0].plot()[:, :, ::-1]) @@ -306,8 +331,8 @@ ready to check model prediction for object detection. .. 
parsed-literal:: - image 1/1 /home/ea/work/openvino_notebooks_new_clone/openvino_notebooks/notebooks/yolov8-optimization/data/coco_bike.jpg: 640x640 1 bicycle, 2 cars, 1 dog, 16.9ms - Speed: 3.7ms preprocess, 16.9ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 640) + image 1/1 /home/akash/intel/openvino_notebooks/notebooks/yolov8-optimization/data/coco_bike.jpg: 640x640 2 bicycles, 2 cars, 1 dog, 16.1ms + Speed: 3.4ms preprocess, 16.1ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 640) @@ -368,16 +393,245 @@ evaluation function. zip_ref.extractall(OUT_DIR / "coco/images") + +.. parsed-literal:: + + /home/akash/intel/NNCF/nncf/examples/post_training_quantization/openvino/yolov8/datasets/val2017.zip: 0%| … + + +.. parsed-literal:: + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. 
+ To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. 
+ The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. 
+ + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. 
+ To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + + .. parsed-literal:: - '/home/ea/work/openvino_notebooks_new_clone/openvino_notebooks/notebooks/yolov8-optimization/datasets/val2017.zip' already exists. - '/home/ea/work/openvino_notebooks_new_clone/openvino_notebooks/notebooks/yolov8-optimization/datasets/coco2017labels-segments.zip' already exists. + '/home/akash/intel/NNCF/nncf/examples/post_training_quantization/openvino/yolov8/datasets/coco2017labels-segments.zip' already exists. .. parsed-literal:: - /home/ea/work/openvino_notebooks_new_clone/openvino_notebooks/notebooks/yolov8-optimization/datasets/coco.yaml… + /home/akash/intel/NNCF/nncf/examples/post_training_quantization/openvino/yolov8/datasets/coco.yaml: 0%| … Define validation function @@ -520,7 +774,7 @@ validator class instance. .. parsed-literal:: - val: Scanning /home/ea/work/openvino_notebooks_new_clone/openvino_notebooks/notebooks/yolov8-optimization/datasets/coco/labels/val2017.cache... 4952 images, 48 backgrounds, 0 corrupt: 100%|██████████| 5000/5000 [00:00\ **Note**: Model evaluation is time consuming +process and can take several minutes, depending on the hardware. For +reducing calculation time, we define ``num_samples`` parameter with +evaluation subset size, but in this case, accuracy can be noncomparable +with originally reported by the authors of the model, due to validation +subset difference. *To validate the models on the full dataset set +``NUM_TEST_SAMPLES = None``.* + +.. 
code:: ipython3 + + NUM_TEST_SAMPLES = 300 + +.. code:: ipython3 + + fp_det_stats = test(det_ov_model, core, det_data_loader, det_validator, num_samples=NUM_TEST_SAMPLES) + + + +.. parsed-literal:: + + 0%| | 0/300 [00:00 ([1,640,640,3], [N,H,W,C], f32) convert layout [N,C,H,W]: ([1,640,640,3], [N,H,W,C], f32) -> ([1,3,640,640], [N,C,H,W], f32) @@ -1348,7 +1704,7 @@ Now, we can skip these preprocessing steps in detect function: -.. image:: yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_61_0.png +.. image:: yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_70_0.png @@ -1527,7 +1883,7 @@ Run the object detection: -.. image:: yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_67_0.png +.. image:: yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_76_0.png .. parsed-literal:: diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_16_1.jpg b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_16_1.jpg deleted file mode 100644 index 7a742941b54c17..00000000000000 --- a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_16_1.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:394895a09cc413a6343edb2a6f6242258701b6911c09d8405eacc7c27f6fac62 -size 110819 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_16_1.png b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_16_1.png index 6d4bfe012b6187..703c1375a98784 100644 --- a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_16_1.png +++ b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_16_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:7b18cc2e60a802c059cd5cc05598a06c4affd959216e03674eeb68e822295843 -size 913173 +oid sha256:e85f0434876997adc7cf2152fa667b4e271817a443a75ca350d284aee48f9145 +size 908034 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_38_1.jpg b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_38_1.jpg deleted file mode 100644 index 40ac3a8da0dc2a..00000000000000 --- a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_38_1.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:90d9079b877726f30f154885745b36d8bc281f4970ce10f86529b9739a56ac72 -size 112567 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_38_1.png b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_38_1.png deleted file mode 100644 index e7c4fb31035d98..00000000000000 --- a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_38_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6fad172495a797bc753d19d54bd4afaf8f08ed36dfd1b9761df7bfa948673aab -size 911538 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_43_1.png b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_43_1.png new file mode 100644 index 00000000000000..44e7623ec662d1 --- /dev/null +++ b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_43_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6778bdd31a39722d8ce3fb9ddea6383e451ab1b32cf23fad23b556bd89c99707 +size 907722 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_61_0.jpg 
b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_61_0.jpg deleted file mode 100644 index 202af866d221dd..00000000000000 --- a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_61_0.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:02d585dcd341e2580baf166b6f35337d2da17561a251fb5475041e2a20d6e558 -size 110078 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_61_0.png b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_61_0.png deleted file mode 100644 index d0d2028049c4f3..00000000000000 --- a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_61_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:44c1e80aa859828b9aae3240fb544b483df8dbd8b7c7bca29479a2a0714612c2 -size 929124 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_67_0.png b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_67_0.png deleted file mode 100644 index cdae11c722ca65..00000000000000 --- a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_67_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2c6c2609efd786cba1d9baac015d06040939ff8d1e7644f67d10d6df5c171a62 -size 569438 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_70_0.png b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_70_0.png new file mode 100644 index 00000000000000..3b025ea9323f25 --- /dev/null +++ b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_70_0.png @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:b44f35cf238dcd208fcebe73bb388947f5528a167b511c8098d040b05d9bb819 +size 931491 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_76_0.png b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_76_0.png new file mode 100644 index 00000000000000..6d628536486d93 --- /dev/null +++ b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_76_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc0d406144e3b07ae6bef629cd7b4973424847958b9865e7133bd0f0c7deeb5c +size 534154 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_1.jpg b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_1.jpg deleted file mode 100644 index bcf00f3d50dcb5..00000000000000 --- a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_1.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cc502c95e9f5526befe662fcc79a6a91d4f5ff0cfce09165436e91f4ef8224ad -size 113613 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_1.png b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_1.png deleted file mode 100644 index e886ccd29b47f9..00000000000000 --- a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2c7bb27d4264dbc22e0006e42fb2f1619992aeac7220232119578e42a0d8a083 -size 904352 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_3.png b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_3.png new file 
mode 100644 index 00000000000000..5073fa9f3eca09 --- /dev/null +++ b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_3.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46d38bd078c6eeda706640d0e35907a9a504bf78f9dd8dbf2961de865c294802 +size 907783 diff --git a/docs/notebooks/yolov9-optimization-with-output.rst b/docs/notebooks/yolov9-optimization-with-output.rst index 39690ea292721d..b132215f390216 100644 --- a/docs/notebooks/yolov9-optimization-with-output.rst +++ b/docs/notebooks/yolov9-optimization-with-output.rst @@ -21,7 +21,6 @@ The tutorial consists of the following steps: - Compare performance of the FP32 and quantized models. - Run optimized model inference on video - **Table of contents:** - `Prerequisites <#prerequisites>`__ @@ -58,7 +57,6 @@ Guide =2023.3.0" "nncf>=2.8.1" "opencv-python" "matplotlib>=3.4" "seaborn" "pandas" "scikit-learn" "torch" "torchvision" "tqdm" --extra-index-url https://download.pytorch.org/whl/cpu @@ -93,9 +91,9 @@ Prerequisites Cloning into 'yolov9'... remote: Enumerating objects: 781, done. remote: Total 781 (delta 0), reused 0 (delta 0), pack-reused 781 (from 1) - Receiving objects: 100% (781/781), 3.27 MiB | 19.93 MiB/s, done. + Receiving objects: 100% (781/781), 3.27 MiB | 16.41 MiB/s, done. Resolving deltas: 100% (331/331), done. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9 Get PyTorch model @@ -133,7 +131,7 @@ applicable for other models from YOLO V9 family. .. 
parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9/model/gelan-c.pt') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9/model/gelan-c.pt') @@ -188,11 +186,11 @@ using ``ov.save_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9/models/experimental.py:243: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9/models/experimental.py:243: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. 
It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. ckpt = torch.load(attempt_download(w), map_location='cpu') # load Fusing layers... Model summary: 387 layers, 25288768 parameters, 0 gradients, 102.1 GFLOPs - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9/models/yolo.py:108: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9/models/yolo.py:108: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! elif self.dynamic or self.shape != shape: @@ -573,10 +571,10 @@ asymmetric quantization of activations. .. parsed-literal:: - 2024-10-23 05:32:14.632848: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. 
You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-23 05:32:14.668324: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 05:37:29.322072: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 05:37:29.357249: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-23 05:32:15.276616: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 05:37:29.968269: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -675,7 +673,7 @@ models. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 26.16 ms + [ INFO ] Read model took 26.54 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] images (node: x) : f32 / [...] / [?,3,?,?] @@ -687,7 +685,7 @@ models. 
[Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'images': [1,3,640,640] - [ INFO ] Reshape model took 7.97 ms + [ INFO ] Reshape model took 8.17 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] images (node: x) : u8 / [N,C,H,W] / [1,3,640,640] @@ -697,7 +695,7 @@ models. [ INFO ] xi.3 (node: __module.model.22/aten::cat/Concat_1) : f32 / [...] / [1,144,40,40] [ INFO ] xi (node: __module.model.22/aten::cat/Concat) : f32 / [...] / [1,144,20,20] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 475.37 ms + [ INFO ] Compile model took 499.10 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -734,17 +732,17 @@ models. [ INFO ] Fill input 'images' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 182.45 ms + [ INFO ] First inference took 170.42 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 228 iterations - [ INFO ] Duration: 15670.39 ms + [ INFO ] Count: 222 iterations + [ INFO ] Duration: 15614.22 ms [ INFO ] Latency: - [ INFO ] Median: 412.82 ms - [ INFO ] Average: 410.86 ms - [ INFO ] Min: 309.65 ms - [ INFO ] Max: 431.51 ms - [ INFO ] Throughput: 14.55 FPS + [ INFO ] Median: 412.60 ms + [ INFO ] Average: 419.41 ms + [ INFO ] Min: 210.55 ms + [ INFO ] Max: 885.97 ms + [ INFO ] Throughput: 14.22 FPS .. code:: ipython3 @@ -769,7 +767,7 @@ models. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. 
[Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 41.07 ms + [ INFO ] Read model took 41.88 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] images (node: x) : f32 / [...] / [1,3,640,640] @@ -791,7 +789,7 @@ models. [ INFO ] xi.3 (node: __module.model.22/aten::cat/Concat_1) : f32 / [...] / [1,144,40,40] [ INFO ] xi (node: __module.model.22/aten::cat/Concat) : f32 / [...] / [1,144,20,20] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 943.07 ms + [ INFO ] Compile model took 946.19 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -828,17 +826,17 @@ models. [ INFO ] Fill input 'images' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 64.84 ms + [ INFO ] First inference took 68.41 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 720 iterations - [ INFO ] Duration: 15158.98 ms + [ INFO ] Count: 726 iterations + [ INFO ] Duration: 15191.13 ms [ INFO ] Latency: - [ INFO ] Median: 119.92 ms - [ INFO ] Average: 125.90 ms - [ INFO ] Min: 80.89 ms - [ INFO ] Max: 277.84 ms - [ INFO ] Throughput: 47.50 FPS + [ INFO ] Median: 121.43 ms + [ INFO ] Average: 125.10 ms + [ INFO ] Min: 56.89 ms + [ INFO ] Max: 305.84 ms + [ INFO ] Throughput: 47.79 FPS Run Live Object Detection diff --git a/docs/notebooks/yolov9-optimization-with-output_files/yolov9-optimization-with-output_36_0.png b/docs/notebooks/yolov9-optimization-with-output_files/yolov9-optimization-with-output_36_0.png index d6b48379756722..ccb15ab8382b86 100644 --- a/docs/notebooks/yolov9-optimization-with-output_files/yolov9-optimization-with-output_36_0.png +++ 
b/docs/notebooks/yolov9-optimization-with-output_files/yolov9-optimization-with-output_36_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:014be25aa9fb5ebd46a7f69afca67c33d8059694ca54bc2e32c0abfa460cb825 -size 496427 +oid sha256:13788e6c93a735cf601513bf18c5e6cae29819bdff2bd506fa90a4d34ae4bcc5 +size 496199 diff --git a/docs/openvino_sphinx_theme/README.md b/docs/openvino_sphinx_theme/README.md index 7931c481c308aa..2e82fa06e8c185 100644 --- a/docs/openvino_sphinx_theme/README.md +++ b/docs/openvino_sphinx_theme/README.md @@ -4,7 +4,8 @@ 1. Install the `openvino_sphinx_theme` using `python`: ``` -python setup.py install --user +cd openvino/docs/openvino_sphinx_theme +python -m pip install --user . ``` 2. Update the `html_theme` variable in your `conf.py`: diff --git a/docs/openvino_sphinx_theme/openvino_sphinx_theme/templates/layout.html b/docs/openvino_sphinx_theme/openvino_sphinx_theme/templates/layout.html index 25acb3c1e5cbda..a2ab53c6a57a83 100644 --- a/docs/openvino_sphinx_theme/openvino_sphinx_theme/templates/layout.html +++ b/docs/openvino_sphinx_theme/openvino_sphinx_theme/templates/layout.html @@ -28,7 +28,7 @@ {# The data-cfasync attribute disables CloudFlare's Rocket loader so that #} {# mode/theme are correctly set before the browser renders the page. 
#} {# https://github.com/pydata/pydata-sphinx-theme/pull/1045 #} - + - - - - + + - - - - {% endblock %} - {% block docs_navbar %} {{ super() }} {% include 'baner.html' %} diff --git a/docs/sphinx_setup/_templates/search.html b/docs/sphinx_setup/_templates/search.html index 3519f6e7e02f19..5430f24f74aa8c 100644 --- a/docs/sphinx_setup/_templates/search.html +++ b/docs/sphinx_setup/_templates/search.html @@ -2,133 +2,100 @@ {% set title = _('Search') %} {%- block content %} - {% block docs_navbar %} - {{ super() }} - {% include 'baner.html' %} - {% endblock %} +{% block docs_navbar %} +{{ super() }} +{% include 'baner.html' %} +{% endblock %} - {% block body %} - - - - - - - - - - - -
- - -
- - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
- {% endblock %} - - {%- block scripts_end %} - {{ _webpack.body_post() }} - {%- endblock %} +{% block body %} + + + + + + + + +
+ + +
+ + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + +
+
+{% endblock %} +{%- block scripts_end %} +{{ _webpack.body_post() }} {%- endblock %} + +{%- endblock %} \ No newline at end of file diff --git a/docs/sphinx_setup/conf.py b/docs/sphinx_setup/conf.py index def41af5943b3c..01c74de0175bcf 100644 --- a/docs/sphinx_setup/conf.py +++ b/docs/sphinx_setup/conf.py @@ -193,7 +193,6 @@ 'css/textfield.css', 'css/tabs.css', 'css/coveo_custom.css', - 'https://static.cloud.coveo.com/atomic/v2/themes/coveo.css', 'https://cdn.jsdelivr.net/npm/@splidejs/splide@4.1.4/dist/css/splide.min.css', ] diff --git a/docs/sphinx_setup/index.rst b/docs/sphinx_setup/index.rst index a65b5b5892c560..47dc76bf16cf29 100644 --- a/docs/sphinx_setup/index.rst +++ b/docs/sphinx_setup/index.rst @@ -124,11 +124,11 @@ Places to Begin Cloud-ready deployments for microservice applications. - .. button-link:: openvino-workflow/running-inference.html + .. button-link:: openvino-workflow/model-server/ovms_what_is_openvino_model_server.html :color: primary :outline: - Try it out + Check out Model Server .. grid-item-card:: Model Compression :img-top: ./assets/images/home_begin_tile_06.png diff --git a/samples/cpp/common/utils/src/args_helper.cpp b/samples/cpp/common/utils/src/args_helper.cpp index f4a3d10ceb0b5b..ba58f98e498e90 100644 --- a/samples/cpp/common/utils/src/args_helper.cpp +++ b/samples/cpp/common/utils/src/args_helper.cpp @@ -29,8 +29,7 @@ void readInputFilesArguments(std::vector& files, const std::string& arg) { struct stat sb; if (stat(arg.c_str(), &sb) != 0) { - slog::warn << "File " << arg << " cannot be opened!" << slog::endl; - return; + throw std::invalid_argument(arg + " file or directory not found."); } if (S_ISDIR(sb.st_mode)) { struct CloseDir { @@ -43,17 +42,20 @@ void readInputFilesArguments(std::vector& files, const std::string& using Dir = std::unique_ptr; Dir dp(opendir(arg.c_str())); if (dp == nullptr) { - slog::warn << "Directory " << arg << " cannot be opened!"
<< slog::endl; - return; + throw std::invalid_argument(arg + " directory cannot be opened!"); } struct dirent* ep; + size_t files_size = files.size(); while (nullptr != (ep = readdir(dp.get()))) { std::string fileName = ep->d_name; if (fileName == "." || fileName == "..") continue; files.push_back(arg + "/" + ep->d_name); } + if (files.size() == files_size) { + throw std::invalid_argument("No files were found in directory " + arg); + } } else { files.push_back(arg); } diff --git a/samples/js/node/.gitignore b/samples/js/node/.gitignore index 2ab910508d5529..c836a5862da256 100644 --- a/samples/js/node/.gitignore +++ b/samples/js/node/.gitignore @@ -1,2 +1,3 @@ node_modules -hello_reshape_ssd/out.jpg +out*.jpg +output/ diff --git a/samples/js/node/README.md b/samples/js/node/README.md index 9e69769778d8a8..2fd4dbe76e5900 100644 --- a/samples/js/node/README.md +++ b/samples/js/node/README.md @@ -7,13 +7,15 @@ To run samples, install dependencies first. In current directory run: npm install ``` -Note: Perform these steps also before running notebooks. +Note: Perform this step also before running notebooks. ## Samples - hello_classification - hello_reshape_ssd - classification_sample_async + - optical_character_recognition + - vision_background_removal ## Notebooks diff --git a/samples/js/node/classification_sample_async/README.md b/samples/js/node/classification_sample_async/README.md index 0b19e908587505..6a0dcba6d16e02 100644 --- a/samples/js/node/classification_sample_async/README.md +++ b/samples/js/node/classification_sample_async/README.md @@ -4,7 +4,12 @@ Models with only 1 input and output are supported. 
Run: ```bash -node classification_sample_async.js -m *path_to_model_file* -i *path_to_img1* -i *path_to_img2* -d AUTO +node classification_sample_async.js -m ../../assets/models/v3-small_224_1.0_float.xml -i ../../assets/images/coco.jpg -i ../../assets/images/coco_hollywood.jpg -d AUTO +``` + +Where +```bash +node classification_sample_async.js -m *path_to_model_file* -i *path_to_img1* -i *path_to_img2* -d *device* ``` Other details see in [../../../python/classification_sample_async/README.md](../../../python/classification_sample_async/README.md) diff --git a/samples/js/node/classification_sample_async/classification_sample_async.js b/samples/js/node/classification_sample_async/classification_sample_async.js index 94f4d7828c95c2..96724be80d3b0b 100644 --- a/samples/js/node/classification_sample_async/classification_sample_async.js +++ b/samples/js/node/classification_sample_async/classification_sample_async.js @@ -1,39 +1,30 @@ const { addon: ov } = require('openvino-node'); const args = require('args'); -const { cv } = require('opencv-wasm'); -const { getImageData } = require('../helpers.js'); - -args.options([{ - name: 'img', - defaultValue: [], -}, { - name: 'model', -}, { - name: 'device', -}]); -const { model: modelPath, device: deviceName, img: images } = - args.parse(process.argv); - -main(modelPath, images, deviceName); - -function completionCallback(result, imagePath) { - const predictions = Array.from(result.data) - .map((prediction, classId) => ({ prediction, classId })) - .sort(({ prediction: predictionA }, { prediction: predictionB }) => - predictionA === predictionB ? 0 : predictionA > predictionB ? 
-1 : 1); - - console.log(`Image path: ${imagePath}`); - console.log('Top 10 results:'); - console.log('class_id probability'); - console.log('--------------------'); - predictions.slice(0, 10).forEach(({ classId, prediction }) => - console.log(`${classId}\t ${prediction.toFixed(7)}`), - ); - console.log(); -} - -async function main(modelPath, images, deviceName) { +const Image = require('../image.js'); +const imagenetClassesMap = require('../../assets/datasets/imagenet_class_index.json'); + +args.options([ + { + name: 'img', + defaultValue: [], + }, + { + name: 'model', + }, + { + name: 'device', + }, +]); +const { + model: modelPath, + device: deviceName, + img: imgPaths +} = args.parse(process.argv); + +main(modelPath, imgPaths, deviceName); + +async function main(modelPath, imgPaths, deviceName) { //----------- Step 1. Initialize OpenVINO Runtime Core ----------------------- console.log('Creating OpenVINO Runtime Core'); const core = new ov.Core(); @@ -42,8 +33,6 @@ async function main(modelPath, images, deviceName) { console.log(`Reading the model: ${modelPath}`); // (.xml and .bin files) or (.onnx file) const model = await core.readModel(modelPath); - const [h, w] = model.inputs[0].shape.slice(-2); - const tensorShape = [1, h, w, 3]; if (model.inputs.length !== 1) throw new Error('Sample supports only single input topologies'); @@ -52,56 +41,65 @@ async function main(modelPath, images, deviceName) { throw new Error('Sample supports only single output topologies'); //----------- Step 3. Set up input ------------------------------------------- - // Read input image - const imagesData = []; - - for (const imagePath of images) - imagesData.push(await getImageData(imagePath)); + const inputImages = []; + const [, inputHeight, inputWidth] = model.inputs[0].getShape(); - const preprocessedImages = imagesData.map((imgData) => { - // Use opencv-wasm to preprocess image. 
- const originalImage = cv.matFromImageData(imgData); - const image = new cv.Mat(); - // The MobileNet model expects images in RGB format. - cv.cvtColor(originalImage, image, cv.COLOR_RGBA2RGB); - cv.resize(image, image, new cv.Size(w, h)); + // Read input image, resize it to the model's input size and convert it to a tensor. + for (const path of imgPaths) { + const img = await Image.load(path); + const resized = img.resize(inputWidth, inputHeight); - return new Uint8Array(image.data); - }); + inputImages.push(resized); + } //----------- Step 4. Apply preprocessing ------------------------------------ const _ppp = new ov.preprocess.PrePostProcessor(model); _ppp.input().tensor().setLayout('NHWC').setElementType(ov.element.u8); - _ppp.input().model().setLayout('NCHW'); + _ppp.input().model().setLayout('NHWC'); _ppp.output().tensor().setElementType(ov.element.f32); _ppp.build(); - //----------------- Step 5. Loading model to the device ---------------------- + //----------- Step 5. Loading model to the device ---------------------------- console.log('Loading the model to the plugin'); const compiledModel = await core.compileModel(model, deviceName); const outputName = compiledModel.output(0).toString(); - //----------- Step 6. Collecting promises to react when they resolve --------- - console.log('Starting inference in asynchronous mode'); + //----------- Step 6. 
Do inference ------------------------------------------- + console.log('Starting inference\n'); // Create infer request const inferRequest = compiledModel.createInferRequest(); - - const promises = preprocessedImages.map((tensorData, i) => { - const inferPromise = inferRequest.inferAsync([ - new ov.Tensor(ov.element.u8, tensorShape, tensorData) - ]); + const promises = inputImages.map((img, i) => { + const inferPromise = inferRequest.inferAsync([img.toTensor()]); inferPromise.then(result => - completionCallback(result[outputName], images[i])); + completionCallback(result[outputName], imgPaths[i])); return inferPromise; }); - //----------- Step 7. Do inference ------------------------------------------- + //----------- Step 7. Wait till all inferences execute ----------------------- await Promise.all(promises); console.log('All inferences executed'); console.log('\nThis sample is an API example, for any performance ' + 'measurements please use the dedicated benchmark_app tool'); } + +function completionCallback(result, imagePath) { + const predictions = Array.from(result.data) + .map((prediction, classId) => ({ prediction, classId })) + .sort(({ prediction: predictionA }, { prediction: predictionB }) => + predictionA === predictionB ? 0 : predictionA > predictionB ? 
-1 : 1); + + const imagenetClasses = ['background', ...Object.values(imagenetClassesMap)]; + + console.log(`Image path: ${imagePath}`); + console.log('Top 5 results:\n'); + console.log('id\tprobability\tlabel'); + console.log('---------------------------------'); + predictions.slice(0, 5).forEach(({ classId, prediction }) => + console.log(`${classId}\t${prediction.toFixed(7)}\t${imagenetClasses[classId][1]}`), + ); + console.log(); +} diff --git a/samples/js/node/fetch-samples-assets.js b/samples/js/node/fetch-samples-assets.js new file mode 100644 index 00000000000000..33dd509a922f85 --- /dev/null +++ b/samples/js/node/fetch-samples-assets.js @@ -0,0 +1,83 @@ +const { downloadFile } = require('./helpers.js'); + +const host = 'https://storage.openvinotoolkit.org'; + +const models = [ + // hello classification + '/repositories/openvino_notebooks/models/mobelinet-v3-tf/FP32/v3-small_224_1.0_float.xml', + '/repositories/openvino_notebooks/models/mobelinet-v3-tf/FP32/v3-small_224_1.0_float.bin', + + // hello reshape ssd + '/repositories/open_model_zoo/2022.3/models_bin/1/road-segmentation-adas-0001/FP32/road-segmentation-adas-0001.xml', + '/repositories/open_model_zoo/2022.3/models_bin/1/road-segmentation-adas-0001/FP32/road-segmentation-adas-0001.bin', + + // hello detection, optical character recognition + '/repositories/open_model_zoo/2022.3/models_bin/1/horizontal-text-detection-0001/FP32/horizontal-text-detection-0001.xml', + '/repositories/open_model_zoo/2022.3/models_bin/1/horizontal-text-detection-0001/FP32/horizontal-text-detection-0001.bin', + + '/repositories/open_model_zoo/public/text-recognition-resnet-fc/text-recognition-resnet-fc.xml', + '/repositories/open_model_zoo/public/text-recognition-resnet-fc/text-recognition-resnet-fc.bin', + + // vision background removal + '/repositories/open_model_zoo/public/vision-background-removal/unet_ir_model.xml', + '/repositories/open_model_zoo/public/vision-background-removal/unet_ir_model.bin', + + // pose estimation 
+ '/repositories/open_model_zoo/2022.1/models_bin/3/human-pose-estimation-0001/FP16-INT8/human-pose-estimation-0001.xml', + '/repositories/open_model_zoo/2022.1/models_bin/3/human-pose-estimation-0001/FP16-INT8/human-pose-estimation-0001.bin', + + // question answering + '/repositories/open_model_zoo/2022.3/models_bin/1/bert-small-uncased-whole-word-masking-squad-0001/FP16/bert-small-uncased-whole-word-masking-squad-0001.xml', +]; +const modelsDir = __dirname + '/../assets/models'; + + +const images = [ + // hello classification + '/repositories/openvino_notebooks/data/data/image/coco.jpg', + + // hello reshape ssd + '/repositories/openvino_notebooks/data/data/image/empty_road_mapillary.jpg', + + // hello detection, optical character recognition, pose estimation + '/repositories/openvino_notebooks/data/data/image/intel_rnb.jpg', + + // vision background removal + '/repositories/openvino_notebooks/data/data/image/coco_hollywood.jpg', + '/repositories/openvino_notebooks/data/data/image/wall.jpg', +]; +const imagesDir = __dirname + '/../assets/images'; + +const datasets = [ + // hello classification + '/repositories/openvino_notebooks/data/data/datasets/imagenet/imagenet_class_index.json', +]; +const datasetsDir = __dirname + '/../assets/datasets'; + +const vocab = [ + '/repositories/openvino_notebooks/data/data/text/bert-uncased/vocab.txt', +]; +const vocabDir = __dirname + '/../assets/vocab'; + +try { + main(); +} catch(error) { + console.error('Error Occurred', error); +} + +async function main() { + await downloadAssets(models, modelsDir); + await downloadAssets(images, imagesDir); + await downloadAssets(datasets, datasetsDir); + await downloadAssets(vocab, vocabDir); +} + +async function downloadAssets(links, destinationDir) { + for (const link of links) { + const url = host + link; + const filename = link.split('/').pop(); + + await downloadFile(url, filename, destinationDir); + console.log(`Downloaded: ${filename} \n`); + } +} diff --git 
a/samples/js/node/hello_classification/README.md b/samples/js/node/hello_classification/README.md index 2de983af58334b..1d20bba8ac63e6 100644 --- a/samples/js/node/hello_classification/README.md +++ b/samples/js/node/hello_classification/README.md @@ -2,9 +2,13 @@ Models with only 1 input and output are supported. -Run: +Run sample: ```bash -node hello_classification.js *path_to_model_file* *path_to_img* AUTO +node hello_classification.js ../../assets/models/v3-small_224_1.0_float.xml ../../assets/images/coco.jpg AUTO +``` +Where +```bash +node hello_classification.js *path_to_model_file* *path_to_img* *device* ``` Other details see in [../../../python/hello_classification/README.md](../../../python/hello_classification/README.md) diff --git a/samples/js/node/hello_classification/hello_classification.js b/samples/js/node/hello_classification/hello_classification.js index 00ba868e40f3a0..9edbc998d006bd 100644 --- a/samples/js/node/hello_classification/hello_classification.js +++ b/samples/js/node/hello_classification/hello_classification.js @@ -1,7 +1,7 @@ const { addon: ov } = require('openvino-node'); -const { cv } = require('opencv-wasm'); -const { getImageData } = require('../helpers.js'); +const Image = require('../image.js'); +const imagenetClassesMap = require('../../assets/datasets/imagenet_class_index.json'); // Parsing and validation of input arguments if (process.argv.length !== 5) @@ -31,21 +31,12 @@ async function main(modelPath, imagePath, deviceName) { //----------------- Step 3. Set up input ------------------------------------- // Read input image - const imgData = await getImageData(imagePath); - - // Use opencv-wasm to preprocess image. - const originalImage = cv.matFromImageData(imgData); - const image = new cv.Mat(); - // The MobileNet model expects images in RGB format. 
- cv.cvtColor(originalImage, image, cv.COLOR_RGBA2RGB); - - const tensorData = new Float32Array(image.data); - const shape = [1, image.rows, image.cols, 3]; - const inputTensor = new ov.Tensor(ov.element.f32, shape, tensorData); + const img = await Image.load(imagePath); + const inputTensor = img.toTensor(); //----------------- Step 4. Apply preprocessing ------------------------------ const _ppp = new ov.preprocess.PrePostProcessor(model); - _ppp.input().tensor().setShape(shape).setLayout('NHWC'); + _ppp.input().tensor().setElementType(ov.element.u8).setShape(inputTensor.getShape()).setLayout('NHWC'); _ppp.input().preprocess().resize(ov.preprocess.resizeAlgorithm.RESIZE_LINEAR); _ppp.input().model().setLayout('NHWC'); _ppp.output().tensor().setElementType(ov.element.f32); @@ -69,12 +60,14 @@ async function main(modelPath, imagePath, deviceName) { .sort(({ prediction: predictionA }, { prediction: predictionB }) => predictionA === predictionB ? 0 : predictionA > predictionB ? -1 : 1); + const imagenetClasses = ['background', ...Object.values(imagenetClassesMap)]; + console.log(`Image path: ${imagePath}`); - console.log('Top 10 results:'); - console.log('class_id probability'); - console.log('--------------------'); + console.log('Top 10 results:\n'); + console.log('id\tprobability\tlabel'); + console.log('---------------------------------'); predictions.slice(0, 10).forEach(({ classId, prediction }) => - console.log(`${classId}\t ${prediction.toFixed(7)}`), + console.log(`${classId}\t${prediction.toFixed(7)}\t${imagenetClasses[classId][1]}`), ); console.log('\nThis sample is an API example, for any performance ' diff --git a/samples/js/node/hello_reshape_ssd/README.md b/samples/js/node/hello_reshape_ssd/README.md index 21d8be8ec4b50d..547cf989478a87 100644 --- a/samples/js/node/hello_reshape_ssd/README.md +++ b/samples/js/node/hello_reshape_ssd/README.md @@ -2,9 +2,13 @@ Models with only 1 input and output are supported. 
-Run: +Run sample: ```bash -node hello_reshape_ssd.js *path_to_model_file* *path_to_img* AUTO +node hello_reshape_ssd.js ../../assets/models/road-segmentation-adas-0001.xml ../../assets/images/empty_road_mapillary.jpg AUTO +``` +Where +```bash +node hello_reshape_ssd.js *path_to_model_file* *path_to_img* *device* ``` Other details see in [../../../python/hello_reshape_ssd/README.md](../../../python/hello_reshape_ssd/README.md) diff --git a/samples/js/node/hello_reshape_ssd/hello_reshape_ssd.js b/samples/js/node/hello_reshape_ssd/hello_reshape_ssd.js index 416e8bc8180668..ebcc8e83223853 100644 --- a/samples/js/node/hello_reshape_ssd/hello_reshape_ssd.js +++ b/samples/js/node/hello_reshape_ssd/hello_reshape_ssd.js @@ -1,13 +1,5 @@ const { addon: ov } = require('openvino-node'); - -const fs = require('node:fs/promises'); -const { cv } = require('opencv-wasm'); -const { - setShape, - getImageData, - getImageBuffer, - arrayToImageData, -} = require('../helpers.js'); +const Image = require('../image.js'); // Parsing and validation of input arguments if (process.argv.length !== 5) @@ -38,24 +30,15 @@ async function main(modelPath, imagePath, deviceName) { //----------------- Step 3. Set up input ------------------------------------- // Read input image - const imgData = await getImageData(imagePath); - - // Use opencv-wasm to preprocess image. - const originalImage = cv.matFromImageData(imgData); - const image = new cv.Mat(); - // The MobileNet model expects images in RGB format. - cv.cvtColor(originalImage, image, cv.COLOR_RGBA2RGB); - - const tensorData = new Uint8Array(image.data); - const shape = [1, image.rows, image.cols, 3]; - const inputTensor = new ov.Tensor(ov.element.u8, shape, tensorData); + const img = await Image.load(imagePath); + const inputTensor = img.toTensor(); //----------------- Step 4. 
Apply preprocessing ------------------------------ const _ppp = new ov.preprocess.PrePostProcessor(model); _ppp.input().preprocess().resize(ov.preprocess.resizeAlgorithm.RESIZE_LINEAR); _ppp.input().tensor() - .setShape(shape) + .setShape(inputTensor.getShape()) .setElementType(ov.element.u8) .setLayout('NHWC'); @@ -70,48 +53,51 @@ async function main(modelPath, imagePath, deviceName) { //---------------- Step 6. Create infer request and do inference synchronously console.log('Starting inference in synchronous mode'); const inferRequest = compiledModel.createInferRequest(); - inferRequest.setInputTensor(inputTensor); - inferRequest.infer(); + const outputs = inferRequest.infer([inputTensor]); //----------------- Step 7. Process output ----------------------------------- const outputLayer = compiledModel.outputs[0]; - const resultInfer = inferRequest.getTensor(outputLayer); - const predictions = Array.from(resultInfer.data); - const [height, width] = [originalImage.rows, originalImage.cols]; - - const detections = setShape(predictions, [100, 7]); - const color = [255, 0, 0, 255]; - const THROUGHPUT = 0.9; - - detections.forEach(detection => { - const [classId, confidence, xmin, ymin, xmax, ymax] = detection.slice(1); - - if (confidence < THROUGHPUT) return; - - console.log(`Found: classId = ${classId}, ` - + `confidence = ${confidence.toFixed(2)}, ` - + `coords = (${xmin}, ${ymin}), (${xmax}, ${ymax})`, - ); - - // Draw a bounding box on a output image - cv.rectangle(originalImage, - new cv.Point(xmin*width, ymin*height), - new cv.Point(xmax*width, ymax*height), - color, - 2, - ); - }); - - const resultImgData = arrayToImageData(originalImage.data, width, height); + const output = outputs[outputLayer]; + const outputData = output.data; + const resultLayer = []; + const colormap = [ + [68, 1, 84, 255], + [48, 103, 141, 255], + [53, 183, 120, 255], + [199, 216, 52, 255], + ]; + const size = outputData.length/4; + + for (let i = 0; i < size; i++) { + const valueAt = 
(i, number) => outputData[i + number*size]; + const currentValues = { + bg: valueAt(i, 0), + c: valueAt(i, 1), + h: valueAt(i, 2), + w: valueAt(i, 3), + }; + const values = Object.values(currentValues); + const maxIndex = values.indexOf(Math.max(...values)); + + resultLayer.push(maxIndex); + } + + const pixels = []; + resultLayer.forEach(i => pixels.push(...colormap[i])); + + const alpha = 0.6; const filename = 'out.jpg'; + const [, , H, W] = output.getShape(); - await fs.writeFile(`./${filename}`, getImageBuffer(resultImgData)); + const segmentsImg = Image.fromArray(pixels, W, H); + const resizedSegments = segmentsImg.resize(img.width, img.height); + const mergedImg = Image.overlay(img, resizedSegments, alpha); try { - await fs.readFile(filename); - console.log('Image out.jpg was created!'); + await mergedImg.save(filename); + console.log(`Image '${filename}' was created.`); } catch(err) { - console.log(`Image ${filename} was not created. Check your permissions.`); + console.log(`Image '${filename}' was not created. 
Check your permissions.`); } console.log('\nThis sample is an API example, for any performance ' diff --git a/samples/js/node/helpers.js b/samples/js/node/helpers.js index 5cbd6650495070..5d56d4c60a7139 100644 --- a/samples/js/node/helpers.js +++ b/samples/js/node/helpers.js @@ -1,17 +1,8 @@ const path = require('node:path'); -const { cv } = require('opencv-wasm'); const { createWriteStream } = require('node:fs'); const { mkdir, stat } = require('node:fs/promises'); const { HttpsProxyAgent } = require('https-proxy-agent'); -const { - Image, - ImageData, - loadImage, - createCanvas, - createImageData, -} = require('canvas'); - module.exports = { exp, sum, @@ -23,96 +14,29 @@ module.exports = { setShape, transform, downloadFile, - displayImage, - getImageData, extractValues, - getImageBuffer, - arrayToImageData, - displayArrayAsImage, matrixMultiplication, }; -function arrayToImageData(array, width, height) { - return createImageData(new Uint8ClampedArray(array), width, height); -} - -function getImageBuffer(imageOrImageData) { - const canvas = createCanvas(imageOrImageData.width, imageOrImageData.height); - const ctx = canvas.getContext('2d'); - - if (imageOrImageData instanceof Image) - ctx.drawImage(imageOrImageData, 0, 0); - else if (imageOrImageData instanceof ImageData) - ctx.putImageData(imageOrImageData, 0, 0); - else - throw Error(`Passed parameters has type '${typeof imageOrImageData}'. 
` - + 'It is\'t supported.'); - - return canvas.toBuffer('image/jpeg'); -} - -function displayImage(imageOrImageData, display) { - const buffer = getImageBuffer(imageOrImageData); - - display.image(buffer); -} - -function displayArrayAsImage(arr, width, height, display) { - const alpha = 255; - const componentsPerPixel = arr.length / (width*height); - - try { - switch (componentsPerPixel) { - case 1: - arr = arr.reduce((acc, val) => { - acc.push(val, val, val, alpha); - - return acc; - }, []); - break; - - case 3: - arr = arr.reduce((acc, val, index) => { - if (index && index%3 === 0) acc.push(alpha); - - acc.push(val); - - return acc; - }, []); - break; - } - } catch(e) { - console.log(e); - } - - const imageData = arrayToImageData(arr, width, height); - - displayImage(imageData, display); -} - -async function getImageData(path) { - const image = await loadImage(path); - const { width, height } = image; - - const canvas = await createCanvas(width, height); - const ctx = canvas.getContext('2d'); - - ctx.drawImage(image, 0, 0); - - return ctx.getImageData(0, 0, width, height); -} - function transform(arr, { width, height }, order) { - const img = new cv.Mat(height, width, cv.CV_8UC3); - - img.data.set(arr, 0, arr.length); + // Calculate the number of pixels and the size of each channel + const numPixels = width * height; + const channels = [[], [], []]; + + // Separate RGB channels + for (let i = 0; i < numPixels; i++) { + channels[0].push(arr[i * 3]); // Red channel + channels[1].push(arr[i * 3 + 1]); // Green channel + channels[2].push(arr[i * 3 + 2]); // Blue channel + } - const channels = new cv.MatVector(); - cv.split(img, channels); + // Reorder channels based on the 'order' array + const reorderedChannels = order.map(num => channels[num]); - const val = order.map(num => [...channels.get(num).data]); + // Flatten reordered channels into a single array + const result = reorderedChannels.flat(); - return [].concat(...val); + return result; } async function 
downloadFile(url, filename, destination) { diff --git a/samples/js/node/image.js b/samples/js/node/image.js new file mode 100644 index 00000000000000..30ac35032d7ed7 --- /dev/null +++ b/samples/js/node/image.js @@ -0,0 +1,275 @@ +const { + ImageData, + loadImage, + createCanvas, +} = require('@napi-rs/canvas'); +const path = require('node:path'); +const fs = require('node:fs/promises'); +const { addon: ov } = require('openvino-node'); + +const codeENOENT = 'ENOENT'; + +class OvImage { + constructor(imageData) { + this.imageData = imageData; + this.channels = imageData.data.length / (this.width*this.height); + } + + get width() { + return this.imageData.width; + } + + get height() { + return this.imageData.height; + } + + get rgb() { + return this.imageData.data.filter((_, index) => index % 4 !== 3); + } + + get rgba() { + return this.imageData.data; + } + + get grayscale() { + const grayData = new Uint8ClampedArray(this.width * this.height); + + for (let i = 0; i < this.imageData.data.length; i += 4) { + const [r, g, b] = this.imageData.data.slice(i, i + 3); + const gray = 0.299 * r + 0.587 * g + 0.114 * b; + + grayData[i / 4] = gray; + } + + return grayData; + } + + get canvasCtx() { + const canvas = createCanvas(this.width, this.height); + const ctx = canvas.getContext('2d'); + + ctx.putImageData(this.imageData, 0, 0); + + return ctx; + } + + get buffer() { + return this.canvasCtx.canvas.toBuffer('image/jpeg'); + } + + drawRect(x, y, width, height, properties) { + const ctx = this.canvasCtx; + + ctx.strokeStyle = properties.color || 'red'; + ctx.lineWidth = properties.width || 1; + ctx.strokeRect(x, y, width, height); + + const imageData = ctx.getImageData(0, 0, this.width, this.height); + + return new OvImage(imageData); + } + + drawText(text, x, y, properties) { + const ctx = this.canvasCtx; + + ctx.font = properties.font || '30px Arial'; + ctx.fillStyle = properties.color || 'red'; + ctx.fillText(text, x, y); + + const imageData = ctx.getImageData(0, 0, 
this.width, this.height); + + return new OvImage(imageData); + } + + drawCircle(x, y, radius, properties) { + const ctx = this.canvasCtx; + + ctx.strokeStyle = properties.color || 'red'; + ctx.lineWidth = properties.width || 1; + ctx.beginPath(); + ctx.arc(x, y, radius, 0, 2 * Math.PI); + ctx.stroke(); + + const imageData = ctx.getImageData(0, 0, this.width, this.height); + + return new OvImage(imageData); + } + + drawLine(x1, y1, x2, y2, properties) { + const ctx = this.canvasCtx; + + ctx.strokeStyle = properties.color || 'red'; + ctx.lineWidth = properties.width || 1; + ctx.beginPath(); + ctx.moveTo(x1, y1); + ctx.lineTo(x2, y2); + ctx.stroke(); + + const imageData = ctx.getImageData(0, 0, this.width, this.height); + + return new OvImage(imageData); + } + + toTensor() { + return new ov.Tensor( + ov.element.u8, + [1, this.height, this.width, 3], + new Uint8ClampedArray(this.rgb), + ); + } + + resize(newWidth, newHeight) { + const ctx = this.canvasCtx; + + const canvas2 = createCanvas(newWidth, newHeight); + const ctx2 = canvas2.getContext('2d'); + ctx2.drawImage(ctx.canvas, 0, 0, newWidth, newHeight); + + const imageData = ctx2.getImageData(0, 0, newWidth, newHeight); + + return new OvImage(imageData); + } + + invert() { + const invertedData = this.rgba.map((value, index) => { + if (index % 4 === 3) + return 255; + + return 255 - value; + }); + + return OvImage.fromArray(invertedData, this.width, this.height); + } + + crop(x, y, width, height) { + const canvas2 = createCanvas(width, height); + const ctx2 = canvas2.getContext('2d'); + + ctx2.drawImage(this.canvasCtx.canvas, x, y, width, height, 0, 0, width, height); + + const imageData = ctx2.getImageData(0, 0, width, height); + + return new OvImage(imageData); + } + + async save(filepath) { + const destination = path.dirname(filepath); + + try { + await fs.access(destination); + } catch(error) { + if (error.code !== codeENOENT) throw error; + + await fs.mkdir(destination, { recursive: true }); + } + + return await 
fs.writeFile(filepath, this.buffer); + } + + // Display the image using the node notebook display object + display(display) { + display.image(this.buffer); + } + + static async load(path) { + const image = await loadImage(path); + const { width, height } = image; + + const canvas = await createCanvas(width, height); + const ctx = canvas.getContext('2d'); + + ctx.drawImage(image, 0, 0); + + return new OvImage(ctx.getImageData(0, 0, width, height)); + } + + static fromArray(arr, width, height) { + const canvas = createCanvas(width, height); + const ctx = canvas.getContext('2d'); + + const imageData = new ImageData( + new Uint8ClampedArray(arr), + width, + height, + ); + + ctx.putImageData(imageData, 0, 0); + + return new OvImage(ctx.getImageData(0, 0, width, height)); + } + + static merge(img1, img2) { + if (img1.width !== img2.width || img1.height !== img2.height) + throw new Error('Images should have the same size'); + + const canvas = createCanvas(img1.width, img1.height); + const ctx = canvas.getContext('2d'); + + const img1Data = img1.imageData.data; + const img2Data = img2.imageData.data; + + const mergedData = img1Data.map((_, index) => { + if (index % 4 === 3) + return 255; + + return (img1Data[index] + img2Data[index]); + }); + + const imageData = new ImageData( + new Uint8ClampedArray(mergedData), + img1.width, + img1.height, + ); + + ctx.putImageData(imageData, 0, 0); + + return new OvImage(ctx.getImageData(0, 0, img1.width, img1.height)); + } + + static mask(img1, img2) { + if (img1.width !== img2.width || img1.height !== img2.height) + throw new Error('Images should have the same size'); + + const canvas = createCanvas(img1.width, img1.height); + const ctx = canvas.getContext('2d'); + + const img1Data = img1.imageData.data; + const img2Data = img2.imageData.data; + + const subtractedData = img1Data.map((_, index) => { + if (index % 4 === 3) + return 255; + + return img1Data[index] * (img2Data[index] / 255); + }); + + const imageData = new ImageData( + 
new Uint8ClampedArray(subtractedData), + img1.width, + img1.height, + ); + + ctx.putImageData(imageData, 0, 0); + + return new OvImage(ctx.getImageData(0, 0, img1.width, img1.height)); + } + + static overlay(img1, img2, alpha) { + if (img1.width !== img2.width || img1.height !== img2.height) + throw new Error('Images should have the same size'); + + const img1Data = img1.imageData.data; + const img2Data = img2.imageData.data; + + const overlayedData = img1Data.map((_, index) => { + if (index % 4 === 3) + return 255; + + return img1Data[index] * (1 - alpha) + img2Data[index] * alpha; + }); + + return OvImage.fromArray(overlayedData, img1.width, img1.height); + } +} + +module.exports = OvImage; diff --git a/samples/js/node/notebooks/hello-detection.nnb b/samples/js/node/notebooks/hello-detection.nnb index f6139ec7ec8b98..60640b3bd042ea 100644 --- a/samples/js/node/notebooks/hello-detection.nnb +++ b/samples/js/node/notebooks/hello-detection.nnb @@ -17,57 +17,42 @@ { "language": "typescript", "source": [ - "const { cv } = require('opencv-wasm');\nconst { display } = require('node-kernel');\nconst { transform, getImageData, displayArrayAsImage, downloadFile } = require('../helpers.js');\n\nconst { addon: ov } = require('openvino-node'); \n" + "const { addon: ov } = require('openvino-node');\nconst { display } = require('node-kernel');\n\nconst Image = require('../image');\nconst { transform, argMax, setShape } = require('../helpers.js');\n" ], "outputs": [] }, { "language": "markdown", "source": [ - "## Download the Model" + "## Load the Model" ], "outputs": [] }, { "language": "typescript", "source": [ - "const baseArtifactsDir = '../../assets/models';\n\nconst modelName = 'horizontal-text-detection-0001';\nconst modelXMLName = `${modelName}.xml`;\nconst modelBINName = `${modelName}.bin`;\n\nconst modelXMLPath = baseArtifactsDir + '/' + modelXMLName;\n\nconst baseURL = 
'https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.3/models_bin/1/horizontal-text-detection-0001/FP32/';\n\nawait downloadFile(baseURL + modelXMLName, modelXMLName, baseArtifactsDir);\nawait downloadFile(baseURL + modelBINName, modelBINName, baseArtifactsDir);\n" + "const modelXMLPath = '../../assets/models/horizontal-text-detection-0001.xml';\n\n// Initialize OpenVINO core and load the detection model\nconst core = new ov.Core();\nconst model = await core.readModel(modelXMLPath);\nconst compiledModel = await core.compileModel(model, 'AUTO');\nconst inputLayer = compiledModel.input(0);\nconst outputLayer = compiledModel.output('boxes');\n" ], - "outputs": [ - { - "items": [ - { - "mime": "application/vnd.code.notebook.stdout", - "value": [ - "File successfully stored at '/home/nvishnya/Code/wasm-openvino/samples/js/assets/models/horizontal-text-detection-0001.bin'", - "" - ] - } - ] - } - ] + "outputs": [] }, { "language": "markdown", "source": [ - "## Download an Image" + "## Load an Image" ], "outputs": [] }, { "language": "typescript", "source": [ - "const baseImagesDir = '../../assets/images';\nconst imgUrl = 'https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/intel_rnb.jpg';\n\nawait downloadFile(imgUrl, 'intel_rnb.jpg', baseImagesDir);\n" + "const imagePath = '../../assets/images/intel_rnb.jpg';\nconst img = await Image.load(imagePath);\nimg.display(display);\n\n// Resize the image to meet network input size\nconst [inputHeight, inputWidth] = inputLayer.shape.slice(2);\nconst resizedImg = img.resize(inputWidth, inputHeight);\n\n// Prepare input tensor\nconst inputImageTransformedData = transform(\n resizedImg.rgb,\n { width: inputWidth, height: inputHeight },\n [0, 1, 2],\n);\nconst tensorData = new Float32Array(inputImageTransformedData);\nconst tensor = new ov.Tensor(ov.element.f32, inputLayer.shape, tensorData);\n" ], "outputs": [ { "items": [ { - "mime": "application/vnd.code.notebook.stdout", - "value": 
[ - "File successfully stored at '/home/nvishnya/Code/wasm-openvino/samples/js/assets/images/intel_rnb.jpg'", - "" - ] + "mime": "image/jpeg", + "value": "" } ] } @@ -76,51 +61,28 @@ { "language": "markdown", "source": [ - "## Load the Model" + "## Do Inference" ], "outputs": [] }, { "language": "typescript", "source": [ - "const core = new ov.Core();\nconst model = await core.readModel(modelXMLPath);\nconst compiledModel = await core.compileModel(model, 'CPU');\n\nconst inputLayer = compiledModel.input(0);\nconst outputLayer = compiledModel.output('boxes');\n" - ], - "outputs": [] - }, - { - "language": "markdown", - "source": [ - "## Load an Image" + "const inferRequest = compiledModel.createInferRequest();\nconst result = await inferRequest.inferAsync([tensor]);\n" ], "outputs": [] }, - { - "language": "typescript", - "source": [ - "const imgData = await getImageData('../../assets/images/intel_rnb.jpg');\nconst originalImage = cv.matFromImageData(imgData);\nconst { cols: originalWidth, rows: originalHeight } = originalImage;\n\nconst image = new cv.Mat();\nconst resizedImage = new cv.Mat();\ncv.cvtColor(originalImage, image, cv.COLOR_RGBA2RGB);\ncv.cvtColor(image, image, cv.COLOR_BGR2RGB);\n\nconst [B, C, H, W] = inputLayer.shape;\n\ncv.resize(image, resizedImage, new cv.Size(W, H));\n\nconst inputImage = transform(resizedImage.data, { width: W, height: H }, [0, 1, 2]); // NHWC to NCHW\n\ndisplayArrayAsImage(originalImage.data, originalWidth, originalHeight, display);\n" - ], - "outputs": [ - { - "items": [ - { - "mime": "image/jpeg", - "value": "" - } - ] - } - ] - }, { "language": "markdown", "source": [ - "## Do Inference" + "## Initialize helper functions\n" ], "outputs": [] }, { "language": "typescript", "source": [ - "const tensorData = new Float32Array(inputImage);\nconst tensor = new ov.Tensor(ov.element.f32, Int32Array.from(inputLayer.shape), tensorData);\n\nconst inferRequest = 
compiledModel.createInferRequest();\ninferRequest.setInputTensor(tensor);\ninferRequest.infer();\n\nconst output = inferRequest.getTensor(outputLayer);\nconst { data: boxes } = output;\n" + "// Function to extract bounding boxes from the model output\nfunction extractBoundingBoxes(output) {\n const { data: boxes } = output;\n const foldingCoefficient = 5;\n const numberOfBoxes = boxes.length / foldingCoefficient;\n\n return setShape(boxes, [numberOfBoxes, foldingCoefficient]);\n}\n\n// Function to adjust bounding box coordinates by a given ratio\nfunction multiplyByRatio(ratioX, ratioY, box) {\n const scaleShape = (shape, idx) => {\n const position = idx % 2\n ? Math.max(shape * ratioY, 10)\n : shape * ratioX;\n\n return Math.floor(position);\n }\n\n return box.map(scaleShape);\n}" ], "outputs": [] }, @@ -134,14 +96,14 @@ { "language": "typescript", "source": [ - "// For each detection, the description is in the [x_min, y_min, x_max, y_max, conf] format:\n// The image passed here is in BGR format with changed width and height. 
To display it in colors expected by matplotlib, use cvtColor function\nfunction convertResultToImage(bgrImage, resizedImage, boxes, componentsCount, options) {\n\tconst defaultOptions = { threshold: 0.3, confLabels: true };\n\tconst { threshold, confLabels } = Object.assign(defaultOptions, options);\n\n\t// Define colors for boxes and descriptions.\n\tconst colors = { red: [255, 0, 0, 255], green: [0, 255, 0, 255] };\n\n\t// Fetch the image shapes to calculate a ratio.\n\tconst [realY, realX] = [bgrImage.rows, bgrImage.cols];\n\tconst [resizedY, resizedX] = [resizedImage.rows, resizedImage.cols];\n\n\tconst [ratioX, ratioY] = [realX / resizedX, realY / resizedY];\n\n\t// Convert the base image from BGR to RGB format.\n\tconst rgbImage = new cv.Mat();\n\tcv.cvtColor(bgrImage, rgbImage, cv.COLOR_BGR2RGB);\n\n\t// Iterate through non-zero boxes.\n\tfor (let i = 0; i < boxes.length; i += componentsCount) {\n\t\tconst box = boxes.slice(i, i + componentsCount);\n\n\t\t// Pick a confidence factor from the last place in an array.\n\t\tconst conf = box[box.length - 1];\n\n\t\tif (conf < threshold) continue;\n\n\t\t// Convert float to int and multiply corner position of each box by x and y ratio.\n\t\t// If the bounding box is found at the top of the image, \n\t\t// position the upper box bar little lower to make it visible on the image. \n\t\tconst [xMin, yMin, xMax, yMax] = box.slice(0, -1).map((cornerPosition, idx) =>\n\t\t\tidx % 2\n\t\t\t\t? 
parseInt(Math.max(cornerPosition * ratioY, 10))\n\t\t\t\t: parseInt(cornerPosition * ratioX)\n\t\t);\n\t\t// Draw a box based on the position, parameters in rectangle function are: image, start_point, end_point, color, thickness.\n\t\tcv.rectangle(rgbImage, new cv.Point(xMin, yMin), new cv.Point(xMax, yMax), colors.green, 2);\n\n\t\t// Add text to the image based on position and confidence.\n\t\t// Parameters in text function are: image, text, bottom-left_corner_textfield, font, font_scale, color, thickness, line_type.\n\t\tif (confLabels)\n\t\t\tcv.putText(\n\t\t\t\trgbImage,\n\t\t\t\t`${conf.toFixed(2)}`,\n\t\t\t\tnew cv.Point(xMin, yMin - 10),\n\t\t\t\tcv.FONT_HERSHEY_SIMPLEX,\n\t\t\t\t0.8,\n\t\t\t\tcolors.red,\n\t\t\t\t1,\n\t\t\t\tcv.LINE_AA,\n\t\t\t);\n\t}\n\n\treturn rgbImage;\n}\n\nconst [ __, componentsCount] = output.getShape();\nconst img = convertResultToImage(image, resizedImage, boxes, componentsCount, { confLabels: false });\n\ndisplayArrayAsImage(img.data, originalWidth, originalHeight, display);\n" + "// Calculate ratios\nconst [ratioX, ratioY] = [img.width / inputWidth, img.height / inputHeight];\nconst boundingBoxesArray = extractBoundingBoxes(result[outputLayer]);\n// Resize bounding boxes to the original image size\nconst boundingBoxesOriginalSizeArray = boundingBoxesArray.map(box =>\n [...multiplyByRatio(ratioX, ratioY, box), box[4]]);\n\n// Takes original image and bounding boxes\n// and returns the image with bounding boxes drawn on it\nasync function putBoundingBoxesOnImage(img, boxes, threshold = 0.3) {\n let finalImage = img;\n\n for (const box of boxes) {\n const conf = box[box.length - 1];\n\n if (conf < threshold) continue;\n\n const [xMin, yMin, xMax, yMax] = box;\n\n finalImage = finalImage.drawRect(\n xMin, yMin,\n xMax - xMin, yMax - yMin,\n { color: 'red', width: 3 },\n );\n }\n\n return finalImage;\n}\n\n\nconst resultImg = await putBoundingBoxesOnImage(\n img,\n boundingBoxesOriginalSizeArray,\n);\nresultImg.display(display);\n" 
], "outputs": [ { "items": [ { "mime": "image/jpeg", - "value": "" + "value": "" } ] } diff --git a/samples/js/node/notebooks/hello-segmentation.nnb b/samples/js/node/notebooks/hello-segmentation.nnb index 8df4e6227701e9..a7da34a2799edf 100644 --- a/samples/js/node/notebooks/hello-segmentation.nnb +++ b/samples/js/node/notebooks/hello-segmentation.nnb @@ -17,62 +17,10 @@ { "language": "typescript", "source": [ - "const {\n getImageData, \n displayArrayAsImage, \n arrayToImageData,\n transform,\n downloadFile,\n} = require('../helpers');\n\nconst { cv } = require('opencv-wasm');\nconst { display } = require('node-kernel');\n\nconst { addon: ov } = require('openvino-node'); \n" + "const { display } = require('node-kernel');\nconst { addon: ov } = require('openvino-node');\n\nconst Image = require('../image.js');\nconst {\n transform,\n} = require('../helpers');\n" ], "outputs": [] }, - { - "language": "markdown", - "source": [ - "## Download the Model" - ], - "outputs": [] - }, - { - "language": "typescript", - "source": [ - "const baseArtifactsDir = '../../assets/models';\n\nconst modelName = 'road-segmentation-adas-0001';\nconst modelXMLName = `${modelName}.xml`;\nconst modelBINName = `${modelName}.bin`;\n\nconst modelXMLPath = baseArtifactsDir + '/' + modelXMLName;\n\nconst baseURL = 'https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.3/models_bin/1/road-segmentation-adas-0001/FP32/';\n\n\nawait downloadFile(baseURL + modelXMLName, modelXMLName, baseArtifactsDir);\nawait downloadFile(baseURL + modelBINName, modelBINName, baseArtifactsDir);\n" - ], - "outputs": [ - { - "items": [ - { - "mime": "application/vnd.code.notebook.stdout", - "value": [ - "File successfully stored at '/home/nvishnya/Code/wasm-openvino/samples/js/assets/models/road-segmentation-adas-0001.bin'", - "" - ] - } - ] - } - ] - }, - { - "language": "markdown", - "source": [ - "## Download an Image" - ], - "outputs": [] - }, - { - "language": "typescript", - "source": [ - "const 
baseImagesDir = '../../assets/images';\nconst imgUrl = 'https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/empty_road_mapillary.jpg';\n\nawait downloadFile(imgUrl, 'empty_road_mapillary.jpg', baseImagesDir);\n" - ], - "outputs": [ - { - "items": [ - { - "mime": "application/vnd.code.notebook.stdout", - "value": [ - "File successfully stored at '/home/nvishnya/Code/wasm-openvino/samples/js/assets/images/empty_road_mapillary.jpg'", - "" - ] - } - ] - } - ] - }, { "language": "markdown", "source": [ @@ -83,7 +31,7 @@ { "language": "typescript", "source": [ - "const core = new ov.Core();\nconst model = await core.readModel(modelXMLPath);\nconst compiledModel = await core.compileModel(model, 'CPU');\n\nconst inputLayer = compiledModel.input(0);\nconst outputLayer = compiledModel.output(0);\n" + "const modelXMLPath = '../../assets/models/road-segmentation-adas-0001.xml';\n\nconst core = new ov.Core();\nconst model = await core.readModel(modelXMLPath);\nconst compiledModel = await core.compileModel(model, 'AUTO');\n\nconst inputLayer = compiledModel.input(0);\nconst outputLayer = compiledModel.output(0);\n" ], "outputs": [] }, @@ -97,14 +45,14 @@ { "language": "typescript", "source": [ - "const imgData = await getImageData('../../assets/images/empty_road_mapillary.jpg');\n\nconst originalImage = cv.matFromImageData(imgData);\nconst { cols: originalWidth, rows: originalHeight } = originalImage;\n\nconst image = new cv.Mat();\ncv.cvtColor(originalImage, image, cv.COLOR_RGBA2RGB);\ncv.cvtColor(image, image, cv.COLOR_BGR2RGB); \n\nconst [B, C, H, W] = inputLayer.shape;\n\ncv.resize(image, image, new cv.Size(W, H));\n\nconst inputImage = transform(image.data, { width: W, height: H }, [0, 1, 2]); // NHWC to NCHW\n\ndisplayArrayAsImage(originalImage.data, originalWidth, originalHeight, display);\n" + "const img = await Image.load('../../assets/images/empty_road_mapillary.jpg');\n\nimg.display(display);\n" ], "outputs": [ { "items": [ { "mime": 
"image/jpeg", - "value": "" + "value": "" } ] } @@ -120,18 +68,9 @@ { "language": "typescript", "source": [ - "const tensor_data = new Float32Array(inputImage);\nconst tensor = new ov.Tensor(ov.element.f32, inputLayer.shape, tensor_data);\n\nconst inferRequest = compiledModel.createInferRequest();\ninferRequest.setInputTensor(tensor);\ninferRequest.infer();\n\nconst output = inferRequest.getTensor(outputLayer);\n\nconst { data: outputData } = output;\nconst layers = { bg: [], c: [], h: [], w: [] };\nconst resultLayer = [];\nconst colormap = [[68, 1, 84, 255], [48, 103, 141, 255], [53, 183, 120, 255], [199, 216, 52, 255]];\n\nconst size = outputData.length/4;\n\nfor (let i = 0; i < size; i++) {\n const valueAt = (i, number) => outputData[i + number*size];\n\n const currentValues = { \n bg: valueAt(i, 0),\n c: valueAt(i, 1),\n h: valueAt(i, 2),\n w: valueAt(i, 3),\n };\n const values = Object.values(currentValues);\n const maxIndex = values.indexOf(Math.max(...values));\n\n resultLayer.push(maxIndex);\n}\n\nconst pixels = [];\nresultLayer.forEach(i => pixels.push(...colormap[i]));\n\ndisplayArrayAsImage(pixels, W, H, display);\n" + "const [height, width] = inputLayer.shape.slice(2);\nconst resizedImg = img.resize(width, height);\n\n// Transform image data from NHWC to NCHW to match model input\n// as alternative you can use ov.preprocess.PrePostProcessor\n// see hello_reshape_ssd.js sample\nconst transformedImgData = transform(resizedImg.rgb, { width, height }, [0, 1, 2]);\nconst tensor = new ov.Tensor(\n ov.element.f32,\n inputLayer.shape,\n new Float32Array(transformedImgData),\n);\n\nconst inferRequest = compiledModel.createInferRequest();\nconst outputs = await inferRequest.inferAsync([tensor]);\nconst output = outputs[outputLayer];\nconst outputData = output.data;\n" ], - "outputs": [ - { - "items": [ - { - "mime": "image/jpeg", - "value": "" - } - ] - } - ] + "outputs": [] }, { "language": "markdown", @@ -143,14 +82,14 @@ { "language": "typescript", "source": [ 
- "const alpha = 0.3;\n\nconst pixelsAsImageData = arrayToImageData(pixels, W, H);\nconst mask = cv.matFromImageData(pixelsAsImageData);\n\ncv.resize(mask, mask, new cv.Size(originalWidth, originalHeight));\n\ncv.addWeighted(mask, alpha, originalImage, 1 - alpha, 0, mask);\n\ndisplayArrayAsImage(mask.data, originalWidth, originalHeight, display);\n" + "const layers = { bg: [], c: [], h: [], w: [] };\nconst resultLayer = [];\nconst colormap = [\n [68, 1, 84, 255],\n [48, 103, 141, 255],\n [53, 183, 120, 255],\n [199, 216, 52, 255],\n];\nconst size = outputData.length/4;\n\nfor (let i = 0; i < size; i++) {\n const valueAt = (i, number) => outputData[i + number*size];\n\n const currentValues = {\n bg: valueAt(i, 0),\n c: valueAt(i, 1),\n h: valueAt(i, 2),\n w: valueAt(i, 3),\n };\n const values = Object.values(currentValues);\n const maxIndex = values.indexOf(Math.max(...values));\n\n resultLayer.push(maxIndex);\n}\n\nconst pixels = [];\nresultLayer.forEach(i => pixels.push(...colormap[i]));\n\nconst alpha = 0.6;\nconst [N, C, H, W] = output.getShape();\n\nconst segmentsImg = Image.fromArray(pixels, W, H);\nconst resizedSegments = segmentsImg.resize(img.width, img.height);\nconst mergedImg = Image.overlay(img, resizedSegments, alpha);\n\nmergedImg.display(display);\n" ], "outputs": [ { "items": [ { "mime": "image/jpeg", - "value": "" + "value": "" } ] } diff --git a/samples/js/node/notebooks/hello-world.nnb b/samples/js/node/notebooks/hello-world.nnb index dbc46d196345ab..83d4ca8bec29f5 100644 --- a/samples/js/node/notebooks/hello-world.nnb +++ b/samples/js/node/notebooks/hello-world.nnb @@ -17,62 +17,10 @@ { "language": "javascript", "source": [ - "const { cv } = require('opencv-wasm');\nconst { display } = require('node-kernel');\nconst { getImageData, displayImage, downloadFile } = require('../helpers.js');\n\nconst { addon: ov } = require('openvino-node');\n" + "const { display } = require('node-kernel');\nconst { addon: ov } = require('openvino-node');\n\nconst 
Image = require('../image.js');\nconst imagenetClassesMap = require('../../assets/datasets/imagenet_class_index.json');\n" ], "outputs": [] }, - { - "language": "markdown", - "source": [ - "## Download the Model" - ], - "outputs": [] - }, - { - "language": "javascript", - "source": [ - "const baseArtifactsDir = '../../assets/models';\n\nconst modelName = 'v3-small_224_1.0_float';\nconst modelXMLName = `${modelName}.xml`;\nconst modelBINName = `${modelName}.bin`;\n\nconst modelXMLPath = baseArtifactsDir + '/' + modelXMLName;\n\nconst baseURL = 'https://storage.openvinotoolkit.org/repositories/openvino_notebooks/models/mobelinet-v3-tf/FP32/';\n\nawait downloadFile(baseURL + modelXMLName, modelXMLName, baseArtifactsDir);\nawait downloadFile(baseURL + modelBINName, modelBINName, baseArtifactsDir);\n" - ], - "outputs": [ - { - "items": [ - { - "mime": "application/vnd.code.notebook.stdout", - "value": [ - "File successfully stored at '/home/nvishnya/Code/wasm-openvino/samples/js/assets/models/v3-small_224_1.0_float.bin'", - "" - ] - } - ] - } - ] - }, - { - "language": "markdown", - "source": [ - "## Download an Image and Imagenet Classes" - ], - "outputs": [] - }, - { - "language": "javascript", - "source": [ - "const imgUrl = 'https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco.jpg';\nconst classesUrl = 'https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/datasets/imagenet/imagenet_class_index.json';\n\nawait downloadFile(imgUrl, 'coco.jpg', '../../assets/images');\nawait downloadFile(classesUrl, 'imagenet_class_index.json', '../../assets/datasets');\n" - ], - "outputs": [ - { - "items": [ - { - "mime": "application/vnd.code.notebook.stdout", - "value": [ - "File successfully stored at '/home/nvishnya/Code/wasm-openvino/samples/js/assets/datasets/imagenet_class_index.json'", - "" - ] - } - ] - } - ] - }, { "language": "markdown", "source": [ @@ -83,7 +31,7 @@ { "language": "javascript", "source": [ - 
"const core = new ov.Core();\nconst model = await core.readModel(modelXMLPath);\nconst compiledModel = await core.compileModel(model, 'CPU');\n\nconst outputLayer = compiledModel.outputs[0];\n" + "const modelXMLPath = '../../assets/models/v3-small_224_1.0_float.xml';\n\nconst core = new ov.Core();\nconst model = await core.readModel(modelXMLPath);\nconst compiledModel = await core.compileModel(model, 'CPU');\n\nconst inputLayer = compiledModel.inputs[0];\nconst outputLayer = compiledModel.outputs[0];\n" ], "outputs": [] }, @@ -97,14 +45,14 @@ { "language": "javascript", "source": [ - "const imgData = await getImageData('../../assets/images/coco.jpg');\n\n// Use opencv-wasm to preprocess image.\nconst originalImage = cv.matFromImageData(imgData);\nconst image = new cv.Mat();\n// The MobileNet model expects images in RGB format.\ncv.cvtColor(originalImage, image, cv.COLOR_RGBA2RGB);\n// Resize to MobileNet image shape.\ncv.resize(image, image, new cv.Size(224, 224));\n\ndisplayImage(imgData, display);\n" + "const img = await Image.load('../../assets/images/coco.jpg');\n\nimg.display(display);\n" ], "outputs": [ { "items": [ { "mime": "image/jpeg", - "value": "" + "value": "" } ] } @@ -120,7 +68,7 @@ { "language": "javascript", "source": [ - "const tensorData = new Float32Array(image.data);\nconst tensor = new ov.Tensor(ov.element.f32, Int32Array.from([1, 224, 224, 3]), tensorData);\n" + "const [N, inputHeight, inputWidth] = inputLayer.shape;\nconst resizedImg = img.resize(inputWidth, inputHeight);\nconst tensor = new ov.Tensor(ov.element.f32, inputLayer.shape, new Float32Array(resizedImg.rgb));\n" ], "outputs": [] }, @@ -134,14 +82,21 @@ { "language": "javascript", "source": [ - "const inferRequest = compiledModel.createInferRequest();\ninferRequest.setInputTensor(tensor);\ninferRequest.infer();\n\nconst resultInfer = inferRequest.getTensor(outputLayer);\nconst resultIndex = resultInfer.data.indexOf(Math.max(...resultInfer.data));\n" + "const inferRequest = 
compiledModel.createInferRequest();\nconst result = await inferRequest.inferAsync([tensor]);\n\nconst outputData = result[outputLayer].data;\nconst resultIndex = outputData.indexOf(Math.max(...outputData));\nconst prediction = outputData[resultIndex];\n" + ], + "outputs": [] + }, + { + "language": "markdown", + "source": [ + "## Display result based on maximum probability" ], "outputs": [] }, { "language": "javascript", "source": [ - "const imagenetClassesMap = require('../../assets/datasets/imagenet_class_index.json');\nconst imagenetClasses = ['background', ...Object.values(imagenetClassesMap)];\n\nconsole.log(`Result: ${imagenetClasses[resultIndex][1]}`);\n" + "const imagenetClassesMap = require('../../assets/datasets/imagenet_class_index.json');\nconst imagenetClasses = ['background', ...Object.values(imagenetClassesMap)];\n\nconsole.log('Result:\\n\\nid\\tprobability\\tlabel');\nconsole.log('---------------------------------');\nconsole.log(`${resultIndex}\\t${prediction.toFixed(7)}\\t${imagenetClasses[resultIndex][1]}`);\n" ], "outputs": [ { @@ -149,7 +104,11 @@ { "mime": "application/vnd.code.notebook.stdout", "value": [ - "Result: flat-coated_retriever", + "Result:", + "", + "id\tprobability\tlabel", + "---------------------------------", + "206\t0.7470666\tflat-coated_retriever", "" ] } diff --git a/samples/js/node/notebooks/optical-character-recognition.nnb b/samples/js/node/notebooks/optical-character-recognition.nnb index b7e8e109ff857f..ce450dbce0a0d7 100644 --- a/samples/js/node/notebooks/optical-character-recognition.nnb +++ b/samples/js/node/notebooks/optical-character-recognition.nnb @@ -24,60 +24,42 @@ { "language": "typescript", "source": [ - "const fs = require(\"node:fs\");\nconst path = require(\"node:path\");\nconst { createCanvas, Image, ImageData } = require(\"canvas\");\nconst { addon: ov } = require(\"openvino-node\");\nconst { display } = require(\"node-kernel\");\nconst { cv } = require(\"opencv-wasm\");\nconst {\n transform,\n 
getImageData,\n displayArrayAsImage,\n downloadFile,\n arrayToImageData,\n getImageBuffer,\n argMax,\n setShape,\n} = require(\"../helpers.js\");\n" + "const { addon: ov } = require('openvino-node');\nconst { display } = require('node-kernel');\n\nconst Image = require('../image');\nconst { transform, argMax, setShape } = require('../helpers.js');\n" ], "outputs": [] }, { "language": "markdown", "source": [ - "# Download Models" + "# Load a Detection Model" ], "outputs": [] }, { "language": "typescript", "source": [ - "// Intializing Images, Models\nconst baseArtifactsDir = '../../assets/models';\nconst detBaseURL = 'https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.3/models_bin/1/horizontal-text-detection-0001/FP32/';\nconst recBaseURL = 'https://storage.openvinotoolkit.org/repositories/open_model_zoo/public/text-recognition-resnet-fc/';\nconst detectionModelName = 'horizontal-text-detection-0001';\nconst textRecModelName = 'text-recognition-resnet-fc';\n\nconst detModelXMLName = `${detectionModelName}.xml`;\nconst detModelBINName = `${detectionModelName}.bin`;\n\nconst detModelXMLPath = `${baseArtifactsDir}/${detModelXMLName}`;\nconst detModelBINPath = `${baseArtifactsDir}/${detModelBINName}`;\n\nconst recModelXMLName = `${textRecModelName}.xml`;\nconst recModelBINName = `${textRecModelName}.bin`;\n\nconst recModelXMLPath = `${baseArtifactsDir}/${textRecModelName}.xml`;\nconst recModelBINPath = `${baseArtifactsDir}/${textRecModelName}.bin`;\n\nawait downloadFile(\n detBaseURL + detModelXMLName,\n detModelXMLName,\n baseArtifactsDir\n);\n\nawait downloadFile(\n detBaseURL + detModelBINName,\n detModelBINName,\n baseArtifactsDir\n);\n\nawait downloadFile(\n recBaseURL + recModelXMLName,\n recModelXMLName,\n baseArtifactsDir\n);\n\nawait downloadFile(\n recBaseURL + recModelBINName,\n recModelBINName,\n baseArtifactsDir\n);\n" + "const detModelXMLPath = '../../assets/models/horizontal-text-detection-0001.xml';\n\n// Initialize OpenVINO core and 
load the detection model\nconst core = new ov.Core();\nconst detModel = await core.readModel(detModelXMLPath);\nconst detCompiledModel = await core.compileModel(detModel, 'AUTO');\nconst detInputLayer = detCompiledModel.input(0);\nconst detOutputLayer = detCompiledModel.output('boxes');\n" ], - "outputs": [ - { - "items": [ - { - "mime": "application/vnd.code.notebook.stdout", - "value": [ - "File successfully stored at '/home/prakash/OpenSource-Repos/ocr-node-sample/assets/models/horizontal-text-detection-0001.xml'", - "File successfully stored at '/home/prakash/OpenSource-Repos/ocr-node-sample/assets/models/horizontal-text-detection-0001.bin'", - "File successfully stored at '/home/prakash/OpenSource-Repos/ocr-node-sample/assets/models/text-recognition-resnet-fc.xml'", - "File successfully stored at '/home/prakash/OpenSource-Repos/ocr-node-sample/assets/models/text-recognition-resnet-fc.bin'", - "" - ] - } - ] - } - ] + "outputs": [] }, { "language": "markdown", "source": [ - "# Dowload Image" + "# Prepare Image for Inference" ], "outputs": [] }, { "language": "typescript", "source": [ - "const baseImagesDir = '../../assets/images';\nconst imgUrl = 'https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/intel_rnb.jpg';\nconst imgName = 'intel_rnb.jpg';\nawait downloadFile(imgUrl, imgName, baseImagesDir);\n" + "const imagePath = '../../assets/images/intel_rnb.jpg';\nconst img = await Image.load(imagePath);\nimg.display(display);\n\n// Resize the image to meet network input size\nconst [detInputHeight, detInputWidth] = detInputLayer.shape.slice(2);\nconst resizedImg = img.resize(detInputWidth, detInputHeight);\n\n// Prepare input tensor\nconst inputImageTransformedData = transform(\n resizedImg.rgb,\n { width: detInputWidth, height: detInputHeight },\n [0, 1, 2],\n);\nconst tensorData = new Float32Array(inputImageTransformedData);\nconst tensor = new ov.Tensor(ov.element.f32, detInputLayer.shape, tensorData);\n" ], "outputs": [ { 
"items": [ { - "mime": "application/vnd.code.notebook.stdout", - "value": [ - "File successfully stored at '/home/prakash/OpenSource-Repos/ocr-node-sample/assets/images/intel_rnb.jpg'", - "" - ] + "mime": "image/jpeg", + "value": "" } ] } @@ -86,141 +68,72 @@ { "language": "markdown", "source": [ - "# Load a Detection Model" + "## Run inference on the detection model" ], "outputs": [] }, { "language": "typescript", "source": [ - "// Initialize OpenVINO core and load the detection model\nconst core = new ov.Core();\nconst detModel = await core.readModel(detModelXMLPath);\nconst detCompiledModel = await core.compileModel(detModel, 'AUTO');\nconst detInputLayer = detCompiledModel.input(0);\nconst detOutputLayer = detCompiledModel.output('boxes');\n" + "const detInferRequest = detCompiledModel.createInferRequest();\nconst detResult = await detInferRequest.inferAsync([tensor]);\n" ], "outputs": [] }, { "language": "markdown", "source": [ - "# Prepare Image for Inference" + "# Load Text Recognition Model" ], "outputs": [] }, { "language": "typescript", "source": [ - "const imageData = await getImageData(`${baseImagesDir}/intel_rnb.jpg`);\nconst inputImageMat = cv.matFromImageData(imageData);\nconst displayImageMat = inputImageMat.clone();\n\n// Resize the image to meet network input size\nconst [B, C, H, W] = detInputLayer.shape;\nconst resizedImage = new cv.Mat();\ncv.cvtColor(inputImageMat, inputImageMat, cv.COLOR_RGBA2RGB);\ncv.cvtColor(inputImageMat, inputImageMat, cv.COLOR_BGR2RGB);\ncv.resize(inputImageMat, resizedImage, new cv.Size(W, H));\n\n// Prepare input tensor\nconst inputImage = transform(resizedImage.data,\n { width: W, height: H },\n [0, 1, 2]);\nconst tensorData = new Float32Array(inputImage);\nconst tensor = new ov.Tensor(\n ov.element.f32,\n detInputLayer.shape,\n tensorData\n);\n" + "const recModelXMLPath = '../../assets/models/text-recognition-resnet-fc.xml';\n\n// Load the recognition model and prepare the inference request\nconst recModel = await 
core.readModel(recModelXMLPath);\nconst recModelCompiled = await core.compileModel(recModel, 'AUTO');\nconst recInferRequest = recModelCompiled.createInferRequest();\n" ], "outputs": [] }, { "language": "markdown", "source": [ - "## Define Post-Processing Functions" + "# Define Post-Processing Functions" ], "outputs": [] }, { "language": "typescript", "source": [ - "// Function to extract bounding boxes from the model output\nfunction extractBoundingBoxes(output) {\n console.log(`Output shape: ${output.getData()}`);\n const { data: boxes } = output;\n const foldingCoefficient = 5;\n const numberOfBoxes = boxes.length / foldingCoefficient;\n\n return setShape(boxes, [numberOfBoxes, foldingCoefficient]);\n}\n" + "async function performTextRecognition(model, inferenceRequest, img) {\n const inputLayerShape = model.input(0).shape;\n const outputLayer = model.output(0);\n\n const [,, inputHeight, inputWidth] = inputLayerShape;\n const resizedImg = img.resize(inputWidth, inputHeight);\n\n // Convert image to grayscale and create tensor\n const tensor = new ov.Tensor(\n ov.element.f32,\n inputLayerShape,\n new Float32Array(resizedImg.grayscale),\n );\n\n const result = await inferenceRequest.inferAsync([tensor]);\n const recognitionResults = extractRecognitionResults(result[outputLayer]);\n const annotation = parseAnnotations(recognitionResults);\n\n return annotation;\n}\n\n// Function to extract bounding boxes from the model output\nfunction extractBoundingBoxes(output) {\n const { data: boxes } = output;\n const foldingCoefficient = 5;\n const numberOfBoxes = boxes.length / foldingCoefficient;\n\n return setShape(boxes, [numberOfBoxes, foldingCoefficient]);\n}\n\n// Function to adjust bounding box coordinates by a given ratio\nfunction multiplyByRatio(ratioX, ratioY, box) {\n const scaleShape = (shape, idx) => {\n const position = idx % 2\n ? 
Math.max(shape * ratioY, 10)\n : shape * ratioX;\n\n return Math.floor(position);\n }\n\n return box.map(scaleShape);\n}\n\n// Function to extract recognition results from the model output\nfunction extractRecognitionResults(output) {\n const outputData = output.getData();\n const outputShape = output.getShape();\n const [, height, width] = outputShape;\n\n return setShape(outputData, [height, width]);\n}\n\n// Function to parse annotations from the recognition results\nfunction parseAnnotations(recognitionResults) {\n const letters = '~0123456789abcdefghijklmnopqrstuvwxyz';\n const annotation = [];\n\n for (const row of recognitionResults) {\n const letterIndex = argMax(row);\n const parsedLetter = letters[letterIndex];\n\n // Stop if end character is encountered\n if (parsedLetter === letters[0]) break;\n\n annotation.push(parsedLetter);\n }\n\n return annotation.join('');\n}\n\n// Takes original image and bounding boxes with annotations\n// and returns the image with annotations\nasync function putAnnotationsOnImage(img, boxesWithAnnotations, options) {\n const defaultOptions = { threshold: 0.3, confLabels: true };\n const { threshold, confLabels } = Object.assign(defaultOptions, options);\n\n let finalImage = img;\n\n for (const item of boxesWithAnnotations) {\n const { box, annotation } = item;\n const conf = box[box.length - 1];\n\n if (conf < threshold) continue;\n\n const [xMin, yMin, xMax, yMax] = box;\n const yOffset = 10;\n\n finalImage = finalImage.drawRect(\n xMin, yMin,\n xMax - xMin, yMax - yMin,\n { color: 'green', width: 3 },\n );\n finalImage = finalImage.drawText(\n annotation,\n xMin, yMin - yOffset,\n { font: '30px Arial' },\n );\n\n if (!confLabels) continue;\n\n finalImage = finalImage.drawText(\n conf.toFixed(2),\n xMin, yMax + 2 * yOffset,\n { font: '20px Arial' },\n );\n }\n\n return finalImage;\n}\n" ], "outputs": [] }, { "language": "markdown", "source": [ - "# Do Inference" + "### Do Inference and Show Detected Text Boxes and OCR 
Results for the Image\n" ], "outputs": [] }, { "language": "typescript", "source": [ - "// Create infer request\nconst detInferRequest = detCompiledModel.createInferRequest();\n\nconst detResult = await detInferRequest.inferAsync([tensor]);\nconst boundingBoxesArray = extractBoundingBoxes(detResult[detOutputLayer]);\n\n// Show original image\ndisplayArrayAsImage(\n displayImageMat.data,\n displayImageMat.cols,\n displayImageMat.rows,\n display\n);\n" + "// Calculate ratios\nconst [ratioX, ratioY] = [img.width / detInputWidth, img.height / detInputHeight];\nconst boundingBoxesArray = extractBoundingBoxes(detResult[detOutputLayer]);\n// Resize bounding boxes to the original image size\nconst boundingBoxesOriginalSizeArray = boundingBoxesArray.map(box =>\n [...multiplyByRatio(ratioX, ratioY, box), box[4]]);\n\n// Process each bounding box and run inference on the recognition model\nconst boxesWithAnnotations = [];\nfor (let i = 0; i < boundingBoxesOriginalSizeArray.length; i++) {\n const box = boundingBoxesOriginalSizeArray[i];\n const [xMin, yMin, xMax, yMax] = box;\n const croppedImg = img.crop(xMin, yMin, xMax - xMin, yMax - yMin);\n croppedImg.display(display);\n\n const annotation = await performTextRecognition(recModel, recInferRequest, croppedImg);\n\n boxesWithAnnotations.push({ box, annotation });\n\n console.log(`Box ${i}: [${box.join(',')}], Annotation: '${annotation}'`);\n}\n" ], "outputs": [ { "items": [ { "mime": "image/jpeg", - "value": "" + "value": "" } ] - } - ] - }, - { - "language": "markdown", - "source": [ - "# Load Text Recognition Model" - ], - "outputs": [] - }, - { - "language": "typescript", - "source": [ - "// Loading the text recognition model\nconst recModel = await core.readModel(recModelXMLPath);\nconst recModelCompiled = await core.compileModel(recModel, 'AUTO');\nconst recInputLayer = recModelCompiled.input(0);\nconst recOutputLayer = recModelCompiled.output(0);\n" - ], - "outputs": [] - }, - { - "language": "markdown", - "source": [ 
- "# Define Post-Processing Functions" - ], - "outputs": [] - }, - { - "language": "typescript", - "source": [ - "// Function to calculate the ratios for the image\nfunction calculateRatios(originalImage, resizedImage) {\n const realY = originalImage.rows;\n const realX = originalImage.cols;\n const resizedY = resizedImage.rows;\n const resizedX = resizedImage.cols;\n const ratioX = realX / resizedX;\n const ratioY = realY / resizedY;\n\n return { ratioX, ratioY };\n}\n\n// Function to convert the image to grayscale\nfunction convertToGrayscale(originalImage) {\n const grayscaleImage = new cv.Mat();\n cv.cvtColor(originalImage, grayscaleImage, cv.COLOR_BGR2GRAY);\n\n return grayscaleImage;\n}\n\n\n// Function to adjust bounding box coordinates by a given ratio\nfunction multiplyByRatio(ratioX, ratioY, box) {\n const scaleShape = (shape, idx) => idx % 2\n ? Math.max(shape * ratioY, 10)\n : shape * ratioX;\n\n return box.map(scaleShape);\n}\n\n\n// Function to resize and convert a crop to the recognition model input format\nfunction resizeAndConvertCropToModelInput(crop, netShape) {\n const [netWidth, netHeight] = netShape;\n\n // Resize the crop to the network's input shape\n const tempImg = new cv.Mat();\n cv.resize(crop, tempImg, new cv.Size(netWidth, netHeight));\n\n // Create the reshaped buffer\n const reshapedBuffer = new Uint8Array(netHeight * netWidth);\n let index = 0;\n\n for (let i = 0; i < netHeight; i++) {\n for (let j = 0; j < netWidth; j++) {\n reshapedBuffer[index++] = tempImg.ucharPtr(i, j)[0];\n }\n }\n\n // Clean up\n tempImg.delete();\n\n return reshapedBuffer;\n}\n\n// Function to extract recognition results from the model output\nfunction extractRecognitionResults(output) {\n const outputData = output.getData();\n const outputShape = output.getShape();\n const [batchSize, height, width] = outputShape;\n\n return setShape(outputData, [height, width]);\n}\n\n// Function to parse annotations from the recognition results\nfunction 
parseAnnotations(recognitionResults) {\n const letters = \"~0123456789abcdefghijklmnopqrstuvwxyz\";\n const annotation = [];\n\n for (const row of recognitionResults) {\n const letterIndex = argMax(row);\n const parsedLetter = letters[letterIndex];\n\n // Stop if end character is encountered\n if (parsedLetter === letters[0]) break;\n annotation.push(parsedLetter);\n }\n\n return annotation.join('');\n}\n\n// Function to crop the image based on the bounding box coordinates\nfunction cropImage(originalImage, xMin, yMin, xMax, yMax) {\n xMin = Math.max(0, xMin);\n yMin = Math.max(0, yMin);\n xMax = Math.min(originalImage.cols, xMax);\n yMax = Math.min(originalImage.rows, yMax);\n if (xMin >= xMax || yMin >= yMax) {\n throw new Error('Invalid crop coordinates');\n }\n const roi = originalImage.roi(\n new cv.Rect(xMin, yMin, xMax - xMin, yMax - yMin)\n );\n const cropped = new cv.Mat();\n roi.copyTo(cropped);\n roi.delete();\n\n return cropped;\n}\n\n// Function to log the bounding boxes with annotations\nfunction printSortedAnnotations(boxesWithAnnotations) {\n /* Sort the boxes with annotations based\n on their position in the input image */\n const sortedAnnotations = boxesWithAnnotations\n .sort((a, b) => {\n const [aXMin, aYMin] = a.box;\n const [bXMin, bYMin] = b.box;\n\n return (aYMin - bYMin) || (aXMin - bXMin);\n })\n .map(item => item.annotation);\n\n console.log('Sorted Annotations:', sortedAnnotations);\n}\n\n// Get Text size\nfunction getTextSize(text, fontFace, fontScale) {\n const canvas = createCanvas(200, 200);\n const ctx = canvas.getContext('2d');\n const adjustedFontScale = fontScale * 35;\n ctx.font = `${adjustedFontScale}px ${fontFace}`;\n const metrics = ctx.measureText(text);\n const width = metrics.width;\n const height =\n metrics.actualBoundingBoxAscent +\n metrics.actualBoundingBoxDescent;\n\n return { width, height };\n}\n\n/* The convertResultToImage function visualizes object detection\n results on an image by drawing bounding boxes 
around detected\n objects and optionally adding labels to them. */\nfunction convertResultToImage(\n bgrImage,\n resizedImage,\n boxesWithAnnotations,\n options,\n) {\n const defaultOptions = { threshold: 0.3, confLabels: true };\n const { threshold, confLabels } = Object.assign(defaultOptions, options);\n\n const colors = {\n red: [255, 0, 0, 255],\n green: [0, 255, 0, 255],\n white: [255, 255, 255, 255]\n };\n const [realY, realX] = [bgrImage.rows, bgrImage.cols];\n const [resizedY, resizedX] = [resizedImage.rows, resizedImage.cols];\n const [ratioX, ratioY] = [realX / resizedX, realY / resizedY];\n\n const rgbImage = new cv.Mat();\n cv.cvtColor(bgrImage, rgbImage, cv.COLOR_BGR2RGB);\n\n boxesWithAnnotations.forEach(({ box, annotation }) => {\n const conf = box[box.length - 1];\n\n if (conf < threshold) return;\n\n const [xMin, yMin, xMax, yMax] = multiplyByRatio(ratioX, ratioY, box);\n\n cv.rectangle(\n rgbImage,\n new cv.Point(xMin, yMin),\n new cv.Point(xMax, yMax),\n colors.green,\n 3\n );\n\n if (!confLabels) return;\n\n const text = `${annotation}`;\n const fontScale = 0.8;\n const thickness = 1;\n const { width: textW, height: textH } = getTextSize(text, 'Arial', fontScale);\n const imageCopy = rgbImage.clone();\n\n cv.rectangle(\n imageCopy,\n new cv.Point(xMin, yMin - textH - 10),\n new cv.Point(xMin + textW, yMin - 10),\n colors.white,\n cv.FILLED\n );\n cv.addWeighted(imageCopy, 0.4, rgbImage, 0.6, 0, rgbImage);\n cv.putText(\n rgbImage,\n text,\n new cv.Point(xMin, yMin - 10),\n cv.FONT_HERSHEY_SIMPLEX,\n fontScale,\n colors.red,\n thickness,\n cv.LINE_AA\n );\n\n imageCopy.delete();\n\n });\n\n return rgbImage;\n}\n" - ], - "outputs": [] - }, - { - "language": "markdown", - "source": [ - "# Async Inference Helper Function" - ], - "outputs": [] - }, - { - "language": "typescript", - "source": [ - "async function inferAsyncProcess(\n tensor,\n recModelCompiled,\n recOutputLayer,\n i,\n annotations,\n) {\n // Create infer request\n const inferRequest = 
recModelCompiled.createInferRequest();\n\n // Define the completion callback function\n function completionCallback(outputTensor, i, annotations) {\n const recognitionResults = extractRecognitionResults(outputTensor);\n const annotation = parseAnnotations(recognitionResults);\n annotations.push(annotation);\n }\n\n // Start inference in asynchronous mode\n try {\n const result = await inferRequest.inferAsync([tensor]);\n completionCallback(result[recOutputLayer], i, annotations);\n }catch (error) {\n console.error('Error during inference:', error);\n }\n}\n" - ], - "outputs": [] - }, - { - "language": "markdown", - "source": [ - "### Do Inference and Show Detected Text Boxes and OCR Results for the Image\n" - ], - "outputs": [] - }, - { - "language": "typescript", - "source": [ - "// Process each bounding box and run inference on the recognition model\nconst [batchSize, channels, height, width] = recInputLayer.shape;\n// Calculate ratios\nconst {\n ratioX,\n ratioY,\n} = calculateRatios(inputImageMat, resizedImage);\n\n// Convert image to grayscale\nconst grayscaleImage = convertToGrayscale(inputImageMat);\n\nconst annotations = [];\nconst croppedImages = [];\n\n\nfor (let i = 0; i < boundingBoxesArray.length; i++) {\n const crop = boundingBoxesArray[i];\n const [xMin, yMin, xMax, yMax] = multiplyByRatio(ratioX, ratioY, crop).map(Math.floor);\n const cropRect = new cv.Rect(xMin, yMin, xMax - xMin, yMax - yMin);\n const croppedImage = grayscaleImage.roi(cropRect);\n\n try {\n const preprocessedCrop = resizeAndConvertCropToModelInput(croppedImage, [width, height]);\n const tensorData = new Float32Array(preprocessedCrop);\n const tensor = new ov.Tensor(\n ov.element.f32,\n Int32Array.from(recInputLayer.shape),\n tensorData\n );\n\n await inferAsyncProcess(\n tensor,\n recModelCompiled,\n recOutputLayer,\n i,\n annotations\n );\n\n croppedImages.push(\n cropImage(inputImageMat, xMin, yMin, xMax, yMax)\n );\n } catch (error) {\n console.error('Error during 
preprocessing:', error);\n }\n\n croppedImage.delete();\n}\n\ngrayscaleImage.delete();\n\nconst boxesWithAnnotations = boundingBoxesArray.map((box, index) => ({\n box,\n annotation: annotations[index]\n}));\n\nconst resultImage = convertResultToImage(\n inputImageMat,\n resizedImage,\n boxesWithAnnotations,\n { threshold: 0.3, confLabels: true }\n);\n\ndisplayArrayAsImage(\n resultImage.data,\n resultImage.cols,\n resultImage.rows,\n display\n);\n\ncroppedImages.forEach((croppedImage) => {\n displayArrayAsImage(\n croppedImage.data,\n croppedImage.cols,\n croppedImage.rows,\n display\n );\n});\n" - ], - "outputs": [ + }, { "items": [ { "mime": "application/vnd.code.notebook.stdout", "value": [ - "Annotation for box 0: building", - "Cropped Image Size: 159 x 40", - "Annotation for box 1: noyce", - "Original Image Size: 690 x 517", - "Cropping Coordinates: (256, 50) to (377, 88)", - "Cropped Image Size: 121 x 38", - "Cropping Coordinates: (604, 205) to (653, 228)", - "Cropped Image Size: 49 x 23", - "Cropped Image Size: 26 x 32", - "Cropped Image Size: 31 x 23", - "Text: noyce, Width: 74.716796875, Height: 21", - "Text: 2200, Width: 62.2890625, Height: 19", - "Text: robert, Width: 73.14453125, Height: 20", + "Box 0: [391,59,550,99,0,0.518794059753418], Annotation: 'building'", "" ] } @@ -230,15 +143,18 @@ "items": [ { "mime": "image/jpeg", - "value": "" + "value": "" } ] }, { "items": [ { - "mime": "image/jpeg", - "value": "" + "mime": "application/vnd.code.notebook.stdout", + "value": [ + "Box 1: [257,50,377,88,0,0.48566171526908875], Annotation: 'noyce'", + "" + ] } ] }, @@ -246,15 +162,18 @@ "items": [ { "mime": "image/jpeg", - "value": "" + "value": "" } ] }, { "items": [ { - "mime": "image/jpeg", - "value": "" + "mime": "application/vnd.code.notebook.stdout", + "value": [ + "Box 2: [604,205,653,229,0,0.45074450969696045], Annotation: '2200'", + "" + ] } ] }, @@ -262,7 +181,18 @@ "items": [ { "mime": "image/jpeg", - "value": "" + "value": "" + } + ] + }, + { + 
"items": [ + { + "mime": "application/vnd.code.notebook.stdout", + "value": [ + "Box 3: [22,33,185,74,0,0.3334950804710388], Annotation: 'robert'", + "" + ] } ] }, @@ -270,7 +200,18 @@ "items": [ { "mime": "image/jpeg", - "value": "" + "value": "" + } + ] + }, + { + "items": [ + { + "mime": "application/vnd.code.notebook.stdout", + "value": [ + "Box 4: [506,408,538,431,0,0.32059410214424133], Annotation: 'center'", + "" + ] } ] }, @@ -278,7 +219,18 @@ "items": [ { "mime": "image/jpeg", - "value": "" + "value": "" + } + ] + }, + { + "items": [ + { + "mime": "application/vnd.code.notebook.stdout", + "value": [ + "Box 5: [203,46,229,78,0,0.3125338554382324], Annotation: 'n'", + "" + ] } ] } @@ -287,28 +239,25 @@ { "language": "markdown", "source": [ - "### Print Annotations in Plain Text Format" + "## Display the OCR Results on the original image" ], "outputs": [] }, { "language": "typescript", "source": [ - "printSortedAnnotations(boxesWithAnnotations);\n" + "const annotatedImg = await putAnnotationsOnImage(\n img,\n boxesWithAnnotations,\n { threshold: 0.3, confLabels: false },\n);\nannotatedImg.display(display);\n" ], "outputs": [ { "items": [ { - "mime": "application/vnd.code.notebook.stdout", - "value": [ - "Sorted Annotations: [ 'robert', 'n', 'noyce', 'building', '2200', 'center' ]", - "" - ] + "mime": "image/jpeg", + "value": "" } ] } ] } ] -} \ No newline at end of file +} diff --git a/samples/js/node/notebooks/pose-estimation.nnb b/samples/js/node/notebooks/pose-estimation.nnb index 6ca358ed2b0cf4..16774b4514ab11 100644 --- a/samples/js/node/notebooks/pose-estimation.nnb +++ b/samples/js/node/notebooks/pose-estimation.nnb @@ -17,32 +17,17 @@ { "language": "javascript", "source": [ - "const path = require('node:path');\nconst { cv } = require('opencv-wasm');\nconst { display } = require('node-kernel');\nconst tf = require('@tensorflow/tfjs-node');\nconst {\n getImageData,\n displayArrayAsImage,\n arrayToImageData,\n getImageBuffer,\n transform,\n 
downloadFile,\n} = require('../helpers.js');\n\nconst { addon: ov } = require('openvino-node');\n" - ], - "outputs": [] - }, - { - "language": "markdown", - "source": [ - "## Download the Model" - ], - "outputs": [] - }, - { - "language": "javascript", - "source": [ - "const baseArtifactsDir = '../../assets/models';\n\nconst modelName = 'human-pose-estimation-0001';\nconst modelXMLName = `${modelName}.xml`;\nconst modelBINName = `${modelName}.bin`;\n\nconst modelXMLPath = baseArtifactsDir + '/' + modelXMLName;\n\nconst baseURL = `https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.1/models_bin/3/${modelName}/FP16-INT8/`;\n\nawait downloadFile(baseURL + modelXMLName, modelXMLName, baseArtifactsDir);\nawait downloadFile(baseURL + modelBINName, modelBINName, baseArtifactsDir);\n\nconst imgUrl = 'https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/intel_rnb.jpg';\nconst imageFilename = path.parse(imgUrl).base;\n\nawait downloadFile(imgUrl, imageFilename, '../../assets/images');\n\nconst imagePath = `../../assets/images/${imageFilename}`\n" + "const path = require('node:path');\nconst { display } = require('node-kernel');\nconst tf = require('@tensorflow/tfjs-node');\nconst { addon: ov } = require('openvino-node');\n\nconst { transform } = require('../helpers.js');\nconst Image = require('../image');\n" ], "outputs": [ { "items": [ { - "mime": "application/vnd.code.notebook.stdout", + "mime": "application/vnd.code.notebook.stderr", "value": [ - "Proxy agent configured using: 'http://proxy-dmz.intel.com:911'", - "Proxy agent configured using: 'http://proxy-dmz.intel.com:911'", - "Proxy agent configured using: 'http://proxy-dmz.intel.com:911'", - "File successfully stored at '/home/nvishnya/Code/wasm-openvino/samples/js/assets/images/intel_rnb.jpg'", + "2024-11-04 17:09:35.860051: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. 
You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.", + "2024-11-04 17:09:35.872537: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX_VNNI FMA", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.", "" ] } @@ -60,7 +45,7 @@ { "language": "typescript", "source": [ - "const core = new ov.Core();\nconst model = await core.readModel(modelXMLPath);\nconst compiledModel = await core.compileModel(model, 'CPU', { PERFORMANCE_HINT: 'LATENCY' });\n\nconst inputLayer = compiledModel.inputs[0];\nconst outputLayers = compiledModel.outputs;\n\nconst [height, width] = inputLayer.shape.slice(2);\n\nconst heatmapsOutputKey = 'Mconv7_stage2_L2';\n\nconst THRESHOLD = 0.3;\nconst COLOR = [0, 255, 0, 255];" + "const modelXMLPath = '../../assets/models/human-pose-estimation-0001.xml';\n\nconst core = new ov.Core();\nconst model = await core.readModel(modelXMLPath);\nconst compiledModel = await core.compileModel(model, 'AUTO', { PERFORMANCE_HINT: 'LATENCY' });\n\nconst inputLayer = compiledModel.inputs[0];\nconst outputLayers = compiledModel.outputs;\n\nconst [height, width] = inputLayer.shape.slice(2);\n\nconst heatmapsOutputKey = 'Mconv7_stage2_L2';\n\nconst THRESHOLD = 0.3;\nconst COLOR = 'rgb(0,255,0)';\n" ], "outputs": [] }, @@ -74,9 +59,18 @@ { "language": "typescript", "source": [ - "const imgData = await getImageData(imagePath);\n\nconst originalImage = cv.matFromImageData(imgData);\nconst { cols: originalWidth, rows: originalHeight } = originalImage;\n\nconst image = new cv.Mat();\ncv.cvtColor(originalImage, image, cv.COLOR_RGBA2RGB);\ncv.cvtColor(image, image, cv.COLOR_BGR2RGB);\ncv.resize(image, image, new cv.Size(width, 
height), cv.INTER_AREA);\n\n// NHWC to NCHW\nconst inputImage = transform(image.data, { width, height }, [0, 1, 2]);" + "const imagePath = '../../assets/images/intel_rnb.jpg';\nconst img = await Image.load(imagePath);\nimg.display(display);\n\n// Resize the image to meet network input size\nconst [inputHeight, inputWidth] = inputLayer.shape.slice(2);\nconst resizedImg = img.resize(inputWidth, inputHeight);\n\n// Prepare input tensor\nconst inputImageTransformedData = transform(\n resizedImg.rgb,\n { width: inputWidth, height: inputHeight },\n [0, 1, 2],\n);\nconst tensorData = new Float32Array(inputImageTransformedData);\nconst tensor = new ov.Tensor(ov.element.f32, inputLayer.shape, tensorData);\n" ], - "outputs": [] + "outputs": [ + { + "items": [ + { + "mime": "image/jpeg", + "value": "" + } + ] + } + ] }, { "language": "markdown", @@ -88,7 +82,7 @@ { "language": "typescript", "source": [ - "\n// Utility function to get the coordinates of the maximum value in a tensor\nfunction getCoords(tensor) {\n const { values, indices } = tf.topk(tensor.flatten(), 1);\n const [maxVal] = values.dataSync();\n const [maxIndex] = indices.dataSync();\n const x = maxIndex % tensor.shape[1];\n const y = Math.floor(maxIndex / tensor.shape[1]);\n\n return { x, y, confidence: maxVal };\n}\n\n// Draw keypoints on the input image\nfunction drawKeypoints(image, keypoints) {\n keypoints.forEach(keypoint => {\n if (keypoint.confidence > THRESHOLD) {\n cv.circle(image,\n new cv.Point(keypoint.x, keypoint.y),\n 2,\n COLOR,\n 3,\n );\n }\n });\n}\n\n// Draw skeleton (lines between keypoints) on the input image\nfunction drawSkeleton(image, keypoints) {\n const skeleton = [\n [1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9], [9, 10],\n [1, 11], [11, 12], [12, 13], [1, 0], [0, 14], [14, 16], [0, 15], [15, 17],\n ];\n\n skeleton.forEach(([start, end]) => {\n const startPoint = keypoints[start];\n const endPoint = keypoints[end];\n\n if (startPoint.confidence > THRESHOLD\n && 
endPoint.confidence > THRESHOLD) {\n cv.line(image,\n new cv.Point(startPoint.x, startPoint.y),\n new cv.Point(endPoint.x, endPoint.y),\n COLOR,\n 2,\n );\n }\n });\n}\n\nfunction toTFTensor(ovTensor) {\n return tf.tensor(ovTensor.data, ovTensor.getShape());\n}" + "// Utility function to get the coordinates of the maximum value in a tensor\nfunction getCoords(tensor) {\n const { values, indices } = tf.topk(tensor.flatten(), 1);\n const [maxVal] = values.dataSync();\n const [maxIndex] = indices.dataSync();\n const x = maxIndex % tensor.shape[1];\n const y = Math.floor(maxIndex / tensor.shape[1]);\n\n return { x, y, confidence: maxVal };\n}\n\n// Draw keypoints on the input image\nfunction drawKeypoints(image, keypoints) {\n let modifiedImage = image;\n\n keypoints.forEach(keypoint => {\n if (keypoint.confidence < THRESHOLD) return;\n\n modifiedImage = modifiedImage.drawCircle(\n keypoint.x,\n keypoint.y,\n 4,\n { color: COLOR, width: 3 },\n );\n });\n\n return modifiedImage;\n}\n\n// Draw skeleton (lines between keypoints) on the input image\nfunction drawSkeleton(image, keypoints) {\n const skeleton = [\n [1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9], [9, 10],\n [1, 11], [11, 12], [12, 13], [1, 0], [0, 14], [14, 16], [0, 15], [15, 17],\n ];\n\n let modifiedImage = image;\n skeleton.forEach(([start, end]) => {\n const startPoint = keypoints[start];\n const endPoint = keypoints[end];\n\n if (startPoint.confidence < THRESHOLD\n || endPoint.confidence < THRESHOLD) return;\n\n modifiedImage = modifiedImage.drawLine(\n startPoint.x,\n startPoint.y,\n endPoint.x,\n endPoint.y,\n { color: COLOR, width: 3 },\n );\n });\n\n return modifiedImage;\n}\n\nfunction toTFTensor(ovTensor) {\n return tf.tensor(ovTensor.data, ovTensor.getShape());\n}" ], "outputs": [] }, @@ -102,7 +96,7 @@ { "language": "typescript", "source": [ - "const tensorData = new Float32Array(inputImage);\nconst tensor = new ov.Tensor(ov.element.f32, inputLayer.shape, tensorData);\n\nconst 
inferRequest = compiledModel.createInferRequest();\ninferRequest.setInputTensor(tensor);\nconst outputs = inferRequest.infer();\n\nconst heatmaps = outputs[heatmapsOutputKey];" + "const inferRequest = compiledModel.createInferRequest();\nconst outputs = await inferRequest.inferAsync([tensor]);\n\nconst heatmaps = outputs[heatmapsOutputKey];" ], "outputs": [] }, @@ -116,18 +110,30 @@ { "language": "typescript", "source": [ - "try {\n const outputHeatmapShape = heatmaps.getShape();\n\n // Define constants\n const numKeypoints = 18;\n const heatmapWidth = outputHeatmapShape[3];\n const heatmapHeight = outputHeatmapShape[2];\n\n // Extract keypoints from heatmaps\n let keypoints = [];\n const heatmapsTFTensors = toTFTensor(heatmaps);\n const xCoef = originalWidth / heatmapWidth;\n const yCoef = originalHeight / heatmapHeight;\n\n for (let i = 0; i < numKeypoints; i++) {\n const heatmap = heatmapsTFTensors.slice(\n [0, i, 0, 0],\n [1, 1, heatmapHeight, heatmapWidth]).squeeze();\n\n const { x, y, confidence } = getCoords(heatmap);\n\n keypoints.push({\n x: x * xCoef,\n y: y * yCoef,\n confidence,\n });\n }\n\n drawKeypoints(originalImage, keypoints);\n drawSkeleton(originalImage, keypoints);\n\n displayArrayAsImage(originalImage.data, originalWidth, originalHeight, display);\n} catch(e) {\n console.log(e);\n}" + "try {\n const [N, C, heatmapHeight, heatmapWidth] = heatmaps.getShape();\n\n // Define constants\n const numKeypoints = 18;\n\n // Extract keypoints from heatmaps\n let keypoints = [];\n const heatmapsTFTensors = toTFTensor(heatmaps);\n const xCoef = img.width / heatmapWidth;\n const yCoef = img.height / heatmapHeight;\n\n for (let i = 0; i < numKeypoints; i++) {\n const heatmap = heatmapsTFTensors.slice(\n [0, i, 0, 0],\n [1, 1, heatmapHeight, heatmapWidth]).squeeze();\n\n const { x, y, confidence } = getCoords(heatmap);\n\n keypoints.push({\n x: x * xCoef,\n y: y * yCoef,\n confidence,\n });\n }\n\n let imgWithKeypoints = drawKeypoints(img, keypoints);\n 
imgWithKeypoints = drawSkeleton(imgWithKeypoints, keypoints);\n\n imgWithKeypoints.display(display);\n} catch(e) {\n console.log(e);\n}" ], "outputs": [ + { + "items": [ + { + "mime": "application/vnd.code.notebook.stderr", + "value": [ + "(node:3778163) [DEP0051] DeprecationWarning: The `util.isNullOrUndefined` API is deprecated. Please use `arg === null || arg === undefined` instead.", + "(Use `node --trace-deprecation ...` to show where the warning was created)", + "" + ] + } + ] + }, { "items": [ { "mime": "image/jpeg", - "value": "" + "value": "" } ] } ] } ] -} \ No newline at end of file +} diff --git a/samples/js/node/notebooks/question-answering.nnb b/samples/js/node/notebooks/question-answering.nnb index 3b53ca163f0826..c1bf61063b5e15 100644 --- a/samples/js/node/notebooks/question-answering.nnb +++ b/samples/js/node/notebooks/question-answering.nnb @@ -17,62 +17,10 @@ { "language": "javascript", "source": [ - "const {\n exp,\n sum,\n tril,\n triu,\n argMax,\n reshape,\n getShape,\n downloadFile,\n extractValues,\n matrixMultiplication,\n} = require('../helpers.js');\nconst tokens = require('./tokens_bert.js');\n\nconst { addon: ov } = require('openvino-node'); \n" + "const {\n exp,\n sum,\n tril,\n triu,\n argMax,\n reshape,\n getShape,\n downloadFile,\n extractValues,\n matrixMultiplication,\n} = require('../helpers.js');\nconst Tokenizer = require('./tokenizer_bert.js');\n\nconst { addon: ov } = require('openvino-node');\n" ], "outputs": [] }, - { - "language": "markdown", - "source": [ - "## Download the Model" - ], - "outputs": [] - }, - { - "language": "typescript", - "source": [ - "const baseArtifactsDir = '../../assets/models';\n\nconst modelName = 'bert-small-uncased-whole-word-masking-squad-int8-0002';\nconst modelXMLName = `${modelName}.xml`;\nconst modelBINName = `${modelName}.bin`;\n\nconst modelXMLPath = baseArtifactsDir + '/' + modelXMLName;\n\nconst baseURL = 
'https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.3/models_bin/1/bert-small-uncased-whole-word-masking-squad-int8-0002/FP16-INT8/';\n\nawait downloadFile(baseURL + modelXMLName, modelXMLName, baseArtifactsDir);\nawait downloadFile(baseURL + modelBINName, modelBINName, baseArtifactsDir);\n" - ], - "outputs": [ - { - "items": [ - { - "mime": "application/vnd.code.notebook.stdout", - "value": [ - "File successfully stored at '/home/nvishnya/Code/wasm-openvino/samples/js/assets/models/bert-small-uncased-whole-word-masking-squad-int8-0002.bin'", - "" - ] - } - ] - } - ] - }, - { - "language": "markdown", - "source": [ - "## Download the Vocab" - ], - "outputs": [] - }, - { - "language": "typescript", - "source": [ - "const baseImagesDir = '../../assets/text';\nconst imgUrl = 'https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/text/bert-uncased/vocab.txt';\n\nawait downloadFile(imgUrl, 'vocab.txt', baseImagesDir);\n" - ], - "outputs": [ - { - "items": [ - { - "mime": "application/vnd.code.notebook.stdout", - "value": [ - "File successfully stored at '/home/nvishnya/Code/wasm-openvino/samples/js/assets/text/vocab.txt'", - "" - ] - } - ] - } - ] - }, { "language": "markdown", "source": [ @@ -83,36 +31,10 @@ { "language": "typescript", "source": [ - "const core = new ov.Core();\nconst model = await core.readModel(modelXMLPath);\n\nconst _ppp = new ov.preprocess.PrePostProcessor(model);\n_ppp.input(0).tensor().setElementType(ov.element.f32);\n_ppp.input(1).tensor().setElementType(ov.element.f32);\n_ppp.input(2).tensor().setElementType(ov.element.f32);\n_ppp.input(3).tensor().setElementType(ov.element.f32);\n_ppp.build();\n\nconst compiledModel = await core.compileModel(model, 'CPU');\n\nconst inputs = compiledModel.inputs;\nconst outputs = compiledModel.outputs;\n\nconst inputSize = compiledModel.input(0).shape[1];\n" + "const modelXMLPath =\n '../../assets/models/bert-small-uncased-whole-word-masking-squad-0001.xml',\n\nconst 
core = new ov.Core();\nconst model = await core.readModel(modelXMLPath);\n\nconst _ppp = new ov.preprocess.PrePostProcessor(model);\n_ppp.input(0).tensor().setElementType(ov.element.f32);\n_ppp.input(1).tensor().setElementType(ov.element.f32);\n_ppp.input(2).tensor().setElementType(ov.element.f32);\n_ppp.build();\n\nconst compiledModel = await core.compileModel(model, 'AUTO');\n\nconst { inputs, outputs } = compiledModel;\nconst inputSize = compiledModel.input(0).shape[1];\n\nconsole.log('Input size:', inputSize);\n" ], "outputs": [] }, - { - "language": "javascript", - "source": [ - "console.log('=== Model Inputs:');\ninputs.forEach(i => console.log(`${i}`));\nconsole.log('=== Model Outputs:');\noutputs.forEach(o => console.log(`${o}`));\n" - ], - "outputs": [ - { - "items": [ - { - "mime": "application/vnd.code.notebook.stdout", - "value": [ - "=== Model Inputs:", - "input_ids", - "attention_mask", - "token_type_ids", - "position_ids", - "=== Model Outputs:", - "output_s", - "output_e", - "" - ] - } - ] - } - ] - }, { "language": "markdown", "source": [ @@ -123,14 +45,14 @@ { "language": "javascript", "source": [ - "// The path to the vocabulary file.\nconst vocabFilePath = \"../../assets/text/vocab.txt\";\n\n// Create a dictionary with words and their indices.\nconst vocab = await tokens.loadVocabFile(vocabFilePath);\n\n// Define special tokens.\nconst clsToken = vocab[\"[CLS]\"];\nconst padToken = vocab[\"[PAD]\"];\nconst sepToken = vocab[\"[SEP]\"];\n\n// A function to load text from given urls.\nfunction loadContext(sources) {\n const input_urls = [];\n const paragraphs = [];\n \n for (source of sources) {\n paragraphs.push(source);\n\n // Produce one big context string.\n return paragraphs.join('\\n');\n }\n}\n" + "// Initialize BERT tokenizer\nconst tokenizer = await Tokenizer.load('../../assets/vocab/vocab.txt');\n" ], "outputs": [] }, { "language": "markdown", "source": [ - "## Preprocessing\n\nThe input size in this case is 384 tokens long. 
The main input (`input_ids`) to used BERT model consists of two parts: question tokens and context tokens separated by some special tokens. \n\nIf `question + context` are shorter than 384 tokens, padding tokens are added. If `question + context` is longer than 384 tokens, the context must be split into parts and the question with different parts of context must be fed to the network many times. \n\nUse overlapping, so neighbor parts of the context are overlapped by half size of the context part (if the context part equals 300 tokens, neighbor context parts overlap with 150 tokens). You also need to provide the following sequences of integer values: \n\n- `attention_mask` - a sequence of integer values representing the mask of valid values in the input. \n- `token_type_ids` - a sequence of integer values representing the segmentation of `input_ids` into question and context. \n- `position_ids` - a sequence of integer values from 0 to 383 representing the position index for each input token. \n\nFor more information, refer to the **Input** section of [BERT model documentation](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/intel/bert-small-uncased-whole-word-masking-squad-int8-0002#input)." + "## Preprocessing\n\nThe input size in this case is 384 tokens long. The main input (`input_ids`) to used BERT model consists of two parts: question tokens and context tokens separated by some special tokens. \n\nIf `question + context` are shorter than 384 tokens, padding tokens are added. If `question + context` is longer than 384 tokens, the context must be split into parts and the question with different parts of context must be fed to the network many times. \n\nUse overlapping, so neighbor parts of the context are overlapped by half size of the context part (if the context part equals 300 tokens, neighbor context parts overlap with 150 tokens). 
You also need to provide the following sequences of integer values: \n\n- `attention_mask` - a sequence of integer values representing the mask of valid values in the input. \n- `token_type_ids` - a sequence of integer values representing the segmentation of `input_ids` into question and context.\n\nFor more information, refer to the **Input** section of [BERT model documentation](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/intel/bert-small-uncased-whole-word-masking-squad-int8-0002#input)." ], "outputs": [] }, @@ -158,14 +80,14 @@ { "language": "javascript", "source": [ - "// A function to add padding.\nfunction pad({ inputIds, attentionMask, tokenTypeIds }) {\n // How many padding tokens.\n const diffInputSize = inputSize - inputIds.length;\n\n if (diffInputSize > 0) {\n // Add padding to all the inputs.\n inputIds = inputIds.concat(Array(diffInputSize).fill(padToken));\n attentionMask = attentionMask.concat(Array(diffInputSize).fill(0));\n tokenTypeIds = tokenTypeIds.concat(Array(diffInputSize).fill(0));\n }\n\n return [inputIds, attentionMask, tokenTypeIds, diffInputSize];\n}\n" + "// A function to add padding.\nfunction pad({ inputIds, attentionMask, tokenTypeIds }) {\n // How many padding tokens.\n const diffInputSize = inputSize - inputIds.length;\n\n if (diffInputSize > 0) {\n // Add padding to all the inputs.\n inputIds = inputIds.concat(Array(diffInputSize).fill(tokenizer.padToken));\n attentionMask = attentionMask.concat(Array(diffInputSize).fill(0));\n tokenTypeIds = tokenTypeIds.concat(Array(diffInputSize).fill(0));\n }\n\n return [inputIds, attentionMask, tokenTypeIds, diffInputSize];\n}\n" ], "outputs": [] }, { "language": "javascript", "source": [ - "// A generator of a sequence of inputs.\nfunction* prepareInput(questionTokens, contextTokens) {\n // A length of question in tokens.\n const questionLen = questionTokens.length;\n // The context part size.\n const contextLen = inputSize - questionLen - 3;\n\n if (contextLen < 
16)\n throw new Error('Question is too long in comparison to input size. No space for context');\n\n const inputLayerNames = inputs.map(i => i.toString());\n\n // Take parts of the context with overlapping by 0.5.\n const max = Math.max(1, contextTokens.length - contextLen);\n\n for (let start = 0; start < max; start += parseInt(contextLen / 2)) {\n // A part of the context.\n const partContextTokens = contextTokens.slice(start, start + contextLen);\n // The input: a question and the context separated by special tokens.\n let inputIds = [clsToken, ...questionTokens, sepToken, ...partContextTokens, sepToken];\n // 1 for any index if there is no padding token, 0 otherwise.\n let attentionMask = Array(inputIds.length).fill(1);\n // 0 for question tokens, 1 for context part.\n let tokenTypeIds = [...Array(questionLen + 2).fill(0), ...Array(partContextTokens.length + 1).fill(1)];\n\n let padNumber = 0;\n\n // Add padding at the end.\n [inputIds, attentionMask, tokenTypeIds, padNumber] = pad({ inputIds, attentionMask, tokenTypeIds });\n\n // Create an input to feed the model.\n const inputDict = {\n 'input_ids': new Float32Array(inputIds),\n 'attention_mask': new Float32Array(attentionMask),\n 'token_type_ids': new Float32Array(tokenTypeIds),\n };\n\n // Some models require additional position_ids.\n if (inputLayerNames.includes('position_ids')) {\n positionIds = inputIds.map((_, index) => index);\n inputDict['position_ids'] = new Float32Array(positionIds);\n }\n\n yield [inputDict, padNumber, start];\n }\n}\n" + "// A generator of a sequence of inputs.\nfunction* prepareInput(questionTokens, contextTokens) {\n // A length of question in tokens.\n const questionLen = questionTokens.length;\n // The context part size.\n const contextLen = inputSize - questionLen - 3;\n\n if (contextLen < 16)\n throw new Error('Question is too long in comparison to input size. 
No space for context');\n\n const inputLayerNames = inputs.map(i => i.toString());\n\n // Take parts of the context with overlapping by 0.5.\n const max = Math.max(1, contextTokens.length - contextLen);\n\n for (let start = 0; start < max; start += parseInt(contextLen / 2)) {\n // A part of the context.\n const partContextTokens = contextTokens.slice(start, start + contextLen);\n // The input: a question and the context separated by special tokens.\n let inputIds = [\n tokenizer.clsToken,\n ...questionTokens,\n tokenizer.sepToken,\n ...partContextTokens,\n tokenizer.sepToken,\n ];\n // 1 for any index if there is no padding token, 0 otherwise.\n let attentionMask = Array(inputIds.length).fill(1);\n // 0 for question tokens, 1 for context part.\n let tokenTypeIds = [...Array(questionLen + 2).fill(0), ...Array(partContextTokens.length + 1).fill(1)];\n\n let padNumber = 0;\n\n // Add padding at the end.\n [inputIds, attentionMask, tokenTypeIds, padNumber] = pad({ inputIds, attentionMask, tokenTypeIds });\n\n // Create an input to feed the model.\n const inputDict = {\n 'input_ids': new Float32Array(inputIds),\n 'attention_mask': new Float32Array(attentionMask),\n 'token_type_ids': new Float32Array(tokenTypeIds),\n };\n\n // Some models require additional position_ids.\n if (inputLayerNames.includes('position_ids')) {\n positionIds = inputIds.map((_, index) => index);\n inputDict['position_ids'] = new Float32Array(positionIds);\n }\n\n yield [inputDict, padNumber, start];\n }\n}\n" ], "outputs": [] }, @@ -186,7 +108,7 @@ { "language": "javascript", "source": [ - "function getBestAnswer(question, context) {\n // Convert the context string to tokens.\n const [contextTokens, contextTokensStartEnd] = tokens.textToTokens(context.toLowerCase(), vocab);\n // Convert the question string to tokens.\n const [questionTokens] = tokens.textToTokens(question.toLowerCase(), vocab);\n\n const results = [];\n // Iterate through different parts of the context.\n for ([networkInput, 
padding, startIdx] of prepareInput(questionTokens, contextTokens)) {\n // Get output layers.\n const outputStartKey = compiledModel.output('output_s');\n const outputEndKey = compiledModel.output('output_e');\n\n // OpenVINO inference.\n const inferRequest = compiledModel.createInferRequest();\n\n const transformedInput = {\n 'input_ids': new ov.Tensor(ov.element.f32, [1, 384], networkInput['input_ids']),\n 'attention_mask': new ov.Tensor(ov.element.f32, [1, 384], networkInput['attention_mask']),\n 'token_type_ids': new ov.Tensor(ov.element.f32, [1, 384], networkInput['token_type_ids']),\n 'position_ids': new ov.Tensor(ov.element.f32, [1, 384], networkInput['position_ids']),\n }\n\n inferRequest.infer(transformedInput);\n\n const resultStart = inferRequest.getTensor(outputStartKey).data;\n const resultEnd = inferRequest.getTensor(outputEndKey).data;\n\n // Postprocess the result, getting the score and context range for the answer.\n const scoreStartEnd = postprocess(resultStart,\n resultEnd,\n questionTokens,\n contextTokensStartEnd,\n padding,\n startIdx);\n results.push(scoreStartEnd);\n }\n\n // Find the highest score.\n const scores = results.map(r => r[0]);\n const maxIndex = scores.indexOf(Math.max(scores));\n\n const answer = results[maxIndex];\n // Return the part of the context, which is already an answer.\n return [context.slice(answer[1], answer[2]), answer[0]];\n}\n" + "function getBestAnswer(question, context) {\n // Convert the context string to tokens.\n const [contextTokens, contextTokensStartEnd] = tokenizer.tokenize(context.toLowerCase());\n // Convert the question string to tokens.\n const [questionTokens] = tokenizer.tokenize(question.toLowerCase());\n\n // Get output layers.\n const outputStartKey = compiledModel.output('output_s');\n const outputEndKey = compiledModel.output('output_e');\n\n const inferRequest = compiledModel.createInferRequest();\n\n const results = [];\n const preparedInput = prepareInput(questionTokens, contextTokens);\n\n 
// Iterate through different parts of the context.\n for ([networkInput, padding, startIdx] of preparedInput) {\n // OpenVINO inference\n inferRequest.infer({\n 'input_ids': new ov.Tensor(ov.element.f32, [1, inputSize], networkInput['input_ids']),\n 'attention_mask': new ov.Tensor(ov.element.f32, [1, inputSize], networkInput['attention_mask']),\n 'token_type_ids': new ov.Tensor(ov.element.f32, [1, inputSize], networkInput['token_type_ids']),\n });\n\n const resultStartData = inferRequest.getTensor(outputStartKey).data;\n const resultEndData = inferRequest.getTensor(outputEndKey).data;\n\n // Postprocess the result, getting the score and context range for the answer.\n const scoreStartEnd = postprocess(resultStartData,\n resultEndData,\n questionTokens,\n contextTokensStartEnd,\n padding,\n startIdx);\n\n results.push(scoreStartEnd);\n }\n\n // Find the highest score.\n const scores = results.map(r => r[0]);\n const maxIndex = argMax(scores);\n\n const answer = results[maxIndex];\n // Return the part of the context, which is already an answer.\n return [context.slice(answer[1], answer[2]), answer[0]];\n}\n" ], "outputs": [] }, @@ -200,23 +122,9 @@ { "language": "javascript", "source": [ - "function runQuestionAnswering(sources, exampleQuestion) {\n console.log(`Context: ${sources}`);\n const context = loadContext(sources);\n\n if (!context.length)\n return console.log('Error: Empty context or outside paragraphs');\n\n if (exampleQuestion) {\n const startTime = process.hrtime.bigint();\n const [answer, score] = getBestAnswer(exampleQuestion, context);\n const execTime = Number(process.hrtime.bigint() - startTime) / 1e9;\n\n console.log(`Question: ${exampleQuestion}`);\n console.log(`Answer: ${answer}`);\n console.log(`Score: ${score}`);\n console.log(`Time: ${execTime}s`);\n }\n}\n\nconst sources = [\"Computational complexity theory is a branch of the theory of computation in theoretical computer \" +\n \"science that focuses on classifying computational problems 
according to their inherent difficulty, \" +\n \"and relating those classes to each other. A computational problem is understood to be a task that \" +\n \"is in principle amenable to being solved by a computer, which is equivalent to stating that the \" +\n \"problem may be solved by mechanical application of mathematical steps, such as an algorithm.\"]\n\nrunQuestionAnswering(sources, 'What is the term for a task that generally lends itself to being solved by a computer?');\n" - ], - "outputs": [ - { - "items": [ - { - "mime": "application/vnd.code.notebook.stdout", - "value": [ - "Context: Computational complexity theory is a branch of the theory of computation in theoretical computer science that focuses on classifying computational problems according to their inherent difficulty, and relating those classes to each other. A computational problem is understood to be a task that is in principle amenable to being solved by a computer, which is equivalent to stating that the problem may be solved by mechanical application of mathematical steps, such as an algorithm.", - "Score: 0.5286847737759395", - "Time: 0.045750747s", - "" - ] - } - ] - } - ] + "function runQuestionAnswering(sources, exampleQuestion) {\n const context = sources.join('\\n');\n\n if (!context.length)\n return console.log('Error: Empty context or outside paragraphs');\n\n if (exampleQuestion) {\n const startTime = process.hrtime.bigint();\n const [answer, score] = getBestAnswer(exampleQuestion, context);\n const execTime = Number(process.hrtime.bigint() - startTime) / 1e9;\n\n console.log(`Question: ${exampleQuestion}`);\n console.log(`Answer: ${answer}`);\n console.log(`Score: ${score}`);\n console.log(`Time: ${execTime}s`);\n }\n}\n\nconst context = [\n 'Computational complexity theory is a branch of the theory of computation in ' +\n 'theoretical computer science that focuses on classifying computational ' +\n 'problems according to their inherent difficulty and relating those classes ' +\n 'to 
each other. A computational problem is understood to be a task that is in ' +\n 'principle amenable to being solved by a computer, which is equivalent to ' +\n 'stating that the problem may be solved by mechanical application of ' +\n 'mathematical steps, such as an algorithm.',\n];\n\nconst question = 'What is the term for a task that generally lends itself to being solved by a computer?';\n\ntry {\n runQuestionAnswering(context, question);\n} catch (error) {\n console.error(error);\n}\n" + ], + "outputs": [] } ] -} +} \ No newline at end of file diff --git a/samples/js/node/notebooks/tokens_bert.js b/samples/js/node/notebooks/tokenizer_bert.js similarity index 79% rename from samples/js/node/notebooks/tokens_bert.js rename to samples/js/node/notebooks/tokenizer_bert.js index 10dc250abe8e51..b23bcbfc8519d2 100644 --- a/samples/js/node/notebooks/tokens_bert.js +++ b/samples/js/node/notebooks/tokenizer_bert.js @@ -1,10 +1,39 @@ const fs = require('node:fs/promises'); -exports.cleanWord = cleanWord; -exports.encodeByVoc = encodeByVoc; -exports.textToTokens = textToTokens; -exports.splitToWords = splitToWords; -exports.loadVocabFile = loadVocabFile; +class Tokenizer { + constructor(vocab, original) { + this.vocab = vocab; + this.original = original; + } + + get clsToken() { + return this.vocab["[CLS]"]; + } + + get padToken() { + return this.vocab["[PAD]"]; + } + + get sepToken() { + return this.vocab["[SEP]"]; + } + + tokenize(text) { + return textToTokens(text, this.vocab); + } + + detokenize(tokens) { + return tokens.map(t => this.original[t]).join(' '); + } + + static async load(path) { + const { vocab, original } = await loadVocabFile(path); + + return new Tokenizer(vocab, original); + } +} + +module.exports = Tokenizer; // Load vocabulary file for encoding async function loadVocabFile(vocabFileName) { @@ -17,7 +46,7 @@ async function loadVocabFile(vocabFileName) { vocab[token] = index; }); - return vocab; + return { vocab, original: lines }; } // Remove mark and 
control chars diff --git a/samples/js/node/notebooks/vision-background-removal.nnb b/samples/js/node/notebooks/vision-background-removal.nnb index 92e4da28d1b4e9..fea5ba9fc5dd68 100644 --- a/samples/js/node/notebooks/vision-background-removal.nnb +++ b/samples/js/node/notebooks/vision-background-removal.nnb @@ -10,38 +10,23 @@ { "language": "typescript", "source": [ - "const { cv } = require(\"opencv-wasm\");\nconst fs = require(\"fs\");\nconst { addon: ov } = require(\"openvino-node\");\nconst { display } = require(\"node-kernel\");\nconst {\n downloadFile,\n getImageData,\n transform,\n setShape,\n displayArrayAsImage,\n} = require(\"../helpers\");\n" + "const { addon: ov } = require('openvino-node');\nconst { display } = require('node-kernel');\n\nconst { transform } = require('../helpers');\nconst Image = require('../image');\n" ], "outputs": [] }, { "language": "markdown", "source": [ - "## Download Images and Model" + "## Load and Compile Unet Model" ], "outputs": [] }, { "language": "typescript", "source": [ - "const baseArtifactsDir = \"../../assets/models\";\nconst baseImagesDir = \"../../assets/images\";\n\nconst modelXMLName = \"unet_ir_model.xml\";\nconst modelBINName = \"unet_ir_model.bin\";\nconst modelXMLPath = `${baseArtifactsDir}/${modelXMLName}`;\nconst modelBinPath = `${baseArtifactsDir}/${modelBINName}`;\n\nconsole.log(`Model XML path: ${modelXMLPath}`);\nconsole.log(`Model BIN path: ${modelBinPath}`);\n\nconst foregroundImgUrl =\n \"https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco_hollywood.jpg\";\nconst backgroundImgUrl =\n \"https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/wall.jpg\";\nconst modelBaseURL =\n \"https://storage.openvinotoolkit.org/repositories/open_model_zoo/public/vision-background-removal/\";\n\nawait downloadFile(foregroundImgUrl, \"coco_hollywood.jpg\", baseImagesDir);\nawait downloadFile(backgroundImgUrl, \"wall.jpg\", baseImagesDir);\n\nawait 
downloadFile(modelBaseURL + modelXMLName, modelXMLName, baseArtifactsDir);\nawait downloadFile(modelBaseURL + modelBINName, modelBINName, baseArtifactsDir);\n" + "const modelXMLPath = '../../assets/models/unet_ir_model.xml';\n\nconst core = new ov.Core();\n\n// Read and compile model\nconst model = await core.readModel(modelXMLPath);\nconst compiledModel = await core.compileModel(model, 'AUTO');\nconst inputLayer = compiledModel.input(0);\nconst outputLayer = compiledModel.output(0);\n\nconsole.log(`inputLayer: ${inputLayer}`);\nconsole.log(`outputLayer: ${outputLayer}`);\n" ], - "outputs": [ - { - "items": [ - { - "mime": "application/vnd.code.notebook.stdout", - "value": [ - "Model XML path: ../assets/models/unet_ir_model.xml", - "Model BIN path: ../assets/models/unet_ir_model.bin", - "File successfully stored at '/home/prakash/OpenSource-Repos/ocr-node-sample-clone/ocr-node-sample/assets/images/coco_hollywood.jpg'", - "File successfully stored at '/home/prakash/OpenSource-Repos/ocr-node-sample-clone/ocr-node-sample/assets/images/wall.jpg'", - "" - ] - } - ] - } - ] + "outputs": [] }, { "language": "markdown", @@ -53,37 +38,10 @@ { "language": "typescript", "source": [ - "function normalizeImage(imageData, width, height) {\n // Mean and scale values\n const inputMean = [123.675, 116.28, 103.53];\n const inputScale = [58.395, 57.12, 57.375];\n\n const normalizedData = new Float32Array(imageData.length);\n const channels = 3;\n\n for (let i = 0; i < height; i++) {\n for (let j = 0; j < width; j++) {\n for (let c = 0; c < channels; c++) {\n const index = i * width * channels + j * channels + c;\n normalizedData[index] =\n (imageData[index] - inputMean[c]) / inputScale[c];\n }\n }\n }\n\n return normalizedData;\n}\n\nfunction removeBackground(mask, image) {\n // Iterate over the mask and set all background pixels to white\n for (let i = 0; i < mask.rows; i++) {\n for (let j = 0; j < mask.cols; j++) {\n if (mask.ucharPtr(i, j)[0] === 0) {\n image.ucharPtr(i, j)[0] = 
255;\n image.ucharPtr(i, j)[1] = 255;\n image.ucharPtr(i, j)[2] = 255;\n }\n }\n }\n}\n\nfunction removeForeground(mask, image) {\n // Iterate over the mask and set all foreground pixels to black\n for (let i = 0; i < mask.rows; i++) {\n for (let j = 0; j < mask.cols; j++) {\n if (mask.ucharPtr(i, j)[0] === 1) {\n image.ucharPtr(i, j)[0] = 0;\n image.ucharPtr(i, j)[1] = 0;\n image.ucharPtr(i, j)[2] = 0;\n } else {\n image.ucharPtr(i, j)[0] = image.ucharPtr(i, j)[0];\n image.ucharPtr(i, j)[1] = image.ucharPtr(i, j)[1];\n image.ucharPtr(i, j)[2] = image.ucharPtr(i, j)[2];\n }\n }\n }\n}\n\nfunction combineImages(mask, fgImage, bgImage, newImage) {\n // Iterate over the mask and combine the foreground and background images\n for (let i = 0; i < mask.rows; i++) {\n for (let j = 0; j < mask.cols; j++) {\n if (mask.ucharPtr(i, j)[0] === 1) {\n newImage.ucharPtr(i, j)[0] = fgImage.ucharPtr(i, j)[0];\n newImage.ucharPtr(i, j)[1] = fgImage.ucharPtr(i, j)[1];\n newImage.ucharPtr(i, j)[2] = fgImage.ucharPtr(i, j)[2];\n } else {\n newImage.ucharPtr(i, j)[0] = bgImage.ucharPtr(i, j)[0];\n newImage.ucharPtr(i, j)[1] = bgImage.ucharPtr(i, j)[1];\n newImage.ucharPtr(i, j)[2] = bgImage.ucharPtr(i, j)[2];\n }\n }\n }\n}\n" - ], - "outputs": [] - }, - { - "language": "markdown", - "source": [ - "## Load and Compile Unet Model" + "// Details about this normalization:\n// https://docs.openvino.ai/2024/notebooks/vision-background-removal-with-output.html#load-and-pre-process-input-image\nfunction normalizeImage(imageData, width, height) {\n // Mean and scale values\n const inputMean = [123.675, 116.28, 103.53];\n const inputScale = [58.395, 57.12, 57.375];\n\n const normalizedData = new Float32Array(imageData.length);\n const channels = 3;\n\n for (let i = 0; i < height; i++) {\n for (let j = 0; j < width; j++) {\n for (let c = 0; c < channels; c++) {\n const index = i * width * channels + j * channels + c;\n\n normalizedData[index] =\n (imageData[index] - inputMean[c]) / 
inputScale[c];\n }\n }\n }\n\n return normalizedData;\n}" ], "outputs": [] }, - { - "language": "typescript", - "source": [ - "const core = new ov.Core();\n\n// Read and compile model\nconst model = await core.readModel(modelXMLPath);\nconst compiledModel = await core.compileModel(model, \"CPU\");\nconst inputLayer = compiledModel.input(0);\nconst outputLayer = compiledModel.output(0);\n\nconsole.log(`inputLayer: ${inputLayer}`);\nconsole.log(`outputLayer: ${outputLayer}`);\n" - ], - "outputs": [ - { - "items": [ - { - "mime": "application/vnd.code.notebook.stdout", - "value": [ - "inputLayer: x", - "outputLayer: test_0", - "" - ] - } - ] - } - ] - }, { "language": "markdown", "source": [ @@ -94,7 +52,7 @@ { "language": "typescript", "source": [ - "// Get Image data from the foreground image\n\nconst fgrImageData = await getImageData(`${baseImagesDir}/coco_hollywood.jpg`);\nconst inputImageMat = cv.matFromImageData(fgrImageData);\nconst originalImageDisplayMat = inputImageMat.clone();\n\n// Convert the image shape to a shape and a data type expected by the network\nconst [B, C, H, W] = inputLayer.shape;\nconst resizedImage = new cv.Mat();\n\ncv.cvtColor(inputImageMat, inputImageMat, cv.COLOR_BGR2RGB);\ncv.resize(inputImageMat, resizedImage, new cv.Size(W, H));\n\nlet inputImage = transform(\n resizedImage.data,\n { width: W, height: H },\n [0, 1, 2]\n);\n\ninputImage = normalizeImage(inputImage, W, H);\n\nconst tensorData = new Float32Array(inputImage);\nconst tensor = new ov.Tensor(ov.element.f32, inputLayer.shape, tensorData);\n\n" + "const foregroundImagePath = '../../assets/images/coco_hollywood.jpg';\n\n// Load foreground image\nconst originalImg = await Image.load(foregroundImagePath);\n\n// Resize image to a shape expected by the network\nconst [modelInputHeight, modelInputWidth] = inputLayer.shape.slice(2);\nconst resized = await originalImg.resize(modelInputWidth, modelInputHeight);\n\n// Create a tensor from the normalized input image\nconst transformed = 
transform(\n resized.rgb,\n {\n width: modelInputWidth,\n height: modelInputHeight\n },\n [0, 1, 2]\n);\nconst normalizedInputImage = normalizeImage(\n transformed,\n modelInputWidth,\n modelInputHeight,\n);\nconst tensor = new ov.Tensor(ov.element.f32, inputLayer.shape, normalizedInputImage);\n" ], "outputs": [] }, @@ -108,26 +66,9 @@ { "language": "typescript", "source": [ - "// Do inference\nconst inferRequest = compiledModel.createInferRequest();\nconst inferResult = await inferRequest.inferAsync([tensor]);\n\nconst { data } = inferResult[outputLayer];\nconst reshapedResult = setShape(data, [512, 512]);\n\n// Create a Mat from the reshaped result\nconst reshapedMat = cv.matFromArray(512, 512, cv.CV_32F, reshapedResult.flat());\n\n// Get the height and width of the original image\nconst height = inputImageMat.rows;\nconst width = inputImageMat.cols;\n\n// Resize the inference result to the original image size\nconst resizedResult = new cv.Mat();\ncv.resize(\n reshapedMat,\n resizedResult,\n new cv.Size(width, height),\n 0,\n 0,\n cv.INTER_LINEAR\n);\n\n// Convert the resized result to uint8\nresizedResult.convertTo(resizedResult, cv.CV_8U);\n\n// Create a Mat to store the background removed result\nconst bgRemovedResult = originalImageDisplayMat.clone();\n\nremoveBackground(resizedResult, bgRemovedResult);\n\ndisplayArrayAsImage(\n originalImageDisplayMat.data,\n originalImageDisplayMat.cols,\n originalImageDisplayMat.rows,\n display\n);\ndisplayArrayAsImage(\n bgRemovedResult.data,\n bgRemovedResult.cols,\n bgRemovedResult.rows,\n display\n);\n" + "const inferRequest = compiledModel.createInferRequest();\n const inferResult = await inferRequest.inferAsync([tensor]);\n const { data: resultData } = inferResult[outputLayer];\n\n // Normalize the result data from grayscale to RGB\n const rgbData = [];\n for (let i = 0; i < resultData.length; i += 1) {\n const value = resultData[i] * 255;\n\n rgbData.push(value, value, value, 255);\n }\n\n // Create image based on 
result data\n const [outputHeight, outputWidth] = outputLayer.shape.slice(2);\n const maskImg = await Image.fromArray(rgbData, outputWidth, outputHeight);\n\n // Resize the result mask to the original image size and save it\n const { width, height } = originalImg;\n const resizedMaskImg = await maskImg.resize(originalImg.width, originalImg.height);\n resizedMaskImg.display(display);\n\n // Remove the foreground from the original image\n const removedBgImg = Image.mask(originalImg, resizedMaskImg);\n removedBgImg.display(display);\n" ], - "outputs": [ - { - "items": [ - { - "mime": "image/jpeg", - "value": "" - } - ] - }, - { - "items": [ - { - "mime": "image/jpeg", - "value": "" - } - ] - } - ] + "outputs": [] }, { "language": "markdown", @@ -139,26 +80,9 @@ { "language": "typescript", "source": [ - "// Get the background image data\nconst bgrImageData = await getImageData(`${baseImagesDir}/wall.jpg`);\nconst bgrImageMat = cv.matFromImageData(bgrImageData);\n\n// Resize the background image to the original image size\nconst resizedBgrImageMat = new cv.Mat();\ncv.resize(bgrImageMat, resizedBgrImageMat, new cv.Size(width, height));\n\n// Remove the foreground from the background image by\n// setting all foreground pixels to white\nremoveForeground(resizedResult, resizedBgrImageMat);\n\ndisplayArrayAsImage(\n resizedBgrImageMat.data,\n resizedBgrImageMat.cols,\n resizedBgrImageMat.rows,\n display\n);\n\n// create a new Mat to store the final image\nconst newImage = new cv.Mat(\n resizedBgrImageMat.rows,\n resizedBgrImageMat.cols,\n cv.CV_8UC3\n);\n\n// combine the foreground and background images to get the final image\ncombineImages(resizedResult, bgRemovedResult, resizedBgrImageMat, newImage);\n\ndisplayArrayAsImage(newImage.data, newImage.cols, newImage.rows, display);\n" + "const backgroundImagePath = '../../assets/images/wall.jpg';\n\n// Load the background image\nconst bgrImage = await Image.load(backgroundImagePath);\n\n// Resize the background image to the 
same size as the original image\nconst resizedBgrImage = bgrImage.resize(width, height);\n\n// Remove object from the background image\nconst removedFgImg = Image.mask(resizedBgrImage, resizedMaskImg.invert());\nremovedFgImg.display(display);\n\n// Combine the background and foreground images\nconst resultImg = Image.merge(removedBgImg, removedFgImg);\nresultImg.display(display);\n" ], - "outputs": [ - { - "items": [ - { - "mime": "image/jpeg", - "value": "" - } - ] - }, - { - "items": [ - { - "mime": "image/jpeg", - "value": "" - } - ] - } - ] + "outputs": [] } ] } \ No newline at end of file diff --git a/samples/js/node/optical_character_recognition/README.md b/samples/js/node/optical_character_recognition/README.md index 130566ca0bcd4c..c397bb3c846db3 100644 --- a/samples/js/node/optical_character_recognition/README.md +++ b/samples/js/node/optical_character_recognition/README.md @@ -1,6 +1,10 @@ # Optical Character Recognition Node.js Sample -Run: +Run sample: ```bash -node hello_reshape_ssd.js *path_to_detection_model_file* *path_to_recognition_model_file* *path_to_img* AUTO -``` \ No newline at end of file +node optical-character-recognition.js ../../assets/models/horizontal-text-detection-0001.xml ../../assets/models/text-recognition-resnet-fc.xml ../../assets/images/intel_rnb.jpg AUTO +``` +Where: +```bash +node optical-character-recognition.js *path_to_detection_model_file* *path_to_recognition_model_file* *path_to_img* *device* +``` diff --git a/samples/js/node/optical_character_recognition/optical-character-recognition.js b/samples/js/node/optical_character_recognition/optical-character-recognition.js index 5e371c1975a993..a5665afc67fa84 100644 --- a/samples/js/node/optical_character_recognition/optical-character-recognition.js +++ b/samples/js/node/optical_character_recognition/optical-character-recognition.js @@ -1,14 +1,10 @@ -const { addon: ov } = require('openvino-node'); -const fs = require('node:fs'); const path = require('node:path'); -const { 
createCanvas, ImageData } = require('canvas'); -const { cv } = require('opencv-wasm'); -const { - transform, - getImageData, - argMax, - setShape, -} = require('../helpers.js'); +const { addon: ov } = require('openvino-node'); + +const Image = require('../image.js'); +const { transform, argMax, setShape } = require('../helpers.js'); + +const OUTPUT_PATH = './output/'; if (require.main === module) { // Parsing and validation of input arguments @@ -32,110 +28,91 @@ if (require.main === module) { } async function main(detModelXMLPath, recModelXMLPath, imagePath, deviceName) { - // Initialize OpenVINO core and load the detection mode + // Initialize OpenVINO Core const core = new ov.Core(); + // Load the detection model const detModel = await core.readModel(detModelXMLPath); const detCompiledModel = await core.compileModel(detModel, deviceName); const detInputLayer = detCompiledModel.input(0); const detOutputLayer = detCompiledModel.output('boxes'); - const imageData = await getImageData(imagePath); - const inputImageMat = cv.matFromImageData(imageData); + const img = await Image.load(imagePath); // Resize the image to meet network input size - const [, , H, W] = detInputLayer.shape; - const resizedImage = new cv.Mat(); - cv.cvtColor(inputImageMat, inputImageMat, cv.COLOR_RGBA2RGB); - cv.cvtColor(inputImageMat, inputImageMat, cv.COLOR_BGR2RGB); - cv.resize(inputImageMat, resizedImage, new cv.Size(W, H)); + const [, , detInputHeight, detInputWidth] = detInputLayer.shape; + const resizedImg = img.resize(detInputWidth, detInputHeight); // Prepare input tensor - const inputImage = transform( - resizedImage.data, - { width: W, height: H }, + const inputImageTransformedData = transform( + resizedImg.rgb, + { width: detInputWidth, height: detInputHeight }, [0, 1, 2], ); - const tensorData = new Float32Array(inputImage); + const tensorData = new Float32Array(inputImageTransformedData); const tensor = new ov.Tensor(ov.element.f32, detInputLayer.shape, tensorData); + // Run 
inference on the detection model const detInferRequest = detCompiledModel.createInferRequest(); - const detResult = await detInferRequest.inferAsync([tensor]); - const boundingBoxesArray = extractBoundingBoxes(detResult[detOutputLayer]); + // Load the recognition model const recModel = await core.readModel(recModelXMLPath); const recModelCompiled = await core.compileModel(recModel, deviceName); - const recInputLayer = recModelCompiled.input(0); - const recOutputLayer = recModelCompiled.output(0); + const recInferRequest = recModelCompiled.createInferRequest(); - // Process each bounding box and run inference on the recognition model - const [, , height, width] = recInputLayer.shape; // Calculate ratios - const { ratioX, ratioY } = calculateRatios(inputImageMat, resizedImage); + const [ratioX, ratioY] = + [img.width / detInputWidth, img.height / detInputHeight]; + const boundingBoxesArray = extractBoundingBoxes(detResult[detOutputLayer]); + // Resize bounding boxes to the original image size + const boundingBoxesOriginalSizeArray = boundingBoxesArray.map(box => + [...multiplyByRatio(ratioX, ratioY, box), box[4]]); - // Convert image to grayscale - const grayscaleImage = convertToGrayscale(inputImageMat); + // Process each bounding box and run inference on the recognition model + const boxesWithAnnotations = []; + for (let i = 0; i < boundingBoxesOriginalSizeArray.length; i++) { + const box = boundingBoxesOriginalSizeArray[i]; + const [xMin, yMin, xMax, yMax] = box; + const croppedImg = img.crop(xMin, yMin, xMax - xMin, yMax - yMin); + await croppedImg.save(OUTPUT_PATH + `cropped_image_${i}.jpg`); - const annotations = []; - const croppedImages = []; + const annotation = + await performTextRecognition(recModel, recInferRequest, croppedImg); - for (let i = 0; i < boundingBoxesArray.length; i++) { - const crop = boundingBoxesArray[i]; - const [xMin, yMin, xMax, yMax] = multiplyByRatio(ratioX, ratioY, crop).map( - Math.floor, - ); - const cropRect = new cv.Rect(xMin, 
yMin, xMax - xMin, yMax - yMin); - const croppedImage = grayscaleImage.roi(cropRect); - - try { - const preprocessedCrop = resizeAndConvertCropToModelInput(croppedImage, [ - width, - height, - ]); - const tensorData = new Float32Array(preprocessedCrop); - const tensor = new ov.Tensor( - ov.element.f32, - Int32Array.from(recInputLayer.shape), - tensorData, - ); - - await inferAsyncProcess( - tensor, - recModelCompiled, - recOutputLayer, - i, - annotations, - ); - - croppedImages.push(cropImage(inputImageMat, xMin, yMin, xMax, yMax)); - } catch(error) { - console.error('Error during preprocessing:', error); - } - - croppedImage.delete(); + boxesWithAnnotations.push({ box, annotation }); + + console.log(`Box ${i}: [${box.join(',')}], Annotation: '${annotation}'`); } - grayscaleImage.delete(); + const annotatedImg = await putAnnotationsOnTheImage( + img, + boxesWithAnnotations, + { threshold: 0.3, confLabels: false }, + ); + const savePath = path.join(OUTPUT_PATH, 'output_image.jpg'); + await annotatedImg.save(savePath); + console.log(`The result was saved to ${savePath}`); +} - const boxesWithAnnotations = boundingBoxesArray.map((box, index) => ({ - box, - annotation: annotations[index], - })); +async function performTextRecognition(model, inferenceRequest, img) { + const inputLayerShape = model.input(0).shape; + const outputLayer = model.output(0); - logBoxesWithAnnotations(boxesWithAnnotations); + const [,, inputHeight, inputWidth] = inputLayerShape; + const resizedImg = img.resize(inputWidth, inputHeight); - convertResultToImage( - inputImageMat, - resizedImage, - boxesWithAnnotations, - { threshold: 0.3, confLabels: true }, - './assets/results/output_image.jpg', + // Convert image to grayscale and create tensor + const tensor = new ov.Tensor( + ov.element.f32, + inputLayerShape, + new Float32Array(resizedImg.grayscale), ); - croppedImages.forEach((croppedImage, i) => { - const savePath = `./assets/results/cropped_image_${i}.jpg`; - saveImage(croppedImage, 
savePath); - }); + const result = await inferenceRequest.inferAsync([tensor]); + const recognitionResults = extractRecognitionResults(result[outputLayer]); + const annotation = parseAnnotations(recognitionResults); + + return annotation; } // Function to extract bounding boxes from the model output @@ -147,56 +124,17 @@ function extractBoundingBoxes(output) { return setShape(boxes, [numberOfBoxes, foldingCoefficient]); } -// Function to calculate the ratios for the image -function calculateRatios(originalImage, resizedImage) { - const realY = originalImage.rows; - const realX = originalImage.cols; - const resizedY = resizedImage.rows; - const resizedX = resizedImage.cols; - const ratioX = realX / resizedX; - const ratioY = realY / resizedY; - - return { ratioX, ratioY }; -} - -// Function to convert the image to grayscale -function convertToGrayscale(originalImage) { - const grayscaleImage = new cv.Mat(); - cv.cvtColor(originalImage, grayscaleImage, cv.COLOR_BGR2GRAY); - - return grayscaleImage; -} - // Function to adjust bounding box coordinates by a given ratio function multiplyByRatio(ratioX, ratioY, box) { - const scaleShape = (shape, idx) => - idx % 2 ? Math.max(shape * ratioY, 10) : shape * ratioX; - - return box.map(scaleShape); -} - -// Function to resize and convert a crop to the recognition model input format -function resizeAndConvertCropToModelInput(crop, netShape) { - const [netWidth, netHeight] = netShape; + const scaleShape = (shape, idx) => { + const position = idx % 2 + ? 
Math.max(shape * ratioY, 10) + : shape * ratioX; - // Resize the crop to the network's input shape - const tempImg = new cv.Mat(); - cv.resize(crop, tempImg, new cv.Size(netWidth, netHeight)); - - // Create the reshaped buffer - const reshapedBuffer = new Uint8Array(netHeight * netWidth); - let index = 0; - - for (let i = 0; i < netHeight; i++) { - for (let j = 0; j < netWidth; j++) { - reshapedBuffer[index++] = tempImg.ucharPtr(i, j)[0]; - } + return Math.floor(position); } - // Clean up - tempImg.delete(); - - return reshapedBuffer; + return box.map(scaleShape); } // Function to extract recognition results from the model output @@ -219,204 +157,49 @@ function parseAnnotations(recognitionResults) { // Stop if end character is encountered if (parsedLetter === letters[0]) break; + annotation.push(parsedLetter); } return annotation.join(''); } -// Function to crop the image based on the bounding box coordinates -function cropImage(originalImage, xMin, yMin, xMax, yMax) { - xMin = Math.max(0, xMin); - yMin = Math.max(0, yMin); - xMax = Math.min(originalImage.cols, xMax); - yMax = Math.min(originalImage.rows, yMax); - if (xMin >= xMax || yMin >= yMax) { - throw new Error('Invalid crop coordinates'); - } - const roi = originalImage.roi( - new cv.Rect(xMin, yMin, xMax - xMin, yMax - yMin), - ); - const cropped = new cv.Mat(); - roi.copyTo(cropped); - roi.delete(); - - return cropped; -} - -// Get Text size -function getTextSize(text, fontFace, fontScale) { - const canvas = createCanvas(200, 200); - const ctx = canvas.getContext('2d'); - const adjustedFontScale = fontScale * 35; - ctx.font = `${adjustedFontScale}px ${fontFace}`; - const metrics = ctx.measureText(text); - const width = metrics.width; - const height = - metrics.actualBoundingBoxAscent + metrics.actualBoundingBoxDescent; - - return { width, height }; -} - -/* The convertResultToImage function visualizes object detection - results on an image by drawing bounding boxes around detected - objects and optionally 
adding labels to them. */ - -function convertResultToImage( - bgrImage, - resizedImage, - boxesWithAnnotations, - options, - savePath, -) { +// Takes original image and bounding boxes with annotations +// and returns the image with annotations +async function putAnnotationsOnTheImage(img, boxesWithAnnotations, options) { const defaultOptions = { threshold: 0.3, confLabels: true }; const { threshold, confLabels } = Object.assign(defaultOptions, options); - const colors = { - red: [255, 0, 0, 255], - green: [0, 255, 0, 255], - white: [255, 255, 255, 255], - }; - const [realY, realX] = [bgrImage.rows, bgrImage.cols]; - const [resizedY, resizedX] = [resizedImage.rows, resizedImage.cols]; - const [ratioX, ratioY] = [realX / resizedX, realY / resizedY]; - - const rgbImage = new cv.Mat(); - cv.cvtColor(bgrImage, rgbImage, cv.COLOR_BGR2RGB); + let finalImage = img; - boxesWithAnnotations.forEach(({ box, annotation }) => { + for (const item of boxesWithAnnotations) { + const { box, annotation } = item; const conf = box[box.length - 1]; - if (conf < threshold) return; - - const [xMin, yMin, xMax, yMax] = multiplyByRatio(ratioX, ratioY, box); - - cv.rectangle( - rgbImage, - new cv.Point(xMin, yMin), - new cv.Point(xMax, yMax), - colors.green, - 3, - ); + if (conf < threshold) continue; - if (!confLabels) return; + const [xMin, yMin, xMax, yMax] = box; + const yOffset = 10; - const text = `${annotation}`; - const fontScale = 0.8; - const thickness = 1; - const { width: textW, height: textH } = getTextSize( - text, - 'Arial', - fontScale, + finalImage = finalImage.drawRect( + xMin, yMin, + xMax - xMin, yMax - yMin, + { color: 'green', width: 3 }, ); - const imageCopy = rgbImage.clone(); - - cv.rectangle( - imageCopy, - new cv.Point(xMin, yMin - textH - 10), - new cv.Point(xMin + textW, yMin - 10), - colors.white, - cv.FILLED, + finalImage = finalImage.drawText( + annotation, + xMin, yMin - yOffset, + { font: '30px Arial' }, ); - cv.addWeighted(imageCopy, 0.4, rgbImage, 0.6, 0, 
rgbImage); - cv.putText( - rgbImage, - text, - new cv.Point(xMin, yMin - 10), - cv.FONT_HERSHEY_SIMPLEX, - fontScale, - colors.red, - thickness, - cv.LINE_AA, - ); - - imageCopy.delete(); - }); - - const saveDir = path.dirname(savePath); - if (!fs.existsSync(saveDir)) { - fs.mkdirSync(saveDir, { recursive: true }); - } - try { - saveImage(rgbImage, savePath); - } catch(e) { - console.log(`Error occurred while saving ----> ${e}`); - } - - return rgbImage; -} + if (!confLabels) continue; -// Infer async helper function - -async function inferAsyncProcess( - tensor, - recModelCompiled, - recOutputLayer, - i, - annotations, -) { - // Create infer request - const inferRequest = recModelCompiled.createInferRequest(); - - // Define the completion callback function - function completionCallback(outputTensor, i, annotations) { - const recognitionResults = extractRecognitionResults(outputTensor); - const annotation = parseAnnotations(recognitionResults); - annotations.push(annotation); - } - - // Start inference in asynchronous mode - try { - const result = await inferRequest.inferAsync([tensor]); - completionCallback(result[recOutputLayer], i, annotations); - } catch(error) { - console.error('Error during inference:', error); - } -} - -// Log boudning boxes with annotations -function logBoxesWithAnnotations(boxesWithAnnotations) { - boxesWithAnnotations.forEach((item, i) => { - const { box, annotation } = item; - console.log(`Box ${i}: [${box}], Annotation: ${annotation}`); - }); -} - -function saveImage(rgbImage, savePath) { - const canvas = createCanvas(rgbImage.cols, rgbImage.rows); - const ctx = canvas.getContext('2d'); - const componentsPerPixel = - rgbImage.data.length / (rgbImage.cols * rgbImage.rows); - const imgDataArr = []; - - if (componentsPerPixel === 1) { - for (const val of rgbImage.data) { - imgDataArr.push(val, val, val, 255); - } - } else if (componentsPerPixel === 3) { - for (let i = 0; i < rgbImage.data.length; i++) { - if (i % 3 === 0) 
imgDataArr.push(255); - imgDataArr.push(rgbImage.data[i]); - } - } - - const imageData = new ImageData( - new Uint8ClampedArray(imgDataArr), - rgbImage.cols, - rgbImage.rows, - ); - ctx.putImageData(imageData, 0, 0); - - const dataURL = canvas.toDataURL('image/jpeg'); - const base64Data = dataURL.replace(/^data:image\/jpeg;base64,/, ''); - const imageBuffer = Buffer.from(base64Data, 'base64'); - - const saveDir = path.dirname(savePath); - if (!fs.existsSync(saveDir)) { - fs.mkdirSync(saveDir, { recursive: true }); + finalImage = finalImage.drawText( + conf.toFixed(2), + xMin, yMax + 2 * yOffset, + { font: '20px Arial' }, + ); } - fs.writeFileSync(savePath, imageBuffer); - console.log('Image saved successfully!', savePath); + return finalImage; } diff --git a/samples/js/node/package-lock.json b/samples/js/node/package-lock.json index fe003eaac14935..96a013fb0435c7 100644 --- a/samples/js/node/package-lock.json +++ b/samples/js/node/package-lock.json @@ -7,14 +7,14 @@ "": { "name": "openvino-node-demo", "version": "1.0.0", + "hasInstallScript": true, "license": "Apache-2.0", "devDependencies": { + "@napi-rs/canvas": "^0.1.59", "@tensorflow/tfjs-node": "^4.19.0", "args": "^5.0.3", - "canvas": "^2.11.2", "eslint": "^8.39.0", "https-proxy-agent": "^7.0.2", - "opencv-wasm": "^4.3.0-10", "openvino-node": "^2024.4.0" }, "engines": { @@ -173,6 +173,180 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/@napi-rs/canvas": { + "version": "0.1.59", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas/-/canvas-0.1.59.tgz", + "integrity": "sha512-3vUtQ8DzYcz9xy86UUe8OfDiXNuuLB9zFAUs5N/I2GpkY/MWBJ2M7w5FqH380oC44IzYOWaOMLWCPfNZBsbBww==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 10" + }, + "optionalDependencies": { + "@napi-rs/canvas-android-arm64": "0.1.59", + "@napi-rs/canvas-darwin-arm64": "0.1.59", + "@napi-rs/canvas-darwin-x64": "0.1.59", + "@napi-rs/canvas-linux-arm-gnueabihf": "0.1.59", + "@napi-rs/canvas-linux-arm64-gnu": 
"0.1.59", + "@napi-rs/canvas-linux-arm64-musl": "0.1.59", + "@napi-rs/canvas-linux-x64-gnu": "0.1.59", + "@napi-rs/canvas-linux-x64-musl": "0.1.59", + "@napi-rs/canvas-win32-x64-msvc": "0.1.59" + } + }, + "node_modules/@napi-rs/canvas-android-arm64": { + "version": "0.1.59", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-android-arm64/-/canvas-android-arm64-0.1.59.tgz", + "integrity": "sha512-p4rRL9KIDz57Z+gKLpemX36DB7fVVHmY4DtesMGrnjx4gSBUM2M7LNzbzf4o3oPZGDiHMY0vnvNHR4dKfszNeg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-darwin-arm64": { + "version": "0.1.59", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-arm64/-/canvas-darwin-arm64-0.1.59.tgz", + "integrity": "sha512-+8s06WxcM9ilv9PVOl57hvasbwKWMfrrNAYknqMPCn4jpc4XDcLbrM5LTZGhhptlv9jQ9DmHfZ978/xInsMYXw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-darwin-x64": { + "version": "0.1.59", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-x64/-/canvas-darwin-x64-0.1.59.tgz", + "integrity": "sha512-6kziJHjXdxduYK2L2uuwjEIYoPJednKq+C81MCm3fPobXE4HBKs0JGXwq3GkWNe340U340vmagwXiFi6muEy+g==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-arm-gnueabihf": { + "version": "0.1.59", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm-gnueabihf/-/canvas-linux-arm-gnueabihf-0.1.59.tgz", + "integrity": "sha512-eCkyS7jojNmaUPaVFdNjAyS0R3isrJtUfRf1vRP6K50GRuHso3vwQRbZBPKM71qHdjPDylfaQc5H6/M7epyD+w==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + 
"node_modules/@napi-rs/canvas-linux-arm64-gnu": { + "version": "0.1.59", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-gnu/-/canvas-linux-arm64-gnu-0.1.59.tgz", + "integrity": "sha512-1u4++lbsolP1MAPViuDoZmgmDLKlV0iJnlHN2dfwgbu3t53P0l3jIT1oCIAiWil0OlrWtDF24JbY7LUUGH5aHg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-arm64-musl": { + "version": "0.1.59", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-musl/-/canvas-linux-arm64-musl-0.1.59.tgz", + "integrity": "sha512-eqevZ2kWPxeAnvhxl7U5tf6AiMnhlO4w2Hci79WQkfeirqQG6RRM4Jnxbh9iO3jkAnnOXmM4r+S3UrOcfIx1Rg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-x64-gnu": { + "version": "0.1.59", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-gnu/-/canvas-linux-x64-gnu-0.1.59.tgz", + "integrity": "sha512-F+T63RnLt0qYUXhbOpaome3vIWLW4xoQRmhTnkKDzOtBSnKVP7sCM6E5/5tByOFCR3fTj4ksMeeHy8zJScEExA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-x64-musl": { + "version": "0.1.59", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-musl/-/canvas-linux-x64-musl-0.1.59.tgz", + "integrity": "sha512-HhUgpTGQUR2VRslEC5Idf6s0hhamJiVlEh2k3AG9XXOwX6fg0xXkqm84DPiOCLzsO5bqtJEo+rh03BUSDcf53g==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-win32-x64-msvc": { + "version": "0.1.59", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-win32-x64-msvc/-/canvas-win32-x64-msvc-0.1.59.tgz", + "integrity": 
"sha512-bYMiZJsKPkU7HEoYI5E0alOSV1EkaigY4VEgGHPK9W/qGMmNFsxdbURQqa5h3zbhZTK5QRSdYYqowcTEYVIlug==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, "node_modules/@nodelib/fs.scandir": { "version": "2.1.5", "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", @@ -774,21 +948,6 @@ "node": ">=6" } }, - "node_modules/canvas": { - "version": "2.11.2", - "resolved": "https://registry.npmjs.org/canvas/-/canvas-2.11.2.tgz", - "integrity": "sha512-ItanGBMrmRV7Py2Z+Xhs7cT+FNt5K0vPL4p9EZ/UX/Mu7hFbkxSjKF2KVtPwX7UYWp7dRKnrTvReflgrItJbdw==", - "dev": true, - "hasInstallScript": true, - "dependencies": { - "@mapbox/node-pre-gyp": "^1.0.0", - "nan": "^2.17.0", - "simple-get": "^3.0.3" - }, - "engines": { - "node": ">=6" - } - }, "node_modules/chalk": { "version": "4.1.2", "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", @@ -924,18 +1083,6 @@ } } }, - "node_modules/decompress-response": { - "version": "4.2.1", - "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-4.2.1.tgz", - "integrity": "sha512-jOSne2qbyE+/r8G1VU+G/82LBs2Fs4LAsTiLSHOCOMZQl2OKZ6i8i4IyHemTe+/yIXOtTcRQMzPcgyhoFlqPkw==", - "dev": true, - "dependencies": { - "mimic-response": "^2.0.0" - }, - "engines": { - "node": ">=8" - } - }, "node_modules/deep-is": { "version": "0.1.4", "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz", @@ -1745,18 +1892,6 @@ "node": ">= 0.6" } }, - "node_modules/mimic-response": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-2.1.0.tgz", - "integrity": "sha512-wXqjST+SLt7R009ySCglWBCFpjUygmCIfD790/kVbiGmUgfYGuB14PiTd5DwVxSV4NcYHjzMkoj5LjQZwTQLEA==", - "dev": true, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/minimatch": { "version": "3.1.2", "resolved": 
"https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", @@ -1830,12 +1965,6 @@ "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", "dev": true }, - "node_modules/nan": { - "version": "2.20.0", - "resolved": "https://registry.npmjs.org/nan/-/nan-2.20.0.tgz", - "integrity": "sha512-bk3gXBZDGILuuo/6sKtr0DQmSThYHLtNCdSdXk9YkxD/jK6X2vmCyyXBBxyqZ4XcnzTyYEAThfX3DCEnLf6igw==", - "dev": true - }, "node_modules/natural-compare": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz", @@ -1908,12 +2037,6 @@ "wrappy": "1" } }, - "node_modules/opencv-wasm": { - "version": "4.3.0-10", - "resolved": "https://registry.npmjs.org/opencv-wasm/-/opencv-wasm-4.3.0-10.tgz", - "integrity": "sha512-EWmWLUzp2suoc6N44Y4ouWT85QwvShx23Q430R+lp6NyS828bjQn6mCgA3NJ6Z/S59aaTeeu+RhqPQIJIYld1w==", - "dev": true - }, "node_modules/openvino-node": { "version": "2024.4.0", "resolved": "https://registry.npmjs.org/openvino-node/-/openvino-node-2024.4.0.tgz", @@ -2272,37 +2395,6 @@ "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==", "dev": true }, - "node_modules/simple-concat": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz", - "integrity": "sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==", - "dev": true, - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ] - }, - "node_modules/simple-get": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/simple-get/-/simple-get-3.1.1.tgz", - "integrity": "sha512-CQ5LTKGfCpvE1K0n2us+kuMPbk/q0EKl82s4aheV9oXjFEz6W/Y7oQFVJuU6QG77hRT4Ghb5RURteF5vnWjupA==", - "dev": true, - "dependencies": { - 
"decompress-response": "^4.2.0", - "once": "^1.3.1", - "simple-concat": "^1.0.0" - } - }, "node_modules/sprintf-js": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", diff --git a/samples/js/node/package.json b/samples/js/node/package.json index 683312fab6742b..d392a72143de03 100644 --- a/samples/js/node/package.json +++ b/samples/js/node/package.json @@ -2,17 +2,18 @@ "name": "openvino-node-demo", "version": "1.0.0", "license": "Apache-2.0", + "type": "commonjs", "devDependencies": { + "@tensorflow/tfjs-node": "^4.19.0", "args": "^5.0.3", - "canvas": "^2.11.2", "eslint": "^8.39.0", "https-proxy-agent": "^7.0.2", - "opencv-wasm": "^4.3.0-10", - "@tensorflow/tfjs-node": "^4.19.0", - "openvino-node": "^2024.4.0" + "openvino-node": "^2024.4.0", + "@napi-rs/canvas": "^0.1.59" }, "scripts": { - "lint": "eslint ." + "lint": "eslint .", + "postinstall": "node ./fetch-samples-assets.js" }, "engines": { "node": ">=21.0.0" diff --git a/samples/js/node/vision_background_removal/README.md b/samples/js/node/vision_background_removal/README.md index cdefc0c4186cbb..a9a3e3270eae45 100644 --- a/samples/js/node/vision_background_removal/README.md +++ b/samples/js/node/vision_background_removal/README.md @@ -1,6 +1,10 @@ # Vision Background Removal Node.js Sample -Run: +Run sample: ```bash -node vision_background_removal.js *path_to_model_file* *path_to_foreground_image* *path_to_background_image* AUTO -``` \ No newline at end of file +node vision_background_removal.js ../../assets/models/unet_ir_model.xml ../../assets/images/coco_hollywood.jpg ../../assets/images/wall.jpg AUTO +``` +Where: +```bash +node vision_background_removal.js *path_to_model_file* *path_to_foreground_image* *path_to_background_image* *device* +``` diff --git a/samples/js/node/vision_background_removal/vision_background_removal.js b/samples/js/node/vision_background_removal/vision_background_removal.js index 2487371c597008..aabaad1a65b80c 100644 --- 
a/samples/js/node/vision_background_removal/vision_background_removal.js +++ b/samples/js/node/vision_background_removal/vision_background_removal.js @@ -1,12 +1,10 @@ -const { cv } = require('opencv-wasm'); -const fs = require('node:fs').promises; -const path = require('node:path'); const { addon: ov } = require('openvino-node'); -const { createCanvas, ImageData } = require('canvas'); -const { getImageData, transform, setShape } = require('../helpers'); + +const { transform } = require('../helpers'); +const Image = require('../image'); if (require.main === module) { -// Parsing and validation of input arguments + // Parsing and validation of input arguments if (process.argv.length !== 6) throw new Error( `Usage: ${process.argv[1]} ` + @@ -15,21 +13,23 @@ if (require.main === module) { ); const unetModelPath = process.argv[2]; - const foreGroundImage = process.argv[3]; - const backGroundImage = process.argv[4]; + const foregroundImagePath = process.argv[3]; + const backgroundImagePath = process.argv[4]; const deviceName = process.argv[5]; try { - main(unetModelPath, foreGroundImage, backGroundImage, deviceName); + main(unetModelPath, foregroundImagePath, backgroundImagePath, deviceName); } catch(error) { console.error('Error occurred', error); } } +module.exports = main; + async function main( unetModelPath, - foreGroundImage, - backGroundImage, + foregroundImagePath, + backgroundImagePath, deviceName, ) { const core = new ov.Core(); @@ -38,104 +38,81 @@ async function main( const model = await core.readModel(unetModelPath); const compiledModel = await core.compileModel(model, deviceName); - // Get the names of input and output layers. 
+ // Get the names of input and output layers const inputLayer = compiledModel.input(0); const outputLayer = compiledModel.output(0); - // Get Image data from the foreground image - const imageData = await getImageData(foreGroundImage); - const inputImageMat = cv.matFromImageData(imageData); - - // Convert the image shape to a shape and a data type expected by the network - const [, , H, W] = inputLayer.shape; - const resizedImage = new cv.Mat(); - cv.cvtColor(inputImageMat, inputImageMat, cv.COLOR_BGR2RGB); - cv.resize(inputImageMat, resizedImage, new cv.Size(W, H)); + // Load foreground image + const originalImg = await Image.load(foregroundImagePath); - const inputImage = transform( - resizedImage.data, - { width: W, height: H }, - [0, 1, 2], - ); - - // Normalize the input image Mat - const normalizedInputImage = normalizeImage(inputImage, W, H); + // Resize image to a shape expected by the network + const [, , modelInputHeight, modelInputWidth] = inputLayer.shape; + const resized = await originalImg.resize(modelInputWidth, modelInputHeight); // Create a tensor from the normalized input image - const tensorData = new Float32Array(normalizedInputImage); - const tensor = new ov.Tensor(ov.element.f32, inputLayer.shape, tensorData); + const transformed = transform( + resized.rgb, + { + width: modelInputWidth, + height: modelInputHeight + }, + [0, 1, 2] + ); + const normalizedInputImage = normalizeImage( + transformed, + modelInputWidth, + modelInputHeight, + ); + const tensor = new ov.Tensor(ov.element.f32, inputLayer.shape, normalizedInputImage); // Do inference const inferRequest = compiledModel.createInferRequest(); const inferResult = await inferRequest.inferAsync([tensor]); + const { data: resultData } = inferResult[outputLayer]; - const { data } = inferResult[outputLayer]; - const reshapedResult = setShape(data, [512, 512]); - - // Create a Mat from the reshaped result - const reshapedMat = cv.matFromArray( - 512, - 512, - cv.CV_32F, - reshapedResult.flat(), 
- ); - - // Get the height and width of the original image - const height = inputImageMat.rows; - const width = inputImageMat.cols; - - // Resize the inference result to the original image size - const resizedResult = new cv.Mat(); - cv.resize( - reshapedMat, - resizedResult, - new cv.Size(width, height), - 0, - 0, - cv.INTER_LINEAR, - ); - - // Convert the resized result to uint8 - resizedResult.convertTo(resizedResult, cv.CV_8U); - - // Create a Mat to store the background removed result - const bgRemovedResult = inputImageMat.clone(); + // Normalize the result data from grayscale to RGB + const rgbData = []; + for (let i = 0; i < resultData.length; i += 1) { + const value = resultData[i] * 255; - removeBackground(resizedResult, bgRemovedResult); + rgbData.push(value, value, value, 255); + } - // Save the background removed result - await saveImage(bgRemovedResult, './bg_removed_result.jpg'); + // Create image based on result data + const [outputHeight, outputWidth] = outputLayer.shape.slice(2); + const maskImg = await Image.fromArray(rgbData, outputWidth, outputHeight); - // Get the background image data - const bgrImageData = await getImageData(backGroundImage); - const bgrImageMat = cv.matFromImageData(bgrImageData); + // Resize the result mask to the original image size and save it + const { width, height } = originalImg; + const resizedMaskImg = await maskImg.resize(originalImg.width, originalImg.height); + const maskImagePath = './out_mask.jpg'; + await resizedMaskImg.save(maskImagePath); + console.log(`The mask image was saved to '${maskImagePath}'`); - // Resize the background image to the original image size - const resizedBgrImageMat = new cv.Mat(); - cv.cvtColor(bgrImageMat, bgrImageMat, cv.COLOR_BGR2RGB); - cv.resize(bgrImageMat, resizedBgrImageMat, new cv.Size(width, height)); + // Remove the foreground from the original image + const removedBgImg = Image.mask(originalImg, resizedMaskImg); - // Remove the foreground from the background image by - // 
setting all foreground pixels to white - removeForeground(resizedResult, resizedBgrImageMat); + // Load the background image + const bgrImage = await Image.load(backgroundImagePath); - // Save the foreground removed from the background image - await saveImage(resizedBgrImageMat, './fg_removed_from_background.jpg'); + // Resize the background image to the same size as the original image + const resizedBgrImage = bgrImage.resize(width, height); - // create a new Mat to store the final image - const newImage = new cv.Mat( - resizedBgrImageMat.rows, - resizedBgrImageMat.cols, - cv.CV_8UC3, - ); + // Remove object from the background image + const removedFgImg = Image.mask(resizedBgrImage, resizedMaskImg.invert()); - // combine the foreground and background images to get the final image - combineImages(resizedResult, bgRemovedResult, resizedBgrImageMat, newImage); + // Combine the background and foreground images + const resultImg = Image.merge(removedBgImg, removedFgImg); // Save the final image - await saveImage(newImage, './background_changed_image.jpg'); + const outputImagePath = './out_bgr_changed_image.jpg'; + await resultImg.save(outputImagePath); + console.log(`The result image was saved to '${outputImagePath}'`); + console.log('The background was successfully changed'); } +// Details about this normalization: +// https://docs.openvino.ai/2024/notebooks/vision-background-removal-with-output.html#load-and-pre-process-input-image function normalizeImage(imageData, width, height) { // Mean and scale values const inputMean = [123.675, 116.28, 103.53]; @@ -148,6 +125,7 @@ function normalizeImage(imageData, width, height) { for (let j = 0; j < width; j++) { for (let c = 0; c < channels; c++) { const index = i * width * channels + j * channels + c; + normalizedData[index] = (imageData[index] - inputMean[c]) / inputScale[c]; } @@ -156,93 +134,3 @@ function normalizeImage(imageData, width, height) { return normalizedData; } - -function removeBackground(mask, image) { - 
// Iterate over the mask and set all background pixels to white - for (let i = 0; i < mask.rows; i++) { - for (let j = 0; j < mask.cols; j++) { - if (mask.ucharPtr(i, j)[0] === 0) { - image.ucharPtr(i, j)[0] = 255; - image.ucharPtr(i, j)[1] = 255; - image.ucharPtr(i, j)[2] = 255; - } - } - } -} - -function removeForeground(mask, image) { - // Iterate over the mask and set all foreground pixels to black - for (let i = 0; i < mask.rows; i++) { - for (let j = 0; j < mask.cols; j++) { - if (mask.ucharPtr(i, j)[0] === 1) { - image.ucharPtr(i, j)[0] = 0; - image.ucharPtr(i, j)[1] = 0; - image.ucharPtr(i, j)[2] = 0; - } else { - image.ucharPtr(i, j)[0] = image.ucharPtr(i, j)[0]; - image.ucharPtr(i, j)[1] = image.ucharPtr(i, j)[1]; - image.ucharPtr(i, j)[2] = image.ucharPtr(i, j)[2]; - } - } - } -} - -function combineImages(mask, fgImage, bgImage, newImage) { - // Iterate over the mask and combine the foreground and background images - for (let i = 0; i < mask.rows; i++) { - for (let j = 0; j < mask.cols; j++) { - if (mask.ucharPtr(i, j)[0] === 1) { - newImage.ucharPtr(i, j)[0] = fgImage.ucharPtr(i, j)[0]; - newImage.ucharPtr(i, j)[1] = fgImage.ucharPtr(i, j)[1]; - newImage.ucharPtr(i, j)[2] = fgImage.ucharPtr(i, j)[2]; - } else { - newImage.ucharPtr(i, j)[0] = bgImage.ucharPtr(i, j)[0]; - newImage.ucharPtr(i, j)[1] = bgImage.ucharPtr(i, j)[1]; - newImage.ucharPtr(i, j)[2] = bgImage.ucharPtr(i, j)[2]; - } - } - } -} - -async function saveImage(rgbImage, savePath) { - const canvas = createCanvas(rgbImage.cols, rgbImage.rows); - const ctx = canvas.getContext('2d'); - const componentsPerPixel = - rgbImage.data.length / (rgbImage.cols * rgbImage.rows); - const imgDataArr = []; - - if (componentsPerPixel === 1) { - for (const val of rgbImage.data) { - imgDataArr.push(val, val, val, 255); - } - } else if (componentsPerPixel === 3) { - for (let i = 0; i < rgbImage.data.length; i += 3) { - imgDataArr.push( - rgbImage.data[i + 2], // Red - rgbImage.data[i + 1], // Green - 
rgbImage.data[i], // Blue - 255, // Alpha - ); - } - } - - const imageData = new ImageData( - new Uint8ClampedArray(imgDataArr), - rgbImage.cols, - rgbImage.rows, - ); - ctx.putImageData(imageData, 0, 0); - - const dataURL = canvas.toDataURL('image/jpeg'); - const base64Data = dataURL.replace(/^data:image\/jpeg;base64,/, ''); - const imageBuffer = Buffer.from(base64Data, 'base64'); - - const saveDir = path.dirname(savePath); - try { - await fs.mkdir(saveDir, { recursive: true }); - await fs.writeFile(savePath, imageBuffer); - console.log('Image saved successfully!', savePath); - } catch(error) { - console.error('Error saving image:', error); - } -} diff --git a/src/bindings/c/include/openvino/c/ov_prepostprocess.h b/src/bindings/c/include/openvino/c/ov_prepostprocess.h index ea2a82943dd683..d7cbbdc26eb5cb 100644 --- a/src/bindings/c/include/openvino/c/ov_prepostprocess.h +++ b/src/bindings/c/include/openvino/c/ov_prepostprocess.h @@ -93,6 +93,18 @@ typedef enum { RESIZE_NEAREST //!< nearest algorithm } ov_preprocess_resize_algorithm_e; +/** + * @enum ov_padding_mode_e + * @ingroup ov_prepostprocess_c_api + * @brief This enum contains enumeration for padding mode. + */ +typedef enum { + CONSTANT = 0, //!< Pads with given constant value. + EDGE, //!< Pads with tensor edge values. + REFLECT, //!< Pads with reflection of tensor data along axis. Values on the edges are not duplicated. + SYMMETRIC //!< Pads similar like `REFLECT` but values on the edges are duplicated. +} ov_padding_mode_e; + /** * @brief Create a ov_preprocess_prepostprocessor_t instance. * @ingroup ov_prepostprocess_c_api @@ -512,3 +524,23 @@ ov_preprocess_input_model_info_set_layout(ov_preprocess_input_model_info_t* prep */ OPENVINO_C_API(ov_status_e) ov_preprocess_prepostprocessor_build(const ov_preprocess_prepostprocessor_t* preprocess, ov_model_t** model); + +/** + * @brief Add pad preprocess operation. Extends an input tensor on edges with constants. 
+ * + * @param preprocess_input_process_steps A pointer to the ov_preprocess_preprocess_steps_t. + * @param pads_begin Number of padding elements to add at the beginning of each axis. + * @param pads_begin_size Pads begin size (number of axes). + * @param pads_end Number of padding elements to add at the end of each axis. + * @param pads_end_size Pads end size (number of axes). + * @param value Value to be populated in the padded area (mode=CONSTANT) + * @param mode Padding mode. + */ +OPENVINO_C_API(ov_status_e) +ov_preprocess_preprocess_steps_pad(const ov_preprocess_preprocess_steps_t* preprocess_input_process_steps, + const int* const pads_begin, + size_t pads_begin_size, + const int* const pads_end, + size_t pads_end_size, + float value, + ov_padding_mode_e mode); diff --git a/src/bindings/c/src/ov_prepostprocess.cpp b/src/bindings/c/src/ov_prepostprocess.cpp index 616883dd54e74c..8448e9daeb10b1 100644 --- a/src/bindings/c/src/ov_prepostprocess.cpp +++ b/src/bindings/c/src/ov_prepostprocess.cpp @@ -24,6 +24,12 @@ const std::map color_format_map {ov_color_format_e::RGBX, ov::preprocess::ColorFormat::RGBX}, {ov_color_format_e::BGRX, ov::preprocess::ColorFormat::BGRX}}; +const std::map padding_mode_map = { + {ov_padding_mode_e::CONSTANT, ov::preprocess::PaddingMode::CONSTANT}, + {ov_padding_mode_e::EDGE, ov::preprocess::PaddingMode::EDGE}, + {ov_padding_mode_e::REFLECT, ov::preprocess::PaddingMode::REFLECT}, + {ov_padding_mode_e::SYMMETRIC, ov::preprocess::PaddingMode::SYMMETRIC}}; + #define GET_OV_COLOR_FARMAT(a) \ (color_format_map.find(a) == color_format_map.end() ? 
ov::preprocess::ColorFormat::UNDEFINED \ : color_format_map.at(a)) @@ -524,3 +530,23 @@ ov_status_e ov_preprocess_prepostprocessor_build(const ov_preprocess_prepostproc return ov_status_e::OK; } + +ov_status_e ov_preprocess_preprocess_steps_pad(const ov_preprocess_preprocess_steps_t* preprocess_input_process_steps, + const int* const pads_begin, + size_t pads_begin_size, + const int* const pads_end, + size_t pads_end_size, + float value, + ov_padding_mode_e mode) { + if (!preprocess_input_process_steps) { + return ov_status_e::INVALID_C_PARAM; + } + try { + std::vector vec_begin(pads_begin, pads_begin + pads_begin_size); + std::vector vec_end(pads_end, pads_end + pads_end_size); + preprocess_input_process_steps->object->pad(vec_begin, vec_end, value, padding_mode_map.at(mode)); + } + CATCH_OV_EXCEPTIONS + + return ov_status_e::OK; +} diff --git a/src/bindings/c/tests/ov_preprocess_test.cpp b/src/bindings/c/tests/ov_preprocess_test.cpp index 94252bd3a3cbb9..aeb7a097af1352 100644 --- a/src/bindings/c/tests/ov_preprocess_test.cpp +++ b/src/bindings/c/tests/ov_preprocess_test.cpp @@ -1,6 +1,8 @@ // Copyright (C) 2018-2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // +#include + #include "ov_test.hpp" class ov_preprocess_test : public ::testing::Test { @@ -596,3 +598,71 @@ TEST_F(ov_preprocess_test, ov_preprocess_prepostprocessor_for_nv12_input) { ov_layout_free(layout); } + +TEST_F(ov_preprocess_test, ov_preprocess_preprocess_steps_pad_constant) { + OV_EXPECT_OK(ov_preprocess_prepostprocessor_create(model, &preprocess)); + EXPECT_NE(nullptr, preprocess); + + OV_EXPECT_OK(ov_preprocess_prepostprocessor_get_input_info(preprocess, &input_info)); + EXPECT_NE(nullptr, input_info); + + OV_EXPECT_OK(ov_preprocess_input_info_get_tensor_info(input_info, &input_tensor_info)); + EXPECT_NE(nullptr, input_tensor_info); + + ov_shape_t shape; + int64_t dims[4] = {1, 1, 225, 230}; + OV_EXPECT_OK(ov_shape_create(4, dims, &shape)); + + 
OV_EXPECT_OK(ov_tensor_create(ov_element_type_e::F32, shape, &tensor)); + OV_EXPECT_OK(ov_preprocess_input_tensor_info_set_from(input_tensor_info, tensor)); + + OV_EXPECT_OK(ov_preprocess_input_info_get_preprocess_steps(input_info, &input_process)); + EXPECT_NE(nullptr, input_process); + + constexpr auto pads_begin = std::array{0, 1, 2, 0}; + constexpr auto pads_end = std::array{0, 1, 0, -3}; + + OV_EXPECT_OK(ov_preprocess_preprocess_steps_pad(input_process, + pads_begin.data(), + pads_begin.size(), + pads_end.data(), + pads_end.size(), + 1.0f, + ov_padding_mode_e::CONSTANT)); + OV_EXPECT_OK(ov_preprocess_prepostprocessor_build(preprocess, &ppp_model)); + EXPECT_NE(nullptr, ppp_model); +} + +TEST_F(ov_preprocess_test, ov_preprocess_preprocess_steps_pad_edge) { + OV_EXPECT_OK(ov_preprocess_prepostprocessor_create(model, &preprocess)); + EXPECT_NE(nullptr, preprocess); + + OV_EXPECT_OK(ov_preprocess_prepostprocessor_get_input_info(preprocess, &input_info)); + EXPECT_NE(nullptr, input_info); + + OV_EXPECT_OK(ov_preprocess_input_info_get_tensor_info(input_info, &input_tensor_info)); + EXPECT_NE(nullptr, input_tensor_info); + + ov_shape_t shape; + int64_t dims[4] = {1, 2, 225, 230}; + OV_EXPECT_OK(ov_shape_create(4, dims, &shape)); + + OV_EXPECT_OK(ov_tensor_create(ov_element_type_e::F32, shape, &tensor)); + OV_EXPECT_OK(ov_preprocess_input_tensor_info_set_from(input_tensor_info, tensor)); + + OV_EXPECT_OK(ov_preprocess_input_info_get_preprocess_steps(input_info, &input_process)); + EXPECT_NE(nullptr, input_process); + + constexpr auto pads_begin = std::array{0, 0, 2, 0}; + constexpr auto pads_end = std::array{0, 1, 0, -3}; + + OV_EXPECT_OK(ov_preprocess_preprocess_steps_pad(input_process, + pads_begin.data(), + pads_begin.size(), + pads_end.data(), + pads_end.size(), + 1.0f, + ov_padding_mode_e::EDGE)); + OV_EXPECT_OK(ov_preprocess_prepostprocessor_build(preprocess, &ppp_model)); + EXPECT_NE(nullptr, ppp_model); +} diff --git a/src/bindings/python/constraints.txt 
b/src/bindings/python/constraints.txt index c7837798c8aca7..a0fbf982105ad6 100644 --- a/src/bindings/python/constraints.txt +++ b/src/bindings/python/constraints.txt @@ -18,7 +18,7 @@ patchelf<=0.17.2.1 # Frontends h5py>=3.1.0,<3.13.0 docopt~=0.6.2 -paddlepaddle==2.6.0 +paddlepaddle==2.6.2 tensorflow>=1.15.5,<2.18.0 six~=1.16.0 protobuf>=3.18.1,<4.0.0 diff --git a/src/bindings/python/requirements_test.txt b/src/bindings/python/requirements_test.txt index a9ee93b1d9e9bc..1aa2ff24b1b948 100644 --- a/src/bindings/python/requirements_test.txt +++ b/src/bindings/python/requirements_test.txt @@ -7,7 +7,7 @@ flake8-annotations-complexity<=0.0.8 flake8-broken-line<=1.0.0 flake8-bugbear<=24.8.19 flake8-class-attributes-order<=0.1.3 -flake8-comprehensions<=3.15.0 +flake8-comprehensions<=3.16.0 flake8-debugger<=4.1.2 flake8-docstrings<=1.7.0 flake8-eradicate<=1.5.0 diff --git a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend.py b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend.py index c6c01fa98e5e99..9f2ef019769875 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend.py @@ -49,18 +49,22 @@ openvino_options = {} -@register_backend +# Disable regional compilation which was enabled by default from Torch 2.5.0 +if hasattr(torch._dynamo.config, "inline_inbuilt_nn_modules"): + torch._dynamo.config.inline_inbuilt_nn_modules=False + @fake_tensor_unsupported def openvino(subgraph, example_inputs, options=None): - if (_get_aot_autograd(options)): + if _get_aot_autograd(options): global openvino_options openvino_options = options decompositions = _get_decompositions(options) + get_inf_decomposition_list() + get_aot_decomposition_list() - return aot_autograd(fw_compiler=fx_openvino, - bw_compiler=fx_openvino, - decompositions=get_decompositions(decompositions))(subgraph, example_inputs) + return aot_autograd(fw_compiler=fx_openvino, 
bw_compiler=fx_openvino, decompositions=get_decompositions(decompositions))(subgraph, example_inputs) return fx_openvino(subgraph, example_inputs, options) +if "openvino" not in torch.compiler.list_backends(): + register_backend(compiler_fn=openvino, name="openvino") + def fx_openvino(subgraph, example_inputs, options=None): try: if len(openvino_options) != 0: @@ -70,7 +74,7 @@ def fx_openvino(subgraph, example_inputs, options=None): openvino_model_caching = _get_model_caching(options) if openvino_model_caching is not None and openvino_model_caching: # Create a hash to be used for caching - model_hash_str = sha256(subgraph.code.encode('utf-8')).hexdigest() + model_hash_str = sha256(subgraph.code.encode("utf-8")).hexdigest() executor_parameters = {"model_hash_str": model_hash_str} # Check if the model was fully supported and already cached example_inputs.reverse() @@ -79,15 +83,17 @@ def fx_openvino(subgraph, example_inputs, options=None): if os.path.isfile(maybe_fs_cached_name + ".xml") and os.path.isfile(maybe_fs_cached_name + ".bin"): # Model is fully supported and already cached. Run the cached OV model directly. 
compiled_model = openvino_compile_cached_model(maybe_fs_cached_name, options, *example_inputs) + def _call(*args): res = execute_cached(compiled_model, *args) return res + return _call if inputs_reversed: example_inputs.reverse() preserved_arg_indices = [] - if (_get_aot_autograd(options)): + if _get_aot_autograd(options): if tracing_context := torch._guards.TracingContext.try_get(): fw_metadata = tracing_context.fw_metadata params_flat = tracing_context.params_flat @@ -97,6 +103,7 @@ def _call(*args): model = subgraph else: from torch._subclasses.fake_tensor import FakeTensorMode + decompositions = _get_decompositions(options) + get_inf_decomposition_list() with FakeTensorMode(allow_non_fake_inputs=True): model = make_fx(subgraph, decomposition_table=get_decompositions(decompositions))(*example_inputs) @@ -106,26 +113,27 @@ def _call(*args): partitioner = Partitioner(options) compiled_model = partitioner.make_partitions(model, options) - if executor_parameters is not None and 'model_hash_str' in executor_parameters: + if executor_parameters is not None and "model_hash_str" in executor_parameters: # Check if the model is fully supported. 
fully_supported = partitioner.check_fully_supported(compiled_model) if fully_supported: executor_parameters["model_hash_str"] += "_fs" def _call(*args): - if(_get_aot_autograd(options)): + if _get_aot_autograd(options): args_list = args[0] args_new = [args_list[i] for i in preserved_arg_indices] args = args_new - res = execute(compiled_model, *args, executor="openvino", - executor_parameters=executor_parameters, options=options) + res = execute(compiled_model, *args, executor="openvino", executor_parameters=executor_parameters, options=options) return res - if(_get_aot_autograd(options)): - _call._boxed_call = True # type: ignore[attr-defined] + + if _get_aot_autograd(options): + _call._boxed_call = True # type: ignore[attr-defined] return _call except Exception as e: logger.debug(f"Failed in OpenVINO execution: {e}") return compile_fx(subgraph, example_inputs) + def reset(): clear_caches() diff --git a/src/bindings/python/src/openvino/runtime/opset1/__init__.py b/src/bindings/python/src/openvino/runtime/opset1/__init__.py index 5bfa42f43f26b9..ca7e1aef385b2a 100644 --- a/src/bindings/python/src/openvino/runtime/opset1/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset1/__init__.py @@ -54,7 +54,6 @@ from openvino.runtime.opset1.ops import logical_xor from openvino.runtime.opset1.ops import lrn from openvino.runtime.opset1.ops import lstm_cell -from openvino.runtime.opset1.ops import lstm_sequence from openvino.runtime.opset1.ops import matmul from openvino.runtime.opset1.ops import max_pool from openvino.runtime.opset1.ops import maximum diff --git a/src/bindings/python/src/openvino/runtime/opset1/ops.py b/src/bindings/python/src/openvino/runtime/opset1/ops.py index c100a6c2db2cb4..54f32d404336d4 100644 --- a/src/bindings/python/src/openvino/runtime/opset1/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset1/ops.py @@ -1532,95 +1532,6 @@ def lstm_cell( return _get_node_factory_opset1().create("LSTMCell", node_inputs, attributes) 
-@deprecated(version="2025.0", message="Use lstm_sequence from opset 5") -@nameable_op -def lstm_sequence( - X: NodeInput, - initial_hidden_state: NodeInput, - initial_cell_state: NodeInput, - sequence_lengths: NodeInput, - W: NodeInput, - R: NodeInput, - B: NodeInput, - hidden_size: int, - direction: str, - activations: Optional[List[str]] = None, - activations_alpha: Optional[List[float]] = None, - activations_beta: Optional[List[float]] = None, - clip: float = 0.0, - name: Optional[str] = None, -) -> Node: - """Return a node which performs LSTMSequence operation. - - :param X: The input tensor. Shape: [batch_size, seq_length, input_size]. - :param initial_hidden_state: The hidden state tensor. - Shape: [batch_size, num_directions, hidden_size]. - :param initial_cell_state: The cell state tensor. - Shape: [batch_size, num_directions, hidden_size]. - :param sequence_lengths: Specifies real sequence lengths for each batch element. - Shape: [batch_size]. Integer type. - :param W: Tensor with weights for matrix multiplication operation with input portion of data. - Shape: [num_directions, 4*hidden_size, input_size]. - :param R: The tensor with weights for matrix multiplication operation with hidden state. - Shape: [num_directions, 4*hidden_size, hidden_size]. - :param B: The tensor with biases. - Shape: [num_directions, 4*hidden_size]. - :param hidden_size: Specifies hidden state size. - :param direction: Specifies if the RNN is forward, reverse, or bidirectional. - :param activations: The list of three activation functions for gates. - :param activations_alpha: The list of alpha parameters for activation functions. - :param activations_beta: The list of beta parameters for activation functions. - :param clip: Specifies bound values [-C, C] for tensor clipping performed before activations. - :param name: An optional name of the output node. - - :return: The new node represents LSTMSequence. Node outputs count: 3. 
- """ - if activations is None: - activations = ["sigmoid", "tanh", "tanh"] - if activations_alpha is None: - activations_alpha = [] - if activations_beta is None: - activations_beta = [] - - node_inputs = as_nodes( - X, - initial_hidden_state, - initial_cell_state, - sequence_lengths, - W, - R, - B, - name=name, - ) - - # P - nGraph additional input, no such input in the OV spec - peepholes_count = 3 # nGraph default - if direction.lower() == "bidirectional": - num_directions = 2 - else: - num_directions = 1 - peepholes_shape = [num_directions, peepholes_count * hidden_size] - peepholes_array = np.zeros(peepholes_shape) # nGraph default - data_dtype = get_dtype(node_inputs[0].get_output_element_type(0)) - default_p = make_constant_node(peepholes_array, dtype=data_dtype) - node_inputs.append(default_p) - - weights_format = "fico" # OV LSTMWeightsFormat, no such attribute in the OV spec - input_forget = False # nGraph default, no such attribute in the OV spec - - attributes = { - "hidden_size": hidden_size, - "direction": direction.lower(), - "activations": activations, - "activations_alpha": activations_alpha, - "activations_beta": activations_beta, - "clip": clip, - "weights_format": weights_format, - "input_forget": input_forget, - } - return _get_node_factory_opset1().create("LSTMSequence", node_inputs, attributes) - - @nameable_op def matmul( data_a: NodeInput, diff --git a/src/bindings/python/src/openvino/runtime/opset13/ops.py b/src/bindings/python/src/openvino/runtime/opset13/ops.py index cb201d3d4263dd..a624ffb4f79873 100644 --- a/src/bindings/python/src/openvino/runtime/opset13/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset13/ops.py @@ -15,7 +15,7 @@ from openvino.runtime.op import Constant, Result from openvino.runtime.opset1 import convert_like from openvino.runtime.opset_utils import _get_node_factory -from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op +from openvino.runtime.utils.decorators import binary_op, 
nameable_op, unary_op, overloading from openvino.runtime.utils.types import ( NumericData, NodeInput, @@ -271,7 +271,7 @@ def scaled_dot_product_attention( return _get_node_factory_opset13().create("ScaledDotProductAttention", inputs, attributes) -@singledispatch +@overloading(Union[NumericData, np.number, bool, np.bool_, list], Union[NumericType, Type], Optional[str], bool) # type: ignore @nameable_op def constant( value: Union[NumericData, np.number, bool, np.bool_, list], @@ -339,9 +339,9 @@ def display_shared_memory_warning(warning_message: str) -> None: return Constant(_value, shared_memory=_shared_memory) -@constant.register +@overloading(Tensor, bool, Optional[str]) # type: ignore @nameable_op -def _( +def constant( # noqa: F811 tensor: Tensor, shared_memory: bool = False, name: Optional[str] = None, diff --git a/src/bindings/python/src/openvino/runtime/opset15/__init__.py b/src/bindings/python/src/openvino/runtime/opset15/__init__.py index 6cc9c24827a85f..c4dd48d9087ae1 100644 --- a/src/bindings/python/src/openvino/runtime/opset15/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset15/__init__.py @@ -188,7 +188,7 @@ from openvino.runtime.opset1.ops import split from openvino.runtime.opset1.ops import sqrt from openvino.runtime.opset1.ops import squared_difference -from openvino.runtime.opset1.ops import squeeze +from openvino.runtime.opset15.ops import squeeze from openvino.runtime.opset15.ops import stft from openvino.runtime.opset1.ops import strided_slice from openvino.runtime.opset1.ops import subtract diff --git a/src/bindings/python/src/openvino/runtime/opset15/ops.py b/src/bindings/python/src/openvino/runtime/opset15/ops.py index 45b01a11bc3588..93aacb29572340 100644 --- a/src/bindings/python/src/openvino/runtime/opset15/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset15/ops.py @@ -326,7 +326,7 @@ def stft( :return: The new node performing STFT operation. 
""" inputs = as_nodes(data, window, frame_size, frame_step, name=name) - return _get_node_factory_opset15().create("STFT", inputs) + return _get_node_factory_opset15().create("STFT", inputs, {"transpose_frames": transpose_frames}) @nameable_op @@ -348,3 +348,42 @@ def search_sorted( inputs = as_nodes(sorted_sequence, values, name=name) attributes = {"right_mode": right_mode} return _get_node_factory_opset15().create("SearchSorted", inputs, attributes) + + +@nameable_op +def squeeze( + data: NodeInput, + axes: Optional[NodeInput] = None, + allow_axis_skip: bool = False, + name: Optional[str] = None, +) -> Node: + """Perform squeeze operation on input tensor. + + :param data: The node with data tensor. + :param axes: Optional list of integers, indicating the dimensions to squeeze. + Negative indices are supported. One of: input node or array. + :param allow_axis_skip: If true, shape inference results in a dynamic rank, when + selected axis has value 1 in its dynamic range. Used only if axes input + is given. Defaults to false. + :param name: Optional new name for output node. + :return: The new node performing a squeeze operation on input tensor. + + Remove single-dimensional entries from the shape of a tensor. + Takes an optional parameter `axes` with a list of axes to squeeze. + If `axes` is not provided, all the single dimensions will be removed from the shape. 
+ + For example: + + Inputs: tensor with shape [1, 2, 1, 3, 1, 1], axes=[2, 4] + + Result: tensor with shape [1, 2, 3, 1] + """ + if axes is None: + inputs = as_nodes(data, name=name) + else: + inputs = as_nodes(data, axes, name=name) + return _get_node_factory_opset15().create( + "Squeeze", + inputs, + {"allow_axis_skip": allow_axis_skip} + ) diff --git a/src/bindings/python/src/openvino/runtime/opset2/__init__.py b/src/bindings/python/src/openvino/runtime/opset2/__init__.py index 34d0d9b6737709..6624149e157e9e 100644 --- a/src/bindings/python/src/openvino/runtime/opset2/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset2/__init__.py @@ -56,7 +56,6 @@ from openvino.runtime.opset1.ops import logical_xor from openvino.runtime.opset1.ops import lrn from openvino.runtime.opset1.ops import lstm_cell -from openvino.runtime.opset1.ops import lstm_sequence from openvino.runtime.opset1.ops import matmul from openvino.runtime.opset1.ops import max_pool from openvino.runtime.opset1.ops import maximum diff --git a/src/bindings/python/src/openvino/runtime/opset3/__init__.py b/src/bindings/python/src/openvino/runtime/opset3/__init__.py index 964acb15b34bbc..5bd68912cae807 100644 --- a/src/bindings/python/src/openvino/runtime/opset3/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset3/__init__.py @@ -65,7 +65,6 @@ from openvino.runtime.opset1.ops import logical_xor from openvino.runtime.opset1.ops import lrn from openvino.runtime.opset1.ops import lstm_cell -from openvino.runtime.opset1.ops import lstm_sequence from openvino.runtime.opset1.ops import matmul from openvino.runtime.opset1.ops import max_pool from openvino.runtime.opset1.ops import maximum diff --git a/src/bindings/python/src/openvino/runtime/opset4/__init__.py b/src/bindings/python/src/openvino/runtime/opset4/__init__.py index bf57172bed40e2..e7cef8eb216d17 100644 --- a/src/bindings/python/src/openvino/runtime/opset4/__init__.py +++ 
b/src/bindings/python/src/openvino/runtime/opset4/__init__.py @@ -70,7 +70,6 @@ from openvino.runtime.opset1.ops import logical_xor from openvino.runtime.opset1.ops import lrn from openvino.runtime.opset4.ops import lstm_cell -from openvino.runtime.opset1.ops import lstm_sequence from openvino.runtime.opset1.ops import matmul from openvino.runtime.opset1.ops import max_pool from openvino.runtime.opset1.ops import maximum diff --git a/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py b/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py index dd90ded374ca11..d1dce289d09941 100644 --- a/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py +++ b/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py @@ -23,3 +23,4 @@ from openvino._pyopenvino.properties.hint import allow_auto_batching from openvino._pyopenvino.properties.hint import dynamic_quantization_group_size from openvino._pyopenvino.properties.hint import kv_cache_precision +from openvino._pyopenvino.properties.hint import activations_scale_factor diff --git a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp index a6b30bd773001f..564e5f69f5ee14 100644 --- a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp +++ b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp @@ -101,6 +101,7 @@ void regmodule_properties(py::module m) { wrap_property_RW(m_hint, ov::hint::allow_auto_batching, "allow_auto_batching"); wrap_property_RW(m_hint, ov::hint::dynamic_quantization_group_size, "dynamic_quantization_group_size"); wrap_property_RW(m_hint, ov::hint::kv_cache_precision, "kv_cache_precision"); + wrap_property_RW(m_hint, ov::hint::activations_scale_factor, "activations_scale_factor"); // Submodule intel_cpu py::module m_intel_cpu = diff --git a/src/bindings/python/tests/test_graph/test_constant.py 
b/src/bindings/python/tests/test_graph/test_constant.py index e28a4ad05510f2..131654855b380a 100644 --- a/src/bindings/python/tests/test_graph/test_constant.py +++ b/src/bindings/python/tests/test_graph/test_constant.py @@ -87,7 +87,7 @@ def test_init_with_array(src_dtype, dst_dtype, shared_flag, data_getter): data = np.ascontiguousarray(data) # Create constant from based on numpy dtype or openvino type - ov_const = ops.constant(data, dtype=dst_dtype, shared_memory=shared_flag) + ov_const = ops.constant(data, dst_dtype, shared_memory=shared_flag) # Check shape and element type of Constant class assert isinstance(ov_const, Constant) @@ -842,7 +842,7 @@ def test_get_data_casting_bf16(src_dtype, dst_dtype, copy_flag): ) def test_get_data_casting_packed(src_dtype, ov_type, dst_dtype, copy_flag): data = np.array([[0, 0, 0, 0, 1, 0, 0, 1], [0, 0, 0, 0, 0, 0, 0, 1]], dtype=src_dtype) - ov_const = ops.constant(data, dtype=ov_type) + ov_const = ops.constant(value=data, dtype=ov_type) arr = ov_const.get_data(dtype=dst_dtype, copy=copy_flag) if dst_dtype is None: @@ -867,7 +867,7 @@ def test_const_from_tensor(shared_flag): shape = [1, 3, 32, 32] arr = np.ones(shape).astype(np.float32) ov_tensor = Tensor(arr, shape, Type.f32) - ov_const = ops.constant(ov_tensor, shared_memory=shared_flag) + ov_const = ops.constant(tensor=ov_tensor, shared_memory=shared_flag) assert isinstance(ov_const, Constant) assert np.all(list(ov_const.shape) == shape) diff --git a/src/bindings/python/tests/test_graph/test_create_op.py b/src/bindings/python/tests/test_graph/test_create_op.py index 87787e1e29bc32..f86ea4a18a8ca1 100644 --- a/src/bindings/python/tests/test_graph/test_create_op.py +++ b/src/bindings/python/tests/test_graph/test_create_op.py @@ -315,264 +315,6 @@ def test_lstm_cell_operator(dtype): assert list(node_param.get_output_shape(1)) == [1, 128] -@pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize("op_name", ["lstm", "lstmOpset1"]) -def 
test_lstm_cell_operator_opset1(dtype, op_name): - batch_size = 1 - input_size = 16 - hidden_size = 128 - - x_shape = [batch_size, input_size] - h_t_shape = [batch_size, hidden_size] - c_t_shape = [batch_size, hidden_size] - w_shape = [4 * hidden_size, input_size] - r_shape = [4 * hidden_size, hidden_size] - b_shape = [4 * hidden_size] - - parameter_x = ov.parameter(x_shape, name="X", dtype=dtype) - parameter_h_t = ov.parameter(h_t_shape, name="H_t", dtype=dtype) - parameter_c_t = ov.parameter(c_t_shape, name="C_t", dtype=dtype) - parameter_w = ov.parameter(w_shape, name="W", dtype=dtype) - parameter_r = ov.parameter(r_shape, name="R", dtype=dtype) - parameter_b = ov.parameter(b_shape, name="B", dtype=dtype) - - node_default = ov_opset1.lstm_cell( - parameter_x, parameter_h_t, parameter_c_t, parameter_w, parameter_r, parameter_b, hidden_size, name=op_name, - ) - - assert node_default.get_type_name() == "LSTMCell" - assert node_default.get_friendly_name() == op_name - assert node_default.get_output_size() == 2 - assert list(node_default.get_output_shape(0)) == [1, 128] - assert list(node_default.get_output_shape(1)) == [1, 128] - - activations = ["tanh", "Sigmoid", "RELU"] - activation_alpha = [1.0, 2.0, 3.0] - activation_beta = [3.0, 2.0, 1.0] - clip = 0.5 - - node_param = ov_opset1.lstm_cell( - parameter_x, - parameter_h_t, - parameter_c_t, - parameter_w, - parameter_r, - parameter_b, - hidden_size, - activations, - activation_alpha, - activation_beta, - clip, - ) - - assert node_param.get_type_name() == "LSTMCell" - assert node_param.get_output_size() == 2 - assert list(node_param.get_output_shape(0)) == [1, 128] - assert list(node_param.get_output_shape(1)) == [1, 128] - - -@pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize("op_name", ["lstm", "lstmOpset1"]) -def test_lstm_sequence_operator_bidirectional_opset1(dtype, op_name): - batch_size = 1 - input_size = 16 - hidden_size = 128 - num_directions = 2 - seq_length = 2 - - 
x_shape = [batch_size, seq_length, input_size] - h_t_shape = [batch_size, num_directions, hidden_size] - c_t_shape = [batch_size, num_directions, hidden_size] - seq_len_shape = [batch_size] - w_shape = [num_directions, 4 * hidden_size, input_size] - r_shape = [num_directions, 4 * hidden_size, hidden_size] - b_shape = [num_directions, 4 * hidden_size] - - parameter_x = ov.parameter(x_shape, name="X", dtype=dtype) - parameter_h_t = ov.parameter(h_t_shape, name="H_t", dtype=dtype) - parameter_c_t = ov.parameter(c_t_shape, name="C_t", dtype=dtype) - parameter_seq_len = ov.parameter(seq_len_shape, name="seq_len", dtype=np.int32) - parameter_w = ov.parameter(w_shape, name="W", dtype=dtype) - parameter_r = ov.parameter(r_shape, name="R", dtype=dtype) - parameter_b = ov.parameter(b_shape, name="B", dtype=dtype) - - direction = "BIDIRECTIONAL" - with pytest.warns(DeprecationWarning): - node = ov_opset1.lstm_sequence( - parameter_x, - parameter_h_t, - parameter_c_t, - parameter_seq_len, - parameter_w, - parameter_r, - parameter_b, - hidden_size, - direction, - name=op_name, - ) - - assert node.get_type_name() == "LSTMSequence" - assert node.get_friendly_name() == op_name - assert node.get_output_size() == 3 - - activations = ["RELU", "tanh", "Sigmoid"] - activation_alpha = [1.0, 2.0, 3.0] - activation_beta = [3.0, 2.0, 1.0] - clip = 1.22 - - with pytest.warns(DeprecationWarning): - node_param = ov_opset1.lstm_sequence( - parameter_x, - parameter_h_t, - parameter_c_t, - parameter_seq_len, - parameter_w, - parameter_r, - parameter_b, - hidden_size, - direction, - activations, - activation_alpha, - activation_beta, - clip, - ) - - assert node_param.get_type_name() == "LSTMSequence" - assert node_param.get_output_size() == 3 - - -@pytest.mark.parametrize("dtype", [np.float32, np.float64]) -def test_lstm_sequence_operator_reverse_opset1(dtype): - batch_size = 2 - input_size = 4 - hidden_size = 3 - num_directions = 1 - seq_length = 2 - - x_shape = [batch_size, seq_length, 
input_size] - h_t_shape = [batch_size, num_directions, hidden_size] - c_t_shape = [batch_size, num_directions, hidden_size] - seq_len_shape = [batch_size] - w_shape = [num_directions, 4 * hidden_size, input_size] - r_shape = [num_directions, 4 * hidden_size, hidden_size] - b_shape = [num_directions, 4 * hidden_size] - - parameter_x = ov.parameter(x_shape, name="X", dtype=dtype) - parameter_h_t = ov.parameter(h_t_shape, name="H_t", dtype=dtype) - parameter_c_t = ov.parameter(c_t_shape, name="C_t", dtype=dtype) - parameter_seq_len = ov.parameter(seq_len_shape, name="seq_len", dtype=np.int32) - parameter_w = ov.parameter(w_shape, name="W", dtype=dtype) - parameter_r = ov.parameter(r_shape, name="R", dtype=dtype) - parameter_b = ov.parameter(b_shape, name="B", dtype=dtype) - - direction = "REVERSE" - with pytest.warns(DeprecationWarning): - node_default = ov_opset1.lstm_sequence( - parameter_x, - parameter_h_t, - parameter_c_t, - parameter_seq_len, - parameter_w, - parameter_r, - parameter_b, - hidden_size, - direction, - ) - - assert node_default.get_type_name() == "LSTMSequence" - assert node_default.get_output_size() == 3 - - activations = ["RELU", "tanh", "Sigmoid"] - activation_alpha = [1.0, 2.0, 3.0] - activation_beta = [3.0, 2.0, 1.0] - clip = 1.22 - with pytest.warns(DeprecationWarning): - node_param = ov_opset1.lstm_sequence( - parameter_x, - parameter_h_t, - parameter_c_t, - parameter_seq_len, - parameter_w, - parameter_r, - parameter_b, - hidden_size, - direction, - activations, - activation_alpha, - activation_beta, - clip, - ) - - assert node_param.get_type_name() == "LSTMSequence" - assert node_param.get_output_size() == 3 - - -@pytest.mark.parametrize("dtype", [np.float32, np.float64]) -def test_lstm_sequence_operator_forward_opset1(dtype): - batch_size = 2 - input_size = 4 - hidden_size = 3 - num_directions = 1 - seq_length = 2 - - x_shape = [batch_size, seq_length, input_size] - h_t_shape = [batch_size, num_directions, hidden_size] - c_t_shape = 
[batch_size, num_directions, hidden_size] - seq_len_shape = [batch_size] - w_shape = [num_directions, 4 * hidden_size, input_size] - r_shape = [num_directions, 4 * hidden_size, hidden_size] - b_shape = [num_directions, 4 * hidden_size] - - parameter_x = ov.parameter(x_shape, name="X", dtype=dtype) - parameter_h_t = ov.parameter(h_t_shape, name="H_t", dtype=dtype) - parameter_c_t = ov.parameter(c_t_shape, name="C_t", dtype=dtype) - parameter_seq_len = ov.parameter(seq_len_shape, name="seq_len", dtype=np.int32) - parameter_w = ov.parameter(w_shape, name="W", dtype=dtype) - parameter_r = ov.parameter(r_shape, name="R", dtype=dtype) - parameter_b = ov.parameter(b_shape, name="B", dtype=dtype) - - direction = "forward" - with pytest.warns(DeprecationWarning): - node_default = ov_opset1.lstm_sequence( - parameter_x, - parameter_h_t, - parameter_c_t, - parameter_seq_len, - parameter_w, - parameter_r, - parameter_b, - hidden_size, - direction, - ) - - assert node_default.get_type_name() == "LSTMSequence" - assert node_default.get_output_size() == 3 - - activations = ["RELU", "tanh", "Sigmoid"] - activation_alpha = [2.0] - activation_beta = [1.0] - clip = 0.5 - with pytest.warns(DeprecationWarning): - node = ov_opset1.lstm_sequence( - parameter_x, - parameter_h_t, - parameter_c_t, - parameter_seq_len, - parameter_w, - parameter_r, - parameter_b, - hidden_size, - direction, - activations, - activation_alpha, - activation_beta, - clip, - ) - - assert node.get_type_name() == "LSTMSequence" - assert node.get_output_size() == 3 - - def test_gru_cell_operator(): batch_size = 1 input_size = 16 @@ -2492,8 +2234,8 @@ def test_stft(): window = ov.parameter([7], name="window", dtype=np.float32) frame_size = ov.constant(np.array(11, dtype=np.int32)) frame_step = ov.constant(np.array(3, dtype=np.int32)) - transpose_frames = True + transpose_frames = False op = ov_opset15.stft(data, window, frame_size, frame_step, transpose_frames) assert op.get_type_name() == "STFT" @@ -2501,6 +2243,14 
@@ def test_stft(): assert op.get_output_element_type(0) == Type.f32 assert op.get_output_shape(0) == [4, 13, 6, 2] + transpose_frames = True + op = ov_opset15.stft(data, window, frame_size, frame_step, transpose_frames) + + assert op.get_type_name() == "STFT" + assert op.get_output_size() == 1 + assert op.get_output_element_type(0) == Type.f32 + assert op.get_output_shape(0) == [4, 6, 13, 2] + def test_search_sorted(): sorted_sequence = ov.parameter([7, 256, 200, 200], name="sorted", dtype=np.float32) diff --git a/src/bindings/python/tests/test_graph/test_ops_fused.py b/src/bindings/python/tests/test_graph/test_ops_fused.py index bdbf4a1a9f1f9c..2bab743bfd7afb 100644 --- a/src/bindings/python/tests/test_graph/test_ops_fused.py +++ b/src/bindings/python/tests/test_graph/test_ops_fused.py @@ -110,17 +110,6 @@ def test_clamp_operator(): assert list(model.get_output_shape(0)) == [2, 2] -def test_squeeze_operator(): - data_shape = [1, 2, 1, 3, 1, 1] - parameter_data = ov.parameter(data_shape, name="Data", dtype=np.float32) - axes = [2, 4] - model = ov.squeeze(parameter_data, axes) - - assert model.get_type_name() == "Squeeze" - assert model.get_output_size() == 1 - assert list(model.get_output_shape(0)) == [1, 2, 3, 1] - - def test_squared_difference_operator(): x1_shape = [1, 2, 3, 4] x2_shape = [2, 3, 4] diff --git a/src/bindings/python/tests/test_graph/test_squeeze.py b/src/bindings/python/tests/test_graph/test_squeeze.py new file mode 100644 index 00000000000000..869d84a0414841 --- /dev/null +++ b/src/bindings/python/tests/test_graph/test_squeeze.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import openvino.runtime.opset1 as ov_opset1 +import openvino.runtime.opset15 as ov_opset15 +import numpy as np +import pytest + + +def test_squeeze_v1_operator(): + data_shape = [1, 2, 1, 3, 1, 1] + parameter_data = ov_opset1.parameter(data_shape, name="Data", dtype=np.float32) + axes = [2, 4] 
+ model = ov_opset1.squeeze(parameter_data, axes) + + assert model.get_type_name() == "Squeeze" + assert model.get_output_size() == 1 + assert list(model.get_output_shape(0)) == [1, 2, 3, 1] + + +@pytest.mark.parametrize(("input_shape", "axes", "allow_axis_skip", "expected_shape"), [ + ((1, 2, 1, 3, 1, 1), [1, 2, 4], True, [1, 2, 3, 1]), + ((1, 2, 1, 3, 1, 1), [1, 2, 4], False, [1, 2, 3, 1]), + ((2, -1, 3), [1], False, [2, 3]) +]) +def test_squeeze_v15_operator(input_shape, axes, allow_axis_skip, expected_shape): + parameter_data = ov_opset15.parameter(input_shape, name="Data", dtype=np.float32) + model = ov_opset15.squeeze(parameter_data, axes, allow_axis_skip, name="Squeeze") + + assert model.get_type_name() == "Squeeze" + assert model.get_output_size() == 1 + assert list(model.get_output_shape(0)) == expected_shape + + +def test_squeeze_v15_dynamic_rank_output(): + parameter_data = ov_opset15.parameter((2, -1, 3), name="Data", dtype=np.float32) + model = ov_opset15.squeeze(parameter_data, [1], True, name="Squeeze") + + assert model.get_type_name() == "Squeeze" + assert model.get_output_size() == 1 + assert model.get_output_partial_shape(0).to_string() == "[...]" + + +def test_squeeze_v15_axes_not_given(): + parameter_data = ov_opset15.parameter((1, 3, 1, 1, 3, 5), name="Data", dtype=np.float32) + model = ov_opset15.squeeze(data=parameter_data, name="Squeeze") + + assert model.get_type_name() == "Squeeze" + assert model.get_output_size() == 1 + assert list(model.get_output_shape(0)) == [3, 3, 5] diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py index 32eb48f6765f41..6065d72196b44b 100644 --- a/src/bindings/python/tests/test_runtime/test_properties.py +++ b/src/bindings/python/tests/test_runtime/test_properties.py @@ -335,6 +335,11 @@ def test_properties_ro(ov_property_ro, expected_value): ((64, 64),), ), (hints.kv_cache_precision, "KV_CACHE_PRECISION", ((Type.f32, Type.f32),)), + ( + 
hints.activations_scale_factor, + "ACTIVATIONS_SCALE_FACTOR", + ((0.0, 0.0),), + ), ( intel_cpu.denormals_optimization, "CPU_DENORMALS_OPTIMIZATION", diff --git a/src/common/transformations/include/ov_ops/dynamic_quantize.hpp b/src/common/transformations/include/ov_ops/dynamic_quantize.hpp index 69c148305fb94f..2eb79322b84e28 100644 --- a/src/common/transformations/include/ov_ops/dynamic_quantize.hpp +++ b/src/common/transformations/include/ov_ops/dynamic_quantize.hpp @@ -14,29 +14,75 @@ namespace internal { /// \brief Operator performing Dynamic Quantize class TRANSFORMATIONS_API DynamicQuantize : public ov::op::Op { public: - OPENVINO_OP("DynamicQuantize", "gpu_opset"); + OPENVINO_OP("DynamicQuantize", "ie_internal_opset"); + + /** + * @brief Configuration for the type of quantization applied to the data: + * - Symmetric: Quantization where the zero point is fixed at zero, and the range is symmetric around zero. + * - Asymmetric: Quantization where the zero point is not fixed at zero. + */ + enum class QuantizationType { Symmetric, Asymmetric }; + + /** + * @brief Configuration for how Activations, Scales and Zero Points will be stored in output buffers: + * - Planar: Activations, Scales, and Zero Points are stored in independent buffers. + * - InterleavedScalesZP: Activations are stored in an independent buffer, while Scales and Zero Points (if any) are + * combined in a separate buffer. 
+ */ + enum class OutputStorageType { Planar, InterleavedScalesZP, /* InterleavedActivationsScalesZP */ }; + + /// \brief Structure that specifies attributes for interpolation + struct Attributes { + QuantizationType quantization_type = QuantizationType::Symmetric; + element::Type quantization_dt = element::undefined; + element::Type scale_dt = element::undefined; + element::Type zp_dt = element::undefined; + + std::vector group_sizes = {}; + std::vector scales_zp_output_order = {}; + OutputStorageType output_storage_type = OutputStorageType::Planar; + }; DynamicQuantize() = default; /// \brief Constructs an DynamicQuantize operation. /// /// \param data Input tensor with data - /// \param group_sizes Group sizes for dynamic quantization - /// \param dt_scale Data type for scale output - DynamicQuantize(const Output& data, std::vector group_sizes, element::Type dt_scale); + /// \param config Dynamic quantization configuration + DynamicQuantize(const Output& data, const Attributes& attrs); void validate_and_infer_types() override; std::shared_ptr clone_with_new_inputs(const ov::OutputVector& new_args) const override; + + const Attributes& get_attrs() const { + return m_attrs; + } + + void set_attrs(Attributes attrs) { + m_attrs = std::move(attrs); + } + const std::vector& get_group_sizes() const { - return m_group_sizes; - }; + return m_attrs.group_sizes; + } + + QuantizationType get_quantization_type() const { + return m_attrs.quantization_type; + } + + OutputStorageType get_output_storage_type() const { + return m_attrs.output_storage_type; + } + + const std::vector& get_scales_zp_output_order() const { + return m_attrs.scales_zp_output_order; + } + static std::vector shape_infer(const DynamicQuantize* op, - const std::vector& input_shapes, - const std::vector& group_sizes); + const std::vector& input_shapes); -private: - std::vector m_group_sizes; - element::Type m_dt_scale; +protected: + Attributes m_attrs; }; } // namespace internal diff --git 
a/src/common/transformations/include/transformations/op_conversions/convert_squeeze15_downgrade.hpp b/src/common/transformations/include/transformations/op_conversions/convert_squeeze15_downgrade.hpp new file mode 100644 index 00000000000000..c2ebfbc0f3138b --- /dev/null +++ b/src/common/transformations/include/transformations/op_conversions/convert_squeeze15_downgrade.hpp @@ -0,0 +1,23 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/matcher_pass.hpp" +#include "transformations_visibility.hpp" + +namespace ov { +namespace pass { +/** + * @ingroup ov_transformation_common_api + * @brief Converts Squeeze v15 to Squeeze v0. + */ +class TRANSFORMATIONS_API ConvertSqueeze15ToSqueeze0 : public MatcherPass { +public: + OPENVINO_RTTI("ConvertSqueeze15ToSqueeze0", "0"); + ConvertSqueeze15ToSqueeze0(); +}; + +} // namespace pass +} // namespace ov diff --git a/src/common/transformations/include/transformations/utils/utils.hpp b/src/common/transformations/include/transformations/utils/utils.hpp index fed812ecc864ca..6bcc50e29b3733 100644 --- a/src/common/transformations/include/transformations/utils/utils.hpp +++ b/src/common/transformations/include/transformations/utils/utils.hpp @@ -64,18 +64,10 @@ inline bool has_decompression_converts(const std::shared_ptr& f OPENVINO_DEPRECATED("Plugins should use ov::ISyncInferRequest::find_port") inline std::string create_ie_output_name(const Output& output) { - std::string out_name; - OPENVINO_SUPPRESS_DEPRECATED_START - auto tensor_name = ov::descriptor::get_ov_tensor_legacy_name(output.get_tensor()); - OPENVINO_SUPPRESS_DEPRECATED_END - if (!tensor_name.empty()) { - out_name = std::move(tensor_name); - } else { - const auto& prev_layer = output.get_node_shared_ptr(); - out_name = prev_layer->get_friendly_name(); - if (prev_layer->get_output_size() != 1) { - out_name += "." 
+ std::to_string(output.get_index()); - } + const auto& prev_layer = output.get_node_shared_ptr(); + auto out_name = prev_layer->get_friendly_name(); + if (prev_layer->get_output_size() != 1) { + out_name += "." + std::to_string(output.get_index()); } return out_name; } diff --git a/src/common/transformations/src/ov_ops/dynamic_quantize.cpp b/src/common/transformations/src/ov_ops/dynamic_quantize.cpp index 74c0498e9a4425..9d1dfa5e5e3f62 100644 --- a/src/common/transformations/src/ov_ops/dynamic_quantize.cpp +++ b/src/common/transformations/src/ov_ops/dynamic_quantize.cpp @@ -7,62 +7,113 @@ #include "openvino/core/partial_shape.hpp" #include "openvino/core/validation_util.hpp" #include "openvino/op/variadic_split.hpp" -#include "variadic_split_shape_inference.hpp" +#include "openvino/util/common_util.hpp" namespace ov { namespace op { namespace internal { -DynamicQuantize::DynamicQuantize(const Output& data, std::vector group_sizes, element::Type dt_scale) - : Op({data}), - m_group_sizes(std::move(group_sizes)), - m_dt_scale(dt_scale) { - OPENVINO_ASSERT(data.get_partial_shape().rank() == m_group_sizes.size(), - "FC input rank should be same as the rank of group_size ", +DynamicQuantize::DynamicQuantize(const Output& data, const Attributes& attrs) : Op({data}), m_attrs(attrs) { + if (m_attrs.scales_zp_output_order.empty()) { + m_attrs.scales_zp_output_order.resize(data.get_partial_shape().size()); + std::iota(m_attrs.scales_zp_output_order.begin(), m_attrs.scales_zp_output_order.end(), 0); + } + + OPENVINO_ASSERT(data.get_partial_shape().rank() == m_attrs.group_sizes.size(), + "DQ input rank should be same as the rank of group_size ", data.get_tensor_ptr()->get_partial_shape().rank(), " / ", - m_group_sizes.size()); - set_output_size(2); + m_attrs.group_sizes.size()); + + OPENVINO_ASSERT(data.get_partial_shape().size() == m_attrs.scales_zp_output_order.size(), + "DQ input rank should be same as the rank of scales and zero points output order)"); + + size_t 
outputs_number = 2; + if (m_attrs.quantization_type == QuantizationType::Asymmetric && + m_attrs.output_storage_type == OutputStorageType::Planar) + outputs_number = 3; + + OPENVINO_ASSERT( + (m_attrs.output_storage_type == OutputStorageType::Planar) || + (m_attrs.quantization_type == QuantizationType::Asymmetric && m_attrs.scale_dt == m_attrs.zp_dt), + "Scales and Zero Points should have the same data type to be stored in the single buffer"); + + set_output_size(outputs_number); validate_and_infer_types(); } void DynamicQuantize::validate_and_infer_types() { std::vector input_shapes = {get_input_partial_shape(0)}; - auto out_shapes = shape_infer(this, input_shapes, m_group_sizes); - set_output_type(0, element::i8, out_shapes[0]); - set_output_type(1, m_dt_scale, out_shapes[1]); + auto out_shapes = shape_infer(this, input_shapes); + set_output_type(0, m_attrs.quantization_dt, out_shapes[0]); + set_output_type(1, m_attrs.scale_dt, out_shapes[1]); + + if (m_attrs.quantization_type == QuantizationType::Asymmetric && + m_attrs.output_storage_type == OutputStorageType::Planar) + set_output_type(2, m_attrs.zp_dt, out_shapes[2]); } std::shared_ptr DynamicQuantize::clone_with_new_inputs(const ov::OutputVector& new_args) const { check_new_args_count(this, new_args); - return std::make_shared(new_args.at(0), m_group_sizes, m_dt_scale); + return std::make_shared(new_args.at(0), m_attrs); } std::vector DynamicQuantize::shape_infer(const DynamicQuantize* op, - const std::vector& input_shapes, - const std::vector& group_sizes) { + const std::vector& input_shapes) { std::vector out_shapes; out_shapes.push_back(input_shapes[0]); auto scale_shape = input_shapes[0]; + const auto& group_sizes = op->m_attrs.group_sizes; OPENVINO_ASSERT(scale_shape.size() == group_sizes.size(), "Scale_shape and group_size are supposed to have same rank: ", scale_shape.size(), " / ", group_sizes.size()); for (size_t i = 0; i < scale_shape.size(); i++) { - if (scale_shape[i].is_dynamic()) + if 
(scale_shape[i].is_dynamic() || scale_shape[i] == 0) continue; - if (group_sizes[i] == UINT64_MAX) + if (group_sizes[i] == UINT64_MAX) { scale_shape[i] = 1; - else { - scale_shape[i] /= group_sizes[i]; // if group_size is larger than shape, scale_shape will be 1 - scale_shape[i] = std::max(static_cast(scale_shape[i].get_length()), 1); + } else { + scale_shape[i] = ov::util::ceil_div(scale_shape[i].get_length(), static_cast(group_sizes[i])); } } out_shapes.push_back(scale_shape); + + // Add zero points shape, same as the scales + if (op->m_attrs.quantization_type == QuantizationType::Asymmetric && + op->m_attrs.output_storage_type == OutputStorageType::Planar) + out_shapes.push_back(scale_shape); + + auto transpose_shape = [](const ov::PartialShape& shape, const std::vector& scales_zp_output_order) { + auto transposed_shape = shape; + for (size_t i = 0; i < scales_zp_output_order.size(); i++) { + OPENVINO_ASSERT(scales_zp_output_order[i] < transposed_shape.size()); + transposed_shape[i] = shape[scales_zp_output_order[i]]; + } + + return transposed_shape; + }; + + // Transpose scales and zero points shapes + const auto& scales_zp_output_order = op->m_attrs.scales_zp_output_order; + for (size_t i = 1; i < out_shapes.size(); i++) { + out_shapes[i] = transpose_shape(out_shapes[i], scales_zp_output_order); + } + + if (op->m_attrs.quantization_type == QuantizationType::Asymmetric && + op->m_attrs.output_storage_type != OutputStorageType::Planar) { + // Currently scales and zero points are supposed to be combined over the last dimension only + const auto combine_axis = scales_zp_output_order.empty() ? 
out_shapes[1].size() - 1 + : scales_zp_output_order[out_shapes[1].size() - 1]; + OPENVINO_ASSERT(group_sizes[combine_axis] != 1); + + out_shapes[1][combine_axis] *= 2; // [scale, zero_point] pairs + } + return out_shapes; } diff --git a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp index 9d46b583a828f2..37ee2d12d9aebb 100644 --- a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp @@ -98,6 +98,7 @@ #include "transformations/op_conversions/convert_softmax_downgrade.hpp" #include "transformations/op_conversions/convert_softmax_upgrade.hpp" #include "transformations/op_conversions/convert_space_to_depth.hpp" +#include "transformations/op_conversions/convert_squeeze15_downgrade.hpp" #include "transformations/op_conversions/convert_subtract.hpp" #include "transformations/op_conversions/convert_topk11_downgrade.hpp" #include "transformations/op_conversions/convert_xor_to_logical_xor.hpp" @@ -235,6 +236,7 @@ bool ov::pass::CommonOptimizations::run_on_model(const std::shared_ptr(); ADD_MATCHER(fq_fusions, FakeQuantizeMulFusion) diff --git a/src/common/transformations/src/transformations/common_optimizations/gelu_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/gelu_fusion.cpp index 221484c75cccde..8d075f4a727758 100644 --- a/src/common/transformations/src/transformations/common_optimizations/gelu_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/gelu_fusion.cpp @@ -22,6 +22,7 @@ #include "openvino/op/parameter.hpp" #include "openvino/op/power.hpp" #include "openvino/op/tanh.hpp" +#include "openvino/pass/pattern/op/or.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" #include "transformations/utils/utils.hpp" @@ -280,9 
+281,16 @@ ov::pass::GeluFusionWithTanh::GeluFusionWithTanh() { auto add_1 = ov::pass::pattern::wrap_type({tanh, add_1_constant}); auto mul_2_constant = ov::pass::pattern::wrap_type(); - auto mul_2 = ov::pass::pattern::wrap_type({add_1, mul_2_constant}); - auto mul_3 = ov::pass::pattern::wrap_type({input, mul_2}); + // x * (0.5 * (1 + tanh)) + auto mul_2_1 = ov::pass::pattern::wrap_type({add_1, mul_2_constant}); + auto mul_3_1 = ov::pass::pattern::wrap_type({input, mul_2_1}); + + // (x * 0.5) * (1 + tanh) + auto mul_2_2 = ov::pass::pattern::wrap_type({input, mul_2_constant}); + auto mul_3_2 = ov::pass::pattern::wrap_type({add_1, mul_2_2}); + + auto mul_3 = std::make_shared(OutputVector{mul_3_1, mul_3_2}); ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { auto& pattern_to_output = m.get_pattern_value_map(); @@ -298,7 +306,6 @@ ov::pass::GeluFusionWithTanh::GeluFusionWithTanh() { ov::as_type_ptr(pattern_to_output.at(mul_2_constant).get_node_shared_ptr()); auto add_1_constant_value = ov::as_type_ptr(pattern_to_output.at(add_1_constant).get_node_shared_ptr()); - if (!pow_constant_value || !add_1_constant_value || !mul_0_constant_value || !mul_1_constant_value || !mul_2_constant_value) { return false; @@ -318,18 +325,17 @@ ov::pass::GeluFusionWithTanh::GeluFusionWithTanh() { auto gelu = std::make_shared(x_output, op::GeluApproximationMode::TANH); gelu->set_friendly_name(m.get_match_root()->get_friendly_name()); - ov::copy_runtime_info( - { - pattern_to_output.at(pow).get_node_shared_ptr(), - pattern_to_output.at(mul_0).get_node_shared_ptr(), - pattern_to_output.at(mul_1).get_node_shared_ptr(), - pattern_to_output.at(mul_2).get_node_shared_ptr(), - pattern_to_output.at(mul_3).get_node_shared_ptr(), - pattern_to_output.at(tanh).get_node_shared_ptr(), - pattern_to_output.at(add_0).get_node_shared_ptr(), - pattern_to_output.at(add_1).get_node_shared_ptr(), - }, - gelu); + + std::vector> pattern_nodes = + {pow, mul_0, mul_1, tanh, add_0, add_1, 
mul_2_1, mul_2_2, mul_3_1, mul_3_2}; + std::vector> cp_rt_info_nodes; + for (const auto& pattern_node : pattern_nodes) { + if (pattern_to_output.count(pattern_node)) { + cp_rt_info_nodes.push_back(pattern_to_output.at(pattern_node).get_node_shared_ptr()); + } + } + ov::copy_runtime_info(cp_rt_info_nodes, gelu); + ov::replace_node(m.get_match_root(), gelu); return true; }; diff --git a/src/common/transformations/src/transformations/common_optimizations/lora_subgraph_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/lora_subgraph_fusion.cpp index 366ce00894242e..95b754062c13b8 100644 --- a/src/common/transformations/src/transformations/common_optimizations/lora_subgraph_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/lora_subgraph_fusion.cpp @@ -9,6 +9,7 @@ #include "itt.hpp" #include "openvino/op/add.hpp" +#include "openvino/op/convert.hpp" #include "openvino/op/convolution.hpp" #include "openvino/op/matmul.hpp" #include "openvino/op/multiply.hpp" @@ -26,12 +27,19 @@ ov::pass::LoraSubgraphFusion::LoraSubgraphFusion() { auto lora_input_m = any_input(); auto transpose_const1_m = wrap_type(consumers_count(1)); auto transpose1_m = optional({lora_input_m, transpose_const1_m}, consumers_count(1)); + auto read_value1_m = wrap_type(); - auto matmul1_m = wrap_type({transpose1_m, read_value1_m}, consumers_count(1)); + auto convert1_m = optional(read_value1_m, consumers_count(1)); + auto matmul1_m = wrap_type({transpose1_m, convert1_m}, consumers_count(1)); + auto read_value2_m = wrap_type(); - auto multiply_m = wrap_type({matmul1_m, read_value2_m}, consumers_count(1)); + auto convert2_m = optional(read_value2_m, consumers_count(1)); + auto multiply_m = wrap_type({matmul1_m, convert2_m}, consumers_count(1)); + auto read_value3_m = wrap_type(); - auto matmul2_m = wrap_type({multiply_m, read_value3_m}, consumers_count(1)); + auto convert3_m = optional(read_value3_m, consumers_count(1)); + auto matmul2_m = 
wrap_type({multiply_m, convert3_m}, consumers_count(1)); + auto transpose_const2_m = wrap_type(consumers_count(1)); auto transpose2_m = optional({matmul2_m, transpose_const2_m}, consumers_count(1)); auto main_flow_m = wrap_type({lora_input_m, any_input()}); @@ -41,11 +49,14 @@ ov::pass::LoraSubgraphFusion::LoraSubgraphFusion() { const auto& pattern_map = m.get_pattern_value_map(); const auto& lora_input = pattern_map.at(lora_input_m); const auto& matmul1 = pattern_map.at(matmul1_m); - const auto& read_value1 = pattern_map.at(read_value1_m); + const auto& state_1 = + pattern_map.count(convert1_m) ? pattern_map.at(convert1_m) : pattern_map.at(read_value1_m); const auto& multiply = pattern_map.at(multiply_m); - const auto& read_value2 = pattern_map.at(read_value2_m); + const auto& state_2 = + pattern_map.count(convert2_m) ? pattern_map.at(convert2_m) : pattern_map.at(read_value2_m); const auto& matmul2 = pattern_map.at(matmul2_m); - const auto& read_value3 = pattern_map.at(read_value3_m); + const auto& state_3 = + pattern_map.count(convert3_m) ? pattern_map.at(convert3_m) : pattern_map.at(read_value3_m); const auto& main_flow = pattern_map.at(main_flow_m); const auto& add = pattern_map.at(add_m); @@ -70,15 +81,15 @@ ov::pass::LoraSubgraphFusion::LoraSubgraphFusion() { pattern_map.count(transpose1_m) ? 
pattern_map.at(transpose1_m).get_node()->input(0) : matmul1.get_node()->input(0), matmul1.get_node()->input(1), - find_connected_input(multiply.get_node(), read_value2.get_node()), + find_connected_input(multiply.get_node(), state_2.get_node()), matmul2.get_node()->input(1), }; const ov::OutputVector external_connections{ main_flow, lora_input, - read_value1, - read_value2, - read_value3, + state_1, + state_2, + state_3, }; ov::ParameterVector subgraph_parameters; diff --git a/src/common/transformations/src/transformations/common_optimizations/reverse_shape_and_type_infer.cpp b/src/common/transformations/src/transformations/common_optimizations/reverse_shape_and_type_infer.cpp index 211f351da34024..9a06201f688675 100644 --- a/src/common/transformations/src/transformations/common_optimizations/reverse_shape_and_type_infer.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/reverse_shape_and_type_infer.cpp @@ -282,6 +282,15 @@ bool ov::pass::ReverseShapeAndTypeInfer::run_on_model(const std::shared_ptrget_input_tensor(0).m_element_type = element::boolean; is_changed = true; } + + // in case TensorFlow models, we can deduce predicate shape that must be a scalar + // If operations created by fusing Switch-Merge sub-graph contain tf_switch_merge_if rt-info + if (if_op->get_rt_info().count("tf_switch_merge_if") && + if_op->get_rt_info()["tf_switch_merge_if"].as() && + if_op->input_value(0).get_partial_shape().rank().is_dynamic()) { + if_op->get_input_tensor(0).m_partial_shape = ov::PartialShape({}); + is_changed = true; + } } else if (ov::as_type_ptr(op)) { is_changed |= inherit_output_shape(op, {0}); is_changed |= inherit_output_type(op, {1}); diff --git a/src/common/transformations/src/transformations/control_flow/unroll_if.cpp b/src/common/transformations/src/transformations/control_flow/unroll_if.cpp index 6ef5536495e663..14d28a3a3fcfa3 100644 --- a/src/common/transformations/src/transformations/control_flow/unroll_if.cpp +++ 
b/src/common/transformations/src/transformations/control_flow/unroll_if.cpp @@ -54,14 +54,7 @@ bool ov::pass::UnrollIf::run_on_model(const std::shared_ptr& f) { } for (const auto& output_desc : output_descriptions) { std::shared_ptr result = body->get_results()[output_desc->m_body_value_index]; - const auto& in_value = result->input_value(0); - // set output name to Tensor to store it for openvino to cnn conversion - OPENVINO_SUPPRESS_DEPRECATED_START - ov::descriptor::set_ov_tensor_legacy_name( - in_value.get_tensor(), - op::util::create_ie_output_name(if_node->output(output_desc->m_output_index))); - OPENVINO_SUPPRESS_DEPRECATED_END for (const auto& input : if_node->output(output_desc->m_output_index).get_target_inputs()) { input.replace_source_output(result->get_input_source_output(0)); } diff --git a/src/common/transformations/src/transformations/control_flow/unroll_tensor_iterator.cpp b/src/common/transformations/src/transformations/control_flow/unroll_tensor_iterator.cpp index 431527ade998a1..dde1ab35898b67 100644 --- a/src/common/transformations/src/transformations/control_flow/unroll_tensor_iterator.cpp +++ b/src/common/transformations/src/transformations/control_flow/unroll_tensor_iterator.cpp @@ -115,21 +115,6 @@ bool ov::pass::UnrollTensorIterator::run_on_model(const std::shared_ptrget_output_descriptions()) { - // we need to insert tensor_name to the outputs of TensorIterator if they directly connected to - // Results ops. It's necessary to save original TensorIterator name when we use CNNNetwork. 
- auto insert_tensor_name = [&](const ov::Output& ti_output, const ov::Output& insert_to) { - auto target_inputs = ti_output.get_target_inputs(); - if (target_inputs.empty() || - std::any_of(target_inputs.begin(), target_inputs.end(), [](const ov::Input& target_inp) { - return ov::as_type(target_inp.get_node()) != nullptr; - })) { - OPENVINO_SUPPRESS_DEPRECATED_START - ov::descriptor::set_ov_tensor_legacy_name(insert_to.get_tensor(), - ov::op::util::create_ie_output_name(ti_output)); - OPENVINO_SUPPRESS_DEPRECATED_END - } - }; - if (const auto& concat_desc = std::dynamic_pointer_cast(desc)) { if (!concat_desc) { @@ -155,9 +140,6 @@ bool ov::pass::UnrollTensorIterator::run_on_model(const std::shared_ptr(to_concat, concat_desc->m_axis); copy_runtime_info(sub_graph_op, concat); - // set output name to Tensor to store it for openvino to cnn conversion - insert_tensor_name(sub_graph_op->output(concat_desc->m_output_index), concat->output(0)); - // connect the Concat layer to the corresponding TI outputs for (auto& input : sub_graph_op->output(concat_desc->m_output_index).get_target_inputs()) { input.replace_source_output(concat); @@ -167,8 +149,6 @@ bool ov::pass::UnrollTensorIterator::run_on_model(const std::shared_ptr result = body_functions[0]->get_results().at(concat_desc->m_body_value_index); const auto& input_to_res = result->get_input_source_output(0); - // set output name to Tensor to store it for openvino to cnn conversion - insert_tensor_name(sub_graph_op->output(concat_desc->m_output_index), input_to_res); for (auto& input : sub_graph_op->output(concat_desc->m_output_index).get_target_inputs()) { input.replace_source_output(input_to_res); @@ -179,11 +159,8 @@ bool ov::pass::UnrollTensorIterator::run_on_model(const std::shared_ptrm_iteration; iter = iter >= 0 ? 
iter : num_iter - 1; - std::shared_ptr result = - body_functions[iter]->get_results()[output_desc->m_body_value_index]; - const auto& in_value = result->input_value(0); + auto result = body_functions[iter]->get_results()[output_desc->m_body_value_index]; - insert_tensor_name(sub_graph_op->output(output_desc->m_output_index), in_value); for (const auto& input : sub_graph_op->output(output_desc->m_output_index).get_target_inputs()) { input.replace_source_output(result->get_input_source_output(0)); } diff --git a/src/common/transformations/src/transformations/convert_precision.cpp b/src/common/transformations/src/transformations/convert_precision.cpp index 6f5166dfd26760..d8fd21699a5c20 100644 --- a/src/common/transformations/src/transformations/convert_precision.cpp +++ b/src/common/transformations/src/transformations/convert_precision.cpp @@ -62,6 +62,8 @@ bool fuse_type_to_ctc_greedy_decoder_seq_len(const std::shared_ptr& no bool fuse_type_to_random_uniform_v8(const std::shared_ptr& node, const precisions_map& precisions); +bool fuse_type_to_search_sorted_v15(const std::shared_ptr& node, const precisions_map& precisions); + bool extend_select_type(const std::shared_ptr& node, const precisions_map& precisions); bool extend_reverse_type(const std::shared_ptr& node, const precisions_map& precisions); @@ -254,7 +256,6 @@ bool convert_function_precision(const std::shared_ptr& f, // Register internal constants only after fixing input type that could lead to nodes // replacement register_constants(ops); - for (auto& node : ops) { // skip precision sensitive nodes if (skip_precision_sensitive && fp16_compression_is_disabled(node) && has_fp16_compression) @@ -331,12 +332,6 @@ bool convert_function_precision(const std::shared_ptr& f, auto& convert_output_tensor = convert->get_output_tensor(0); convert_output_tensor.set_names(result_input.get_names()); - OPENVINO_SUPPRESS_DEPRECATED_START - const auto& legacy_name = 
ov::descriptor::get_ov_tensor_legacy_name(result_input.get_tensor()); - if (!legacy_name.empty()) { - ov::descriptor::set_ov_tensor_legacy_name(convert_output_tensor, legacy_name); - } - OPENVINO_SUPPRESS_DEPRECATED_END result_input.set_names({}); result->input(0).replace_source_output(convert->output(0)); @@ -469,7 +464,8 @@ bool ov::pass::ConvertPrecision::run_on_model(const std::shared_ptr& {ov::op::v13::Multinomial::get_type_info_static(), fuse_type_to_multinomial_v13}, {ov::op::v0::PriorBox::get_type_info_static(), fuse_type_to_prior_box}, {ov::op::v8::PriorBox::get_type_info_static(), fuse_type_to_prior_box}, - {ov::op::v0::PriorBoxClustered::get_type_info_static(), fuse_type_to_prior_box}}; + {ov::op::v0::PriorBoxClustered::get_type_info_static(), fuse_type_to_prior_box}, + {ov::op::v15::SearchSorted::get_type_info_static(), fuse_type_to_search_sorted_v15}}; for (const auto& it : m_additional_type_to_fuse_map) { type_to_fuse[it.first] = it.second; @@ -554,6 +550,18 @@ bool fuse_type_to_unique_v10(const std::shared_ptr& node, const precisions return res; } +bool fuse_type_to_search_sorted_v15(const std::shared_ptr& node, const precisions_map& precisions) { + bool res = false; + if (auto op = ov::as_type_ptr(node)) { + auto it = precisions.find(node->get_output_element_type(0)); + if (it != precisions.end()) { + op->set_output_type_attr(it->second); + res = true; + } + } + return res; +} + bool fuse_type_to_range_v4(const std::shared_ptr& node, const precisions_map& precisions) { auto it = precisions.find(node->get_output_element_type(0)); if (it == precisions.end()) @@ -1000,12 +1008,14 @@ bool extend_select_type(const std::shared_ptr& node, const precisions_ type_relaxed->set_origin_input_type(ov::element::boolean, 0); return true; } else if (auto casted = ov::as_type_ptr(node)) { - auto relaxed_op = - std::make_shared>(*casted, - ov::element::TypeVector{ov::element::boolean}, - ov::element::TypeVector{}); - replace_node(node, relaxed_op); - return true; + 
if (precisions.count(ov::element::boolean) != 0) { + auto relaxed_op = + std::make_shared>(*casted, + ov::element::TypeVector{ov::element::boolean}, + ov::element::TypeVector{}); + replace_node(node, relaxed_op); + return true; + } } return false; } diff --git a/src/common/transformations/src/transformations/op_conversions/convert_maxpool_downgrade.cpp b/src/common/transformations/src/transformations/op_conversions/convert_maxpool_downgrade.cpp index 50a49202742bde..0e0865b3845f48 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_maxpool_downgrade.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_maxpool_downgrade.cpp @@ -57,10 +57,6 @@ ov::pass::ConvertMaxPool8ToMaxPool1::ConvertMaxPool8ToMaxPool1() { ov::copy_runtime_info(maxpool_v8_node, maxpool_v1_node); maxpool_v8_node->clear_control_dependencies(); - OPENVINO_SUPPRESS_DEPRECATED_START - ov::descriptor::set_ov_tensor_legacy_name(maxpool_v1_node->output(0).get_tensor(), out_name); - OPENVINO_SUPPRESS_DEPRECATED_END - return true; }; diff --git a/src/common/transformations/src/transformations/op_conversions/convert_squeeze15_downgrade.cpp b/src/common/transformations/src/transformations/op_conversions/convert_squeeze15_downgrade.cpp new file mode 100644 index 00000000000000..50701d3d6acd56 --- /dev/null +++ b/src/common/transformations/src/transformations/op_conversions/convert_squeeze15_downgrade.cpp @@ -0,0 +1,40 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/convert_squeeze15_downgrade.hpp" + +#include "itt.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/op/squeeze.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "transformations/utils/utils.hpp" + +ov::pass::ConvertSqueeze15ToSqueeze0::ConvertSqueeze15ToSqueeze0() { + MATCHER_SCOPE(ConvertSqueeze15ToSqueeze0); + + const auto& squeeze_v15_pattern = pattern::wrap_type(); + + 
const matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](pattern::Matcher& m) { + const auto& squeeze_v15 = ov::as_type_ptr(m.get_match_root()); + if (!squeeze_v15 || transformation_callback(squeeze_v15)) { + return false; + } + std::shared_ptr squeeze_v0; + if (squeeze_v15->get_input_size() == 1) { + squeeze_v0 = std::make_shared(squeeze_v15->input_value(0)); + } else if (squeeze_v15->get_input_size() == 2 && !squeeze_v15->get_allow_axis_skip()) { + squeeze_v0 = std::make_shared(squeeze_v15->input_value(0), squeeze_v15->input_value(1)); + } else { + return false; + } + squeeze_v0->set_friendly_name(squeeze_v15->get_friendly_name()); + copy_runtime_info(squeeze_v15, squeeze_v0); + replace_node(squeeze_v15, squeeze_v0); + + return true; + }; + + auto m = std::make_shared(squeeze_v15_pattern, matcher_name); + register_matcher(m, callback); +} diff --git a/src/common/transformations/src/transformations/transpose_sinking/ts_utils.cpp b/src/common/transformations/src/transformations/transpose_sinking/ts_utils.cpp index ba950ad93bf501..ae40ca869c7dcc 100644 --- a/src/common/transformations/src/transformations/transpose_sinking/ts_utils.cpp +++ b/src/common/transformations/src/transformations/transpose_sinking/ts_utils.cpp @@ -122,12 +122,6 @@ void SwapOutputNames(Output output1, Output output2) { const auto node2_output_names = output2.get_names(); output2.set_names(output1.get_names()); output1.set_names(node2_output_names); - - OPENVINO_SUPPRESS_DEPRECATED_START - const auto node2_legacy_output_names = get_ov_tensor_legacy_name(output2.get_tensor()); - set_ov_tensor_legacy_name(output2.get_tensor(), get_ov_tensor_legacy_name(output1.get_tensor())); - set_ov_tensor_legacy_name(output1.get_tensor(), node2_legacy_output_names); - OPENVINO_SUPPRESS_DEPRECATED_END } void SwapFriendlyNames(const NodePtr& node1, const NodePtr& node2) { diff --git a/src/common/transformations/tests/common_optimizations/gelu_fusion.cpp 
b/src/common/transformations/tests/common_optimizations/gelu_fusion.cpp index 837d2ba6d4597e..dbc54f5492bffa 100644 --- a/src/common/transformations/tests/common_optimizations/gelu_fusion.cpp +++ b/src/common/transformations/tests/common_optimizations/gelu_fusion.cpp @@ -388,6 +388,44 @@ TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_epsilon_pow_value) { } } +TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_epsilon_pow_value_2) { + { + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = + std::make_shared(element::f32, Shape{1}, std::vector{3.0f + 1.0e-8f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); + + auto mul_1_constant = + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); + + auto tanh = std::make_shared(mul_1); + + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); + + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = std::make_shared(input, mul_2_constant); + + auto mul_3 = std::make_shared(add_1, mul_2); + + model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); + manager.register_pass(); + } + + { + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); + model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); + } +} + TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_pow_value) { { auto input = std::make_shared(element::f32, Shape{2, 2}); diff --git a/src/common/transformations/tests/common_optimizations/lora_subgraph_fusion.cpp 
b/src/common/transformations/tests/common_optimizations/lora_subgraph_fusion.cpp index 6557f763c6b368..2294668f49a6c5 100644 --- a/src/common/transformations/tests/common_optimizations/lora_subgraph_fusion.cpp +++ b/src/common/transformations/tests/common_optimizations/lora_subgraph_fusion.cpp @@ -20,21 +20,25 @@ using namespace ov; static constexpr auto netType = ov::element::f32; -std::pair create_states(const std::vector& shapes) { - ov::OutputVector read_values; +std::pair create_states(const std::vector& shapes, + const ov::element::Type& states_precision = netType) { + ov::OutputVector state_outs; ov::SinkVector assigns; size_t idx = 0; auto create_state = [&](const ov::PartialShape& shape) { - auto variable = - std::make_shared(ov::op::util::VariableInfo{shape, netType, std::to_string(idx++)}); + auto variable = std::make_shared( + ov::op::util::VariableInfo{shape, states_precision, std::to_string(idx++)}); auto read_value = std::make_shared(variable); auto assign = std::make_shared(read_value, variable); - read_values.push_back(read_value); assigns.push_back(assign); + if (states_precision == netType) + state_outs.push_back(read_value); + else + state_outs.push_back(std::make_shared(read_value, netType)); }; for (const auto& shape : shapes) create_state(shape); - return std::make_pair(read_values, assigns); + return std::make_pair(state_outs, assigns); } std::shared_ptr create_lora_subgraph(const ov::Output& main_flow, @@ -134,6 +138,47 @@ TEST_F(LoraSubgraphFusionMatMulTests, StandardPattern) { } } +TEST_F(LoraSubgraphFusionMatMulTests, StandardPatternWithConvert) { + { + auto param_lora = std::make_shared(netType, shape_x); + auto param_w = std::make_shared(netType, shape_w); + auto main_mm = std::make_shared(param_lora, param_w, false, true); + main_mm->set_friendly_name("main_mm"); + auto states = create_states({shape_state_1, shape_state_2, shape_state_3}, ov::element::f16); + auto lora_subgraph = create_lora_subgraph(main_mm, param_lora, states.first, 
false); + lora_subgraph->set_friendly_name("lora_subgraph"); + model = std::make_shared(OutputVector{lora_subgraph, main_mm}, + states.second, + ParameterVector{param_lora, param_w}); + } + { + auto param_lora = std::make_shared(netType, shape_x); + auto param_w = std::make_shared(netType, shape_w); + auto main_mm = std::make_shared(param_lora, param_w, false, true); + main_mm->set_friendly_name("main_mm"); + + auto inner_param_lora = std::make_shared(netType, shape_x); + auto inner_state_1 = std::make_shared(netType, shape_state_1); + auto inner_state_2 = std::make_shared(netType, shape_state_2); + auto inner_state_3 = std::make_shared(netType, shape_state_3); + auto inner_param_mm = std::make_shared(netType, main_mm->get_output_partial_shape(0)); + + ov::OutputVector states_outs{inner_state_1, inner_state_2, inner_state_3}; + auto lora_subgraph = create_lora_subgraph(inner_param_mm, inner_param_lora, states_outs, false); + lora_subgraph->set_friendly_name("lora_subgraph"); + ov::ParameterVector inner_params{inner_param_mm, inner_param_lora, inner_state_1, inner_state_2, inner_state_3}; + auto inner_model = std::make_shared(OutputVector{lora_subgraph}, inner_params); + + auto states = create_states({shape_state_1, shape_state_2, shape_state_3}, ov::element::f16); + ov::OutputVector lora_inputs{main_mm, param_lora, states.first[0], states.first[1], states.first[2]}; + auto lora = std::make_shared(lora_inputs, inner_model); + lora->set_friendly_name("lora_subgraph"); + + model_ref = + std::make_shared(OutputVector{lora, main_mm}, states.second, ParameterVector{param_lora, param_w}); + } +} + TEST_F(LoraSubgraphFusionMatMulTests, ReshaffledEltwiseInputs) { { auto param_lora = std::make_shared(netType, shape_x); diff --git a/src/common/transformations/tests/control_flow/unroll_tensor_iterator_test.cpp b/src/common/transformations/tests/control_flow/unroll_tensor_iterator_test.cpp index 9ff2be2e754e95..fb4891d1590cc7 100644 --- 
a/src/common/transformations/tests/control_flow/unroll_tensor_iterator_test.cpp +++ b/src/common/transformations/tests/control_flow/unroll_tensor_iterator_test.cpp @@ -491,14 +491,12 @@ TEST(TransformationTests, UnrollTensorIteratorLSTMCellSingleIterationSingleItera ASSERT_TRUE(res.first) << res.second; } -void collect_legacy_tensor_names(const std::shared_ptr& model, std::vector& holder) { +void collect_tensor_names(const std::shared_ptr& model, + std::vector>& holder) { for (const auto& op : model->get_ordered_ops()) { for (const auto& out : op->outputs()) { - OPENVINO_SUPPRESS_DEPRECATED_START - auto tensor_name = ov::descriptor::get_ov_tensor_legacy_name(out.get_tensor()); - OPENVINO_SUPPRESS_DEPRECATED_END - if (!tensor_name.empty() && ov::as_type_ptr(op)) - holder.emplace_back(tensor_name); + if (!out.get_tensor().get_names().empty() && ov::as_type_ptr(op)) + holder.emplace_back(out.get_tensor().get_names()); } } } @@ -538,8 +536,8 @@ TEST(TransformationTests, CheckTensorNamesAfterConvertToTIAndUnrolling) { f = std::make_shared(NodeVector{Y_out, Ho}, ParameterVector{X, Y}); } - std::vector names_before; - collect_legacy_tensor_names(f, names_before); + std::vector> names_before; + collect_tensor_names(f, names_before); pass::Manager m; m.register_pass(); @@ -548,8 +546,8 @@ TEST(TransformationTests, CheckTensorNamesAfterConvertToTIAndUnrolling) { m.run_passes(f); OV_ASSERT_NO_THROW(check_rt_info(f)); - std::vector names_after; - collect_legacy_tensor_names(f, names_after); + std::vector> names_after; + collect_tensor_names(f, names_after); EXPECT_EQ(names_before, names_after); } @@ -605,8 +603,8 @@ TEST(TransformationTests, CheckTensorNamesAfterUnrolling) { f = std::make_shared(NodeVector{res_ti_1, res_ti_2}, ParameterVector{X, Y, Z}); } - std::vector names_before; - collect_legacy_tensor_names(f, names_before); + std::vector> names_before; + collect_tensor_names(f, names_before); pass::Manager m; m.register_pass(); @@ -614,11 +612,9 @@ 
TEST(TransformationTests, CheckTensorNamesAfterUnrolling) { m.run_passes(f); OV_ASSERT_NO_THROW(check_rt_info(f)); - std::vector names_after; - collect_legacy_tensor_names(f, names_after); + std::vector> names_after; + collect_tensor_names(f, names_after); - EXPECT_NE(names_before, names_after); - EXPECT_EQ(names_after.size(), 2); - EXPECT_EQ(names_after[0], "TensorIterator.0"); - EXPECT_EQ(names_after[1], "TensorIterator.1"); + ASSERT_EQ(names_after.size(), 0); + EXPECT_EQ(names_before, names_after); } diff --git a/src/common/transformations/tests/op_conversions/convert_squeeze15_downgrade_test.cpp b/src/common/transformations/tests/op_conversions/convert_squeeze15_downgrade_test.cpp new file mode 100644 index 00000000000000..f3d90ab2c748bd --- /dev/null +++ b/src/common/transformations/tests/op_conversions/convert_squeeze15_downgrade_test.cpp @@ -0,0 +1,112 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/convert_squeeze15_downgrade.hpp" + +#include + +#include + +#include "common_test_utils/ov_test_utils.hpp" +#include "openvino/opsets/opset1.hpp" +#include "openvino/opsets/opset15.hpp" +#include "openvino/pass/manager.hpp" +#include "transformations/utils/utils.hpp" +using namespace ov; +using namespace testing; + +namespace { + +enum class IndicesMode { NONE, CONST, PARAM }; + +std::shared_ptr create_v15_model(const IndicesMode indices_mode, + const std::vector indices_const_val, + const bool allow_axis_skip) { + const PartialShape data_shape{-1, {2, 5}, 1, {1, 5}, 4}; + const auto& data = std::make_shared(ov::element::f32, data_shape); + ov::ParameterVector params = {data}; + std::shared_ptr squeeze; + if (indices_mode == IndicesMode::NONE) { + squeeze = std::make_shared(data, allow_axis_skip); + } else if (indices_mode == IndicesMode::PARAM) { + const auto& indices = + std::make_shared(ov::element::i32, PartialShape({data_shape.rank()})); + params.push_back(indices); + 
squeeze = std::make_shared(data, indices, allow_axis_skip); + } else if (indices_mode == IndicesMode::CONST) { + const auto& indices = + ov::opset15::Constant::create(ov::element::i32, Shape({indices_const_val.size()}), indices_const_val); + squeeze = std::make_shared(data, indices, allow_axis_skip); + } + squeeze->set_friendly_name("squeeze15"); + return std::make_shared(squeeze->outputs(), params); +} + +std::shared_ptr create_v1_model(const IndicesMode indices_mode, const std::vector indices_const_val) { + const PartialShape data_shape{-1, {2, 5}, 1, {1, 5}, 4}; + const auto& data = std::make_shared(ov::element::f32, data_shape); + ov::ParameterVector params = {data}; + std::shared_ptr squeeze; + if (indices_mode == IndicesMode::NONE) { + squeeze = std::make_shared(data); + } else if (indices_mode == IndicesMode::PARAM) { + const auto& indices = + std::make_shared(ov::element::i32, PartialShape({data_shape.rank()})); + params.push_back(indices); + squeeze = std::make_shared(data, indices); + } else if (indices_mode == IndicesMode::CONST) { + const auto& indices = + ov::opset1::Constant::create(ov::element::i32, Shape({indices_const_val.size()}), indices_const_val); + squeeze = std::make_shared(data, indices); + } + squeeze->set_friendly_name("squeeze15"); + return std::make_shared(squeeze->outputs(), params); +} + +} // namespace + +TEST_F(TransformationTestsF, ConvertSqueeze15ToSqueeze1_no_indices_no_skip) { + manager.register_pass(); + model = create_v15_model(IndicesMode::NONE, {}, false); + model_ref = create_v1_model(IndicesMode::NONE, {}); + EXPECT_EQ(model->output(0).get_partial_shape(), model_ref->output(0).get_partial_shape()); + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); + comparator.enable(FunctionsComparator::CmpValues::NAMES); +} + +TEST_F(TransformationTestsF, ConvertSqueeze15ToSqueeze1_no_indices_skip) { + manager.register_pass(); + model = 
create_v15_model(IndicesMode::NONE, {}, true); + model_ref = create_v1_model(IndicesMode::NONE, {}); + EXPECT_EQ(model->output(0).get_partial_shape(), model_ref->output(0).get_partial_shape()); + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); + comparator.enable(FunctionsComparator::CmpValues::NAMES); +} + +TEST_F(TransformationTestsF, ConvertSqueeze15ToSqueeze1_const_indices_no_skip) { + manager.register_pass(); + model = create_v15_model(IndicesMode::CONST, {0, -4, 3}, false); + model_ref = create_v1_model(IndicesMode::CONST, {0, -4, 3}); + EXPECT_EQ(model->output(0).get_partial_shape(), model_ref->output(0).get_partial_shape()); + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); + comparator.enable(FunctionsComparator::CmpValues::NAMES); +} + +TEST_F(TransformationTestsF, ConvertSqueeze15ToSqueeze1_dynamic_indices_no_skip) { + manager.register_pass(); + model = create_v15_model(IndicesMode::PARAM, {}, false); + model_ref = create_v1_model(IndicesMode::PARAM, {}); + EXPECT_EQ(model->output(0).get_partial_shape(), model_ref->output(0).get_partial_shape()); + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); + comparator.enable(FunctionsComparator::CmpValues::NAMES); +} + +TEST_F(TransformationTestsF, ConvertSqueeze15ToSqueeze1_unsupported_skip) { + manager.register_pass(); + model = create_v15_model(IndicesMode::PARAM, {}, true); +} diff --git a/src/common/transformations/tests/utils/convert_precision.cpp b/src/common/transformations/tests/utils/convert_precision.cpp index 03095b28d96a74..318f15ab1a64dc 100644 --- a/src/common/transformations/tests/utils/convert_precision.cpp +++ b/src/common/transformations/tests/utils/convert_precision.cpp @@ -15,6 +15,7 @@ #include "openvino/core/model.hpp" #include 
"openvino/opsets/opset1.hpp" #include "openvino/opsets/opset10.hpp" +#include "openvino/opsets/opset15.hpp" #include "openvino/opsets/opset3.hpp" #include "openvino/opsets/opset4.hpp" #include "openvino/opsets/opset5.hpp" @@ -963,6 +964,34 @@ TEST(TransformationTests, ConvertPrecision_Select) { ASSERT_TRUE(has_type(f)); } +TEST(TransformationTests, ConvertPrecision_Select_Relaxed) { + std::shared_ptr f(nullptr); + { + auto input1 = std::make_shared(element::boolean, Shape{15, 20, 3}); + auto node = std::make_shared(input1); + auto select = std::make_shared(node, input1, input1); + + f = std::make_shared(OutputVector{select}, ParameterVector{input1}); + + // Explicitly setting the element type of a node to a different one to + // test the appearance of TypeRelaxed within Select + node->set_output_type(0, ov::element::u8, node->get_output_partial_shape(0)); + + pass::Manager manager; + manager.register_pass(); + manager.register_pass(precisions_map{{element::u8, element::boolean}}); + manager.run_passes(f); + } + OV_ASSERT_NO_THROW(check_rt_info(f)); + ASSERT_FALSE(has_type(f)); + ASSERT_TRUE(has_type(f)); + int counter = 0; + for (const auto& node : f->get_ordered_ops()) + if (std::dynamic_pointer_cast(node)) + ++counter; + ASSERT_EQ(counter, 1); +} + TEST(TransformationTests, ConvertPrecision_TypeRelaxedWithSelect) { std::shared_ptr f(nullptr); { @@ -1008,6 +1037,28 @@ TEST(TransformationTests, ConvertPrecision_TypeRelaxed) { } } +TEST(TransformationTests, ConvertPrecision_SearchSorted) { + std::shared_ptr f(nullptr); + { + auto search_sorted_input = opset15::Constant::create(ov::element::i64, {5}, {1, 2, 3, 4, 5}); + auto indices = std::make_shared(ov::element::i64, Shape{3}); + auto search_sorted = std::make_shared(search_sorted_input, indices); + + auto less_input = opset15::Constant::create(ov::element::i64, {3}, {4, 5, 6}); + auto less = std::make_shared(search_sorted, less_input); + + f = std::make_shared(OutputVector{less}, ParameterVector{indices}); + + 
pass::Manager manager; + manager.register_pass(); + manager.register_pass(precisions_map{{element::i64, element::i32}}); + manager.run_passes(f); + } + OV_ASSERT_NO_THROW(check_rt_info(f)); + ASSERT_FALSE(has_type(f)); + ASSERT_TRUE(has_type(f)); +} + TEST(TransformationTests, ConvertPrecision_Variables) { std::shared_ptr f(nullptr); { @@ -2253,11 +2304,6 @@ TEST(TransformationTests, ConvertPrecisionExplicitConvertsSingleNodeMultipleOutp split->get_output_tensor(0).add_names({"split:0"}); split->get_output_tensor(1).add_names({"split:1"}); split->get_output_tensor(2).add_names({"split:2"}); - OPENVINO_SUPPRESS_DEPRECATED_START - ov::descriptor::set_ov_tensor_legacy_name(split->get_output_tensor(0), "legacy_split:0"); - ov::descriptor::set_ov_tensor_legacy_name(split->get_output_tensor(1), "legacy_split:1"); - ov::descriptor::set_ov_tensor_legacy_name(split->get_output_tensor(2), "legacy_split:2"); - OPENVINO_SUPPRESS_DEPRECATED_END model = make_shared(split->outputs(), ParameterVector{param_1}); type_to_fuse_map empty_type_to_fuse_map = {}; @@ -2294,11 +2340,6 @@ TEST(TransformationTests, ConvertPrecisionExplicitConvertsSingleNodeMultipleOutp ASSERT_EQ("split.0", results[0]->get_input_node_ptr(0)->get_friendly_name()); ASSERT_EQ("split.1", results[1]->get_input_node_ptr(0)->get_friendly_name()); ASSERT_EQ("split.2", results[2]->get_input_node_ptr(0)->get_friendly_name()); - OPENVINO_SUPPRESS_DEPRECATED_START - ASSERT_EQ("legacy_split:0", ov::descriptor::get_ov_tensor_legacy_name(results[0]->get_input_tensor(0))); - ASSERT_EQ("legacy_split:1", ov::descriptor::get_ov_tensor_legacy_name(results[1]->get_input_tensor(0))); - ASSERT_EQ("legacy_split:2", ov::descriptor::get_ov_tensor_legacy_name(results[2]->get_input_tensor(0))); - OPENVINO_SUPPRESS_DEPRECATED_END } TEST(TransformationTests, ConvertPrecisionExplicitConvertsMultiSubgraphs) { diff --git a/src/common/util/include/openvino/util/weights_path.hpp b/src/common/util/include/openvino/util/weights_path.hpp new file 
mode 100644 index 00000000000000..11215d8eee1304 --- /dev/null +++ b/src/common/util/include/openvino/util/weights_path.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/util/common_util.hpp" + +namespace ov { +namespace util { + +bool validate_weights_path(std::string& weights_path); + +} // namespace util +} // namespace ov diff --git a/src/common/util/src/weights_path.cpp b/src/common/util/src/weights_path.cpp new file mode 100644 index 00000000000000..56d0b56d643fa8 --- /dev/null +++ b/src/common/util/src/weights_path.cpp @@ -0,0 +1,13 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/util/weights_path.hpp" + +bool ov::util::validate_weights_path(std::string& weights_path) { + if (weights_path.empty() || !ov::util::ends_with(weights_path, ".bin")) { + return false; + } + + return true; +} diff --git a/src/core/dev_api/openvino/core/descriptor_tensor.hpp b/src/core/dev_api/openvino/core/descriptor_tensor.hpp index 4c7c0ad8c3c972..9418183a1189fc 100644 --- a/src/core/dev_api/openvino/core/descriptor_tensor.hpp +++ b/src/core/dev_api/openvino/core/descriptor_tensor.hpp @@ -14,14 +14,5 @@ void set_element_type(Tensor& tensor, const element::Type& elemenet_type); // To change Tensor type please change the Parameter type. OPENVINO_API void set_tensor_type(Tensor& tensor, const element::Type& element_type, const PartialShape& pshape); - -OPENVINO_DEPRECATED("get_ov_tensor_legacy_name() is deprecated. Please don't use this function.") -OPENVINO_API -std::string get_ov_tensor_legacy_name(const Tensor& tensor); - -OPENVINO_DEPRECATED("set_ov_tensor_legacy_name() is deprecated. 
Please don't use this function.") -OPENVINO_API -void set_ov_tensor_legacy_name(Tensor& tensor, const std::string& tensor_name); - } // namespace descriptor } // namespace ov diff --git a/src/core/include/openvino/core/descriptor/tensor.hpp b/src/core/include/openvino/core/descriptor/tensor.hpp index edf2e690e8c860..a1cac56fa64e01 100644 --- a/src/core/include/openvino/core/descriptor/tensor.hpp +++ b/src/core/include/openvino/core/descriptor/tensor.hpp @@ -98,14 +98,11 @@ class OPENVINO_API Tensor { PartialShape m_partial_shape; ov::Tensor m_lower_value, m_upper_value; TensorSymbol m_value_symbol; - std::string m_legacy_name; std::unordered_set m_names; std::unordered_set::const_iterator m_name_it; RTMap m_rt_info; - friend OPENVINO_API std::string get_ov_tensor_legacy_name(const Tensor& tensor); - friend OPENVINO_API void set_ov_tensor_legacy_name(Tensor& tensor, const std::string& tensor_name); friend OPENVINO_API void set_element_type(Tensor& tensor, const element::Type& elemenet_type); friend OPENVINO_API void set_tensor_type(Tensor& tensor, const element::Type& element_type, diff --git a/src/core/include/openvino/core/type/element_type.hpp b/src/core/include/openvino/core/type/element_type.hpp index e579e8ba83210b..2af2fd272a890d 100644 --- a/src/core/include/openvino/core/type/element_type.hpp +++ b/src/core/include/openvino/core/type/element_type.hpp @@ -129,9 +129,6 @@ class OPENVINO_API Type { // Return element type in string representation std::string to_string() const; - OPENVINO_DEPRECATED("This constructor is deprecated. 
It will be removed in 2025.0") - Type(size_t bitwidth, bool is_real, bool is_signed, bool is_quantized, const std::string& cname); - private: Type_t m_type{Type_t::undefined}; }; diff --git a/src/core/include/openvino/core/version.hpp b/src/core/include/openvino/core/version.hpp index db1595f7addcb6..79f688795ce37c 100644 --- a/src/core/include/openvino/core/version.hpp +++ b/src/core/include/openvino/core/version.hpp @@ -19,8 +19,8 @@ * @brief Defines OpenVINO patch version */ -#define OPENVINO_VERSION_MAJOR 2024 -#define OPENVINO_VERSION_MINOR 5 +#define OPENVINO_VERSION_MAJOR 2025 +#define OPENVINO_VERSION_MINOR 0 #define OPENVINO_VERSION_PATCH 0 namespace ov { diff --git a/src/core/include/openvino/op/concat.hpp b/src/core/include/openvino/op/concat.hpp index 0e8fa67c54dfae..d32171c9facbc1 100644 --- a/src/core/include/openvino/op/concat.hpp +++ b/src/core/include/openvino/op/concat.hpp @@ -36,17 +36,6 @@ class OPENVINO_API Concat : public Op { std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - /// \return The concatenation axis. - OPENVINO_DEPRECATED("The function get_concatenation_axis() is deprecated. Will be removed in 2025.0 release. Use " - "get_axis() instead.") - int64_t get_concatenation_axis() const { - return m_concat_axis; - } - OPENVINO_DEPRECATED("The function set_concatenation_axis() is deprecated. Will be removed in 2025.0 release. Use " - "set_axis() instead.") - void set_concatenation_axis(int64_t concatenation_axis) { - m_concat_axis = concatenation_axis; - } /// \return The concatenation axis. int64_t get_axis() const { return m_axis; diff --git a/src/core/include/openvino/op/lstm_sequence.hpp b/src/core/include/openvino/op/lstm_sequence.hpp index 4d68b5ceb7f9c6..28f5a2b1b8bfaf 100644 --- a/src/core/include/openvino/op/lstm_sequence.hpp +++ b/src/core/include/openvino/op/lstm_sequence.hpp @@ -17,103 +17,6 @@ namespace ov { namespace op { -namespace v0 { - -/// -/// \brief Class for lstm sequence node. 
-/// -/// \note It follows notation and equations defined as in ONNX standard: -/// https://github.com/onnx/onnx/blob/master/docs/Operators.md#LSTM -/// -/// \sa LSTMCell, RNNCell, GRUCell -/// -/// -/// \ingroup ov_ops_cpp_api -class OPENVINO_API OPENVINO_DEPRECATED( - "The class ov::op::v0::LSTMSequence is deprecated. It will be removed in 2025.0 release. Use " - "ov::op::v5::LSTMSequence instead.") LSTMSequence : public util::RNNCellBase { -public: - OPENVINO_OP("LSTMSequence", "opset1", util::RNNCellBase); - LSTMSequence() = default; - - using direction = RecurrentSequenceDirection; - - size_t get_default_output_index() const override { - return no_default_index(); - } - explicit LSTMSequence(const Output& X, - const Output& initial_hidden_state, - const Output& initial_cell_state, - const Output& sequence_lengths, - const Output& W, - const Output& R, - const Output& B, - const Output& P, - const std::int64_t hidden_size, - const direction lstm_direction, - LSTMWeightsFormat weights_format = LSTMWeightsFormat::IFCO, - const std::vector activations_alpha = {}, - const std::vector activations_beta = {}, - const std::vector activations = {"sigmoid", "tanh", "tanh"}, - const float clip_threshold = 0, - const bool input_forget = false); - - explicit LSTMSequence(const Output& X, - const Output& initial_hidden_state, - const Output& initial_cell_state, - const Output& sequence_lengths, - const Output& W, - const Output& R, - const Output& B, - const std::int64_t hidden_size, - const direction lstm_direction, - LSTMWeightsFormat weights_format = LSTMWeightsFormat::IFCO, - const std::vector& activations_alpha = {}, - const std::vector& activations_beta = {}, - const std::vector& activations = {"sigmoid", "tanh", "tanh"}, - const float clip_threshold = 0, - const bool input_forget = false); - - void validate_and_infer_types() override; - bool visit_attributes(AttributeVisitor& visitor) override; - - std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) 
const override; - - std::vector get_activations_alpha() const { - return m_activations_alpha; - } - std::vector get_activations_beta() const { - return m_activations_beta; - } - std::vector get_activations() const { - return m_activations; - } - float get_clip_threshold() const { - return m_clip; - } - direction get_direction() const { - return m_direction; - } - void set_direction(const direction& dir) { - m_direction = dir; - } - std::int64_t get_hidden_size() const { - return m_hidden_size; - } - bool get_input_forget() const { - return m_input_forget; - } - LSTMWeightsFormat get_weights_format() const { - return m_weights_format; - } - -private: - direction m_direction; - bool m_input_forget; - LSTMWeightsFormat m_weights_format; -}; -} // namespace v0 - namespace v5 { /// /// \brief Class for lstm sequence node. diff --git a/src/core/include/openvino/op/search_sorted.hpp b/src/core/include/openvino/op/search_sorted.hpp index c370ba46b2f182..efb1f8491e0882 100644 --- a/src/core/include/openvino/op/search_sorted.hpp +++ b/src/core/include/openvino/op/search_sorted.hpp @@ -22,7 +22,15 @@ class OPENVINO_API SearchSorted : public Op { /// \param values Values to search indexs for. /// \param right_mode If False, return the first suitable index that is found for given value. If True, return /// the last such index. - SearchSorted(const Output& sorted_sequence, const Output& values, bool right_mode = false); + /// \param output_type The element type of the output tensor. This is purely an implementation flag, which + /// is used to convert the output type for CPU plugin in ConvertPrecision transformation (and potentially other + /// plugins as well). Setting this flag to element::i32 will result in the output tensor of i32 element type. + /// Setting this flag to element::i64 will generally not give any effect, since it will be converted to i32 anyway, + /// at least for CPU plugin. 
+ SearchSorted(const Output& sorted_sequence, + const Output& values, + bool right_mode = false, + const element::Type& output_type = element::i64); void validate_and_infer_types() override; bool visit_attributes(AttributeVisitor& visitor) override; @@ -36,8 +44,17 @@ class OPENVINO_API SearchSorted : public Op { m_right_mode = right_mode; } + void set_output_type_attr(const element::Type& output_type) { + m_output_type = output_type; + } + + element::Type get_output_type_attr() const { + return m_output_type; + } + private: bool m_right_mode{}; + element::Type m_output_type = element::i64; }; } // namespace v15 } // namespace op diff --git a/src/core/include/openvino/op/squeeze.hpp b/src/core/include/openvino/op/squeeze.hpp index 8c27f29d66df66..dde456aa2eef47 100644 --- a/src/core/include/openvino/op/squeeze.hpp +++ b/src/core/include/openvino/op/squeeze.hpp @@ -4,7 +4,7 @@ #pragma once -#include "openvino/op/op.hpp" +#include "openvino/op/util/squeeze_base.hpp" namespace ov { namespace op { @@ -12,30 +12,65 @@ namespace v0 { /// \brief Squeeze operation. /// /// \ingroup ov_ops_cpp_api -class OPENVINO_API Squeeze : public Op { +class OPENVINO_API Squeeze : public util::SqueezeBase { public: OPENVINO_OP("Squeeze", "opset1"); Squeeze(); - Squeeze(const Output& data, const Output& axes); + /// \brief Constructs a squeeze v0 operation. + /// + /// \param data Input tensor with data Squeeze(const Output& data); + /// \brief Constructs a squeeze v0 operation. + /// + /// \param data Input tensor with data + /// \param axis The axis along which to squeeze the input tensor. 
+ Squeeze(const Output& data, const Output& axes); void validate_and_infer_types() override; bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; - bool has_evaluate() const override; - bool evaluate_lower(TensorVector& outputs) const override; - bool evaluate_upper(TensorVector& outputs) const override; - bool evaluate_symbol(TensorSymbolVector& output_symbols) const override; - bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override; - bool can_constant_fold(const OutputVector& inputs_values) const override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - bool is_dynamic() const override; - private: Output get_default_axes_input() const; }; } // namespace v0 + +namespace v15 { +/// \brief Squeeze operation. +/// +/// \ingroup ov_ops_cpp_api +class OPENVINO_API Squeeze : public util::SqueezeBase { +public: + OPENVINO_OP("Squeeze", "opset15"); + + Squeeze(); + /// \brief Constructs a squeeze v15 operation. + /// + /// \param data Input tensor with data + /// \param allow_axis_skip Shape inference result dynamic rank if selected axis has 1 in range of its dynamic + Squeeze(const Output& data, const bool allow_axis_skip = false); + /// \brief Constructs a squeeze v15 operation. + /// + /// \param data Input tensor with data + /// \param axis The axis along which to squeeze the input tensor. 
+ /// \param allow_axis_skip Shape inference result dynamic rank if selected axis has 1 in range of its dynamic + Squeeze(const Output& data, const Output& axes, const bool allow_axis_skip = false); + + void validate_and_infer_types() override; + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; + + bool visit_attributes(AttributeVisitor& visitor) override; + + bool get_allow_axis_skip() const; + +private: + Output get_default_axes_input() const; + bool m_allow_axis_skip{}; +}; +} // namespace v15 } // namespace op } // namespace ov diff --git a/src/core/include/openvino/op/stft.hpp b/src/core/include/openvino/op/stft.hpp index 0a8ab80e7a48fe..0b0d8ae9a9e473 100644 --- a/src/core/include/openvino/op/stft.hpp +++ b/src/core/include/openvino/op/stft.hpp @@ -13,7 +13,7 @@ namespace v15 { /// \ingroup ov_ops_cpp_api class OPENVINO_API STFT : public Op { public: - OPENVINO_OP("STFT"); + OPENVINO_OP("STFT", "opset15"); STFT() = default; /// \brief Constructs a STFT operation. 
diff --git a/src/core/include/openvino/op/util/broadcast_base.hpp b/src/core/include/openvino/op/util/broadcast_base.hpp index 2e500eb611c04c..6300559ac8cf00 100644 --- a/src/core/include/openvino/op/util/broadcast_base.hpp +++ b/src/core/include/openvino/op/util/broadcast_base.hpp @@ -63,6 +63,7 @@ class OPENVINO_API BroadcastBase : public Op { bool evaluate_lower(TensorVector& outputs) const override; bool evaluate_upper(TensorVector& outputs) const override; + bool evaluate_symbol(ov::TensorSymbolVector& output_symbols) const override; PartialShape get_result_shape_pdpd(const PartialShape& arg0_shape, const PartialShape& target_shape, diff --git a/src/core/include/openvino/op/util/squeeze_base.hpp b/src/core/include/openvino/op/util/squeeze_base.hpp new file mode 100644 index 00000000000000..50d960824e10d2 --- /dev/null +++ b/src/core/include/openvino/op/util/squeeze_base.hpp @@ -0,0 +1,39 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/op/op.hpp" + +namespace ov { +namespace op { +namespace util { +/// \brief Squeeze operation. +/// +/// \ingroup ov_ops_cpp_api +class OPENVINO_API SqueezeBase : public Op { +public: + OPENVINO_OP("Squeeze", "util"); + SqueezeBase() = default; + /// \brief Constructs a squeeze operation. + /// + /// \param data Input tensor with data + SqueezeBase(const Output& data); + /// \brief Constructs a squeeze operation. + /// + /// \param data Input tensor with data + /// \param axis The axis along which to squeeze the input tensor. 
+ SqueezeBase(const Output& data, const Output& axes); + + bool has_evaluate() const override; + bool evaluate_lower(TensorVector& outputs) const override; + bool evaluate_upper(TensorVector& outputs) const override; + bool evaluate_symbol(TensorSymbolVector& output_symbols) const override; + bool can_constant_fold(const OutputVector& inputs_values) const override; + bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override; + bool is_dynamic() const override; +}; +} // namespace util +} // namespace op +} // namespace ov diff --git a/src/core/include/openvino/opsets/opset1.hpp b/src/core/include/openvino/opsets/opset1.hpp index 80ea1b517e4080..e2a7a159299ad3 100644 --- a/src/core/include/openvino/opsets/opset1.hpp +++ b/src/core/include/openvino/opsets/opset1.hpp @@ -9,9 +9,7 @@ namespace ov { namespace opset1 { #define _OPENVINO_OP_REG(a, b) using b::a; -OPENVINO_SUPPRESS_DEPRECATED_START #include "openvino/opsets/opset1_tbl.hpp" -OPENVINO_SUPPRESS_DEPRECATED_END #undef _OPENVINO_OP_REG } // namespace opset1 } // namespace ov diff --git a/src/core/include/openvino/opsets/opset15_tbl.hpp b/src/core/include/openvino/opsets/opset15_tbl.hpp index 9a49e421f9ad8e..8d12420719bb6b 100644 --- a/src/core/include/openvino/opsets/opset15_tbl.hpp +++ b/src/core/include/openvino/opsets/opset15_tbl.hpp @@ -97,7 +97,7 @@ _OPENVINO_OP_REG(Sqrt, ov::op::v0) _OPENVINO_OP_REG(SpaceToDepth, ov::op::v0) _OPENVINO_OP_REG(Split, ov::op::v1) _OPENVINO_OP_REG(SquaredDifference, ov::op::v0) -_OPENVINO_OP_REG(Squeeze, ov::op::v0) +_OPENVINO_OP_REG(Squeeze, ov::op::v15) _OPENVINO_OP_REG(StridedSlice, ov::op::v1) _OPENVINO_OP_REG(Subtract, ov::op::v1) _OPENVINO_OP_REG(Tan, ov::op::v0) diff --git a/src/core/include/openvino/opsets/opset1_tbl.hpp b/src/core/include/openvino/opsets/opset1_tbl.hpp index 089df5896e664c..3008e450065926 100644 --- a/src/core/include/openvino/opsets/opset1_tbl.hpp +++ b/src/core/include/openvino/opsets/opset1_tbl.hpp @@ -88,7 +88,6 
@@ _OPENVINO_OP_REG(LogicalOr, ov::op::v1) _OPENVINO_OP_REG(LogicalXor, ov::op::v1) _OPENVINO_OP_REG(LRN, ov::op::v0) _OPENVINO_OP_REG(LSTMCell, ov::op::v0) -_OPENVINO_OP_REG(LSTMSequence, ov::op::v0) _OPENVINO_OP_REG(MatMul, ov::op::v0) _OPENVINO_OP_REG(MaxPool, ov::op::v1) _OPENVINO_OP_REG(Maximum, ov::op::v1) diff --git a/src/core/include/openvino/opsets/opset2.hpp b/src/core/include/openvino/opsets/opset2.hpp index 3c0888c0a88487..bcdaa6a1bc354b 100644 --- a/src/core/include/openvino/opsets/opset2.hpp +++ b/src/core/include/openvino/opsets/opset2.hpp @@ -9,9 +9,7 @@ namespace ov { namespace opset2 { #define _OPENVINO_OP_REG(a, b) using b::a; -OPENVINO_SUPPRESS_DEPRECATED_START #include "openvino/opsets/opset2_tbl.hpp" -OPENVINO_SUPPRESS_DEPRECATED_END #undef _OPENVINO_OP_REG } // namespace opset2 } // namespace ov diff --git a/src/core/include/openvino/opsets/opset2_tbl.hpp b/src/core/include/openvino/opsets/opset2_tbl.hpp index 1d7c55ea74549e..e14887aaced94b 100644 --- a/src/core/include/openvino/opsets/opset2_tbl.hpp +++ b/src/core/include/openvino/opsets/opset2_tbl.hpp @@ -57,7 +57,6 @@ _OPENVINO_OP_REG(LogicalOr, ov::op::v1) _OPENVINO_OP_REG(LogicalXor, ov::op::v1) _OPENVINO_OP_REG(LRN, ov::op::v0) _OPENVINO_OP_REG(LSTMCell, ov::op::v0) -_OPENVINO_OP_REG(LSTMSequence, ov::op::v0) _OPENVINO_OP_REG(MatMul, ov::op::v0) _OPENVINO_OP_REG(MaxPool, ov::op::v1) _OPENVINO_OP_REG(Maximum, ov::op::v1) diff --git a/src/core/include/openvino/opsets/opset3.hpp b/src/core/include/openvino/opsets/opset3.hpp index 29cd23b89c1111..a4e99a21e1c643 100644 --- a/src/core/include/openvino/opsets/opset3.hpp +++ b/src/core/include/openvino/opsets/opset3.hpp @@ -9,9 +9,7 @@ namespace ov { namespace opset3 { #define _OPENVINO_OP_REG(a, b) using b::a; -OPENVINO_SUPPRESS_DEPRECATED_START #include "openvino/opsets/opset3_tbl.hpp" -OPENVINO_SUPPRESS_DEPRECATED_END #undef _OPENVINO_OP_REG } // namespace opset3 } // namespace ov diff --git a/src/core/include/openvino/opsets/opset3_tbl.hpp 
b/src/core/include/openvino/opsets/opset3_tbl.hpp index 3c34a6999f2696..5c16414a02e987 100644 --- a/src/core/include/openvino/opsets/opset3_tbl.hpp +++ b/src/core/include/openvino/opsets/opset3_tbl.hpp @@ -60,7 +60,6 @@ _OPENVINO_OP_REG(LogicalOr, ov::op::v1) _OPENVINO_OP_REG(LogicalXor, ov::op::v1) _OPENVINO_OP_REG(LRN, ov::op::v0) _OPENVINO_OP_REG(LSTMCell, ov::op::v0) -_OPENVINO_OP_REG(LSTMSequence, ov::op::v0) _OPENVINO_OP_REG(MatMul, ov::op::v0) _OPENVINO_OP_REG(MaxPool, ov::op::v1) _OPENVINO_OP_REG(Maximum, ov::op::v1) diff --git a/src/core/reference/include/openvino/reference/sequences.hpp b/src/core/reference/include/openvino/reference/sequences.hpp index af1946fcbe72c9..42b4083793b63b 100644 --- a/src/core/reference/include/openvino/reference/sequences.hpp +++ b/src/core/reference/include/openvino/reference/sequences.hpp @@ -380,131 +380,6 @@ void lstm_sequence(const char* X, } } -template -void lstm_sequence_v1(const char* X, - const Shape& X_shape, - const char* H, - const Shape& H_shape, - const char* C, - const Shape& C_shape, - const char* seq_lengths, - const Shape& seq_lengths_shape, - const char* W, - const Shape& W_shape, - const char* R, - const Shape& R_shape, - const char* B, - const Shape& B_shape, - const char* P, - const Shape& P_shape, - char* Y, - char* Ho, - char* Co, - const std::string& activation_f, - const std::string& activation_g, - const std::string& activation_h, - float clip, - const ov::op::LSTMWeightsFormat weight_format, - bool input_forget, - op::RecurrentSequenceDirection direction) { - OutputVector results; - if (direction == op::RecurrentSequenceDirection::FORWARD || direction == op::RecurrentSequenceDirection::REVERSE) { - CellArgs args; - args.activation_f = activation_f; - args.activation_g = activation_g; - args.activation_h = activation_h; - args.clip = clip; - args.weight_format = weight_format; - args.input_forget = input_forget; - std::vector inputs = {X, seq_lengths, H, C, W, R, B, P}; - std::vector outputs = {Y, 
Ho, Co}; - std::vector shapes = {X_shape, seq_lengths_shape, H_shape, C_shape, W_shape, R_shape, B_shape, P_shape}; - cell_pass(CellType::LSTM_v1, - inputs, - shapes, - outputs, - args, - direction == op::RecurrentSequenceDirection::REVERSE); - } else if (direction == op::RecurrentSequenceDirection::BIDIRECTIONAL) { - // Split bidirectional case to forward + reverse passes. - // split inputs - std::vector> H_split(2, std::vector(sizeof(T) * shape_size(H_shape) / 2)); - std::vector> C_split(2, std::vector(sizeof(T) * shape_size(C_shape) / 2)); - std::vector> W_split(2, std::vector(sizeof(T) * shape_size(W_shape) / 2)); - std::vector> R_split(2, std::vector(sizeof(T) * shape_size(R_shape) / 2)); - std::vector> B_split(2, std::vector(sizeof(T) * shape_size(B_shape) / 2)); - std::vector> P_split(2, std::vector(sizeof(T) * shape_size(P_shape) / 2)); - char* h_pointers[2] = {H_split[0].data(), H_split[1].data()}; - char* c_pointers[2] = {C_split[0].data(), C_split[1].data()}; - char* w_pointers[2] = {W_split[0].data(), W_split[1].data()}; - char* r_pointers[2] = {R_split[0].data(), R_split[1].data()}; - char* b_pointers[2] = {B_split[0].data(), B_split[1].data()}; - char* p_pointers[2] = {P_split[0].data(), P_split[1].data()}; - reference::split(H, H_shape, sizeof(T), 1, 2, h_pointers); - reference::split(C, C_shape, sizeof(T), 1, 2, c_pointers); - reference::split(W, W_shape, sizeof(T), 0, 2, w_pointers); - reference::split(R, R_shape, sizeof(T), 0, 2, r_pointers); - reference::split(B, B_shape, sizeof(T), 0, 2, b_pointers); - reference::split(P, P_shape, sizeof(T), 0, 2, p_pointers); - std::vector forward_res_y(sizeof(T) * H_shape[0] * H_shape[2] * X_shape[1]); - std::vector reverse_res_y(sizeof(T) * H_shape[0] * H_shape[2] * X_shape[1]); - std::vector> forward_res(2, std::vector(sizeof(T) * H_shape[0] * H_shape[2])); - std::vector> reverse_res(2, std::vector(sizeof(T) * H_shape[0] * H_shape[2])); - - CellArgs args; - args.activation_f = activation_f; - 
args.activation_g = activation_g; - args.activation_h = activation_h; - args.clip = clip; - args.weight_format = weight_format; - args.input_forget = input_forget; - std::vector shapes = {X_shape, seq_lengths_shape, H_shape, C_shape, W_shape, R_shape, B_shape, P_shape}; - // update H,C,W,R,B,P shapes after split - shapes[2][1] = 1; - shapes[3][1] = 1; - for (size_t i = 4; i < shapes.size(); ++i) { - shapes[i][0] = 1; - } - // forward pass - cell_pass( - CellType::LSTM_v1, - {X, seq_lengths, h_pointers[0], c_pointers[0], w_pointers[0], r_pointers[0], b_pointers[0], p_pointers[0]}, - shapes, - {forward_res_y.data(), forward_res[0].data(), forward_res[1].data()}, - args, - false); - // reverse pass - cell_pass( - CellType::LSTM_v1, - {X, seq_lengths, h_pointers[1], c_pointers[1], w_pointers[1], r_pointers[1], b_pointers[1], p_pointers[1]}, - shapes, - {reverse_res_y.data(), reverse_res[0].data(), reverse_res[1].data()}, - args, - true); - - // Stack together respective outputs from both forward and reverse passes. 
- std::vector in_shapes_y = {{H_shape[0], 1, X_shape[1], H_shape[2]}, - {H_shape[0], 1, X_shape[1], H_shape[2]}}; - std::vector in_shapes_h_c = {{H_shape[0], 1, H_shape[2]}, {H_shape[0], 1, H_shape[2]}}; - Shape output_shape_y{H_shape[0], 2, X_shape[1], H_shape[2]}; - Shape output_shape_h_c{H_shape[0], 2, H_shape[2]}; - - reference::concat({forward_res_y.data(), reverse_res_y.data()}, Y, in_shapes_y, output_shape_y, 1, sizeof(T)); - reference::concat({forward_res[0].data(), reverse_res[0].data()}, - Ho, - in_shapes_h_c, - output_shape_h_c, - 1, - sizeof(T)); - reference::concat({forward_res[1].data(), reverse_res[1].data()}, - Co, - in_shapes_h_c, - output_shape_h_c, - 1, - sizeof(T)); - } -} - template void gru_sequence(const char* X, const Shape& X_shape, diff --git a/src/core/reference/src/op/stft.cpp b/src/core/reference/src/op/stft.cpp index d3a93db0e1e937..6fd5583be21a75 100644 --- a/src/core/reference/src/op/stft.cpp +++ b/src/core/reference/src/op/stft.cpp @@ -21,8 +21,9 @@ void stft(const float* signal, const int64_t frame_size, const int64_t frame_step, const bool transpose_frames) { - constexpr size_t signal_axis = 1; - const auto batch_size = signal_shape[0]; + const auto is_signal_1D = signal_shape.size() == 1; + const size_t batch_size = is_signal_1D ? 1 : signal_shape[0]; + const size_t signal_axis = is_signal_1D ? 
0 : 1; const auto signal_length = signal_shape[signal_axis]; const auto num_frames = static_cast((signal_length - frame_size) / frame_step) + 1; const auto frame_size_dim = static_cast(frame_size); diff --git a/src/core/shape_inference/include/lstm_sequence_shape_inference.hpp b/src/core/shape_inference/include/lstm_sequence_shape_inference.hpp index c2b5f1389dee7b..8b0daeb27663fb 100644 --- a/src/core/shape_inference/include/lstm_sequence_shape_inference.hpp +++ b/src/core/shape_inference/include/lstm_sequence_shape_inference.hpp @@ -7,32 +7,6 @@ namespace ov { namespace op { -namespace v0 { -OPENVINO_SUPPRESS_DEPRECATED_START -template -std::vector> shape_infer(const LSTMSequence* op, const std::vector& input_shapes) { - OPENVINO_SUPPRESS_DEPRECATED_END - constexpr auto num_gates = 4; - constexpr auto num_state_nodes = 2; - const auto output_shapes = - rnn::seq_base_shape_infer(op, input_shapes, num_gates, num_state_nodes, op->get_direction()); - // Validate rank and dimension for P input (the input doesn't exists in the next version of LSTM or other RNN based - // ops) The checks are compatible with the original restrictions of the v0::LSTMSequence - const auto& hidden_size = output_shapes[0][3]; - if (input_shapes.size() > 7 && input_shapes[7].is_static() && hidden_size.is_static()) { - const auto& p_pshape = input_shapes[7]; - NODE_VALIDATION_CHECK(op, p_pshape.rank().compatible(2), "Input tensor P should have rank equal 2."); - NODE_VALIDATION_CHECK(op, - p_pshape[1].compatible(hidden_size * (num_gates - 1)), - "Inorrect shape of P input. 
Second dimension is: ", - p_pshape[1], - ", expected: ", - hidden_size.get_length() * (num_gates - 1), - "."); - } - return output_shapes; -} -} // namespace v0 namespace v5 { template std::vector> shape_infer(const LSTMSequence* op, const std::vector& input_shapes) { diff --git a/src/core/shape_inference/include/squeeze_shape_inference.hpp b/src/core/shape_inference/include/squeeze_shape_inference.hpp index ee71b5452db1c3..31eeea5d36a9ea 100644 --- a/src/core/shape_inference/include/squeeze_shape_inference.hpp +++ b/src/core/shape_inference/include/squeeze_shape_inference.hpp @@ -11,6 +11,117 @@ namespace ov { namespace op { namespace v0 { +template > +std::vector shape_infer(const Squeeze* op, + const std::vector& input_shapes, + const ITensorAccessor& ta = make_tensor_accessor()) { + using DimType = typename T::value_type; + + const auto number_of_inputs = input_shapes.size(); + OPENVINO_ASSERT(!input_shapes.empty()); + + const auto& arg_shape = input_shapes[0]; + const auto& arg_rank = arg_shape.rank(); + auto output_shapes = std::vector(1); + auto& output_shape = output_shapes[0]; + + std::unique_ptr> unique_axes; + + if (number_of_inputs == 1) { + unique_axes.reset(new std::set()); + } else if (number_of_inputs == 2) { + const auto& axes_shape = input_shapes[1]; + NODE_VALIDATION_CHECK(op, + axes_shape.is_dynamic() || ov::util::is_rank_compatible_any_of(axes_shape.rank(), {0, 1}), + "Second input (axes) should not be of rank higher than 1. 
Got: ", + axes_shape.rank().get_length()); + + std::vector axes; + if (arg_rank.is_static() && axes_shape.is_static()) { + if (auto axes = get_input_const_data_as(op, 1, ta)) { + // The values of `axes` input are known + ov::util::try_normalize_axes(*axes, arg_rank, *op); + unique_axes.reset(new std::set(axes->cbegin(), axes->cend())); + } else if (arg_rank.get_length() > 0 && shape_size(axes_shape.to_shape()) == 1) { + // The `axes` input is a single element tensor which is unique by definition, deducing output rank + const auto has_squeezable_dim = + std::any_of(arg_shape.cbegin(), arg_shape.cend(), [](const DimType& dim) { + return dim.compatible(1); + }); + if (has_squeezable_dim) { + output_shape = PartialShape::dynamic(arg_rank.get_length() - 1); + } else { + output_shape = arg_shape; + } + return output_shapes; + } + } + } else { + // Invalid number of inputs, empty error message for backward compatibility. + NODE_VALIDATION_CHECK(op, false); + } + + if (arg_rank.is_static() && (unique_axes != nullptr)) { + output_shape.resize(0); + if (unique_axes->empty()) { + // if only first input provided or axes are empty remove all dimensions equal to 1. + if (std::any_of(arg_shape.cbegin(), arg_shape.cend(), [](const DimType& d) { + return d.is_dynamic() && d.compatible(1); + })) { + // we are unsure if dynamic dimensions would be equal to 1 or not, so we set dynamic output rank + output_shape = PartialShape::dynamic(); + return output_shapes; + } else { + std::copy_if(arg_shape.cbegin(), + arg_shape.cend(), + std::back_inserter(output_shape), + [](const DimType& dim) { + return !dim.compatible(1); + }); + } + } else { + int64_t idx = 0; + auto rm_axis_iter = unique_axes->cbegin(); + auto rm_axis_end = unique_axes->cend(); + + // Returns true if dimension not squeezable on axis from input axes. 
+ const auto not_squeezable_at_axis = [&rm_axis_iter, &rm_axis_end, &idx](const DimType& dim) { + if ((rm_axis_iter != rm_axis_end) && (*rm_axis_iter == idx++)) { + ++rm_axis_iter; + // Ignore: Pointed by axis, but not squeezable + return !dim.compatible(1); + } else { + return true; + } + }; + + std::copy_if(arg_shape.cbegin(), + arg_shape.cend(), + std::back_inserter(output_shape), + not_squeezable_at_axis); + } + } else { + output_shape = PartialShape::dynamic(); + } + return output_shapes; +} +} // namespace v0 + +namespace v15 { +template +bool apply_allow_axis_skip(const ov::op::v15::Squeeze* const op, + const std::unique_ptr>& unique_axes, + const T& arg_shape) { + using DimType = typename T::value_type; + int64_t i{-1}; + + return op->get_allow_axis_skip() && + std::any_of(arg_shape.cbegin(), arg_shape.cend(), [&unique_axes, &i](const DimType& d) { + ++i; + // Squeeze result with dynamic rank if 1 is in range of selected dynamic dimension. + return d.is_dynamic() && d.compatible(1) && unique_axes->find(i) != unique_axes->end(); + }); +} /** * \brief Do Squeeze shape inference. @@ -59,7 +170,7 @@ std::vector shape_infer(const Squeeze* op, return dim.compatible(1); }); if (has_squeezable_dim) { - output_shape = PartialShape::dynamic(arg_rank.get_length() - 1); + output_shape = PartialShape::dynamic(); } else { output_shape = arg_shape; } @@ -71,7 +182,9 @@ std::vector shape_infer(const Squeeze* op, NODE_VALIDATION_CHECK(op, false); } - if (arg_rank.is_static() && (unique_axes != nullptr)) { + if (!arg_rank.is_static() || (unique_axes == nullptr) || apply_allow_axis_skip(op, unique_axes, arg_shape)) { + output_shape = PartialShape::dynamic(); + } else if (arg_rank.is_static() && (unique_axes != nullptr)) { output_shape.resize(0); if (unique_axes->empty()) { // if only first input provided or axes are empty remove all dimensions equal to 1. 
@@ -115,6 +228,6 @@ std::vector shape_infer(const Squeeze* op, } return output_shapes; } -} // namespace v0 +} // namespace v15 } // namespace op } // namespace ov diff --git a/src/core/shape_inference/include/stft_shape_inference.hpp b/src/core/shape_inference/include/stft_shape_inference.hpp index c856b4f905b7e6..41abf6640ddb96 100644 --- a/src/core/shape_inference/include/stft_shape_inference.hpp +++ b/src/core/shape_inference/include/stft_shape_inference.hpp @@ -25,10 +25,11 @@ std::vector shape_infer(const STFT* op, const auto& frame_size_shape = input_shapes[2]; const auto& frame_step_shape = input_shapes[3]; + const auto signal_shape_rank = signal_shape.rank(); NODE_SHAPE_INFER_CHECK(op, input_shapes, - signal_shape.rank().compatible(2), - "The shape of signal must be 2D [batch, signal_size]."); + signal_shape_rank.compatible(1) || signal_shape_rank.compatible(2), + "The shape of signal must be 1D [signal_size] or 2D [batch, signal_size]."); NODE_SHAPE_INFER_CHECK(op, input_shapes, window_shape.rank().compatible(1), @@ -42,29 +43,36 @@ std::vector shape_infer(const STFT* op, frame_step_shape.rank().compatible(0), "The shape of frame_step must be a scalar."); + if (signal_shape_rank.is_dynamic()) { + return {signal_shape}; + } + const auto frame_size = get_input_const_data_as(op, 2, ta); const auto frame_step = get_input_const_data_as(op, 3, ta); - if (signal_shape.rank().is_dynamic()) { - return {signal_shape}; - } else if (!frame_size || !frame_step) { - return {TRShape{signal_shape[0], TDim(ov::util::dim::inf_bound), TDim(ov::util::dim::inf_bound), 2}}; + const auto is_signal_1D = signal_shape.size() == 1; + if (!frame_size || !frame_step) { + if (is_signal_1D) { + return {TRShape{TDim(ov::util::dim::inf_bound), TDim(ov::util::dim::inf_bound), 2}}; + } else { + return {TRShape{signal_shape[0], TDim(ov::util::dim::inf_bound), TDim(ov::util::dim::inf_bound), 2}}; + } } const auto& frame_size_val = (*frame_size)[0]; const auto& frame_step_val = 
(*frame_step)[0]; + const TDim& signal_dim = is_signal_1D ? signal_shape[0] : signal_shape[1]; const bool is_frame_size_in_range = - 0 < frame_size_val && - (signal_shape[1].is_static() ? static_cast(frame_size_val) <= signal_shape[1].get_length() - : frame_size_val <= signal_shape[1].get_interval().get_max_val()); + 0 < frame_size_val && (signal_dim.is_static() ? static_cast(frame_size_val) <= signal_dim.get_length() + : frame_size_val <= signal_dim.get_interval().get_max_val()); NODE_SHAPE_INFER_CHECK(op, input_shapes, is_frame_size_in_range, "Provided frame size is ", frame_size_val, " but must be in range [1, ", - signal_shape[1], + signal_dim, "]."); NODE_SHAPE_INFER_CHECK(op, @@ -84,9 +92,8 @@ std::vector shape_infer(const STFT* op, frame_size_val, "]."); - const auto& batch_dim = signal_shape[0]; const TDim frame_size_dim = static_cast(frame_size_val); - const TDim signal_frame_size_diff = signal_shape[1] - frame_size_dim; + const TDim signal_frame_size_diff = signal_dim - frame_size_dim; TDim fft_samples_dim = (frame_size_val / 2) + 1; // Divsion opeartor for static Dimension of PartialShape can return non static dimension and ceil instead of floor @@ -95,9 +102,13 @@ std::vector shape_infer(const STFT* op, std::vector output_shapes; if (op->get_transpose_frames()) { - output_shapes.emplace_back(TRShape{batch_dim, std::move(fft_samples_dim), std::move(frames_dim), 2}); + output_shapes.emplace_back(TRShape{std::move(fft_samples_dim), std::move(frames_dim), 2}); } else { - output_shapes.emplace_back(TRShape{batch_dim, std::move(frames_dim), std::move(fft_samples_dim), 2}); + output_shapes.emplace_back(TRShape{std::move(frames_dim), std::move(fft_samples_dim), 2}); + } + if (!is_signal_1D) { + const auto& batch_dim = signal_shape[0]; + output_shapes[0].insert(output_shapes[0].begin(), batch_dim); } return output_shapes; } diff --git a/src/core/src/descriptor/tensor.cpp b/src/core/src/descriptor/tensor.cpp index 31509605003fdb..ae3f7c6e77cd4f 100644 --- 
a/src/core/src/descriptor/tensor.cpp +++ b/src/core/src/descriptor/tensor.cpp @@ -116,18 +116,9 @@ void ov::descriptor::Tensor::clone_from(const ov::descriptor::Tensor& old) { m_lower_value = old.get_lower_value(); m_upper_value = old.get_upper_value(); m_value_symbol = old.get_value_symbol(); - m_legacy_name = old.m_legacy_name; m_rt_info = old.get_rt_info(); } -std::string ov::descriptor::get_ov_tensor_legacy_name(const ov::descriptor::Tensor& tensor) { - return tensor.m_legacy_name; -} - -void ov::descriptor::set_ov_tensor_legacy_name(ov::descriptor::Tensor& tensor, const std::string& tensor_name) { - tensor.m_legacy_name = tensor_name; -} - void ov::descriptor::set_tensor_type(ov::descriptor::Tensor& tensor, const element::Type& element_type, const PartialShape& pshape) { @@ -148,10 +139,6 @@ std::ostream& ov::descriptor::operator<<(std::ostream& out, const ov::descriptor names += ", "; names += name; } - OPENVINO_SUPPRESS_DEPRECATED_START - if (names.empty()) - names = get_ov_tensor_legacy_name(tensor); - OPENVINO_SUPPRESS_DEPRECATED_END out << "Tensor(" << names << ")"; return out; } diff --git a/src/core/src/graph_util.cpp b/src/core/src/graph_util.cpp index 17780f7751d52e..4a8f4598f55cc3 100644 --- a/src/core/src/graph_util.cpp +++ b/src/core/src/graph_util.cpp @@ -273,36 +273,16 @@ bool replace_output_update_name(Output output, const Output& replace }); }; - bool preserve_legacy_output_name = false; if (has_result_consumers(output)) { - preserve_legacy_output_name = true; if (output.get_node()->get_output_size() != 1 || replacement.get_node()->get_output_size() != 1 || is_type(replacement.get_node()) || has_result_consumers(replacement)) { return false; } - } - - OPENVINO_SUPPRESS_DEPRECATED_START - if (preserve_legacy_output_name) { replacement.get_node()->set_friendly_name(output.get_node()->get_friendly_name()); - // Update output tensor name - const auto& output_tensor_name = ov::descriptor::get_ov_tensor_legacy_name(output.get_tensor()); - if 
(!output_tensor_name.empty()) { - ov::descriptor::set_ov_tensor_legacy_name(replacement.get_tensor(), output_tensor_name); - } else { - ov::descriptor::set_ov_tensor_legacy_name(replacement.get_tensor(), output.get_node()->get_friendly_name()); - } } - // Save replacement tensor name before replacement as they will be overridden by the output tensor name - const auto tensor_name = ov::descriptor::get_ov_tensor_legacy_name(replacement.get_tensor()); - output.replace(replacement); - // Restore back original replacement tensor name - ov::descriptor::set_ov_tensor_legacy_name(replacement.get_tensor(), tensor_name); - OPENVINO_SUPPRESS_DEPRECATED_END - copy_runtime_info({replacement.get_node_shared_ptr(), output.get_node_shared_ptr()}, replacement.get_node_shared_ptr()); return true; diff --git a/src/core/src/node.cpp b/src/core/src/node.cpp index 8b9936b5496e7c..ec9197a5a337cb 100644 --- a/src/core/src/node.cpp +++ b/src/core/src/node.cpp @@ -157,10 +157,6 @@ std::shared_ptr ov::Node::copy_with_new_inputs( } for (size_t i = 0; i < get_output_size(); i++) { clone->get_output_tensor(i).set_names(get_output_tensor(i).get_names()); - OPENVINO_SUPPRESS_DEPRECATED_START - ov::descriptor::set_ov_tensor_legacy_name(clone->get_output_tensor(i), - ov::descriptor::get_ov_tensor_legacy_name(get_output_tensor(i))); - OPENVINO_SUPPRESS_DEPRECATED_END } return clone; } diff --git a/src/core/src/node_output.cpp b/src/core/src/node_output.cpp index 3608abe13f3953..a2ea865b2ec84e 100644 --- a/src/core/src/node_output.cpp +++ b/src/core/src/node_output.cpp @@ -73,25 +73,6 @@ void Output::replace(const Output& replacement) { input.replace_source_output(replacement); } replacement.get_tensor_ptr()->add_names(get_tensor_ptr()->get_names()); - OPENVINO_SUPPRESS_DEPRECATED_START - // In legacy API we rely on output port tensor name and use it as an input or output name for the model - // Due to m_name is just a string, and we can't store multiple aliases for single output port we have to - // 
handle two situations during replacement: - // 1. When we replace consumers to Parameter output port we can't change its name, so we skip this part - // 2. In other cases when we replace consumers to another output port we should set name. For example: - // if we eliminate Node2 from Node1->Node2->Result we have to set Node2 output port name to Node1 - // output port name, so the output name for model won't be changed. - // But there are some cases when output name can not be preserved, so the replacement shouldn't be used: - // 1. Parameter->Node->Result - if we eliminate Node we will lose output name - // 2. Node1-->Node2->Result - if we eliminate Node2 we will lose Result output name - // `->Result - // In both of these cases please use replace_output_update_name() method which automatically prevents the - // replacement for cases when we can not preserve input/output names of model. - if (!is_type(replacement.get_node())) { - ov::descriptor::set_ov_tensor_legacy_name(replacement.get_tensor(), - ov::descriptor::get_ov_tensor_legacy_name(get_tensor())); - } - OPENVINO_SUPPRESS_DEPRECATED_END ov::copy_output_runtime_info({*this, replacement}, {replacement}); } diff --git a/src/core/src/op/lstm_sequence.cpp b/src/core/src/op/lstm_sequence.cpp index 4d9afa58f469b3..3f70b74e5ef22a 100644 --- a/src/core/src/op/lstm_sequence.cpp +++ b/src/core/src/op/lstm_sequence.cpp @@ -10,155 +10,6 @@ #include "openvino/op/util/recurrent_sequence.hpp" namespace ov { -OPENVINO_SUPPRESS_DEPRECATED_START -op::v0::LSTMSequence::LSTMSequence(const Output& X, - const Output& initial_hidden_state, - const Output& initial_cell_state, - const Output& sequence_lengths, - const Output& W, - const Output& R, - const Output& B, - const Output& P, - const std::int64_t hidden_size, - const LSTMSequence::direction lstm_direction, - LSTMWeightsFormat weights_format, - const std::vector activations_alpha, - const std::vector activations_beta, - const std::vector activations, - const float 
clip_threshold, - const bool input_forget) - : RNNCellBase({X, initial_hidden_state, initial_cell_state, sequence_lengths, W, R, B, P}, - hidden_size, - clip_threshold, - activations, - activations_alpha, - activations_beta), - m_direction(lstm_direction), - m_input_forget(input_forget), - m_weights_format(weights_format) { - constructor_validate_and_infer_types(); -} - -op::v0::LSTMSequence::LSTMSequence(const Output& X, - const Output& initial_hidden_state, - const Output& initial_cell_state, - const Output& sequence_lengths, - const Output& W, - const Output& R, - const Output& B, - const std::int64_t hidden_size, - const LSTMSequence::direction lstm_direction, - LSTMWeightsFormat weights_format, - const std::vector& activations_alpha, - const std::vector& activations_beta, - const std::vector& activations, - const float clip_threshold, - const bool input_forget) - : op::v0::LSTMSequence( - X, - initial_hidden_state, - initial_cell_state, - sequence_lengths, - W, - R, - B, - Constant::create(element::f32, - Shape{(lstm_direction == LSTMSequence::direction::BIDIRECTIONAL ? 
2UL : 1UL), - 3UL * static_cast(hidden_size)}, - std::vector{0.f}), - hidden_size, - lstm_direction, - weights_format, - activations_alpha, - activations_beta, - activations, - clip_threshold, - input_forget) {} - -bool op::v0::LSTMSequence::visit_attributes(AttributeVisitor& visitor) { - OV_OP_SCOPE(v0_LSTMSequence_visit_attributes); - visitor.on_attribute("hidden_size", m_hidden_size); - visitor.on_attribute("activations", m_activations); - visitor.on_attribute("activations_alpha", m_activations_alpha); - visitor.on_attribute("activations_beta", m_activations_beta); - visitor.on_attribute("clip", m_clip); - visitor.on_attribute("direction", m_direction); - - visitor.on_attribute("input_forget", m_input_forget); - visitor.on_attribute("weights_format", m_weights_format); - return true; -} - -std::shared_ptr op::v0::LSTMSequence::clone_with_new_inputs(const OutputVector& new_args) const { - OV_OP_SCOPE(v0_LSTMSequence_clone_with_new_inputs); - check_new_args_count(this, new_args); - if (new_args.size() == 8) { - return std::make_shared(new_args.at(0), // X - new_args.at(1), // initial_hidden_state - new_args.at(2), // initial_cell_state - new_args.at(3), // sequence_lengths - new_args.at(4), // W - new_args.at(5), // R - new_args.at(6), // B - new_args.at(7), // P - m_hidden_size, - m_direction, - m_weights_format, - m_activations_alpha, - m_activations_beta, - m_activations, - m_clip, - m_input_forget); - } else if (new_args.size() == 7) { - return std::make_shared(new_args.at(0), // X - new_args.at(1), // initial_hidden_state - new_args.at(2), // initial_cell_state - new_args.at(3), // sequence_lengths - new_args.at(4), // W - new_args.at(5), // R - new_args.at(6), // B - m_hidden_size, - m_direction, - m_weights_format, - m_activations_alpha, - m_activations_beta, - m_activations, - m_clip, - m_input_forget); - } else { - OPENVINO_THROW("Incorrect number of new arguments"); - } -} - -void op::v0::LSTMSequence::validate_and_infer_types() { - 
OV_OP_SCOPE(v0_LSTMSequence_validate_and_infer_types); - auto result_et = element::dynamic; - - // Validate input types and save result for output type - NODE_VALIDATION_CHECK(this, - element::Type::merge(result_et, result_et, get_input_element_type(0)) && - element::Type::merge(result_et, result_et, get_input_element_type(1)) && - element::Type::merge(result_et, result_et, get_input_element_type(2)) && - element::Type::merge(result_et, result_et, get_input_element_type(4)) && - element::Type::merge(result_et, result_et, get_input_element_type(5)) && - element::Type::merge(result_et, result_et, get_input_element_type(6)), - "Element types for X, initial_hidden_state, initial_cell_state, W, R and B inputs do " - "not match."); - - // Mark inputs which are relevant to output parameters - for (size_t i = 0; i <= 6; ++i) - set_input_is_relevant_to_shape(i); - - const auto input_shapes = ov::util::get_node_input_partial_shapes(*this); - auto output_shapes = shape_infer(this, input_shapes); - - // Set output size, type and shape - set_output_type(0, result_et, output_shapes[0]); - set_output_type(1, result_et, output_shapes[1]); - set_output_type(2, result_et, output_shapes[2]); -} -OPENVINO_SUPPRESS_DEPRECATED_END - bool op::v5::LSTMSequence::visit_attributes(AttributeVisitor& visitor) { OV_OP_SCOPE(v5_LSTMSequence_visit_attributes); visitor.on_attribute("direction", m_direction); diff --git a/src/core/src/op/search_sorted.cpp b/src/core/src/op/search_sorted.cpp index 8b9bb012b27106..65b5ff31861d8e 100644 --- a/src/core/src/op/search_sorted.cpp +++ b/src/core/src/op/search_sorted.cpp @@ -12,9 +12,13 @@ namespace ov { namespace op { namespace v15 { -SearchSorted::SearchSorted(const Output& sorted_sequence, const Output& values, bool right_mode) +SearchSorted::SearchSorted(const Output& sorted_sequence, + const Output& values, + bool right_mode, + const element::Type& output_type) : Op({sorted_sequence, values}), - m_right_mode(right_mode) { + m_right_mode(right_mode), + 
m_output_type(output_type) { constructor_validate_and_infer_types(); } @@ -23,20 +27,25 @@ void SearchSorted::validate_and_infer_types() { NODE_VALIDATION_CHECK(this, get_input_element_type(0).compatible(get_input_element_type(1)), "Sorted sequence and values must have the same element type."); + NODE_VALIDATION_CHECK(this, + m_output_type == element::i32 || m_output_type == element::i64, + "The element type of the last output can only be set to i32 or i64."); + const auto& output_shapes = shape_infer(this, ov::util::get_node_input_partial_shapes(*this)); - set_output_type(0, ov::element::i64, output_shapes[0]); + set_output_type(0, m_output_type, output_shapes[0]); } bool SearchSorted::visit_attributes(AttributeVisitor& visitor) { OV_OP_SCOPE(v15_SearchSorted_visit_attributes); visitor.on_attribute("right_mode", m_right_mode); + visitor.on_attribute("output_type", m_output_type); return true; } std::shared_ptr SearchSorted::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v15_SearchSorted_clone_with_new_inputs); check_new_args_count(this, new_args); - return std::make_shared(new_args.at(0), new_args.at(1), get_right_mode()); + return std::make_shared(new_args.at(0), new_args.at(1), get_right_mode(), get_output_type_attr()); } } // namespace v15 } // namespace op diff --git a/src/core/src/op/squeeze.cpp b/src/core/src/op/squeeze.cpp index 1b34a4e48a4faf..b79165ca4f5543 100644 --- a/src/core/src/op/squeeze.cpp +++ b/src/core/src/op/squeeze.cpp @@ -6,31 +6,19 @@ #include -#include "bound_evaluate.hpp" #include "itt.hpp" -#include "openvino/core/validation_util.hpp" -#include "openvino/op/constant.hpp" #include "squeeze_shape_inference.hpp" namespace ov { namespace op { namespace v0 { -namespace validate { -namespace { +Squeeze::Squeeze() : util::SqueezeBase() {} -bool axes_has_and_set_bound(const Node& op) { - return (op.get_input_size() < 2) || op.get_input_tensor(1).has_and_set_bound(); -} -} // namespace -} // namespace validate - 
-Squeeze::Squeeze() : Op() {} - -Squeeze::Squeeze(const Output& data, const Output& axes) : Op({data, axes}) { +Squeeze::Squeeze(const Output& data, const Output& axes) : util::SqueezeBase(data, axes) { constructor_validate_and_infer_types(); } -Squeeze::Squeeze(const Output& data) : Op({data}) { +Squeeze::Squeeze(const Output& data) : util::SqueezeBase(data) { constructor_validate_and_infer_types(); } @@ -69,62 +57,68 @@ bool Squeeze::evaluate(TensorVector& outputs, const TensorVector& inputs) const return true; } -bool Squeeze::has_evaluate() const { - OV_OP_SCOPE(v0_Squeeze_has_evaluate); - const auto validate_axes_type = [](const element::Type& et) -> bool { - switch (et) { - case element::i8: - case element::i16: - case element::i32: - case element::i64: - case element::u8: - case element::u16: - case element::u32: - case element::u64: - return true; - default: - return false; - } - }; - - return (get_input_size() < 2) || validate_axes_type(get_input_element_type(1)); -} +} // namespace v0 -bool Squeeze::evaluate_lower(TensorVector& output_values) const { - OV_OP_SCOPE(v0_Squeeze_evaluate_lower); - return validate::axes_has_and_set_bound(*this) && default_lower_bound_evaluator(this, output_values); +namespace v15 { +Squeeze::Squeeze() : util::SqueezeBase() {} + +Squeeze::Squeeze(const Output& data, const bool allow_axis_skip) + : util::SqueezeBase(data), + m_allow_axis_skip{allow_axis_skip} { + constructor_validate_and_infer_types(); } -bool Squeeze::evaluate_upper(TensorVector& output_values) const { - OV_OP_SCOPE(v0_Squeeze_evaluate_upper); - return validate::axes_has_and_set_bound(*this) && default_upper_bound_evaluator(this, output_values); +Squeeze::Squeeze(const Output& data, const Output& axes, const bool allow_axis_skip) + : util::SqueezeBase(data, axes), + m_allow_axis_skip{allow_axis_skip} { + constructor_validate_and_infer_types(); } -bool Squeeze::evaluate_symbol(TensorSymbolVector& output_symbols) const { - return 
validate::axes_has_and_set_bound(*this) && ov::util::default_symbol_evaluator(this, output_symbols); +std::shared_ptr Squeeze::clone_with_new_inputs(const OutputVector& new_args) const { + OV_OP_SCOPE(v15_Squeeze_clone_with_new_inputs); + check_new_args_count(this, new_args); + + switch (new_args.size()) { + case 1: + return std::make_shared(new_args[0], m_allow_axis_skip); + case 2: + return std::make_shared(new_args[0], new_args[1], m_allow_axis_skip); + default: + OPENVINO_THROW("Incorrect number of new arguments"); + } } -bool Squeeze::can_constant_fold(const OutputVector& inputs_values) const { - return get_output_partial_shape(0).is_static() && !is_const_fold_disabled(); +void Squeeze::validate_and_infer_types() { + OV_OP_SCOPE(v15_Squeeze_validate_and_infer_types); + + const auto input_shapes = ov::util::get_node_input_partial_shapes(*this); + const auto output_shapes = shape_infer(this, input_shapes); + + set_output_type(0, get_input_element_type(0), output_shapes[0]); } -bool Squeeze::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { - OV_OP_SCOPE(v0_Squeeze_constant_fold); - if (!can_constant_fold(inputs_values)) { - return false; - } +bool Squeeze::evaluate(TensorVector& outputs, const TensorVector& inputs) const { + OV_OP_SCOPE(v15_Squeeze_evaluate); + OPENVINO_ASSERT(outputs.size() == 1); - if (auto data_const = std::dynamic_pointer_cast(inputs_values[0].get_node_shared_ptr())) { - const auto& shape = get_output_shape(0); - output_values[0] = std::make_shared(*data_const, shape); - return true; - } - return false; + const auto output_shapes = + shape_infer(this, ov::util::get_tensors_partial_shapes(inputs), make_tensor_accessor(inputs)); + outputs[0].set_shape(output_shapes.front().get_shape()); + + std::memcpy(outputs[0].data(), inputs[0].data(), outputs[0].get_byte_size()); + return true; } -bool Squeeze::is_dynamic() const { - return get_output_partial_shape(0).is_dynamic(); +bool 
Squeeze::visit_attributes(AttributeVisitor& visitor) { + OV_OP_SCOPE(v15_Squeeze_visit_attributes); + visitor.on_attribute("allow_axis_skip", m_allow_axis_skip); + return true; } -} // namespace v0 + +bool Squeeze::get_allow_axis_skip() const { + OV_OP_SCOPE(v15_Squeeze_get_allow_axis_skip); + return m_allow_axis_skip; +} +} // namespace v15 } // namespace op } // namespace ov diff --git a/src/core/src/op/util/broadcast_base.cpp b/src/core/src/op/util/broadcast_base.cpp index 59154e45e2b37a..c2c838afeb38bd 100644 --- a/src/core/src/op/util/broadcast_base.cpp +++ b/src/core/src/op/util/broadcast_base.cpp @@ -471,3 +471,10 @@ bool ov::op::util::BroadcastBase::evaluate_upper(ov::TensorVector& output_values return false; return default_upper_bound_evaluator(this, output_values); } + +bool ov::op::util::BroadcastBase::evaluate_symbol(ov::TensorSymbolVector& output_symbols) const { + if (!input_value(1).get_tensor().has_and_set_bound() || + (get_input_size() > 2 && !input_value(2).get_tensor().has_and_set_bound())) + return false; + return default_symbol_evaluator(this, {0}, output_symbols); +} diff --git a/src/core/src/op/util/squeeze_base.cpp b/src/core/src/op/util/squeeze_base.cpp new file mode 100644 index 00000000000000..be5a20cbb58620 --- /dev/null +++ b/src/core/src/op/util/squeeze_base.cpp @@ -0,0 +1,91 @@ +#include "openvino/op/util/squeeze_base.hpp" + +#include "bound_evaluate.hpp" +#include "itt.hpp" +#include "openvino/core/validation_util.hpp" +#include "openvino/op/constant.hpp" + +namespace ov { +namespace op { + +namespace validate { +namespace { + +bool axes_has_and_set_bound(const Node& op) { + return (op.get_input_size() < 2) || op.get_input_tensor(1).has_and_set_bound(); +} +} // namespace +} // namespace validate + +namespace util { +SqueezeBase::SqueezeBase(const Output& data, const Output& axes) : Op({data, axes}) { + constructor_validate_and_infer_types(); +} + +SqueezeBase::SqueezeBase(const Output& data) : Op({data}) { + 
constructor_validate_and_infer_types(); +} + +bool SqueezeBase::has_evaluate() const { + OV_OP_SCOPE(util_SqueezeBase_has_evaluate); + const auto validate_axes_type = [](const element::Type& et) -> bool { + switch (et) { + case element::i8: + case element::i16: + case element::i32: + case element::i64: + case element::u8: + case element::u16: + case element::u32: + case element::u64: + return true; + default: + return false; + } + }; + + return (get_input_size() < 2) || validate_axes_type(get_input_element_type(1)); +} + +bool SqueezeBase::evaluate_lower(TensorVector& output_values) const { + OV_OP_SCOPE(util_SqueezeBase_evaluate_lower); + return validate::axes_has_and_set_bound(*this) && default_lower_bound_evaluator(this, output_values); +} + +bool SqueezeBase::evaluate_upper(TensorVector& output_values) const { + OV_OP_SCOPE(util_SqueezeBase_evaluate_upper); + return validate::axes_has_and_set_bound(*this) && default_upper_bound_evaluator(this, output_values); +} + +bool SqueezeBase::evaluate_symbol(TensorSymbolVector& output_symbols) const { + OV_OP_SCOPE(util_SqueezeBase_evaluate_symbol); + return validate::axes_has_and_set_bound(*this) && ov::util::default_symbol_evaluator(this, output_symbols); +} + +bool SqueezeBase::can_constant_fold(const OutputVector& inputs_values) const { + OV_OP_SCOPE(util_SqueezeBase_can_constant_fold); + return get_output_partial_shape(0).is_static() && !is_const_fold_disabled(); +} + +bool SqueezeBase::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { + OV_OP_SCOPE(util_SqueezeBase_constant_fold); + if (!can_constant_fold(inputs_values)) { + return false; + } + + if (auto data_const = std::dynamic_pointer_cast(inputs_values[0].get_node_shared_ptr())) { + const auto& shape = get_output_shape(0); + output_values[0] = std::make_shared(*data_const, shape); + return true; + } + return false; +} + +bool SqueezeBase::is_dynamic() const { + OV_OP_SCOPE(util_SqueezeBase_is_dynamic); + return 
get_output_partial_shape(0).is_dynamic(); +} + +} // namespace util +} // namespace op +} // namespace ov diff --git a/src/core/src/pass/low_latency.cpp b/src/core/src/pass/low_latency.cpp index 7ef4fcd5e84cd8..4b9f606b3e4b28 100644 --- a/src/core/src/pass/low_latency.cpp +++ b/src/core/src/pass/low_latency.cpp @@ -158,26 +158,7 @@ std::vector> process_sequence(const std::sha std::shared_ptr cell; std::vector> new_assigns; bool unroll = false; - OPENVINO_SUPPRESS_DEPRECATED_START - if (auto lstm_seq_v0 = std::dynamic_pointer_cast(op)) { - unroll = need_unroll(op); - new_assigns = replace_with_memory(op, {1, 2}, m_use_const_initializer, to); - if (unroll) { - auto inputs = prepare_inputs(op, 3, to); - cell = to.make(inputs[0], - inputs[1], - inputs[2], - inputs[3], - inputs[4], - inputs[5], - lstm_seq_v0->get_hidden_size(), - lstm_seq_v0->get_activations(), - lstm_seq_v0->get_activations_alpha(), - lstm_seq_v0->get_activations_beta(), - lstm_seq_v0->get_clip_threshold()); - } - OPENVINO_SUPPRESS_DEPRECATED_END - } else if (auto lstm_seq_v5 = std::dynamic_pointer_cast(op)) { + if (auto lstm_seq_v5 = std::dynamic_pointer_cast(op)) { unroll = need_unroll(op); new_assigns = replace_with_memory(op, {1, 2}, m_use_const_initializer, to); if (unroll) { diff --git a/src/core/src/preprocess/preprocess_impls.cpp b/src/core/src/preprocess/preprocess_impls.cpp index 13a4c6f1353312..cbe18a78beb575 100644 --- a/src/core/src/preprocess/preprocess_impls.cpp +++ b/src/core/src/preprocess/preprocess_impls.cpp @@ -385,13 +385,6 @@ void OutputInfo::OutputInfoImpl::build(ov::ResultVector& results) { std::to_string(result->get_input_source_output(0).get_index())); } - OPENVINO_SUPPRESS_DEPRECATED_START - const auto tensor_name = ov::descriptor::get_ov_tensor_legacy_name(result->get_input_tensor(0)); - if (!tensor_name.empty()) { - ov::descriptor::set_ov_tensor_legacy_name(node.get_tensor(), tensor_name); - } - OPENVINO_SUPPRESS_DEPRECATED_END - // Reset friendly name of input node to 
avoid names collision // when there is at a new node inserted by post-processing steps // If no new nodes are inserted by post-processing, then we need to preserve friendly name of input diff --git a/src/core/src/type/element_type.cpp b/src/core/src/type/element_type.cpp index c0fad90285e3f6..3e664335033d80 100644 --- a/src/core/src/type/element_type.cpp +++ b/src/core/src/type/element_type.cpp @@ -164,51 +164,6 @@ std::vector ov::element::Type::get_known_types() { return rc; } -ov::element::Type::Type(size_t bitwidth, - bool is_real, - bool is_signed, - bool is_quantized, - const std::string& /* cname */) { - const ElementTypes::ElementsMap elements_map{ - {ov::element::Type_t::undefined, - {std::numeric_limits::max(), false, false, false, "undefined", "undefined"}}, - {ov::element::Type_t::dynamic, {0, false, false, false, "dynamic", "dynamic"}}, - {ov::element::Type_t::boolean, {8, false, true, false, "char", "boolean"}}, - {ov::element::Type_t::bf16, {16, true, true, false, "bfloat16", "bf16"}}, - {ov::element::Type_t::f16, {16, true, true, false, "float16", "f16"}}, - {ov::element::Type_t::f32, {32, true, true, false, "float", "f32"}}, - {ov::element::Type_t::f64, {64, true, true, false, "double", "f64"}}, - {ov::element::Type_t::i4, {4, false, true, true, "int4_t", "i4"}}, - {ov::element::Type_t::i8, {8, false, true, true, "int8_t", "i8"}}, - {ov::element::Type_t::i16, {16, false, true, false, "int16_t", "i16"}}, - {ov::element::Type_t::i32, {32, false, true, true, "int32_t", "i32"}}, - {ov::element::Type_t::i64, {64, false, true, false, "int64_t", "i64"}}, - {ov::element::Type_t::u1, {1, false, false, false, "uint1_t", "u1"}}, - {ov::element::Type_t::u2, {2, false, false, false, "uint2_t", "u2"}}, - {ov::element::Type_t::u3, {3, false, false, false, "uint3_t", "u3"}}, - {ov::element::Type_t::u4, {4, false, false, false, "uint4_t", "u4"}}, - {ov::element::Type_t::u6, {6, false, false, false, "uint6_t", "u6"}}, - {ov::element::Type_t::u8, {8, false, false, 
true, "uint8_t", "u8"}}, - {ov::element::Type_t::u16, {16, false, false, false, "uint16_t", "u16"}}, - {ov::element::Type_t::u32, {32, false, false, false, "uint32_t", "u32"}}, - {ov::element::Type_t::u64, {64, false, false, false, "uint64_t", "u64"}}, - {ov::element::Type_t::nf4, {4, false, false, true, "nfloat4", "nf4"}}, - {ov::element::Type_t::f8e4m3, {8, true, true, true, "f8e4m3", "f8e4m3"}}, - {ov::element::Type_t::f8e5m2, {8, true, true, true, "f8e5m2", "f8e5m2"}}, - {ov::element::Type_t::string, {8 * sizeof(std::string), false, false, false, "string", "string"}}, - {ov::element::Type_t::f4e2m1, {4, true, true, true, "f4e2m1", "f4e2m1"}}, - {ov::element::Type_t::f8e8m0, {4, true, true, true, "f8e8m0", "f8e8m0"}}, - }; - for (const auto& t : elements_map) { - const TypeInfo& info = t.second; - if (bitwidth == info.m_bitwidth && is_real == info.m_is_real && is_signed == info.m_is_signed && - is_quantized == info.m_is_quantized) { - m_type = t.first; - return; - } - } -} - ov::element::Type::Type(const std::string& type) : Type(type_from_string(type)) {} std::string ov::element::Type::c_type_string() const { diff --git a/src/core/tests/CMakeLists.txt b/src/core/tests/CMakeLists.txt index c3ed58783ac946..89acd7bd1809d0 100644 --- a/src/core/tests/CMakeLists.txt +++ b/src/core/tests/CMakeLists.txt @@ -18,6 +18,7 @@ set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/threading.cpp if(SUGGEST_OVERRIDE_SUPPORTED) set_source_files_properties(ov_tensor_test.cpp type_prop/multiclass_nms.cpp + type_prop/squeeze.cpp PROPERTIES COMPILE_OPTIONS -Wno-suggest-override) endif() diff --git a/src/core/tests/opset.cpp b/src/core/tests/opset.cpp index 3a2a590de43edf..65c79dc9432439 100644 --- a/src/core/tests/opset.cpp +++ b/src/core/tests/opset.cpp @@ -61,9 +61,9 @@ TEST_P(OpsetTests, opset_dump) { INSTANTIATE_TEST_SUITE_P(opset, OpsetTests, - testing::Values(OpsetTestParams{ov::get_opset1, 110}, - OpsetTestParams{ov::get_opset2, 112}, - OpsetTestParams{ov::get_opset3, 
127}, + testing::Values(OpsetTestParams{ov::get_opset1, 109}, + OpsetTestParams{ov::get_opset2, 111}, + OpsetTestParams{ov::get_opset3, 126}, OpsetTestParams{ov::get_opset4, 137}, OpsetTestParams{ov::get_opset5, 145}, OpsetTestParams{ov::get_opset6, 152}, diff --git a/src/core/tests/type_prop/lstm_sequence.cpp b/src/core/tests/type_prop/lstm_sequence.cpp index 698206e25a5562..80da26340fd144 100644 --- a/src/core/tests/type_prop/lstm_sequence.cpp +++ b/src/core/tests/type_prop/lstm_sequence.cpp @@ -89,42 +89,6 @@ shared_ptr lstm_seq_direction_initialization(const recurre return lstm_sequence; } - -OPENVINO_SUPPRESS_DEPRECATED_START -shared_ptr lstm_seq_v0_tensor_initialization(const recurrent_sequence_parameters& param) { - auto batch_size = param.batch_size; - auto seq_length = param.seq_length; - auto input_size = param.input_size; - auto num_directions = param.num_directions; - auto hidden_size = param.hidden_size; - auto et = param.et; - - const auto X = make_shared(et, PartialShape{batch_size, seq_length, input_size}); - const auto initial_hidden_state = - make_shared(et, PartialShape{batch_size, num_directions, hidden_size}); - const auto initial_cell_state = - make_shared(et, PartialShape{batch_size, num_directions, hidden_size}); - const auto sequence_lengths = make_shared(et, PartialShape{batch_size}); - const auto W = make_shared(et, PartialShape{num_directions, hidden_size * 4, input_size}); - const auto R = make_shared(et, PartialShape{num_directions, hidden_size * 4, hidden_size}); - const auto B = make_shared(et, PartialShape{num_directions, hidden_size * 4}); - const auto P = make_shared(et, PartialShape{num_directions, hidden_size * 3}); - - const auto lstm_sequence = make_shared(); - - lstm_sequence->set_argument(0, X); - lstm_sequence->set_argument(1, initial_hidden_state); - lstm_sequence->set_argument(2, initial_cell_state); - lstm_sequence->set_argument(3, sequence_lengths); - lstm_sequence->set_argument(4, W); - lstm_sequence->set_argument(5, 
R); - lstm_sequence->set_argument(6, B); - lstm_sequence->set_argument(7, P); - - return lstm_sequence; -} -OPENVINO_SUPPRESS_DEPRECATED_END - } // namespace TEST(type_prop, lstm_sequence_forward) { @@ -173,56 +137,6 @@ TEST(type_prop, lstm_sequence_forward) { EXPECT_EQ(lstm_sequence->get_output_shape(2), (Shape{batch_size, num_directions, hidden_size})); } -TEST(type_prop, lstm_sequence_v0_forward) { - const size_t batch_size = 8; - const size_t num_directions = 1; - const size_t seq_length = 6; - const size_t input_size = 4; - const size_t hidden_size = 128; - - const auto X = make_shared(element::f32, Shape{batch_size, seq_length, input_size}); - const auto initial_hidden_state = - make_shared(element::f32, Shape{batch_size, num_directions, hidden_size}); - const auto initial_cell_state = - make_shared(element::f32, Shape{batch_size, num_directions, hidden_size}); - const auto sequence_lengths = make_shared(element::i32, Shape{batch_size}); - const auto W = make_shared(element::f32, Shape{num_directions, 4 * hidden_size, input_size}); - const auto R = make_shared(element::f32, Shape{num_directions, 4 * hidden_size, hidden_size}); - const auto B = make_shared(element::f32, Shape{num_directions, 4 * hidden_size}); - const auto P = make_shared(element::f32, Shape{num_directions, 3 * hidden_size}); - - const auto lstm_direction = op::RecurrentSequenceDirection::FORWARD; - - OPENVINO_SUPPRESS_DEPRECATED_START - const auto lstm_sequence = make_shared(X, - initial_hidden_state, - initial_cell_state, - sequence_lengths, - W, - R, - B, - P, - hidden_size, - lstm_direction); - - EXPECT_EQ(lstm_sequence->get_hidden_size(), hidden_size); - EXPECT_EQ(lstm_sequence->get_direction(), op::RecurrentSequenceDirection::FORWARD); - EXPECT_TRUE(lstm_sequence->get_activations_alpha().empty()); - EXPECT_TRUE(lstm_sequence->get_activations_beta().empty()); - EXPECT_EQ(lstm_sequence->get_activations()[0], "sigmoid"); - EXPECT_EQ(lstm_sequence->get_activations()[1], "tanh"); - 
EXPECT_EQ(lstm_sequence->get_activations()[2], "tanh"); - EXPECT_EQ(lstm_sequence->get_clip_threshold(), 0.f); - EXPECT_EQ(lstm_sequence->get_output_element_type(0), element::f32); - EXPECT_EQ(lstm_sequence->outputs().size(), 3); - EXPECT_EQ(lstm_sequence->get_output_shape(0), (Shape{batch_size, num_directions, seq_length, hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_element_type(1), element::f32); - EXPECT_EQ(lstm_sequence->get_output_shape(1), (Shape{batch_size, num_directions, hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_element_type(2), element::f32); - EXPECT_EQ(lstm_sequence->get_output_shape(2), (Shape{batch_size, num_directions, hidden_size})); - OPENVINO_SUPPRESS_DEPRECATED_END -} - TEST(type_prop, lstm_sequence_bidirectional) { const size_t batch_size = 24; const size_t num_directions = 2; @@ -273,68 +187,6 @@ TEST(type_prop, lstm_sequence_bidirectional) { EXPECT_EQ(lstm_sequence->get_output_shape(2), (Shape{batch_size, num_directions, hidden_size})); } -TEST(type_prop, lstm_sequence_v0_bidirectional) { - const size_t batch_size = 24; - const size_t num_directions = 2; - const size_t seq_length = 12; - const size_t input_size = 8; - const size_t hidden_size = 256; - const bool input_forget = true; - const ov::op::LSTMWeightsFormat weights_format = ov::op::LSTMWeightsFormat::FICO; - const float clip_threshold = 3.5f; - - const auto X = make_shared(element::f32, Shape{batch_size, seq_length, input_size}); - const auto initial_hidden_state = - make_shared(element::f32, Shape{batch_size, num_directions, hidden_size}); - const auto initial_cell_state = - make_shared(element::f32, Shape{batch_size, num_directions, hidden_size}); - const auto sequence_lengths = make_shared(element::i32, Shape{batch_size}); - const auto W = make_shared(element::f32, Shape{num_directions, 4 * hidden_size, input_size}); - const auto R = make_shared(element::f32, Shape{num_directions, 4 * hidden_size, hidden_size}); - const auto B = make_shared(element::f32, 
Shape{num_directions, 4 * hidden_size}); - const auto P = make_shared(element::f32, Shape{num_directions, 3 * hidden_size}); - - const auto lstm_direction = opset5::LSTMSequence::direction::BIDIRECTIONAL; - const std::vector activations_alpha = {2.7f, 7.0f, 32.367f}; - const std::vector activations_beta = {0.0f, 5.49f, 6.0f}; - const std::vector activations = {"tanh", "sigmoid", "sigmoid"}; - - OPENVINO_SUPPRESS_DEPRECATED_START - const auto lstm_sequence = make_shared(X, - initial_hidden_state, - initial_cell_state, - sequence_lengths, - W, - R, - B, - hidden_size, - lstm_direction, - weights_format, - activations_alpha, - activations_beta, - activations, - clip_threshold, - input_forget); - - EXPECT_EQ(lstm_sequence->get_hidden_size(), hidden_size); - EXPECT_EQ(lstm_sequence->get_direction(), opset5::LSTMSequence::direction::BIDIRECTIONAL); - EXPECT_EQ(lstm_sequence->get_activations_alpha(), activations_alpha); - EXPECT_EQ(lstm_sequence->get_activations_beta(), activations_beta); - EXPECT_EQ(lstm_sequence->get_activations()[0], "tanh"); - EXPECT_EQ(lstm_sequence->get_activations()[1], "sigmoid"); - EXPECT_EQ(lstm_sequence->get_activations()[2], "sigmoid"); - EXPECT_EQ(lstm_sequence->get_clip_threshold(), 3.5f); - EXPECT_EQ(lstm_sequence->get_input_forget(), true); - EXPECT_EQ(lstm_sequence->get_weights_format(), ov::op::LSTMWeightsFormat::FICO); - EXPECT_EQ(lstm_sequence->get_output_element_type(0), element::f32); - EXPECT_EQ(lstm_sequence->get_output_shape(0), (Shape{batch_size, num_directions, seq_length, hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_element_type(1), element::f32); - EXPECT_EQ(lstm_sequence->get_output_shape(1), (Shape{batch_size, num_directions, hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_element_type(2), element::f32); - EXPECT_EQ(lstm_sequence->get_output_shape(2), (Shape{batch_size, num_directions, hidden_size})); - OPENVINO_SUPPRESS_DEPRECATED_END -} - TEST(type_prop, lstm_sequence_dynamic_batch_size) { 
recurrent_sequence_parameters param; @@ -573,214 +425,3 @@ TEST(type_prop, lstm_sequence_invalid_input_direction_num_mismatch) { check_error(op::RecurrentSequenceDirection::FORWARD, 2); check_error(op::RecurrentSequenceDirection::REVERSE, 2); } - -OPENVINO_SUPPRESS_DEPRECATED_START -TEST(type_prop, lstm_sequence_v0_dynamic_num_directions) { - recurrent_sequence_parameters param; - - param.batch_size = 24; - param.num_directions = Dimension::dynamic(); - param.seq_length = 12; - param.input_size = 8; - param.hidden_size = 256; - param.et = element::f32; - - auto lstm_sequence = lstm_seq_v0_tensor_initialization(param); - lstm_sequence->validate_and_infer_types(); - - EXPECT_EQ(lstm_sequence->get_output_partial_shape(0), - (PartialShape{param.batch_size, 1, param.seq_length, param.hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_partial_shape(1), (PartialShape{param.batch_size, 1, param.hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_partial_shape(2), (PartialShape{param.batch_size, 1, param.hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_element_type(0), param.et); - EXPECT_EQ(lstm_sequence->get_output_element_type(1), param.et); - EXPECT_EQ(lstm_sequence->get_output_element_type(2), param.et); -} - -TEST(type_prop, lstm_sequence_v0_dynamic_seq_length) { - recurrent_sequence_parameters param; - - param.batch_size = 24; - param.num_directions = 1; - param.seq_length = Dimension::dynamic(); - param.input_size = 8; - param.hidden_size = 256; - param.et = element::f32; - - auto lstm_sequence = lstm_seq_v0_tensor_initialization(param); - lstm_sequence->validate_and_infer_types(); - - EXPECT_EQ(lstm_sequence->get_output_partial_shape(0), - (PartialShape{param.batch_size, param.num_directions, param.seq_length, param.hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_partial_shape(1), - (PartialShape{param.batch_size, param.num_directions, param.hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_partial_shape(2), - (PartialShape{param.batch_size, 
param.num_directions, param.hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_element_type(0), param.et); - EXPECT_EQ(lstm_sequence->get_output_element_type(1), param.et); - EXPECT_EQ(lstm_sequence->get_output_element_type(2), param.et); -} - -TEST(type_prop, lstm_sequence_v0_dynamic_hidden_size) { - recurrent_sequence_parameters param; - - param.batch_size = 24; - param.num_directions = 1; - param.seq_length = 12; - param.input_size = 8; - param.hidden_size = Dimension::dynamic(); - param.et = element::f32; - - auto lstm_sequence = lstm_seq_v0_tensor_initialization(param); - lstm_sequence->validate_and_infer_types(); - - EXPECT_EQ(lstm_sequence->get_output_partial_shape(0), - (PartialShape{param.batch_size, param.num_directions, param.seq_length, param.hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_partial_shape(1), - (PartialShape{param.batch_size, param.num_directions, param.hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_partial_shape(2), - (PartialShape{param.batch_size, param.num_directions, param.hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_element_type(0), param.et); - EXPECT_EQ(lstm_sequence->get_output_element_type(1), param.et); - EXPECT_EQ(lstm_sequence->get_output_element_type(2), param.et); -} - -TEST(type_prop, lstm_sequence_v0_dynamic_inputs) { - recurrent_sequence_parameters param; - - param.batch_size = Dimension::dynamic(); - param.input_size = Dimension::dynamic(); - param.hidden_size = Dimension::dynamic(); - param.num_directions = Dimension::dynamic(); - param.seq_length = Dimension::dynamic(); - param.et = element::f32; - - auto lstm_sequence = lstm_seq_v0_tensor_initialization(param); - lstm_sequence->validate_and_infer_types(); - - EXPECT_EQ(lstm_sequence->get_output_partial_shape(0), - (PartialShape{param.batch_size, 1, param.seq_length, param.hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_partial_shape(1), (PartialShape{param.batch_size, 1, param.hidden_size})); - 
EXPECT_EQ(lstm_sequence->get_output_partial_shape(2), (PartialShape{param.batch_size, 1, param.hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_element_type(0), param.et); - EXPECT_EQ(lstm_sequence->get_output_element_type(1), param.et); - EXPECT_EQ(lstm_sequence->get_output_element_type(2), param.et); -} - -TEST(type_prop, lstm_sequence_v0_invalid_input_dimension) { - recurrent_sequence_parameters param; - - param.batch_size = 24; - param.num_directions = 2; - param.seq_length = 12; - param.input_size = 8; - param.hidden_size = 256; - param.et = element::f32; - - auto lstm_sequence = lstm_seq_v0_tensor_initialization(param); - auto invalid_rank0_tensor = make_shared(param.et, PartialShape{}); - - // Validate invalid rank0 tensor for all inputs: X, initial_hidden_state, initial_cell_state W, - // R, B - for (size_t i = 0; i < lstm_sequence->get_input_size(); i++) { - lstm_sequence = lstm_seq_v0_tensor_initialization(param); - lstm_sequence->set_argument(i, invalid_rank0_tensor); - ASSERT_THROW(lstm_sequence->validate_and_infer_types(), ov::AssertFailure) - << "LSTMSequence node was created with invalid data."; - } -} - -TEST(type_prop, lstm_sequence_v0_input_dynamic_rank) { - recurrent_sequence_parameters param; - param.batch_size = 24; - param.num_directions = 1; - param.seq_length = 12; - param.input_size = 8; - param.hidden_size = 256; - param.et = element::f32; - - auto op = lstm_seq_tensor_initialization(param); - auto dynamic_tensor = make_shared(param.et, PartialShape::dynamic(Rank::dynamic())); - - for (size_t i = 0; i < op->get_input_size(); i++) { - auto op = lstm_seq_v0_tensor_initialization(param); - op->set_argument(i, dynamic_tensor); - op->validate_and_infer_types(); - if (i == 0) { // X input - EXPECT_EQ(op->get_output_partial_shape(0), - (PartialShape{param.batch_size, param.num_directions, -1, param.hidden_size})); - } else { - EXPECT_EQ(op->get_output_partial_shape(0), - (PartialShape{param.batch_size, param.num_directions, param.seq_length, 
param.hidden_size})); - } - EXPECT_EQ(op->get_output_partial_shape(1), - (PartialShape{param.batch_size, param.num_directions, param.hidden_size})); - EXPECT_EQ(op->get_output_partial_shape(2), - (PartialShape{param.batch_size, param.num_directions, param.hidden_size})); - - EXPECT_EQ(op->get_output_element_type(0), param.et); - EXPECT_EQ(op->get_output_element_type(1), param.et); - } -} - -TEST(type_prop, lstm_sequence_v0_invalid_input_direction) { - recurrent_sequence_parameters param; - - param.batch_size = 24; - param.num_directions = 3; - param.seq_length = 12; - param.input_size = 8; - param.hidden_size = 256; - param.et = element::f32; - - auto lstm_sequence = lstm_seq_v0_tensor_initialization(param); - try { - lstm_sequence->validate_and_infer_types(); - FAIL() << "LSTMSequence node was created with invalid data."; - } catch (const NodeValidationFailure& error) { - EXPECT_HAS_SUBSTRING( - error.what(), - std::string("Dimension `num_directions` doesn't match to other inputs or `direction` attribute")); - } - - param.num_directions = 2; // 2 is also not allowed for default 'm_direction' = FORWARD - lstm_sequence = lstm_seq_v0_tensor_initialization(param); - try { - lstm_sequence->validate_and_infer_types(); - FAIL() << "LSTMSequence node was created with invalid data."; - } catch (const NodeValidationFailure& error) { - EXPECT_HAS_SUBSTRING( - error.what(), - std::string("Dimension `num_directions` doesn't match to other inputs or `direction` attribute")); - } -} - -TEST(type_prop, lstm_sequence_v0_invalid_input_P) { - recurrent_sequence_parameters param; - - param.batch_size = 24; - param.num_directions = 1; - param.seq_length = 12; - param.input_size = 8; - param.hidden_size = 256; - param.et = element::f32; - - auto lstm_sequence = lstm_seq_v0_tensor_initialization(param); - auto P = make_shared(element::f32, PartialShape{param.hidden_size * 5}); - lstm_sequence->set_argument(7, P); - try { - lstm_sequence->validate_and_infer_types(); - FAIL() << 
"LSTMSequence node was created with invalid data."; - } catch (const NodeValidationFailure& error) { - EXPECT_HAS_SUBSTRING(error.what(), "Input tensor P should have rank equal 2"); - } - P = make_shared(element::f32, PartialShape{param.num_directions, param.hidden_size * 5}); - lstm_sequence->set_argument(7, P); - try { - lstm_sequence->validate_and_infer_types(); - FAIL() << "LSTMSequence node was created with invalid data."; - } catch (const NodeValidationFailure& error) { - EXPECT_HAS_SUBSTRING(error.what(), "Inorrect shape of P input. Second dimension is: 1280, expected: 768"); - } - OPENVINO_SUPPRESS_DEPRECATED_END -} diff --git a/src/core/tests/type_prop/rnn_seq_base.cpp b/src/core/tests/type_prop/rnn_seq_base.cpp index 9b3c1b7f8f25df..db38622333d67f 100644 --- a/src/core/tests/type_prop/rnn_seq_base.cpp +++ b/src/core/tests/type_prop/rnn_seq_base.cpp @@ -83,11 +83,7 @@ class RNNSeqBaseTest : public TypePropOpTest { return std::make_shared(X, H_t, sequence_lengths, W, R, B, p.hidden_size.get_max_length(), p.direction); } - OPENVINO_SUPPRESS_DEPRECATED_START - template < - typename T = TOp, - typename std::enable_if::value || std::is_same::value, - bool>::type = true> + template ::value, bool>::type = true> std::shared_ptr make_rnn_seq_based_op(RNNSeqParams& p, bool use_default_ctor = false) { p.gates_count = 4; p.outputs_size = 3; @@ -109,19 +105,12 @@ class RNNSeqBaseTest : public TypePropOpTest { op->set_direction(p.direction); op->set_hidden_size(p.hidden_size.get_max_length()); auto inputs = OutputVector{X, H_t, C_t, sequence_lengths, W, R, B}; - if (ov::is_type(op)) { - const auto P = - make_shared(p.et, - PartialShape{p.num_directions, p.hidden_size * (p.gates_count - 1)}); - inputs.push_back(P); - } op->set_arguments(inputs); op->validate_and_infer_types(); return op; } return std::make_shared(X, H_t, C_t, sequence_lengths, W, R, B, p.hidden_size.get_max_length(), p.direction); } - OPENVINO_SUPPRESS_DEPRECATED_END }; 
TYPED_TEST_SUITE_P(RNNSeqBaseTest); @@ -284,9 +273,7 @@ REGISTER_TYPED_TEST_SUITE_P(RNNSeqBaseTest, interval_symbols_dims_shape_infer_REVERSE, interval_symbols_dims_shape_infer_BIDIRECTIONAL); -OPENVINO_SUPPRESS_DEPRECATED_START -using RNNSeqBaseTypes = Types; -OPENVINO_SUPPRESS_DEPRECATED_END +using RNNSeqBaseTypes = Types; INSTANTIATE_TYPED_TEST_SUITE_P(type_prop, RNNSeqBaseTest, RNNSeqBaseTypes); } // namespace rnn_seq_test diff --git a/src/core/tests/type_prop/squeeze.cpp b/src/core/tests/type_prop/squeeze.cpp index c7d81fd97c2786..7be05de1876d9f 100644 --- a/src/core/tests/type_prop/squeeze.cpp +++ b/src/core/tests/type_prop/squeeze.cpp @@ -7,193 +7,261 @@ #include "common_test_utils/test_assertions.hpp" #include "common_test_utils/type_prop.hpp" #include "openvino/op/broadcast.hpp" -#include "openvino/op/constant.hpp" -#include "openvino/op/gather.hpp" -#include "openvino/op/shape_of.hpp" -#include "openvino/op/unsqueeze.hpp" #include "sequence_generator.hpp" using namespace std; using namespace ov; using namespace testing; -TEST(type_prop, squeeze_axes_invalid_value) { +namespace { + +template +class SqueezelOperator : public TypePropOpTest {}; + +using SqueezeTypes = ::testing::Types; + +TYPED_TEST_SUITE(SqueezelOperator, SqueezeTypes); + +TYPED_TEST(SqueezelOperator, squeeze_axes_invalid_value) { auto param = make_shared(element::f32, Shape{1, 2, 3, 4}); auto axes_node = make_shared(element::u64, Shape{2}, vector{0, 2}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), (PartialShape{2, 3, 4})); } -TEST(type_prop, squeeze_single_input) { +TYPED_TEST(SqueezelOperator, squeeze_single_input) { auto param = make_shared(element::f32, PartialShape{1, -1, 3, 4}); - auto s = make_shared(param); - EXPECT_EQ(s->get_output_partial_shape(0), PartialShape::dynamic()); + const auto squeeze = 
this->make_op(param); + EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); } -TEST(type_prop, squeeze_axes_invalid_rank) { +TYPED_TEST(SqueezelOperator, squeeze_axes_invalid_rank) { auto param = make_shared(element::f32, Shape{1, 2, 3, 4}); auto axes_node = make_shared(element::i32, Shape{2, 1}, vector{0, 2}); - OV_EXPECT_THROW(auto s = make_shared(param, axes_node), + OV_EXPECT_THROW(const auto squeeze = this->make_op(param, axes_node), NodeValidationFailure, HasSubstr("Second input (axes) should not be of rank higher than 1.")); } -TEST(type_prop, squeeze_incorrect_negative_axes) { +TYPED_TEST(SqueezelOperator, squeeze_incorrect_negative_axes) { auto param = make_shared(element::f32, Shape{1, 4, 1, 4, 1, 8}); auto axes_node = make_shared(element::i64, Shape{2}, vector{-6, -10}); - OV_EXPECT_THROW(auto s = make_shared(param, axes_node), + OV_EXPECT_THROW(const auto squeeze = this->make_op(param, axes_node), ov::Exception, HasSubstr("Axis -10 out of the tensor rank range")); } -TEST(type_prop, squeeze_data_static_param_axes_1D_single_elem_static_shape_no_squeezable_dims) { +TYPED_TEST(SqueezelOperator, squeeze_data_static_param_axes_1D_single_elem_static_shape_no_squeezable_dims) { auto param = std::make_shared(ov::element::f32, PartialShape{2, 2, 4}); const auto axes_node = std::make_shared(element::u64, PartialShape{1}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), (PartialShape{2, 2, 4})); } -TEST(type_prop, squeeze_data_static_param_axes_1D_two_elem_static_shape_squeezable_dims_two) { +TYPED_TEST(SqueezelOperator, squeeze_data_static_param_axes_1D_two_elem_static_shape_squeezable_dims_two) { auto param = std::make_shared(ov::element::f32, PartialShape{1, 2, 1, 4}); const auto axes_node = std::make_shared(element::u64, PartialShape{2}); - const auto squeeze = 
std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); } -TEST(type_prop, squeeze_data_static_param_axes_1D_two_elem_static_shape_squeezable_dims_one) { +TYPED_TEST(SqueezelOperator, squeeze_data_static_param_axes_1D_two_elem_static_shape_squeezable_dims_one) { auto param = std::make_shared(ov::element::f32, PartialShape{2, 1, 4}); const auto axes_node = std::make_shared(element::u64, PartialShape{2}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); } -TEST(type_prop, squeeze_data_static_param_axes_1D_single_elem_static_shape_squeezable_dims_one) { +TEST(TypePropSqueezelOperatorV0, squeeze_data_static_param_axes_1D_single_elem_static_shape_squeezable_dims_one) { auto param = std::make_shared(ov::element::f32, PartialShape{2, 1, 4}); const auto axes_node = std::make_shared(element::u64, PartialShape{1}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = std::make_shared(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic(2)); } -TEST(type_prop, squeeze_data_static_param_axes_scalar_static_shape_squeezable_dims_one) { +TEST(TypePropSqueezelOperatorV15, squeeze_data_static_param_axes_1D_single_elem_static_shape_squeezable_dims_one) { + auto param = std::make_shared(ov::element::f32, PartialShape{2, 1, 4}); + const auto axes_node = std::make_shared(element::u64, PartialShape{1}); + const auto squeeze = std::make_shared(param, axes_node); + + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); +} + 
+TEST(TypePropSqueezelOperatorV0, squeeze_data_static_param_axes_scalar_static_shape_squeezable_dims_one) { auto param = std::make_shared(ov::element::f32, PartialShape{2, 1, 4}); const auto axes_node = std::make_shared(element::u64, PartialShape{}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = std::make_shared(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic(2)); } -TEST(type_prop, squeeze_data_scalar_param_axes_1D_single_elem_static_shape) { +TEST(TypePropSqueezelOperatorV15, squeeze_data_static_param_axes_scalar_static_shape_squeezable_dims_one) { + auto param = std::make_shared(ov::element::f32, PartialShape{2, 1, 4}); + const auto axes_node = std::make_shared(element::u64, PartialShape{}); + const auto squeeze = std::make_shared(param, axes_node); + + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); +} + +TYPED_TEST(SqueezelOperator, squeeze_data_scalar_param_axes_1D_single_elem_static_shape) { auto param = std::make_shared(ov::element::f32, PartialShape{}); const auto axes_node = std::make_shared(element::u64, PartialShape{1}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); } -TEST(type_prop, squeeze_data_dynamic_param_axes_1D_two_elem_static_shape_squeezable_dims_equal) { +TYPED_TEST(SqueezelOperator, squeeze_data_dynamic_param_axes_1D_two_elem_static_shape_squeezable_dims_equal) { auto param = std::make_shared(ov::element::f32, PartialShape{-1, {2, 8}, {1, 3}, {4, -1}}); const auto axes_node = std::make_shared(element::u64, PartialShape{2}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); 
EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); } -TEST(type_prop, squeeze_data_static_param_axes_1D_two_elem_static_shape_squeezable_dims_more) { +TYPED_TEST(SqueezelOperator, squeeze_data_static_param_axes_1D_two_elem_static_shape_squeezable_dims_more) { auto param = std::make_shared(ov::element::f32, PartialShape{1, 2, 1, 3, 1}); const auto axes_node = std::make_shared(element::u64, PartialShape{2}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); } -TEST(type_prop, squeeze_data_static_param_axes_1D_single_elem_static_shape_squeezable_dims_more) { +TEST(TypePropSqueezelOperatorV0, squeeze_data_static_param_axes_1D_single_elem_static_shape_squeezable_dims_more) { auto param = std::make_shared(ov::element::f32, PartialShape{1, 2, 1, 3, 1}); const auto axes_node = std::make_shared(element::u64, PartialShape{1}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = std::make_shared(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic(4)); } -TEST(type_prop, squeeze_data_static_param_axes_scalar_static_shape_squeezable_dims_more) { +TEST(TypePropSqueezelOperatorV15, squeeze_data_static_param_axes_1D_single_elem_static_shape_squeezable_dims_more) { + auto param = std::make_shared(ov::element::f32, PartialShape{1, 2, 1, 3, 1}); + const auto axes_node = std::make_shared(element::u64, PartialShape{1}); + const auto squeeze = std::make_shared(param, axes_node); + + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); +} + +TEST(TypePropSqueezelOperatorV0, 
squeeze_data_static_param_axes_scalar_static_shape_squeezable_dims_more) { auto param = std::make_shared(ov::element::f32, PartialShape{1, 2, 1, 3, 1}); const auto axes_node = std::make_shared(element::u64, PartialShape{}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = std::make_shared(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic(4)); } -TEST(type_prop, squeeze_data_dynamic_param_axes_1D_two_elem_static_shape_squeezable_dims_more) { +TEST(TypePropSqueezelOperatorV15, squeeze_data_static_param_axes_scalar_static_shape_squeezable_dims_more) { + auto param = std::make_shared(ov::element::f32, PartialShape{1, 2, 1, 3, 1}); + const auto axes_node = std::make_shared(element::u64, PartialShape{}); + const auto squeeze = std::make_shared(param, axes_node); + + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); +} + +TYPED_TEST(SqueezelOperator, squeeze_data_dynamic_param_axes_1D_two_elem_static_shape_squeezable_dims_more) { auto param = std::make_shared(ov::element::f32, PartialShape{-1, {2, 8}, {1, 3}, {4, -1}}); const auto axes_node = std::make_shared(element::u64, PartialShape{2}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); } -TEST(type_prop, squeeze_data_dynamic_param_axes_1D_single_elem_static_shape_squeezable_dims_more) { +TEST(TypePropSqueezelOperatorV0, squeeze_data_dynamic_param_axes_1D_single_elem_static_shape_squeezable_dims_more) { auto param = std::make_shared(ov::element::f32, PartialShape{-1, {2, 8}, {1, 3}, {4, -1}}); const auto axes_node = std::make_shared(element::u64, PartialShape{1}); - const auto squeeze = std::make_shared(param, axes_node); + 
const auto squeeze = std::make_shared(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic(3)); } -TEST(type_prop, squeeze_data_dynamic_param_axes_scalar_static_shape_squeezable_dims_more) { +TEST(TypePropSqueezelOperatorV15, squeeze_data_dynamic_param_axes_1D_single_elem_static_shape_squeezable_dims_more) { + auto param = std::make_shared(ov::element::f32, PartialShape{-1, {2, 8}, {1, 3}, {4, -1}}); + const auto axes_node = std::make_shared(element::u64, PartialShape{1}); + const auto squeeze = std::make_shared(param, axes_node); + + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); +} + +TEST(TypePropSqueezelOperatorV0, squeeze_data_dynamic_param_axes_scalar_static_shape_squeezable_dims_more) { auto param = std::make_shared(ov::element::f32, PartialShape{-1, {2, 8}, {1, 3}, {4, -1}}); const auto axes_node = std::make_shared(element::u64, PartialShape{}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = std::make_shared(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic(3)); } -TEST(type_prop, squeeze_data_dyamic_param_axes_1D_two_elem_static_shape_squeezable_dims_one) { +TEST(TypePropSqueezelOperatorV15, squeeze_data_dynamic_param_axes_scalar_static_shape_squeezable_dims_more) { + auto param = std::make_shared(ov::element::f32, PartialShape{-1, {2, 8}, {1, 3}, {4, -1}}); + const auto axes_node = std::make_shared(element::u64, PartialShape{}); + const auto squeeze = std::make_shared(param, axes_node); + + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); +} + +TYPED_TEST(SqueezelOperator, squeeze_data_dyamic_param_axes_1D_two_elem_static_shape_squeezable_dims_one) { auto param = 
std::make_shared(ov::element::f32, PartialShape{2, -1, 4}); const auto axes_node = std::make_shared(element::u64, PartialShape{2}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); } -TEST(type_prop, squeeze_data_dynamic_param_axes_1D_three_elem_static_shape_squeezable_dims_two) { +TYPED_TEST(SqueezelOperator, squeeze_data_dynamic_param_axes_1D_three_elem_static_shape_squeezable_dims_two) { auto param = std::make_shared(ov::element::f32, PartialShape{-1, {2, 8}, {1, 3}, {4, -1}}); const auto axes_node = std::make_shared(element::u64, PartialShape{3}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); } -TEST(type_prop, squeeze_data_dynamic_param_axes_1D_single_elem_static_shape_squeezable_dims_less) { +TEST(TypePropSqueezelOperatorV0, squeeze_data_dynamic_param_axes_1D_single_elem_static_shape_squeezable_dims_less) { auto param = std::make_shared(ov::element::f32, PartialShape{-1, {2, 8}, {1, 3}, {4, -1}}); const auto axes_node = std::make_shared(element::u64, PartialShape{1}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = std::make_shared(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic(3)); } +TEST(TypePropSqueezelOperatorV15, squeeze_data_dynamic_param_axes_1D_single_elem_static_shape_squeezable_dims_less) { + auto param = std::make_shared(ov::element::f32, PartialShape{-1, {2, 8}, {1, 3}, {4, -1}}); + const auto axes_node = std::make_shared(element::u64, PartialShape{1}); + const auto squeeze = std::make_shared(param, axes_node); + + 
EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); +} + using SqueezeTypePropTestParam = std::tuple, // Squeeze axis PartialShape // Expected shape @@ -288,26 +356,44 @@ INSTANTIATE_TEST_SUITE_P(type_prop_shrink_shape_default_axes, TEST_P(SqueezeTest, partial_shape_dimension_propagation_const_axis_i32) { const auto axes_node = std::make_shared(element::i32, Shape{axes.size()}, axes); - const auto squeeze = std::make_shared(param, axes_node); - - EXPECT_EQ(squeeze->get_element_type(), element::f32); - EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + } + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + } } TEST_P(SqueezeTest, partial_shape_dimension_propagation_parameter_axes_no_data) { const auto axes_node = std::make_shared(element::u64, PartialShape{Shape{axes.size()}}); - const auto squeeze = std::make_shared(param, axes_node); - - EXPECT_EQ(squeeze->get_element_type(), element::f32); - EXPECT_TRUE(squeeze->get_output_partial_shape(0).compatible(exp_shape)); + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_TRUE(squeeze->get_output_partial_shape(0).compatible(exp_shape)); + } + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_TRUE(squeeze->get_output_partial_shape(0).compatible(exp_shape)); + } } TEST_P(SqueezeTest, partial_shape_dimension_propagation_dynamic_axes) { const auto axes_node = std::make_shared(element::u64, PartialShape::dynamic()); - const auto squeeze = std::make_shared(param, axes_node); - - 
EXPECT_EQ(squeeze->get_element_type(), element::f32); - EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); + } + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); + } } TEST_P(SqueezeTest, symbols_propagation) { @@ -321,9 +407,14 @@ TEST_P(SqueezeTest, symbols_propagation) { param = make_shared(element::f32, p_shape); const auto axes_node = std::make_shared(element::i32, Shape{axes.size()}, axes); - const auto squeeze = std::make_shared(param, axes_node); - - EXPECT_EQ(get_shape_symbols(squeeze->get_output_partial_shape(0)), exp_symbols); + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(get_shape_symbols(squeeze->get_output_partial_shape(0)), exp_symbols); + } + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(get_shape_symbols(squeeze->get_output_partial_shape(0)), exp_symbols); + } } using SqueezeShapeTests = SqueezeTest; @@ -336,10 +427,16 @@ INSTANTIATE_TEST_SUITE_P(type_prop_shrink_shape_no_axes, TEST_P(SqueezeShapeTests, shape_dimension_propagation_const_axis_i64) { param = std::make_shared(element::f64, p_shape.to_shape()); const auto axes_node = std::make_shared(element::i64, Shape{axes.size()}, axes); - const auto squeeze = std::make_shared(param, axes_node); - - EXPECT_EQ(squeeze->get_element_type(), element::f64); - EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape.to_shape()); + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f64); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape.to_shape()); + } + { + const auto squeeze = std::make_shared(param, axes_node); + 
EXPECT_EQ(squeeze->get_element_type(), element::f64); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape.to_shape()); + } } using SqueezeNoAxesTest = SqueezeTest; @@ -350,10 +447,16 @@ INSTANTIATE_TEST_SUITE_P(type_prop_shrink_shape_no_axes, PrintToStringParamName()); TEST_P(SqueezeNoAxesTest, partial_shape_dimension_propagation_no_axes) { - const auto squeeze = std::make_shared(param); - - EXPECT_EQ(squeeze->get_element_type(), element::f32); - EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + { + const auto squeeze = std::make_shared(param); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + } + { + const auto squeeze = std::make_shared(param); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + } } using SqueezeScalarAxisTest = SqueezeTest; @@ -368,25 +471,35 @@ INSTANTIATE_TEST_SUITE_P( TEST_P(SqueezeScalarAxisTest, axis_value_as_vector) { const auto axes_node = std::make_shared(element::i32, Shape{}, axes); - const auto squeeze = std::make_shared(param, axes_node); - - EXPECT_EQ(squeeze->get_element_type(), element::f32); - EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + } + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + } } TEST_P(SqueezeScalarAxisTest, axis_value_as_integer) { const auto axes_node = std::make_shared(element::i32, Shape{}, axes.front()); - const auto squeeze = std::make_shared(param, axes_node); - - EXPECT_EQ(squeeze->get_element_type(), element::f32); - EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + { + const auto squeeze = std::make_shared(param, 
axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + } + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + } } using SqueezeBoundTest = UnSqueezeBoundTest; -INSTANTIATE_TEST_SUITE_P( - type_prop_bounds_propagate, - SqueezeBoundTest, +const auto test_values_in = Values(std::make_tuple(PartialShape::dynamic(6), PartialShape::dynamic(1)), std::make_tuple(PartialShape{Dimension(-1)}, PartialShape{Dimension(-1)}), std::make_tuple(PartialShape{Dimension::dynamic(), 8}, PartialShape{Dimension::dynamic()}), @@ -394,34 +507,136 @@ INSTANTIATE_TEST_SUITE_P( std::make_tuple(PartialShape{Dimension(20, -1), Dimension::dynamic()}, PartialShape{{20, -1}}), std::make_tuple(PartialShape{Dimension(-1, 5), Dimension::dynamic()}, PartialShape{Dimension(-1, 5)}), std::make_tuple(PartialShape{15}, PartialShape{15}), - std::make_tuple(PartialShape{2, 6}, PartialShape{2})), - PrintToStringParamName()); + std::make_tuple(PartialShape{2, 6}, PartialShape{2})); + +INSTANTIATE_TEST_SUITE_P(type_prop_bounds_propagate, SqueezeBoundTest, test_values_in, PrintToStringParamName()); /** * \brief Check symbol and dynamic value propagation. * * Test use evaluate symbol, lower/upper. 
*/ -TEST_P(SqueezeBoundTest, propagate_symbol_and_dynamic_value) { +TEST_P(SqueezeBoundTest, propagate_symbol_and_dynamic_value_squeeze_v0) { PartialShape symboled_shape = PartialShape{p_shape}; in_symbols = set_shape_symbols(symboled_shape); - constexpr auto et = element::i64; - const auto symboled_param = std::make_shared(et, symboled_shape); - const auto symboled_shape_of = std::make_shared(symboled_param); + const auto squeeze = create_squeeze(symboled_shape); + const auto bc = std::make_shared(param, squeeze); + + EXPECT_EQ(bc->get_output_partial_shape(0), exp_shape); + const auto symbols = get_shape_symbols(bc->get_output_partial_shape(0)); + EXPECT_THAT(symbols, ElementsAre(in_symbols.front())); +} - const auto zero = std::vector{0}; - const auto axis = std::make_shared(et, Shape{}, zero); - const auto indices = std::make_shared(et, Shape{}, zero); - const auto gather = std::make_shared(symboled_shape_of, indices, axis); - const auto axis_1 = std::make_shared(et, Shape{2}, std::vector{0, 1}); - const auto unsqueeze = std::make_shared(gather, axis_1); - const auto squeeze = std::make_shared(unsqueeze, axis); +/** + * \brief Check symbol and dynamic value propagation. + * + * Test use evaluate symbol, lower/upper. 
+ */ +TEST_P(SqueezeBoundTest, propagate_symbol_and_dynamic_value_squeeze_v15) { + PartialShape symboled_shape = PartialShape{p_shape}; + + in_symbols = set_shape_symbols(symboled_shape); + const auto squeeze = create_squeeze(symboled_shape); const auto bc = std::make_shared(param, squeeze); EXPECT_EQ(bc->get_output_partial_shape(0), exp_shape); const auto symbols = get_shape_symbols(bc->get_output_partial_shape(0)); EXPECT_THAT(symbols, ElementsAre(in_symbols.front())); } + +using SqueezeAxesDynamicRankTestParam = decltype(std::tuple_cat(SqueezeTypePropTestParam{}, std::make_tuple(false))); +class SqueezeAxesDynamicRank : public ::testing::TestWithParam { +protected: + ov::PartialShape p_shape{}, exp_shape{}; + std::vector axes{}; + bool allow_axis_skip{}; +}; + +INSTANTIATE_TEST_SUITE_P( + SqueezeAxesDynamicRankTests, + SqueezeAxesDynamicRank, + ::testing::Values( + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{}, PartialShape::dynamic(), false), + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{}, PartialShape::dynamic(), true), + + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{0}, PartialShape{2, -1, 4}, false), + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{0}, PartialShape{2, -1, 4}, true), + + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{2}, PartialShape{1, 2, 4}, false), + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{2}, PartialShape::dynamic(), true), + + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{0, 2}, PartialShape{2, 4}, false), + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{0, 2}, PartialShape::dynamic(), true), + + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{1}, PartialShape{1, 2, -1, 4}, false), + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{1}, PartialShape{1, 2, -1, 4}, true), + + std::make_tuple(PartialShape{2, 4}, std::vector{1}, PartialShape{2, 4}, false), + std::make_tuple(PartialShape{2, 4}, std::vector{1}, PartialShape{2, 4}, true), + + 
std::make_tuple(PartialShape{2, {3, 5}}, std::vector{}, PartialShape{2, {3, 5}}, false), + std::make_tuple(PartialShape{2, {3, 5}}, std::vector{}, PartialShape{2, {3, 5}}, true), + + std::make_tuple(PartialShape{1, 2, -1}, std::vector{0, 1}, PartialShape{2, -1}, false), + std::make_tuple(PartialShape{1, 2, -1}, std::vector{0, 1}, PartialShape{2, -1}, true), + + std::make_tuple(PartialShape{1, 2, -1}, std::vector{1}, PartialShape{1, 2, -1}, false), + std::make_tuple(PartialShape{1, 2, -1}, std::vector{1}, PartialShape{1, 2, -1}, true), + + std::make_tuple(PartialShape{1, 1, -1}, std::vector{0, 1}, PartialShape{-1}, false), + std::make_tuple(PartialShape{1, 1, -1}, std::vector{0, 1}, PartialShape{-1}, true), + + std::make_tuple(PartialShape{1, 1, -1}, std::vector{1}, PartialShape{1, -1}, false), + std::make_tuple(PartialShape{1, 1, -1}, std::vector{1}, PartialShape{1, -1}, true), + + std::make_tuple(PartialShape{1, 2, 3}, std::vector{}, PartialShape{2, 3}, false), + std::make_tuple(PartialShape{1, 2, 3}, std::vector{}, PartialShape{2, 3}, true))); + +TEST_P(SqueezeAxesDynamicRank, squeeze_axes_dynamic_rank_param) { + const auto& params = GetParam(); + p_shape = std::get<0>(params); + axes = std::get<1>(params); + exp_shape = std::get<2>(params); + allow_axis_skip = std::get<3>(params); + + auto param = make_shared(element::f32, p_shape); + auto axes_node = make_shared(element::u64, Shape{axes.size()}, axes); + const auto squeeze = std::make_shared(param, axes_node, allow_axis_skip); + + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + EXPECT_EQ(squeeze->get_allow_axis_skip(), allow_axis_skip); +} + +TEST(SqueezeDynamicAxis, squeeze_dynamic_non_const_single_axis) { + auto p_shape = PartialShape{1, 2, -1, 4}; + auto exp_shape = PartialShape::dynamic(); + auto allow_axis_skip = true; + + auto param = make_shared(element::f32, p_shape); + auto axes_node = make_shared(element::i32, Shape{1}); + const 
auto squeeze = std::make_shared(param, axes_node, allow_axis_skip); + + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + EXPECT_EQ(squeeze->get_allow_axis_skip(), allow_axis_skip); +} + +TEST(SqueezeDynamicAxis, squeeze_dynamic_non_const_axes) { + auto p_shape = PartialShape{1, 2, -1, 4}; + auto exp_shape = PartialShape::dynamic(); + auto allow_axis_skip = true; + + auto param = make_shared(element::f32, p_shape); + auto axes_node = make_shared(element::i32, PartialShape{-1}); + const auto squeeze = std::make_shared(param, axes_node, allow_axis_skip); + + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + EXPECT_EQ(squeeze->get_allow_axis_skip(), allow_axis_skip); +} + +} // namespace diff --git a/src/core/tests/type_prop/stft.cpp b/src/core/tests/type_prop/stft.cpp index 702b000d2dd560..2969af4e5a43bd 100644 --- a/src/core/tests/type_prop/stft.cpp +++ b/src/core/tests/type_prop/stft.cpp @@ -104,6 +104,13 @@ INSTANTIATE_TEST_SUITE_P( type_prop_stft_shape, TypePropSTFTTestP, testing::Values( + std::make_tuple(PartialShape{16}, PartialShape{16}, 16, 16, true, PartialShape{9, 1, 2}), + std::make_tuple(PartialShape{48}, PartialShape{16}, 16, 16, false, PartialShape{3, 9, 2}), + std::make_tuple(PartialShape{56}, PartialShape{7}, 11, 3, false, PartialShape{16, 6, 2}), + std::make_tuple(PartialShape{56}, PartialShape{7}, 11, 3, true, PartialShape{6, 16, 2}), + std::make_tuple(PartialShape{48}, PartialShape{8}, 16, 4, true, PartialShape{9, 9, 2}), + std::make_tuple(PartialShape{{48, 56}}, PartialShape{7}, 11, 3, true, PartialShape{6, {13, 16}, 2}), + std::make_tuple(PartialShape{-1}, PartialShape{7}, 11, 3, true, PartialShape{6, {1, -1}, 2}), std::make_tuple(PartialShape{1, 16}, PartialShape{16}, 16, 16, true, PartialShape{1, 9, 1, 2}), std::make_tuple(PartialShape{1, 48}, PartialShape{16}, 16, 16, true, PartialShape{1, 9, 3, 2}), 
std::make_tuple(PartialShape{1, 48}, PartialShape{16}, 16, 16, false, PartialShape{1, 3, 9, 2}), @@ -139,16 +146,16 @@ TEST_F(TypePropSTFTTest, signal_incompatible_shape) { const auto frame_size = std::make_shared(element::i64, PartialShape{}); const auto frame_step = std::make_shared(element::i64, PartialShape{}); { - const auto signal = std::make_shared(element::f32, PartialShape{48}); + const auto signal = std::make_shared(element::f32, PartialShape{}); OV_EXPECT_THROW(std::ignore = make_op(signal, window, frame_size, frame_step, transform_frames), NodeValidationFailure, - HasSubstr("The shape of signal must be 2D [batch, signal_size]")); + HasSubstr("The shape of signal must be 1D [signal_size] or 2D [batch, signal_size]")); } { const auto signal = std::make_shared(element::f32, PartialShape{-1, 4, 48}); OV_EXPECT_THROW(std::ignore = make_op(signal, window, frame_size, frame_step, transform_frames), NodeValidationFailure, - HasSubstr("The shape of signal must be 2D [batch, signal_size]")); + HasSubstr("The shape of signal must be 1D [signal_size] or 2D [batch, signal_size]")); } } diff --git a/src/core/tests/visitors/op/lstm_sequence.cpp b/src/core/tests/visitors/op/lstm_sequence.cpp index 0d386ba7bc4ccf..f9819d9ae84bc9 100644 --- a/src/core/tests/visitors/op/lstm_sequence.cpp +++ b/src/core/tests/visitors/op/lstm_sequence.cpp @@ -61,63 +61,3 @@ TEST(attributes, lstm_sequence_op) { EXPECT_EQ(g_lstm_sequence->get_clip(), lstm_sequence->get_clip()); EXPECT_EQ(g_lstm_sequence->get_direction(), lstm_sequence->get_direction()); } - -OPENVINO_SUPPRESS_DEPRECATED_START -TEST(attributes, lstm_sequence_v1_op) { - NodeBuilder::opset().insert(); - - const size_t batch_size = 4; - const size_t num_directions = 2; - const size_t seq_length = 8; - const size_t input_size = 16; - const size_t hidden_size = 64; - - const auto X = make_shared(element::f32, Shape{batch_size, seq_length, input_size}); - const auto initial_hidden_state = - make_shared(element::f32, 
Shape{batch_size, num_directions, hidden_size}); - const auto initial_cell_state = - make_shared(element::f32, Shape{batch_size, num_directions, hidden_size}); - const auto sequence_lengths = make_shared(element::i32, Shape{batch_size}); - const auto W = make_shared(element::f32, Shape{num_directions, 4 * hidden_size, input_size}); - const auto R = - make_shared(element::f32, Shape{num_directions, 4 * hidden_size, hidden_size}); - const auto B = make_shared(element::f32, Shape{num_directions, 4 * hidden_size}); - const auto P = make_shared(element::f32, Shape{num_directions, 3 * hidden_size}); - - const auto lstm_direction = op::RecurrentSequenceDirection::BIDIRECTIONAL; - const ov::op::LSTMWeightsFormat weights_format = ov::op::LSTMWeightsFormat::FICO; - const std::vector activations_alpha = {1, 2, 3}; - const std::vector activations_beta = {4, 5, 6}; - const std::vector activations = {"tanh", "sigmoid", "tanh"}; - const float clip_threshold = 0.5f; - const bool input_forget = true; - - const auto lstm_sequence = make_shared(X, - initial_hidden_state, - initial_cell_state, - sequence_lengths, - W, - R, - B, - P, - hidden_size, - lstm_direction, - weights_format, - activations_alpha, - activations_beta, - activations, - clip_threshold, - input_forget); - NodeBuilder builder(lstm_sequence, {X, initial_hidden_state, initial_cell_state, sequence_lengths, W, R, B, P}); - auto g_lstm_sequence = ov::as_type_ptr(builder.create()); - - EXPECT_EQ(g_lstm_sequence->get_hidden_size(), lstm_sequence->get_hidden_size()); - EXPECT_EQ(g_lstm_sequence->get_activations(), lstm_sequence->get_activations()); - EXPECT_EQ(g_lstm_sequence->get_activations_alpha(), lstm_sequence->get_activations_alpha()); - EXPECT_EQ(g_lstm_sequence->get_activations_beta(), lstm_sequence->get_activations_beta()); - EXPECT_EQ(g_lstm_sequence->get_clip_threshold(), lstm_sequence->get_clip_threshold()); - EXPECT_EQ(g_lstm_sequence->get_direction(), lstm_sequence->get_direction()); - 
EXPECT_EQ(g_lstm_sequence->get_input_forget(), lstm_sequence->get_input_forget()); - EXPECT_EQ(g_lstm_sequence->get_weights_format(), lstm_sequence->get_weights_format()); -} -OPENVINO_SUPPRESS_DEPRECATED_END diff --git a/src/core/tests/visitors/op/sorted_search.cpp b/src/core/tests/visitors/op/sorted_search.cpp index 860c9528d0e9aa..10d544527f3714 100644 --- a/src/core/tests/visitors/op/sorted_search.cpp +++ b/src/core/tests/visitors/op/sorted_search.cpp @@ -22,7 +22,7 @@ TEST(attributes, search_sorted_op) { auto g_op = ov::as_type_ptr(builder.create()); // attribute count - const auto expected_attr_count = 1; + const auto expected_attr_count = 2; EXPECT_EQ(builder.get_value_map_size(), expected_attr_count); // space_to_depth attributes diff --git a/src/core/tests/visitors/op/squeeze.cpp b/src/core/tests/visitors/op/squeeze.cpp index 6eb1674b26329a..be596a5fb1dc67 100644 --- a/src/core/tests/visitors/op/squeeze.cpp +++ b/src/core/tests/visitors/op/squeeze.cpp @@ -6,7 +6,16 @@ #include "unary_ops.hpp" +namespace v0 { using Types = ::testing::Types, UnaryOperatorType>; INSTANTIATE_TYPED_TEST_SUITE_P(visitor_without_attribute, UnaryOperatorVisitor, Types, UnaryOperatorTypeName); +} // namespace v0 + +namespace v15 { +using Types = ::testing::Types, + UnaryOperatorTypeWithAttribute>; + +INSTANTIATE_TYPED_TEST_SUITE_P(visitor_single_attribute, UnaryOperatorVisitor, Types, UnaryOperatorTypeName); +} // namespace v15 diff --git a/src/core/tests/visitors/op/unary_ops.hpp b/src/core/tests/visitors/op/unary_ops.hpp index 3bef2429983e9f..6cc2afda62e253 100644 --- a/src/core/tests/visitors/op/unary_ops.hpp +++ b/src/core/tests/visitors/op/unary_ops.hpp @@ -9,12 +9,17 @@ #include "openvino/op/parameter.hpp" #include "visitors/visitors.hpp" -template +template class UnaryOperatorType { public: using op_type = T; static constexpr ov::element::Type_t element_type = ELEMENT_TYPE; + static constexpr int expected_attr_count = ATTRIBUTES_COUNT; }; + +template +using 
UnaryOperatorTypeWithAttribute = UnaryOperatorType; + template class UnaryOperatorVisitor : public testing::Test {}; @@ -43,7 +48,7 @@ TYPED_TEST_P(UnaryOperatorVisitor, No_Attribute_4D) { EXPECT_NO_THROW(auto g_op_func = ov::as_type_ptr(builder.create())); - const auto expected_attr_count = 0; + const auto expected_attr_count = TypeParam::expected_attr_count; EXPECT_EQ(builder.get_value_map_size(), expected_attr_count); } diff --git a/src/frontends/jax/src/op/erfc.cpp b/src/frontends/jax/src/op/erfc.cpp new file mode 100644 index 00000000000000..5a38577f868d35 --- /dev/null +++ b/src/frontends/jax/src/op/erfc.cpp @@ -0,0 +1,33 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/frontend/jax/node_context.hpp" +#include "openvino/op/erf.hpp" +#include "openvino/op/subtract.hpp" +#include "utils.hpp" + +using namespace std; +using namespace ov; +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace jax { +namespace op { + +OutputVector translate_erfc(const NodeContext& context) { + num_inputs_check(context, 1, 1); + auto x = context.get_input(0); + + // create const one of the same type as x + auto const_one = create_same_type_const_scalar(x, 1); + Output res = make_shared(x); + res = make_shared(const_one, res); + return {res}; +}; + +} // namespace op +} // namespace jax +} // namespace frontend +} // namespace ov diff --git a/src/frontends/jax/src/op_table.cpp b/src/frontends/jax/src/op_table.cpp index 500226594fea13..98f22452c5afab 100644 --- a/src/frontends/jax/src/op_table.cpp +++ b/src/frontends/jax/src/op_table.cpp @@ -45,6 +45,7 @@ OP_CONVERTER(translate_convert); OP_CONVERTER(translate_convolution); OP_CONVERTER(translate_copy); OP_CONVERTER(translate_dot_general); +OP_CONVERTER(translate_erfc); OP_CONVERTER(translate_integer_pow); OP_T_CONVERTER(translate_reduce_op); OP_CONVERTER(translate_reduce_window_max); @@ -72,6 +73,7 @@ const std::map get_supported_ops_jaxpr() { 
{"dot_general", op::translate_dot_general}, {"eq", op::translate_binary_op}, {"erf", op::translate_1to1_match_1_input}, + {"erfc", op::translate_erfc}, {"exp", op::translate_1to1_match_1_input}, {"ge", op::translate_binary_op}, {"gt", op::translate_binary_op}, diff --git a/src/frontends/onnx/frontend/src/core/graph.cpp b/src/frontends/onnx/frontend/src/core/graph.cpp index ad755c958703cb..80ec9c6c9f390c 100644 --- a/src/frontends/onnx/frontend/src/core/graph.cpp +++ b/src/frontends/onnx/frontend/src/core/graph.cpp @@ -446,10 +446,6 @@ void Graph::set_friendly_names(const Node& onnx_node, const ov::OutputVector& ov // null node does not have tensor if (!ov::op::util::is_null(ov_subgraph_outputs[i])) { ov_subgraph_outputs[i].get_tensor().set_names({onnx_node.output(static_cast(i))}); - OPENVINO_SUPPRESS_DEPRECATED_START - ov::descriptor::set_ov_tensor_legacy_name(ov_subgraph_outputs[i].get_tensor(), - onnx_node.output(static_cast(i))); - OPENVINO_SUPPRESS_DEPRECATED_END } } } diff --git a/src/frontends/onnx/frontend/src/op/com.microsoft/quick_gelu.cpp b/src/frontends/onnx/frontend/src/op/com.microsoft/quick_gelu.cpp new file mode 100644 index 00000000000000..c4144be9b5ff44 --- /dev/null +++ b/src/frontends/onnx/frontend/src/op/com.microsoft/quick_gelu.cpp @@ -0,0 +1,58 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "core/operator_set.hpp" +#include "exceptions.hpp" +#include "openvino/frontend/exception.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/sigmoid.hpp" +#include "utils/common.hpp" + +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace onnx { +namespace com_microsoft { +namespace opset_1 { +ov::OutputVector quick_gelu(const ov::frontend::onnx::Node& node) { + // Original Documentation: + // https://github.com/microsoft/onnxruntime/blob/main/docs/ContribOperators.md#com.microsoft.QuickGelu + // Goal: Compute x * Sigmoid(alpha 
* x) + common::default_op_checks(node, 1); + + const auto inputs = node.get_ov_inputs(); + const auto& x = inputs[0]; + + // Constrain input type to float16, float, double (f64), bfloat16 + auto element_type = x.get_element_type(); + CHECK_VALID_NODE(node, + element_type == ov::element::f16 || element_type == ov::element::f32 || + element_type == ov::element::f64 || element_type == ov::element::bf16, + "Unsupported input x type, accepted FP16, FP32, FP64, BFP16 but got: ", + element_type); + + // Get attribute from node + const float alpha = node.get_attribute_value("alpha"); + + // Numpy broadcasting rule is automatically applied with mismatched shapes according to: + // https://docs.openvino.ai/2022.3/openvino_docs_ops_arithmetic_Multiply_1.html "Tensor with dimension of size 1 + // will be implicitly broadcasted to match the size of the second tensor." Make alpha a size-1 tensor of x's type + const auto alpha_tensor = std::make_shared(element_type, Shape{1}, alpha); + + auto alpha_x = std::make_shared(alpha_tensor, x); + auto sig_alpha_x = std::make_shared(alpha_x); + auto result = std::make_shared(x, sig_alpha_x); + + return {result}; +} // func end + +ONNX_OP("QuickGelu", OPSET_SINCE(1), com_microsoft::opset_1::quick_gelu, MICROSOFT_DOMAIN); + +} // namespace opset_1 +} // namespace com_microsoft +} // namespace onnx +} // namespace frontend +} // namespace ov diff --git a/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp b/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp index b09bc73467bc10..47fcc7af60bf61 100644 --- a/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp +++ b/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp @@ -18,6 +18,7 @@ #include "openvino/op/subtract.hpp" #include "openvino/op/transpose.hpp" #include "openvino/op/unsqueeze.hpp" +#include "transformations/rt_info/disable_constant_folding.hpp" #include "utils/common.hpp" #include "utils/reshape.hpp" using namespace ov::op; @@ -221,19 +222,8 @@ ov::OutputVector 
dequantize_linear(const ov::frontend::onnx::Node& node) { FRONT_END_GENERAL_CHECK(src_x.get_partial_shape().is_static(), "DequantizeLinear cannot operate with dynamic shapes of input X"); - const auto& unsqueezed_axes = std::make_shared(ov::element::i64, Shape{1}, std::vector{1}); - - if (inputs.size() > 2) { - zp = inputs[2]; - if (zp.get_element_type() != scale.get_element_type()) { - zp = std::make_shared(zp, scale); - } - zp = std::make_shared(zp, unsqueezed_axes); - } - const auto axis = node.get_attribute_value("axis", 1); const auto block_size = static_cast(node.get_attribute_value("block_size", 0)); - const auto scale_type = scale.get_element_type(); FRONT_END_GENERAL_CHECK(axis == 0, "Axis != 0 isn't supported"); FRONT_END_GENERAL_CHECK(block_size > 0, "block_size must be greater than zero"); @@ -241,16 +231,31 @@ ov::OutputVector dequantize_linear(const ov::frontend::onnx::Node& node) { src_x.get_shape()[0] % block_size == 0, "DequantizeLinear doesn't support case when first dimension of X cannot be divided by block_size"); - const auto& x = src_x.get_element_type() == scale_type ? src_x : std::make_shared(src_x, scale); + ov::Output broadcastable_x = op::util::reshape( + src_x, + Shape{static_cast(src_x.get_shape()[0]) / block_size, block_size, src_x.get_shape()[1]}); + + const auto& unsqueezed_axes = std::make_shared(ov::element::i64, Shape{1}, std::vector{1}); + + const auto scale_type = scale.get_element_type(); + if (inputs.size() > 2) { + zp = inputs[2]; + if (zp.get_element_type() != scale.get_element_type()) { + zp = std::make_shared(zp, scale_type); + disable_constant_folding(zp.get_node_shared_ptr()); + } + zp = std::make_shared(zp, unsqueezed_axes); + } + + const auto& x = src_x.get_element_type() == scale_type ? 
broadcastable_x + : std::make_shared(broadcastable_x, scale_type); // For further broadcasting scales and zp - reshape input to a shape [x.shape[0]/block_size, block_size, x.shape[1]] - ov::Output broadcastable_x = - op::util::reshape(x, Shape{static_cast(x.get_shape()[0]) / block_size, block_size, x.get_shape()[1]}); // Adding additional dimension for broadcasting scale = std::make_shared(scale, unsqueezed_axes); if (zp.get_node_shared_ptr()) { - broadcastable_x = std::make_shared(broadcastable_x, zp); + broadcastable_x = std::make_shared(x, zp); } const auto& scaled_x = std::make_shared(broadcastable_x, scale); diff --git a/src/frontends/onnx/frontend/src/op/lstm.cpp b/src/frontends/onnx/frontend/src/op/lstm.cpp index 6b934db7d13071..1c0efbbe6b9454 100644 --- a/src/frontends/onnx/frontend/src/op/lstm.cpp +++ b/src/frontends/onnx/frontend/src/op/lstm.cpp @@ -211,41 +211,19 @@ ov::OutputVector lstm(const ov::frontend::onnx::Node& node) { LSTMAttributes attributes{node}; std::shared_ptr lstm_sequence; - if ((input_map.at(LSTMInput::LSTM_INPUT_P).get_names() != std::unordered_set({"P_blank"})) || - (attributes.m_input_forget == true)) { - OPENVINO_SUPPRESS_DEPRECATED_START - lstm_sequence = std::make_shared(input_map.at(LSTMInput::LSTM_INPUT_X), - input_map.at(LSTMInput::LSTM_INPUT_INIT_H), - input_map.at(LSTMInput::LSTM_INPUT_INIT_C), - input_map.at(LSTMInput::LSTM_INPUT_SEQ_LENGTHS), - input_map.at(LSTMInput::LSTM_INPUT_W), - input_map.at(LSTMInput::LSTM_INPUT_R), - input_map.at(LSTMInput::LSTM_INPUT_B), - input_map.at(LSTMInput::LSTM_INPUT_P), - attributes.m_hidden_size, - attributes.m_direction, - ov::op::LSTMWeightsFormat::FICO, - attributes.m_activation_alpha, - attributes.m_activation_beta, - attributes.m_activations, - attributes.m_clip_threshold, - attributes.m_input_forget); - OPENVINO_SUPPRESS_DEPRECATED_END - } else { - lstm_sequence = std::make_shared(input_map.at(LSTMInput::LSTM_INPUT_X), - input_map.at(LSTMInput::LSTM_INPUT_INIT_H), - 
input_map.at(LSTMInput::LSTM_INPUT_INIT_C), - input_map.at(LSTMInput::LSTM_INPUT_SEQ_LENGTHS), - input_map.at(LSTMInput::LSTM_INPUT_W), - input_map.at(LSTMInput::LSTM_INPUT_R), - input_map.at(LSTMInput::LSTM_INPUT_B), - attributes.m_hidden_size, - attributes.m_direction, - attributes.m_activation_alpha, - attributes.m_activation_beta, - attributes.m_activations, - attributes.m_clip_threshold); - } + lstm_sequence = std::make_shared(input_map.at(LSTMInput::LSTM_INPUT_X), + input_map.at(LSTMInput::LSTM_INPUT_INIT_H), + input_map.at(LSTMInput::LSTM_INPUT_INIT_C), + input_map.at(LSTMInput::LSTM_INPUT_SEQ_LENGTHS), + input_map.at(LSTMInput::LSTM_INPUT_W), + input_map.at(LSTMInput::LSTM_INPUT_R), + input_map.at(LSTMInput::LSTM_INPUT_B), + attributes.m_hidden_size, + attributes.m_direction, + attributes.m_activation_alpha, + attributes.m_activation_beta, + attributes.m_activations, + attributes.m_clip_threshold); const auto Y = lstm_sequence->output(0); const auto Y_h = lstm_sequence->output(1); diff --git a/src/frontends/onnx/tests/models/com.microsoft/quick_gelu.prototxt b/src/frontends/onnx/tests/models/com.microsoft/quick_gelu.prototxt new file mode 100644 index 00000000000000..4fb110fd485833 --- /dev/null +++ b/src/frontends/onnx/tests/models/com.microsoft/quick_gelu.prototxt @@ -0,0 +1,52 @@ +ir_version: 3 +producer_name: "OpenVINO ONNX Frontend" +graph { + name: "test_quick_gelu" + node { + input: "X" + output: "Y" + op_type: "QuickGelu" + attribute { + name: "alpha" + f: 0.9974269270896912 + type: FLOAT + } + domain: "com.microsoft" + } + input { + name: "X" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 5 + } + } + } + } + } + output { + name: "Y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 5 + } + } + } + } + } +} +opset_import { + domain: "com.microsoft" + version: 1 +} \ No newline at end of file diff --git 
a/src/frontends/onnx/tests/models/lstm_bdir_short_input_seq.prototxt b/src/frontends/onnx/tests/models/lstm_bdir_short_input_seq.prototxt deleted file mode 100644 index 57b723e44b6914..00000000000000 --- a/src/frontends/onnx/tests/models/lstm_bdir_short_input_seq.prototxt +++ /dev/null @@ -1,235 +0,0 @@ -ir_version: 5 -graph { - node { - input: "X" - input: "W" - input: "R" - input: "B" - input: "sequence_lens" - input: "initial_h" - input: "initial_c" - input: "P" - output: "Y" - output: "Y_h" - output: "" - name: "node1" - op_type: "LSTM" - attribute { - name: "direction" - s: "bidirectional" - type: STRING - } - attribute { - name: "input_forget" - i: 0 - type: INT - } - attribute { - name: "activations" - strings: "sigmoid" - strings: "tanh" - strings: "tanh" - strings: "sigmoid" - strings: "tanh" - strings: "tanh" - type: STRINGS - } - attribute { - name: "hidden_size" - i: 2 - type: INT - } - attribute { - name: "clip" - f: 9999 - type: FLOAT - } - doc_string: "LSTM" - domain: "" - } - input { - name: "X" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 2 - } - dim { - dim_value: 1 - } - dim { - dim_value: 2 - } - } - } - } - } - input { - name: "W" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 2 - } - dim { - dim_value: 8 - } - dim { - dim_value: 2 - } - } - } - } - } - input { - name: "R" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 2 - } - dim { - dim_value: 8 - } - dim { - dim_value: 2 - } - } - } - } - } - input { - name: "B" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 2 - } - dim { - dim_value: 16 - } - } - } - } - } - input { - name: "sequence_lens" - type { - tensor_type { - elem_type: 6 - shape { - dim { - dim_value: 1 - } - } - } - } - } - input { - name: "initial_h" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 2 - } - dim { - dim_value: 1 - } - dim { - dim_value: 2 - } - } - } - } - } - input { - name: "initial_c" - type { - 
tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 2 - } - dim { - dim_value: 1 - } - dim { - dim_value: 2 - } - } - } - } - } - input { - name: "P" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 2 - } - dim { - dim_value: 6 - } - } - } - } - } - output { - name: "Y" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 2 - } - dim { - dim_value: 2 - } - dim { - dim_value: 1 - } - dim { - dim_value: 2 - } - } - } - } - } - output { - name: "Y_h" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 2 - } - dim { - dim_value: 1 - } - dim { - dim_value: 2 - } - } - } - } - } -} -opset_import { - domain: "" - version: 7 -} diff --git a/src/frontends/onnx/tests/models/lstm_fwd_with_clip_peepholes.prototxt b/src/frontends/onnx/tests/models/lstm_fwd_with_clip_peepholes.prototxt deleted file mode 100644 index 2a218f089240a5..00000000000000 --- a/src/frontends/onnx/tests/models/lstm_fwd_with_clip_peepholes.prototxt +++ /dev/null @@ -1,191 +0,0 @@ -ir_version: 4 -producer_name: "OpenVINO ONNX Frontend" -graph { - node { - input: "X" - input: "W" - input: "R" - input: "B" - input: "" - input: "" - input: "" - input: "P" - output: "Y" - output: "Y_h" - output: "Y_c" - op_type: "LSTM" - attribute { - name: "clip" - f: 0.10000000149011612 - type: FLOAT - } - attribute { - name: "direction" - s: "forward" - type: STRING - } - attribute { - name: "hidden_size" - i: 2 - type: INT - } - attribute { - name: "input_forget" - i: 0 - type: INT - } - } - name: "compute_graph" - input { - name: "X" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 2 - } - dim { - dim_value: 1 - } - dim { - dim_value: 2 - } - } - } - } - } - input { - name: "W" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 1 - } - dim { - dim_value: 8 - } - dim { - dim_value: 2 - } - } - } - } - } - input { - name: "R" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 1 - } - dim { - 
dim_value: 8 - } - dim { - dim_value: 2 - } - } - } - } - } - input { - name: "B" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 1 - } - dim { - dim_value: 16 - } - } - } - } - } - input { - name: "P" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 1 - } - dim { - dim_value: 6 - } - } - } - } - } - output { - name: "Y" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 2 - } - dim { - dim_value: 1 - } - dim { - dim_value: 1 - } - dim { - dim_value: 2 - } - } - } - } - } - output { - name: "Y_h" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 1 - } - dim { - dim_value: 1 - } - dim { - dim_value: 2 - } - } - } - } - } - output { - name: "Y_c" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 1 - } - dim { - dim_value: 1 - } - dim { - dim_value: 2 - } - } - } - } - } -} -opset_import { - version: 7 -} diff --git a/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp b/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp index da8189926a4546..900fc025d8d9ab 100644 --- a/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp @@ -1330,3 +1330,29 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_com_microsoft_matmulnbits_3x17) { } test_case.run(); } + +OPENVINO_TEST(${BACKEND_NAME}, onnx_com_microsoft_quickgelu) { + const auto model = convert_model("com.microsoft/quick_gelu.onnx"); + auto test_case = ov::test::TestCase(model, s_device); + + const std::vector input_X{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + const std::vector output{0.7305524f, + 1.7605114f, + 2.8566725f, + 3.9273243f, + 4.9661055f, + 5.984934f, + 6.9935064f, + 7.997261f, + 8.998864f, + 9.999535f}; + + test_case.add_input(Shape{2, 5}, input_X); + test_case.add_expected_output(Shape{2, 5}, output); + + if (std::string("${BACKEND_NAME}") == std::string("IE_GPU")) { + test_case.run_with_tolerance_as_fp(0.0001f); + } else { + test_case.run(); + 
} +} diff --git a/src/frontends/onnx/tests/onnx_import_rnn.in.cpp b/src/frontends/onnx/tests/onnx_import_rnn.in.cpp index 5979466849d5e6..b61fd1d36f99e0 100644 --- a/src/frontends/onnx/tests/onnx_import_rnn.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_rnn.in.cpp @@ -202,72 +202,6 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_lstm_bidir_mixed_seq_const) { test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 1); } -OPENVINO_TEST(${BACKEND_NAME}, onnx_model_lstm_fwd_with_clip_peepholes) { - auto model = convert_model("lstm_fwd_with_clip_peepholes.onnx"); - - auto test_case = ov::test::TestCase(model, s_device); - test_case.add_input({-0.455351f, -0.276391f, -0.185934f, -0.269585f}); // X - test_case.add_input({-0.494659f, // W - 0.0453352f, - -0.487793f, - 0.417264f, - -0.0175329f, - 0.489074f, - -0.446013f, - 0.414029f, - -0.0091708f, - -0.255364f, - -0.106952f, - -0.266717f, - -0.0888852f, - -0.428709f, - -0.283349f, - 0.208792f}); // W - test_case.add_input({0.146626f, - -0.0620289f, - -0.0815302f, - 0.100482f, - -0.219535f, - -0.306635f, - -0.28515f, - -0.314112f, - -0.228172f, - 0.405972f, - 0.31576f, - 0.281487f, - -0.394864f, - 0.42111f, - -0.386624f, - -0.390225f}); // R - - test_case.add_input({0.381619f, - 0.0323954f, - -0.14449f, - 0.420804f, - -0.258721f, - 0.45056f, - -0.250755f, - 0.0967895f, - 0.0f, - 0.0f, - 0.0f, - 0.0f, - 0.0f, - 0.0f, - 0.0f, - 0.0f}); // B - test_case.add_input({0.2345f, 0.5235f, 0.4378f, 0.3475f, 0.8927f, 0.3456f}); // P - - test_case.add_expected_output(Shape{2, 1, 1, 2}, - {-0.02280854f, 0.02744377f, -0.03516197f, 0.03875681f}); // Y_data - test_case.add_expected_output(Shape{1, 1, 2}, {-0.03516197f, 0.03875681f}); // Y_h_data - test_case.add_expected_output(Shape{1, 1, 2}, {-0.07415761f, 0.07395997f}); // Y_c_data - - // We have to enlarge tolerance bits to 3 - it's only one bit more than default value. - // The discrepancies may occur at most on 7th decimal position. 
- test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 3); -} - OPENVINO_TEST(${BACKEND_NAME}, onnx_model_lstm_fwd_mixed_seq) { auto model = convert_model("lstm_fwd_mixed_seq.onnx"); @@ -397,50 +331,6 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_lstm_fwd_large_batch_no_clip) { test_case.run(); } -OPENVINO_TEST(${BACKEND_NAME}, onnx_model_lstm_bdir_short_input_seq_peepholes) { - auto model = convert_model("lstm_bdir_short_input_seq.onnx"); - - auto test_case = ov::test::TestCase(model, s_device); - - // X - test_case.add_input({-0.455351f, -0.276391f, -0.185934f, -0.269585f}); - // W - test_case.add_input( - {-0.494659f, 0.0453352f, -0.487793f, 0.417264f, -0.0175329f, 0.489074f, -0.446013f, 0.414029f, - -0.0091708f, -0.255364f, -0.106952f, -0.266717f, -0.0888852f, -0.428709f, -0.283349f, 0.208792f, - -0.494659f, 0.0453352f, -0.487793f, 0.417264f, -0.0175329f, 0.489074f, -0.446013f, 0.414029f, - -0.0091708f, -0.255364f, -0.106952f, -0.266717f, -0.0888852f, -0.428709f, -0.283349f, 0.208792f}); - // R - test_case.add_input({0.146626f, -0.0620289f, -0.0815302f, 0.100482f, -0.219535f, -0.306635f, -0.28515f, - -0.314112f, -0.228172f, 0.405972f, 0.31576f, 0.281487f, -0.394864f, 0.42111f, - -0.386624f, -0.390225f, 0.146626f, -0.0620289f, -0.0815302f, 0.100482f, -0.219535f, - -0.306635f, -0.28515f, -0.314112f, -0.228172f, 0.405972f, 0.31576f, 0.281487f, - -0.394864f, 0.42111f, -0.386624f, -0.390225f}); - // B - test_case.add_input({0.381619f, 0.0323954f, -0.14449f, 0.420804f, -0.258721f, 0.45056f, -0.250755f, - 0.0967895f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.381619f, 0.0323954f, -0.14449f, 0.420804f, -0.258721f, - 0.45056f, -0.250755f, 0.0967895f, 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f}); - // sequence_lens - test_case.add_input({1}); - // initial_h - test_case.add_input({0.0f, 0.0f, -0.0306872f, 0.028035f}); - // initial_c - test_case.add_input({0.0f, 0.0f, -0.07243599f, 0.0467052f}); - // P - test_case.add_input( - {0.2345f, 0.5235f, 0.4378f, 0.3475f, 
0.8927f, 0.3456f, 0.2345f, 0.5235f, 0.4378f, 0.3475f, 0.8927f, 0.3456f}); - - // Y - test_case.add_expected_output(Shape{2, 2, 1, 2}, - {-0.0251062f, 0.0561262f, -0.0318928f, 0.0762679f, 0.0f, 0.0f, 0.0f, 0.0f}); - // Y_h - test_case.add_expected_output(Shape{2, 1, 2}, {-0.0251062f, 0.0561262f, -0.0318928f, 0.0762679f}); - - test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 3); -} - OPENVINO_TEST(${BACKEND_NAME}, onnx_model_lstm_mixed_seq_reverse) { auto model = convert_model("lstm_mixed_seq_reverse.onnx"); diff --git a/src/frontends/onnx/tests/runtime/ie/unit_test.manifest b/src/frontends/onnx/tests/runtime/ie/unit_test.manifest index b2e87a2588ec65..3eadb698ca0a03 100644 --- a/src/frontends/onnx/tests/runtime/ie/unit_test.manifest +++ b/src/frontends/onnx/tests/runtime/ie/unit_test.manifest @@ -121,9 +121,6 @@ onnx_model_lp_norm_default_dynamic onnx_instance_normalization_dynamic # Legacy tests with unsupported features from opset4 LSTM/GRU/RNN -# Peepholes input unsupported -onnx_model_lstm_fwd_with_clip_peepholes -onnx_model_lstm_bdir_short_input_seq_peepholes # Activation function hardsigmoid is not supported onnx_model_gru_fwd_activations_relu_hardsigmoid onnx_model_lstm_fwd_hardsigmoid_activation diff --git a/src/frontends/pytorch/src/op/lerp.cpp b/src/frontends/pytorch/src/op/lerp.cpp new file mode 100644 index 00000000000000..67922da3e4578d --- /dev/null +++ b/src/frontends/pytorch/src/op/lerp.cpp @@ -0,0 +1,36 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/frontend/pytorch/node_context.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/convert_like.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/subtract.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace op { + +using namespace ov::op; + +OutputVector translate_lerp(const NodeContext& context) { + // Tensor = aten::lerp(%lhs.1, %rhs.1, %self.weight) + 
num_inputs_check(context, 3, 3); + Output start; + Output end; + std::tie(start, end) = get_inputs_with_promoted_types(context, 0, 1); + + Output weight = context.get_input(2); + auto scale = context.mark_node(std::make_shared(end, start)); + weight = context.mark_node(std::make_shared(weight, scale)); + auto delta = context.mark_node(std::make_shared(scale, weight)); + return {context.mark_node(std::make_shared(start, delta))}; +}; + +} // namespace op +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/op/stft.cpp b/src/frontends/pytorch/src/op/stft.cpp new file mode 100644 index 00000000000000..b7e4858c2f8fcc --- /dev/null +++ b/src/frontends/pytorch/src/op/stft.cpp @@ -0,0 +1,93 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/op/stft.hpp" + +#include "openvino/frontend/pytorch/node_context.hpp" +#include "openvino/op/broadcast.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/convert_like.hpp" +#include "openvino/op/divide.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/unsqueeze.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace op { + +using namespace ov::op; + +OutputVector translate_stft(const NodeContext& context) { + // schema: aten::stft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool + // normalized=False, bool? onesided=None, bool? 
return_complex=None) -> Tensor + // + // Note: aten::stft doesn't have "center" and "pad_mode" attrs like torch.stft, so the number of the inputs is lower + // and index of any input after the "window" is smaller accordingly + + num_inputs_check(context, 2, 8); + + auto input = context.get_input(0); + auto n_fft = context.get_input(1); + + ov::Output hop_length; + if (!context.input_is_none(2)) { + hop_length = context.get_input(2); + } else { + // Defualt floor(n_fft / 4) + const auto four = context.mark_node(std::make_shared(ov::element::i32, Shape{}, 4)); + const auto four_cast = context.mark_node(std::make_shared(four, n_fft)); + hop_length = context.mark_node(std::make_shared(n_fft, four_cast)); + } + + ov::Output win_length; + if (!context.input_is_none(3)) { + win_length = context.get_input(3); + } else { + win_length = n_fft; + } + + ov::Output window; + if (!context.input_is_none(4)) { + window = context.get_input(4); + } else { + const auto one = context.mark_node(std::make_shared(ov::element::i32, Shape{}, 1)); + const auto one_cast = context.mark_node(std::make_shared(one, input)); + const auto zero = context.mark_node(std::make_shared(ov::element::i32, Shape{1}, 0)); + const auto win_length_cast = + context.mark_node(std::make_shared(win_length, ov::element::i64)); + const auto win_len_vec = context.mark_node(std::make_shared(win_length_cast, zero)); + window = context.mark_node(std::make_shared(one_cast, win_len_vec)); + } + + bool normalized = false; + if (!context.input_is_none(5)) { + normalized = context.const_input(5); + } + PYTORCH_OP_CONVERSION_CHECK(!normalized, + "aten::stft conversion is currently supported with normalized=False only."); + + bool onesided = true; + if (!context.input_is_none(6)) { + onesided = context.const_input(6); + } + PYTORCH_OP_CONVERSION_CHECK(onesided, "aten::stft conversion is currently supported with onesided=True only."); + + bool return_complex = false; + if (!context.input_is_none(7)) { + return_complex = 
context.const_input(7); + } + PYTORCH_OP_CONVERSION_CHECK(!return_complex, + "aten::stft conversion is currently supported with return_complex=False only."); + + // Perform STFT + constexpr bool transpose_frames = true; + auto stft = context.mark_node(std::make_shared(input, window, n_fft, hop_length, transpose_frames)); + return {stft}; +}; +} // namespace op +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/op_table.cpp b/src/frontends/pytorch/src/op_table.cpp index 66c76e33032ef6..607f0bd32db80d 100644 --- a/src/frontends/pytorch/src/op_table.cpp +++ b/src/frontends/pytorch/src/op_table.cpp @@ -121,6 +121,7 @@ OP_CONVERTER(translate_inverse); OP_CONVERTER(translate_is_nonzero); OP_CONVERTER(translate_layer_norm); OP_CONVERTER(translate_len); +OP_CONVERTER(translate_lerp); OP_CONVERTER(translate_linalg_cross); OP_CONVERTER(translate_linalg_norm); OP_CONVERTER(translate_linalg_matrix_norm); @@ -220,6 +221,7 @@ OP_CONVERTER(translate_square); OP_CONVERTER(translate_squeeze); OP_CONVERTER(translate_std); OP_CONVERTER(translate_std_mean); +OP_CONVERTER(translate_stft); OP_CONVERTER(translate_sub); OP_CONVERTER(translate_sub_); OP_CONVERTER(translate_sum); @@ -329,12 +331,15 @@ OP_CONVERTER(translate_zeros_like_fx); const std::unordered_map get_supported_ops_ts() { return { {"aten::__and__", op::translate_bitwise_and}, + {"aten::__iand__", op::inplace_op}, {"aten::__derive_index", op::translate_derive_index}, {"aten::__getitem__", op::translate_getitem}, {"aten::__not__", op::translate_1to1_match_1_inputs}, {"aten::__or__", op::translate_bitwise_or}, + {"aten::__ior__", op::inplace_op}, {"aten::__range_length", op::translate_range_length}, {"aten::__xor__", op::translate_bitwise_xor}, + {"aten::__ixor__", op::inplace_op}, {"aten::_convolution", op::translate_convolution}, {"aten::_convolution_mode", op::translate_convolution_mode}, {"aten::_native_multi_head_attention", 
op::translate_native_multi_head_attention}, @@ -508,6 +513,7 @@ const std::unordered_map get_supported_ops_ts() { {"aten::le", op::translate_1to1_match_2_inputs_align_types}, {"aten::leaky_relu", op::translate_1to1_match_2_inputs}, {"aten::len", op::translate_len}, + {"aten::lerp", op::translate_lerp}, // lift op is torchscript specific op responsible for tensors coping with guarantee of new memory allocation {"aten::lift", op::skip_node}, {"aten::lift_fresh", op::skip_node}, @@ -649,6 +655,7 @@ const std::unordered_map get_supported_ops_ts() { // aten::stack - Supported in limited set of patterns {"aten::std", op::translate_std}, {"aten::std_mean", op::translate_std_mean}, + {"aten::stft", op::translate_stft}, {"aten::sub", op::translate_sub}, {"aten::sub_", op::translate_sub_}, {"aten::sum", op::translate_sum}, @@ -746,6 +753,7 @@ const std::unordered_map get_supported_ops_fx() { {"aten._native_batch_norm_legit.no_stats", op::translate_batch_norm_legit_no_stats_fx}, {"aten._native_batch_norm_legit_functional.default", op::translate_batch_norm_legit_fx}, {"aten._native_batch_norm_legit_no_training.default", op::translate_batch_norm_legit_no_training_fx}, + {"aten._safe_softmax.default", op::translate_softmax_fx}, {"aten._scaled_dot_product_flash_attention.default", op::translate_scaled_dot_product_attention_fx}, {"aten._scaled_dot_product_flash_attention_for_cpu.default", op::translate_scaled_dot_product_attention_fx}, {"aten._softmax.default", op::translate_softmax_fx}, diff --git a/src/frontends/tensorflow/src/transformations/switch_merge_resolve.cpp b/src/frontends/tensorflow/src/transformations/switch_merge_resolve.cpp index 34b2a82152ccfc..cbdc506671aa67 100644 --- a/src/frontends/tensorflow/src/transformations/switch_merge_resolve.cpp +++ b/src/frontends/tensorflow/src/transformations/switch_merge_resolve.cpp @@ -235,6 +235,9 @@ bool pass::SwitchMergeResolver::run_on_model(const shared_ptr& m) { auto else_body = make_shared(else_results, else_params); auto 
if_op = make_shared(cond); + // in case TensorFlow models, we can deduce predicate shape that must be a scalar + if_op->get_rt_info()["tf_switch_merge_if"] = true; + set_cf_marker(if_cf_marker, if_op); if_op->set_then_body(then_body); if_op->set_else_body(else_body); diff --git a/src/frontends/tensorflow_common/src/op/tensor_list_operations.cpp b/src/frontends/tensorflow_common/src/op/tensor_list_operations.cpp index 5cbf4c92b8a35c..e082be4943977c 100644 --- a/src/frontends/tensorflow_common/src/op/tensor_list_operations.cpp +++ b/src/frontends/tensorflow_common/src/op/tensor_list_operations.cpp @@ -199,8 +199,13 @@ OutputVector translate_tensor_list_length_op(const NodeContext& node) { auto tensor_list_shape = make_shared(input_handle, element::i32); auto list_length = make_shared(tensor_list_shape, zero_const, one_const, one_const); - set_node_name(node.get_name(), list_length); - return {list_length}; + // output of TensorListLength must be a scalar + // after Slice operation it is a 1D tensor with one element + auto scalar_shape = make_shared(element::i32, Shape{0}, std::vector{}); + auto list_length_scalar = make_shared(list_length, scalar_shape, false); + + set_node_name(node.get_name(), list_length_scalar); + return {list_length_scalar}; } OutputVector translate_tensor_list_concat_v2_op(const NodeContext& node) { diff --git a/src/frontends/tensorflow_common/src/op/tobool.cpp b/src/frontends/tensorflow_common/src/op/tobool.cpp index 9f1082d5df87da..9025a35f549799 100644 --- a/src/frontends/tensorflow_common/src/op/tobool.cpp +++ b/src/frontends/tensorflow_common/src/op/tobool.cpp @@ -9,6 +9,8 @@ #include "openvino/op/logical_and.hpp" #include "openvino/op/logical_or.hpp" #include "openvino/op/not_equal.hpp" +#include "openvino/op/range.hpp" +#include "openvino/op/reduce_logical_and.hpp" #include "openvino/op/reduce_prod.hpp" #include "openvino/op/select.hpp" #include "openvino/op/shape_of.hpp" @@ -22,47 +24,49 @@ namespace tensorflow { namespace op { 
OutputVector translate_tobool_op(const NodeContext& node) { // (rank(x) == 0 && x != 0) || (rank > 0 && ReduceProd(ShapeOf(x))) > 0 - default_op_checks(node, 1, {"ToBool"}); auto x = node.get_input(0); // prepare auxiliary zero and zero constants of the same type as the inputs - auto zero = create_same_type_const_scalar(x, 0); - auto zero_2 = make_shared(element::i32, Shape{}, 0); - auto true_const = make_shared(element::boolean, Shape{}, true); + auto zero_x = create_same_type_const_scalar(x, 0); + auto zero_i64 = make_shared(element::i64, Shape{}, 0); + auto one_i64 = make_shared(element::i64, Shape{}, 1); auto false_const = make_shared(element::boolean, Shape{}, false); // compute a mask to get rank(x) == 0 - auto x_rank = compute_subgraph_scalar_rank(x, element::i32); + auto x_rank = compute_subgraph_scalar_rank(x, element::i64, true); + // 1. try to evaluate if it satisfy non-zero scalar input // compute rank(x) == 0 - auto is_zero = make_shared(x_rank, zero_2); - + auto is_rank_zero = make_shared(x_rank, zero_i64); // compute mask to get x != 0 - auto is_not_zero = make_shared(x, zero); - + auto is_x_not_zero = make_shared(x, zero_x)->output(0); // compute (rank(x) == 0 && x != 0) - auto logical_and = make_shared(is_zero, is_not_zero); - // compute rank(x) > 0 - auto greater_than_zero = make_shared(x_rank, zero_2); + auto scalar_cond = make_shared(is_rank_zero, is_x_not_zero)->output(0); + // generate reduce_axes + auto reduce_axes = make_shared(zero_i64, x_rank, one_i64); + scalar_cond = make_shared(scalar_cond, reduce_axes, false); + // correct result for empty tensor, for which scalar_cond is still equal to True + scalar_cond = make_shared(is_rank_zero, scalar_cond, false_const); + // 2. 
try to evaluate if it is non-scalar input tensor and not empty tensor + // compute rank(x) > 0 + auto rank_greater_than_zero = make_shared(x_rank, zero_i64); // compute ShapeOf(x) - auto cond_shape = make_shared(x, element::i32); + auto x_shape = make_shared(x, element::i64); // compute ReduceProd(ShapeOf(x))) and axis - auto axis = make_shared(element::i32, Shape{}, 0); - auto reduce_prod = make_shared(cond_shape, axis); - + auto reduce_axis = make_shared(element::i32, Shape{}, 0); + auto num_elems = make_shared(x_shape, reduce_axis, false); // compute ReduceProd(ShapeOf(x))) > 0 - auto greater_than__zero_2 = make_shared(reduce_prod, zero_2); + auto num_elems_greater_than_zero = make_shared(num_elems, zero_i64); // compute (rank > 0 && ReduceProd(ShapeOf(x))) > 0 - auto logical_and_2 = make_shared(greater_than_zero, greater_than__zero_2); - - auto logical_or = make_shared(logical_and, logical_and_2); + // it will be a scalar + auto non_scalar_tensor_not_empty = make_shared(rank_greater_than_zero, num_elems_greater_than_zero); - auto tobool = make_shared(logical_or, true_const, false_const); - set_node_name(node.get_name(), tobool); - return tobool->outputs(); + auto to_bool = make_shared(scalar_cond, non_scalar_tensor_not_empty); + set_node_name(node.get_name(), to_bool); + return to_bool->outputs(); } } // namespace op } // namespace tensorflow } // namespace frontend -} // namespace ov \ No newline at end of file +} // namespace ov diff --git a/src/frontends/tensorflow_common/src/op/top_k.cpp b/src/frontends/tensorflow_common/src/op/top_k.cpp index c916fe5b746327..4d3bfdcf64bcaf 100644 --- a/src/frontends/tensorflow_common/src/op/top_k.cpp +++ b/src/frontends/tensorflow_common/src/op/top_k.cpp @@ -3,6 +3,7 @@ // #include "common_op_table.hpp" +#include "openvino/op/convert.hpp" #include "openvino/op/topk.hpp" using namespace std; @@ -14,22 +15,35 @@ namespace tensorflow { namespace op { NamedOutputVector translate_top_k_base_op(const NodeContext& node, const 
ov::Output& k_input, - int min_input_size) { + int min_input_size, + const ov::element::Type& index_type = ov::element::i32) { default_op_checks(node, min_input_size, {"TopK", "TopKV2", "TOPK_V2"}); auto input = node.get_input(0); // retrieve k attribute bool sorted = node.get_attribute("sorted", true); + auto topk_index_type = index_type; + if (index_type == ov::element::i16) { + // v11::TopK supports only int32 and int64 output index type + topk_index_type = ov::element::i32; + } auto top_k = make_shared(input, k_input, -1, ov::op::v11::TopK::Mode::MAX, sorted ? v11::TopK::SortType::SORT_VALUES : v11::TopK::SortType::SORT_INDICES, - ov::element::i32, + topk_index_type, true); + auto values = top_k->output(0); + auto indices = top_k->output(1); + if (index_type != topk_index_type) { + // satisfy the requested output index type + indices = make_shared(indices, index_type)->output(0); + } set_node_name(node.get_name(), top_k); - return {{"values", top_k->output(0)}, {"indices", top_k->output(1)}}; + return {{"values", values}, {"indices", indices}}; } + NamedOutputVector translate_top_k_op(const NodeContext& node) { // retrieve k attribute auto k = node.get_attribute("k"); @@ -39,8 +53,9 @@ NamedOutputVector translate_top_k_op(const NodeContext& node) { NamedOutputVector translate_top_k_v2_op(const NodeContext& node) { default_op_checks(node, 2, {"TopKV2", "TOPK_V2"}); + auto index_type = node.get_attribute("index_type", ov::element::i32); auto k_input = node.get_input(1); - return translate_top_k_base_op(node, k_input, 1); + return translate_top_k_base_op(node, k_input, 1, index_type); } } // namespace op } // namespace tensorflow diff --git a/src/frontends/tensorflow_common/src/op/unique_with_counts.cpp b/src/frontends/tensorflow_common/src/op/unique_with_counts.cpp index 7c5c0a196aa018..d3ca3156ed1347 100644 --- a/src/frontends/tensorflow_common/src/op/unique_with_counts.cpp +++ b/src/frontends/tensorflow_common/src/op/unique_with_counts.cpp @@ -6,6 +6,7 @@ 
#include "openvino/op/unique.hpp" using namespace std; +using namespace ov; using namespace ov::op; namespace ov { @@ -19,8 +20,9 @@ OutputVector translate_unique_with_counts_op(const NodeContext& node) { // get input 'x' from node and node name auto x = node.get_input(0); auto node_name = node.get_name(); + auto out_idx = node.get_attribute("out_idx", element::i32); - auto unique = make_shared(x, false, ov::element::i32, ov::element::i32); + auto unique = make_shared(x, false, out_idx, out_idx); set_node_name(node_name, unique); // story 'y', 'idx', and 'count' outputs from Unique in separate variables @@ -34,4 +36,4 @@ OutputVector translate_unique_with_counts_op(const NodeContext& node) { } // namespace op } // namespace tensorflow } // namespace frontend -} // namespace ov \ No newline at end of file +} // namespace ov diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp index 627314748bbe9c..5674c75dd546d7 100644 --- a/src/inference/include/openvino/runtime/properties.hpp +++ b/src/inference/include/openvino/runtime/properties.hpp @@ -580,6 +580,12 @@ static constexpr Property dynamic_quantization */ static constexpr Property kv_cache_precision{"KV_CACHE_PRECISION"}; +/** + * @brief This property scales down activations to prevent overflows when inference precision is f16. 
+ * @ingroup ov_runtime_cpp_prop_api + */ +static constexpr Property activations_scale_factor{"ACTIVATIONS_SCALE_FACTOR"}; + } // namespace hint /** diff --git a/src/plugins/intel_cpu/CMakeLists.txt b/src/plugins/intel_cpu/CMakeLists.txt index 6965b7a25ce512..04909c7d8f5a5a 100644 --- a/src/plugins/intel_cpu/CMakeLists.txt +++ b/src/plugins/intel_cpu/CMakeLists.txt @@ -143,7 +143,7 @@ else() endif() ov_option(ENABLE_MLAS_FOR_CPU "Enable MLAS for OpenVINO CPU Plugin" ${ENABLE_MLAS_FOR_CPU_DEFAULT}) -if(RISCV64_THEAD) +if(RISCV64_XUANTIE) set(ENABLE_SHL_FOR_CPU_DEFAULT ON) else() set(ENABLE_SHL_FOR_CPU_DEFAULT OFF) diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index 421dca07747932..adcaeaaaa31a6f 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -421,6 +421,13 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { streamsChanged = true; } +#if defined(OV_CPU_WITH_SHL) + // TODO: multi-stream execution is unsafe when SHL is used: + // The library uses global static variables as flags and counters. 
+ streams = 1; + streamsChanged = true; +#endif + this->modelType = modelType; CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties()); diff --git a/src/plugins/intel_cpu/src/cpu_types.cpp b/src/plugins/intel_cpu/src/cpu_types.cpp index 953f94cb3d5776..e20369c9cca215 100644 --- a/src/plugins/intel_cpu/src/cpu_types.cpp +++ b/src/plugins/intel_cpu/src/cpu_types.cpp @@ -256,7 +256,8 @@ static const TypeToNameMap& get_type_to_name_tbl() { {"LLMMLP", Type::LLMMLP}, {"QKVProjection", Type::QKVProjection}, {"RMS", Type::RMS}, - {"SearchSorted", Type::SearchSorted} + {"SearchSorted", Type::SearchSorted}, + {"LoraSubgraph", Type::LoRA} }; return type_to_name_tbl; } @@ -389,6 +390,7 @@ std::string NameFromType(const Type type) { CASE(QKVProjection); CASE(RMS); CASE(SearchSorted); + CASE(LoRA); CASE(Unknown); } #undef CASE diff --git a/src/plugins/intel_cpu/src/cpu_types.h b/src/plugins/intel_cpu/src/cpu_types.h index c0a2acc3329a9c..d6ac9947a8fb5d 100644 --- a/src/plugins/intel_cpu/src/cpu_types.h +++ b/src/plugins/intel_cpu/src/cpu_types.h @@ -134,6 +134,7 @@ enum class Type { QKVProjection, RMS, SearchSorted, + LoRA }; enum class Algorithm { diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 45118763a3eaf9..f3f3a379fc2af7 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -371,9 +371,20 @@ void Graph::Activate(const std::vector& externalInputMemory, std::tie(m_executableGraphNodes, m_executableSyncNodesInds) = ExtractExecutableNodesAndSyncPoints(syncNodesInds, graphNodes); - status = hasDynNodes ? (parallel_get_max_threads() > 1 ? 
Status::ReadyDynamic : Status::ReadyDynamicSeq) - : Status::ReadyStatic; - + if (hasDynNodes) { + status = Status::ReadyDynamic; + // Here we use the following heuristic: if the number of exec nodes is less than 10 times the number of sync + // nodes, it does make sense to use Sequential dynamic shapes processing due to the high overheads on context + // switching when the dynamic shapes are being processed in parallel and there are a lot of sync points. Also + // this rule works for short graphs (usually subgraphs) when the amount of nodes is too low to process them in + // parallel. The check is written as a multiplication so that an empty sync point list cannot divide by zero. + const bool fewExecPerSync = m_executableGraphNodes.size() < 10 * m_executableSyncNodesInds.size(); + if (fewExecPerSync || parallel_get_max_threads() < 2) { + status = Status::ReadyDynamicSeq; + } + } else { + status = Status::ReadyStatic; + } CPU_DEBUG_CAP_ENABLE(serialize(*this)); } @@ -1286,23 +1297,40 @@ class UpdateNodes : public UpdateNodesBase { if (origin_nested_levels < 2) { set_max_nested_levels(2); } + // In OpenMP, an exception that is thrown in a parallel region must be caught and handled in the same region by the same thread. + // Therefore, each section copies its error message into its own std::string (a raw e.what() pointer dangles once the handler exits, and one shared variable would be a data race) and a new exception is thrown outside the parallel region. + std::string dynParamsError, shapesError; #pragma omp parallel #pragma omp sections { #pragma omp section { - updateDynParams(startCounter, stopIndx); + try { + updateDynParams(startCounter, stopIndx); + } catch (const std::exception& e) { + dynParamsError = e.what(); + } catch (...) { + dynParamsError = "[ CPU ] Could not update dynamic parameters."; + } } #pragma omp section { - updateShapes(startCounter, stopIndx); + try { + updateShapes(startCounter, stopIndx); + } catch (const std::exception& e) { + shapesError = e.what(); + } catch (...) 
{ + shapesError = "[ CPU ] Could not update shapes."; + } } } if (origin_nested_levels != 2) { set_max_nested_levels(origin_nested_levels); } + + OPENVINO_ASSERT(dynParamsError.empty() && shapesError.empty(), dynParamsError, shapesError); } }; #endif diff --git a/src/plugins/intel_cpu/src/graph.h b/src/plugins/intel_cpu/src/graph.h index b3634800fb2e05..d50ccc152c9186 100644 --- a/src/plugins/intel_cpu/src/graph.h +++ b/src/plugins/intel_cpu/src/graph.h @@ -49,8 +49,16 @@ class Graph { ~Graph(); - bool IsReady() { - return one_of(status, Status::ReadyStatic, Status::ReadyDynamic, Status::ReadyDynamicSeq); + bool IsStatic() const { + return Status::ReadyStatic == status; + } + + bool IsDynamic() const { + return one_of(status, Status::ReadyDynamic, Status::ReadyDynamicSeq); + } + + bool IsReady() const { + return IsStatic() || IsDynamic(); } const Config & getConfig() const { @@ -193,7 +201,6 @@ class Graph { return graphHasDynamicInput; } - Status getStatus() const {return status;} const std::unordered_map& getInternalStateNodes() const; /** @@ -210,6 +217,10 @@ class Graph { void Activate(const std::vector& externalInputMemory = {}, const std::vector& externalOutputMemory = {}); + const std::unordered_map& getOutputNodesMemBlocksMap() const { + return outputNodesMemBlocksMap; + } + protected: void ForgetGraphData() { status = Status::NotReady; @@ -273,7 +284,6 @@ class Graph { template void InferDynamic(SyncInferRequest* request, int numaId, UpdateStrategy&& update); - friend class intel_cpu::SyncInferRequest; friend std::shared_ptr dump_graph_as_ie_ngraph_net(const Graph &graph); private: diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp index 6b3175e24d9dcb..ab7eb223ba17ce 100644 --- a/src/plugins/intel_cpu/src/graph_optimizer.cpp +++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp @@ -183,6 +183,10 @@ void GraphOptimizer::ApplyCommonGraphOptimizations(Graph &graph) { MatchSdpaKvCache(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, 
taskChain, "DropRedundantMemoryOutput"); + DropRedundantMemoryOutput(graph); + graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "RemoveDroppedEdges"); graph.RemoveDroppedEdges(); } @@ -3186,5 +3190,117 @@ void GraphOptimizer::MatchSdpaKvCache(Graph &graph) { } } +void GraphOptimizer::DropRedundantMemoryOutput(Graph &graph) { + // When we have a MemoryInput->MemoryOutput pair, that means that the state is immediately populated with the init + // subgraph values when the init subgraph exists. In all the other cases the state is simply a read only object. + // We can optimize such a case removing the MemoryOutput node and transferring the state values update + // responsibility to a special type of the MemoryInput node - MemoryInputSingle + auto& graphNodes = graph.GetNodes(); + + auto isSuitableMemInput = [](const NodePtr& node) -> bool { + if (Type::MemoryInput != node->getType()) { + return false; + } + + CPU_GRAPH_OPTIMIZER_SCOPE(DropRedundantMemoryOutput_isSuitableMemInput); + + auto memInputBase = std::dynamic_pointer_cast(node); + OPENVINO_ASSERT(memInputBase, + "Unexpectedly wrong dynamic type of node: ", + node->getName(), + " of type: ", + node->getTypeStr()); + + auto id = memInputBase->getId(); + + NodePtr MemoryOutput = nullptr; + auto&& childEdges = node->getChildEdgesAtPort(0); + for (auto&& item : childEdges) { + auto childNode = item->getChild(); + + if (Type::MemoryOutput == childNode->getType()) { + auto memOutputBase = std::dynamic_pointer_cast(childNode); + OPENVINO_ASSERT(memOutputBase, + "Unexpectedly wrong dynamic type of node: ", + childNode->getName(), + " of type: ", + childNode->getTypeStr()); + + if (memOutputBase->getId() != id) { + return false; // an Assign node from different Variable is attached + } + + if (MemoryOutput && MemoryOutput != childNode) { + //only one child MemoryOutput is expected + return false; + } + MemoryOutput = childNode; + } + } + return nullptr != MemoryOutput; + }; + + for (size_t i = 0; i < 
graphNodes.size(); i++) { + auto node = graphNodes[i]; + if (!isSuitableMemInput(node)) { + continue; + } + + CPU_GRAPH_OPTIMIZER_SCOPE(DropRedundantMemoryOutput_Node); + + auto memInputNode = std::dynamic_pointer_cast(node); + OPENVINO_ASSERT(memInputNode, "MemoryInput node ", node->getName(), " has unexpected dynamic type"); + + ov::optional inputShape; + ov::optional inputPrc; + + if (!node->getParentEdges().empty()) { + inputShape = ov::optional(node->getInputShapeAtPort(0)); + inputPrc = ov::optional(node->getOriginalInputPrecisionAtPort(0)); + } + + //search for the MemoryOutputNode + NodePtr memoryOutputNode; + for (auto&& edge : node->getChildEdgesAtPort(0)) { + auto child = edge->getChild(); + if (Type::MemoryOutput == child->getType()) { + memoryOutputNode = child; + break; + } + } + OPENVINO_ASSERT(memoryOutputNode, "Corresponding MemoryOutput has not been found"); + + graph.RemoveEdge(memoryOutputNode->getParentEdgeAt(0)); + // there are no output edges from MemoryOutput nodes + + // now replace the existing MemoryInput with a special type that works without the corresponding MemoryOutput + auto memInputSingle = std::make_shared(memInputNode->getId(), + memInputNode->getName(), + memInputNode->getTypeStr(), + memInputNode->getOutputShapeAtPort(0), + memInputNode->getOriginalOutputPrecisionAtPort(0), + graph.getGraphContext(), + inputShape, + inputPrc); + + graph.AddNode(memInputSingle); + + if (!memInputNode->getParentEdges().empty()) { + auto parentEdge = memInputNode->getParentEdgeAt(0); + auto parent = parentEdge->getParent(); + const auto inputNum = parentEdge->getInputNum(); + graph.RemoveEdge(parentEdge); + graph.CreateEdge(parent, memInputSingle, inputNum, 0); + } + + for (auto&& edge : memInputNode->getChildEdgesAtPort(0)) { + auto child = edge->getChild(); + const auto outputNum = edge->getOutputNum(); + graph.RemoveEdge(edge); + graph.CreateEdge(memInputSingle, child, 0, outputNum); + } + } +} + } // namespace intel_cpu } // namespace ov diff 
--git a/src/plugins/intel_cpu/src/graph_optimizer.h b/src/plugins/intel_cpu/src/graph_optimizer.h index 0a85a253ba8d66..886296a7c0053b 100644 --- a/src/plugins/intel_cpu/src/graph_optimizer.h +++ b/src/plugins/intel_cpu/src/graph_optimizer.h @@ -52,6 +52,7 @@ class GraphOptimizer { void RemoveMemoryInputConvert(Graph &graph); void RemoveConvertMemoryOutput(Graph &graph); void MatchSdpaKvCache(Graph &graph); + void DropRedundantMemoryOutput(Graph &graph); bool canBeInplaced(const NodePtr& parentNode, const NodePtr& childNode); // Method checks that after the sequential execution of Transpose and Reorder nodes, diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp index f255a46efe7d0a..f0b817dcda859c 100644 --- a/src/plugins/intel_cpu/src/infer_request.cpp +++ b/src/plugins/intel_cpu/src/infer_request.cpp @@ -140,7 +140,7 @@ void SyncInferRequest::infer() { throw_if_canceled(); // update output control blocks, if any, in order to refresh internal buffers - if (Graph::Status::ReadyDynamic == m_graph->getStatus()) { + if (m_graph->IsDynamic()) { for (auto&& item : m_outputControlBlocks) { item.second.update(); } @@ -178,7 +178,7 @@ void SyncInferRequest::change_default_ptr() { std::unordered_set inputPtrs; std::function& tensor)> changeInpPtr; - if (Graph::Status::ReadyDynamic == m_graph->getStatus()) { + if (m_graph->IsDynamic()) { changeInpPtr = [&inputPtrs](const EdgePtr &edge, ov::SoPtr& tensor) { change_edge_ptr(edge, tensor); inputPtrs.insert(tensor->data()); @@ -278,8 +278,8 @@ void SyncInferRequest::change_default_ptr() { change_edge_ptr(parentEdge, it.second); } - if (Graph::Status::ReadyDynamic == m_graph->getStatus()) { - const auto &outMemBlocksMap = m_graph->outputNodesMemBlocksMap; + if (m_graph->IsDynamic()) { + const auto &outMemBlocksMap = m_graph->getOutputNodesMemBlocksMap(); for (auto&& item : outMemBlocksMap) { const auto& name = item.first; @@ -476,7 +476,7 @@ void 
SyncInferRequest::init_tensor(const std::size_t& port_index, const ov::ISyn ov::SoPtr tensor; if (type == ov::ISyncInferRequest::FoundPort::Type::INPUT) { - OPENVINO_ASSERT(m_graph->inputNodesMap.find(port_index) != m_graph->inputNodesMap.end(), + OPENVINO_ASSERT(m_graph->GetInputNodesMap().find(port_index) != m_graph->GetInputNodesMap().end(), "Tensor with index: ", port_index, " exists in CPU plugin graph, but absents in model inputs"); @@ -509,7 +509,7 @@ void SyncInferRequest::init_tensor(const std::size_t& port_index, const ov::ISyn } if (type == ov::ISyncInferRequest::FoundPort::Type::OUTPUT) { - const auto& outMap = m_graph->outputNodesMap; + const auto& outMap = m_graph->GetOutputNodesMap(); auto output = outMap.find(port_index); OPENVINO_ASSERT(output != outMap.end(), "Tensor with index: ", diff --git a/src/plugins/intel_cpu/src/memory_state.cpp b/src/plugins/intel_cpu/src/memory_state.cpp index f5f76fe42feb48..bf77917497de77 100644 --- a/src/plugins/intel_cpu/src/memory_state.cpp +++ b/src/plugins/intel_cpu/src/memory_state.cpp @@ -156,6 +156,49 @@ MemoryPtr VariableStateDoubleBuffer::internal_state_mem() const { return prime_mem(); } +VariableStateSingleBuffer::VariableStateSingleBuffer(const std::string& name, + const MemoryPtr& external_buffer, + const MemoryDescPtr& external_desc) + : VariableStateBase(name, external_desc) { + OPENVINO_ASSERT(external_buffer); + m_internal_mem = external_buffer; + m_internal_desc = m_internal_mem->getDescPtr(); + auto&& shape = m_internal_desc->getShape(); + + if (shape.isStatic()) { + m_internal_mem->nullify(); + } else { + // in the case of the original desc has dynamic shape we create an empty tensor + auto new_desc = to_static(m_internal_desc); + m_internal_mem->redefineDesc(new_desc); + } +} +MemoryPtr VariableStateSingleBuffer::input_mem() { + return m_internal_mem; +} +MemoryPtr VariableStateSingleBuffer::output_mem() { + return m_internal_mem; +} +MemoryDescPtr VariableStateSingleBuffer::internal_desc() const 
{ + return m_internal_desc; +} + +void VariableStateSingleBuffer::reset_impl() { + auto new_desc = to_static(m_internal_desc); + if (m_internal_mem) { + m_internal_mem->redefineDesc(new_desc); + m_internal_mem->nullify(); + } +} + +MemoryPtr VariableStateSingleBuffer::internal_state_mem() const { + return m_internal_mem; +} + +void VariableStateSingleBuffer::commit_impl() { + // nothing to do +} + VariableStateKVcache::VariableStateKVcache( const std::string& name, const MemoryDescPtr& external_desc, diff --git a/src/plugins/intel_cpu/src/memory_state.h b/src/plugins/intel_cpu/src/memory_state.h index b4c52903d12f31..e7493f327e93fa 100644 --- a/src/plugins/intel_cpu/src/memory_state.h +++ b/src/plugins/intel_cpu/src/memory_state.h @@ -94,6 +94,27 @@ class VariableStateDoubleBuffer : public VariableStateBase { size_t buffer_num = 0; }; +class VariableStateSingleBuffer : public VariableStateBase { +public: + VariableStateSingleBuffer(const std::string& name, + const MemoryPtr& external_buffer, + const MemoryDescPtr& external_desc); + + MemoryPtr input_mem() override; + MemoryPtr output_mem() override; + MemoryDescPtr internal_desc() const override; + +private: + void reset_impl() override; + void commit_impl() override; + + MemoryPtr internal_state_mem() const override; + +private: + MemoryDescPtr m_internal_desc; //mem desc required by the graph internal tensor + MemoryPtr m_internal_mem; +}; + class VariableStateKVcache : public VariableStateBase { public: VariableStateKVcache(const std::string& name, diff --git a/src/plugins/intel_cpu/src/nodes/composite.cpp b/src/plugins/intel_cpu/src/nodes/composite.cpp index b38a56649bd60a..a1ceabd6942db1 100644 --- a/src/plugins/intel_cpu/src/nodes/composite.cpp +++ b/src/plugins/intel_cpu/src/nodes/composite.cpp @@ -15,11 +15,23 @@ namespace intel_cpu { namespace node { bool Composite::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { - return ov::is_type(op); + try { + if 
(!ov::is_type(op)) { + errorMessage = "Unknown SubGraph operation : " + std::string(op->get_type_info().name) + " with name '" + + op->get_friendly_name() + "'"; + } + } catch (...) { + return false; + } + return true; } Composite::Composite(const std::shared_ptr& op, const GraphContext::CPtr& context) : Node(op, context, InternalDynShapeInferFactory()) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); + } const auto& subModel = ov::as_type_ptr(op); OPENVINO_ASSERT(subModel, "Attempt to create SubGraph node from an invalid op type: ", op); @@ -27,7 +39,7 @@ Composite::Composite(const std::shared_ptr& op, const GraphContext::CP } void Composite::selectOptimalPrimitiveDescriptor() { - // for the input configution, just always use the parent configuration + // for the input configuration, just always use the parent configuration std::vector inConfs; std::vector graphInputConfig; @@ -38,14 +50,14 @@ void Composite::selectOptimalPrimitiveDescriptor() { } std::vector graphOutputConfig; - for (size_t i = 0; i < getParentEdges().size(); i++) { + for (size_t i = 0; i < outputShapes.size(); i++) { graphOutputConfig.emplace_back(node::Input::OutputConfig{true, true}); } // configure the inner graph to get the information about output memory descriptors m_graph.Init(m_body, context, graphInputConfig, graphOutputConfig); - // for the output decriptors, use the configuration of the graph's output nodes + // for the output descriptors, use the configuration of the graph's output nodes auto outputDescriptors = m_graph.getOutputMemoryDescriptors(); std::vector outConfs; @@ -89,9 +101,6 @@ void Composite::execute(dnnl::stream) { void Composite::executeDynamicImpl(dnnl::stream strm) { execute(strm); - if (!inputShapesModified()) - return; - // since the shape inference is not performed for the composite node // a memory of the extra child edges, attached to the output ports // has to be updated after an 
inference of the inner graph finished diff --git a/src/plugins/intel_cpu/src/nodes/ctc_loss.cpp b/src/plugins/intel_cpu/src/nodes/ctc_loss.cpp index 78bb6fc0563e60..3161c9a0e87a84 100644 --- a/src/plugins/intel_cpu/src/nodes/ctc_loss.cpp +++ b/src/plugins/intel_cpu/src/nodes/ctc_loss.cpp @@ -84,7 +84,8 @@ void CTCLoss::execute(dnnl::stream strm) { std::vector decodedTargetLenB(batchNum, 0); std::vector> targetDB(batchNum); std::vector>> logProbabilitiesB(batchNum); - std::vector errorMsgB(parallel_get_max_threads()); + const auto threads_num = parallel_get_max_threads(); + std::vector errorMsgB(threads_num); auto threadBody_1 = [&](const int ithr, const int nthr) { size_t start(0lu), end(0lu); @@ -153,7 +154,7 @@ void CTCLoss::execute(dnnl::stream strm) { } // for batch }; // threadBody_1 - parallel_nt(0, threadBody_1); + parallel_nt(threads_num, threadBody_1); if (returnCode != 0) { std::string resErr(""); for (auto& err : errorMsgB) { diff --git a/src/plugins/intel_cpu/src/nodes/deconv.cpp b/src/plugins/intel_cpu/src/nodes/deconv.cpp index 8a7f95268b4f3a..cb340afc029304 100644 --- a/src/plugins/intel_cpu/src/nodes/deconv.cpp +++ b/src/plugins/intel_cpu/src/nodes/deconv.cpp @@ -426,8 +426,10 @@ std::vector Deconvolution::getAvailableFormatsForDims(const else if (dims.getRank() == 2) return {memory::format_tag::nc}; else if (dims.getRank() == 3) - return {memory::format_tag::tnc, memory::format_tag::ntc, - memory::format_tag::ncw, memory::format_tag::nCw8c, memory::format_tag::nCw16c }; + return {memory::format_tag::ncw, + memory::format_tag::nCw8c, + memory::format_tag::nCw16c, + memory::format_tag::nwc}; else if (dims.getRank() == 4) return {memory::format_tag::nchw, memory::format_tag::nChw8c, memory::format_tag::nChw16c, memory::format_tag::nhwc }; diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.cpp b/src/plugins/intel_cpu/src/nodes/eltwise.cpp index 5c3a358dff9d38..c2d23bf9adc89e 100644 --- a/src/plugins/intel_cpu/src/nodes/eltwise.cpp +++ 
b/src/plugins/intel_cpu/src/nodes/eltwise.cpp @@ -1503,7 +1503,7 @@ class EltwiseJitExecutor : public Eltwise::IEltwiseExecutor { fullWorkAmount *= jep.dims[i]; } - size_t minimalConcurrency = parallel_get_max_threads(); + m_threads_num = static_cast(parallel_get_max_threads()); size_t minimalJitWorkAmount = 256; size_t currentJitWorkAmount = jep.dims[jep.dims.size() - 1]; int collapsedDims = 0; @@ -1516,6 +1516,7 @@ class EltwiseJitExecutor : public Eltwise::IEltwiseExecutor { for (size_t j = 1; j < inpDims.size(); j++) { if (inpDims[j].back() != inpDims[0].back()) { hasDifferentDims = true; + break; } } @@ -1538,7 +1539,7 @@ class EltwiseJitExecutor : public Eltwise::IEltwiseExecutor { } size_t nextJitWorkAmount = currentJitWorkAmount * jep.dims[jep.dims.size() - 2]; - if (fullWorkAmount / nextJitWorkAmount >= minimalConcurrency) { + if (fullWorkAmount / nextJitWorkAmount >= m_threads_num) { currentJitWorkAmount = nextJitWorkAmount; collapsedDims++; @@ -1622,8 +1623,7 @@ class EltwiseJitExecutor : public Eltwise::IEltwiseExecutor { if (_pKernel->jep_.input_size == optimalTensorRank) { // execute Optimized 6D - parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], - [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) { + auto d6_loop = [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) { auto args = jit_eltwise_call_args_indexes(); args.indexes[0] = i0; args.indexes[1] = i1; @@ -1632,7 +1632,11 @@ class EltwiseJitExecutor : public Eltwise::IEltwiseExecutor { args.indexes[4] = i4; (*_pKernel)(&args_ptrs, &args); - }); + }; + + parallel_nt_static(m_threads_num, [&](const int ithr, const int nthr) { + for_5d(ithr, nthr, dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], d6_loop); + }); } else { // execute Optimized Generic if (_pKernel->jep_.use_runtime_ptrs) { @@ -1642,7 +1646,7 @@ class EltwiseJitExecutor : public Eltwise::IEltwiseExecutor { _schedulerWorkAmount *= dims_out[i]; } } - parallel_nt(0, [&](const 
int ithr, const int nthr) { + parallel_nt(m_threads_num, [&](const int ithr, const int nthr) { size_t start = 0, end = 0; splitter(_schedulerWorkAmount, nthr, ithr, start, end); @@ -1676,6 +1680,7 @@ class EltwiseJitExecutor : public Eltwise::IEltwiseExecutor { std::unique_ptr _pKernel; size_t _schedulerWorkAmount = 0; size_t _batchDimIdx = 0; + size_t m_threads_num = 0lu; public: static const int optimalTensorRank = 6; diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_convert.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_convert.cpp index 440af52749bc9c..1bc0585930387f 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_convert.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_convert.cpp @@ -27,12 +27,11 @@ bool ACLConvertExecutor::init(const ConvertParams& convertParams, if (!isCopyOp && dstPrecision == DataType::S8) { dstPrecision = DataType::QASYMM8_SIGNED; } - auto srcDims = srcDesc->getShape().getStaticDims(); - auto dstDims = dstDesc->getShape().getStaticDims(); - auto srcDataLayout = getAclDataLayoutByMemoryDesc(srcDesc); - auto dstDataLayout = getAclDataLayoutByMemoryDesc(dstDesc); - auto srcTensorInfo = TensorInfo(shapeCast(collapse_dims_to_max_rank(srcDims)), 1, srcPrecision, srcDataLayout); - auto dstTensorInfo = TensorInfo(shapeCast(collapse_dims_to_max_rank(dstDims)), 1, dstPrecision, dstDataLayout); + // Use 1D TensorInfo, since UNKNOWN DataLayout may have accuracy issues + auto srcDims1D = convertParams.size; + auto dstDims1D = convertParams.size; + auto srcTensorInfo = TensorInfo(TensorShape(srcDims1D), 1, srcPrecision); + auto dstTensorInfo = TensorInfo(TensorShape(dstDims1D), 1, dstPrecision); if (isCopyOp) { Status s = NECopy::validate(&srcTensorInfo, &dstTensorInfo); if (!s) { diff --git a/src/plugins/intel_cpu/src/nodes/gather.cpp b/src/plugins/intel_cpu/src/nodes/gather.cpp index 81f6f36b84dd89..d2629fe8fe6811 100644 --- a/src/plugins/intel_cpu/src/nodes/gather.cpp +++ 
b/src/plugins/intel_cpu/src/nodes/gather.cpp @@ -253,6 +253,7 @@ void Gather::createPrimitive() { if (isInPlace()) { return; } + m_threads_num = parallel_get_max_threads(); #if defined(OPENVINO_ARCH_X86_64) uint64_t idxElPerVec = 1; if (!isDynamicNode()) { @@ -294,11 +295,10 @@ void Gather::createPrimitive() { if (!isDynamicNode()) { const uint64_t dataElPerVec = jitKernel->getDataElPerVec(); - const uint64_t nthr = parallel_get_max_threads(); - const uint64_t wpt = ((totalWork / dataElPerVec) / nthr + 1) * dataElPerVec; - execParamsPerThread.resize(nthr); + const uint64_t wpt = ((totalWork / dataElPerVec) / m_threads_num + 1) * dataElPerVec; + execParamsPerThread.resize(m_threads_num); - parallel_nt(nthr, [&](const int ithr, const int nthr) { + parallel_nt(m_threads_num, [&](const int ithr, const int nthr) { const uint64_t dstStart = std::min(wpt * ithr, totalWork); const uint64_t dstEnd = std::min(wpt * (ithr + 1), totalWork); @@ -469,7 +469,7 @@ void Gather::execute(dnnl::stream strm) { (*jitKernel)(&arg); }; - parallel_nt(0, threadBody); + parallel_nt(m_threads_num, threadBody); return; } @@ -543,7 +543,7 @@ void Gather::executeDynamicImpl(dnnl::stream strm) { (*jitKernel)(&arg); }; - parallel_nt(0, threadBody); + parallel_nt(m_threads_num, threadBody); return; } diff --git a/src/plugins/intel_cpu/src/nodes/gather.h b/src/plugins/intel_cpu/src/nodes/gather.h index 96dad228f65b59..6ee097e9a1fbab 100644 --- a/src/plugins/intel_cpu/src/nodes/gather.h +++ b/src/plugins/intel_cpu/src/nodes/gather.h @@ -110,6 +110,7 @@ class Gather : public Node { bool have_scalar_scale = false; size_t zp_group_size = 1u; size_t scale_group_size = 1u; + size_t m_threads_num = 0lu; std::shared_ptr jitKernel; }; diff --git a/src/plugins/intel_cpu/src/nodes/grid_sample.cpp b/src/plugins/intel_cpu/src/nodes/grid_sample.cpp index 618d6b39105689..c8eed21bb312f5 100644 --- a/src/plugins/intel_cpu/src/nodes/grid_sample.cpp +++ b/src/plugins/intel_cpu/src/nodes/grid_sample.cpp @@ -149,11 
+149,11 @@ void GridSample::createPrimitive() { } jitKernel->create_ker(); - nthr = parallel_get_max_threads(); - execParamsPerThread.resize(nthr); + m_threads_num = parallel_get_max_threads(); + execParamsPerThread.resize(m_threads_num); if (!x64::mayiuse(x64::avx512_core)) { const auto dataElPerVec = jitKernel->getDataElPerVec(); - parallel_nt(nthr, [&](const int ithr, const int nthr) { + parallel_nt(m_threads_num, [&](const int ithr, const int nthr) { auto& p = execParamsPerThread[ithr]; p.srcHeightF.resize(dataElPerVec); @@ -197,9 +197,9 @@ void GridSample::prepareParams() { const auto& srcDataShape = dataMemPtr->getStaticDims(); const auto& dstShape = dstMemPtr->getStaticDims(); const uint64_t totalWork = dstShape[2] * dstShape[3]; - const uint64_t wpt = ((totalWork / dataElPerVec) / nthr + 1) * dataElPerVec; + const uint64_t wpt = ((totalWork / dataElPerVec) / m_threads_num + 1) * dataElPerVec; - parallel_nt(nthr, [&](const int ithr, const int nthr) { + parallel_nt(m_threads_num, [&](const int ithr, const int nthr) { const uint64_t dstStart = std::min(wpt * ithr, totalWork); const uint64_t dstEnd = std::min(wpt * (ithr + 1), totalWork); @@ -303,7 +303,7 @@ void GridSample::execute(dnnl::stream strm) { (*jitKernel)(&arg); }; - parallel_nt(nthr, threadBody); + parallel_nt(m_threads_num, threadBody); } void GridSample::executeDynamicImpl(dnnl::stream strm) { diff --git a/src/plugins/intel_cpu/src/nodes/grid_sample.hpp b/src/plugins/intel_cpu/src/nodes/grid_sample.hpp index 0d172bd5c3e055..b4468d58be9b52 100644 --- a/src/plugins/intel_cpu/src/nodes/grid_sample.hpp +++ b/src/plugins/intel_cpu/src/nodes/grid_sample.hpp @@ -62,7 +62,7 @@ class GridSample : public Node { ov::element::Type dataPrecision; ov::element::Type gridPrecision = ov::element::f32; - int nthr = 1; + size_t m_threads_num = 0lu; std::vector execParamsPerThread; static constexpr size_t IN_DATA = 0; diff --git a/src/plugins/intel_cpu/src/nodes/input.cpp b/src/plugins/intel_cpu/src/nodes/input.cpp 
index 4ee5707e0a9e76..1f650bd8c5de17 100644 --- a/src/plugins/intel_cpu/src/nodes/input.cpp +++ b/src/plugins/intel_cpu/src/nodes/input.cpp @@ -430,6 +430,7 @@ Input::Input(const std::shared_ptr& op, const GraphContext::CPtr context, OutputConfig config) : Input(op, context) { + extMemDesc = config.desc; m_useParentMemoryDescForOutput = config.useParentMemoryDescForOutput; m_isInPlace = config.inPlace; } diff --git a/src/plugins/intel_cpu/src/nodes/input.h b/src/plugins/intel_cpu/src/nodes/input.h index a954ce56665d61..4d7febb17ad4b7 100644 --- a/src/plugins/intel_cpu/src/nodes/input.h +++ b/src/plugins/intel_cpu/src/nodes/input.h @@ -19,9 +19,17 @@ class Input : public Node { }; struct OutputConfig { + OutputConfig() = default; + OutputConfig(bool useParentMemoryDesc_, bool inPlace_) + : useParentMemoryDescForOutput(useParentMemoryDesc_), + inPlace(inPlace_) {} + + OutputConfig(MemoryDescPtr desc_, bool inPlace_) : desc(std::move(desc_)), inPlace(inPlace_) {} + // @todo better to use memory desc with any layout and undefined precision - bool useParentMemoryDescForOutput; - bool inPlace; + MemoryDescPtr desc = nullptr; + bool useParentMemoryDescForOutput = false; + bool inPlace = false; }; Input(const std::shared_ptr& op, const GraphContext::CPtr context); diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp index 1543c168403382..25ddbb1b4246b1 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp @@ -1068,11 +1068,15 @@ static void mha_single_token_kernel(const ov::intel_cpu::PlainTensor& query, } }); - parallel_for3d(B, H, q_len, [&](size_t b, size_t h, size_t pq) { + auto bhl_loop = [&](size_t b, size_t h, size_t pq) { auto* temp = buf_attn_score.ptr(0, b, pq, h); size_t temp_stride = buf_attn_score.stride(0); auto* dst = has_out_transpose ? 
output_emb.ptr(b, pq, h * SV) : output_emb.ptr(b, h, pq); attn_reduce(dst, temp, nthr, SV, temp_stride); + }; + + parallel_nt_static(nthr, [&](const int ithr, const int nthr) { + for_3d(ithr, nthr, B, H, q_len, bhl_loop); }); } @@ -1144,7 +1148,7 @@ void mha_single_token(const ov::intel_cpu::PlainTensor& query, past_v_scale_zp, head_sum); } else { - OPENVINO_THROW("Unsupported precision: ", query.get_precision()); + OPENVINO_THROW("Unsupported precision: ", present_key.get_precision()); } #else if (present_key.get_precision() == ov::element::u8) { diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/rms_kernel.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/rms_kernel.cpp index 719e9a6e464934..30a7870a1a4b54 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/rms_kernel.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/rms_kernel.cpp @@ -123,22 +123,25 @@ void jit_rms_kernel::generate() { // x * 1/Sqrt(ReduceMean(x^2,axes)+eps) * gamma // sum(x^2) align(16); - Xbyak::Label loop_4reg; - L(loop_4reg); - { - load(vmm_src, reg_src, m_jcp.src_prc, vec_size, false); - vfmadd231ps(vmm_sum0, vmm_src, vmm_src); - load(vmm_src, reg_src, m_jcp.src_prc, vec_size, false, vec_size * m_jcp.src_prc.size() * 1); - vfmadd231ps(vmm_sum1, vmm_src, vmm_src); - load(vmm_src, reg_src, m_jcp.src_prc, vec_size, false, vec_size * m_jcp.src_prc.size() * 2); - vfmadd231ps(vmm_sum2, vmm_src, vmm_src); - load(vmm_src, reg_src, m_jcp.src_prc, vec_size, false, vec_size * m_jcp.src_prc.size() * 3); - vfmadd231ps(vmm_sum3, vmm_src, vmm_src); - - add(reg_src, vec_size * m_jcp.src_prc.size() * 4); - dec(reg_size); - jnz(loop_4reg); + if ((m_jcp.data_size / (vec_size * 4)) != 0) { + Xbyak::Label loop_4reg; + L(loop_4reg); + { + load(vmm_src, reg_src, m_jcp.src_prc, vec_size, false); + vfmadd231ps(vmm_sum0, vmm_src, vmm_src); + load(vmm_src, reg_src, m_jcp.src_prc, vec_size, false, vec_size * m_jcp.src_prc.size() * 1); + vfmadd231ps(vmm_sum1, vmm_src, vmm_src); + load(vmm_src, reg_src, 
m_jcp.src_prc, vec_size, false, vec_size * m_jcp.src_prc.size() * 2); + vfmadd231ps(vmm_sum2, vmm_src, vmm_src); + load(vmm_src, reg_src, m_jcp.src_prc, vec_size, false, vec_size * m_jcp.src_prc.size() * 3); + vfmadd231ps(vmm_sum3, vmm_src, vmm_src); + + add(reg_src, vec_size * m_jcp.src_prc.size() * 4); + dec(reg_size); + jnz(loop_4reg); + } } + // 1 ~ 3 vmm for (size_t i = m_jcp.data_size / (vec_size * 4) * 4; i < m_jcp.data_size / vec_size; i++) { load(vmm_src, reg_src, m_jcp.src_prc, vec_size, false); diff --git a/src/plugins/intel_cpu/src/nodes/llm_mlp.cpp b/src/plugins/intel_cpu/src/nodes/llm_mlp.cpp index 13c46a7c976cfd..8df1f5498da384 100644 --- a/src/plugins/intel_cpu/src/nodes/llm_mlp.cpp +++ b/src/plugins/intel_cpu/src/nodes/llm_mlp.cpp @@ -53,19 +53,19 @@ class LinearKsplit2 { OPENVINO_ASSERT((N % REG_BLK_N_SIZE) == 0); OPENVINO_ASSERT((K % reg_blk_K_size) == 0); - auto nthr = parallel_get_max_threads(); + m_threads_num = parallel_get_max_threads(); auto num_blk_N = N / REG_BLK_N_SIZE; - works.resize(nthr); + works.resize(m_threads_num); auto K_splits = 2; // split task on more cores is better on TBB - auto valid_nthr = nthr / 2; + auto valid_nthr = m_threads_num / 2; auto blkN_per_thread = (num_blk_N) / valid_nthr; auto blkN_leftover = num_blk_N - (blkN_per_thread * valid_nthr); auto start_blkN = 0; used_nthr = 0; - for (int ithr = 0; ithr < nthr; ithr += K_splits) { + for (int ithr = 0; ithr < m_threads_num; ithr += K_splits) { auto blkN = std::min(num_blk_N - start_blkN, blkN_per_thread); if (blkN_leftover > 0) { blkN_leftover--; @@ -106,7 +106,7 @@ class LinearKsplit2 { wbuffer.alloc(works, weight_element_size); - ov::parallel_nt_static(0, [&](const size_t ithr, const size_t nthr) { + ov::parallel_nt_static(m_threads_num, [&](const size_t ithr, const size_t nthr) { auto& work = works[ithr]; if (work) { if (is_quantized) { @@ -125,7 +125,7 @@ class LinearKsplit2 { float * w_scale) { static ReduceAdd2bh jit_reduce2cvt(true, std::is_same::value); - 
ov::parallel_nt_static(0, [&](const size_t ithr, const size_t nthr) { + ov::parallel_nt_static(m_threads_num, [&](const size_t ithr, const size_t nthr) { auto& work = works[ithr]; auto& workC = work.m_C; if (work) { @@ -165,6 +165,9 @@ class LinearKsplit2 { } }); } + +private: + int m_threads_num = 0; }; template @@ -205,18 +208,18 @@ class LinearGateUp { // in unit of 32 OPENVINO_ASSERT((N % REG_BLK_N_SIZE) == 0); OPENVINO_ASSERT((K % reg_blk_K_size) == 0); - auto nthr = parallel_get_max_threads(); + m_threads_num = parallel_get_max_threads(); auto num_blk_N = N / REG_BLK_N_SIZE; - works.resize(nthr); + works.resize(m_threads_num); // split task on more cores is better on TBB - auto valid_nthr = nthr; + auto valid_nthr = m_threads_num; auto blkN_per_thread = (num_blk_N) / valid_nthr; auto blkN_leftover = num_blk_N - (blkN_per_thread * valid_nthr); auto start_blkN = 0; used_nthr = 0; - for (int ithr = 0; ithr < nthr; ithr ++) { + for (int ithr = 0; ithr < m_threads_num; ithr ++) { auto blkN = std::min(num_blk_N - start_blkN, blkN_per_thread); if (blkN_leftover > 0) { blkN_leftover--; @@ -243,7 +246,7 @@ class LinearGateUp { wbuffer.alloc(works, weight_element_size); DEBUG_LOG("Linear N,K=", N, ",", K, " used_nthr=", used_nthr); - ov::parallel_nt_static(0, [&](const size_t ithr, const size_t nthr) { + ov::parallel_nt_static(m_threads_num, [&](const size_t ithr, const size_t nthr) { auto& work = works[ithr]; if (work) { if (quantized_int8) @@ -267,7 +270,7 @@ class LinearGateUp { const LLMMLPNode::Config& config, MatrixDynQuantPerRow& src_dq, float * w_scale) { - ov::parallel_nt_static(0, [&](const size_t ithr, const size_t nthr) { + ov::parallel_nt_static(m_threads_num, [&](const size_t ithr, const size_t nthr) { auto& work = works[ithr]; if (work) { work.run(M, pA, strideA_in_bytes); @@ -303,6 +306,9 @@ class LinearGateUp { } }); } + +private: + int m_threads_num = 0; }; template @@ -384,8 +390,8 @@ struct LLMMLP::Executor : public LLMMLP::ExecutorBase { 
reinterpret_cast(ptr)); }); - auto nthr = parallel_get_max_threads(); - for (int ithr = 0; ithr < nthr; ithr++) { + m_threads_num = parallel_get_max_threads(); + for (size_t ithr = 0lu; ithr < m_threads_num; ithr++) { auto C1_size = gate_up.works[ithr].set_C(M, reinterpret_cast(cur_scratch_base)); auto C2_size = down.works[ithr].set_C(M, reinterpret_cast(cur_scratch_base)); auto max_C_size = std::max(C1_size, C2_size); @@ -482,6 +488,9 @@ struct LLMMLP::Executor : public LLMMLP::ExecutorBase { dstC += BM * strideC / sizeof(T); } } + +private: + size_t m_threads_num = 0lu; }; #else template diff --git a/src/plugins/intel_cpu/src/nodes/lora.cpp b/src/plugins/intel_cpu/src/nodes/lora.cpp new file mode 100644 index 00000000000000..52509cdfc44a13 --- /dev/null +++ b/src/plugins/intel_cpu/src/nodes/lora.cpp @@ -0,0 +1,126 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "lora.h" + +#include "nodes/input.h" +#include "cpu_memory.h" +#include "ov_ops/lora_subgraph.hpp" +#include "utils/debug_capabilities.h" +#include "shape_inference/shape_inference_pass_through.hpp" + +namespace ov { +namespace intel_cpu { +namespace node { + +bool LoRA::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + if (!ov::is_type(op)) { + errorMessage = "Unknown LoRA operation : " + std::string(op->get_type_info().name) + " with name '" + + op->get_friendly_name() + "'"; + } + } catch (...) 
{ + return false; + } + return true; +} + +LoRA::LoRA(const std::shared_ptr& op, const GraphContext::CPtr& context) + : Node(op, context, PassThroughShapeInferFactory()) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); + } + const auto& loraModel = ov::as_type_ptr(op); + OPENVINO_ASSERT(loraModel, + "Attempt to create LoRA node from an invalid op type: ", + op, + " with name ", + op->get_friendly_name()); + + m_body = loraModel->get_function(); +} + +void LoRA::selectOptimalPrimitiveDescriptor() { + // for the input configuration, just always use the parent configuration + std::vector inConfs; + std::vector graphInputConfig; + + auto mainInputDesc = getParentOutputMemDesc(getParentEdgeAt(0)); + auto mainInputPrc = mainInputDesc->getPrecision(); // we have to align precision across all the inputs + + inConfs.emplace_back(mainInputDesc); + graphInputConfig.emplace_back(node::Input::InputConfig{mainInputDesc, true}); + + for (size_t i = 1; i < getParentEdges().size(); i++) { + auto desc = getParentOutputMemDesc(getParentEdgeAt(i))->cloneWithNewPrecision(mainInputPrc); + inConfs.emplace_back(desc); + graphInputConfig.emplace_back(node::Input::InputConfig{desc, true}); + } + + std::vector graphOutputConfig; + // enforce the same memory descriptor on the output as on the input to allow inPlace memory + graphOutputConfig.emplace_back(node::Input::OutputConfig{inConfs.front().getMemDesc(), true}); + + // configure the inner graph to get the information about output memory descriptors + m_graph.Init(m_body, context, graphInputConfig, graphOutputConfig); + + // for the output descriptors, use the configuration of the graph's output nodes + auto outputDescriptors = m_graph.getOutputMemoryDescriptors(); + + const auto& desc = outputDescriptors.front(); + + // just a sanity check + CPU_NODE_ASSERT(desc->isCompatible(*(inConfs.front().getMemDesc())), "Unexpected input/output descriptor mismatch"); + + 
std::vector outConfs; + + outConfs.emplace_back(desc, BlockedMemoryDesc::FULL_MASK, 0); // use the memory from the first input inPlace + + const NodeConfig config(inConfs, outConfs); + + supportedPrimitiveDescriptors.clear(); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::undef); + + selectPrimitiveDescriptorByIndex(0); +} + +// @todo add ascii diagram for memory mapping / reuse +void LoRA::createPrimitive() { + CPU_NODE_ASSERT(getOriginalInputsNumber() == m_graph.GetInputNodesMap().size(), + "Number of node inputs must be equal the number of inner graph's inputs"); + + std::vector inputMemory; + for (size_t i = 0; i < getOriginalInputsNumber(); i++) { + auto srcEdgeMem = getSrcMemoryAtPort(i); + auto mem = std::make_shared(getEngine(), srcEdgeMem->getDescPtr(), srcEdgeMem->getMemoryBlock()); + subgraphMemoryPtrs.push_back(mem); + inputMemory.emplace_back(std::move(mem)); + } + + CPU_NODE_ASSERT(getOriginalOutputsNumber() == m_graph.GetOutputNodesMap().size(), + "Number of node outputs must be equal the number of inner graph's outputs"); + + std::vector outputMemory{getDstMemoryAtPort(0)}; + m_graph.Activate(inputMemory, outputMemory); +} + +void LoRA::execute(dnnl::stream) { + m_graph.Infer(); +} + +void LoRA::executeDynamicImpl(dnnl::stream strm) { + execute(strm); +} + +void LoRA::prepareParams() { + for (size_t i = 0; i < getOriginalInputsNumber(); i++) { + // since the external and internal descriptors are compatible, we may pass the descriptor + subgraphMemoryPtrs[i]->redefineDesc(getSrcMemoryAtPort(i)->getDescPtr()); + } +} + +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/lora.h b/src/plugins/intel_cpu/src/nodes/lora.h new file mode 100644 index 00000000000000..27701daf9034f2 --- /dev/null +++ b/src/plugins/intel_cpu/src/nodes/lora.h @@ -0,0 +1,39 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "graph.h" 
+#include "node.h" + +namespace ov { +namespace intel_cpu { +namespace node { + +class LoRA : public Node { +public: + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + + LoRA(const std::shared_ptr& op, const GraphContext::CPtr& context); + + bool created() const override { + return getType() == Type::LoRA; + } + + void getSupportedDescriptors() override{}; + void selectOptimalPrimitiveDescriptor() override; + void createPrimitive() override; + void prepareParams() override; + void execute(dnnl::stream) override; + void executeDynamicImpl(dnnl::stream strm) override; + +private: + std::shared_ptr m_body; + std::vector subgraphMemoryPtrs; + Graph m_graph; +}; + +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/memory.cpp b/src/plugins/intel_cpu/src/nodes/memory.cpp index 756fbc5b578f61..565597bdcc2a9e 100644 --- a/src/plugins/intel_cpu/src/nodes/memory.cpp +++ b/src/plugins/intel_cpu/src/nodes/memory.cpp @@ -377,7 +377,8 @@ bool MemoryInputBase::isSupportedOperation(const std::shared_ptr } MemoryInputBase::MemoryInputBase(const std::shared_ptr& op, const GraphContext::CPtr ctx) - : Input(op, ctx), MemoryStateNode(op) { + : Input(op, ctx), + MemoryStateNode(op) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); @@ -385,6 +386,7 @@ MemoryInputBase::MemoryInputBase(const std::shared_ptr& op, const Grap if (created()) { context->getMemoryStatesRegister()->registerInput(this); } + executeHook = &MemoryInputBase::assignState; } MemoryInputBase::MemoryInputBase(const std::string id, @@ -394,8 +396,10 @@ MemoryInputBase::MemoryInputBase(const std::string id, const ov::element::Type& output_prc, const GraphContext::CPtr context, const ov::optional& input_shape, - const ov::optional& input_prc) : - Input(output_shape, output_prc, name, type, context), MemoryStateNode(id) { + const ov::optional& 
input_prc, + MemoryInputBase::mode mode) + : Input(output_shape, output_prc, name, type, context), + MemoryStateNode(id) { outputShapes.emplace_back(output_shape); addOriginalOutputPrecision(output_prc); if (input_shape) { @@ -411,6 +415,17 @@ MemoryInputBase::MemoryInputBase(const std::string id, if (created()) { context->getMemoryStatesRegister()->registerInput(this); } + + // this important to prevent identifying it as a const when it's on a const path + constant = ConstantType::StrictNoConst; + + if (mode::read_value_assign == mode) { + executeHook = &MemoryInputBase::assignState; + } else if (mode::single_read_value == mode) { + executeHook = &MemoryInputBase::bypassAssignState; + } else { + THROW_CPU_NODE_ERR("Unexpected MemoryInput mode"); + } } MemoryInputBase::~MemoryInputBase() { @@ -513,15 +528,26 @@ void MemoryInputBase::assignState(MemStatePtr newState) { } void MemoryInputBase::execute(dnnl::stream strm) { - getOutputNode().assignState(getAssignedState()); + assert(executeHook && "executeHook is not initialized!"); + (this->*executeHook)(); runStatic(strm); } void MemoryInputBase::executeDynamicImpl(dnnl::stream strm) { - getOutputNode().assignState(getAssignedState()); + assert(executeHook && "executeHook is not initialized!"); + (this->*executeHook)(); runDynamic(strm); } +void MemoryInputBase::assignState() { + getOutputNode().assignState(getAssignedState()); +} + +void MemoryInputBase::bypassAssignState() { + // nothing to do + return; +} + bool MemoryInput::needInitGraphProcessing() const { return !getParentEdges().empty() && getAssignedState()->is_reset_state(); } @@ -828,6 +854,89 @@ void MemoryInputSDPA::resolveInPlaceEdges(Edge::LOOK look) { } } +MemoryInputSingle::MemoryInputSingle(const std::string id, + const std::string& name, + const std::string& type, + const Shape& output_shape, + const ov::element::Type& output_prc, + const GraphContext::CPtr context, + const ov::optional& input_shape, + const ov::optional& input_prc) + : 
MemoryInput(id, + name, + type, + output_shape, + output_prc, + context, + input_shape, + input_prc, + MemoryInputBase::mode::single_read_value) {} + +MemStatePtr MemoryInputSingle::makeState() const { + // assume ov::Tensor is always dense + auto original_desc = + std::make_shared(getOriginalOutputPrecisionAtPort(0), outputShapes.at(0)); + + auto mem_desc = getBaseMemDescAtOutputPort(0); + const auto& eng = getEngine(); + + auto state_name = getId(); + + // Remove suffix with pair ID. Internal information. + auto suffix_idx = state_name.find("/id="); + if (suffix_idx != std::string::npos) { + state_name = state_name.substr(0, suffix_idx); + } + + return std::make_shared(state_name, + std::make_shared(eng, mem_desc), + original_desc); +} + +void MemoryInputSingle::runStatic(dnnl::stream strm) { + MemoryInput::runStatic(strm); + if (needInitGraphProcessing()) { + // since there is no corresponding MemoryOutput node, we need to update the state here + auto result = getDstMemoryAtPort(0); // only one output port + auto stateMem = getAssignedState()->output_mem(); + CPU_NODE_ASSERT(stateMem, " state memory has nullptr"); + if (result->getData() != stateMem->getData()) { + stateMem->load(*result); + } + } + getAssignedState()->commit(); // since we don't use MemoryOutput, commit must be called to change the reset state +} + +void MemoryInputSingle::runDynamic(dnnl::stream strm) { + MemoryInput::runDynamic(strm); + if (needInitGraphProcessing()) { + // since there is no corresponding MemoryOutput node, we need to update the state here + auto result = getDstMemoryAtPort(0); // only one output port + auto state = getAssignedState(); + auto stateMem = state->output_mem(); + CPU_NODE_ASSERT(stateMem, " state memory has nullptr"); + + const auto& newShape = result->getShape(); + const auto& stateShape = stateMem->getShape(); + + if (stateShape.isDynamic() || stateShape.getStaticDims() != newShape.getStaticDims()) { + auto extMemDesc = state->internal_desc(); + auto 
newExternDesc = extMemDesc->cloneWithNewDims(newShape.getStaticDims()); + stateMem->redefineDesc(newExternDesc); + } + + if (result->getData() != stateMem->getData()) { + stateMem->load(*result); + } + } + getAssignedState()->commit(); // since we don't use MemoryOutput, commit must be called to change the reset state +} + +bool MemoryInputSingle::isSupportedOperation(const std::shared_ptr& op, + std::string& errorMessage) noexcept { + return MemoryInput::isSupportedOperation(op, errorMessage); +} + } // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/memory.hpp b/src/plugins/intel_cpu/src/nodes/memory.hpp index c158d738a36148..f503a8d58386a5 100644 --- a/src/plugins/intel_cpu/src/nodes/memory.hpp +++ b/src/plugins/intel_cpu/src/nodes/memory.hpp @@ -120,16 +120,14 @@ class MemoryOutputStub : public MemoryOutputBase { }; class MemoryInputBase : public Input, public MemoryStateNode { +public: + enum class mode { + read_value_assign, + single_read_value + }; + public: MemoryInputBase(const std::shared_ptr& op, const GraphContext::CPtr context); - MemoryInputBase(const std::string id, - const std::string& name, - const std::string& type, - const Shape& output_shape, - const ov::element::Type& output_prc, - const GraphContext::CPtr context, - const ov::optional& input_shape, - const ov::optional& input_prc); ~MemoryInputBase() override; @@ -152,6 +150,17 @@ class MemoryInputBase : public Input, public MemoryStateNode { MemoryOutputBase& getOutputNode(); void assignState(MemStatePtr newState) override final; // NOLINT +protected: + MemoryInputBase(const std::string id, + const std::string& name, + const std::string& type, + const Shape& output_shape, + const ov::element::Type& output_prc, + const GraphContext::CPtr context, + const ov::optional& input_shape, + const ov::optional& input_prc, + mode mode = mode::read_value_assign); + protected: virtual void runStatic(dnnl::stream strm) = 0; virtual void 
runDynamic(dnnl::stream strm) = 0; @@ -160,12 +169,20 @@ class MemoryInputBase : public Input, public MemoryStateNode { return state; } +private: + using executeHookPtr = void (MemoryInputBase::*)(void); + +private: + void assignState(); + void bypassAssignState(); + private: /** * @brief keeps reference to output sibling node */ MemoryOutputBase* outputNode = nullptr; MemStatePtr state = nullptr; + executeHookPtr executeHook; }; class MemoryInput : public MemoryInputBase { @@ -179,16 +196,38 @@ class MemoryInput : public MemoryInputBase { MemStatePtr makeState() const override; -private: +protected: + bool needInitGraphProcessing() const; void runStatic(dnnl::stream strm) override; void runDynamic(dnnl::stream strm) override; + +private: void assignStateHook() override {/*pass*/} - bool needInitGraphProcessing() const; private: ProxyMemoryBlockPtr memBlock = nullptr; }; +class MemoryInputSingle : public MemoryInput { +public: + MemoryInputSingle(const std::string id, + const std::string& name, + const std::string& type, + const Shape& output_shape, + const ov::element::Type& output_prc, + const GraphContext::CPtr context, + const ov::optional& input_shape, + const ov::optional& input_prc); + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + + MemStatePtr makeState() const override; + +private: + void runStatic(dnnl::stream strm) override; + void runDynamic(dnnl::stream strm) override; +}; + class MemoryInputSDPA : public MemoryInputBase { public: MemoryInputSDPA(const std::string id, diff --git a/src/plugins/intel_cpu/src/nodes/mha.cpp b/src/plugins/intel_cpu/src/nodes/mha.cpp index 7d082e99fa4f6a..9364058c5d19a2 100644 --- a/src/plugins/intel_cpu/src/nodes/mha.cpp +++ b/src/plugins/intel_cpu/src/nodes/mha.cpp @@ -934,7 +934,7 @@ void MHA::prepareParams() { bool isAMXSupported = mayiuse(avx512_core_amx); - size_t numThreads = parallel_get_max_threads(); + m_threads_num = parallel_get_max_threads(); size_t 
matmulOptimalM = 32; @@ -1072,21 +1072,21 @@ void MHA::prepareParams() { bufferCompensation1Size = rnd_up(N1, N1_blk); if (brgCopyAKernel0) { - bufferMatMul0In0.resize(numThreads * bufferMatMul0In0Size); + bufferMatMul0In0.resize(m_threads_num * bufferMatMul0In0Size); } - bufferMatMul0In1.resize(numThreads * bufferMatMul0In1Size); - bufferMatMul0Out.resize(numThreads * bufferMatMul0OutSize); - bufferMatMul1In1.resize(numThreads * bufferMatMul1In1Size); - bufferMatMul1Out.resize(numThreads * bufferMatMul1OutSize); + bufferMatMul0In1.resize(m_threads_num * bufferMatMul0In1Size); + bufferMatMul0Out.resize(m_threads_num * bufferMatMul0OutSize); + bufferMatMul1In1.resize(m_threads_num * bufferMatMul1In1Size); + bufferMatMul1Out.resize(m_threads_num * bufferMatMul1OutSize); if (brgemmCtx0.is_with_comp) { - bufferCompensation0.resize(numThreads * bufferCompensation0Size); + bufferCompensation0.resize(m_threads_num * bufferCompensation0Size); } if (brgemmCtx1.is_with_comp) { - bufferCompensation1.resize(numThreads * bufferCompensation1Size); + bufferCompensation1.resize(m_threads_num * bufferCompensation1Size); } if (brgemmCtx0.is_with_amx || brgemmCtx1.is_with_amx) { - wsp.resize(numThreads * wsp_size_per_thread); + wsp.resize(m_threads_num * wsp_size_per_thread); } { @@ -1224,7 +1224,7 @@ void MHA::mhaImpl() { auto outPrcSize = outputPrecision.size(); - parallel_for2d(dimsMatMul0Out[0], dimsMatMul0Out[1], [&](size_t i0, size_t i1) { + auto spatial_loop = [&](size_t i0, size_t i1) { size_t threadNum = parallel_get_thread_num(); auto pTranspose0In0_aux = pTranspose0In0 + (i0 * strTranspose0In0[0] + i1 * strTranspose0In0[2]) * inputPrecisions[0].size(); // order 0213 @@ -1417,6 +1417,10 @@ void MHA::mhaImpl() { (*convertReorderKernel)(&call_args); } } + }; + + parallel_nt_static(m_threads_num, [&](const int ithr, const int nthr) { + for_2d(ithr, nthr, dimsMatMul0Out[0], dimsMatMul0Out[1], spatial_loop); }); } diff --git a/src/plugins/intel_cpu/src/nodes/mha.h 
b/src/plugins/intel_cpu/src/nodes/mha.h index cd272c086e2190..36afe20224299a 100644 --- a/src/plugins/intel_cpu/src/nodes/mha.h +++ b/src/plugins/intel_cpu/src/nodes/mha.h @@ -238,6 +238,8 @@ class MHA : public Node { std::unique_ptr mulAddSoftmaxKernel; std::unique_ptr convertReorderKernel; std::unique_ptr convertTransposeKernel; + + size_t m_threads_num = 0lu; }; } // namespace node diff --git a/src/plugins/intel_cpu/src/nodes/mvn.cpp b/src/plugins/intel_cpu/src/nodes/mvn.cpp index 61aa4738b8f81f..76471b0cca741d 100644 --- a/src/plugins/intel_cpu/src/nodes/mvn.cpp +++ b/src/plugins/intel_cpu/src/nodes/mvn.cpp @@ -2417,9 +2417,9 @@ void MVN::MVNJitExecutor::mvn_nspc(const uint8_t* src_data, uint8_t* dst_data, c const size_t H = shape5d[3]; const size_t W = shape5d[4]; - size_t threads_num = parallel_get_max_threads(); + const size_t threads_num = parallel_get_max_threads(); size_t aux_buffer_size = mvnAttrs.execAcrossChannels_ ? 1 : rnd_up(C, blk_size) + blk_size; - parallel_for(N, [&](size_t b) { + auto b_loop = [&](size_t b) { std::vector mean_buffer(aux_buffer_size * threads_num, 0.f); std::vector variance_buffer; if (mvnAttrs.normalizeVariance_) { @@ -2429,7 +2429,7 @@ void MVN::MVNJitExecutor::mvn_nspc(const uint8_t* src_data, uint8_t* dst_data, c // kernel_type: 0 for mean, 1 for variance, 2 for normalization auto worker = [&](const bool across_channel, const int kernel_type) { - parallel_nt(0, [&](const int ithr, const int nthr) { + parallel_nt(threads_num, [&](const int ithr, const int nthr) { size_t start = 0, end = 0; splitter(D * H * W, nthr, ithr, start, end); @@ -2512,6 +2512,10 @@ void MVN::MVNJitExecutor::mvn_nspc(const uint8_t* src_data, uint8_t* dst_data, c } worker(false, 2); } + }; + + parallel_nt_static(threads_num, [&](const int ithr, const int nthr) { + for_1d(ithr, nthr, N, b_loop); }); } @@ -2529,15 +2533,15 @@ void MVN::MVNJitExecutor::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, co const size_t H = shape5d[3]; const size_t W = 
shape5d[4]; - size_t CB = div_up(C, blk_size); + const size_t CB = div_up(C, blk_size); - size_t C0 = W * blk_size; - size_t C1 = C0 * H; - size_t C2 = C1 * D; - size_t C3 = C2 * CB; - size_t C5 = C * D * H * W; + const size_t C0 = W * blk_size; + const size_t C1 = C0 * H; + const size_t C2 = C1 * D; + const size_t C3 = C2 * CB; + const size_t C5 = C * D * H * W; - size_t threads_num = parallel_get_max_threads(); + const size_t threads_num = parallel_get_max_threads(); size_t aux_buffer_size = mvnAttrs.execAcrossChannels_ ? blk_size : rnd_up(C, blk_size); aux_buffer_size += blk_size; std::vector mean_buffer(aux_buffer_size * threads_num); @@ -2562,7 +2566,11 @@ void MVN::MVNJitExecutor::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, co // // | // // \|/ ///////////////////////////////// - auto mean_buffer_ptr = &mean_buffer[aux_buffer_size * static_cast(parallel_get_thread_num())]; + auto thread_idx = static_cast(parallel_get_thread_num()); + if (thread_idx >= threads_num) { + return mean_internal; + } + auto mean_buffer_ptr = &mean_buffer[aux_buffer_size * thread_idx]; for (size_t i = 0; i < blk_size; i++) mean_buffer_ptr[i] = 0.f; @@ -2651,7 +2659,7 @@ void MVN::MVNJitExecutor::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, co // one thread for one C*W size(the same H) to get C size result for the same H, added to last group result // keep the compute order the same as planar - parallel_for2d(D, H, [&](size_t thr_idx, size_t d, size_t h) { + auto dh_loop = [&](size_t thr_idx, size_t d, size_t h) { for (size_t cb = 0; cb < CB; cb++) { size_t src_offset = b_offset + cb * C2 + d * C1 + h * C0; auto mean_buffer_ptr = &mean_buffer[blk_size * cb + aux_buffer_size * thr_idx]; @@ -2665,6 +2673,10 @@ void MVN::MVNJitExecutor::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, co arg.post_op_data = post_ops_data_; (*mvn_mean_kernel)(&arg); } + }; + + parallel_nt_static(threads_num, [&](const int ithr, const int nthr) { + for_2d(ithr, nthr, D, H, dh_loop); }); 
for (size_t i = 1; i < threads_num; i++) { @@ -2678,7 +2690,7 @@ void MVN::MVNJitExecutor::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, co for (size_t i = 0; i < variance_buffer.size(); i++) variance_buffer[i] = 0.f; - parallel_for2d(D, H, [&](size_t thr_idx, size_t d, size_t h) { + auto dh_loop = [&](size_t thr_idx, size_t d, size_t h) { for (size_t cb = 0; cb < CB; cb++) { size_t src_offset = b_offset + cb * C2 + d * C1 + h * C0; auto mean_buffer_ptr = &mean_buffer[blk_size * cb]; @@ -2694,7 +2706,12 @@ void MVN::MVNJitExecutor::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, co arg.post_op_data = post_ops_data_; (*mvn_variance_kernel)(&arg); } + }; + + parallel_nt_static(threads_num, [&](const int ithr, const int nthr) { + for_2d(ithr, nthr, D, H, dh_loop); }); + for (size_t i = 1; i < threads_num; i++) { for (size_t c = 0; c < C; c++) variance_buffer[c] += variance_buffer[c + aux_buffer_size * i]; diff --git a/src/plugins/intel_cpu/src/nodes/qkv_proj.cpp b/src/plugins/intel_cpu/src/nodes/qkv_proj.cpp index 3260b12f1b5b4b..00c8b6f9b17c0b 100644 --- a/src/plugins/intel_cpu/src/nodes/qkv_proj.cpp +++ b/src/plugins/intel_cpu/src/nodes/qkv_proj.cpp @@ -60,6 +60,7 @@ struct QKVProjection::Executor : public QKVProjection::ExecutorBase { MemoryPtr m_scratchMem; uint8_t* m_scratch_base = nullptr; int m_M = 0; + size_t m_threads_num = 0lu; MatrixDynQuantPerRow m_quant_act; @@ -79,11 +80,11 @@ struct QKVProjection::Executor : public QKVProjection::ExecutorBase { auto K = w0.size(1); OPENVINO_ASSERT((K % cache_blk_k_size) == 0); - auto nthr = parallel_get_max_threads(); + m_threads_num = parallel_get_max_threads(); auto num_blk_K = K / cache_blk_k_size; int stride_in_bytes = K * weight_element_size; - works.resize(nthr); + works.resize(m_threads_num); int cur_work_id = 0; auto create_works = [&](void* pw, int output_id, int N, int valid_nthr) { @@ -119,7 +120,7 @@ struct QKVProjection::Executor : public QKVProjection::ExecutorBase { auto proj_size0 = 
m_node->m_config.proj_size0; auto proj_size1 = m_node->m_config.proj_size1; auto proj_size2 = m_node->m_config.proj_size2; - auto n_group_workers = allocate_workers({proj_size0, proj_size1, proj_size2}, nthr); + auto n_group_workers = allocate_workers({proj_size0, proj_size1, proj_size2}, m_threads_num); if (m_node->m_config.weights_combined) { auto* ptr_weights = reinterpret_cast(w0.ptr_v()); @@ -140,7 +141,7 @@ struct QKVProjection::Executor : public QKVProjection::ExecutorBase { wbuffer.alloc(works, weight_element_size); - ov::parallel_nt_static(0, [&](const size_t ithr, const size_t nthr) { + ov::parallel_nt_static(m_threads_num, [&](const size_t ithr, const size_t nthr) { auto& work = works[ithr]; if (work) { if (quantized_int8) @@ -237,7 +238,7 @@ struct QKVProjection::Executor : public QKVProjection::ExecutorBase { strideA = m_quant_act.K; } - ov::parallel_nt_static(0, [&](const size_t ithr, const size_t nthr) { + ov::parallel_nt_static(m_threads_num, [&](const size_t ithr, const size_t nthr) { auto& work = works[ithr]; if (work) { work.run(BM, pA, strideA); diff --git a/src/plugins/intel_cpu/src/nodes/reduce.cpp b/src/plugins/intel_cpu/src/nodes/reduce.cpp index b40c50f957514f..6cfc94a02b9f3b 100644 --- a/src/plugins/intel_cpu/src/nodes/reduce.cpp +++ b/src/plugins/intel_cpu/src/nodes/reduce.cpp @@ -2742,12 +2742,12 @@ inline void Reduce::reduce_kernel_post_process(uint8_t *out_ptr) { (*reduce_post_kernel)(&arg); }); } else if (layout == ReduceLayoutType::reduce_nspc) { - size_t num_threads = static_cast(parallel_get_max_threads()); + const size_t num_threads = static_cast(parallel_get_max_threads()); size_t OP = OB * OC >= num_threads ? 
OB * OC : OB * OC * OD; if (OP < num_threads && OW > blk_size) OP *= OH; size_t work_amount = OB * OC * OD * OH * OW / OP; - parallel_for(OP, [&](size_t op) { + auto op_loop = [&](size_t op) { const uint8_t *in_p = in_ptr + op * work_amount * intermediate_data_size; uint8_t *out_p = out_ptr + op * work_amount * dst_data_size; auto arg = jit_reduce_post_call_args(); @@ -2759,6 +2759,10 @@ inline void Reduce::reduce_kernel_post_process(uint8_t *out_ptr) { arg.divisor = &divisor; arg.post_op_data = static_cast(postOpsDataPtrs.data()); (*reduce_post_kernel)(&arg); + }; + + parallel_nt_static(num_threads, [&](const int ithr, const int nthr) { + for_1d(ithr, nthr, OP, op_loop); }); } else { size_t OCB = div_up(OC, blk_size); diff --git a/src/plugins/intel_cpu/src/nodes/reference.cpp b/src/plugins/intel_cpu/src/nodes/reference.cpp index 43b8f041184a70..185815acd8c294 100644 --- a/src/plugins/intel_cpu/src/nodes/reference.cpp +++ b/src/plugins/intel_cpu/src/nodes/reference.cpp @@ -14,7 +14,7 @@ Reference::Reference(const std::shared_ptr& op, const GraphContext::CP Node(op, context, NgraphShapeInferFactory(op, FULL_PORT_MASK)), ovCoreNode(op), additionalErrorMessage(errorMessage) { if (!op->has_evaluate()) { OPENVINO_THROW_NOT_IMPLEMENTED( - "Cannot fallback on ngraph reference implementation (Ngraph::Node::evaluate() is not implemented"); + "Cannot fallback on ngraph reference implementation (Ngraph::Node::evaluate() is not implemented)"); } setType(Type::Reference); diff --git a/src/plugins/intel_cpu/src/nodes/rnn.cpp b/src/plugins/intel_cpu/src/nodes/rnn.cpp index cab8bb3ad46325..679907c2d9cf28 100644 --- a/src/plugins/intel_cpu/src/nodes/rnn.cpp +++ b/src/plugins/intel_cpu/src/nodes/rnn.cpp @@ -33,8 +33,6 @@ static rnn_direction ieDirection2dnnl(const std::shared_ptr& op) ov::op::RecurrentSequenceDirection direction = ov::op::RecurrentSequenceDirection::FORWARD; if (ov::is_type(op)) { direction = ov::as_type_ptr(op)->get_direction(); - } else if (ov::is_type(op)) { - 
direction = ov::as_type_ptr(op)->get_direction(); } else if (ov::is_type(op)) { direction = ov::as_type_ptr(op)->get_direction(); } else if (ov::is_type(op)) { @@ -75,14 +73,13 @@ static dnnl::algorithm ie2dnnl(const std::shared_ptr& op) { else return dnnl::algorithm::vanilla_augru; } else if (one_of(op->get_type_info(), - ov::op::v0::LSTMCell::get_type_info_static(), - ov::op::v4::LSTMCell::get_type_info_static(), - ov::op::v0::LSTMSequence::get_type_info_static(), - ov::op::v5::LSTMSequence::get_type_info_static())) { + ov::op::v0::LSTMCell::get_type_info_static(), + ov::op::v4::LSTMCell::get_type_info_static(), + ov::op::v5::LSTMSequence::get_type_info_static())) { return dnnl::algorithm::vanilla_lstm; } else if (one_of(op->get_type_info(), - ov::op::v0::RNNCell::get_type_info_static(), - ov::op::v5::RNNSequence::get_type_info_static())) { + ov::op::v0::RNNCell::get_type_info_static(), + ov::op::v5::RNNSequence::get_type_info_static())) { return dnnl::algorithm::vanilla_rnn; } else { OPENVINO_THROW("Operation ", @@ -201,16 +198,15 @@ bool RNNKey::operator==(const RNNKey& rhs) const { bool RNN::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (!one_of(op->get_type_info(), - ov::op::v3::GRUCell::get_type_info_static(), - ov::op::internal::AUGRUCell::get_type_info_static(), - ov::op::internal::AUGRUSequence::get_type_info_static(), - ov::op::v0::LSTMCell::get_type_info_static(), - ov::op::v4::LSTMCell::get_type_info_static(), - ov::op::v0::RNNCell::get_type_info_static(), - ov::op::v5::GRUSequence::get_type_info_static(), - ov::op::v0::LSTMSequence::get_type_info_static(), - ov::op::v5::LSTMSequence::get_type_info_static(), - ov::op::v5::RNNSequence::get_type_info_static())) { + ov::op::v3::GRUCell::get_type_info_static(), + ov::op::internal::AUGRUCell::get_type_info_static(), + ov::op::internal::AUGRUSequence::get_type_info_static(), + ov::op::v0::LSTMCell::get_type_info_static(), + 
ov::op::v4::LSTMCell::get_type_info_static(), + ov::op::v0::RNNCell::get_type_info_static(), + ov::op::v5::GRUSequence::get_type_info_static(), + ov::op::v5::LSTMSequence::get_type_info_static(), + ov::op::v5::RNNSequence::get_type_info_static())) { errorMessage = "Unsupported sequence operation."; return false; } @@ -239,9 +235,7 @@ bool RNN::isSupportedOperation(const std::shared_ptr& op, std::s errorMessage = "Node expects 6 inputs. Actual: " + std::to_string(op->get_input_size()); return false; } - } else if (one_of(op->get_type_info(), - ov::op::v0::LSTMSequence::get_type_info_static(), - ov::op::v5::LSTMSequence::get_type_info_static())) { + } else if (one_of(op->get_type_info(), ov::op::v5::LSTMSequence::get_type_info_static())) { if (op->get_input_size() != 7) { errorMessage = "Node expects 7 inputs. Actual: " + std::to_string(op->get_input_size()); return false; @@ -264,7 +258,6 @@ bool RNN::isSupportedOperation(const std::shared_ptr& op, std::s if (one_of(rnnCellBase->get_type_info(), ov::op::v0::LSTMCell::get_type_info_static(), ov::op::v4::LSTMCell::get_type_info_static(), - ov::op::v0::LSTMSequence::get_type_info_static(), ov::op::v5::LSTMSequence::get_type_info_static())) { if (rnnCellBase->get_activations() != std::vector{"sigmoid", "tanh", "tanh"}) { errorMessage = "Not supported activation functions"; @@ -294,9 +287,6 @@ bool RNN::isSupportedOperation(const std::shared_ptr& op, std::s if (auto gru_seq = ov::as_type_ptr(op)) { direction = gru_seq->get_direction(); seqLenIdx = 2; - } else if (auto lstm_seq = ov::as_type_ptr(op)) { - direction = lstm_seq->get_direction(); - seqLenIdx = 3; } else if (auto lstm_seq = ov::as_type_ptr(op)) { direction = lstm_seq->get_direction(); seqLenIdx = 3; @@ -441,9 +431,7 @@ RNN::RNN(const std::shared_ptr& op, const GraphContext::CPtr context) } else if (op->get_type_info() == ov::op::internal::AUGRUSequence::get_type_info_static()) { sIdx = 2; wIdx = 3; rIdx = 4; bIdx = 5; aIdx = 6; yIdx = 0; hoIdx = 1; - } else if 
(one_of(op->get_type_info(), - ov::op::v0::LSTMSequence::get_type_info_static(), - ov::op::v5::LSTMSequence::get_type_info_static())) { + } else if (one_of(op->get_type_info(), ov::op::v5::LSTMSequence::get_type_info_static())) { sIdx = 3; wIdx = 4; rIdx = 5; bIdx = 6; yIdx = 0; hoIdx = 1; coIdx = 2; } diff --git a/src/plugins/intel_cpu/src/nodes/roi_align.cpp b/src/plugins/intel_cpu/src/nodes/roi_align.cpp index eb1797279e1415..27f9426dca6af9 100644 --- a/src/plugins/intel_cpu/src/nodes/roi_align.cpp +++ b/src/plugins/intel_cpu/src/nodes/roi_align.cpp @@ -1076,7 +1076,7 @@ void ROIAlign::executeSpecified() { int bufSize = rnd_up(C, 16); size_t threadsNum = parallel_get_max_threads(); workingBuf.resize(bufSize * threadsNum, 0.f); - parallel_for3d(realRois, pooledH, pooledW, [&](int n, int yBinInd, int xBinInd) { + auto rhw_loop = [&](int n, int yBinInd, int xBinInd) { int numSamplesROI = numSamples[n]; // each sample have 4 values for srcAddressList and weight size_t binOffset = numSamplesROI * BLIParamsNum * pooledW * yBinInd + numSamplesROI * BLIParamsNum * xBinInd; @@ -1095,6 +1095,10 @@ void ROIAlign::executeSpecified() { arg.dst = static_cast(&dst[dstOffset]); arg.src_stride = lastBlockDim * W * H; // only valid for blk, nspc generate inside (*roi_align_kernel)(&arg); + }; + + parallel_nt_static(threadsNum, [&](const int ithr, const int nthr) { + for_3d(ithr, nthr, realRois, pooledH, pooledW, rhw_loop); }); } else { // one lane for one sample generation, then pooling all samples. 
diff --git a/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp b/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp index e229ff4bb72c57..f9f853230c4dd6 100644 --- a/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp +++ b/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp @@ -217,6 +217,7 @@ struct MHAKernel { size_t wsp_size_per_thread = 0; using tag = dnnl::memory::format_tag; using dt = dnnl::memory::data_type; + size_t m_threads_num = 0lu; struct brgemmKey { size_t M; size_t N; @@ -315,21 +316,21 @@ struct MHAKernel { wv_gemm_ptr = wv_result.first; - size_t nthr = static_cast(parallel_get_max_threads()); + m_threads_num = static_cast(parallel_get_max_threads()); // wsp is used to compute beta when K is blocked wsp_size_per_thread = wv_gemm_ptr->get_wsp_size(); - wsp.resize(nthr * wsp_size_per_thread); + wsp.resize(m_threads_num * wsp_size_per_thread); // allocate scratch a/b, notice get_scratch_a_size/get_scratch_b_size returns in bytes size_t data_size = sizeof(T); - qk_scratch_a.resize({nthr, qk_gemm_ptr->get_scratch_a_size() / data_size}); - wv_scratch_a.resize({nthr, wv_gemm_ptr->get_scratch_a_size() / data_size}); + qk_scratch_a.resize({m_threads_num, qk_gemm_ptr->get_scratch_a_size() / data_size}); + wv_scratch_a.resize({m_threads_num, wv_gemm_ptr->get_scratch_a_size() / data_size}); qk_scratch_b.resize({B, Hk, qk_gemm_ptr->get_scratch_b_size() / data_size}); wv_scratch_b.resize({B, Hk, wv_gemm_ptr->get_scratch_b_size() / data_size}); const size_t m_block_size = qk_gemm_ptr->get_mblk_size(); - weight_score.resize({static_cast(parallel_get_max_threads()), H, m_block_size, kv_len}); + weight_score.resize({m_threads_num, H, m_block_size, kv_len}); if (has_out_transpose) { fp32_out.resize({B, q_len, H, head_size_v}); } else { @@ -367,7 +368,7 @@ struct MHAKernel { }); // attention - parallel_for3d(B, H, m_blocks, [&](size_t ithr, size_t b, size_t h, size_t m_blk) { + auto bhb_loop = [&](size_t ithr, size_t b, size_t h, size_t m_blk) { auto m_start = m_blk * 
m_block_size; auto m_end = std::min(m_start + m_block_size, q_len); auto m_cnt = m_end - m_start; @@ -456,6 +457,10 @@ struct MHAKernel { 1); } } + }; + + parallel_nt_static(m_threads_num, [&](const int ithr, const int nthr) { + for_3d(ithr, nthr, B, H, m_blocks, bhb_loop); }); } @@ -652,12 +657,14 @@ struct MHAKernel { size_t m_block_size; // buffer to hold qk temp std::vector qk_buffers; + size_t m_threads_num = 0lu; MHAKernel() = delete; explicit MHAKernel(GraphContext::CPtr ctx): context(ctx) { m_block_size = 4; select_nfltmax_at_0 = false; - qk_buffers.resize(parallel_get_max_threads()); + m_threads_num = parallel_get_max_threads(); + qk_buffers.resize(m_threads_num); } PlainTensor causal_mask; @@ -699,7 +706,7 @@ struct MHAKernel { auto m_blocks = (q_len + m_block_size - 1) / m_block_size; - parallel_for3d(B, H, m_blocks, [&](size_t b, size_t h, size_t m_blk) { + auto bhb_loop = [&](size_t b, size_t h, size_t m_blk) { auto thread_id = parallel_get_thread_num(); if (thread_id < 0) OPENVINO_THROW("The calling thread isn't initialized!"); @@ -801,6 +808,10 @@ struct MHAKernel { has_out_transpose ? &output_emb.at({b, m_start, h * head_size_v}) : &output_emb.at({b, h, m_start}), has_out_transpose ? 
output_emb.stride(1) : output_emb.stride(2), 1); + }; + + parallel_nt_static(m_threads_num, [&](const int ithr, const int nthr) { + for_3d(ithr, nthr, B, H, m_blocks, bhb_loop); }); } }; diff --git a/src/plugins/intel_cpu/src/nodes/strided_slice.cpp b/src/plugins/intel_cpu/src/nodes/strided_slice.cpp index 4f974cfe5e9748..13671c22d102ae 100644 --- a/src/plugins/intel_cpu/src/nodes/strided_slice.cpp +++ b/src/plugins/intel_cpu/src/nodes/strided_slice.cpp @@ -348,6 +348,7 @@ StridedSlice::StridedSliceCommonExecutor::StridedSliceCommonExecutor(const Strid dimsNormalization(); dimsGluing(); indicesCalculation(); + m_threads_num = parallel_get_max_threads(); } void StridedSlice::StridedSliceCommonExecutor::orderParametersByLayouts(const BlockedMemoryDescCPtr& blockedMemoryDesc) { @@ -642,8 +643,7 @@ void StridedSlice::StridedSliceCommonExecutor::dimsGluing() { for (size_t idx = secondDim.first + 1; idx < secondDim.second; idx++) params.attrs.begin[1] /= dstBlockedDimsBefore[idx]; - const size_t maxThreads = parallel_get_max_threads(); - if (params.dstBlockedDims[0] < maxThreads) { + if (params.dstBlockedDims[0] < m_threads_num) { params.dstBlockedDims[1] /= realDstDim; params.srcBlockedDims[1] /= realSrcDim; params.dstBlockedDims.insert(params.dstBlockedDims.begin() + 1, realDstDim); @@ -682,8 +682,7 @@ void StridedSlice::StridedSliceCommonExecutor::indicesCalculation() { dstIndices.resize(workAmount, 0); // should choose more optimal thread count - const size_t nthr = parallel_get_max_threads(); - nThreads = nthr > workAmount ? workAmount : nthr; + nThreads = m_threads_num > workAmount ? 
workAmount : m_threads_num; if (params.isOptimized) { indicesCalculationForOptimized(); diff --git a/src/plugins/intel_cpu/src/nodes/strided_slice.h b/src/plugins/intel_cpu/src/nodes/strided_slice.h index 5c5950520bda7d..bf698643271d7a 100644 --- a/src/plugins/intel_cpu/src/nodes/strided_slice.h +++ b/src/plugins/intel_cpu/src/nodes/strided_slice.h @@ -122,6 +122,7 @@ class StridedSlice : public Node { size_t workAmount = 0lu; size_t lastDstDim = 0lu; size_t srcShift = 0lu; + size_t m_threads_num = 0lu; }; using executorPtr = std::shared_ptr; executorPtr execPtr = nullptr; diff --git a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp index 9a3b9788b838d2..dcf2b0f8ffd5ee 100644 --- a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp +++ b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp @@ -938,7 +938,7 @@ int TensorIterator::getNumIteration(const std::vector& inputPortMap, co } bool TensorIterator::runAsDynamic() const { - return isDynamicNode() || Graph::Status::ReadyDynamic == sub_graph.getStatus(); + return isDynamicNode() || sub_graph.IsDynamic(); } bool TensorIterator::created() const { diff --git a/src/plugins/intel_cpu/src/nodes_factory.cpp b/src/plugins/intel_cpu/src/nodes_factory.cpp index 16cf1b974d8561..4a8e8205510fcf 100644 --- a/src/plugins/intel_cpu/src/nodes_factory.cpp +++ b/src/plugins/intel_cpu/src/nodes_factory.cpp @@ -108,6 +108,7 @@ #include "nodes/transpose.h" #include "nodes/unique.hpp" #include "nodes/causal_mask_preprocess.h" +#include "nodes/lora.h" namespace ov { namespace intel_cpu { @@ -221,6 +222,7 @@ Node::NodesFactory::NodesFactory() : Factory("NodesFactory") { INTEL_CPU_NODE(Composite, Type::SubModel); INTEL_CPU_NODE(ScaledDotProductAttention, Type::ScaledDotProductAttention); INTEL_CPU_NODE(SearchSorted, Type::SearchSorted); + INTEL_CPU_NODE(LoRA, Type::LoRA); #if defined(OPENVINO_ARCH_X86_64) INTEL_CPU_NODE(FakeQuantize, Type::FakeQuantize); INTEL_CPU_NODE(GridSample, 
Type::GridSample); diff --git a/src/plugins/intel_cpu/src/shape_inference/shape_inference.cpp b/src/plugins/intel_cpu/src/shape_inference/shape_inference.cpp index bb2d5e5e84b267..8e2e794eacc939 100644 --- a/src/plugins/intel_cpu/src/shape_inference/shape_inference.cpp +++ b/src/plugins/intel_cpu/src/shape_inference/shape_inference.cpp @@ -407,6 +407,7 @@ using IStaticShapeInferFactory = template <> const IStaticShapeInferFactory::TRegistry IStaticShapeInferFactory::registry{ // opset15 + _OV_OP_SHAPE_INFER_MASK_REG(op::v15::Squeeze, ShapeInferTA, util::bit::mask(1)), _OV_OP_SHAPE_INFER_MASK_REG(op::v15::SearchSorted, ShapeInferTA, util::bit::mask()), _OV_OP_SHAPE_INFER_MASK_REG(op::v15::StringTensorUnpack, ShapeInferTA, util::bit::mask(0)), _OV_OP_SHAPE_INFER_MASK_REG(op::v15::StringTensorPack, ShapeInferTA, util::bit::mask(0, 1)), @@ -531,7 +532,6 @@ const IStaticShapeInferFactory::TRegistry IStaticShapeInferFactory::registry{ _OV_OP_SHAPE_INFER_MASK_REG(opset1::GroupConvolutionBackpropData, ShapeInferPaddingTA, util::bit::mask(2)), _OV_OP_SHAPE_INFER_MASK_REG(opset1::Interpolate, ShapeInferTA, util::bit::mask(1)), _OV_OP_SHAPE_INFER_MASK_REG(opset1::LSTMCell, ShapeInferTA, util::bit::mask()), - _OV_OP_SHAPE_INFER_MASK_REG(opset1::LSTMSequence, ShapeInferTA, util::bit::mask()), _OV_OP_SHAPE_INFER_MASK_REG(opset1::MatMul, ShapeInferTA, util::bit::mask()), _OV_OP_SHAPE_INFER_MASK_REG(opset1::MaxPool, ShapeInferPaddingTA, util::bit::mask()), _OV_OP_SHAPE_INFER_MASK_REG(opset1::NonMaxSuppression, ShapeInferTA, util::bit::mask(2)), diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/rnn_sequences_optimization.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/rnn_sequences_optimization.cpp index 72ca09a8152298..916e7b21ffa484 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/rnn_sequences_optimization.cpp +++ 
b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/rnn_sequences_optimization.cpp @@ -124,14 +124,12 @@ ov::intel_cpu::OptimizeRNNSequenceTransposes::OptimizeRNNSequenceTransposes() { ov::intel_cpu::OptimizeLSTMSequenceTransposes::OptimizeLSTMSequenceTransposes() { MATCHER_SCOPE(OptimizeLSTMSequenceTransposes); - auto lstmSequenceNgraph = ov::pass::pattern::wrap_type(); + auto lstmSequenceNgraph = ov::pass::pattern::wrap_type(); ov::matcher_pass_callback callback = [](ov::pass::pattern::Matcher &m) { auto checkSequence = [](const std::shared_ptr& node) { if (auto lstm5 = ov::as_type_ptr(node)) { return lstm5->get_direction() != ov::op::RecurrentSequenceDirection::BIDIRECTIONAL; - } else if (auto lstm1 = ov::as_type_ptr(node)) { - return lstm1->get_direction() != ov::op::RecurrentSequenceDirection::BIDIRECTIONAL; } else { return false; } diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index fcf38440b8aa4b..9dd1da2d471e5a 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -37,6 +37,7 @@ #include "transformations/common_optimizations/move_eltwise_up_data_movement.hpp" #include "transformations/common_optimizations/mark_rope_input_to_keep_in_mixed_precision.hpp" #include "transformations/common_optimizations/rms_fusion.hpp" +#include "transformations/common_optimizations/lora_subgraph_fusion.hpp" #include "transformations/control_flow/unroll_tensor_iterator.hpp" #include "transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp" #include "transformations/fp16_compression/mark_floatpoint_range.hpp" @@ -693,6 +694,7 @@ void Transformations::PreLpt(const std::vector& defaultPrecis CPU_REGISTER_PASS_COMMON(manager, ov::pass::EnableDecompressionConvertConstantFolding); CPU_REGISTER_PASS_COMMON(manager, 
ov::pass::KeepConstAndDecompression); CPU_REGISTER_PASS_COMMON(manager, ov::pass::ConstantFolding); + CPU_REGISTER_PASS_COMMON(manager, ov::pass::LoraSubgraphFusion); manager.run_passes(model); } diff --git a/src/plugins/intel_cpu/src/utils/serialize.cpp b/src/plugins/intel_cpu/src/utils/serialize.cpp index f7fd337afa932e..814e8d19311a8c 100644 --- a/src/plugins/intel_cpu/src/utils/serialize.cpp +++ b/src/plugins/intel_cpu/src/utils/serialize.cpp @@ -20,12 +20,7 @@ void ModelSerializer::operator<<(const std::shared_ptr& model) { auto serialize_info = [&](std::ostream& stream) { pugi::xml_document xml_doc; pugi::xml_node root = xml_doc.append_child("cnndata"); - pugi::xml_node outputs = root.append_child("outputs"); - for (const auto& out : model->get_results()) { - auto out_node = outputs.append_child("out"); - const auto name = ov::descriptor::get_ov_tensor_legacy_name(out->input_value(0).get_tensor()); - out_node.append_attribute("name").set_value(name.c_str()); - } + root.append_child("outputs"); xml_doc.save(stream); }; @@ -44,26 +39,15 @@ ModelDeserializer::ModelDeserializer(std::istream& model_stream, ModelBuilder fn } } -void ModelDeserializer::set_info(pugi::xml_node& root, std::shared_ptr& model) { - pugi::xml_node outputs = root.child("outputs"); - auto nodes_it = outputs.children("out").begin(); - size_t size = model->outputs().size(); - for (size_t i = 0lu; i < size; ++nodes_it, i++) { - std::string name = nodes_it->attribute("name").value(); - if (name.empty()) - continue; - auto result = model->output(i).get_node_shared_ptr(); - ov::descriptor::set_ov_tensor_legacy_name(result->input_value(0).get_tensor(), name); - } -} + void ModelDeserializer::set_info(pugi::xml_node& root, std::shared_ptr& model) {} -void ModelDeserializer::operator>>(std::shared_ptr& model) { - if (auto mmap_buffer = dynamic_cast(m_istream.rdbuf())) { - auto buffer = mmap_buffer->get_buffer(); - process_mmap(model, buffer); - } else { - process_stream(model); - } + void 
ModelDeserializer::operator>>(std::shared_ptr& model) { + if (auto mmap_buffer = dynamic_cast(m_istream.rdbuf())) { + auto buffer = mmap_buffer->get_buffer(); + process_mmap(model, buffer); + } else { + process_stream(model); + } } void ModelDeserializer::process_mmap(std::shared_ptr& model, diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/rms_norm.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/rms_norm.cpp index da7bbaaaf848d1..38aeedb5e451cf 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/rms_norm.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/rms_norm.cpp @@ -24,6 +24,17 @@ const std::vector> shapes{ {ov::Shape{1024 + 16 + 1}, ov::Shape{1024 + 16 + 1}}} }, }, + // small data size + { + // data shape + {ov::test::InputShape{ov::PartialShape{-1, -1, 31}, + {ov::Shape{1, 8, 31}, ov::Shape{2, 3, 31}}} + }, + // scale shape + {ov::test::InputShape{ov::PartialShape{31}, + {ov::Shape{31}, ov::Shape{31}}} + }, + }, // scale is scalar { // data shape diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_multiple_query_sdp.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_multiple_query_sdp.cpp index d05e7840562191..d74ab99fb3d5ab 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_multiple_query_sdp.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_multiple_query_sdp.cpp @@ -238,7 +238,7 @@ class ConcatMultiQuerySDPTest : public testing::WithParamInterfaceget_element_type() == element::f16) { ov::Tensor t{ov::element::f16, shape}; - strided_iota(static_cast(t.data()), t.get_size(), val, 0.1f); + strided_iota(static_cast(t.data()), t.get_size(), val, 0.0f); inputs.insert({param, t}); } else { ov::Tensor t{ov::element::bf16, shape}; diff --git 
a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/lora_pattern.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/lora_pattern.cpp new file mode 100644 index 00000000000000..20620e42ceddc7 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/lora_pattern.cpp @@ -0,0 +1,313 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_test_utils/node_builders/convolution.hpp" +#include "common_test_utils/node_builders/eltwise.hpp" +#include "common_test_utils/ov_tensor_utils.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "utils/cpu_test_utils.hpp" + +namespace ov { +namespace test { + +namespace { +constexpr auto t4_name = "lora/MatMul.B"; +constexpr auto t5_name = "lora/MatMul.alpha"; +constexpr auto t6_name = "lora/MatMul.A"; +constexpr auto netType = ov::element::f32; + +enum class StatesPolicy { + EMPTY_TENSORS, + RANDOM_TENSORS, + UNDEFINED +}; +std::ostream &operator<<(std::ostream& os, StatesPolicy states_policy) { + switch (states_policy) { + case StatesPolicy::EMPTY_TENSORS: + return os << "empty_tensors"; + case StatesPolicy::RANDOM_TENSORS: + return os << "random_tensors"; + case StatesPolicy::UNDEFINED: + return os << "undefined"; + default: + OPENVINO_THROW("Unexpected states policy"); + } +} + +} // namespace + +using LoraPatternParams = std::tuple; // states filling policy + +class LoraPatternBaseCPUTest : public SubgraphBaseTest, public testing::WithParamInterface { +public: +static std::string getTestCaseName(testing::TestParamInfo obj) { + ov::element::Type states_precision; + StatesPolicy states_policy; + std::tie(states_precision, states_policy) = obj.param; + + std::ostringstream result; + result << "states_precision=" << states_precision << "_states_policy=" << states_policy; + return result.str(); + } + + void SetUp() override { + targetDevice = ov::test::utils::DEVICE_CPU; + 
std::tie(states_precision, states_policy) = this->GetParam(); + init_function(); + } + +protected: + virtual void init_function() = 0; + + std::pair create_states(const std::vector& shapes, + const std::vector names) { + ov::OutputVector state_outs; + ov::SinkVector assigns; + auto create_state = [&](const ov::PartialShape& shape, const std::string name) { + auto variable = std::make_shared( + ov::op::util::VariableInfo{shape, states_precision, name}); + auto read_value = std::make_shared(variable); + auto assign = std::make_shared(read_value, variable); + assigns.push_back(assign); + if (states_precision == netType) + state_outs.push_back(read_value); + else + state_outs.push_back(std::make_shared(read_value, netType)); + }; + OPENVINO_ASSERT(shapes.size() == names.size()); + for (size_t i = 0; i < shapes.size(); ++i) + create_state(shapes[i], names[i]); + return std::make_pair(state_outs, assigns); + } + + void run_test() { + switch (states_policy) { + case StatesPolicy::EMPTY_TENSORS: + run_test_empty_tensors(); + break; + case StatesPolicy::RANDOM_TENSORS: + run_test_random_tensors(); + break; + default: + OPENVINO_THROW("Unexpected states policy: ", states_policy); + } + } + + void run_test_empty_tensors() { + compile_model(); + inferRequest = compiledModel.create_infer_request(); + ASSERT_TRUE(inferRequest); + generate_inputs(targetStaticShapes.front()); + for (const auto& input : inputs) { + inferRequest.set_tensor(input.first, input.second); + } + + inferRequest.infer(); + auto outputs = function->outputs(); + + auto tx_result = inferRequest.get_tensor(outputs[0]); + auto tz_result = inferRequest.get_tensor(outputs[1]); + ov::test::utils::compare(tx_result, tz_result, 1e-4, 1e-4); + } + + void run_test_random_tensors() { + compile_model(); + inferRequest = compiledModel.create_infer_request(); + ASSERT_TRUE(inferRequest); + + // use the Template plugin as a reference + + auto compiledReferenceModel = core->compile_model(function, 
ov::test::utils::DEVICE_TEMPLATE); + auto inferRequestRef = compiledReferenceModel.create_infer_request(); + ASSERT_TRUE(inferRequestRef); + + generate_inputs(targetStaticShapes.front()); + for (const auto& input : inputs) { + inferRequest.set_tensor(input.first, input.second); + inferRequestRef.set_tensor(input.first, input.second); + } + + constexpr size_t lora_order = 25lu; + constexpr int infer_count = 6lu; + + std::unordered_map stateShapes; + std::unordered_map initStateShapes; + + auto&& states = inferRequest.query_state(); + for (auto&& state : states) { + auto shape = state.get_state().get_shape(); + initStateShapes.insert({state.get_name(), shape}); + std::for_each(shape.begin(), shape.end(), [=](ov::Shape::value_type& x) { + if (0 == x) { + x = lora_order; + } + }); + stateShapes.insert({state.get_name(), std::move(shape)}); + } + + for (int i = 0; i < infer_count; ++i) { + // set states + + if (i == 3) { + // reset states on the 3rd iteration + for (auto&& item : states) { + item.reset(); + } + + for (auto&& item : inferRequestRef.query_state()) { + // Template plugin doesn't support reset state for dynamic shape states + item.get_state().set_shape(initStateShapes.at(item.get_name())); + } + } else if (!(i & 0x1)) { // every even call + // generate and set state tensors + for (auto&& item : states) { + auto&& refStates = inferRequestRef.query_state(); + using ov::test::utils::InputGenerateData; + const auto& shape = stateShapes.at(item.get_name()); + auto tensor = + ov::test::utils::create_and_fill_tensor(states_precision, shape, InputGenerateData{0, 10, 1, i}); + item.set_state(tensor); + auto itr = std::find_if(refStates.begin(), refStates.end(), [&](const ov::VariableState& state) { + return state.get_name() == item.get_name(); + }); + ASSERT_FALSE(itr == refStates.end()); + itr->set_state(tensor); + } + } + + inferRequest.infer(); + inferRequestRef.infer(); + auto outputs = function->outputs(); + + auto tx_result = 
inferRequest.get_tensor(outputs[0]); + auto tz_result = inferRequest.get_tensor(outputs[1]); + + auto tx_result_ref = inferRequestRef.get_tensor(outputs[0]); + auto tz_result_ref = inferRequestRef.get_tensor(outputs[1]); + + ov::test::utils::compare(tx_result_ref, tx_result, 1e-4, 1e-4); + ov::test::utils::compare(tz_result_ref, tz_result, 1e-4, 1e-4); + } + } + + StatesPolicy states_policy = StatesPolicy::UNDEFINED; + ov::element::Type states_precision = ov::element::undefined; +}; + +class LoraPatternMatmulCPUTest : public LoraPatternBaseCPUTest { +protected: + void init_function() override { + ov::PartialShape shape_x = {-1, -1, K}; + ov::PartialShape shape_w = {N, K}; + + auto param_y = std::make_shared(netType, shape_x); + auto param_w = std::make_shared(netType, shape_w); + + // "Main" matrix multiplication from the original transformer model + auto tx = std::make_shared(param_y, param_w, false, true); + + // LoRA parameters from states + auto states = create_states({{N, -1}, {1, -1}, {-1, K}}, {t4_name, t5_name, t6_name}); + + // Apply LoRA parameters to the current activations + auto t5810 = std::make_shared(param_y, states.first[2], false, true); + auto t5811 = std::make_shared(t5810, states.first[1]); + auto t5812 = std::make_shared(t5811, states.first[0], false, true); + + // Mix LoRA part into normally computed activations after the "main" MatMul + auto tz = std::make_shared(tx, t5812); + + auto result_x = std::make_shared(tx); + auto result_z = std::make_shared(tz); + + function = std::make_shared(ov::ResultVector({result_x, result_z}), + states.second, + ov::ParameterVector({param_y, param_w})); + } + + static constexpr size_t K = 563ul; // Weights matrix K dimension + static constexpr size_t N = 2048ul; // Weights matrix N dimension +}; + +class LoraPatternConvolutionCPUTest : public LoraPatternBaseCPUTest { +public: + void init_function() override { + ov::PartialShape shape_x = {-1, num_channels, -1, -1}; + + auto param_y = std::make_shared(netType, 
shape_x); + + // Original Convolution that is modified by LoRA adapter later + auto tx = ov::test::utils::make_convolution(param_y, + netType, + {1, 1}, + {1, 1}, + {0, 0}, + {0, 0}, + {1, 1}, + ov::op::PadType::EXPLICIT, + num_channels); + + // LoRA parameters from states + auto states = create_states({{num_channels, -1}, {1, -1}, {-1, num_channels}}, {t4_name, t5_name, t6_name}); + + // LoRA pattern with additional Transposes to move channel dimensions into positions where MatMul can be applied + auto t4940 = + std::make_shared(ov::element::i32, ov::Shape{4}, std::vector{2, 3, 0, 1}); + + auto t4941 = std::make_shared(param_y, t4940); + auto t4942 = std::make_shared(t4941, states.first[2], false, true); + auto t4943 = std::make_shared(t4942, states.first[1]); + auto t4944 = std::make_shared(t4943, states.first[0], false, true); + + auto t4945 = + std::make_shared(ov::element::i32, ov::Shape{4}, std::vector{2, 3, 0, 1}); + auto t4946 = std::make_shared(t4944, t4945); + + // Mix LoRA part into normally computed activations after the "main" MatMul + auto tz = std::make_shared(tx, t4946); + + auto result_x = std::make_shared(tx); + auto result_z = std::make_shared(tz); + + function = std::make_shared(ov::ResultVector({result_x, result_z}), + states.second, + ov::ParameterVector({param_y})); + } + +protected: + static constexpr size_t num_channels = 64ul; +}; + +TEST_P(LoraPatternMatmulCPUTest, CompareWithRefs) { + targetStaticShapes = {{{{1, 20, K}}, {{N, K}}}}; + run_test(); + CPUTestUtils::CheckNumberOfNodesWithType(compiledModel, "LoRA", 1); + CPUTestUtils::CheckNumberOfNodesWithType(compiledModel, "MatMul", 1); +} + +TEST_P(LoraPatternConvolutionCPUTest, CompareWithRefs) { + targetStaticShapes = {{{1, num_channels, 10, 15}}}; + run_test(); + CPUTestUtils::CheckNumberOfNodesWithType(compiledModel, "LoRA", 1); + CPUTestUtils::CheckNumberOfNodesWithType(compiledModel, "MatMul", 0); +} + +const ov::element::TypeVector states_precisions {ov::element::f32, 
ov::element::f16}; +const std::vector states_policies {StatesPolicy::EMPTY_TENSORS, StatesPolicy::RANDOM_TENSORS}; + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_LoRA_CPU_MatMul, LoraPatternMatmulCPUTest, + ::testing::Combine( + ::testing::ValuesIn(states_precisions), + ::testing::ValuesIn(states_policies)), + LoraPatternBaseCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_LoRA_CPU_Conv, LoraPatternConvolutionCPUTest, + ::testing::Combine( + ::testing::ValuesIn(states_precisions), + ::testing::ValuesIn(states_policies)), + LoraPatternBaseCPUTest::getTestCaseName); + +} // namespace test +} // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/read_value_assign.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/read_value_assign.cpp new file mode 100644 index 00000000000000..c6e976b321f703 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/read_value_assign.cpp @@ -0,0 +1,182 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_test_utils/node_builders/eltwise.hpp" +#include "common_test_utils/node_builders/constant.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "utils/cpu_test_utils.hpp" + +/* The main purpose of this test set is to test ReadValue->Assign direct connection optimizations, i.e. + dropping the MemoryOutput node. 
+*/ + +namespace ov { +namespace test { + +using namespace CPUTestUtils; + +// ┌────────┐ ┌────────┐ +// │ Param2 │ │ Param1 │ |---------------| +// └───┬────┘ └────┬───┘ | | +// │ |-----------|┌─────────┐ | +// │ | │ │Constant │ | +// │ | │ └───┬─────┘ | +// │ | ┌───┴────┐ │ | +// │ | │Multiply├─────┘ | +// │ | └───┬────┘ | <- Optional Init Subgraph +// │ | │ ┌─────────┐ | +// │ | │ │Constant │ | +// │ | │ └───┬─────┘ | +// │ | ┌───┴────┐ │ | +// │ | │ Add ├─────┘ | +// │ | └───┬────┘ | +// │ | │ | +// │ |---------------------------| +// │ │ +// │ │ +// │ │ +// │ ┌─────┴─────┐ +// │ │ ReadValue │ +// │ └─────┬─────┘ +// │ │ \ +// │ ┌──┴──┐ \ +// └────────┤ Add │ \┌────────┐ +// └──┬──┘ │ Assign │ +// │ └────────┘ +// │ +// ┌────┴────┐ +// │ Result1 │ +// └─────────┘ + +typedef std::tuple< + bool, // include init subgraph + CPUSpecificParams +> ReadValueAssignTestParams; + +class ReadValueAssignTest : public testing::WithParamInterface, + virtual public SubgraphBaseTest, + public CPUTestsBase { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj) { + bool use_init_subgraph = false; + CPUSpecificParams cpu_params; + std::tie(use_init_subgraph, cpu_params) = obj.param; + + std::ostringstream results; + results << "Init_Graph=" << (use_init_subgraph ? 
"True" : "False") << "_"; + results << CPUTestsBase::getTestCaseName(cpu_params); + return results.str(); + } + + void SetUp() override { + targetDevice = ov::test::utils::DEVICE_CPU; + + const ov::Shape tensor_shape = {3, 32, 7, 7}; + + InputShape param1_shape = {{-1, 32, -1, -1}, {tensor_shape}}; + InputShape param2_shape = {{-1, -1, -1, -1}, {tensor_shape}}; + + bool use_init_subgraph = false; + CPUSpecificParams cpu_params; + std::tie(use_init_subgraph, cpu_params) = this->GetParam(); + std::tie(inFmts, outFmts, priority, selectedType) = cpu_params; + selectedType = makeSelectedTypeStr(selectedType, net_prc); + + init_input_shapes({param1_shape, param2_shape}); + + ov::ParameterVector params; + params.push_back(std::make_shared(net_prc, inputDynamicShapes[0])); + params.push_back(std::make_shared(net_prc, inputDynamicShapes[1])); + std::shared_ptr last_node = params.front(); + + if (use_init_subgraph) { + //build init subgraph + auto const1 = utils::make_constant(net_prc, tensor_shape); + auto const2 = utils::make_constant(net_prc, tensor_shape); + auto multiply = utils::make_eltwise(last_node, const1, utils::EltwiseTypes::MULTIPLY); + auto add = utils::make_eltwise(multiply, const2, utils::EltwiseTypes::ADD); + last_node = add; + } + + const std::string variable_name("variable0"); + auto variable = std::make_shared( + ov::op::util::VariableInfo{inputDynamicShapes[0], net_prc, variable_name}); + + auto read = std::make_shared(last_node, variable); + auto assign = std::make_shared(read, variable); + auto add = utils::make_eltwise(params[1], read, utils::EltwiseTypes::ADD); + + add->get_rt_info() = getCPUInfo(); + auto res = std::make_shared(add); + + function = + std::make_shared(ov::ResultVector({res}), ov::SinkVector({assign}), params, "ReadValueAssign"); + } + +protected: + const ov::Shape tensor_shape = {3, 32, 7, 7}; + const ElementType net_prc = element::f32; +}; + +TEST_P(ReadValueAssignTest, CompareWithRefs) { + compile_model(); + inferRequest = 
compiledModel.create_infer_request(); + ASSERT_TRUE(inferRequest); + + // use the Template plugin as a reference + + auto compiledReferenceModel = core->compile_model(function, ov::test::utils::DEVICE_TEMPLATE); + auto inferRequestRef = compiledReferenceModel.create_infer_request(); + ASSERT_TRUE(inferRequestRef); + + generate_inputs(targetStaticShapes.front()); + for (const auto& input : inputs) { + inferRequest.set_tensor(input.first, input.second); + inferRequestRef.set_tensor(input.first, input.second); + } + + constexpr int infer_count = 3lu; + + auto&& states = inferRequest.query_state(); + auto&& refStates = inferRequestRef.query_state(); + + for (int i = 0; i < infer_count; ++i) { + // set states + + if (i & 0x1) { + //reset every odd iteration + states.front().reset(); + refStates.front().reset(); + } else { + // generate and set state tensors every even iteration + using ov::test::utils::InputGenerateData; + + auto tensor = + ov::test::utils::create_and_fill_tensor(net_prc, tensor_shape, InputGenerateData{0, 10, 1, i}); + states.front().set_state(tensor); + refStates.front().set_state(tensor); + } + + inferRequest.infer(); + inferRequestRef.infer(); + auto outputs = function->outputs(); + + auto result = inferRequest.get_tensor(outputs[0]); + + auto result_ref = inferRequestRef.get_tensor(outputs[0]); + + ov::test::utils::compare(result, result_ref, 1e-4, 1e-4); + } + CheckNumberOfNodesWithTypes(compiledModel, {"MemoryOutput", "Assign"}, 0); +} + +INSTANTIATE_TEST_SUITE_P( + smoke_ReadValue_Assign, + ReadValueAssignTest, + ::testing::Combine(::testing::Values(true, false), + ::testing::Values(CPUSpecificParams{{nchw, nchw}, {nchw}, {""}, "any_type"}, + CPUSpecificParams{{nhwc, nhwc}, {nhwc}, {""}, "any_type"})), + ReadValueAssignTest::getTestCaseName); +} // namespace test +} // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/stft.cpp 
b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/stft.cpp index b5fce57fb22f0c..91505d131d6aa5 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/stft.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/stft.cpp @@ -16,6 +16,12 @@ const std::vector data_type = {ov::element::f32, ov::element: const std::vector step_size_type = {ov::element::i32, ov::element::i64}; const std::vector> input_shapes = { + { // Static shapes + {{}, {{128}}}, // 1st input + {{}, {{8}}}, // 2nd input + {{}, {{}}}, // 3rd input + {{}, {{}}} // 4th input + }, { // Static shapes {{}, {{1, 128}}}, // 1st input {{}, {{8}}}, // 2nd input @@ -34,6 +40,12 @@ const std::vector> input_shapes = { {{}, {{}}}, // 3rd input {{}, {{}}} // 4th input }, + { // Dynamic dims in the first and second input shape + {{-1}, {{128}}}, // 1st input + {{-1}, {{8}}}, // 2nd input + {{}, {{}}}, // 3rd input + {{}, {{}}} // 4th input + }, { // Dynamic dims in the first and second input shape {{-1, -1}, {{1, 128}, {2, 226}}}, // 1st input {{-1}, {{8}, {16}}}, // 2nd input diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index e7c006ab97427f..90820d550df179 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -237,7 +237,6 @@ std::vector disabledTestPatterns() { R"(.*smoke_FakeQuantize.*/FakeQuantizeLayerTest.Inference.*TS=.*3.4.2.5.*LEVELS=255.*)", R"(.*smoke_FakeQuantizePerChannel.*/FakeQuantizeLayerTest.Inference.*TS=.*11.10.22.19.*LEVELS=(255|256).*netPRC=f32.*)", R"(.*smoke_MVN_5D/Mvn6LayerTest.Inference.*TS=.*3.4.2.5.*LEVELS=255.*netPRC=f16.*)", - R"(.*smoke_Snippets_MHAINT8MatMul/MHAINT8MatMul.*)", 
R"(.*smoke_static/ConvertFqRnnToQuantizedRnn.*2.1.5.*2.1.1.*2.1.1.*)", R"(.*smoke_InterpolateBicubicPillow_Layout_Test/InterpolateLayerCPUTest.CompareWithRefs/ShapeCalcMode=sizes_IS=\[?.2..20.?.?\]_TS.*1.17.4.4.*2.3.10.12.*1.17.4.4.*Sizes.*4.4.*10.20.*10.4.*PARAMETER.*0.0.0.0.*0.0.1.1.*2.3.*)", R"(.*smoke_LoopForCommon/LoopLayerCPUTest.CompareWithRefs/.*_netType=bf16.*)", @@ -358,8 +357,7 @@ std::vector disabledTestPatterns() { retVector.emplace_back(R"(smoke_VariableState/OVInferRequestVariableStateTest.*)"); // Issue: 141705 retVector.emplace_back(R"(.*smoke_arm_Deconv_2D_Planar_FP16/DeconvolutionLayerCPUTest.*INFERENCE_PRECISION_HINT=f16.*)"); - // Issue: 154882 - retVector.emplace_back(R"(.*ConcatMultiQuerySDPTest.*f16.*)"); + retVector.emplace_back(R"(.*ConcatMultiQuerySDPTest.*u8.*)"); #endif #if defined(OPENVINO_ARCH_ARM) @@ -539,6 +537,7 @@ std::vector disabledTestPatterns() { // Skip fp16 tests for paltforms that don't support fp16 precision retVector.emplace_back(R"(.*INFERENCE_PRECISION_HINT=(F|f)16.*)"); retVector.emplace_back(R"(.*Prc=f16.*)"); + retVector.emplace_back(R"(.*ConcatMultiQuerySDPTest.*f16.*HasShapeOf=1.*)"); } else { // Issue 117407 retVector.emplace_back( @@ -563,7 +562,7 @@ std::vector disabledTestPatterns() { // ignored for not supported bf16 platforms retVector.emplace_back(R"(.*smoke_Snippets_EnforcePrecision_bf16.*)"); retVector.emplace_back(R"(.*smoke_Snippets_MHAWOTransposeEnforceBF16.*)"); - retVector.emplace_back(R"(.*smoke_Snippets_MHAEnforceBF16.*)"); + retVector.emplace_back(R"(.*smoke_Snippets_MHA.*EnforceBF16.*)"); retVector.emplace_back(R"(.*ConcatSDPTest.*bf16.*)"); } // [150842] Need to support dynamic K dimension of BF16|INT8 MatMul on AMX systems @@ -572,6 +571,11 @@ std::vector disabledTestPatterns() { retVector.emplace_back(R"(.*smoke_Snippets_MatMul/MatMul.CompareWithRefImpl/.*IS\[0\]=\[\?.\?.\?.\?\].*T\[0\]=(u8|i8|bf16)_T\[1\]=(i8|bf16).*)"); 
retVector.emplace_back(R"(.*smoke_Snippets_MatMulTransposeB.*IS\[0\]=\[\?.\?.\?.\?\].*T\[0\]=(u8|i8|bf16)_T\[1\]=(i8|bf16).*)"); retVector.emplace_back(R"(.*smoke_Snippets_MatMulBias.*IS\[0\]=\[\?.\?.\?.\?\].*T\[0\]=(u8|i8|bf16)_T\[1\]=(i8|bf16).*)"); + + retVector.emplace_back(R"(.*smoke_Snippets_MHAWOTransposeEnforceBF16_3D.*IS\[1\]=\[2.64.\?\].*)"); + retVector.emplace_back(R"(.*smoke_Snippets_MHA.*BF16.*/MHA.*IS\[0\]=\[(\?|1).(\?|4).(\?|12).(\?|64)\].*)"); + retVector.emplace_back(R"(.*smoke_Snippets_MHA.*BF16.*/MHA.*IS\[0\]=\[\?.\?.\?\].*)"); + retVector.emplace_back(R"(.*smoke_Snippets_(MHAINT8MatMul|MHAQuantMatMul0|MHAFQAfterMatMul_4D|smoke_Snippets_MHAFQ).*IS\[0\]=\[\?.\?.\?\.\?].*)"); } #ifdef SNIPPETS_LIBXSMM_TPP // GN in TPP requires exposing tmp Buffer results outside the loop (ticket: 151234) diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp index f5057137f9b65c..176f0cb4d46aed 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp @@ -4,44 +4,26 @@ #include "snippets/matmul.hpp" -#include "common_test_utils/test_constants.hpp" -#include "openvino/runtime/system_conf.hpp" +#include "utils.hpp" namespace ov { namespace test { namespace snippets { -#define STATIC_SHAPES(...) 
static_shapes_to_test_representation(std::vector>{__VA_ARGS__}) - namespace { -static inline std::vector> quantized_precisions() { - std::vector> prc = {}; - // In Snippets MatMul INT8 is supported only on VNNI/AMX platforms - if (ov::with_cpu_x86_avx512_core_vnni() || ov::with_cpu_x86_avx512_core_amx_int8()) { - prc.emplace_back(std::vector{element::i8, element::i8}); - prc.emplace_back(std::vector{element::u8, element::i8}); - } - return prc; -} - static inline std::vector> precisions() { - std::vector> prc = { - {element::f32, element::f32}, - }; + std::vector> prc = precision_f32(2); // Note: TPP doesn't support low precisions yet #ifndef SNIPPETS_LIBXSMM_TPP - auto quant = quantized_precisions(); + auto quant = quantized_precisions_if_supported(); std::copy(quant.begin(), quant.end(), std::back_inserter(prc)); - // In Snippets MatMul BF16 is supported only on bf16/AMX platforms - if (ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16()) { - prc.emplace_back(std::vector{element::bf16, element::bf16}); - } + auto bfloat = precision_bf16_if_supported(2); + std::copy(bfloat.begin(), bfloat.end(), std::back_inserter(prc)); #endif return prc; } - std::vector> input_shapes{ { {{}, {{2, 1, 3, 5}}}, {{}, {{1, 3, 5, 3}}} }, { {{}, {{3, 1, 32, 14}}}, {{}, {{1, 3, 14, 37}}} }, @@ -158,7 +140,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulBias, MatMulBias, INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulBiasQuantized, MatMulBiasQuantized, ::testing::Combine( ::testing::ValuesIn(input_shapes_bias), - ::testing::ValuesIn(quantized_precisions()), + ::testing::ValuesIn(quantized_precisions_if_supported()), ::testing::Values(MatMulType::MatMul), ::testing::Values(1), // Subgraph ::testing::Values(1), // Tokenized MatMul+Bias @@ -167,8 +149,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulBiasQuantized, MatMulBiasQuantized INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulsQuantized, MatMulsQuantized, ::testing::Combine( - ::testing::ValuesIn(STATIC_SHAPES({{1, 
16, 128, 64}, {1, 16, 64, 128}, {128, 64}})), - ::testing::ValuesIn(quantized_precisions()), + ::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{1, 16, 128, 64}, {1, 16, 64, 128}, {128, 64}})), + ::testing::ValuesIn(quantized_precisions_if_supported()), ::testing::Values(MatMulType::MatMul), ::testing::Values(3), // Subgraph + Reshape + Subgraph ::testing::Values(2), // Tokenized [MatMul+FQ+Matmul] and [FQ] @@ -177,8 +159,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulsQuantized, MatMulsQuantized, INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulsQuantizedSoftmax, MatMulsQuantizedSoftmax, ::testing::Combine( - ::testing::ValuesIn(STATIC_SHAPES({{1, 16, 128, 64}, {1, 16, 64, 128}, {128, 64}})), - ::testing::ValuesIn(quantized_precisions()), + ::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{1, 16, 128, 64}, {1, 16, 64, 128}, {128, 64}})), + ::testing::ValuesIn(quantized_precisions_if_supported()), ::testing::Values(MatMulType::MatMul), ::testing::Values(3), // Subgraph + Reshape + Subgraph ::testing::Values(2), // Tokenized [MatMul+FQ+Matmul] and [FQ] diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha.cpp index 79db0b1546b2a8..63f5176684ccc1 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha.cpp @@ -1,60 +1,70 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include "snippets/mha.hpp" -#include "common_test_utils/test_constants.hpp" -#include "internal_properties.hpp" -#include "utils/cpu_test_utils.hpp" -#include "openvino/runtime/system_conf.hpp" +#include "utils.hpp" namespace ov { namespace test { namespace snippets { -#define STATIC_SHAPES(...) 
static_shapes_to_test_representation(std::vector>{__VA_ARGS__}) namespace { -const auto& inputShapes_4D = STATIC_SHAPES( - {{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 128, 12, 64}}, - {{1, 128, 16, 64}, {1, 128, 16, 64}, {1, 16, 1, 1}, {1, 128, 16, 64}}, - {{1, 128, 16, 64}, {1, 128, 16, 64}, {1, 1, 1, 128}, {1, 128, 16, 64}}, - {{2, 68, 6, 92}, {2, 68, 6, 92}, {1, 1, 68, 68}, {2, 68, 6, 92}}, - {{1, 58, 16, 34}, {1, 58, 16, 34}, {1, 1, 1, 58}, {1, 58, 16, 34}}); - -const auto& inputShapes_3D = STATIC_SHAPES( - {{128, 12, 64}, {128, 12, 64}, {12, 128, 128}, {128, 12, 64}}, - {{68, 6, 92}, {68, 6, 92}, {1, 68, 68}, {68, 6, 92}}, - {{16, 2, 92}, {68, 2, 92}, {1, 16, 68}, {68, 2, 92}}); - -static inline bool is_bf16_supported() { - return ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16(); -} - -static inline std::vector> precision_f32(size_t count) { - std::vector> prc; - prc.emplace_back(std::vector(count, element::f32)); - return prc; -} - -static inline std::vector> precision_bf16(size_t count) { - std::vector> prc; - if (is_bf16_supported()) - prc.emplace_back(std::vector(count, element::bf16)); - return prc; +std::vector> transposedShape_4D(bool with_dynamic = true) { + auto shapes = SNIPPETS_TESTS_STATIC_SHAPES( + {{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 128, 12, 64}}, + {{1, 128, 16, 64}, {1, 128, 16, 64}, {1, 16, 1, 1}, {1, 128, 16, 64}}, + {{1, 128, 16, 64}, {1, 128, 16, 64}, {1, 1, 1, 128}, {1, 128, 16, 64}}, + {{2, 68, 6, 92}, {2, 68, 6, 92}, {1, 1, 68, 68}, {2, 68, 6, 92}}, + {{1, 58, 16, 34}, {1, 58, 16, 34}, {1, 1, 1, 58}, {1, 58, 16, 34}}); + if (with_dynamic) { + std::vector> dynamic_shapes = {{ + {PartialShape{-1, -1, -1, 100}, {{1, 64, 4, 100}, {2, 16, 2, 100}, {1, 72, 4, 100}}}, + {PartialShape{-1, 128, -1, 100}, {{1, 128, 4, 100}, {2, 128, 2, 100}, {1, 128, 4, 100}}}, + {PartialShape{-1, -1, -1, 128}, {{1, 4, 64, 128}, {2, 2, 16, 128}, {1, 4, 72, 128}}}, + {PartialShape{-1, 128, -1, 100}, {{1, 
128, 4, 100}, {2, 128, 2, 100}, {1, 128, 4, 100}}}, + }, + { + {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {2, 16, 2, 100}, {1, 128, 3, 64}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 1, 64}, {2, 128, 2, 100}, {1, 128, 1, 64}}}, + {PartialShape{-1, -1, -1, -1}, {{2, 1, 128, 128}, {2, 2, 16, 128}, {2, 1, 128, 128}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {2, 128, 2, 100}, {1, 128, 3, 64}}}, + }, + { + {PartialShape{-1, -1, 12, 64}, {{1, 70, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 70, 12, 64}}}, + {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {2, 10, 12, 64}, {2, 1, 12, 64}, {2, 10, 12, 64}, {1, 35, 12, 64}}}, + {PartialShape{-1, 12, -1, -1}, {{2, 12, 70, 35}, {1, 12, 20, 10}, {1, 12, 20, 10}, {1, 12, 20, 1}, {2, 12, 70, 35}}}, + {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 35, 12, 64}}}, + }}; + shapes.insert(shapes.end(), dynamic_shapes.begin(), dynamic_shapes.end()); + } + return shapes; } -static ov::AnyMap enable_callback() { - return ov::AnyMap({ov::intel_cpu::snippets_mode(ov::intel_cpu::SnippetsMode::ENABLE)}); +std::vector> transposedShape_3D(bool with_dynamic = true) { + auto shapes = SNIPPETS_TESTS_STATIC_SHAPES( + {{128, 12, 64}, {128, 12, 64}, {12, 128, 128}, {128, 12, 64}}, + {{68, 6, 92}, {68, 6, 92}, {1, 68, 68}, {68, 6, 92}}, + {{16, 2, 92}, {68, 2, 92}, {1, 16, 68}, {68, 2, 92}}); + if (with_dynamic) { + shapes.push_back({ + {PartialShape{-1, -1, -1}, {{128, 3, 64}, {128, 3, 64}, {68, 6, 87}}}, + {PartialShape{-1, -1, -1}, {{128, 1, 64}, {128, 1, 64}, {13, 6, 87}}}, + {PartialShape{-1, -1, -1}, {{1, 128, 128}, {1, 128, 128}, {1, 68, 13}}}, + {PartialShape{-1, -1, -1}, {{128, 3, 64}, {128, 3, 64}, {13, 6, 87}}}, + }); + } + return shapes; } INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA_4D, MHA, - ::testing::Combine(::testing::ValuesIn(inputShapes_4D), + ::testing::Combine(::testing::ValuesIn(transposedShape_4D()), 
::testing::ValuesIn(precision_f32(4)), ::testing::Values(ov::element::f32), - ::testing::ValuesIn({false, true}), + ::testing::Values(false), ::testing::Values(MHA::default_thread_count), ::testing::Values(1), ::testing::Values(1), @@ -62,27 +72,12 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA_4D, ::testing::Values(CPUTestUtils::empty_plugin_config)), MHA::getTestCaseName); -std::vector> inputShapes_4D_dynamic{ - { - {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {1, 70, 3, 19}, {1, 128, 3, 64}, {1, 68, 6, 87}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 128, 1, 64}, {2, 49, 1, 19}, {1, 128, 1, 64}, {2, 13, 6, 87}}}, - {PartialShape{-1, -1, -1, -1}, {{2, 1, 128, 128}, {1, 1, 70, 49}, {2, 1, 128, 128}, {1, 1, 68, 13}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {1, 49, 3, 19}, {1, 128, 3, 64}, {2, 13, 6, 87}}}, - }, - { - {PartialShape{-1, -1, 12, 64}, {{1, 70, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 70, 12, 64}}}, - {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {2, 10, 12, 64}, {2, 1, 12, 64}, {2, 10, 12, 64}, {1, 35, 12, 64}}}, - {PartialShape{-1, 12, -1, -1}, {{2, 12, 70, 35}, {1, 12, 20, 10}, {1, 12, 20, 10}, {1, 12, 20, 1}, {2, 12, 70, 35}}}, - {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 35, 12, 64}}}, - } -}; - -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_DynMHA_4D, +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA_4D_WithScalarMul, MHA, - ::testing::Combine(::testing::ValuesIn(inputShapes_4D_dynamic), + ::testing::Combine(::testing::ValuesIn(transposedShape_4D(false)), ::testing::ValuesIn(precision_f32(4)), ::testing::Values(ov::element::f32), - ::testing::ValuesIn({false}), + ::testing::Values(true), ::testing::Values(MHA::default_thread_count), ::testing::Values(1), ::testing::Values(1), @@ -90,13 +85,12 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_DynMHA_4D, ::testing::Values(CPUTestUtils::empty_plugin_config)), MHA::getTestCaseName); - 
INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA_3D, MHA, - ::testing::Combine(::testing::ValuesIn(inputShapes_3D), + ::testing::Combine(::testing::ValuesIn(transposedShape_3D()), ::testing::ValuesIn(precision_f32(4)), ::testing::Values(ov::element::f32), - ::testing::ValuesIn({false, true}), + ::testing::Values(false), ::testing::Values(MHA::default_thread_count), ::testing::Values(5), // [122706]: Subgraph + 4 Transpose ::testing::Values(2), // decomposed Transpose + MHA @@ -104,111 +98,23 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA_3D, ::testing::Values(CPUTestUtils::empty_plugin_config)), MHA::getTestCaseName); -const auto& splitm_static_shapes = STATIC_SHAPES({{1, 128, 2, 64}, {1, 128, 2, 64}, {1, 1, 1, 1}, {1, 128, 2, 64}}); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHA_4D_SplitDimensionM_static, - MHA, - ::testing::Combine(::testing::ValuesIn(splitm_static_shapes), - ::testing::ValuesIn(precision_f32(4)), - ::testing::Values(ov::element::f32), - ::testing::Values(true), - ::testing::Values(4), // 4 Threads - ::testing::Values(6), // Subgraph + 4 Reshapes on inputs and 1 Reshape on output - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(enable_callback())), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHA_3D_SplitDimensionM_static, - MHA, - ::testing::Combine( - ::testing::ValuesIn(STATIC_SHAPES({{384, 2, 64}, {384, 2, 64}, {1, 384, 384}, {384, 2, 64}})), - ::testing::ValuesIn(precision_f32(4)), - ::testing::Values(ov::element::f32), - ::testing::Values(true), - ::testing::Values(4), // 4 Threads - ::testing::Values(10), // Subgraph + 4 Reshapes on inputs and 1 Reshape on output + 4 Transposes - ::testing::Values(1), // MHA - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(enable_callback())), - MHA::getTestCaseName); - -std::vector> splitm_dynamic_shapes_4d = { - { - {PartialShape{-1, -1, -1, -1}, {{1, 128, 2, 64}, {1, 17, 2, 64}, {1, 128, 2, 64}}}, - 
{PartialShape{-1, -1, -1, -1}, {{1, 128, 2, 64}, {1, 17, 2, 64}, {1, 128, 2, 64}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 1, 1, 128}, {1, 1, 1, 17}, {1, 1, 1, 128}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 128, 2, 64}, {1, 17, 2, 64}, {1, 128, 2, 64}}}, - }, - { - {PartialShape{-1, 128, -1, -1}, {{1, 128, 2, 64}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, - {PartialShape{-1, -1, 128, -1}, {{1, 1, 128, 16}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 32}}}, - }, - { - {PartialShape{-1, 32, -1, -1}, {{1, 32, 2, 64}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, - {PartialShape{-1, -1, 32, -1}, {{1, 1, 32, 16}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 32}}}, - }, - { - {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, - {PartialShape{-1, -1, 16, -1}, {{1, 1, 16, 16}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 32}}}, - }, -}; - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHA_4D_SplitDimensionM_dynamic, - MHA, - ::testing::Combine(::testing::ValuesIn(splitm_dynamic_shapes_4d), - ::testing::ValuesIn(precision_f32(4)), - ::testing::Values(ov::element::f32), - ::testing::Values(false), - ::testing::Values(4), // 4 Threads - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -std::vector> splitm_dynamic_shapes_3d = { - { - {PartialShape{-1, -1, -1}, {{128, 2, 64}, {17, 2, 64}, {128, 2, 64}}}, - {PartialShape{-1, -1, -1}, {{128, 2, 64}, {17, 2, 64}, {128, 2, 64}}}, - {PartialShape{-1, -1, -1}, {{1, 1, 128}, {1, 1, 17}, {1, 1, 128}}}, - {PartialShape{-1, -1, -1}, {{128, 2, 64}, {17, 2, 64}, {128, 2, 64}}}, - }, - { - {PartialShape{-1, 2, 64}, {{128, 2, 64}, {64, 2, 64}, {128, 2, 64}}}, - {PartialShape{-1, 2, 64}, {{128, 2, 64}, {64, 2, 64}, {128, 2, 64}}}, - {PartialShape{1, 1, -1}, {{1, 1, 128}, {1, 1, 64}, {1, 1, 128}}}, - {PartialShape{-1, 2, 64}, {{128, 2, 64}, 
{64, 2, 64}, {128, 2, 64}}}, - }, -}; - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHA_3D_SplitDimensionM_dynamic, - MHA, - ::testing::Combine(::testing::ValuesIn(splitm_dynamic_shapes_3d), - ::testing::ValuesIn(precision_f32(4)), - ::testing::Values(ov::element::f32), - ::testing::Values(false), - ::testing::Values(4), // 4 Threads - ::testing::Values(5), // Subgraph + 4 Transpose - ::testing::Values(2), // MHA + one of the transposes is executed via Subgraph (because callback is disabled) - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA_3D_WithScalarMul, + MHA, + ::testing::Combine(::testing::ValuesIn(transposedShape_3D(false)), + ::testing::ValuesIn(precision_f32(4)), + ::testing::Values(ov::element::f32), + ::testing::Values(true), + ::testing::Values(MHA::default_thread_count), + ::testing::Values(5), // [122706]: Subgraph + 4 Transpose + ::testing::Values(2), // decomposed Transpose + MHA + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHABF16_4D, MHA, - ::testing::Combine(::testing::ValuesIn(inputShapes_4D), - ::testing::ValuesIn(precision_bf16(4)), + ::testing::Combine(::testing::ValuesIn(transposedShape_4D()), + ::testing::ValuesIn(precision_bf16_if_supported(4)), ::testing::Values(ov::element::f32), ::testing::ValuesIn({false, true}), ::testing::Values(MHA::default_thread_count), @@ -220,7 +126,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHABF16_4D, INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAEnforceBF16, MHA, - ::testing::Combine(::testing::ValuesIn(inputShapes_4D), + ::testing::Combine(::testing::ValuesIn(transposedShape_4D()), ::testing::ValuesIn(precision_f32(4)), ::testing::Values(ov::element::bf16), ::testing::ValuesIn({false}), @@ -231,321 +137,6 @@ 
INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAEnforceBF16, ::testing::Values(CPUTestUtils::cpu_bf16_plugin_config)), MHA::getTestCaseName); -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAMulAdd, - MHAMulAdd, - ::testing::Combine( - ::testing::ValuesIn(STATIC_SHAPES({{1, 10, 12, 16}, {1, 10, 12, 16}, {1, 10, 12, 16}})), - ::testing::ValuesIn(precision_f32(3)), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({false}), // Need to support True for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -const auto& inputShapeSelect = STATIC_SHAPES( - // without broadcast - {{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 12, 128, 128}, {1, 12, 128, 128}, {1, 128, 12, 64}}, - {{1, 94, 12, 54}, {1, 94, 12, 54}, {1, 12, 94, 94}, {1, 12, 94, 94}, {1, 12, 94, 94}, {1, 94, 12, 54}}, - // with broadcast - {{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 12, 1, 1}, {1, 12, 1, 1}, {1, 128, 12, 64}}, - {{2, 52, 6, 102}, {2, 52, 6, 102}, {1, 6, 52, 52}, {1, 6, 1, 1}, {1, 6, 1, 1}, {2, 52, 6, 102}} -); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHA, - MHASelect, - ::testing::Combine(::testing::ValuesIn(inputShapeSelect), - ::testing::ValuesIn(precision_f32(6)), - ::testing::Values(ov::element::f32), - ::testing::Values(false), // Need to support True for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(2), // Less + MHA - ::testing::Values(2), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -const auto& inputShapesWOTranspose_4D = STATIC_SHAPES( - {{1, 12, 197, 64}, {1, 12, 64, 197}, {1, 12, 197, 64}}, - {{1, 12, 12, 64}, {1, 12, 64, 48}, {1, 12, 48, 64}}); -const auto& inputShapesWOTranspose_3D = STATIC_SHAPES( - {{12, 197, 
64}, {12, 64, 197}, {12, 197, 64}}, - {{12, 128, 100}, {12, 100, 128}, {12, 128, 100}}); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWOTransposeOnInputs_4D, - MHAWOTransposeOnInputs, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_4D), - ::testing::Values(std::vector{}), - ::testing::Values(ov::element::f32), - ::testing::Values(true), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWOTranspose_4D, - MHAWOTranspose, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_4D), - ::testing::ValuesIn(precision_f32(3)), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWOTranspose_3D, - MHAWOTranspose, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_3D), - ::testing::ValuesIn(precision_f32(3)), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -std::vector> inputShapesWOTranspose_3D_dynamic{ - { - {PartialShape{-1, -1, -1}, {{12, 19, 85}, {1, 40, 36}}}, - {PartialShape{-1, -1, -1}, {{1, 85, 19}, {2, 36, 40}}}, - {PartialShape{-1, -1, -1}, {{12, 19, 85}, {1, 40, 36}}}, - }, - { - {PartialShape{2, -1, 64}, {{2, 
9, 64}, {2, 2, 64}, {2, 9, 64}}}, - {PartialShape{2, 64, -1}, {{2, 64, 9}, {2, 64, 2}, {2, 64, 9}}}, - {PartialShape{2, -1, 64}, {{2, 9, 64}, {2, 2, 64}, {2, 9, 64}}}, - }, -}; - - - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_DynMHAWOTranspose_3D, - MHAWOTranspose, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_3D_dynamic), - ::testing::ValuesIn(precision_f32(3)), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWOTransposeBF16_4D, - MHAWOTranspose, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_4D), - ::testing::ValuesIn(precision_bf16(3)), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWOTransposeBF16_3D, - MHAWOTranspose, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_3D), - ::testing::ValuesIn(precision_bf16(3)), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - 
MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWOTransposeEnforceBF16_4D, - MHAWOTranspose, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_4D), - ::testing::ValuesIn(precision_f32(3)), - ::testing::Values(ov::element::bf16), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::cpu_bf16_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWOTransposeEnforceBF16_3D, - MHAWOTranspose, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_3D), - ::testing::ValuesIn(precision_f32(3)), - ::testing::Values(ov::element::bf16), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::cpu_bf16_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAINT8MatMul, - MHAINT8MatMul, - ::testing::Combine(::testing::ValuesIn(std::vector>(inputShapes_4D.begin(), - inputShapes_4D.begin() + 2)), - ::testing::Values(std::vector{}), - ::testing::Values(ov::element::f32), - ::testing::Values(false), // The graph doesn't contain Multiply - ::testing::Values(MHA::default_thread_count), - ::testing::Values(6), // FQx3 on inputs + MHA + Transpose on output + Deq Mul - ::testing::Values(5), // FQx3 on inputs + MHA + Deq Mul - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - 
-INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAQuantMatMul0, - MHAQuantMatMul0, - ::testing::Combine( - ::testing::ValuesIn(STATIC_SHAPES({{1, 128, 768}, {1, 128, 768}, {1, 1, 1, 128}, {1, 128, 768}})), - ::testing::Values(std::vector{}), - ::testing::Values(ov::element::f32), - ::testing::Values(false), // The graph doesn't contain Multiply - ::testing::Values(MHA::default_thread_count), - ::testing::Values(9), // FQx2 on inputs + MHA + Transpose on output + 4 Reshapes + Deq Mul - ::testing::Values(4), // FQx2 on inputs + MHA + Deq Mul - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAFQAfterMatMul_4D, - MHAFQAfterMatMul, - ::testing::Combine(::testing::ValuesIn(inputShapes_4D), - ::testing::Values(std::vector{}), - ::testing::Values(ov::element::f32), - ::testing::Values(false), // The graph doesn't contain Multiply - ::testing::Values(MHA::default_thread_count), - ::testing::Values(3), // MHA + Transpose on output + Deq Mul - ::testing::Values(2), // MHA + Deq Mul - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAFQ, - MHAFQ, - ::testing::Combine(::testing::ValuesIn(STATIC_SHAPES({{1, 64, 12, 64}, - {1, 64, 12, 64}, - {1, 1, 1, 64}, - {1, 64, 12, 64}})), - ::testing::Values(std::vector{}), - ::testing::Values(ov::element::f32), - ::testing::Values(false), // The graph doesn't contain Multiply - ::testing::Values(MHA::default_thread_count), - ::testing::Values(7), // Transposex2 + Subgraphsx5 - ::testing::Values(5), // MHA + Deq Mul on output + Deqs on inputs + 2 xFQ on inputs - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -std::vector> inputShapesTransposedB { - { - {{}, {{1, 12, 12, 64}}}, - {{}, {{1, 12, 48, 64}}}, - 
{{}, {{1, 12, 48, 64}}} - }, - { - {PartialShape{-1, 3, -1, 64}, {{1, 3, 12, 64}, {2, 3, 36, 64}}}, - {PartialShape{-1, 3, -1, 64}, {{1, 3, 14, 64}, {2, 3, 42, 64}}}, - {PartialShape{-1, 3, -1, -1}, {{1, 3, 14, 36}, {2, 3, 42, 36}}}, - }, - { - {PartialShape{2, -1, 32, -1}, {{2, 1, 32, 70}, {2, 2, 32, 96}}}, - {PartialShape{2, -1, 49, -1}, {{2, 3, 49, 70}, {2, 1, 49, 96}}}, - {PartialShape{2, -1, 49, -1}, {{2, 1, 49, 17}, {2, 2, 49, 81}}}, - }, -}; - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHATransposedB, - MHATransposedB, - ::testing::Combine(::testing::ValuesIn(inputShapesTransposedB), - ::testing::Values(std::vector{}), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -const auto& inputShapesExtractedReshape = STATIC_SHAPES( - {{2, 196, 64}, {2, 64, 196}, {2, 14, 14, 14, 1}, {2, 14, 14, 1, 14}, {2, 196, 64}}, - {{1, 16, 10}, {1, 10, 16}, {1, 4, 4, 4, 1}, {1, 4, 4, 1, 4}, {1, 16, 10}}, - {{1, 16, 10}, {1, 10, 16}, {1, 1, 1, 1, 1}, {1, 4, 4, 4, 4}, {1, 16, 10}}, - {{1, 16, 10}, {1, 10, 16}, {1, 4, 4, 4, 4}, {1, 1, 1, 1, 1}, {1, 16, 10}}, - {{1, 4, 16, 10}, {1, 4, 10, 16}, {1, 4, 256}, {1, 4, 256}, {1, 4, 16, 10}}, - {{1, 4, 16, 10}, {1, 4, 10, 16}, {1, 1, 256}, {1, 4, 1}, {1, 4, 16, 10}}); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWithExtractedReshape, - MHAWithExtractedReshape, - ::testing::Combine(::testing::ValuesIn(inputShapesExtractedReshape), - ::testing::Values(std::vector{}), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({true}), // False is not supported for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(3), // Extracted Add + Extracted Reshape + MHA - ::testing::Values(2), // Extracted 
Add + MHA - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -std::vector> inputShapes_4D_WithMul_dynamic{ - { - {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {1, 70, 3, 19}, {1, 128, 3, 64}, {1, 68, 6, 87}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 128, 1, 64}, {2, 49, 1, 19}, {1, 128, 1, 64}, {2, 13, 6, 87}}}, - {PartialShape{1}, {{1}, {1}, {1}, {1} }}, - {PartialShape{-1, -1, -1, -1}, {{2, 1, 128, 128}, {1, 1, 70, 49}, {2, 1, 128, 128}, {1, 1, 68, 13}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {1, 49, 3, 19}, {1, 128, 3, 64}, {2, 13, 6, 87}}}, - }, - { - {PartialShape{-1, -1, 12, 64}, {{1, 70, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 70, 12, 64}}}, - {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {2, 10, 12, 64}, {2, 1, 12, 64}, {2, 10, 12, 64}, {1, 35, 12, 64}}}, - {PartialShape{-1, 12, 64, -1}, {{1, 12, 64, 35}, {1, 12, 64, 10}, {1, 12, 64, 10}, {1, 12, 64, 1}, {1, 12, 64, 35}}}, - {PartialShape{-1, 12, -1, -1}, {{2, 12, 70, 35}, {1, 12, 20, 10}, {1, 12, 20, 10}, {1, 12, 20, 1}, {2, 12, 70, 35}}}, - {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 35, 12, 64}}}, - } -}; - -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_DynMHA_4D_WithMul, - MHAWithDynamicMul, - ::testing::Combine(::testing::ValuesIn(inputShapes_4D_WithMul_dynamic), - ::testing::ValuesIn(precision_f32(5)), - ::testing::Values(ov::element::f32), - ::testing::Values(MHA::default_thread_count), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHAWithDynamicMul::getTestCaseName); - } // namespace } // namespace snippets } // namespace test diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_extracted_reshape.cpp 
b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_extracted_reshape.cpp new file mode 100644 index 00000000000000..f3c1439395650a --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_extracted_reshape.cpp @@ -0,0 +1,40 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + +#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +const auto& inputShapesExtractedReshape = SNIPPETS_TESTS_STATIC_SHAPES( + {{2, 196, 64}, {2, 64, 196}, {2, 14, 14, 14, 1}, {2, 14, 14, 1, 14}, {2, 196, 64}}, + {{1, 16, 10}, {1, 10, 16}, {1, 4, 4, 4, 1}, {1, 4, 4, 1, 4}, {1, 16, 10}}, + {{1, 16, 10}, {1, 10, 16}, {1, 1, 1, 1, 1}, {1, 4, 4, 4, 4}, {1, 16, 10}}, + {{1, 16, 10}, {1, 10, 16}, {1, 4, 4, 4, 4}, {1, 1, 1, 1, 1}, {1, 16, 10}}, + {{1, 4, 16, 10}, {1, 4, 10, 16}, {1, 4, 256}, {1, 4, 256}, {1, 4, 16, 10}}, + {{1, 4, 16, 10}, {1, 4, 10, 16}, {1, 1, 256}, {1, 4, 1}, {1, 4, 16, 10}}); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWithExtractedReshape, + MHAWithExtractedReshape, + ::testing::Combine(::testing::ValuesIn(inputShapesExtractedReshape), + ::testing::Values(std::vector{}), + ::testing::Values(ov::element::f32), + ::testing::ValuesIn({true}), // False is not supported for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(3), // Extracted Add + Extracted Reshape + MHA + ::testing::Values(2), // Extracted Add + MHA + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_fma.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_fma.cpp new file mode 100644 index 00000000000000..4bf35e2daa690d --- 
/dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_fma.cpp @@ -0,0 +1,33 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + +#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAMulAdd, + MHAMulAdd, + ::testing::Combine( + ::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{1, 10, 12, 16}, {1, 10, 12, 16}, {1, 10, 12, 16}})), + ::testing::ValuesIn(precision_f32(3)), + ::testing::Values(ov::element::f32), + ::testing::ValuesIn({false}), // Need to support True for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(1), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_quantized.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_quantized.cpp new file mode 100644 index 00000000000000..0c731b74565863 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_quantized.cpp @@ -0,0 +1,103 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + +#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +std::vector> inputShapesQuantized { + { + {{}, {{1, 128, 16, 64}}}, + {{}, {{1, 128, 16, 64}}}, + {{}, {{1, 16, 1, 1}}}, + {{}, {{1, 128, 16, 64}}} + }, + { + {{}, {{2, 68, 6, 92}}}, + {{}, {{2, 68, 6, 92}}}, + {{}, {{1, 1, 68, 68}}}, + {{}, {{2, 68, 6, 92}}} + }, + // K, N are static + { + {PartialShape{-1, -1, -1, 100}, {{1, 64, 4, 100}, {2, 16, 2, 100}, {1, 72, 4, 100}}}, + {PartialShape{-1, 128, 
-1, 100}, {{1, 128, 4, 100}, {2, 128, 2, 100}, {1, 128, 4, 100}}}, + {PartialShape{-1, -1, -1, 128}, {{1, 4, 64, 128}, {2, 2, 16, 128}, {1, 4, 72, 128}}}, + {PartialShape{-1, 128, -1, 100}, {{1, 128, 4, 100}, {2, 128, 2, 100}, {1, 128, 4, 100}}}, + }, + { + {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {2, 16, 2, 100}, {1, 128, 3, 64}, {1, 128, 12, 600}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 1, 64}, {2, 128, 2, 100}, {1, 128, 1, 64}, {1, 128, 12, 600}}}, + {PartialShape{-1, -1, -1, -1}, {{2, 1, 128, 128}, {1, 1, 1, 128}, {2, 1, 128, 128}, {1, 12, 1, 1}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {2, 128, 2, 100}, {1, 128, 3, 64}, {1, 128, 12, 600}}}, + } +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAINT8MatMul, + MHAINT8MatMul, + ::testing::Combine(::testing::ValuesIn(inputShapesQuantized), + ::testing::Values(std::vector{}), + ::testing::Values(ov::element::f32), + ::testing::Values(false), // The graph doesn't contain Multiply + ::testing::Values(MHA::default_thread_count), + ::testing::Values(6), // FQx3 on inputs + MHA + Transpose on output + Deq Mul + ::testing::Values(5), // FQx3 on inputs + MHA + Deq Mul + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAQuantMatMul0, + MHAQuantMatMul0, + ::testing::Combine( + ::testing::ValuesIn(inputShapesQuantized), + ::testing::Values(std::vector{}), + ::testing::Values(ov::element::f32), + ::testing::Values(false), // The graph doesn't contain Multiply + ::testing::Values(MHA::default_thread_count), + ::testing::Values(5), // FQx2 on inputs + MHA + Transpose on output + Deq Mul + ::testing::Values(4), // FQx2 on inputs + MHA + Deq Mul + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAFQAfterMatMul_4D, + MHAFQAfterMatMul, + 
::testing::Combine(::testing::ValuesIn(inputShapesQuantized), + ::testing::Values(std::vector{}), + ::testing::Values(ov::element::f32), + ::testing::Values(false), // The graph doesn't contain Multiply + ::testing::Values(MHA::default_thread_count), + ::testing::Values(3), // MHA + Transpose on output + Deq Mul + ::testing::Values(2), // MHA + Deq Mul + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAFQ, + MHAFQ, + ::testing::Combine(::testing::ValuesIn(inputShapesQuantized), + ::testing::Values(std::vector{}), + ::testing::Values(ov::element::f32), + ::testing::Values(false), // The graph doesn't contain Multiply + ::testing::Values(MHA::default_thread_count), + ::testing::Values(7), // Transposex2 + Subgraphsx5 + ::testing::Values(5), // MHA + Deq Mul on output + Deqs on inputs + 2 xFQ on inputs + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_select.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_select.cpp new file mode 100644 index 00000000000000..3fc1417d20b102 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_select.cpp @@ -0,0 +1,41 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + +#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +const auto& inputShapeSelect = SNIPPETS_TESTS_STATIC_SHAPES( + // without broadcast + {{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 12, 128, 128}, {1, 12, 128, 128}, {1, 128, 12, 64}}, + {{1, 94, 12, 54}, {1, 94, 12, 54}, {1, 12, 94, 94}, {1, 
12, 94, 94}, {1, 12, 94, 94}, {1, 94, 12, 54}}, + // with broadcast + {{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 12, 1, 1}, {1, 12, 1, 1}, {1, 128, 12, 64}}, + {{2, 52, 6, 102}, {2, 52, 6, 102}, {1, 6, 52, 52}, {1, 6, 1, 1}, {1, 6, 1, 1}, {2, 52, 6, 102}} +); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHA, + MHASelect, + ::testing::Combine(::testing::ValuesIn(inputShapeSelect), + ::testing::ValuesIn(precision_f32(6)), + ::testing::Values(ov::element::f32), + ::testing::Values(false), // Need to support True for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(2), // Less + MHA + ::testing::Values(2), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_split_dim_m.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_split_dim_m.cpp new file mode 100644 index 00000000000000..bb5f7fe2fa5b52 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_split_dim_m.cpp @@ -0,0 +1,121 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + +#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +static ov::AnyMap enable_callback() { + return ov::AnyMap({ov::intel_cpu::snippets_mode(ov::intel_cpu::SnippetsMode::ENABLE)}); +} + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHA_4D_SplitDimensionM_static, + MHA, + ::testing::Combine(::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{1, 128, 2, 64}, {1, 128, 2, 64}, {1, 1, 1, 1}, {1, 128, 2, 64}})), + ::testing::ValuesIn(precision_f32(4)), + ::testing::Values(ov::element::f32), + ::testing::Values(true), + ::testing::Values(4), // 4 Threads + 
::testing::Values(6), // Subgraph + 4 Reshapes on inputs and 1 Reshape on output + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(enable_callback())), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHA_3D_SplitDimensionM_static, + MHA, + ::testing::Combine( + ::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{384, 2, 64}, {384, 2, 64}, {1, 384, 384}, {384, 2, 64}})), + ::testing::ValuesIn(precision_f32(4)), + ::testing::Values(ov::element::f32), + ::testing::Values(true), + ::testing::Values(4), // 4 Threads + ::testing::Values(10), // Subgraph + 4 Reshapes on inputs and 1 Reshape on output + 4 Transposes + ::testing::Values(1), // MHA + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(enable_callback())), + MHA::getTestCaseName); + +std::vector> splitm_dynamic_shapes_4d = { + { + {PartialShape{-1, -1, -1, -1}, {{1, 128, 2, 64}, {1, 17, 2, 64}, {1, 128, 2, 64}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 2, 64}, {1, 17, 2, 64}, {1, 128, 2, 64}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 1, 1, 128}, {1, 1, 1, 17}, {1, 1, 1, 128}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 2, 64}, {1, 17, 2, 64}, {1, 128, 2, 64}}}, + }, + { + {PartialShape{-1, 128, -1, -1}, {{1, 128, 2, 64}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, + {PartialShape{-1, -1, 128, -1}, {{1, 1, 128, 16}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 32}}}, + }, + { + {PartialShape{-1, 32, -1, -1}, {{1, 32, 2, 64}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, + {PartialShape{-1, -1, 32, -1}, {{1, 1, 32, 16}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 32}}}, + }, + { + {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, + {PartialShape{-1, -1, 16, -1}, {{1, 1, 16, 16}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 32}}}, + }, +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHA_4D_SplitDimensionM_dynamic, + MHA, + 
::testing::Combine(::testing::ValuesIn(splitm_dynamic_shapes_4d), + ::testing::ValuesIn(precision_f32(4)), + ::testing::Values(ov::element::f32), + ::testing::Values(false), + ::testing::Values(4), // 4 Threads + ::testing::Values(1), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +std::vector> splitm_dynamic_shapes_3d = { + { + {PartialShape{-1, -1, -1}, {{128, 2, 64}, {17, 2, 64}, {128, 2, 64}}}, + {PartialShape{-1, -1, -1}, {{128, 2, 64}, {17, 2, 64}, {128, 2, 64}}}, + {PartialShape{-1, -1, -1}, {{1, 1, 128}, {1, 1, 17}, {1, 1, 128}}}, + {PartialShape{-1, -1, -1}, {{128, 2, 64}, {17, 2, 64}, {128, 2, 64}}}, + }, + { + {PartialShape{-1, 2, 64}, {{128, 2, 64}, {64, 2, 64}, {128, 2, 64}}}, + {PartialShape{-1, 2, 64}, {{128, 2, 64}, {64, 2, 64}, {128, 2, 64}}}, + {PartialShape{1, 1, -1}, {{1, 1, 128}, {1, 1, 64}, {1, 1, 128}}}, + {PartialShape{-1, 2, 64}, {{128, 2, 64}, {64, 2, 64}, {128, 2, 64}}}, + }, +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHA_3D_SplitDimensionM_dynamic, + MHA, + ::testing::Combine(::testing::ValuesIn(splitm_dynamic_shapes_3d), + ::testing::ValuesIn(precision_f32(4)), + ::testing::Values(ov::element::f32), + ::testing::Values(false), + ::testing::Values(4), // 4 Threads + ::testing::Values(5), // Subgraph + 4 Transpose + ::testing::Values(2), // MHA + one of the transposes is executed via Subgraph (because callback is disabled) + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_transposed_b.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_transposed_b.cpp new file mode 100644 index 00000000000000..45260df3cab280 --- /dev/null +++ 
b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_transposed_b.cpp @@ -0,0 +1,50 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + +#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +std::vector> inputShapesTransposedB { + { + {{}, {{1, 12, 12, 64}}}, + {{}, {{1, 12, 48, 64}}}, + {{}, {{1, 12, 48, 64}}} + }, + { + {PartialShape{-1, 3, -1, 64}, {{1, 3, 12, 64}, {2, 3, 36, 64}}}, + {PartialShape{-1, 3, -1, 64}, {{1, 3, 14, 64}, {2, 3, 42, 64}}}, + {PartialShape{-1, 3, -1, -1}, {{1, 3, 14, 36}, {2, 3, 42, 36}}}, + }, + { + {PartialShape{2, -1, 32, -1}, {{2, 1, 32, 70}, {2, 2, 32, 96}}}, + {PartialShape{2, -1, 49, -1}, {{2, 3, 49, 70}, {2, 1, 49, 96}}}, + {PartialShape{2, -1, 49, -1}, {{2, 1, 49, 17}, {2, 2, 49, 81}}}, + }, +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHATransposedB, + MHATransposedB, + ::testing::Combine(::testing::ValuesIn(inputShapesTransposedB), + ::testing::Values(std::vector{}), + ::testing::Values(ov::element::f32), + ::testing::ValuesIn({true}), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(1), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_with_dyn_mul.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_with_dyn_mul.cpp new file mode 100644 index 00000000000000..7876d737af2281 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_with_dyn_mul.cpp @@ -0,0 +1,68 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + 
+#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +std::vector> transposedShape_4D_WithMul { + { + {PartialShape{-1, -1, -1, 100}, {{1, 64, 4, 100}, {2, 16, 2, 100}, {1, 72, 4, 100}}}, + {PartialShape{-1, 200, -1, 100}, {{1, 200, 4, 100}, {2, 200, 2, 100}, {1, 200, 4, 100}}}, + {PartialShape{-1, -1, 100, 200}, {{1, 4, 100, 200}, {2, 2, 100, 200}, {1, 4, 100, 200}}}, + {PartialShape{-1, -1, -1, 200}, {{1, 4, 64, 200}, {2, 2, 16, 200}, {1, 4, 72, 200}}}, + {PartialShape{-1, 200, -1, 100}, {{1, 200, 4, 100}, {2, 200, 2, 100}, {1, 200, 4, 100}}}, + }, + { + {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {1, 70, 3, 19}, {1, 128, 3, 64}, {1, 68, 6, 87}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 1, 64}, {2, 49, 1, 19}, {1, 128, 1, 64}, {2, 13, 6, 87}}}, + {PartialShape{1}, {{1}, {1}, {1}, {1} }}, + {PartialShape{-1, -1, -1, -1}, {{2, 1, 128, 128}, {1, 1, 70, 49}, {2, 1, 128, 128}, {1, 1, 68, 13}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {1, 49, 3, 19}, {1, 128, 3, 64}, {2, 13, 6, 87}}}, + }, + { + {PartialShape{-1, -1, 12, 64}, {{1, 70, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 70, 12, 64}}}, + {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {2, 10, 12, 64}, {2, 1, 12, 64}, {2, 10, 12, 64}, {1, 35, 12, 64}}}, + {PartialShape{-1, 12, 64, -1}, {{1, 12, 64, 35}, {1, 12, 64, 10}, {1, 12, 64, 10}, {1, 12, 64, 1}, {1, 12, 64, 35}}}, + {PartialShape{-1, 12, -1, -1}, {{2, 12, 70, 35}, {1, 12, 20, 10}, {1, 12, 20, 10}, {1, 12, 20, 1}, {2, 12, 70, 35}}}, + {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 35, 12, 64}}}, + } +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHA_4D_WithDynamicMul, + MHAWithDynamicMul, + ::testing::Combine(::testing::ValuesIn(transposedShape_4D_WithMul), + ::testing::ValuesIn(precision_f32(5)), + ::testing::Values(ov::element::f32), + ::testing::Values(MHA::default_thread_count), + ::testing::Values(1), + 
::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHAWithDynamicMul::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHA_4D_WithDynamicMul_EnforceBF16, + MHAWithDynamicMul, + ::testing::Combine(::testing::ValuesIn(transposedShape_4D_WithMul), + ::testing::ValuesIn(precision_f32(5)), + ::testing::Values(ov::element::bf16), + ::testing::Values(MHA::default_thread_count), + ::testing::Values(8), // MHA + 1 Transpose on output + 6 Converts around + ::testing::Values(7), // MHA + 6 Converts around + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHAWithDynamicMul::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_wo_transpose.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_wo_transpose.cpp new file mode 100644 index 00000000000000..0967ef27087674 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_wo_transpose.cpp @@ -0,0 +1,151 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + +#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +std::vector> originalShape_4D { + { {{}, {{1, 12, 197, 64}}}, {{}, {{1, 12, 64, 197}}}, {{}, {{1, 12, 197, 64}}} }, + { {{}, {{1, 12, 12, 64}}}, {{}, {{1, 12, 64, 48}}}, {{}, {{1, 12, 48, 64}}} }, + { + {PartialShape{-1, -1, -1, -1}, {{1, 3, 128, 64}, {1, 12, 197, 100}, {1, 3, 128, 64}, {1, 12, 197, 600}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 3, 64, 128}, {1, 12, 100, 197}, {1, 3, 64, 128}, {1, 12, 600, 197}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 3, 128, 64}, {1, 12, 197, 100}, {1, 3, 128, 64}, {1, 12, 197, 600}}}, + }, + { + {PartialShape{1, 4, -1, -1}, {{1, 4, 
384, 64}, {1, 4, 197, 64}, {1, 4, 384, 560}}}, + {PartialShape{1, 4, -1, -1}, {{1, 4, 64, 128}, {1, 4, 64, 197}, {1, 4, 560, 384}}}, + {PartialShape{1, 4, -1, 64}, {{1, 4, 128, 64}, {1, 4, 197, 64}, {1, 4, 384, 64}}}, + } +}; + +std::vector> originalShape_3D { + { {{}, {{12, 197, 64}}}, {{}, {{12, 64, 197}}}, {{}, {{12, 197, 64}}} }, + { {{}, {{12, 128, 100}}}, {{}, {{12, 100, 128}}}, {{}, {{12, 128, 100}}} }, + { + {PartialShape{-1, -1, 64}, {{2, 9, 64}, {1, 64, 64}, {2, 64, 64}}}, + {PartialShape{-1, 64, 124}, {{2, 64, 124}, {1, 64, 124}, {2, 64, 124}}}, + {PartialShape{-1, 124, 64}, {{2, 124, 64}, {1, 124, 64}, {2, 124, 64}}}, + }, + { + {PartialShape{-1, -1, -1}, {{12, 19, 85}, {1, 40, 36}}}, + {PartialShape{-1, -1, -1}, {{1, 85, 19}, {2, 36, 40}}}, + {PartialShape{-1, -1, -1}, {{12, 19, 85}, {1, 40, 36}}}, + }, + { + {PartialShape{2, -1, 64}, {{2, 9, 64}, {2, 4, 64}, {2, 9, 64}}}, + {PartialShape{2, 64, -1}, {{2, 64, 9}, {2, 64, 4}, {2, 64, 9}}}, + {PartialShape{2, -1, 64}, {{2, 9, 64}, {2, 4, 64}, {2, 9, 64}}}, + } +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWOTransposeOnInputs_4D, + MHAWOTransposeOnInputs, + ::testing::Combine(::testing::ValuesIn(originalShape_4D), + ::testing::Values(std::vector{}), + ::testing::Values(ov::element::f32), + ::testing::Values(true), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(1), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWOTranspose_4D, + MHAWOTranspose, + ::testing::Combine(::testing::ValuesIn(originalShape_4D), + ::testing::ValuesIn(precision_f32(3)), + ::testing::Values(ov::element::f32), + ::testing::Values(true), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(1), + ::testing::Values(1), + 
::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWOTranspose_3D, + MHAWOTranspose, + ::testing::Combine(::testing::ValuesIn(originalShape_3D), + ::testing::ValuesIn(precision_f32(3)), + ::testing::Values(ov::element::f32), + ::testing::Values(true), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(1), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWOTransposeBF16_4D, + MHAWOTranspose, + ::testing::Combine(::testing::ValuesIn(originalShape_4D), + ::testing::ValuesIn(precision_bf16_if_supported(3)), + ::testing::Values(ov::element::f32), + ::testing::Values(true), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWOTransposeBF16_3D, + MHAWOTranspose, + ::testing::Combine(::testing::ValuesIn(originalShape_3D), + ::testing::ValuesIn(precision_bf16_if_supported(3)), + ::testing::Values(ov::element::f32), + ::testing::Values(true), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + 
smoke_Snippets_MHAWOTransposeEnforceBF16_4D, + MHAWOTranspose, + ::testing::Combine(::testing::ValuesIn(originalShape_4D), + ::testing::ValuesIn(precision_f32(3)), + ::testing::Values(ov::element::bf16), + ::testing::Values(true), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::cpu_bf16_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWOTransposeEnforceBF16_3D, + MHAWOTranspose, + ::testing::Combine(::testing::ValuesIn(originalShape_3D), + ::testing::ValuesIn(precision_f32(3)), + ::testing::Values(ov::element::bf16), + ::testing::Values(true), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::cpu_bf16_plugin_config)), + MHA::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose_matmul.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose_matmul.cpp index c05087283305e4..ea7de9ccb209ad 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose_matmul.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose_matmul.cpp @@ -6,36 +6,28 @@ #include "common_test_utils/test_constants.hpp" #include "openvino/runtime/system_conf.hpp" +#include "utils.hpp" namespace ov { namespace test { namespace snippets { -#define STATIC_SHAPES(...) 
static_shapes_to_test_representation(std::vector>{__VA_ARGS__}) - namespace { static inline std::vector> precisions(bool only_fp32 = true) { - std::vector> prc = { - {element::f32, element::f32}, - }; -// Note: low precisions are not supported by TPP yet (ticker: 130010) + std::vector> prc = precision_f32(2); +// Note: TPP doesn't support low precisions yet #ifndef SNIPPETS_LIBXSMM_TPP if (!only_fp32) { - // In Snippets MatMul INT8 is supported only on VNNI/AMX platforms - if (ov::with_cpu_x86_avx512_core_vnni() || ov::with_cpu_x86_avx512_core_amx_int8()) { - prc.emplace_back(std::vector{element::i8, element::i8}); - prc.emplace_back(std::vector{element::u8, element::i8}); - } - // In Snippets MatMul BF16 is supported only on bf16/AMX platforms - if (ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16()) { - prc.emplace_back(std::vector{element::bf16, element::bf16}); - } + auto quant = quantized_precisions_if_supported(); + std::copy(quant.begin(), quant.end(), std::back_inserter(prc)); + auto bfloat = precision_bf16_if_supported(2); + std::copy(bfloat.begin(), bfloat.end(), std::back_inserter(prc)); } #endif return prc; } namespace transpose_zero_input { -const auto& transpose_input_shapes = STATIC_SHAPES({{1, 49, 2, 23}, {2, 2, 23, 39}}); +const auto& transpose_input_shapes = SNIPPETS_TESTS_STATIC_SHAPES({{1, 49, 2, 23}, {2, 2, 23, 39}}); INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMult, TransposeMatMul, ::testing::Combine( ::testing::ValuesIn(transpose_input_shapes), @@ -84,7 +76,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnected, TransposeMatMul, } // namespace transpose_zero_input namespace transpose_first_input { -const auto& transpose_input_shapes = STATIC_SHAPES({{2, 1, 49, 13}, {1, 13, 3, 39}}); +const auto& transpose_input_shapes = SNIPPETS_TESTS_STATIC_SHAPES({{2, 1, 49, 13}, {1, 13, 3, 39}}); INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMult, TransposeMatMul, ::testing::Combine( ::testing::ValuesIn(transpose_input_shapes), @@ 
-126,7 +118,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_TransposeMatMulFQ, TransposeMatMulFQ, } // namespace transpose_first_input namespace transpose_output { -const auto& transpose_input_shapes = STATIC_SHAPES({{2, 1, 49, 13}, {1, 2, 13, 39}}); +const auto& transpose_input_shapes = SNIPPETS_TESTS_STATIC_SHAPES({{2, 1, 49, 13}, {1, 2, 13, 39}}); INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMult, TransposeMatMul, ::testing::Combine( @@ -195,7 +187,7 @@ static inline std::vector> precisions(bool only_fp32 } INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ExplicitTransposeMatMul, ExplicitTransposeMatMul, ::testing::Combine( - ::testing::ValuesIn(STATIC_SHAPES({{1, 2, 69, 43}, {2, 49, 2, 43}})), + ::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{1, 2, 69, 43}, {2, 49, 2, 43}})), ::testing::Values(1), // Transpose on second input ::testing::ValuesIn(precisions()), ::testing::Values(MatMulType::MatMul), @@ -223,7 +215,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_DynExplicitTransposeMatMul, ExplicitTran INSTANTIATE_TEST_SUITE_P(smoke_Snippets_TransposeMatMulBias, ExplicitTransposeMatMulBias, ::testing::Combine( - ::testing::ValuesIn(STATIC_SHAPES({{1, 2, 69, 43}, {2, 49, 2, 43}, {1, 1, 69, 49}})), + ::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{1, 2, 69, 43}, {2, 49, 2, 43}, {1, 1, 69, 49}})), ::testing::Values(1), // Transpose on second input ::testing::ValuesIn(precisions()), ::testing::Values(MatMulType::MatMul), diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/utils.hpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/utils.hpp new file mode 100644 index 00000000000000..6c0d54da973086 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/utils.hpp @@ -0,0 +1,48 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "internal_properties.hpp" +#include "utils/cpu_test_utils.hpp" +#include "openvino/runtime/system_conf.hpp" + 
+namespace ov { +namespace test { +namespace snippets { + +#define SNIPPETS_TESTS_STATIC_SHAPES(...) static_shapes_to_test_representation(std::vector>{__VA_ARGS__}) + +static inline bool is_bf16_supported_by_brgemm() { + return ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16(); +} + +static inline bool is_i8_supported_by_brgemm() { + return ov::with_cpu_x86_avx512_core_vnni() || ov::with_cpu_x86_avx512_core_amx_int8(); +} + +static inline std::vector> precision_f32(size_t count) { + std::vector> prc; + prc.emplace_back(std::vector(count, element::f32)); + return prc; +} + +static inline std::vector> precision_bf16_if_supported(size_t count) { + std::vector> prc; + if (is_bf16_supported_by_brgemm()) + prc.emplace_back(std::vector(count, element::bf16)); + return prc; +} + +static inline std::vector> quantized_precisions_if_supported() { + std::vector> prc = {}; + // In Snippets MatMul INT8 is supported only on VNNI/AMX platforms + if (is_i8_supported_by_brgemm()) { + prc.emplace_back(std::vector{element::i8, element::i8}); + prc.emplace_back(std::vector{element::u8, element::i8}); + } + return prc; +} + +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/lstm_seq_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/lstm_seq_shape_inference_test.cpp index 067477e146f509..a8b4160f405fd1 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/lstm_seq_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/lstm_seq_shape_inference_test.cpp @@ -9,184 +9,6 @@ using namespace ov; using namespace ov::intel_cpu; -class LSTMSequenceV0StaticShapeInferenceTest : public OpStaticShapeInferenceTest { -protected: - void SetUp() override { - this->output_shapes = ShapeVector(3); - } -}; - -TEST_F(LSTMSequenceV0StaticShapeInferenceTest, default_ctor) { - constexpr size_t batch_size = 2; - constexpr size_t input_size 
= 3; - constexpr size_t hidden_size = 5; - constexpr size_t seq_len = 4; - constexpr size_t num_directions = 1; - constexpr size_t gates_count = 4; - - const auto op = make_op(); - - input_shapes = {StaticShape{batch_size, seq_len, input_size}, // X - StaticShape{batch_size, num_directions, hidden_size}, // H_t - StaticShape{batch_size, num_directions, hidden_size}, // C_t - StaticShape{batch_size}, // seq_lengths - StaticShape{num_directions, gates_count * hidden_size, input_size}, // W - StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R - StaticShape{num_directions, gates_count * hidden_size}, // B - StaticShape{num_directions, (gates_count - 1) * hidden_size}}; // P - - output_shapes = shape_inference(op.get(), input_shapes); - - EXPECT_EQ(output_shapes.size(), 3); - EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); - EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); - EXPECT_EQ(output_shapes[2], StaticShape({batch_size, num_directions, hidden_size})); -} - -TEST_F(LSTMSequenceV0StaticShapeInferenceTest, FORWARD_without_P) { - constexpr size_t batch_size = 2; - constexpr size_t input_size = 3; - constexpr size_t hidden_size = 5; - constexpr size_t seq_len = 4; - constexpr size_t num_directions = 1; - constexpr size_t gates_count = 4; - - constexpr auto direction = op::RecurrentSequenceDirection::FORWARD; - - const auto X = std::make_shared(element::f32, PartialShape::dynamic(3)); - const auto H_t = std::make_shared(element::f32, PartialShape::dynamic(3)); - const auto C_t = std::make_shared(element::f32, PartialShape::dynamic(3)); - const auto seq_lengths = std::make_shared(element::f32, PartialShape::dynamic(1)); - const auto W = std::make_shared(element::f32, PartialShape::dynamic(3)); - const auto R = std::make_shared(element::f32, PartialShape::dynamic(3)); - const auto B = std::make_shared(element::f32, PartialShape::dynamic(2)); - - const auto op = 
make_op(X, H_t, C_t, seq_lengths, W, R, B, hidden_size, direction); - - input_shapes = {StaticShape{batch_size, seq_len, input_size}, // X - StaticShape{batch_size, num_directions, hidden_size}, // H_t - StaticShape{batch_size, num_directions, hidden_size}, // C_t - StaticShape{batch_size}, // seq_lengths - StaticShape{num_directions, gates_count * hidden_size, input_size}, // W - StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R - StaticShape{num_directions, gates_count * hidden_size}}; // B - - output_shapes = shape_inference(op.get(), input_shapes); - - EXPECT_EQ(output_shapes.size(), 3); - EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); - EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); - EXPECT_EQ(output_shapes[2], StaticShape({batch_size, num_directions, hidden_size})); -} - -TEST_F(LSTMSequenceV0StaticShapeInferenceTest, FORWARD_with_P) { - constexpr size_t batch_size = 2; - constexpr size_t input_size = 3; - constexpr size_t hidden_size = 5; - constexpr size_t seq_len = 4; - constexpr size_t num_directions = 1; - constexpr size_t gates_count = 4; - - constexpr auto direction = op::RecurrentSequenceDirection::FORWARD; - - const auto X = std::make_shared(element::f32, PartialShape::dynamic(3)); - const auto H_t = std::make_shared(element::f32, PartialShape::dynamic(3)); - const auto C_t = std::make_shared(element::f32, PartialShape::dynamic(3)); - const auto seq_lengths = std::make_shared(element::f32, PartialShape::dynamic(1)); - const auto W = std::make_shared(element::f32, PartialShape::dynamic(3)); - const auto R = std::make_shared(element::f32, PartialShape::dynamic(3)); - const auto B = std::make_shared(element::f32, PartialShape::dynamic(2)); - const auto P = std::make_shared(element::f32, PartialShape::dynamic(2)); - - const auto op = make_op(X, H_t, C_t, seq_lengths, W, R, B, P, hidden_size, direction); - - input_shapes = {StaticShape{batch_size, 
seq_len, input_size}, // X - StaticShape{batch_size, num_directions, hidden_size}, // H_t - StaticShape{batch_size, num_directions, hidden_size}, // C_t - StaticShape{batch_size}, // seq_lengths - StaticShape{num_directions, gates_count * hidden_size, input_size}, // W - StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R - StaticShape{num_directions, gates_count * hidden_size}, // B - StaticShape{num_directions, (gates_count - 1) * hidden_size}}; // P - - output_shapes = shape_inference(op.get(), input_shapes); - - EXPECT_EQ(output_shapes.size(), 3); - EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); - EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); - EXPECT_EQ(output_shapes[2], StaticShape({batch_size, num_directions, hidden_size})); -} - -TEST_F(LSTMSequenceV0StaticShapeInferenceTest, REVERSE) { - constexpr size_t batch_size = 2; - constexpr size_t input_size = 3; - constexpr size_t hidden_size = 5; - constexpr size_t seq_len = 4; - constexpr size_t num_directions = 1; - constexpr size_t gates_count = 4; - - constexpr auto direction = op::RecurrentSequenceDirection::REVERSE; - - const auto X = std::make_shared(element::f32, PartialShape::dynamic(3)); - const auto H_t = std::make_shared(element::f32, PartialShape::dynamic(3)); - const auto C_t = std::make_shared(element::f32, PartialShape::dynamic(3)); - const auto seq_lengths = std::make_shared(element::f32, PartialShape::dynamic(1)); - const auto W = std::make_shared(element::f32, PartialShape::dynamic(3)); - const auto R = std::make_shared(element::f32, PartialShape::dynamic(3)); - const auto B = std::make_shared(element::f32, PartialShape::dynamic(2)); - - const auto op = make_op(X, H_t, C_t, seq_lengths, W, R, B, hidden_size, direction); - - input_shapes = {StaticShape{batch_size, seq_len, input_size}, // X - StaticShape{batch_size, num_directions, hidden_size}, // H_t - StaticShape{batch_size, 
num_directions, hidden_size}, // C_t - StaticShape{batch_size}, // seq_lengths - StaticShape{num_directions, gates_count * hidden_size, input_size}, // W - StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R - StaticShape{num_directions, gates_count * hidden_size}}; // B - - output_shapes = shape_inference(op.get(), input_shapes); - EXPECT_EQ(output_shapes.size(), 3); - EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); - EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); - EXPECT_EQ(output_shapes[2], StaticShape({batch_size, num_directions, hidden_size})); -} - -TEST_F(LSTMSequenceV0StaticShapeInferenceTest, BIDIRECTIONAL) { - constexpr size_t batch_size = 2; - constexpr size_t input_size = 3; - constexpr size_t hidden_size = 5; - constexpr size_t seq_len = 4; - constexpr size_t num_directions = 2; - constexpr size_t gates_count = 4; - - constexpr auto direction = op::RecurrentSequenceDirection::BIDIRECTIONAL; - - const auto X = std::make_shared(element::f32, PartialShape::dynamic(3)); - const auto H_t = std::make_shared(element::f32, PartialShape::dynamic(3)); - const auto C_t = std::make_shared(element::f32, PartialShape::dynamic(3)); - const auto seq_lengths = std::make_shared(element::f32, PartialShape::dynamic(1)); - const auto W = std::make_shared(element::f32, PartialShape::dynamic(3)); - const auto R = std::make_shared(element::f32, PartialShape::dynamic(3)); - const auto B = std::make_shared(element::f32, PartialShape::dynamic(2)); - - const auto op = make_op(X, H_t, C_t, seq_lengths, W, R, B, hidden_size, direction); - - input_shapes = {StaticShape{batch_size, seq_len, input_size}, // X - StaticShape{batch_size, num_directions, hidden_size}, // H_t - StaticShape{batch_size, num_directions, hidden_size}, // C_t - StaticShape{batch_size}, // seq_lengths - StaticShape{num_directions, gates_count * hidden_size, input_size}, // W - StaticShape{num_directions, 
gates_count * hidden_size, hidden_size}, // R - StaticShape{num_directions, gates_count * hidden_size}}; // B - - output_shapes = shape_inference(op.get(), input_shapes); - EXPECT_EQ(output_shapes.size(), 3); - EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); - EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); - EXPECT_EQ(output_shapes[2], StaticShape({batch_size, num_directions, hidden_size})); -} - class LSTMSequenceV5StaticShapeInferenceTest : public OpStaticShapeInferenceTest { protected: void SetUp() override { diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/squeeze_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/squeeze_shape_inference_test.cpp index 69da74b10a2f45..5f790135780013 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/squeeze_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/squeeze_shape_inference_test.cpp @@ -16,14 +16,16 @@ using namespace ov; using namespace ov::intel_cpu; using namespace testing; -class SqueezeStaticShapeInferenceAssertTest : public OpStaticShapeInferenceTest { +namespace v0 { + +class SqueezeV0StaticShapeInferenceAssertTest : public OpStaticShapeInferenceTest { protected: void SetUp() override { output_shapes = ShapeVector(1); } }; -TEST_F(SqueezeStaticShapeInferenceAssertTest, no_axes) { +TEST_F(SqueezeV0StaticShapeInferenceAssertTest, no_axes) { const auto arg = std::make_shared(element::f64, PartialShape{-1, -1}); const auto axes = std::make_shared(element::i64, PartialShape{1}); const auto op = make_op(arg, axes); @@ -35,7 +37,7 @@ TEST_F(SqueezeStaticShapeInferenceAssertTest, no_axes) { HasSubstr("Check 'constant != nullptr'")); } -TEST_F(SqueezeStaticShapeInferenceAssertTest, parameter_static_shape_axes_no_data) { +TEST_F(SqueezeV0StaticShapeInferenceAssertTest, parameter_static_shape_axes_no_data) { const auto arg = 
std::make_shared(element::f64, Shape{2, 1, 3, 1}); const auto axes = std::make_shared(element::i64, Shape{2}); const auto op = make_op(arg, axes); @@ -52,11 +54,11 @@ using TestParams = std::tuple; -class SqueezeStaticShapeInferenceTest : public SqueezeStaticShapeInferenceAssertTest, +class SqueezeV0StaticShapeInferenceTest : public SqueezeV0StaticShapeInferenceAssertTest, public WithParamInterface { protected: void SetUp() override { - SqueezeStaticShapeInferenceAssertTest::SetUp(); + SqueezeV0StaticShapeInferenceAssertTest::SetUp(); std::tie(input_shapes, axes, exp_shape) = GetParam(); output_shapes = ShapeVector(1); @@ -68,7 +70,7 @@ class SqueezeStaticShapeInferenceTest : public SqueezeStaticShapeInferenceAssert }; INSTANTIATE_TEST_SUITE_P(1d_shapes, - SqueezeStaticShapeInferenceTest, + SqueezeV0StaticShapeInferenceTest, Values(make_tuple(ShapeVector{{1}, {1}}, std::vector{-1}, StaticShape({})), make_tuple(ShapeVector{{6}, {1}}, std::vector{-1}, StaticShape({6})), make_tuple(ShapeVector{{1}, {1}}, std::vector{0}, StaticShape({}))), @@ -76,7 +78,7 @@ INSTANTIATE_TEST_SUITE_P(1d_shapes, INSTANTIATE_TEST_SUITE_P( multi_dim_shapes, - SqueezeStaticShapeInferenceTest, + SqueezeV0StaticShapeInferenceTest, Values(make_tuple(ShapeVector{{1, 2, 3, 1}, {2}}, std::vector{0, 3}, StaticShape({2, 3})), make_tuple(ShapeVector{{2, 1, 1, 4}, {2}}, std::vector{2, 1}, StaticShape({2, 4})), make_tuple(ShapeVector{{2, 1, 1, 4, 1}, {2}}, std::vector{0, 1, -2, -1}, StaticShape({2, 1, 4})), @@ -92,7 +94,7 @@ INSTANTIATE_TEST_SUITE_P( INSTANTIATE_TEST_SUITE_P( multi_dim_shapes_repeated_axis, - SqueezeStaticShapeInferenceTest, + SqueezeV0StaticShapeInferenceTest, Values(make_tuple(ShapeVector{{2, 1, 3}, {2}}, std::vector{1, 1}, StaticShape({2, 3})), make_tuple(ShapeVector{{3, 1, 2, 1}, {3}}, std::vector{1, -1, 1}, StaticShape({3, 2})), make_tuple(ShapeVector{{3, 1, 2, 1}, {3}}, std::vector{1, -1, 1, -1}, StaticShape({3, 2})), @@ -100,7 +102,7 @@ INSTANTIATE_TEST_SUITE_P( 
make_tuple(ShapeVector{{2, 6, 7, 8, 1}, {2}}, std::vector{-1, -1}, StaticShape({2, 6, 7, 8}))), PrintToStringParamName()); -TEST_P(SqueezeStaticShapeInferenceTest, shape_inference_empty_const_map) { +TEST_P(SqueezeV0StaticShapeInferenceTest, shape_inference_empty_const_map) { const auto axes_node = std::make_shared(element::i64, Shape{axes.size()}, axes); const auto op = make_op(arg, axes_node); @@ -109,8 +111,8 @@ TEST_P(SqueezeStaticShapeInferenceTest, shape_inference_empty_const_map) { ASSERT_EQ(output_shapes.front(), exp_shape); } -TEST_P(SqueezeStaticShapeInferenceTest, shape_inference_with_const_map) { - const auto axes_node = std::make_shared(element::i64, Shape{1}); +TEST_P(SqueezeV0StaticShapeInferenceTest, shape_inference_with_const_map) { + const auto axes_node = std::make_shared(element::i64, ov::PartialShape::dynamic()); const auto op = make_op(arg, axes_node); const auto axes_tensor = axes.empty() ? ov::Tensor(element::i64, ov::Shape{axes.size()}) @@ -121,3 +123,115 @@ TEST_P(SqueezeStaticShapeInferenceTest, shape_inference_with_const_map) { ASSERT_EQ(output_shapes.front(), exp_shape); } + +} // namespace v0 + +namespace v15 { + +class SqueezeV15StaticShapeInferenceAssertTest : public OpStaticShapeInferenceTest { +protected: + void SetUp() override { + output_shapes = ShapeVector(1); + } +}; + +TEST_F(SqueezeV15StaticShapeInferenceAssertTest, no_axes) { + const auto arg = std::make_shared(element::f64, PartialShape{-1, -1}); + const auto axes = std::make_shared(element::i64, PartialShape{1}); + const auto op = make_op(arg, axes); + + input_shapes = ShapeVector{{5, 6}, axes->get_shape()}; + + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), + NodeValidationFailure, + HasSubstr("Check 'constant != nullptr'")); +} + +TEST_F(SqueezeV15StaticShapeInferenceAssertTest, parameter_static_shape_axes_no_data) { + const auto arg = std::make_shared(element::f64, Shape{2, 1, 3, 1}); + const auto axes = std::make_shared(element::i64, Shape{2}); + const auto 
op = make_op(arg, axes); + + input_shapes = ShapeVector{arg->get_shape(), axes->get_shape()}; + + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), + NodeValidationFailure, + HasSubstr("Check 'constant != nullptr'")); +} + +using TestParams = std::tuple, // Squeeze axes + StaticShape // Expected shape + >; + +class SqueezeV15StaticShapeInferenceTest : public SqueezeV15StaticShapeInferenceAssertTest, + public WithParamInterface { +protected: + void SetUp() override { + SqueezeV15StaticShapeInferenceAssertTest::SetUp(); + std::tie(input_shapes, axes, exp_shape) = GetParam(); + + output_shapes = ShapeVector(1); + arg = std::make_shared(element::f32, input_shapes.front().get_shape()); + } + + std::vector axes; + std::shared_ptr arg; +}; + +INSTANTIATE_TEST_SUITE_P(1d_shapes, + SqueezeV15StaticShapeInferenceTest, + Values(make_tuple(ShapeVector{{1}, {1}}, std::vector{-1}, StaticShape({})), + make_tuple(ShapeVector{{6}, {1}}, std::vector{-1}, StaticShape({6})), + make_tuple(ShapeVector{{1}, {1}}, std::vector{0}, StaticShape({}))), + PrintToStringParamName()); + +INSTANTIATE_TEST_SUITE_P( + multi_dim_shapes, + SqueezeV15StaticShapeInferenceTest, + Values(make_tuple(ShapeVector{{1, 2, 3, 1}, {2}}, std::vector{0, 3}, StaticShape({2, 3})), + make_tuple(ShapeVector{{2, 1, 1, 4}, {2}}, std::vector{2, 1}, StaticShape({2, 4})), + make_tuple(ShapeVector{{2, 1, 1, 4, 1}, {2}}, std::vector{0, 1, -2, -1}, StaticShape({2, 1, 4})), + make_tuple(ShapeVector{{1, 3, 1, 2, 1}, {3}}, std::vector{0, 2, 4}, StaticShape({3, 2})), + make_tuple(ShapeVector{{1, 3, 1, 2, 1}, {3}}, std::vector{4, 2, 0}, StaticShape({3, 2})), + make_tuple(ShapeVector{{1, 3, 1, 2, 1}, {3}}, std::vector{2, 0, 4}, StaticShape({3, 2})), + make_tuple(ShapeVector{{10, 1, 0, 1, 3, 1, 1}, {4}}, + std::vector{1, -1, 3, -2}, + StaticShape({10, 0, 3})), + make_tuple(ShapeVector{{10, 1, 0, 1, 3, 1, 1}, {}}, std::vector{}, StaticShape({10, 0, 3})), + make_tuple(ShapeVector{{2, 1, 7, 8, 3}, {1}}, std::vector{1}, 
StaticShape({2, 7, 8, 3}))), + PrintToStringParamName()); + +INSTANTIATE_TEST_SUITE_P( + multi_dim_shapes_repeated_axis, + SqueezeV15StaticShapeInferenceTest, + Values(make_tuple(ShapeVector{{2, 1, 3}, {2}}, std::vector{1, 1}, StaticShape({2, 3})), + make_tuple(ShapeVector{{3, 1, 2, 1}, {3}}, std::vector{1, -1, 1}, StaticShape({3, 2})), + make_tuple(ShapeVector{{3, 1, 2, 1}, {3}}, std::vector{1, -1, 1, -1}, StaticShape({3, 2})), + make_tuple(ShapeVector{{1, 3, 1, 2, 1}, {3}}, std::vector{2, -1, 2, -1, 0}, StaticShape({3, 2})), + make_tuple(ShapeVector{{2, 6, 7, 8, 1}, {2}}, std::vector{-1, -1}, StaticShape({2, 6, 7, 8}))), + PrintToStringParamName()); + +TEST_P(SqueezeV15StaticShapeInferenceTest, shape_inference_empty_const_map) { + const auto axes_node = std::make_shared(element::i64, Shape{axes.size()}, axes); + const auto op = make_op(arg, axes_node); + + output_shapes = shape_inference(op.get(), input_shapes); + + ASSERT_EQ(output_shapes.front(), exp_shape); +} + +TEST_P(SqueezeV15StaticShapeInferenceTest, shape_inference_with_const_map) { + const auto axes_node = std::make_shared(element::i64, ov::PartialShape::dynamic()); + const auto op = make_op(arg, axes_node); + + const auto axes_tensor = axes.empty() ? 
ov::Tensor(element::i64, ov::Shape{axes.size()}) + : ov::Tensor(element::i64, ov::Shape{axes.size()}, axes.data()); + const auto constant_data = std::unordered_map{{1, axes_tensor}}; + + output_shapes = shape_inference(op.get(), input_shapes, constant_data); + + ASSERT_EQ(output_shapes.front(), exp_shape); +} + +} // namespace v15 diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/stft_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/stft_shape_inference_test.cpp index 4c2ec30d16ee95..9d2960e31b71f2 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/stft_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/stft_shape_inference_test.cpp @@ -17,6 +17,26 @@ using testing::HasSubstr; class STFTShapeInferenceTest : public OpStaticShapeInferenceTest {}; +TEST_F(STFTShapeInferenceTest, all_input_as_params_1D_signal) { + const auto data_type = element::f32; + const auto step_size_type = element::i32; + const auto in_signal = std::make_shared(data_type, ov::PartialShape{-1}); + const auto in_window = std::make_shared(data_type, ov::PartialShape{-1}); + const auto in_frame_size = std::make_shared(step_size_type, ov::Shape{}); + const auto in_frame_step = std::make_shared(step_size_type, ov::Shape{}); + const auto op = make_op(in_signal, in_window, in_frame_size, in_frame_step, true); + + std::vector static_input_shapes = {StaticShape{48}, StaticShape{16}, StaticShape{}, StaticShape{}}; + int32_t frame_size = 16; + int32_t frame_step = 16; + + auto const_data = std::unordered_map{{2, {element::i32, Shape{}, &frame_size}}, + {3, {element::i32, Shape{}, &frame_step}}}; + auto acc = make_tensor_accessor(const_data); + auto static_output_shapes = shape_infer(op.get(), static_input_shapes, acc); + ASSERT_EQ(static_output_shapes[0], StaticShape({9, 3, 2})); +} + TEST_F(STFTShapeInferenceTest, all_input_as_params) { const auto data_type = element::f32; const auto step_size_type = 
element::i32; diff --git a/src/plugins/intel_cpu/thirdparty/onednn b/src/plugins/intel_cpu/thirdparty/onednn index 1ce2d722922efb..c60a9946aa2386 160000 --- a/src/plugins/intel_cpu/thirdparty/onednn +++ b/src/plugins/intel_cpu/thirdparty/onednn @@ -1 +1 @@ -Subproject commit 1ce2d722922efb80da52a6efe2152a9aecdddebf +Subproject commit c60a9946aa2386890e5c9f5587974facb7624227 diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/kernel_impl_params.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/kernel_impl_params.hpp index 3e8887fbb2f7ee..72623f6d120955 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/kernel_impl_params.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/kernel_impl_params.hpp @@ -53,7 +53,7 @@ struct kernel_impl_params final { optional_layout weights_zero_points_layout = optional_layout(); optional_layout activations_zero_points_layout = optional_layout(); optional_layout compensation_layout = optional_layout(); - optional_layout state_layout = optional_layout(); + std::vector state_layouts; std::map memory_deps = {}; size_t primary_input_idx = 0; diff --git a/src/plugins/intel_gpu/include/intel_gpu/op/indirect_sdpa.hpp b/src/plugins/intel_gpu/include/intel_gpu/op/indirect_sdpa.hpp index b4d34a3975af6b..7c45c93c7e74f1 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/op/indirect_sdpa.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/op/indirect_sdpa.hpp @@ -29,6 +29,17 @@ class IndirectSDPA : public ov::intel_gpu::op::SDPA { const std::vector& order_out, const ov::element::Type output_type = ov::element::undefined); + IndirectSDPA(const OutputVector& data_inputs, + const ov::Output& beam_table, + const bool is_causal, + const int64_t indirect_axis, + const std::vector& order_q, + const std::vector& order_k, + const std::vector& order_v, + const std::vector& order_out, + const QuantizationAttribute& quantization_attribute, + const ov::element::Type output_type = ov::element::undefined); + bool 
visit_attributes(ov::AttributeVisitor &visitor) override; void validate_and_infer_types() override; diff --git a/src/plugins/intel_gpu/include/intel_gpu/op/kv_cache.hpp b/src/plugins/intel_gpu/include/intel_gpu/op/kv_cache.hpp index 402ff6e46c1607..7048d5229f25db 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/op/kv_cache.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/op/kv_cache.hpp @@ -7,6 +7,7 @@ #include "openvino/op/op.hpp" #include "openvino/op/util/variable.hpp" #include "openvino/op/util/variable_extension.hpp" +#include "ov_ops/dynamic_quantize.hpp" namespace ov { namespace intel_gpu { @@ -22,16 +23,16 @@ class KVCache : public ov::op::Op, public ov::op::util::VariableExtension { KVCache(const Output& past, const Output& new_token_data, - const Output& beam_idx, const std::shared_ptr& past_values, int64_t concat_axis, - int64_t gather_axis, const ov::element::Type output_type = ov::element::undefined); KVCache(const Output& past, const Output& new_token_data, + const Output& beam_idx, const std::shared_ptr& past_values, int64_t concat_axis, + int64_t gather_axis, const ov::element::Type output_type = ov::element::undefined); bool visit_attributes(ov::AttributeVisitor& visitor) override; @@ -53,14 +54,22 @@ class KVCache : public ov::op::Op, public ov::op::util::VariableExtension { bool get_indirect() const { return m_indirect; } -private: +protected: + KVCache(const OutputVector& inputs, + const std::shared_ptr& past_values, + bool indirect, + int64_t concat_axis, + int64_t gather_axis, + const ov::element::Type output_type = ov::element::undefined); + int64_t m_concat_axis = 0; int64_t m_gather_axis = 0; bool m_indirect = false; + ov::element::Type m_output_type; }; -std::vector shape_infer(const KVCache* op, std::vector input_shapes); +std::vector shape_infer(const KVCache* op, const std::vector& input_shapes); } // namespace op } // namespace intel_gpu diff --git a/src/plugins/intel_gpu/include/intel_gpu/op/kv_cache_compressed.hpp 
b/src/plugins/intel_gpu/include/intel_gpu/op/kv_cache_compressed.hpp new file mode 100644 index 00000000000000..4ee8cb388b61ea --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/op/kv_cache_compressed.hpp @@ -0,0 +1,56 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "intel_gpu/op/kv_cache.hpp" +#include "ov_ops/dynamic_quantize.hpp" + +namespace ov { +namespace intel_gpu { +namespace op { + +/// \brief Operator that implements Key-Values cache subgraph for large language models. +/// This operation updates data of the corresponding Variable +class KVCacheCompressed : public ov::intel_gpu::op::KVCache { +public: + OPENVINO_OP("KVCacheCompressed", "gpu_opset"); + + using QuantizationAttrs = ov::op::internal::DynamicQuantize::Attributes; + + KVCacheCompressed() = default; + + KVCacheCompressed(const OutputVector& inputs, + const std::shared_ptr& past_values, + int64_t concat_axis, + int64_t gather_axis, + const QuantizationAttrs& quantization_attrs, + const ov::element::Type output_type = ov::element::undefined); + + void validate_and_infer_types() override; + + std::shared_ptr clone_with_new_inputs(const ov::OutputVector& new_args) const override; + + bool get_kv_compressed() const { return m_compressed; } + bool get_combine_scales_and_zp() const { + return m_quantization_attrs.quantization_type == ov::op::internal::DynamicQuantize::QuantizationType::Asymmetric && + m_quantization_attrs.output_storage_type != ov::op::internal::DynamicQuantize::OutputStorageType::Planar; + } + + QuantizationAttrs get_quantization_attrs() const { return m_quantization_attrs; } + void set_quantization_attrs(QuantizationAttrs attrs) { m_quantization_attrs = std::move(attrs); } + + std::vector get_scales_zp_output_order() const { return m_quantization_attrs.scales_zp_output_order; } + +private: + bool m_compressed; + QuantizationAttrs m_quantization_attrs = {}; +}; + +std::vector shape_infer(const 
KVCacheCompressed* op, + const std::vector& input_shapes); + +} // namespace op +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/op/read_value.hpp b/src/plugins/intel_gpu/include/intel_gpu/op/read_value.hpp index a9c47f3fa39fa6..419c18118229ff 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/op/read_value.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/op/read_value.hpp @@ -26,6 +26,7 @@ class ReadValue : public ov::op::Op, public ov::op::util::VariableExtension { bool visit_attributes(ov::AttributeVisitor& visitor) override; void validate_and_infer_types() override; + void validate_and_infer_types(size_t output_idx, const ov::op::util::VariableInfo& variable_info); std::shared_ptr clone_with_new_inputs(const ov::OutputVector& new_args) const override; @@ -33,6 +34,12 @@ class ReadValue : public ov::op::Op, public ov::op::util::VariableExtension { OPENVINO_ASSERT(m_variable, "Variable is not initialized. Variable_id is unavailable"); return m_variable->get_info().variable_id; } + +protected: + ReadValue(const std::vector>& variable_initializers, const std::shared_ptr& variable) + : Op(variable_initializers) { + m_variable = variable; + } }; } // namespace op diff --git a/src/plugins/intel_gpu/include/intel_gpu/op/read_values.hpp b/src/plugins/intel_gpu/include/intel_gpu/op/read_values.hpp new file mode 100644 index 00000000000000..ecbda9364d7b3b --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/op/read_values.hpp @@ -0,0 +1,42 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "intel_gpu/op/read_value.hpp" + +namespace ov { +namespace intel_gpu { +namespace op { + +/// \brief This operation handles the OpenVINO GPU Plugin's custom variable +// representation (which can store multiple states in a single variable) at the graph level. 
+class ReadValues : public ReadValue { +public: + OPENVINO_OP("ReadValues", "gpu_opset"); + + ReadValues() = default; + + ReadValues(const std::shared_ptr& variable, + const std::vector& internal_states_infos); + + ReadValues(const OutputVector& variable_initializers, + const std::shared_ptr& variable, + const std::vector& internal_states_infos); + + bool visit_attributes(ov::AttributeVisitor& visitor) override; + + void validate_and_infer_types() override; + + std::vector get_all_internal_states_info() const; + + std::shared_ptr clone_with_new_inputs(const ov::OutputVector& new_args) const override; + +private: + std::vector m_internal_states_infos; +}; + +} // namespace op +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/op/sdpa.hpp b/src/plugins/intel_gpu/include/intel_gpu/op/sdpa.hpp index 9f5d4dad16efd7..f7bc0d780ffd38 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/op/sdpa.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/op/sdpa.hpp @@ -8,6 +8,7 @@ #include "openvino/core/partial_shape.hpp" #include "openvino/op/op.hpp" #include "openvino/op/scaled_dot_product_attention.hpp" +#include "ov_ops/dynamic_quantize.hpp" namespace ov { namespace intel_gpu { @@ -17,6 +18,8 @@ class SDPA : public ov::op::v13::ScaledDotProductAttention { public: OPENVINO_OP("SDPA", "gpu_opset"); + using QuantizationAttribute = ov::op::internal::DynamicQuantize::Attributes; + SDPA() = default; SDPA(const OutputVector& inputs, @@ -27,6 +30,15 @@ class SDPA : public ov::op::v13::ScaledDotProductAttention { const std::vector& order_out, const ov::element::Type output_type = ov::element::undefined); + SDPA(const OutputVector& inputs, + const bool is_causal, + const std::vector& order_q, + const std::vector& order_k, + const std::vector& order_v, + const std::vector& order_out, + const QuantizationAttribute& quantization_attrs, + const ov::element::Type output_type = ov::element::undefined); + bool visit_attributes(ov::AttributeVisitor 
&visitor) override; void validate_and_infer_types() override; @@ -41,6 +53,10 @@ class SDPA : public ov::op::v13::ScaledDotProductAttention { std::vector get_output_transpose_order() const { return m_order_out; } ov::element::Type get_output_type() const { return m_output_type; } + bool get_kv_compressed() const { return m_compressed; } + QuantizationAttribute get_quantization_attrs() const { return m_quantization_attrs; } + size_t get_compression_inputs_num() const; + static std::vector default_order(size_t rank) { std::vector order(rank); std::iota(order.begin(), order.end(), 0); @@ -54,6 +70,9 @@ class SDPA : public ov::op::v13::ScaledDotProductAttention { std::vector m_order_v; std::vector m_order_out; ov::element::Type m_output_type; + + bool m_compressed = false; + QuantizationAttribute m_quantization_attrs = {}; }; std::vector shape_infer(const SDPA* op, diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp index 640cc82c2c8c2a..0d78b480b733b0 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp @@ -61,6 +61,8 @@ class CompiledModel : public ov::ICompiledModel { const std::vector>& get_graphs() const; std::shared_ptr get_graph(size_t n) const; + void release_memory() override; + private: RemoteContextImpl::Ptr m_context; ExecutionConfig m_config; diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/multi_tensor_variable_state.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/multi_tensor_variable_state.hpp index 0cad36f62e47b9..26d15d733102ad 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/multi_tensor_variable_state.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/multi_tensor_variable_state.hpp @@ -46,5 +46,31 @@ class VariableStateIndirectKVCache : public MultiTensorState { size_t m_concat_axis = 0; }; +// This is multi-tensor state for 
Indirect KV-Cache + Gemm pattern +// Internally it stores KV Cache state + Beam Table state + compression scales state + (optional compression zero points) +class VariableStateIndirectKVCacheCompressed : public VariableStateIndirectKVCache { +public: + VariableStateIndirectKVCacheCompressed(const VariableStateInfo& info, + std::shared_ptr context, + std::shared_ptr shape_predictor, + const std::vector& output_layouts, + size_t beam_idx, + size_t concat_idx, + bool has_zp_state); + using Ptr = std::shared_ptr; + + void set_state(const ov::SoPtr& state) override; + ov::SoPtr get_state() const override; + + VariableState::Ptr get_compression_scale_state() const; + void set_compression_scale_layout(const cldnn::layout& new_layout); + + VariableState::Ptr get_compression_zp_state() const; + void set_compression_zp_layout(const cldnn::layout& new_layout); + bool has_zp_state() const; + +private: + bool m_has_zp_state = false; +}; } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp index 1dbd769444b1aa..27e5540a3786ab 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp @@ -283,7 +283,9 @@ REGISTER_FACTORY(internal, FullyConnectedCompressed); REGISTER_FACTORY(internal, RMS); REGISTER_FACTORY(internal, GatherCompressed); REGISTER_FACTORY(internal, KVCache); +REGISTER_FACTORY(internal, KVCacheCompressed); REGISTER_FACTORY(internal, ReadValue); +REGISTER_FACTORY(internal, ReadValues); REGISTER_FACTORY(internal, Gemm); REGISTER_FACTORY(internal, SwiGLU); REGISTER_FACTORY(internal, IndirectGemm); diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/dynamic_quantize.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/dynamic_quantize.hpp index d93e2f86eed144..79af223e32cdaa 100644 --- 
a/src/plugins/intel_gpu/include/intel_gpu/primitives/dynamic_quantize.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/dynamic_quantize.hpp @@ -5,6 +5,8 @@ #pragma once #include "primitive.hpp" +#include "ov_ops/dynamic_quantize.hpp" + namespace cldnn { /// @brief Dynamic Quantize primitive @@ -12,26 +14,39 @@ namespace cldnn { struct dynamic_quantize : public primitive_base { CLDNN_DECLARE_PRIMITIVE(dynamic_quantize); - dynamic_quantize() : primitive_base("", {}), group_size(0) {} + using Attributes = ov::op::internal::DynamicQuantize::Attributes; + + dynamic_quantize() : primitive_base("", {}) {} /// @brief Constructs dynamic_quantize primitive /// @param id This primitive id /// @param input Input primitive id - /// @param group_size Quantization group size + /// @param group_sizes Quantization group size /// @param data_type Output data type of quantized /// @param output_size Output data size of the primitive dynamic_quantize(const primitive_id& id, const input_info& input, - const uint64_t group_size, - const std::vector data_types = {optional_data_type(data_types::f16), optional_data_type(data_types::i8)}) - : primitive_base(id, {input}, 2, data_types), - group_size(group_size) {} + const Attributes& attrs) + : primitive_base(id, {input}) + , attrs(attrs) { + num_outputs = 2; + if (attrs.quantization_type == ov::op::internal::DynamicQuantize::QuantizationType::Asymmetric && + attrs.output_storage_type == ov::op::internal::DynamicQuantize::OutputStorageType::Planar) + num_outputs++; + } - uint64_t group_size = 0; + Attributes attrs; size_t hash() const override { size_t seed = primitive::hash(); - seed = hash_combine(seed, group_size); + seed = hash_range(seed, attrs.scales_zp_output_order.begin(), attrs.scales_zp_output_order.end()); + seed = hash_range(seed, attrs.group_sizes.begin(), attrs.group_sizes.end()); + seed = hash_combine(seed, attrs.quantization_type); + seed = hash_combine(seed, attrs.quantization_dt.hash()); + seed = 
hash_combine(seed, attrs.scale_dt.hash()); + seed = hash_combine(seed, attrs.zp_dt.hash()); + seed = hash_combine(seed, attrs.output_storage_type); + return seed; } @@ -41,17 +56,37 @@ struct dynamic_quantize : public primitive_base { auto rhs_casted = downcast(rhs); - return group_size == rhs_casted.group_size; + return attrs.scales_zp_output_order == rhs_casted.attrs.scales_zp_output_order && + attrs.output_storage_type == rhs_casted.attrs.output_storage_type && + attrs.group_sizes == rhs_casted.attrs.group_sizes && + attrs.quantization_dt == rhs_casted.attrs.quantization_dt && + attrs.scale_dt == rhs_casted.attrs.scale_dt && + attrs.zp_dt == rhs_casted.attrs.zp_dt && + attrs.quantization_type == rhs_casted.attrs.quantization_type;; } void save(BinaryOutputBuffer& ob) const override { primitive_base::save(ob); - ob << group_size; + + ob << make_data(&attrs.quantization_type, sizeof(attrs.quantization_type)); + ob << make_data(&attrs.quantization_dt, sizeof(attrs.quantization_dt)); + ob << make_data(&attrs.scale_dt, sizeof(attrs.scale_dt)); + ob << make_data(&attrs.zp_dt, sizeof(attrs.zp_dt)); + ob << make_data(&attrs.output_storage_type, sizeof(attrs.output_storage_type)); + ob << attrs.scales_zp_output_order; + ob << attrs.group_sizes; } void load(BinaryInputBuffer& ib) override { primitive_base::load(ib); - ib >> group_size; + + ib >> make_data(&attrs.quantization_type, sizeof(attrs.quantization_type)); + ib >> make_data(&attrs.quantization_dt, sizeof(attrs.quantization_dt)); + ib >> make_data(&attrs.scale_dt, sizeof(attrs.scale_dt)); + ib >> make_data(&attrs.zp_dt, sizeof(attrs.zp_dt)); + ib >> make_data(&attrs.output_storage_type, sizeof(attrs.output_storage_type)); + ib >> attrs.scales_zp_output_order; + ib >> attrs.group_sizes; } }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/kv_cache.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/kv_cache.hpp index f87041dcff66d6..1c8f095752aca2 100644 --- 
a/src/plugins/intel_gpu/include/intel_gpu/primitives/kv_cache.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/kv_cache.hpp @@ -3,10 +3,14 @@ // #pragma once + +#include "primitive.hpp" + #include "openvino/core/partial_shape.hpp" #include "openvino/core/type/element_type.hpp" #include "openvino/op/util/variable.hpp" -#include "primitive.hpp" +#include "ov_ops/dynamic_quantize.hpp" + #include namespace cldnn { @@ -14,6 +18,8 @@ namespace cldnn { struct kv_cache : public primitive_base { CLDNN_DECLARE_PRIMITIVE(kv_cache) + using QuantizationAttributes = ov::op::internal::DynamicQuantize::Attributes; + kv_cache() : primitive_base("", {}) {} kv_cache(const primitive_id& id, @@ -33,11 +39,23 @@ struct kv_cache : public primitive_base { int64_t gather_axis = 0; bool indirect = false; + bool compressed = false; + QuantizationAttributes quantization_attributes; + size_t hash() const override { size_t seed = primitive::hash(); seed = hash_combine(seed, concat_axis); seed = hash_combine(seed, gather_axis); seed = hash_combine(seed, indirect); + seed = hash_combine(seed, compressed); + seed = hash_range(seed, quantization_attributes.scales_zp_output_order.begin(), quantization_attributes.scales_zp_output_order.end()); + seed = hash_range(seed, quantization_attributes.group_sizes.begin(), quantization_attributes.group_sizes.end()); + seed = hash_combine(seed, quantization_attributes.quantization_type); + seed = hash_combine(seed, quantization_attributes.quantization_dt.hash()); + seed = hash_combine(seed, quantization_attributes.scale_dt.hash()); + seed = hash_combine(seed, quantization_attributes.zp_dt.hash()); + seed = hash_combine(seed, quantization_attributes.output_storage_type);; + return seed; } @@ -50,7 +68,15 @@ struct kv_cache : public primitive_base { return variable_info == rhs_casted.variable_info && concat_axis == rhs_casted.concat_axis && gather_axis == rhs_casted.gather_axis && - indirect == rhs_casted.indirect; + indirect == rhs_casted.indirect && 
+ compressed == rhs_casted.compressed && + quantization_attributes.scales_zp_output_order == rhs_casted.quantization_attributes.scales_zp_output_order && + quantization_attributes.output_storage_type == rhs_casted.quantization_attributes.output_storage_type && + quantization_attributes.group_sizes == rhs_casted.quantization_attributes.group_sizes && + quantization_attributes.quantization_dt == rhs_casted.quantization_attributes.quantization_dt && + quantization_attributes.scale_dt == rhs_casted.quantization_attributes.scale_dt && + quantization_attributes.zp_dt == rhs_casted.quantization_attributes.zp_dt && + quantization_attributes.quantization_type == rhs_casted.quantization_attributes.quantization_type; } void save(BinaryOutputBuffer& ob) const override { @@ -62,6 +88,14 @@ struct kv_cache : public primitive_base { ob << concat_axis; ob << gather_axis; ob << indirect; + ob << compressed; + ob << make_data(&quantization_attributes.quantization_type, sizeof(quantization_attributes.quantization_type)); + ob << make_data(&quantization_attributes.quantization_dt, sizeof(quantization_attributes.quantization_dt)); + ob << make_data(&quantization_attributes.scale_dt, sizeof(quantization_attributes.scale_dt)); + ob << make_data(&quantization_attributes.zp_dt, sizeof(quantization_attributes.zp_dt)); + ob << make_data(&quantization_attributes.output_storage_type, sizeof(quantization_attributes.output_storage_type)); + ob << quantization_attributes.scales_zp_output_order; + ob << quantization_attributes.group_sizes; } void load(BinaryInputBuffer& ib) override { @@ -76,6 +110,32 @@ struct kv_cache : public primitive_base { ib >> concat_axis; ib >> gather_axis; ib >> indirect; + ib >> compressed; + ib >> make_data(&quantization_attributes.quantization_type, sizeof(quantization_attributes.quantization_type)); + ib >> make_data(&quantization_attributes.quantization_dt, sizeof(quantization_attributes.quantization_dt)); + ib >> make_data(&quantization_attributes.scale_dt, 
sizeof(quantization_attributes.scale_dt)); + ib >> make_data(&quantization_attributes.zp_dt, sizeof(quantization_attributes.zp_dt)); + ib >> make_data(&quantization_attributes.output_storage_type, sizeof(quantization_attributes.output_storage_type)); + ib >> quantization_attributes.scales_zp_output_order; + ib >> quantization_attributes.group_sizes; + } + + size_t get_compression_scales_inputs_num() const { + if (compressed) { + return 1; + } else { + return 0; + } + } + + size_t get_compression_zp_inputs_num() const { + if (compressed && + quantization_attributes.quantization_type == ov::op::internal::DynamicQuantize::QuantizationType::Asymmetric && + quantization_attributes.output_storage_type == ov::op::internal::DynamicQuantize::OutputStorageType::Planar) { + return 1; + } else { + return 0; + } } }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/read_value.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/read_value.hpp index 7d9e919f56cf13..26465692ef6352 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/read_value.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/read_value.hpp @@ -22,19 +22,23 @@ struct read_value : public primitive_base { /// @param id This primitive id /// @param inputs Input parameters ids /// @param variable_id Variable id - /// @param output_layout Memory layout + /// @param output_layouts Memory layouts read_value(const primitive_id& id, const std::vector& inputs, const std::string& variable_id, - const layout& output_layout, + const std::vector& output_layouts, const ov::element::Type& user_specified_type = ov::element::undefined) - : primitive_base(id, inputs, 1, {optional_data_type{output_layout.data_type}}), + : primitive_base(id, inputs, output_layouts.size()), variable_id{variable_id}, - output_layout{output_layout}, - user_specified_type(user_specified_type) {} + output_layouts{output_layouts}, + user_specified_type(user_specified_type) { + for (size_t 
output_idx = 0; output_idx < output_layouts.size(); output_idx++) { + output_data_types[output_idx] = optional_data_type(output_layouts[output_idx].data_type); + } + } std::string variable_id; - layout output_layout; + std::vector output_layouts; ov::element::Type user_specified_type; bool operator==(const primitive& rhs) const override { @@ -51,7 +55,9 @@ struct read_value : public primitive_base { primitive_base::save(ob); ov::element::Type_t data_type = user_specified_type; ob << variable_id; - ob << output_layout; + ob << output_layouts.size(); + for (const auto& layout : output_layouts) + ob << layout; ob << make_data(&data_type, sizeof(ov::element::Type_t)); } @@ -59,7 +65,12 @@ struct read_value : public primitive_base { primitive_base::load(ib); ov::element::Type_t data_type = ov::element::Type_t::undefined; ib >> variable_id; - ib >> output_layout; + size_t output_layouts_size; + ib >> output_layouts_size; + output_layouts.resize(output_layouts_size); + for (size_t i = 0; i < output_layouts_size; i++) { + ib >> output_layouts[i]; + } ib >> make_data(&data_type, sizeof(ov::element::Type_t)); user_specified_type = data_type; } diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/scaled_dot_product_attention.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/scaled_dot_product_attention.hpp index d66012bfac8889..77e1c5ae71099e 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/scaled_dot_product_attention.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/scaled_dot_product_attention.hpp @@ -5,16 +5,20 @@ #pragma once #include "primitive.hpp" +#include "ov_ops/dynamic_quantize.hpp" + namespace cldnn { struct scaled_dot_product_attention : public primitive_base { CLDNN_DECLARE_PRIMITIVE(scaled_dot_product_attention) + using QuantizationAttributes = ov::op::internal::DynamicQuantize::Attributes; + scaled_dot_product_attention() : primitive_base("", {}) {} /// @brief Constructs scaled_dot_product_attention primitive. 
/// @param id This primitive id. - /// @param inputs Input data primitives id (query, keys, values, [attention_mask], [scale]). + /// @param inputs Input data primitives id (query, keys, values, [attention_mask], [scale], [keys scales], [keys zp], [values scales], [values zp]). /// @param is_causal If true, assumes causal attention masking. In this case attention_mask input is ignored. scaled_dot_product_attention(const primitive_id& id, const std::vector inputs, @@ -23,18 +27,29 @@ struct scaled_dot_product_attention : public primitive_base& input_q_transpose_order = {}, const std::vector& input_k_transpose_order = {}, const std::vector& input_v_transpose_order = {}, - const std::vector& output_transpose_order = {}) + const std::vector& output_transpose_order = {}, + const QuantizationAttributes& quantization_attributes = {}, + bool is_kv_compressed = false) : primitive_base(id, inputs) , is_causal(is_causal) , indirect_axis(indirect_axis) + , is_kv_compressed(is_kv_compressed) + , quantization_attributes(quantization_attributes) , input_q_transpose_order(input_q_transpose_order) , input_k_transpose_order(input_k_transpose_order) , input_v_transpose_order(input_v_transpose_order) , output_transpose_order(output_transpose_order) { auto data_inputs_num = inputs.size(); - if (indirect_axis != -1) + if (indirect_axis != -1) { data_inputs_num--; + } + if (is_kv_compressed) { + data_inputs_num -= 2; // scales + if (quantization_attributes.quantization_type == ov::op::internal::DynamicQuantize::QuantizationType::Asymmetric && + quantization_attributes.output_storage_type == ov::op::internal::DynamicQuantize::OutputStorageType::Planar) + data_inputs_num -= 2; // zp + } has_attn_mask_input = data_inputs_num > 3; has_scale_input = data_inputs_num > 4; } @@ -44,6 +59,9 @@ struct scaled_dot_product_attention : public primitive_base input_q_transpose_order; std::vector input_k_transpose_order; std::vector input_v_transpose_order; @@ -59,6 +77,15 @@ struct 
scaled_dot_product_attention : public primitive_base::save(ob); ob << is_causal; + ob << is_kv_compressed; ob << has_attn_mask_input; ob << has_scale_input; ob << indirect_axis; @@ -88,11 +124,19 @@ struct scaled_dot_product_attention : public primitive_base::load(ib); ib >> is_causal; + ib >> is_kv_compressed; ib >> has_attn_mask_input; ib >> has_scale_input; ib >> indirect_axis; @@ -100,6 +144,31 @@ struct scaled_dot_product_attention : public primitive_base> input_k_transpose_order; ib >> input_v_transpose_order; ib >> output_transpose_order; + ib >> make_data(&quantization_attributes.quantization_type, sizeof(quantization_attributes.quantization_type)); + ib >> make_data(&quantization_attributes.quantization_dt, sizeof(quantization_attributes.quantization_dt)); + ib >> make_data(&quantization_attributes.scale_dt, sizeof(quantization_attributes.scale_dt)); + ib >> make_data(&quantization_attributes.zp_dt, sizeof(quantization_attributes.zp_dt)); + ib >> make_data(&quantization_attributes.output_storage_type, sizeof(quantization_attributes.output_storage_type)); + ib >> quantization_attributes.scales_zp_output_order; + ib >> quantization_attributes.group_sizes; + } + + size_t get_compression_scales_inputs_num() const { + if (is_kv_compressed) { + return 2; + } else { + return 0; + } + } + + size_t get_compression_zp_inputs_num() const { + if (is_kv_compressed && + quantization_attributes.quantization_type == ov::op::internal::DynamicQuantize::QuantizationType::Asymmetric && + quantization_attributes.output_storage_type == ov::op::internal::DynamicQuantize::OutputStorageType::Planar) { + return 2; + } else { + return 0; + } } }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp index c65aa3e5894cb8..465ed898ecb7ec 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp +++ 
b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp @@ -143,6 +143,7 @@ class debug_configuration { int disable_primitive_fusing; // Disable primitive fusing int disable_fake_alignment; // Disable fake alignment std::vector dynamic_quantize_layers_without_onednn; // Specify Fully-connected layers which enable Dynamic quantization + int use_kv_cache_compression; // Enable KV-cache compression int dynamic_quantize_group_size; // Enable Dynamic quantization for fully connected primitive by specified group size int disable_horizontal_fc_fusion; // Disable fc horizontal fusion std::set dump_iteration; // Dump n-th execution of network. diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp index 62e4c08a90f004..ab5cb53454b768 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp @@ -454,6 +454,21 @@ inline ::std::ostream& operator<<(::std::ostream& os, const layout& p) { return os << p.to_string(); } +inline ::std::ostream& operator<<(::std::ostream& os, const std::vector& layouts) { + std::stringstream ss; + + ss << "["; + for (size_t i = 0; i < layouts.size(); i++) { + ss << layouts[i].to_short_string(); + + if (i + 1 != layouts.size()) + ss << ", "; + } + ss << "]"; + + return os << ss.str(); +} + using optional_data_type = optional_value; using optional_layout = optional_value; diff --git a/src/plugins/intel_gpu/src/graph/dynamic_quantize.cpp b/src/plugins/intel_gpu/src/graph/dynamic_quantize.cpp index 5c945f4c2d389c..8e4957d5f52797 100644 --- a/src/plugins/intel_gpu/src/graph/dynamic_quantize.cpp +++ b/src/plugins/intel_gpu/src/graph/dynamic_quantize.cpp @@ -22,29 +22,39 @@ layout dynamic_quantize_inst::calc_output_layout(dynamic_quantize_node const& no } template -std::vector dynamic_quantize_inst::__calc_output_layouts(const layout &act_layout, uint64_t group_size) { +std::vector 
dynamic_quantize_inst::__calc_output_layouts(const layout &act_layout, + const dynamic_quantize::Attributes& attrs) { ov::op::internal::DynamicQuantize op; + op.set_attrs(attrs); + auto output_format = act_layout.format; std::vector input_shapes = { act_layout.get(), }; - std::vector shape_group_size(act_layout.get().size(), 1); - shape_group_size.back() = group_size; + auto output_shapes = ov::op::internal::DynamicQuantize::shape_infer(&op, input_shapes); + + std::vector output_layouts = { layout(output_shapes[0], attrs.quantization_dt, output_format), + layout(output_shapes[1], attrs.scale_dt, output_format) }; - auto output_shapes = ov::op::internal::DynamicQuantize::shape_infer(&op, input_shapes, shape_group_size); + if (attrs.quantization_type == ov::op::internal::DynamicQuantize::QuantizationType::Asymmetric && + attrs.output_storage_type == ov::op::internal::DynamicQuantize::OutputStorageType::Planar) { + output_layouts.emplace_back(layout(output_shapes[2], attrs.zp_dt, output_format)); + } - return { layout(output_shapes[0], data_types::i8, output_format), layout(output_shapes[1], data_types::f16, output_format) }; + return output_layouts; } -template std::vector dynamic_quantize_inst::__calc_output_layouts(const layout &act_layout, uint64_t group_size); +template std::vector dynamic_quantize_inst::__calc_output_layouts(const layout &act_layout, + const dynamic_quantize::Attributes& config); template std::vector dynamic_quantize_inst::calc_output_layouts(dynamic_quantize_node const& /*node*/, const kernel_impl_params& impl_param) { auto desc = impl_param.typed_desc(); const auto& input_layout = impl_param.get_input_layout(); - return __calc_output_layouts(input_layout, UINT64_MAX /* TODO: handle group_size here */); + + return __calc_output_layouts(input_layout, desc->attrs); } template std::vector dynamic_quantize_inst::calc_output_layouts(dynamic_quantize_node const& node, @@ -56,6 +66,15 @@ std::string 
dynamic_quantize_inst::to_string(dynamic_quantize_node const& node) std::stringstream primitive_description; + json_composite dynamic_quantize_info; + dynamic_quantize_info.add("output_storage_type", static_cast(desc->attrs.output_storage_type)); + dynamic_quantize_info.add("scales_zp_output_order", desc->attrs.scales_zp_output_order); + dynamic_quantize_info.add("group_sizes", desc->attrs.group_sizes); + dynamic_quantize_info.add("quantization_dt", desc->attrs.quantization_dt); + dynamic_quantize_info.add("scale_dt", desc->attrs.scale_dt); + dynamic_quantize_info.add("zp_dt", desc->attrs.zp_dt); + dynamic_quantize_info.add("quantization_type", static_cast(desc->attrs.quantization_type)); + node_info->add("dynamic_quantize info", dynamic_quantize_info); node_info->dump(primitive_description); return primitive_description.str(); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp index 7bdbc53ad54d16..e94714c84fdebf 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp @@ -423,10 +423,11 @@ bool crop_in_place_optimization::can_crop_be_optimized_simple_data_format(const } static bool can_read_value_be_optimize(const read_value_node& node) { - if (node.get_users().size() == 1) + std::unordered_set unique_users(node.get_users().begin(), node.get_users().end()); + if (unique_users.size() == 1) return true; - const auto non_shape_of_users_count = std::count_if(node.get_users().begin(), node.get_users().end(), [](const program_node* user) { + const auto non_shape_of_users_count = std::count_if(unique_users.begin(), unique_users.end(), [](const program_node* user) { return !user->is_type(); }); if (non_shape_of_users_count <= 1) @@ -659,23 +660,34 @@ void crop_in_place_optimization::update_in_place_crop_padding_simple_data_format if (user_info.first && 
user_info.first->is_type()) { auto reshape_desc = user_info.first->as().get_primitive(); auto reshape_mode = reshape_desc->mode; + auto reshape_axis = crop_axis; if (reshape_mode == reshape::reshape_mode::base) { - user_info.second.data_padding._dynamic_dims_mask = dyn_pad_sizes; + auto reshape_ps = user_info.second.get_partial_shape(); + auto crop_dim_val = crop_layout.get_partial_shape()[crop_axis].get_length(); + + auto mul = 1; + reshape_axis = reshape_ps.size() - 1; + for (size_t i = reshape_ps.size(); i > 1; i--) { + if (reshape_ps[i - 1].is_dynamic() || mul == crop_dim_val) + break; + + mul *= reshape_ps[i - 1].get_length(); + reshape_axis = i - 1; + } } else if (reshape_mode == reshape::reshape_mode::unsqueeze || reshape_mode == reshape::reshape_mode::squeeze) { auto reshape_ps = user_info.second.get_partial_shape(); auto output_pattern = reshape_desc->output_pattern; - auto reshape_axis = crop_axis; for (size_t i = 0; i < output_pattern.size(); i++) { if (output_pattern[i] <= static_cast(reshape_axis)) { reshape_axis += reshape_mode == reshape::reshape_mode::unsqueeze ? 
1 : -1; } } - - padding::DynamicDimsMask dyn_pad_mask; - dyn_pad_mask[reshape_axis] = 1; - user_info.second.data_padding._dynamic_dims_mask = dyn_pad_mask; } + + auto reshape_dyn_pad_mask = padding::DynamicDimsMask(); + reshape_dyn_pad_mask[reshape_axis] = 1; + user_info.second.data_padding._dynamic_dims_mask = reshape_dyn_pad_mask; } return; } @@ -703,13 +715,36 @@ void crop_in_place_optimization::update_in_place_crop_padding_simple_data_format auto reshape_desc = user_info.first->as().get_primitive(); auto reshape_mode = reshape_desc->mode; if (reshape_mode == reshape::reshape_mode::base) { - auto reshape_rank = user_info.second.get_partial_shape().size(); - auto reshape_last_dim = user_info.second.get_partial_shape().to_shape()[reshape_rank - 1]; - if (lower_sizes[crop_axis]) - lower_sizes[crop_axis] /= reshape_last_dim; - if (upper_sizes[crop_axis]) - upper_sizes[crop_axis] /= reshape_last_dim; - user_info.second.data_padding = padding(lower_sizes, upper_sizes, dyn_pad_sizes); + auto reshape_ps = user_info.second.get_partial_shape(); + auto crop_dim_val = crop_layout.get_partial_shape()[crop_axis].get_length(); + + auto divider = 1; + auto reshape_axis = reshape_ps.size(); + for (size_t i = reshape_ps.size(); i > 1; i--) { + const auto& dim_value = reshape_ps[i - 1].get_length(); + if (divider * dim_value == crop_dim_val) + break; + + divider *= dim_value; + reshape_axis = i - 1; + } + reshape_axis -= 1; + + const auto output_rank = std::max(reshape_ps.size(), static_cast(4)); + std::vector reshape_lower_sizes(output_rank, 0); + std::vector reshape_upper_sizes(output_rank, 0); + padding::DynamicDimsMask reshape_dyn_pad_mask; + + reshape_lower_sizes[reshape_axis] = lower_sizes[crop_axis]; + reshape_upper_sizes[reshape_axis] = upper_sizes[crop_axis]; + reshape_dyn_pad_mask[reshape_axis] = 1; + + if (reshape_lower_sizes[reshape_axis]) + reshape_lower_sizes[reshape_axis] /= divider; + if (reshape_upper_sizes[reshape_axis]) + reshape_upper_sizes[reshape_axis] /= 
divider; + + user_info.second.data_padding = padding(reshape_lower_sizes, reshape_upper_sizes, reshape_dyn_pad_mask); } else { auto reshape_ps = user_info.second.get_partial_shape(); auto output_pattern = reshape_desc->output_pattern; @@ -877,18 +912,39 @@ void prepare_buffer_fusing::run(program& p) { node.set_output_layout(kv_out_layout); node.can_share_buffer(false); - auto update_dep = [&info_dynamic_pad](program_node* dep) { - auto prev_layout = dep->get_output_layout(); + auto update_dep = [](program_node* dep, padding::DynamicDimsMask& info_dynamic_pad, size_t idx) { + auto prev_layout = dep->get_output_layout(true, idx); prev_layout.data_padding._dynamic_dims_mask = info_dynamic_pad; - dep->set_output_layout(prev_layout); + dep->set_output_layout(prev_layout, true, idx); dep->can_share_buffer(false); }; + auto update_scale_zp = [&](size_t kv_cache_output_idx, size_t read_value_output_idx) { + auto scales_out_layout = node.get_output_layout(false, kv_cache_output_idx); + + const size_t scales_zp_concat_axis = 2; + padding::DynamicDimsMask info_dynamic_pad_scales; + info_dynamic_pad_scales[scales_zp_concat_axis] = 1; + scales_out_layout.data_padding._dynamic_dims_mask = info_dynamic_pad_scales; + node.set_output_layout(scales_out_layout, true, kv_cache_output_idx); + + update_dep(rv_prim, info_dynamic_pad_scales, read_value_output_idx); + }; + if (rv_prim) { - update_dep(rv_prim); + update_dep(rv_prim, info_dynamic_pad, 0); } if (gather_prim) { - update_dep(gather_prim); + update_dep(gather_prim, info_dynamic_pad, 0); + } + + const auto& desc = node.get_primitive(); + if (desc->compressed) { + update_scale_zp(2, 1); + + if (desc->get_compression_zp_inputs_num() > 0) { + update_scale_zp(3, 2); + } } } }); @@ -922,7 +978,7 @@ void prepare_buffer_fusing::run(program& p) { // TODO: Allow optimizations for the case above too. Looks like it can be achieved by more careful // topological sort (i.e. 
if we ensure that all read_value users are completed before assign is run) node.can_be_optimized(can_read_value_be_optimize(node)); - GPU_DEBUG_TRACE_DETAIL << "[prepare_buffer_fusing] : " << node.id() << " can be optimized" << std::endl; + GPU_DEBUG_TRACE_DETAIL << "[prepare_buffer_fusing] : " << node.id() << " can be optimized = " << node.can_be_optimized() << std::endl; }); } } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index 39cbc1aa89b4e2..c323109850c489 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -736,8 +736,6 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { should_fuse |= input.is_type(); - should_fuse |= input.is_type(); - bool legacy_fusion = activation_node.get_dependencies().size() == 1 && !input.can_be_optimized() && !activation_node.is_constant() && @@ -922,8 +920,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { (parents[i].first->is_type()) || (parents[i].first->is_type() && reduce_supports_fusings(parents[i].first->as())) || - (parents[i].first->is_type()) || - (parents[i].first->is_type()); + (parents[i].first->is_type()); } // Disable fusion to a node on constant path when second input is in data flow @@ -1063,6 +1060,9 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { fused_node->get_input_pshape().rbegin()->is_dynamic(); if (is_fc_lora || is_conv_lora || is_gemm_lora) { + if (!can_fuse_parents[peer_idx]) { + return; + } std::swap(peer_node, fused_node); } } diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/activation.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/activation.cpp index e750303b955d77..1ec8c9458cf30b 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/activation.cpp +++ 
b/src/plugins/intel_gpu/src/graph/impls/cpu/activation.cpp @@ -140,7 +140,7 @@ struct activation_impl : public typed_primitive_impl { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "activation::execute_impl"); auto& stream = instance.get_network().get_stream(); - const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.get_node().is_in_shape_of_subgraph(); + const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.all_dependencies_cpu_impl(); if (!pass_through_events) { for (auto e : events) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/broadcast.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/broadcast.cpp index 2701a57001eb93..7abd3a141c15f1 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/broadcast.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/broadcast.cpp @@ -60,7 +60,7 @@ struct broadcast_impl : public typed_primitive_impl { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "broadcast::execute_impl"); auto& stream = instance.get_network().get_stream(); - const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.get_node().is_in_shape_of_subgraph(); + const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.all_dependencies_cpu_impl(); if (!pass_through_events) { for (auto e : events) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/concat.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/concat.cpp index 6b7a483bae7d8c..a7d3beb5b94cf0 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/concat.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/concat.cpp @@ -51,7 +51,7 @@ struct concatenation_impl : public typed_primitive_impl { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "concat::execute_impl"); auto& stream = instance.get_network().get_stream(); - const bool pass_through_events = (stream.get_queue_type() 
== QueueTypes::out_of_order) && instance.get_node().is_in_shape_of_subgraph(); + const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.all_dependencies_cpu_impl(); if (!pass_through_events) { for (auto e : events) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/crop.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/crop.cpp index 99bfa9b9383492..c8041f44a78e86 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/crop.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/crop.cpp @@ -37,7 +37,7 @@ struct crop_impl : public typed_primitive_impl { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "crop::execute_impl"); auto& stream = instance.get_network().get_stream(); - const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.get_node().is_in_shape_of_subgraph(); + const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.all_dependencies_cpu_impl(); if (!pass_through_events) { for (auto e : events) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp index 7d3e4ff7c4eadf..25676ea3e9aa3d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp @@ -827,7 +827,7 @@ struct detection_output_impl : typed_primitive_impl { event::ptr execute_impl(const std::vector& events, detection_output_inst& instance) override { auto& stream = instance.get_network().get_stream(); - const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.get_node().is_in_shape_of_subgraph(); + const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.all_dependencies_cpu_impl(); if (!pass_through_events) { for (auto e : events) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/eltwise.cpp 
b/src/plugins/intel_gpu/src/graph/impls/cpu/eltwise.cpp index 1b18b78b041fd8..057686f9c2d6a6 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/eltwise.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/eltwise.cpp @@ -81,7 +81,7 @@ struct eltwise_impl : public typed_primitive_impl { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "eltwise::execute_impl"); auto& stream = instance.get_network().get_stream(); - const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.get_node().is_in_shape_of_subgraph(); + const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.all_dependencies_cpu_impl(); if (!pass_through_events) { for (auto e : events) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/gather.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/gather.cpp index 242273a23dd000..733e143b2c2c76 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/gather.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/gather.cpp @@ -59,7 +59,7 @@ struct gather_impl : public typed_primitive_impl { return stream.group_events(events); } - const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.get_node().is_in_shape_of_subgraph(); + const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.all_dependencies_cpu_impl(); if (!pass_through_events) { for (auto e : events) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/non_max_suppression.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/non_max_suppression.cpp index 4783159d501404..446a277866f222 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/non_max_suppression.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/non_max_suppression.cpp @@ -403,7 +403,7 @@ struct non_max_suppression_impl : typed_primitive_impl { event::ptr execute_impl(const std::vector& events, typed_primitive_inst& instance) override { auto& stream = 
instance.get_network().get_stream(); - const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.get_node().is_in_shape_of_subgraph(); + const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.all_dependencies_cpu_impl(); if (!pass_through_events) { for (auto e : events) { @@ -455,7 +455,7 @@ struct non_max_suppression_gather_impl : typed_primitive_impl& events, typed_primitive_inst& instance) override { auto& stream = instance.get_network().get_stream(); - const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.get_node().is_in_shape_of_subgraph(); + const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.all_dependencies_cpu_impl(); if (!pass_through_events) { for (auto e : events) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp index e49cb3a832f8ae..b295ee5f84c56f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp @@ -388,7 +388,7 @@ struct proposal_impl : typed_primitive_impl { event::ptr execute_impl(const std::vector& events, proposal_inst& instance) override { auto& stream = instance.get_network().get_stream(); - const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.get_node().is_in_shape_of_subgraph(); + const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.all_dependencies_cpu_impl(); if (!pass_through_events) { for (auto e : events) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/range.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/range.cpp index 83142812f29e8b..439bbf5aa4ef6b 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/range.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/range.cpp @@ -37,7 +37,7 @@ struct range_impl : 
public typed_primitive_impl { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "range::execute_impl"); auto& stream = instance.get_network().get_stream(); - const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.get_node().is_in_shape_of_subgraph(); + const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.all_dependencies_cpu_impl(); if (!pass_through_events) { for (auto e : events) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp index 5692b6037a09e0..da7e95c4ab74a5 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp @@ -6,6 +6,8 @@ #include "impls/registry/implementation_map.hpp" #include "register.hpp" +#include "intel_gpu/plugin/multi_tensor_variable_state.hpp" + namespace cldnn { namespace cpu { @@ -64,7 +66,21 @@ struct read_value_impl : public typed_primitive_impl { } if (!instance.can_be_optimized()) { - return instance.output_memory(0).copy_from(stream, *variable.get_memory(), false); + GPU_DEBUG_TRACE_DETAIL << "Copy variable's memory to new read_value's output buffer\n"; + std::vector res_events; + res_events.push_back(instance.output_memory(0).copy_from(stream, *variable.get_memory(), false)); + + if (auto compressed_cache_variable = dynamic_cast(&variable)) { + auto scales_state = compressed_cache_variable->get_compression_scale_state(); + res_events.push_back(instance.output_memory(1).copy_from(stream, *scales_state->get_memory(), false)); + + if (compressed_cache_variable->has_zp_state()) { + auto zp_state = compressed_cache_variable->get_compression_zp_state(); + res_events.push_back(instance.output_memory(2).copy_from(stream, *zp_state->get_memory(), false)); /* zero-points are read_value output 2; output 1 holds the scales */ + } + } + + return stream.aggregate_events(res_events, res_events.size() > 1); } return
instance.get_network().get_stream().create_user_event(true); diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/reduce.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/reduce.cpp index 5a3867f9d1582a..dd6284a3dd795d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/reduce.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/reduce.cpp @@ -77,7 +77,7 @@ struct reduce_impl : public typed_primitive_impl { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "reduce::execute_impl"); auto& stream = instance.get_network().get_stream(); - const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.get_node().is_in_shape_of_subgraph(); + const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.all_dependencies_cpu_impl(); if (!pass_through_events) { for (auto e : events) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/reorder.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/reorder.cpp index 1b6f145c4ceb2d..6e4202f815dc96 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/reorder.cpp @@ -37,7 +37,7 @@ struct reorder_impl : public typed_primitive_impl { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "reorder::execute_impl"); auto& stream = instance.get_network().get_stream(); - const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.get_node().is_in_shape_of_subgraph(); + const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.all_dependencies_cpu_impl(); if (!pass_through_events) { for (auto e : events) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/scatter_update.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/scatter_update.cpp index 1a329ea495ef82..94a12dadddb407 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/scatter_update.cpp +++ 
b/src/plugins/intel_gpu/src/graph/impls/cpu/scatter_update.cpp @@ -51,7 +51,7 @@ struct scatter_update_impl : public typed_primitive_impl { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "scatter_update::execute_impl"); auto& stream = instance.get_network().get_stream(); - const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.get_node().is_in_shape_of_subgraph(); + const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.all_dependencies_cpu_impl(); if (!pass_through_events) { for (auto e : events) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/select.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/select.cpp index 9c9ab75f64ad59..78df55d5c7c1f7 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/select.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/select.cpp @@ -50,7 +50,7 @@ struct select_impl : public typed_primitive_impl { void load(BinaryInputBuffer& ib) override { parent::load(ib); - if (is_dynamic()) { + if (is_dynamic() && _kernel_data.kernelName.length() != 0) { auto& kernel_selector = kernel_selector_t::Instance(); auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/slice.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/slice.cpp index 8a7225cb3c64ce..63c3d5c63d3800 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/slice.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/slice.cpp @@ -64,7 +64,7 @@ struct slice_impl : typed_primitive_impl_ocl { void load(BinaryInputBuffer& ib) override { parent::load(ib); - if (is_dynamic()) { + if (is_dynamic() && _kernel_data.kernelName.length() != 0) { auto& kernel_selector = kernel_selector_t::Instance(); auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); diff --git 
a/src/plugins/intel_gpu/src/graph/impls/ocl/softmax.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/softmax.cpp index 7295fe57273738..fb77fc3861583c 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/softmax.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/softmax.cpp @@ -48,7 +48,7 @@ struct softmax_impl : typed_primitive_impl_ocl { void load(BinaryInputBuffer& ib) override { parent::load(ib); - if (is_dynamic()) { + if (is_dynamic() && _kernel_data.kernelName.length() != 0) { auto& kernel_selector = kernel_selector_t::Instance(); auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/strided_slice.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/strided_slice.cpp index 48631b1e8cace9..9b14a9cff496a8 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/strided_slice.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/strided_slice.cpp @@ -80,7 +80,7 @@ struct strided_slice_impl : typed_primitive_impl_ocl { void load(BinaryInputBuffer& ib) override { parent::load(ib); - if (is_dynamic()) { + if (is_dynamic() && _kernel_data.kernelName.length() != 0) { auto& kernel_selector = kernel_selector_t::Instance(); auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/swiglu.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/swiglu.cpp index 8f96f7afa31556..6013bcef11c0f7 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/swiglu.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/swiglu.cpp @@ -25,7 +25,7 @@ struct swiglu_impl : typed_primitive_impl_ocl { void load(BinaryInputBuffer& ib) override { parent::load(ib); - if (is_dynamic()) { + if (is_dynamic() && _kernel_data.kernelName.length() != 0) { auto& kernel_selector = kernel_selector_t::Instance(); auto kernel_impl = 
kernel_selector.GetImplementation(_kernel_data.kernelName); kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/tile.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/tile.cpp index 6d2268de0730e7..0469c3de37c449 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/tile.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/tile.cpp @@ -25,7 +25,7 @@ struct tile_impl : typed_primitive_impl_ocl { void load(BinaryInputBuffer& ib) override { parent::load(ib); - if (is_dynamic()) { + if (is_dynamic() && _kernel_data.kernelName.length() != 0) { auto& kernel_selector = kernel_selector_t::Instance(); auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/unique.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/unique.cpp index d90580de3c35fd..d833b28969bd15 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/unique.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/unique.cpp @@ -24,7 +24,7 @@ struct unique_count_impl : typed_primitive_impl_ocl { void load(BinaryInputBuffer& ib) override { parent::load(ib); - if (is_dynamic()) { + if (is_dynamic() && _kernel_data.kernelName.length() != 0) { auto& kernel_selector = kernel_selector_t::Instance(); auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); @@ -106,7 +106,7 @@ struct unique_gather_impl : typed_primitive_impl_ocl { void load(BinaryInputBuffer& ib) override { parent::load(ib); - if (is_dynamic()) { + if (is_dynamic() && _kernel_data.kernelName.length() != 0) { auto& kernel_selector = kernel_selector_t::Instance(); auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.hpp 
b/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.hpp index e85bda18a034da..9e0a3fa5cfb390 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.hpp @@ -19,7 +19,7 @@ struct ConcatenationImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad) + if (!info.supports_immad || info.arch == gpu_arch::unknown) return false; static const std::vector supported_types = { ov::element::f16, ov::element::u8, ov::element::i8 }; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp index 83d2a10dc4f2f9..a11ceef8b0f2dd 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp @@ -204,14 +204,16 @@ struct convolution_onednn : typed_primitive_onednn_impl { auto& a_zp = arg.activations_zero_points(); auto a_zp_dtype = a_zp.get_output_layout().data_type; - if (!data_type_traits::is_i8_u8(a_zp_dtype)) { + if (!data_type_traits::is_i8_u8(a_zp_dtype) && a_zp_dtype != data_types::i32) { throw std::runtime_error("Unsupported data type for activations zero points for oneDNN convolution"); } if (a_zp_dtype == data_types::i8) { set_activation_zero_points_attr::value_type>(attrs, a_zp.as(), zero_point_mask); - } else { // if (a_zp_dtype == data_types::u8) + } else if (a_zp_dtype == data_types::u8) { set_activation_zero_points_attr::value_type>(attrs, a_zp.as(), zero_point_mask); + } else if (a_zp_dtype == data_types::i32) { + set_activation_zero_points_attr::value_type>(attrs, a_zp.as(), zero_point_mask); } } diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.hpp 
b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.hpp index a5616167506f70..c3f599fc5db9f6 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.hpp @@ -24,7 +24,7 @@ struct ConvolutionImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad) + if (!info.supports_immad || info.arch == gpu_arch::unknown) return false; const auto& conv_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.hpp index 949c979ed77e80..039cf36261caa0 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.hpp @@ -20,7 +20,7 @@ struct DeconvolutionImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad) + if (!info.supports_immad || info.arch == gpu_arch::unknown) return false; const auto& deconv_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp index 39423980521042..a601b2c74c09e3 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp @@ -22,7 +22,7 @@ struct FullyConnectedImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); const auto& info = node.get_program().get_engine().get_device_info(); - 
if (!info.supports_immad) + if (!info.supports_immad || info.arch == gpu_arch::unknown) return false; const auto& fc_node = node.as(); @@ -50,7 +50,7 @@ struct FullyConnectedImplementationManager : public ImplementationManager { bool compressed_case = fc_prim->compressed_weights && one_of(in0_dt, {data_types::f16, data_types::f32, data_types::i8}) && one_of(wei_dt, {data_types::u8, data_types::i8, data_types::u4, data_types::i4}) && - one_of(out_dt, {data_types::f16, data_types::f32, data_types::u8, data_types::i8}); + one_of(out_dt, {data_types::f16, data_types::f32}); if (!f16f16_case && !f32f32_case && !u8s8_case && !compressed_case) return false; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.hpp index f89d3e588735e2..6c576d177043ee 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.hpp @@ -19,7 +19,7 @@ struct GemmImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad) + if (!info.supports_immad || info.arch == gpu_arch::unknown) return false; const auto& gemm_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp index 343fe66771de25..4710b0c77b83c7 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp @@ -20,7 +20,7 @@ struct PoolingImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad) + if (!info.supports_immad || info.arch == 
gpu_arch::unknown) return false; const auto& in_layout = node.get_input_layout(0); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/reduce_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/reduce_onednn.hpp index fbdf64131ff384..68d963fd9e369f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/reduce_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/reduce_onednn.hpp @@ -49,7 +49,7 @@ struct ReduceImplementationManager : public ImplementationManager { bool validate_impl(const program_node& node) const override { assert(node.is_type()); const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad) + if (!info.supports_immad || info.arch == gpu_arch::unknown) return false; const auto& reduce_node = node.as(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.hpp index b671f5e210e75c..ad08c516e939d8 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.hpp @@ -57,7 +57,7 @@ struct ReorderImplementationManager : public ImplementationManager { return true; const auto& info = node.get_program().get_engine().get_device_info(); - if (!info.supports_immad) + if (!info.supports_immad || info.arch == gpu_arch::unknown) return false; if (!one_of(input_fmt.value, supported_formats) || !one_of(output_fmt.value, supported_formats)) diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp index 19ea02c7c66d28..a8aa43671ed048 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp @@ -30,6 +30,7 @@ cldnn::memory::ptr convert_zp_data_to_s32(const memory::ptr zp_memory) { template cldnn::memory::ptr convert_zp_data_to_s32(const memory::ptr zp_memory); template cldnn::memory::ptr 
convert_zp_data_to_s32(const memory::ptr zp_memory); +template cldnn::memory::ptr convert_zp_data_to_s32(const memory::ptr zp_memory); cldnn::format default_fmt_for_dims(size_t dims, bool is_grouped) { switch (dims) { @@ -489,6 +490,7 @@ bool is_per_tensor(cldnn::data_node& node, int32_t& zp_val) { template bool is_per_tensor(cldnn::data_node& node, int32_t& zp_val); template bool is_per_tensor(cldnn::data_node& node, int32_t& zp_val); +template bool is_per_tensor(cldnn::data_node& node, int32_t& zp_val); static std::string get_external_order(const std::vector& order, bool is_weights, bool is_grouped) { diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/gemm_impls.cpp b/src/plugins/intel_gpu/src/graph/impls/registry/gemm_impls.cpp index 66947ef1a84a00..436a3bb257b483 100644 --- a/src/plugins/intel_gpu/src/graph/impls/registry/gemm_impls.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/registry/gemm_impls.cpp @@ -19,7 +19,10 @@ const std::vector>& Registry static const std::vector> impls = { OV_GPU_CREATE_INSTANCE_ONEDNN(onednn::GemmImplementationManager, shape_types::static_shape) OV_GPU_GET_INSTANCE_OCL(gemm, shape_types::static_shape) - OV_GPU_GET_INSTANCE_OCL(gemm, shape_types::dynamic_shape) + OV_GPU_GET_INSTANCE_OCL(gemm, shape_types::dynamic_shape, + [](const program_node& node) { + return !node.can_use(impl_types::onednn); + }) }; return impls; diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/reorder_impls.cpp b/src/plugins/intel_gpu/src/graph/impls/registry/reorder_impls.cpp index 3b38e2754fbc12..eab8c1f3d9c3f8 100644 --- a/src/plugins/intel_gpu/src/graph/impls/registry/reorder_impls.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/registry/reorder_impls.cpp @@ -36,7 +36,8 @@ const std::vector>& Registry : public typed_primitive_inst_base< // Internal function to be used from fakealignment template - static std::vector __calc_output_layouts(const layout &act_layout, uint64_t group_size); + static std::vector __calc_output_layouts(const 
layout &act_layout, + const dynamic_quantize::Attributes& config); static std::string to_string(dynamic_quantize_node const& node); typed_primitive_inst(network& network, dynamic_quantize_node const& node); diff --git a/src/plugins/intel_gpu/src/graph/include/kv_cache_inst.h b/src/plugins/intel_gpu/src/graph/include/kv_cache_inst.h index f3aa4de5ec34e1..da0a9397433f89 100644 --- a/src/plugins/intel_gpu/src/graph/include/kv_cache_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/kv_cache_inst.h @@ -62,6 +62,12 @@ class typed_primitive_inst : public typed_primitive_inst_base= 0 ? sequence_axis : past_layout_rank + sequence_axis; } + static int64_t get_scale_zp_sequence_axis() { + // The order of scales and zero points is fixed, so use constant axis + const auto scale_zp_concat_axis = 2; + return scale_zp_concat_axis; + } + static int64_t get_max_pad(const layout& target_layout, size_t buffer_size, int64_t sequence_axis, std::string target_name = "") { if (buffer_size == 0) return 0; diff --git a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h index 33571db8496642..6899812ab28097 100644 --- a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h @@ -325,6 +325,7 @@ class primitive_inst { virtual int32_t get_prealloc_iter_num() { return -1; } virtual void update_shape_info_tensor(const kernel_impl_params& params); kernel_impl_params get_fake_aligned_params_if_possible(kernel_impl_params const& orig_impl_param); + bool all_dependencies_cpu_impl() const; protected: primitive_inst(network& network, program_node const& node, bool allocate_memory); diff --git a/src/plugins/intel_gpu/src/graph/include/program_node.h b/src/plugins/intel_gpu/src/graph/include/program_node.h index ba5363f09f194b..8105a8bc07dec3 100644 --- a/src/plugins/intel_gpu/src/graph/include/program_node.h +++ b/src/plugins/intel_gpu/src/graph/include/program_node.h @@ 
-237,7 +237,7 @@ struct program_node { } void merge_output_padding(padding const& padd, size_t idx = 0) { - set_output_padding(padding::max(padd, output_layouts[idx].data_padding)); + set_output_padding(padding::max(padd, output_layouts[idx].data_padding), idx); } // only calculated output layout (for external usage), does not modify/use cached output layout nor invalidate users diff --git a/src/plugins/intel_gpu/src/graph/include/read_value_inst.h b/src/plugins/intel_gpu/src/graph/include/read_value_inst.h index 74f9ffff581b87..ea0c8b82bb21fa 100644 --- a/src/plugins/intel_gpu/src/graph/include/read_value_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/read_value_inst.h @@ -33,9 +33,14 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(read_value_node const& /*node*/, const kernel_impl_params& impl_param) { auto desc = impl_param.typed_desc(); - const auto& default_layout = desc->output_layout; + std::vector output_layouts; - return { impl_param.state_layout.value_or(default_layout) }; + for (size_t i = 0; i < desc->num_outputs; i++) { + const auto& default_layout = desc->output_layouts[i]; + output_layouts.push_back(impl_param.state_layouts.size() > i ? 
impl_param.state_layouts[i] : default_layout); + } + + return output_layouts; } static layout calc_output_layout(const read_value_node& node, kernel_impl_params const& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/include/reshape_inst.h b/src/plugins/intel_gpu/src/graph/include/reshape_inst.h index 1bbfd94256a50c..d6a71c20fcac8d 100644 --- a/src/plugins/intel_gpu/src/graph/include/reshape_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/reshape_inst.h @@ -59,7 +59,7 @@ struct typed_program_node : public typed_program_node_base { return false; // TODO: If user is RoPE or MVN and dynamic padding exists, ouput padding propagation is not supported in the base mode - if (get_users().size() == 1 && (get_users().front()->is_type() || get_users().front()->is_type())) + if (get_users().size() == 1 && get_users().front()->is_type()) return false; auto axis = input().as().get_primitive()->axis; @@ -73,14 +73,17 @@ struct typed_program_node : public typed_program_node_base { const auto& output_pshape = prim->output_partial_shape; // TODO: If the reshape's output shape is non constant, issue occurs // during shape inference due to execution order at runtime - if ((output_pshape.size() != input_rank + 1) || prim->output_pattern.empty()) + if (prim->output_pattern.empty()) return false; + // Iteratively check the total product of all static innermost dimensions + // until the crop dimension value matches or the first dynamic dimension is encountered int64_t mul = 1; - for (size_t i = input_rank - 1; i < output_pshape.size() ; i++) { - if (output_pshape[i].is_dynamic()) - return false; - mul *= output_pshape[i].get_length(); + for (size_t i = output_pshape.size(); i > 1 ; i--) { + if (output_pshape[i - 1].is_dynamic() || mul == input_last_dim_val) + break; + + mul *= output_pshape[i - 1].get_length(); } if (input_last_dim_val != mul) return false; diff --git a/src/plugins/intel_gpu/src/graph/kv_cache.cpp b/src/plugins/intel_gpu/src/graph/kv_cache.cpp index 
66a874b9b153ec..808a593c601ad0 100644 --- a/src/plugins/intel_gpu/src/graph/kv_cache.cpp +++ b/src/plugins/intel_gpu/src/graph/kv_cache.cpp @@ -3,6 +3,7 @@ // #include "intel_gpu/op/kv_cache.hpp" +#include "intel_gpu/op/kv_cache_compressed.hpp" #include "intel_gpu/plugin/common_utils.hpp" #include "intel_gpu/plugin/multi_tensor_variable_state.hpp" #include "intel_gpu/runtime/optionals.hpp" @@ -29,19 +30,39 @@ template std::vector kv_cache_inst::calc_output_layouts(kv_cache_node const& /*node*/, kernel_impl_params const& impl_param) { auto desc = impl_param.typed_desc(); - ov::intel_gpu::op::KVCache op; - op.set_output_size(desc->num_outputs); - op.set_concat_axis(desc->concat_axis); - op.set_gather_axis(desc->gather_axis); - std::vector input_shapes = {impl_param.get_input_layout(0).get(), impl_param.get_input_layout(1).get()}; - if (desc->num_outputs > 1) + if (desc->indirect) { input_shapes.push_back(impl_param.get_input_layout(2).get()); + } - std::vector output_shapes = shape_infer(&op, input_shapes); + if (desc->compressed) { + input_shapes.push_back(impl_param.get_input_layout(3).get()); + + if (desc->get_compression_zp_inputs_num() > 0) { + input_shapes.push_back(impl_param.get_input_layout(4).get()); + } + } + + std::vector output_shapes; + if (desc->compressed) { + ov::intel_gpu::op::KVCacheCompressed op; + op.set_output_size(desc->num_outputs); + op.set_concat_axis(desc->concat_axis); + op.set_gather_axis(desc->gather_axis); + op.set_quantization_attrs(desc->quantization_attributes); + + output_shapes = shape_infer(&op, input_shapes); + } else { + ov::intel_gpu::op::KVCache op; + op.set_output_size(desc->num_outputs); + op.set_concat_axis(desc->concat_axis); + op.set_gather_axis(desc->gather_axis); + + output_shapes = shape_infer(&op, input_shapes); + } - static const std::map ports_map = {{0, 0}, {1, 2}}; + static const std::map ports_map = {{0, 0}, {1, 2}, {2, 3}, {3, 4}}; std::vector out_layouts; for (size_t i = 0; i < desc->num_outputs; i++) { @@ 
-64,6 +85,9 @@ std::string kv_cache_inst::to_string(const kv_cache_node& node) { kv_cache_info.add("concat axis", node.get_primitive()->concat_axis); kv_cache_info.add("gather axis", node.get_primitive()->gather_axis); kv_cache_info.add("indirect", node.get_primitive()->indirect); + kv_cache_info.add("compressed", node.get_primitive()->compressed); + kv_cache_info.add("output_storage_type", static_cast(node.get_primitive()->quantization_attributes.output_storage_type)); + kv_cache_info.add("scales_zp_output_order", node.get_primitive()->quantization_attributes.scales_zp_output_order); node_info->add("kv_cache info", kv_cache_info); std::stringstream primitive_description; node_info->dump(primitive_description); diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index da56141cee4840..0d0748311caa08 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -40,6 +40,7 @@ #include "graph_optimizer/prepare_buffer_fusing.h" #include "intel_gpu/plugin/common_utils.hpp" +#include "intel_gpu/plugin/multi_tensor_variable_state.hpp" #include "intel_gpu/graph/network.hpp" #include "intel_gpu/graph/serialization/set_serializer.hpp" #include "intel_gpu/runtime/engine.hpp" @@ -294,30 +295,54 @@ void primitive_inst::update_shape() { auto prim = get_node().as().get_primitive(); const auto& variable_id = prim->variable_id; auto& variable = get_network().get_variable(variable_id); - // Initial variable shape is taken from variable itself - auto new_layout = variable.get_layout(); - // If variable is not set and we have an initializer - use it's shape as shape of variable - if (!variable.is_set() && _impl_params->input_layouts.size() == 1) { - new_layout = _impl_params->get_input_layout(0); - } + auto update_state_layout = [&](ov::intel_gpu::VariableStateBase& variable, layout new_layout, size_t layout_idx) { + // If variable is not set and we have an 
initializer - use it's shape as shape of variable + if (!variable.is_set() && _impl_params->input_layouts.size() > layout_idx) { + new_layout = _impl_params->get_input_layout(layout_idx); + } - // If we still have a dynamic dimension, which basiclly means that we don't have an initializer, then replace dynamic dims with 0 - if (new_layout.is_dynamic()) { - auto pshape = new_layout.get_partial_shape(); - for (auto& d : pshape) { - if (d.is_dynamic()) { - d = 0; + // If we still have a dynamic dimension, which basiclly means that we don't have an initializer, then replace dynamic dims with 0 + if (new_layout.is_dynamic()) { + auto pshape = new_layout.get_partial_shape(); + for (auto& d : pshape) { + if (d.is_dynamic()) { + d = 0; + } } + new_layout.set_partial_shape(pshape); } - new_layout.set_partial_shape(pshape); - } - variable.set_layout(new_layout); + variable.set_layout(new_layout); - if (!_impl_params->state_layout.has_value() || _impl_params->state_layout.value() != new_layout) { - _impl_params->state_layout = new_layout; - input_shape_changed = true; + if (_impl_params->state_layouts[layout_idx] != new_layout) { + _impl_params->state_layouts[layout_idx] = new_layout; + GPU_DEBUG_TRACE_DETAIL << "Update " << layout_idx << " layout: " << new_layout.to_short_string() << "\n"; + input_shape_changed = true; + } + }; + + if (_impl_params->state_layouts.empty()) + _impl_params->state_layouts.resize(1); + + // Initial variable shape is taken from variable itself + auto new_layout = variable.get_layout(); + update_state_layout(variable, new_layout, 0); + + if (prim->num_outputs > 1) { + if (auto compressed_cache_variable = dynamic_cast(&variable)) { + _impl_params->state_layouts.resize(compressed_cache_variable->has_zp_state() ? 
3 : 2); + + auto scales_state = compressed_cache_variable->get_compression_scale_state(); + auto new_scales_layout = compressed_cache_variable->get_compression_scale_state()->get_layout(); + update_state_layout(*scales_state, new_scales_layout, 1); + + if (compressed_cache_variable->has_zp_state()) { + auto scales_state = compressed_cache_variable->get_compression_zp_state(); + auto new_zp_layout = compressed_cache_variable->get_compression_zp_state()->get_layout(); + update_state_layout(*scales_state, new_zp_layout, 2); + } + } } } @@ -462,6 +487,14 @@ void primitive_inst::update_shape() { auto& variable = get_network().get_variable(desc->variable_id); // Custom output layout update as update_output_layout handles paddings incorrectly for optimized out read_value + kv_cache pattern _impl_params->output_layouts[0] = variable.get_layout(); + + if (auto compressed_cache_variable = dynamic_cast(&variable)) { + _impl_params->output_layouts[1] = compressed_cache_variable->get_compression_scale_state()->get_layout(); + + if (compressed_cache_variable->has_zp_state()) { + _impl_params->output_layouts[2] = compressed_cache_variable->get_compression_zp_state()->get_layout(); + } + } } if (get_node().is_type()) { @@ -494,6 +527,25 @@ kernel_impl_params primitive_inst::get_fake_aligned_params_if_possible(kernel_im return updated_params; } +// Check if all dependencies and its predecessors are CPU or constant +static bool check_all_deps_cpu(const primitive_inst* inst) { + return std::all_of(inst->dependencies().begin(), inst->dependencies().end(), + [&](const std::pair& dep) { + if (dep.first->is_constant() || + (dep.first->get_impl() != nullptr && dep.first->get_impl()->is_cpu())) { + return true; + } + // Check if the dependency can be optimized + if (dep.first->can_be_optimized()) { + return check_all_deps_cpu(dep.first); + } + return false; + }); +} + +bool primitive_inst::all_dependencies_cpu_impl() const { + return check_all_deps_cpu(this); +} event::ptr 
primitive_inst::realloc_if_needed() { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("realloc_if_needed: " + id())); @@ -544,6 +596,15 @@ event::ptr primitive_inst::realloc_if_needed() { << ", variable layout " << variable.get_layout().to_short_string() << ")" << std::endl; _outputs[0] = variable.get_memory(); + + if (auto compressed_cache_variable = dynamic_cast(&variable)) { + _outputs[2] = compressed_cache_variable->get_compression_scale_state()->get_memory(); + + if (compressed_cache_variable->has_zp_state()) { + _outputs[3] = compressed_cache_variable->get_compression_zp_state()->get_memory(); + } + } + // To record shape predictor for (size_t j = 0; j < _impl_params->output_layouts.size(); ++j) sp.predict_preallocation_shape(id(), _impl_params->output_layouts[j], true, j); @@ -563,11 +624,27 @@ event::ptr primitive_inst::realloc_if_needed() { GPU_DEBUG_TRACE_DETAIL << id() << ": Update variable (ptr: " << variable.get_memory()->buffer_ptr() << ", actual_size:" << variable.get_actual_mem_size() << " bytes" << ", variable layout:" << variable.get_layout().to_short_string() << ")" << std::endl; + + if (auto compressed_cache_variable = dynamic_cast(&variable)) { + compressed_cache_variable->get_compression_scale_state()->set_layout(_impl_params->output_layouts[1]); + + if (compressed_cache_variable->has_zp_state()) { + compressed_cache_variable->get_compression_zp_state()->set_layout(_impl_params->output_layouts[2]); + } + } } // For nodes that can be optimized, variable memory is used as output memory // so there is no need for output memory reallocation if (can_be_optimized()) { _max_output_layout_count[0] = variable.get_actual_mem_size() / dt_sizes_in_B[0]; + + if (auto compressed_cache_variable = dynamic_cast(&variable)) { + const size_t scale_idx = _node->is_type() ? 
1 : 2; // kv_cache or read_value + _max_output_layout_count[scale_idx] = compressed_cache_variable->get_compression_scale_state()->get_actual_mem_size() / dt_sizes_in_B[1]; + if (compressed_cache_variable->has_zp_state()) { + _max_output_layout_count[scale_idx + 1] = compressed_cache_variable->get_compression_zp_state()->get_actual_mem_size() / dt_sizes_in_B[2]; + } + } GPU_DEBUG_PROFILED_STAGE_MEMALLOC_INFO("can_be_optimized"); return ev; } @@ -646,7 +723,10 @@ event::ptr primitive_inst::realloc_if_needed() { // dynamic quantization is only applied to activation of FC if (get_node().is_type()) { - auto dyn_quan_scale_layout = dynamic_quantize_inst::__calc_output_layouts(updated_layouts[dep_idx], 0); + const auto& desc = get_node().as().get_primitive(); + auto dyn_quan_scale_layout = + dynamic_quantize_inst::__calc_output_layouts(updated_layouts[dep_idx], + desc->attrs); GPU_DEBUG_TRACE_DETAIL << "update layout of dynamic quantize scale parameter layout " << dyn_quan_scale_layout[1].to_short_string() << std::endl; updated_params.output_layouts[1] = dyn_quan_scale_layout[1]; @@ -671,13 +751,24 @@ event::ptr primitive_inst::realloc_if_needed() { // update layout to ensure that it repsects paddings for correct allocation size if (_node_output_layout.data_padding.is_dynamic()) { - auto current_dims = updated_layouts[0].get_padded_dims(); + auto update_padding = [](layout& orig_layout) { + auto current_dims = orig_layout.get_padded_dims(); + + std::vector current_buf_shape; + current_buf_shape.reserve(current_dims.size()); + std::transform(current_dims.begin(), current_dims.end(), + std::back_inserter(current_buf_shape), [](const tensor::value_type& el) { return static_cast(el); }); + orig_layout = layout(ov::PartialShape(current_buf_shape), orig_layout.data_type, orig_layout.format); + }; + + update_padding(updated_layouts[0]); - std::vector current_buf_shape; - current_buf_shape.reserve(current_dims.size()); - std::transform(current_dims.begin(), current_dims.end(), - 
std::back_inserter(current_buf_shape), [](const tensor::value_type& el) { return static_cast(el); }); - updated_layouts[0] = layout(ov::PartialShape(current_buf_shape), updated_layouts[0].data_type, updated_layouts[0].format); + // Update scales and zero points buffers paddings, skipping beam_table + if (_node->is_type()) { + for (size_t i = 2; i < updated_layouts.size(); ++i) { + update_padding(updated_layouts[i]); + } + } } int32_t tmp_prealloc_count = get_prealloc_iter_num(); @@ -690,13 +781,14 @@ event::ptr primitive_inst::realloc_if_needed() { for (size_t i = 0; i < updated_layouts.size(); ++i) { bool reclaim = 0; size_t required_buffer_size = 0; - if (_node->is_type() && i == 0) { + if (_node->is_type() && i != 1) { // Relax reclaiming condition for kv cache const auto& desc = _node->as().get_primitive(); auto prealloc_shape = updated_layouts[i].get_shape(); const auto shape_rank = prealloc_shape.size(); - auto seq_axis = - static_cast(desc->concat_axis >= 0 ? desc->concat_axis : shape_rank + desc->concat_axis); + const auto seq_axis = i == 0 ? kv_cache_inst::get_sequence_axis(desc->concat_axis, shape_rank) + : kv_cache_inst::get_scale_zp_sequence_axis(); + prealloc_shape[seq_axis] += tmp_prealloc_count; required_buffer_size = std::accumulate(prealloc_shape.begin(), prealloc_shape.end(), size_t(1), std::multiplies()); } else { @@ -723,11 +815,12 @@ event::ptr primitive_inst::realloc_if_needed() { for (size_t i = 0; i < actual_layouts.size(); ++i) { bool can_reuse_buffer = (_outputs[i] && updated_layouts[i].get_linear_size() <= _max_output_layout_count[i]); std::pair prealloc_info; - if (_node->is_type() && i == 0) { + if (_node->is_type() && i != 1) { const auto& desc = _node->as().get_primitive(); - auto shape_rank = updated_layouts[i].get_shape().size(); - auto seq_axis = - static_cast(desc->concat_axis >= 0 ? 
desc->concat_axis : shape_rank + desc->concat_axis); + const auto shape_rank = updated_layouts[i].get_shape().size(); + const auto seq_axis = i == 0 ? kv_cache_inst::get_sequence_axis(desc->concat_axis, shape_rank) + : kv_cache_inst::get_scale_zp_sequence_axis(); + prealloc_info = sp.predict_preallocation_shape(id(), updated_layouts[i], false, i, tmp_prealloc_count, seq_axis); } else { prealloc_info = sp.predict_preallocation_shape(id(), updated_layouts[i], can_reuse_buffer, i, tmp_prealloc_count); @@ -743,19 +836,21 @@ event::ptr primitive_inst::realloc_if_needed() { GPU_DEBUG_TRACE_DETAIL << id() << ": reuse previously allocated output buffer[" << i << "] - " << actual_layouts[i].get_linear_size() << "/" << _max_output_layout_count[i] << std::endl; - if (_node->is_type() && (i == 0)) { + if (_node->is_type() && i != 1) { // kv_cache has already assigned memory. // No need to reinterpret output memory but need to update padding const auto& desc = _node->as().get_primitive(); auto& present_layout = _impl_params->output_layouts[i]; const auto present_layout_rank = present_layout.get_partial_shape().size(); - const auto sequence_axis = kv_cache_inst::get_sequence_axis(desc->concat_axis, present_layout_rank); + const auto sequence_axis = i == 0 ? kv_cache_inst::get_sequence_axis(desc->concat_axis, present_layout_rank) + : kv_cache_inst::get_scale_zp_sequence_axis();; + auto max_pad = kv_cache_inst::get_max_pad(present_layout, _max_output_layout_count[i], sequence_axis, - "present_layout"); + i == 0 ? "present_layout" : "present_scales_layout"); kv_cache_inst::update_pad(present_layout, max_pad, sequence_axis); - GPU_DEBUG_TRACE_DETAIL << _impl_params->output_layouts[i].to_string() << std::endl; + GPU_DEBUG_TRACE_DETAIL << i << ". 
" << _impl_params->output_layouts[i].to_string() << std::endl; set_shape_change(); } else { _outputs[i] = _network.get_engine().reinterpret_buffer(*_outputs[i], actual_layouts[i]); @@ -816,6 +911,26 @@ event::ptr primitive_inst::realloc_if_needed() { sequence_axis, "present_layout"); if (max_pad > 0) { + if (auto compressed_cache_variable = dynamic_cast(&variable)) { + auto present_scales_layout = _impl_params->output_layouts[2]; + const auto sequence_axis = kv_cache_inst::get_scale_zp_sequence_axis();; + + // In case of compressed KV-cache, calling update_impl for each iteration + // because of scales layout [batch, num_heads, seq_len, head_size], which requires proper + // dynamic padding updates + axis_is_outer_most = false; + kv_cache_inst::update_pad(present_scales_layout, max_pad, sequence_axis); + + _impl_params->output_layouts[2] = present_scales_layout; + compressed_cache_variable->get_compression_scale_state()->set_memory(_outputs[2], present_scales_layout); + if (compressed_cache_variable->has_zp_state()) { + auto present_zp_layout = present_scales_layout; + + _impl_params->output_layouts[3] = present_scales_layout; + compressed_cache_variable->get_compression_zp_state()->set_memory(_outputs[3], present_zp_layout); + } + } + kv_cache_inst::update_pad(present_layout, max_pad, sequence_axis); if (!axis_is_outer_most) { GPU_DEBUG_TRACE_DETAIL << id() << ": Update impl with new output padding" << std::endl; @@ -836,12 +951,32 @@ event::ptr primitive_inst::realloc_if_needed() { << "'s layout with allocated kv cache output: " << present_layout.to_short_string() << " (is_set = " << variable.is_set() << ") " << std::endl; variable.set_memory(_outputs[0], present_layout); + + if (auto compressed_cache_variable = dynamic_cast(&variable)) { + auto present_scales_layout = _impl_params->output_layouts[2]; + + compressed_cache_variable->get_compression_scale_state()->set_memory(_outputs[2], present_scales_layout); + if (compressed_cache_variable->has_zp_state()) { + 
auto present_zp_layout = present_scales_layout; + compressed_cache_variable->get_compression_zp_state()->set_memory(_outputs[3], present_zp_layout); + } + } } } else { GPU_DEBUG_TRACE_DETAIL << id() << ": Update variable " << variable.get_name() << "'s layout with allocated kv cache output: " << present_layout.to_short_string() << " (is_set = " << variable.is_set() << ") " << std::endl; variable.set_layout(present_layout); + + if (auto compressed_cache_variable = dynamic_cast(&variable)) { + auto present_scales_layout = _impl_params->output_layouts[2]; + + compressed_cache_variable->get_compression_scale_state()->set_layout(present_scales_layout); + if (compressed_cache_variable->has_zp_state()) { + auto present_zp_layout = present_scales_layout; + compressed_cache_variable->get_compression_zp_state()->set_layout(present_zp_layout); + } + } } } @@ -1017,8 +1152,8 @@ bool primitive_inst::update_impl(bool use_async_compilation) { } void primitive_inst::update_paddings() { - auto reset_pad = [](kernel_impl_params& params, const program_node* node) { - params.output_layouts[0].data_padding = node->get_output_layout(0).data_padding; + auto reset_pad = [](kernel_impl_params& params, const program_node* node, size_t idx = 0) { + params.output_layouts[idx].data_padding = node->get_output_layout(idx).data_padding; }; if (_node->is_type() || _node->is_type()) { auto variable_id = _node->is_type() ? (_node->as().get_primitive()->variable_id) @@ -1030,6 +1165,15 @@ void primitive_inst::update_paddings() { primitive_inst* inst = this; while (inst) { reset_pad(*inst->_impl_params, inst->_node); + if (inst == this) { + if (auto compressed_cache_variable = dynamic_cast(&variable)) { + const size_t scale_idx = _node->is_type() ? 
1 : 2; + reset_pad(*inst->_impl_params, inst->_node, scale_idx); + if (compressed_cache_variable->has_zp_state()) { + reset_pad(*inst->_impl_params, inst->_node, scale_idx + 1); + } + } + } auto& users = inst->_node->get_users(); if (users.size() == 1 && users.front()->get_output_layout(0).data_padding.is_dynamic()) { inst = inst->get_user_insts().front(); @@ -1155,11 +1299,42 @@ void primitive_inst::do_runtime_in_place_kv_cache() { GPU_DEBUG_TRACE_DETAIL << "[do runtime_in_place_kv_cache] " << id() << " Updated present_layout's pad : " << present_layout.to_string() << std::endl; auto& variable = get_network().get_variable(desc->variable_info.variable_id); variable.set_layout(present_layout); + + if (desc->compressed) { + auto compressed_cache_variable = dynamic_cast(&variable); + auto& present_scales_layout = _impl_params->output_layouts[2]; + const auto sequence_axis = kv_cache_inst::get_scale_zp_sequence_axis(); + kv_cache_inst::update_pad(present_scales_layout, max_pad - new_seq_len, sequence_axis); + GPU_DEBUG_TRACE_DETAIL << "[do runtime_in_place_kv_cache] " << id() + << " Updated present_scale_layout's pad : " << present_scales_layout.to_string() << std::endl; + + compressed_cache_variable->get_compression_scale_state()->set_layout(present_scales_layout); + if (desc->get_compression_zp_inputs_num() > 0) { + auto& present_zp_layout = _impl_params->output_layouts[3]; + kv_cache_inst::update_pad(present_zp_layout, max_pad - new_seq_len, sequence_axis); + GPU_DEBUG_TRACE_DETAIL << "[do runtime_in_place_kv_cache] " << id() + << " Updated present_zp_layout's pad : " << present_scales_layout.to_string() << std::endl; + + compressed_cache_variable->get_compression_zp_state()->set_layout(present_scales_layout); + } + } + GPU_DEBUG_TRACE_DETAIL << "[do_runtime_in_place_kv_cache] " << id() << "Updated variable with present_layout" << variable.get_layout().to_string() << " is_set = " << variable.is_set() << std::endl; if 
(past_layout.data_padding._upper_size[sequence_axis] > 0 && variable.is_set()) { kv_cache_inst::update_pad(past_layout, max_pad, sequence_axis); _impl_params->_can_be_optimized = true; + + if (desc->compressed) { + auto& past_scale_layout = _impl_params->input_layouts[3]; + const auto sequence_axis = kv_cache_inst::get_scale_zp_sequence_axis(); + kv_cache_inst::update_pad(past_scale_layout, max_pad, sequence_axis); + + if (desc->get_compression_zp_inputs_num() > 0) { + auto& past_zp_layout = _impl_params->input_layouts[4]; + kv_cache_inst::update_pad(past_zp_layout, max_pad, sequence_axis); + } + } GPU_DEBUG_TRACE_DETAIL << "[do_runtime_in_place_kv_cache] " << id() << " Updated past layout's pad : " << past_layout.to_string() << std::endl; } } @@ -1251,36 +1426,31 @@ void primitive_inst::do_runtime_skip_permute() { auto desc = _node->as().get_primitive(); auto input_shape = _impl_params->get_input_layout(0).get_shape(); const auto& permute_order = desc->permute_order; - // Check runtime shape - // Optimize when the largest value among the actual dim values in case where the permute order - // is different from the shape index is equal to the multiplied value - // Set size to zero to pass the case that permute order is same with input order like [0, 1, 2, 3] - int32_t size = 0; - int32_t max_value = 0; - for (int32_t i = 0; i < static_cast(permute_order.size()); ++i) { - int32_t order = static_cast(permute_order[i]); - int32_t dim = static_cast(input_shape[order]); - if (i != order) { - if (dim > max_value) - max_value = dim; - size = (size == 0) ? 
dim : (size * dim); - } - } - - // If the largest value and total size are different, can_be_optimized needs to be reset - if (size != max_value) { - set_can_be_optimized(false); - GPU_DEBUG_TRACE_DETAIL << "--- Cannot optimize because size(" << size << ") and max_value(" << max_value - << ") are different" << std::endl; - - GPU_DEBUG_TRACE_DETAIL << "[do_runtime_skip_permute] " << id() << " : reset can_be_optimized to false " - << std::endl; - return; + // Can skip if the transposed dim keeps the original order + bool can_skip = true; + auto permute_dest = permute_order; + for (size_t i = 0; i < permute_order.size(); ++i) { + permute_dest[permute_order[i]] = static_cast(i); + } + int16_t prev_dim = -1; + for (size_t i = 0; i < permute_dest.size(); ++i) { + if (input_shape[i] > 1) { + if (permute_dest[i] < prev_dim) { + can_skip = false; + break; + } + prev_dim = permute_dest[i]; + } } - GPU_DEBUG_TRACE_DETAIL << "[do_runtime_skip_permute] " << id() << " : can_be_optimized" << std::endl; + GPU_DEBUG_TRACE_DETAIL << "[do_runtime_skip_permute] " << id() << " : can_be_optimized ? 
" << can_skip << std::endl; GPU_DEBUG_TRACE_DETAIL << " - Input layout : " << _impl_params->get_input_layout(0).to_short_string() << std::endl; GPU_DEBUG_TRACE_DETAIL << " - Output layout : " << _impl_params->get_output_layout().to_short_string() << std::endl; - set_can_be_optimized(true); + GPU_DEBUG_TRACE_DETAIL << " - permute order : "; + for (auto order : permute_order) { + GPU_DEBUG_TRACE_DETAIL << order << ","; + } + GPU_DEBUG_TRACE_DETAIL << std::endl; + set_can_be_optimized(can_skip); } void primitive_inst::do_runtime_skip_strided_slice() { diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 1e2e84043dc82b..07fad4873659cd 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -7,6 +7,7 @@ #include "openvino/core/type.hpp" #include "openvino/runtime/system_conf.hpp" #include "openvino/runtime/threading/cpu_streams_info.hpp" +#include "openvino/util/weights_path.hpp" #include "intel_gpu/runtime/memory.hpp" #include "intel_gpu/runtime/engine.hpp" @@ -1839,7 +1840,8 @@ void program::load(cldnn::BinaryInputBuffer& ib) { std::shared_ptr mapped_memory = nullptr; std::string weights_path = _config.get_property(ov::weights_path); - if (!weights_path.empty()) { + if (_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE && + ov::util::validate_weights_path(weights_path)) { mapped_memory = ov::load_mmap_object(weights_path); } diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index fc9648b90e444c..a9bb7c665f177b 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -90,7 +90,8 @@ void program_node::replace_dependency(size_t idx, std::pair const program_node::get_input_layouts() const { std::vector layouts; for (size_t i = 0; i < dependencies.size(); i++) { - layouts.push_back(get_input_layout(i)); + auto input_layout 
= get_input_layout(i); + layouts.push_back(input_layout); } return layouts; } diff --git a/src/plugins/intel_gpu/src/graph/read_value.cpp b/src/plugins/intel_gpu/src/graph/read_value.cpp index bf6e730e8a808b..1d6657b9bf8ac4 100644 --- a/src/plugins/intel_gpu/src/graph/read_value.cpp +++ b/src/plugins/intel_gpu/src/graph/read_value.cpp @@ -2,8 +2,11 @@ // SPDX-License-Identifier: Apache-2.0 // -#include +#include "read_value_inst.h" #include "primitive_type_base.h" + +#include "intel_gpu/plugin/multi_tensor_variable_state.hpp" + #include #include @@ -16,7 +19,7 @@ read_value_inst::typed_primitive_inst(network& network, const read_value_node& n } layout read_value_inst::calc_output_layout(const read_value_node& node, kernel_impl_params const& impl_param) { - return impl_param.typed_desc()->output_layout; + return impl_param.typed_desc()->output_layouts[0]; } std::string read_value_inst::to_string(const read_value_node& node) { @@ -45,5 +48,25 @@ void read_value_inst::update_output_memory() { GPU_DEBUG_TRACE_DETAIL << " - layout " << variable.get_layout().to_string() << std::endl; GPU_DEBUG_TRACE_DETAIL << " - actual_size " << variable.get_actual_mem_size() << " bytes" << std::endl; set_output_memory(variable.get_memory(), false, 0); + + if (auto compressed_cache_variable = dynamic_cast(&variable)) { + auto scales_state = compressed_cache_variable->get_compression_scale_state(); + set_output_memory(scales_state->get_memory(), false, 1); + + GPU_DEBUG_TRACE_DETAIL << id() << " Update output memory with variable " << scales_state->get_name() << std::endl; + GPU_DEBUG_TRACE_DETAIL << " - ptr : " << scales_state->get_memory()->buffer_ptr() << std::endl; + GPU_DEBUG_TRACE_DETAIL << " - layout " << scales_state->get_layout().to_string() << std::endl; + GPU_DEBUG_TRACE_DETAIL << " - actual_size " << scales_state->get_actual_mem_size() << " bytes" << std::endl; + + if (compressed_cache_variable->has_zp_state()) { + auto zp_state = 
compressed_cache_variable->get_compression_zp_state(); + set_output_memory(zp_state->get_memory(), false, 2); + + GPU_DEBUG_TRACE_DETAIL << id() << " Update output memory with variable " << zp_state->get_name() << std::endl; + GPU_DEBUG_TRACE_DETAIL << " - ptr : " << zp_state->get_memory()->buffer_ptr() << std::endl; + GPU_DEBUG_TRACE_DETAIL << " - layout " << zp_state->get_layout().to_string() << std::endl; + GPU_DEBUG_TRACE_DETAIL << " - actual_size " << zp_state->get_actual_mem_size() << " bytes" << std::endl; + } + } } } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/scaled_dot_product_attention.cpp b/src/plugins/intel_gpu/src/graph/scaled_dot_product_attention.cpp index e8e213ad97011a..e80cb62a534b52 100644 --- a/src/plugins/intel_gpu/src/graph/scaled_dot_product_attention.cpp +++ b/src/plugins/intel_gpu/src/graph/scaled_dot_product_attention.cpp @@ -11,6 +11,7 @@ #include "scaled_dot_product_attention_shape_inference.hpp" #include "intel_gpu/op/sdpa.hpp" +#include "ov_ops/dynamic_quantize.hpp" namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(scaled_dot_product_attention) @@ -87,6 +88,14 @@ std::string scaled_dot_product_attention_inst::to_string(scaled_dot_product_atte json_composite scaled_dot_product_attention_info; scaled_dot_product_attention_info.add("input id", input.id()); scaled_dot_product_attention_info.add("is_causal", desc->is_causal); + scaled_dot_product_attention_info.add("is_kv_compressed", desc->is_kv_compressed); + scaled_dot_product_attention_info.add("output_storage_type", static_cast(node.get_primitive()->quantization_attributes.output_storage_type)); + scaled_dot_product_attention_info.add("group_size", desc->quantization_attributes.group_sizes); + scaled_dot_product_attention_info.add("quantization_type", static_cast(node.get_primitive()->quantization_attributes.quantization_type)); + scaled_dot_product_attention_info.add("quantization_dt", desc->quantization_attributes.quantization_dt); + 
scaled_dot_product_attention_info.add("scale_dt", desc->quantization_attributes.scale_dt); + scaled_dot_product_attention_info.add("zp_dt", desc->quantization_attributes.zp_dt); + scaled_dot_product_attention_info.add("indirect_axis", desc->indirect_axis); scaled_dot_product_attention_info.add("has_attn_mask_input", desc->has_attn_mask_input); scaled_dot_product_attention_info.add("has_scale_input", desc->has_scale_input); scaled_dot_product_attention_info.add("input_q_transpose_order", desc->input_q_transpose_order); diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/dynamic_quantize_gpu_kv_cache.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/dynamic_quantize_gpu_kv_cache.cl new file mode 100644 index 00000000000000..22a2f03c94564a --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/dynamic_quantize_gpu_kv_cache.cl @@ -0,0 +1,121 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "include/batch_headers/fetch_data.cl" +#include "include/batch_headers/fetch_data.cl" +#include "include/batch_headers/common.cl" +#include "include/batch_headers/sub_group_block_read.cl" +#include "include/batch_headers/sub_group_block_write.cl" +#include "include/batch_headers/sub_group_shuffle.cl" + + +#if OUTPUT_DIMS != 4 +#error "dynamic_quantize_gpu_kv_cache.cl: Unsupported output dimension" +#endif + +#define VLOAD_N CAT(vload, VEC_SIZE) +#define VSTORE_N CAT(vstore, VEC_SIZE) +#define CONVERT_CHAR_N CAT(convert_char, VEC_SIZE) +#define AS_TYPE_N_(type, n, x) as_##type##n(x) +#define AS_TYPE_N(type, n, x) AS_TYPE_N_(type, n, x) +#define AS_INPUT_TYPE_N(x) AS_TYPE_N(INPUT0_TYPE, VEC_SIZE, x) + + +inline uint FUNC(get_scales_offset_nt)(OPTIONAL_SHAPE_INFO_ARG uint b, uint f, uint y, uint x) { + return OUTPUT1_GET_INDEX(b, f, y, x); +} + +inline uint FUNC(get_scales_offset)(OPTIONAL_SHAPE_INFO_ARG uint b, uint f, uint y, uint x) { +#ifdef SCALES_OUTPUT_ORDER + return 
FUNC_CALL(get_scales_offset_nt)(OPTIONAL_SHAPE_INFO_TENSOR SCALES_OUTPUT_ORDER); +#else + return FUNC_CALL(get_scales_offset_nt)(OPTIONAL_SHAPE_INFO_TENSOR b, f, y, x); +#endif +} + +#define SUBGROUP_SIZE 16 +#define INNERMOST_DIM_VALUE INPUT0_SIZE_X +#define INPUT_BLOCK_READ(ptr, offset) BLOCK_READN(INPUT0_TYPE, 1, ptr, offset) +#define OUTPUT_BLOCK_WRITE(ptr, offset, val) BLOCK_WRITEN(OUTPUT_TYPE, 1, ptr, offset, val) + +__attribute__((reqd_work_group_size(SUBGROUP_SIZE, SUBGROUPS_NUMBER, 1))) +REQD_SUB_GROUP_SIZE(SUBGROUP_SIZE) +KERNEL(dynamic_quantize_gpu_kv_cache)( + OPTIONAL_SHAPE_INFO_ARG + const __global INPUT0_TYPE* input, + __global OUTPUT_TYPE* output, + __global OUTPUT1_TYPE* output_scale +#if ASYMMETRIC_QUANTIZATION && !GROUP_SCALES_WITH_ZP + , __global OUTPUT2_TYPE* output_zp +#endif +#ifdef APPEND_MODE + , const uint axis_offset +#endif + ) +{ + const uint sglid = get_sub_group_local_id(); + const uint grouped_indexes = get_global_id(1); + const uint batch_indexes = get_global_id(2); + + DECLARE_BATCHED_DIMS_INDEXES(batch_indexes); + DECLARE_GROUPED_DIMS_INDEXES(grouped_indexes); + + // The innermost dimension is always processed in the loop inside the kernel + const uint x = 0; + + half max_value = INPUT0_VAL_MIN; + half min_value = INPUT0_VAL_MAX; + + half val[INNERMOST_DIM_VALUE / SUBGROUP_SIZE]; + + const uint input_offset = INPUT0_GET_INDEX(b, f, y, x); + unroll_for (uint i = 0; i < INNERMOST_DIM_VALUE / SUBGROUP_SIZE; i++) { + val[i] = INPUT_BLOCK_READ(input, input_offset + i * SUBGROUP_SIZE); +#if ASYMMETRIC_QUANTIZATION + max_value = fmax(max_value, val[i]); + min_value = fmin(min_value, val[i]); +#else + max_value = fmax(max_value, fabs(val[i])); +#endif + } + +#if ASYMMETRIC_QUANTIZATION + min_value = work_group_reduce_min(min_value); + max_value = work_group_reduce_max(max_value); + OUTPUT1_TYPE scale = (OUTPUT1_TYPE)((CHAR_MAX - CHAR_MIN) / (max_value - min_value)); + OUTPUT1_TYPE zp = (OUTPUT1_TYPE)(-min_value * scale) - CHAR_MAX; +#else 
+ max_value = work_group_reduce_max(max_value); + OUTPUT1_TYPE scale = 127.0h / max_value; +#endif + +#ifdef APPEND_MODE + APPEND_AXIS_NAME += axis_offset; +#endif + + const uint output_offset = OUTPUT_GET_INDEX(b, f, y, x); + unroll_for (uint i = 0; i < INNERMOST_DIM_VALUE / SUBGROUP_SIZE; i++) { +#if ASYMMETRIC_QUANTIZATION + OUTPUT_TYPE res = convert_char_rte(val[i] * scale + zp); +#else + OUTPUT_TYPE res = convert_char_rte(val[i] * scale); +#endif + OUTPUT_BLOCK_WRITE(output, output_offset + i * SUBGROUP_SIZE, res); + } + + const uint scale_idx = FUNC_CALL(get_scales_offset)(OPTIONAL_SHAPE_INFO_TENSOR b, f, y, x); + + if (grouped_indexes == 0 && sglid == 0) { +#if ASYMMETRIC_QUANTIZATION + output_scale[scale_idx] = 1.0h / scale; +#if GROUP_SCALES_WITH_ZP + output_scale[scale_idx + 1] = zp; +#else + output_zp[scale_idx] = zp; +#endif +#else + output_scale[scale_idx] = 1.0h / scale; +#endif + } +} diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/dynamic_quantize_gpu_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/dynamic_quantize_gpu_ref.cl index 858571fea71914..62482b8b9b5047 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/dynamic_quantize_gpu_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/dynamic_quantize_gpu_ref.cl @@ -8,48 +8,143 @@ #error "dynamic_quantize_gpu_ref.cl: Unsupported output dimension" #endif +inline uint FUNC(get_scales_offset_nt)(OPTIONAL_SHAPE_INFO_ARG uint b, uint f, uint y, uint x) { + return OUTPUT1_GET_INDEX(b, f, y, x); +} + +inline uint FUNC(get_scales_offset)(OPTIONAL_SHAPE_INFO_ARG uint b, uint f, uint y, uint x) { +#ifdef SCALES_OUTPUT_ORDER + return FUNC_CALL(get_scales_offset_nt)(OPTIONAL_SHAPE_INFO_TENSOR SCALES_OUTPUT_ORDER); +#else + return FUNC_CALL(get_scales_offset_nt)(OPTIONAL_SHAPE_INFO_TENSOR b, f, y, x); +#endif +} + KERNEL(dynamic_quantize_gpu_ref)( OPTIONAL_SHAPE_INFO_ARG const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output, - __global 
OUTPUT1_TYPE* output_scale) + __global OUTPUT1_TYPE* output_scale +#if ASYMMETRIC_QUANTIZATION && !GROUP_SCALES_WITH_ZP + , __global OUTPUT2_TYPE* output_zp +#endif +) { const uint bf = (uint)get_global_id(0); - const uint b = (uint)get_global_id(0) / INPUT0_FEATURE_NUM; - const uint f = (uint)get_global_id(0) % INPUT0_FEATURE_NUM; + const uint b = bf / INPUT0_FEATURE_NUM; + const uint f = bf % INPUT0_FEATURE_NUM; const uint y = (uint)get_global_id(1); - const uint scale_idx = OUTPUT1_GET_INDEX(b, f, y, 0); + const uint x = (uint)get_global_id(2); +#ifdef SCALES_OUTPUT_ORDER + const uint scale_idx = FUNC_CALL(get_scales_offset)(OPTIONAL_SHAPE_INFO_TENSOR b, f, y, x); +#else + const uint scale_idx = OUTPUT1_GET_INDEX_SAFE(b, f, y, x); +#endif - half max_val = 0.0001h; - for (int y_off = 0; y_off < (get_global_size(1) == 1 ? INPUT0_SIZE_Y : 1); y_off++) { - const uint offset = INPUT0_GET_INDEX(b, f, y + y_off, 0); + half max_val = INPUT0_VAL_MIN; + half min_val = INPUT0_VAL_MAX; + for (int b_off = 0; b_off < (GROUP_SIZE_DIM0 == 1 ? 1 : INPUT0_BATCH_NUM); b_off++) { + for (int f_off = 0; f_off < (GROUP_SIZE_DIM1 == 1 ? 1 : INPUT0_FEATURE_NUM); f_off++) { + for (int y_off = 0; y_off < (GROUP_SIZE_DIM2 == 1 ? 
1 : INPUT0_SIZE_Y); y_off++) { +#if GROUP_SIZE_DIM3 == 1 + const uint offset = INPUT0_GET_INDEX(b + b_off, f + f_off, y + y_off, x); + half val = input[offset]; +#if ASYMMETRIC_QUANTIZATION + max_val = fmax(max_val, val); + min_val = fmin(min_val, val); +#else + half abs_val = fabs(val); + max_val = fmax(max_val, abs_val); +#endif +#else + const uint offset = INPUT0_GET_INDEX(b + b_off, f + f_off, y + y_off, 0); int x; for (x = 0; x < INPUT0_SIZE_X / 8; x++) { half8 val = as_half8(vload8(0, (ushort*)input + offset + x * 8)); half8 abs_val = fabs(val); - - for (int j = 0; j < 8; j++) + for (int j = 0; j < 8; j++) { +#if ASYMMETRIC_QUANTIZATION + max_val = fmax(max_val, val[j]); + min_val = fmin(min_val, val[j]); +#else max_val = fmax(max_val, abs_val[j]); +#endif + } } x *= 8; - for (; x < INPUT0_SIZE_X; x++) - max_val = fmax(max_val, fabs(input[offset + x])); + for (; x < INPUT0_SIZE_X; x++) { + half val = input[offset + x]; +#if ASYMMETRIC_QUANTIZATION + max_val = fmax(max_val, val); + min_val = fmin(min_val, val); +#else + max_val = fmax(max_val, fabs(val)); +#endif + } +#endif + } + } } - half scale = 127.0h / max_val; - for (int y_off = 0; y_off < (get_global_size(1) == 1 ? INPUT0_SIZE_Y : 1); y_off++) { - const uint in_offset = INPUT0_GET_INDEX(b, f, y + y_off, 0); - const uint out_offset = OUTPUT_GET_INDEX(b, f, y + y_off, 0); +#if ASYMMETRIC_QUANTIZATION + OUTPUT1_TYPE scale = (OUTPUT1_TYPE)((CHAR_MAX - CHAR_MIN) / (max_val - min_val)); + OUTPUT1_TYPE zp = (OUTPUT1_TYPE)(-min_val * scale) - CHAR_MAX; +#else + max_val = work_group_reduce_max(max_val); + OUTPUT1_TYPE scale = 127.0h / max_val; +#endif + + for (int b_off = 0; b_off < (GROUP_SIZE_DIM0 == 1 ? 1 : INPUT0_BATCH_NUM); b_off++) { + for (int f_off = 0; f_off < (GROUP_SIZE_DIM1 == 1 ? 1 : INPUT0_FEATURE_NUM); f_off++) { + for (int y_off = 0; y_off < (GROUP_SIZE_DIM2 == 1 ? 
1 : INPUT0_SIZE_Y); y_off++) { +#if GROUP_SIZE_DIM3 == 1 + const uint in_offset = INPUT0_GET_INDEX(b + b_off, f + f_off, y + y_off, x); + const uint out_offset = OUTPUT_GET_INDEX(b + b_off, f + f_off, y + y_off, x); + + half val = input[in_offset]; +#if ASYMMETRIC_QUANTIZATION + val *= scale; + val += zp; + output[out_offset] = convert_char_rte(val); +#else + val *= scale; + output[out_offset] = convert_char_rte(val); +#endif +#else + const uint in_offset = INPUT0_GET_INDEX(b + b_off, f + f_off, y + y_off, 0); + const uint out_offset = OUTPUT_GET_INDEX(b + b_off, f + f_off, y + y_off, 0); int x; for (x = 0; x < INPUT0_SIZE_X / 8; x++) { half8 val = as_half8(vload8(0, (ushort*)input + in_offset + x * 8)); +#if ASYMMETRIC_QUANTIZATION val *= scale; + val += zp; +#else + val *= scale; +#endif vstore8(convert_char8_rte(val), 0, output + out_offset + x * 8); } x *= 8; - for (; x < INPUT0_SIZE_X; x++) - output[out_offset + x] = convert_char(input[in_offset + x] * scale); + for (; x < INPUT0_SIZE_X; x++) { + half val = input[in_offset + x]; +#if ASYMMETRIC_QUANTIZATION + val *= scale; + val += zp; + output[out_offset + x] = convert_char_rte(val); +#else + val *= scale; + output[out_offset + x] = convert_char_rte(val); +#endif + } +#endif + } + } } output_scale[scale_idx] = 1.0h / scale; +#if ASYMMETRIC_QUANTIZATION && GROUP_SCALES_WITH_ZP + output_scale[scale_idx + 1] = zp; +#elif ASYMMETRIC_QUANTIZATION + output_zp[scale_idx] = zp; +#endif } diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl index 70c55bfb73b8f5..ef4cc76650e0f3 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl @@ -22,36 +22,47 @@ #if FC_KERNEL_DYNAMIC_QUANTIZE KERNEL(quantize_input)( const __global INPUT0_TYPE* input, - __global char* 
quantized_input, - __global INPUT0_TYPE* de_quan_scale) { + __global DQ_TYPE* quantized_input, + __global INPUT0_TYPE* quan_var +) { const uint offset = get_global_id(0); const uint input_offset = offset * QUANTIZE_GROUP_SIZE; const uint quantize_block = QUANTIZE_GROUP_SIZE / 4; - half4 input_0[quantize_block]; - char4 quantized_value[quantize_block]; - half max[quantize_block]; + MAKE_VECTOR_TYPE(INPUT0_TYPE, INPUT_LOAD_SIZE) input_0[quantize_block]; + MAKE_VECTOR_TYPE(DQ_TYPE, INPUT_LOAD_SIZE) quantized_value[quantize_block]; + INPUT0_TYPE max[quantize_block]; unroll_for (uint i = 0 ; i < quantize_block ; ++i) { input_0[i] = vload4(0, &input[input_offset + i * 4]); max[i] = fmax(fmax(fabs(input_0[i][0]), fabs(input_0[i][1])), fmax(fabs(input_0[i][2]), fabs(input_0[i][3]))); } - half max_value = 0.001; - for (uint i = 0 ; i < quantize_block; i+=8) { - half temp = fmax(fmax(fmax(max[i], max[i+1]), fmax(max[i+2], max[i+3])), + INPUT0_TYPE max_value = 0.001; + for (uint i = 0 ; i < quantize_block ; i+=8) { + INPUT0_TYPE temp = fmax(fmax(fmax(max[i], max[i+1]), fmax(max[i+2], max[i+3])), fmax(fmax(max[i+4], max[i+5]), fmax(max[i+6], max[i+7]))); max_value = fmax(max_value, temp); } - half quan_scale = max_value / 128; - - unroll_for (uint i = 0 ; i < quantize_block ; ++i) { - quantized_value[i] = CAT(convert_, MAKE_VECTOR_TYPE(char, INPUT_LOAD_SIZE))(input_0[i] / (half4)quan_scale); + half quan_scale = (half)max_value / 127; + #if COMPRESSED_WEIGHTS_INT8 + half quantized_sum = 0; + #endif + for (uint i = 0 ; i < quantize_block ; ++i) { + half4 buff = input_0[i] / (half4)quan_scale; + quantized_value[i] = CAT(CAT(convert_, MAKE_VECTOR_TYPE(DQ_TYPE, INPUT_LOAD_SIZE)), _rte)(buff); + #if COMPRESSED_WEIGHTS_INT8 + quantized_sum += (buff[0] + buff[1] + buff[2] + buff[3]); + #endif vstore4(quantized_value[i], 0, &quantized_input[input_offset + i * 4]); } - de_quan_scale[offset] = quan_scale; + // Pair of quantizing_scale and quantized activation_sum for each group + 
quan_var[offset * 2] = quan_scale; + #if COMPRESSED_WEIGHTS_INT8 + quan_var[(offset * 2) + 1] = quantized_sum; + #endif } #else // !FC_KERNEL_DYNAMIC_QUANTIZE @@ -189,6 +200,7 @@ inline void FUNC(fc_bf_tiled_kernel_default)( #else uint gid = (uint)get_group_id(0); #endif + uint sglid = (uint)get_sub_group_local_id(); // Dispatch as bs_fs_bsv_fsv, where bsv = DISPATCH_BSV and fsv = DISPATCH_FSV. @@ -212,10 +224,9 @@ inline void FUNC(fc_bf_tiled_kernel_default)( ACCUMULATOR_VEC_TYPE acc[TILE_B] = { }; INPUT_VEC_TYPE in_0[TILE_B] = { }; -#if !USE_SLM - FILTER_VEC_TYPE wei = 0; -#endif - + #if !USE_SLM || !COMPRESSED_WEIGHTS_INT4 + FILTER_VEC_TYPE wei = 0; + #endif #if OUTPUT_3D uint out_b0 = out_b / OUTPUT_FEATURE_NUM; @@ -743,19 +754,31 @@ inline void FUNC(fc_bf_tiled_kernel_default)( // ===================================================================================================================================== } + + + // Dyc Quantize #if USE_SLM && DYNAMIC_QUANTIZE -#define PACKED_DQ_TYPE int -#define DQ_VEC_TYPE MAKE_VECTOR_TYPE(DQ_TYPE, TILE_IFM) -#define DQ_SLM_FILTER_VEC MAKE_VECTOR_TYPE(DQ_TYPE, 4) + +#if COMPRESSED_WEIGHTS_INT4 + #define SLM_WEIGHT_TYPE DQ_TYPE +#else + #define SLM_WEIGHT_TYPE FILTER_TYPE +#endif + +#define PACKED_DQ_TYPE uint +#define ACCUM_DQ_TYPE int #define DQ_SLM_FILTER_PACKED_VEC MAKE_VECTOR_TYPE(FILTER_TYPE, FILTER_ACTUAL_LOAD_BLOCK_SIZE) -#define DQ_SLM_FILTER_UNPACKED_VEC MAKE_VECTOR_TYPE(DQ_TYPE, FILTER_ELEMENTS_PER_LOAD) -#define DQ_FILTER_VEC_TYPE MAKE_VECTOR_TYPE(DQ_TYPE, TILE_K_OFM) +#define SLM_WEIGHT_VEC MAKE_VECTOR_TYPE(SLM_WEIGHT_TYPE, INPUT_LOAD_SIZE) +#define SLM_WEIGHT_UNPACKED_VEC MAKE_VECTOR_TYPE(SLM_WEIGHT_TYPE, FILTER_ELEMENTS_PER_LOAD) +#define WEIGHT_VEC_TYPE MAKE_VECTOR_TYPE(SLM_WEIGHT_TYPE, TILE_K_OFM) +#define MAKE_DQ_TYPE_VEC(x) MAKE_VECTOR_TYPE(DQ_TYPE, x) #define TO_DQ_TYPE(x) CAT(CAT(convert_, DQ_TYPE),_sat)(x) #define TO_DQ_VEC_TYPE(x) CAT(convert_, DQ_VEC_TYPE)(x) -#define 
TO_DQ_SLM_FILTER_UNPACKED_VEC(x) CAT(convert_, DQ_SLM_FILTER_UNPACKED_VEC)(x) -#define TO_DQ_FILTER_VEC_TYPE(x) CAT(convert_, DQ_FILTER_VEC_TYPE)(x) +#define TO_ACCUM_DQ_TYPE(x) CAT(convert_, ACCUM_DQ_TYPE)(x) +#define TO_SLM_WEIGHT_UNPACKED_VEC(x) CAT(convert_, SLM_WEIGHT_UNPACKED_VEC)(x) +#define TO_WEIGHT_VEC_TYPE(x) CAT(convert_, WEIGHT_VEC_TYPE)(x) #define AS_TYPE_N_(type, n, x) as_##type##n(x) #define AS_TYPE_N(type, n, x) AS_TYPE_N_(type, n, x) @@ -764,8 +787,8 @@ inline void FUNC(fc_bf_tiled_kernel_default)( inline void FUNC(fc_bf_tiled_kernel_dyn_quan)( OPTIONAL_SHAPE_INFO_ARG const __global INPUT0_TYPE* input, - __global char* quantized_input, - __global INPUT0_TYPE* scale, + __global DQ_TYPE* quantized_input, + __global INPUT0_TYPE* quan_var, // pair of params for each quantizing group : scale, activation_sum #if DECOMPRESSION_SCALE_TERM const __global DECOMPRESSION_SCALE_TYPE* decompression_scale, #endif @@ -774,7 +797,7 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan)( #endif __global OUTPUT_TYPE* output, const __global FILTER_TYPE* weights - , __local int* wei_local_mem + , __local uint* wei_local_mem #if BIAS_TERM , const __global BIAS_TYPE* biases #endif @@ -801,28 +824,32 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan)( uint out_f = gid * (TILE_OFM * SIMD); uint out_b = LWS_BATCHES * TILE_B * (uint)get_group_id(2) + local_id * TILE_B; -#if OUTPUT_3D - uint out_b0 = out_b / OUTPUT_FEATURE_NUM; - uint out_b1 = out_b % OUTPUT_FEATURE_NUM; - uint input_offset = out_b0 * INPUT0_BATCH_PITCH + out_b1 * INPUT0_FEATURE_PITCH + INPUT0_OFFSET; -#else - uint input_offset = out_b * TILE_IN_B_PITCH + INPUT0_OFFSET; -#endif + #if OUTPUT_3D + uint out_b0 = out_b / OUTPUT_FEATURE_NUM; + uint out_b1 = out_b % OUTPUT_FEATURE_NUM; + uint input_offset = out_b0 * INPUT0_BATCH_PITCH + out_b1 * INPUT0_FEATURE_PITCH + INPUT0_OFFSET; + #else + uint input_offset = out_b * TILE_IN_B_PITCH + INPUT0_OFFSET; + #endif -#if FILTER_LAYOUT_OS_IS_YX_OSV64_ISV2 - const int 
power_of_two_for_simd = 5; - const int power_of_two_for_osv = 6; - const uint osv64_weight_base = (( (int) (out_f >> power_of_two_for_osv) ) << power_of_two_for_osv); - const uint osv_weight_stride = (INPUT_ELEMENTS_COUNT >> 1); - const uint out_f_offset = (int)((out_f >> power_of_two_for_simd) & 0x1) << power_of_two_for_simd; - // out_f(32) : 0 * osv_weight_stride + 32; - // out_f(64) : 64 * osv_weight_stride + 0; - // out_f(128) : 64 * osv_weight_stride + 32; - // ... - uint weights_offset = osv64_weight_base * osv_weight_stride + out_f_offset; -#else - uint weights_offset = out_f * (INPUT_ELEMENTS_COUNT / 2); -#endif + #if COMPRESSED_WEIGHTS_INT4 + #if FILTER_LAYOUT_OS_IS_YX_OSV64_ISV2 + const int power_of_two_for_simd = 5; + const int power_of_two_for_osv = 6; + const uint osv64_weight_base = (( (int) (out_f >> power_of_two_for_osv) ) << power_of_two_for_osv); + const uint osv_weight_stride = (INPUT_ELEMENTS_COUNT >> 1); + const uint out_f_offset = (int)((out_f >> power_of_two_for_simd) & 0x1) << power_of_two_for_simd; + // out_f(32) : 0 * osv_weight_stride + 32; + // out_f(64) : 64 * osv_weight_stride + 0; + // out_f(128) : 64 * osv_weight_stride + 32; + // ... 
+ uint weights_offset = osv64_weight_base * osv_weight_stride + out_f_offset; + #else + uint weights_offset = out_f * (INPUT_ELEMENTS_COUNT / 2); + #endif + #else + uint weights_offset = out_f * INPUT_ELEMENTS_COUNT; + #endif ACCUMULATOR_VEC_TYPE acc[TILE_B] = { }; @@ -831,38 +858,42 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan)( PACKED_DQ_TYPE packed_in_0[HALF_TILE_B] = { }; // Packing char4 inputs to 1 integer INPUT0_TYPE de_quantize_scale[TILE_B]; -#if COMPRESSED_WEIGHTS && DECOMPRESSION_SCALE_GROUPS_NUM == 1 - #if DECOMPRESSION_SCALE_LENGTH > 1 && DECOMPRESSION_SCALE_LENGTH % (TILE_OFM * SIMD) == 0 - ACCUMULATOR_VEC_TYPE d_scale = TO_ACCUMULATOR_VEC_TYPE(BLOCK_READN(DECOMPRESSION_SCALE_TYPE, TILE_OFM, decompression_scale, out_f)); - #elif DECOMPRESSION_SCALE_LENGTH > 1 && DECOMPRESSION_SCALE_LENGTH % (TILE_OFM * SIMD) != 0 - ACCUMULATOR_VEC_TYPE d_scale = 0; - unroll_for(uint of = 0; of < TILE_OFM; ++of) { - uint offset = out_f + of*SIMD + get_sub_group_local_id(); - if (offset < DECOMPRESSION_SCALE_LENGTH) - ((ACCUMULATOR_TYPE*)(&d_scale))[of] = decompression_scale[offset]; - } - #else - ACCUMULATOR_VEC_TYPE d_scale = decompression_scale[0]; + #if COMPRESSED_WEIGHTS_INT8 + INPUT0_TYPE activation_sum[TILE_B] = { }; #endif - ACCUMULATOR_TYPE* d_scales = (ACCUMULATOR_TYPE*)(&d_scale); -#endif + #if COMPRESSED_WEIGHTS && DECOMPRESSION_SCALE_GROUPS_NUM == 1 + #if DECOMPRESSION_SCALE_LENGTH > 1 && DECOMPRESSION_SCALE_LENGTH % (TILE_OFM * SIMD) == 0 + ACCUMULATOR_VEC_TYPE d_scale = TO_ACCUMULATOR_VEC_TYPE(BLOCK_READN(DECOMPRESSION_SCALE_TYPE, TILE_OFM, decompression_scale, out_f)); + #elif DECOMPRESSION_SCALE_LENGTH > 1 && DECOMPRESSION_SCALE_LENGTH % (TILE_OFM * SIMD) != 0 + ACCUMULATOR_VEC_TYPE d_scale = 0; + unroll_for(uint of = 0; of < TILE_OFM; ++of) { + uint offset = out_f + of*SIMD + get_sub_group_local_id(); + if (offset < DECOMPRESSION_SCALE_LENGTH) + ((ACCUMULATOR_TYPE*)(&d_scale))[of] = decompression_scale[offset]; + } + #else + ACCUMULATOR_VEC_TYPE 
d_scale = decompression_scale[0]; + #endif -#if COMPRESSED_WEIGHTS && DECOMPRESSION_ZP_TERM && DECOMPRESSION_ZP_GROUPS_NUM == 1 && !DECOMPRESSION_ZP_SCALAR - #if DECOMPRESSION_ZP_LENGTH > 1 && DECOMPRESSION_ZP_LENGTH % (TILE_OFM * SIMD) == 0 - ACCUMULATOR_VEC_TYPE d_zp = TO_ACCUMULATOR_VEC_TYPE(BLOCK_READN(DECOMPRESSION_ZP_TYPE, TILE_OFM, decompression_zp, out_f)); - #elif DECOMPRESSION_ZP_LENGTH > 1 && DECOMPRESSION_ZP_LENGTH % (TILE_OFM * SIMD) != 0 - ACCUMULATOR_VEC_TYPE d_zp = 0; - unroll_for(uint of = 0; of < TILE_OFM; ++of) { - uint offset = out_f + of*SIMD + get_sub_group_local_id(); - if (offset < DECOMPRESSION_ZP_LENGTH) - ((ACCUMULATOR_TYPE*)(&d_zp))[of] = decompression_zp[offset]; - } - #else - ACCUMULATOR_VEC_TYPE d_zp = decompression_zp[0]; + ACCUMULATOR_TYPE* d_scales = (ACCUMULATOR_TYPE*)(&d_scale); + #endif + + #if COMPRESSED_WEIGHTS && DECOMPRESSION_ZP_TERM && DECOMPRESSION_ZP_GROUPS_NUM == 1 && !DECOMPRESSION_ZP_SCALAR + #if DECOMPRESSION_ZP_LENGTH > 1 && DECOMPRESSION_ZP_LENGTH % (TILE_OFM * SIMD) == 0 + ACCUMULATOR_VEC_TYPE d_zp = TO_ACCUMULATOR_VEC_TYPE(BLOCK_READN(DECOMPRESSION_ZP_TYPE, TILE_OFM, decompression_zp, out_f)); + #elif DECOMPRESSION_ZP_LENGTH > 1 && DECOMPRESSION_ZP_LENGTH % (TILE_OFM * SIMD) != 0 + ACCUMULATOR_VEC_TYPE d_zp = 0; + unroll_for(uint of = 0; of < TILE_OFM; ++of) { + uint offset = out_f + of*SIMD + get_sub_group_local_id(); + if (offset < DECOMPRESSION_ZP_LENGTH) + ((ACCUMULATOR_TYPE*)(&d_zp))[of] = decompression_zp[offset]; + } + #else + ACCUMULATOR_VEC_TYPE d_zp = decompression_zp[0]; + #endif + ACCUMULATOR_TYPE* d_zps = (ACCUMULATOR_TYPE*)(&d_zp); #endif - ACCUMULATOR_TYPE* d_zps = (ACCUMULATOR_TYPE*)(&d_zp); -#endif // ===================================================================================================================================== // Main computation loop @@ -871,7 +902,7 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan)( uint idx_sglid = (sglid * TILE_K) % TILE_IFM_ELEMENTS_SIZE; // same index 
for sglid 0~7 : to tile_k direction uint batch_sglid = (sglid * TILE_K) / TILE_IFM_ELEMENTS_SIZE; // 0 to 1 : to batch direction - const uint scale_pitch = TILE_IN_B_PITCH / QUANTIZE_GROUP_SIZE; + const uint scale_pitch = (TILE_IN_B_PITCH / QUANTIZE_GROUP_SIZE); MAKE_VECTOR_TYPE(int, TILE_B) acc_tmp[TILE_OFM] = { }; __attribute__((opencl_unroll_hint(1))) for (uint ni = 0; ni < iterations; ++ni) { @@ -881,14 +912,20 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan)( // Load quantizing info from pre-quantizing kernel tiled_input_0[bi] = vload4(0, &quantized_input[in_offset]); // Packing : Get 4(B)x4(K) integer vector (packing to 4x1 vector) - packed_in_0[bi] = as_int(tiled_input_0[bi]); + packed_in_0[bi] = as_uint(tiled_input_0[bi]); // Next batch in_offset += (TILE_IN_B_PITCH * 2); #if NUM_LOOP_IN_DYN_QUAN_GROUP == 1 - de_quantize_scale[bi * 2] = scale[scale_offset]; - de_quantize_scale[bi * 2 + 1] = scale[scale_offset+ scale_pitch]; + de_quantize_scale[bi * 2] = quan_var[scale_offset * 2]; + de_quantize_scale[bi * 2 + 1] = quan_var[scale_offset * 2 + scale_pitch * 2]; + #if COMPRESSED_WEIGHTS_INT8 + // Need additional accumulation of quantized activation along the dyn-quan group + // to use i8 multiplier for int8 weight + activation_sum[bi * 2] = quan_var[scale_offset * 2 + 1]; + activation_sum[bi * 2 + 1] = quan_var[scale_offset * 2 + 1 + scale_pitch * 2]; + #endif scale_offset += (scale_pitch * 2); #endif } @@ -896,7 +933,10 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan)( #if NUM_LOOP_IN_DYN_QUAN_GROUP > 1 if (ni % NUM_LOOP_IN_DYN_QUAN_GROUP == 0) { unroll_for (uint bi = 0; bi < TILE_B; ++bi) { - de_quantize_scale[bi] = scale[scale_offset]; + de_quantize_scale[bi] = quan_var[scale_offset * 2]; + #if COMPRESSED_WEIGHTS_INT8 + activation_sum[bi] = quan_var[scale_offset * 2 + 1]; + #endif scale_offset += scale_pitch; } } @@ -916,49 +956,64 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan)( barrier(CLK_LOCAL_MEM_FENCE); #endif - __local int* char_slm_weight = 
(__local int*)wei_local_mem; + __local uint* char_slm_weight = (__local uint*)wei_local_mem; - #if FILTER_LAYOUT_OS_IS_YX_OSV64_ISV2 - uint weights_idx = weights_offset + local_id * SIMD * FILTER_LOAD_ITERS * FILTER_LOAD_BLOCK_SIZE * 2; + #if COMPRESSED_WEIGHTS_INT4 + #if FILTER_LAYOUT_OS_IS_YX_OSV64_ISV2 + uint weights_idx = weights_offset + local_id * SIMD * FILTER_LOAD_ITERS * FILTER_LOAD_BLOCK_SIZE * 2; + #else + uint weights_idx = weights_offset + local_id * SIMD * FILTER_LOAD_ITERS * FILTER_ACTUAL_LOAD_BLOCK_SIZE; + #endif #else - uint weights_idx = weights_offset + local_id * SIMD * FILTER_LOAD_ITERS * FILTER_ACTUAL_LOAD_BLOCK_SIZE; + uint weights_idx = weights_offset + local_id * SIMD * FILTER_LOAD_ITERS * TILE_K_OFM_PACKED; #endif uint wei_local_idx = local_id * SIMD * FILTER_LOAD_ITERS * (FILTER_LOAD_BLOCK_SIZE/2) + sglid * 2; - // DECOMPRESSION_SCALE_POST_OP SHOULD be enabled for dynamic quantize FC : scale is ACCUMULATOR_VAL_ONE + #if COMPRESSED_WEIGHTS_INT8 + ACCUMULATOR_TYPE wei_zp[TILE_OFM] = { }; + #endif + + // DQ_DECOMPRESSION_SCALE_POST_OP SHOULD be enabled for dynamic quantize FC : scale is ACCUMULATOR_VAL_ONE unroll_for(uint load_iter = 0; load_iter < FILTER_LOAD_ITERS; ++load_iter) { - #if FILTER_LAYOUT_OS_IYX_OSV16 - SLM_FILTER_PACKED_VEC wei_packed0 = BLOCK_READN(FILTER_TYPE, FILTER_ACTUAL_LOAD_BLOCK_SIZE, weights, weights_idx); - SLM_FILTER_PACKED_VEC wei_packed1 = BLOCK_READN(FILTER_TYPE, FILTER_ACTUAL_LOAD_BLOCK_SIZE, weights, (weights_idx + ((IFM_SIZE / 2) * 16))); - DQ_SLM_FILTER_UNPACKED_VEC dq_wei_unpacked; - // loaded weights 'wei_packed' of os_iyx_osv16 format have continuous values along TILE_K. 
So no need to transpose while unpacking - dq_wei_unpacked.s0123 = UNPACK_INT4(DQ_TYPE, *((INT4_PACKED_TYPE_PRELOAD*)&wei_packed0)); - dq_wei_unpacked.s4567 = UNPACK_INT4(DQ_TYPE, *((INT4_PACKED_TYPE_PRELOAD*)&wei_packed1)); - #elif FILTER_LAYOUT_OS_IS_YX_OSV64_ISV2 - SLM_FILTER_PACKED_VEC wei_packed0 = BLOCK_READN(FILTER_TYPE, FILTER_ACTUAL_LOAD_BLOCK_SIZE, weights, weights_idx); - SLM_FILTER_PACKED_VEC wei_packed1 = BLOCK_READN(FILTER_TYPE, FILTER_ACTUAL_LOAD_BLOCK_SIZE, weights, (weights_idx + (FILTER_LOAD_BLOCK_SIZE * SIMD))); - DQ_SLM_FILTER_UNPACKED_VEC dq_wei_unpacked; - DQ_SLM_FILTER_UNPACKED_VEC dq_wei_unpacked_tmp; - dq_wei_unpacked_tmp.s0123 = UNPACK_INT4(DQ_TYPE, *((INT4_PACKED_TYPE_PRELOAD*)&wei_packed0)); - dq_wei_unpacked_tmp.s4567 = UNPACK_INT4(DQ_TYPE, *((INT4_PACKED_TYPE_PRELOAD*)&wei_packed1)); - dq_wei_unpacked.s01 = dq_wei_unpacked_tmp.s01; - dq_wei_unpacked.s23 = dq_wei_unpacked_tmp.s45; - dq_wei_unpacked.s45 = dq_wei_unpacked_tmp.s23; - dq_wei_unpacked.s67 = dq_wei_unpacked_tmp.s67; - #else - SLM_FILTER_PACKED_VEC wei_packed = BLOCK_READN(FILTER_TYPE, FILTER_LOAD_BLOCK_SIZE, weights, weights_idx); - DQ_SLM_FILTER_UNPACKED_VEC dq_wei_unpacked = UNPACK_TRANSPOSED_INT4(DQ_TYPE, *((INT4_PACKED_TYPE_PRELOAD *)&wei_packed)); + #if COMPRESSED_WEIGHTS_INT4 + #if FILTER_LAYOUT_OS_IYX_OSV16 + SLM_FILTER_PACKED_VEC wei_packed0 = BLOCK_READN(FILTER_TYPE, FILTER_ACTUAL_LOAD_BLOCK_SIZE, weights, weights_idx); + SLM_FILTER_PACKED_VEC wei_packed1 = BLOCK_READN(FILTER_TYPE, FILTER_ACTUAL_LOAD_BLOCK_SIZE, weights, (weights_idx + ((IFM_SIZE / 2) * 16))); + SLM_WEIGHT_UNPACKED_VEC dq_wei_unpacked; + // loaded weights 'wei_packed' of os_iyx_osv16 format have continuous values along TILE_K. 
So no need to transpose while unpacking + dq_wei_unpacked.s0123 = (UNPACK_INT4(DQ_TYPE, *((INT4_PACKED_TYPE_PRELOAD*)&wei_packed0))); + dq_wei_unpacked.s4567 = (UNPACK_INT4(DQ_TYPE, *((INT4_PACKED_TYPE_PRELOAD*)&wei_packed1))); + #elif FILTER_LAYOUT_OS_IS_YX_OSV64_ISV2 + SLM_FILTER_PACKED_VEC wei_packed0 = BLOCK_READN(FILTER_TYPE, FILTER_ACTUAL_LOAD_BLOCK_SIZE, weights, weights_idx); + SLM_FILTER_PACKED_VEC wei_packed1 = BLOCK_READN(FILTER_TYPE, FILTER_ACTUAL_LOAD_BLOCK_SIZE, weights, (weights_idx + (FILTER_LOAD_BLOCK_SIZE * SIMD))); + SLM_WEIGHT_UNPACKED_VEC dq_wei_unpacked; + SLM_WEIGHT_UNPACKED_VEC dq_wei_unpacked_tmp; + dq_wei_unpacked_tmp.s0123 = (UNPACK_INT4(DQ_TYPE, *((INT4_PACKED_TYPE_PRELOAD*)&wei_packed0))); + dq_wei_unpacked_tmp.s4567 = (UNPACK_INT4(DQ_TYPE, *((INT4_PACKED_TYPE_PRELOAD*)&wei_packed1))); + dq_wei_unpacked.s01 = dq_wei_unpacked_tmp.s01; + dq_wei_unpacked.s23 = dq_wei_unpacked_tmp.s45; + dq_wei_unpacked.s45 = dq_wei_unpacked_tmp.s23; + dq_wei_unpacked.s67 = dq_wei_unpacked_tmp.s67; + #else + SLM_FILTER_PACKED_VEC wei_packed = BLOCK_READN(FILTER_TYPE, FILTER_LOAD_BLOCK_SIZE, weights, weights_idx); + SLM_WEIGHT_UNPACKED_VEC dq_wei_unpacked = (UNPACK_TRANSPOSED_INT4(DQ_TYPE, *((INT4_PACKED_TYPE_PRELOAD *)&wei_packed))); + #endif + #else // COMPRESSED_WEIGHTS_INT8 + SLM_WEIGHT_UNPACKED_VEC dq_wei_unpacked; + WEIGHT_VEC_TYPE wei_packed = TO_WEIGHT_VEC_TYPE(FILTER_BLOCK_READ(weights, weights_idx)); + dq_wei_unpacked.s0123 = wei_packed.s0246; + dq_wei_unpacked.s4567 = wei_packed.s1357; #endif - // Calculate zero-point and scale only for DECOMPRESSION_SCALE_POST_OP enabled + // Calculate zero-point and scale only for DQ_DECOMPRESSION_SCALE_POST_OP enabled // Calculate weight : w = (w - dzp) * ds // if DECOMPRESSION_ZP_TERM is not enabled, then dzp is ACCUMULATOR_VAL_ZERO. 
- #if DECOMPRESSION_ZP_TERM + #if DECOMPRESSION_ZP_TERM && !COMPRESSED_WEIGHTS_INT8 #if DECOMPRESSION_ZP_SCALAR - DQ_SLM_FILTER_UNPACKED_VEC dzp = (DQ_SLM_FILTER_UNPACKED_VEC)(DECOMPRESSION_ZP_VALUE); + SLM_WEIGHT_UNPACKED_VEC dzp = (SLM_WEIGHT_UNPACKED_VEC)(DECOMPRESSION_ZP_VALUE); dq_wei_unpacked -= dzp; #elif DECOMPRESSION_ZP_GROUPS_NUM > 1 - DQ_TYPE* w = (DQ_TYPE*)(&dq_wei_unpacked); + SLM_WEIGHT_TYPE* w = (SLM_WEIGHT_TYPE*)(&dq_wei_unpacked); const uint ni_offset = ni * TILE_IFM * SIMD + local_id * FILTER_LOAD_ITERS * FILTER_LOAD_BLOCK_SIZE; unroll_for(uint fi = 0; fi < TILE_OFM; ++fi) { const uint offset_ofm = out_f + fi*SIMD + sglid; @@ -966,11 +1021,11 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan)( const uint offset_ifm = ni_offset + load_iter * FILTER_LOAD_BLOCK_SIZE + kii; const uint zp_offset = (offset_ofm % DECOMPRESSION_ZP_BATCH_NUM) * DECOMPRESSION_ZP_BATCH_PITCH + (offset_ifm / DECOMPRESSION_ZP_GROUP_SIZE) * DECOMPRESSION_ZP_FEATURE_PITCH; - w[W_DYN_QUAN_IDX] = w[W_DYN_QUAN_IDX] - TO_DQ_TYPE(decompression_zp[zp_offset]); + w[W_DYN_QUAN_IDX] = w[W_DYN_QUAN_IDX] - CAT(CAT(convert_, SLM_WEIGHT_TYPE),_rte)(decompression_zp[zp_offset]); } } #else - DQ_TYPE* w = (DQ_TYPE*)(&dq_wei_unpacked); + SLM_WEIGHT_TYPE* w = (SLM_WEIGHT_TYPE*)(&dq_wei_unpacked); unroll_for(uint fi = 0; fi < TILE_OFM; ++fi) { unroll_for(uint kii = 0; kii < FILTER_LOAD_BLOCK_SIZE; ++kii) { w[W_DYN_QUAN_IDX] = w[W_DYN_QUAN_IDX] - d_zps[fi % DECOMPRESSION_ZP_LENGTH]; @@ -979,29 +1034,58 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan)( #endif #endif + #if COMPRESSED_WEIGHTS_INT8 + unroll_for(uint fi = 0; fi < TILE_OFM; ++fi) { + #if DECOMPRESSION_ZP_TERM + #if DECOMPRESSION_ZP_SCALAR + wei_zp[fi] = (TO_ACCUMULATOR_TYPE)(DECOMPRESSION_ZP_VALUE); + #elif DECOMPRESSION_ZP_GROUPS_NUM > 1 + #if FILTER_LOAD_BLOCK_SIZE % DECOMPRESSION_ZP_GROUP_SIZE != 0 + #error "FC bf_tiled kernel: Not support DECOMPRESSION_ZP_GROUPS_NUM > 1" + #endif + + const uint ni_offset = ni * TILE_IFM * SIMD + 
local_id * FILTER_LOAD_ITERS * FILTER_LOAD_BLOCK_SIZE; + const uint offset_ofm = out_f + fi*SIMD + sglid; + const uint offset_ifm = ni_offset + load_iter * FILTER_LOAD_BLOCK_SIZE; + const uint zp_offset = (offset_ofm % DECOMPRESSION_ZP_BATCH_NUM) * DECOMPRESSION_ZP_BATCH_PITCH + + (offset_ifm / DECOMPRESSION_ZP_GROUP_SIZE) * DECOMPRESSION_ZP_FEATURE_PITCH; + wei_zp[fi] = TO_ACCUMULATOR_TYPE(decompression_zp[zp_offset]); + #else + wei_zp[fi] = TO_ACCUMULATOR_TYPE(d_zps[fi % DECOMPRESSION_ZP_LENGTH]); + #endif + #else + wei_zp[fi] = ACCUMULATOR_VAL_ZERO; + #endif + } + #endif + #if FILTER_LOAD_BLOCK_SIZE == 2 - DQ_SLM_FILTER_VEC wei_1 = {dq_wei_unpacked.s01, dq_wei_unpacked.s23}; - char_slm_weight[wei_local_idx] = as_int(wei_1); + SLM_WEIGHT_VEC wei_1 = {dq_wei_unpacked.s01, dq_wei_unpacked.s23}; + char_slm_weight[wei_local_idx] = as_uint(wei_1); #elif FILTER_LOAD_BLOCK_SIZE == 4 - DQ_SLM_FILTER_VEC wei_1 = {dq_wei_unpacked.s01, dq_wei_unpacked.s23}; - char_slm_weight[wei_local_idx] = as_int(wei_1); - DQ_SLM_FILTER_VEC wei_2 = {dq_wei_unpacked.s45, dq_wei_unpacked.s67}; - char_slm_weight[wei_local_idx+1] = as_int(wei_2); + SLM_WEIGHT_VEC wei_1 = {dq_wei_unpacked.s01, dq_wei_unpacked.s23}; + char_slm_weight[wei_local_idx] = as_uint(wei_1); + SLM_WEIGHT_VEC wei_2 = {dq_wei_unpacked.s45, dq_wei_unpacked.s67}; + char_slm_weight[wei_local_idx+1] = as_uint(wei_2); #elif FILTER_LOAD_BLOCK_SIZE == 8 - DQ_SLM_FILTER_VEC wei_1 = {dq_wei_unpacked.s01, dq_wei_unpacked.s23}; - char_slm_weight[wei_local_idx] = as_int(wei_1); - DQ_SLM_FILTER_VEC wei_2 = {dq_wei_unpacked.s45, dq_wei_unpacked.s67}; - char_slm_weight[wei_local_idx+1] = as_int(wei_2); - DQ_SLM_FILTER_VEC wei_3 = {dq_wei_unpacked.s89, dq_wei_unpacked.sab}; - char_slm_weight[wei_local_idx+2] = as_int(wei_3); - DQ_SLM_FILTER_VEC wei_4 = {dq_wei_unpacked.scd, dq_wei_unpacked.sef}; - char_slm_weight[wei_local_idx+3] = as_int(wei_4); + SLM_WEIGHT_VEC wei_1 = {dq_wei_unpacked.s01, dq_wei_unpacked.s23}; + 
char_slm_weight[wei_local_idx] = as_uint(wei_1); + SLM_WEIGHT_VEC wei_2 = {dq_wei_unpacked.s45, dq_wei_unpacked.s67}; + char_slm_weight[wei_local_idx+1] = as_uint(wei_2); + SLM_WEIGHT_VEC wei_3 = {dq_wei_unpacked.s89, dq_wei_unpacked.sab}; + char_slm_weight[wei_local_idx+2] = as_uint(wei_3); + SLM_WEIGHT_VEC wei_4 = {dq_wei_unpacked.scd, dq_wei_unpacked.sef}; + char_slm_weight[wei_local_idx+3] = as_uint(wei_4); #else #error "FC bf_tiled kernel: unsupported FILTER_LOAD_BLOCK_SIZE for SLM kernel" #endif wei_local_idx += SIMD * (FILTER_LOAD_BLOCK_SIZE/2); - weights_idx += SIMD * FILTER_ACTUAL_LOAD_BLOCK_SIZE; + #if COMPRESSED_WEIGHTS_INT8 + weights_idx += SIMD * TILE_K_OFM_PACKED; + #else + weights_idx += SIMD * FILTER_ACTUAL_LOAD_BLOCK_SIZE; + #endif } wei_local_idx = sglid * 2; @@ -1014,11 +1098,11 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan)( #endif // Compute input * weight : packed char4 type - char8 weight = vload8(0, (__local char *)(&char_slm_weight[wei_local_idx + 16*2*ki])); - char4 first_weight = weight.s0123; - char4 second_weight = weight.s4567; + WEIGHT_VEC_TYPE weight = vload8(0, (__local SLM_WEIGHT_TYPE *)(&char_slm_weight[wei_local_idx + 16*2*ki])); + SLM_WEIGHT_VEC first_weight = weight.s0123; + SLM_WEIGHT_VEC second_weight = weight.s4567; unroll_for (uint bi = 0; bi < TILE_B; ++bi) { - char4 input_val = as_char4(_sub_group_shuffle(packed_in_0[bi / 2], (bi % 2) * 8 + ki)); + MAKE_DQ_TYPE_VEC(4) input_val = AS_DQ_TYPE_4(_sub_group_shuffle(packed_in_0[bi / 2], (bi % 2) * 8 + ki)); acc_tmp[0][bi] = imad_SW(acc_tmp[0][bi], input_val, first_weight); acc_tmp[1][bi] = imad_SW(acc_tmp[1][bi], input_val, second_weight); } @@ -1038,7 +1122,12 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan)( ACCUMULATOR_TYPE ds = d_scales[fi % DECOMPRESSION_SCALE_LENGTH]; #endif - ((ACCUMULATOR_TYPE*)(&acc[bi]))[fi] += convert_half(((int *)(&acc_tmp[fi]))[bi]) * ds * de_quantize_scale[bi]; + #if COMPRESSED_WEIGHTS_INT8 + ACCUM_DQ_TYPE modified_calc_buff = ((int 
*)(&acc_tmp[fi]))[bi] - ((float)(wei_zp[fi]) * (convert_float)(activation_sum[bi])); + ((ACCUMULATOR_TYPE*)(&acc[bi]))[fi] += (convert_half)(convert_float(modified_calc_buff) * (float)ds * (float)de_quantize_scale[bi]); + #else + ((ACCUMULATOR_TYPE*)(&acc[bi]))[fi] += convert_half(((int *)(&acc_tmp[fi]))[bi]) * ds * de_quantize_scale[bi]; + #endif acc_tmp[fi][bi] = 0; } } @@ -1060,7 +1149,12 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan)( ACCUMULATOR_TYPE ds = d_scales[fi % DECOMPRESSION_SCALE_LENGTH]; #endif - ((ACCUMULATOR_TYPE*)(&acc[bi]))[fi] += convert_half(((int *)(&acc_tmp[fi]))[bi]) * ds * de_quantize_scale[bi]; + #if COMPRESSED_WEIGHTS_INT8 + ACCUM_DQ_TYPE modified_calc_buff = ((int *)(&acc_tmp[fi]))[bi] - ((float)(wei_zp[fi]) * (convert_float)(activation_sum[bi])); + ((ACCUMULATOR_TYPE*)(&acc[bi]))[fi] += (convert_half)(convert_float(modified_calc_buff) * (float)ds * (float)de_quantize_scale[bi]); + #else + ((ACCUMULATOR_TYPE*)(&acc[bi]))[fi] += convert_half(((int *)(&acc_tmp[fi]))[bi]) * ds * de_quantize_scale[bi]; + #endif acc_tmp[fi][bi] = 0; } } @@ -1169,13 +1263,13 @@ KERNEL(fc)( , FUSED_OPS_DECLS #endif #if DYNAMIC_QUANTIZE - , __global char* quantized_input - , __global INPUT0_TYPE* de_quan_scale + , __global DQ_TYPE* quantized_input + , __global INPUT0_TYPE* quan_var #endif ) { #if USE_SLM #if DYNAMIC_QUANTIZE - __local int dq_wei_local_mem[SIMD * TILE_OFM * SIMD]; + __local uint dq_wei_local_mem[SIMD * TILE_OFM * SIMD]; #else __local ACCUMULATOR_TYPE wei_local_mem[TILE_IFM * SIMD * TILE_OFM * SIMD]; #endif @@ -1321,7 +1415,7 @@ KERNEL(fc)( OPTIONAL_SHAPE_INFO_TENSOR input, quantized_input, - de_quan_scale, + quan_var, #if DECOMPRESSION_SCALE_TERM decompression_scale, #endif @@ -1368,7 +1462,7 @@ KERNEL(fc)( OPTIONAL_SHAPE_INFO_TENSOR input, quantized_input, - de_quan_scale, + quan_var, #if DECOMPRESSION_SCALE_TERM decompression_scale, #endif diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/include/batch_headers/int4_utils.cl 
b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/include/batch_headers/int4_utils.cl index 68d778475f5601..99ff124e3a39f9 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/include/batch_headers/int4_utils.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/include/batch_headers/int4_utils.cl @@ -59,6 +59,13 @@ inline char4 unpack_to_char(uint4x4_t v) __attribute__((overloadable)) { return (char4)(v0.s0, v0.s1, v1.s0, v1.s1); } +inline uchar4 unpack_to_uchar(uint4x4_t v) __attribute__((overloadable)) { + uchar2 v0 = unpack_to_uchar(v.s0); + uchar2 v1 = unpack_to_uchar(v.s1); + return (uchar4)(v0.s0, v0.s1, v1.s0, v1.s1); +} + + inline char4 unpack_transposed_to_char(int4x4_t v) __attribute__((overloadable)) { char2 v0 = unpack_to_char(v.s0); char2 v1 = unpack_to_char(v.s1); diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/pa_kv_cache_update_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/pa_kv_cache_update_ref.cl index ef2f78496b2cf2..8426baf719f990 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/pa_kv_cache_update_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/pa_kv_cache_update_ref.cl @@ -34,10 +34,14 @@ KERNEL(pa_kv_cache_update)( const uint seq_block_idx = block_indices_begins[seq_idx] + seq_len / PAGED_ATTENTION_BLOCK_SIZE; const uint block_idx = block_indices[seq_block_idx]; - uint key_value_in_offset = seq_idx * KV_HEADS_NUM * HEAD_SIZE + head_idx * HEAD_SIZE; + uint key_in_offset = INPUT0_OFFSET + + seq_idx * (KV_HEADS_NUM * HEAD_SIZE + INPUT0_PAD_BEFORE_FEATURE_NUM + INPUT0_PAD_AFTER_FEATURE_NUM) + + head_idx * HEAD_SIZE; + uint value_in_offset = INPUT1_OFFSET + + seq_idx * (KV_HEADS_NUM * HEAD_SIZE + INPUT1_PAD_BEFORE_FEATURE_NUM + INPUT1_PAD_AFTER_FEATURE_NUM) + + head_idx * HEAD_SIZE; uint key_out_offset = block_idx * KV_HEADS_NUM * HEAD_SIZE * PAGED_ATTENTION_BLOCK_SIZE + head_idx * HEAD_SIZE * PAGED_ATTENTION_BLOCK_SIZE + current_token_pos_in_block; - 
uint value_out_offset = block_idx * KV_HEADS_NUM * HEAD_SIZE * PAGED_ATTENTION_BLOCK_SIZE + head_idx * HEAD_SIZE * PAGED_ATTENTION_BLOCK_SIZE + current_token_pos_in_block * HEAD_SIZE; #define READ_BLOCK_SIZE GENERATE_STAGE_BLOCK_SIZE @@ -45,7 +49,7 @@ KERNEL(pa_kv_cache_update)( #define BLOCK_READ(ptr, offset) BLOCK_READN(INPUT0_TYPE, READ_BLOCK_SIZE, ptr, offset); #define DATA_VEC MAKE_VECTOR_TYPE(INPUT0_TYPE, READ_BLOCK_SIZE) - DATA_VEC input_data = BLOCK_READ(key_data, key_value_in_offset + head_idx_index); + DATA_VEC input_data = BLOCK_READ(key_data, key_in_offset + head_idx_index); unroll_for (uint i = 0; i < READ_BLOCK_SIZE; i++) { uint key_offset = key_out_offset + (head_idx_index + sglid + SUBGROUP_SIZE * i) * PAGED_ATTENTION_BLOCK_SIZE; @@ -56,7 +60,7 @@ KERNEL(pa_kv_cache_update)( #endif } - input_data = BLOCK_READ(value_data, key_value_in_offset + head_idx_index); + input_data = BLOCK_READ(value_data, value_in_offset + head_idx_index); unroll_for (uint i = 0; i < READ_BLOCK_SIZE; i++) { uint value_offset = value_out_offset + head_idx_index + sglid + SUBGROUP_SIZE * i; @@ -83,8 +87,13 @@ KERNEL(pa_kv_cache_update)( const uint token_start_pos = (past_len + block_start_pos - subsequence_begin_idx) % PAGED_ATTENTION_BLOCK_SIZE; - uint key_value_in_offset = block_start_pos * KV_HEADS_NUM * HEAD_SIZE + - head_idx * HEAD_SIZE; + uint key_in_offset = INPUT0_OFFSET + + block_start_pos * (KV_HEADS_NUM * HEAD_SIZE + INPUT0_PAD_AFTER_FEATURE_NUM + INPUT0_PAD_BEFORE_FEATURE_NUM) + + head_idx * HEAD_SIZE; + + uint value_in_offset = INPUT1_OFFSET + + block_start_pos * (KV_HEADS_NUM * HEAD_SIZE + INPUT1_PAD_AFTER_FEATURE_NUM + INPUT1_PAD_BEFORE_FEATURE_NUM) + + head_idx * HEAD_SIZE; const uint current_block_idx = (past_len + block_start_pos - subsequence_begin_idx) / PAGED_ATTENTION_BLOCK_SIZE; @@ -106,14 +115,14 @@ KERNEL(pa_kv_cache_update)( #define BLOCK_READ(ptr, offset) BLOCK_READN(INPUT0_TYPE, READ_BLOCK_SIZE, ptr, offset); #define DATA_VEC 
MAKE_VECTOR_TYPE(INPUT0_TYPE, READ_BLOCK_SIZE) - DATA_VEC input_data = BLOCK_READ(key_data, key_value_in_offset + head_idx_index); + DATA_VEC input_data = BLOCK_READ(key_data, key_in_offset + head_idx_index); unroll_for (uint i = 0; i < READ_BLOCK_SIZE; i++) { uint key_offset = key_out_offset + (head_idx_index + sglid + SUBGROUP_SIZE * i) * PAGED_ATTENTION_BLOCK_SIZE; key_cache_data[key_offset] = input_data[i]; } - input_data = BLOCK_READ(value_data, key_value_in_offset + head_idx_index); + input_data = BLOCK_READ(value_data, value_in_offset + head_idx_index); unroll_for (uint i = 0; i < READ_BLOCK_SIZE; i++) { uint value_offset = value_out_offset + head_idx_index + sglid + SUBGROUP_SIZE * i; @@ -126,14 +135,14 @@ KERNEL(pa_kv_cache_update)( #define BLOCK_READ(ptr, offset) BLOCK_READN(INPUT0_TYPE, READ_BLOCK_SIZE, ptr, offset); #define DATA_VEC MAKE_VECTOR_TYPE(INPUT0_TYPE, READ_BLOCK_SIZE) - DATA_VEC input_data = BLOCK_READ(key_data, key_value_in_offset + head_idx_index); + DATA_VEC input_data = BLOCK_READ(key_data, key_in_offset + head_idx_index); unroll_for (uint i = 0; i < READ_BLOCK_SIZE; i++) { uint key_offset = key_out_offset + (head_idx_index + sglid + SUBGROUP_SIZE * i) * PAGED_ATTENTION_BLOCK_SIZE; key_cache_data[key_offset] = input_data[i]; } - input_data = BLOCK_READ(value_data, key_value_in_offset + head_idx_index); + input_data = BLOCK_READ(value_data, value_in_offset + head_idx_index); unroll_for (uint i = 0; i < READ_BLOCK_SIZE; i++) { uint value_offset = value_out_offset + head_idx_index + sglid + SUBGROUP_SIZE * i; @@ -146,14 +155,14 @@ KERNEL(pa_kv_cache_update)( #define BLOCK_READ(ptr, offset) BLOCK_READN(INPUT0_TYPE, READ_BLOCK_SIZE, ptr, offset); #define DATA_VEC MAKE_VECTOR_TYPE(INPUT0_TYPE, READ_BLOCK_SIZE) - DATA_VEC input_data = BLOCK_READ(key_data, key_value_in_offset + head_idx_index); + DATA_VEC input_data = BLOCK_READ(key_data, key_in_offset + head_idx_index); unroll_for (uint i = 0; i < READ_BLOCK_SIZE; i++) { uint key_offset = 
key_out_offset + (head_idx_index + sglid + SUBGROUP_SIZE * i) * PAGED_ATTENTION_BLOCK_SIZE; key_cache_data[key_offset] = input_data[i]; } - input_data = BLOCK_READ(value_data, key_value_in_offset + head_idx_index); + input_data = BLOCK_READ(value_data, value_in_offset + head_idx_index); unroll_for (uint i = 0; i < READ_BLOCK_SIZE; i++) { uint value_offset = value_out_offset + head_idx_index + sglid + SUBGROUP_SIZE * i; @@ -166,14 +175,14 @@ KERNEL(pa_kv_cache_update)( #define BLOCK_READ(ptr, offset) BLOCK_READN(INPUT0_TYPE, READ_BLOCK_SIZE, ptr, offset); #define DATA_VEC MAKE_VECTOR_TYPE(INPUT0_TYPE, READ_BLOCK_SIZE) - DATA_VEC input_data = BLOCK_READ(key_data, key_value_in_offset + head_idx_index); + DATA_VEC input_data = BLOCK_READ(key_data, key_in_offset + head_idx_index); unroll_for (uint i = 0; i < READ_BLOCK_SIZE; i++) { uint key_offset = key_out_offset + (head_idx_index + sglid + SUBGROUP_SIZE * i) * PAGED_ATTENTION_BLOCK_SIZE; key_cache_data[key_offset] = input_data; } - input_data = BLOCK_READ(value_data, key_value_in_offset + head_idx_index); + input_data = BLOCK_READ(value_data, value_in_offset + head_idx_index); unroll_for (uint i = 0; i < READ_BLOCK_SIZE; i++) { uint value_offset = value_out_offset + head_idx_index + sglid + SUBGROUP_SIZE * i; @@ -181,7 +190,8 @@ KERNEL(pa_kv_cache_update)( } } - key_value_in_offset += KV_HEADS_NUM * HEAD_SIZE; + key_in_offset += (KV_HEADS_NUM * HEAD_SIZE + INPUT0_PAD_AFTER_FEATURE_NUM + INPUT0_PAD_BEFORE_FEATURE_NUM); + value_in_offset += (KV_HEADS_NUM * HEAD_SIZE + INPUT1_PAD_AFTER_FEATURE_NUM + INPUT1_PAD_BEFORE_FEATURE_NUM); key_out_offset += 1; value_out_offset += HEAD_SIZE; } @@ -194,14 +204,14 @@ KERNEL(pa_kv_cache_update)( #define BLOCK_READ(ptr, offset) BLOCK_READN(INPUT0_TYPE, READ_BLOCK_SIZE, ptr, offset); #define DATA_VEC MAKE_VECTOR_TYPE(INPUT0_TYPE, READ_BLOCK_SIZE) - DATA_VEC input_data = BLOCK_READ(key_data, key_value_in_offset + head_idx_index); + DATA_VEC input_data = BLOCK_READ(key_data, 
key_in_offset + head_idx_index); unroll_for (uint i = 0; i < READ_BLOCK_SIZE; i++) { uint key_offset = key_out_offset + (head_idx_index + sglid + SUBGROUP_SIZE * i) * PAGED_ATTENTION_BLOCK_SIZE; key_cache_data[key_offset] = input_data; } - input_data = BLOCK_READ(value_data, key_value_in_offset + head_idx_index); + input_data = BLOCK_READ(value_data, value_in_offset + head_idx_index); unroll_for (uint i = 0; i < READ_BLOCK_SIZE; i++) { uint value_offset = value_out_offset + head_idx_index + sglid + SUBGROUP_SIZE * i; @@ -209,7 +219,8 @@ KERNEL(pa_kv_cache_update)( } } - key_value_in_offset += KV_HEADS_NUM * HEAD_SIZE; + key_in_offset += (KV_HEADS_NUM * HEAD_SIZE + INPUT0_PAD_AFTER_FEATURE_NUM + INPUT0_PAD_BEFORE_FEATURE_NUM); + value_in_offset += (KV_HEADS_NUM * HEAD_SIZE + INPUT1_PAD_AFTER_FEATURE_NUM + INPUT1_PAD_BEFORE_FEATURE_NUM); key_out_offset += 1; value_out_offset += HEAD_SIZE; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rope_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rope_ref.cl index 38066b4461def4..133440a21301f2 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rope_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rope_ref.cl @@ -28,14 +28,11 @@ KERNEL(rope_ref)( uint r = rf < HALF_ROTARY_NDIMS ? rf * 2 : 0; uint f = rf < HEAD_SIZE - ROTARY_NDIMS ? rf * 2 : 0; -#ifdef ENABLE_SLICE - uint input_idx = GET_DATA_INDEX(SLICED_INPUT0, p, b, h * HEAD_SIZE, 0); - - input_idx += SLICED_FROM_START * (p * INPUT0_FEATURE_NUM + b + 1) - + SLICED_FROM_END * (p * INPUT0_FEATURE_NUM + b); -#else uint input_idx = INPUT0_GET_INDEX(p, b, h * HEAD_SIZE, 0); +#ifdef ENABLE_SLICE + input_idx += SLICED_FROM_START; #endif + uint cos_sin_p = p < INPUT1_BATCH_NUM ? p : 0; uint cos_sin_b = b < INPUT1_FEATURE_NUM ? 
b : 0; uint cos_sin_idx = INPUT1_GET_INDEX(cos_sin_p, cos_sin_b, 0, 0); @@ -69,14 +66,11 @@ KERNEL(rope_ref)( const uint h = (uint)get_global_id(2) / HALF_ROTARY_NDIMS; const uint r = (uint)get_global_id(2) % HALF_ROTARY_NDIMS; -#ifdef ENABLE_SLICE - uint input_idx = GET_DATA_INDEX(SLICED_INPUT0, b, p, h * HEAD_SIZE, 0); - - input_idx += SLICED_FROM_START * (b * INPUT0_FEATURE_NUM + p + 1) - + SLICED_FROM_END * (b * INPUT0_FEATURE_NUM + p); -#else uint input_idx = INPUT0_GET_INDEX(b, p, h * HEAD_SIZE, 0); +#ifdef ENABLE_SLICE + input_idx += SLICED_FROM_START; #endif + uint cos_sin_b = b < INPUT1_BATCH_NUM ? b : 0; uint cos_sin_p = p + INPUT1_FEATURE_NUM - INPUT0_FEATURE_NUM < INPUT1_FEATURE_NUM ? p + INPUT1_FEATURE_NUM - INPUT0_FEATURE_NUM : 0; uint cos_sin_h = h < INPUT1_SIZE_Y ? h : 0; @@ -119,15 +113,13 @@ KERNEL(rope_ref)( const uint p = (uint)get_global_id(2) / HALF_ROTARY_NDIMS; const uint r = (uint)get_global_id(2) % HALF_ROTARY_NDIMS; -#ifdef ENABLE_SLICE - uint input_idx = GET_DATA_INDEX(SLICED_INPUT0, b, h, p, 0); - - input_idx += SLICED_FROM_START * (b * INPUT0_FEATURE_NUM + h + 1) - + SLICED_FROM_END * (b * INPUT0_FEATURE_NUM + h); -#elif ENABLE_TRANSPOSE - uint input_idx = GET_DATA_INDEX(TRANSPOSED_INPUT0, b, h, p, 0); +#if ENABLE_TRANSPOSE + uint input_idx = INPUT0_GET_INDEX(b, p, h, 0); #else uint input_idx = INPUT0_GET_INDEX(b, h, p, 0); +#ifdef ENABLE_SLICE + input_idx += SLICED_FROM_START; +#endif #endif uint cos_sin_b = b < INPUT1_BATCH_NUM ? 
b : 0; diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/sdpa_opt.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/sdpa_opt.cl index 748f79115262e0..55f87e4189d9fe 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/sdpa_opt.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/sdpa_opt.cl @@ -118,12 +118,21 @@ inline uint FUNC(get_bt_index_value)(OPTIONAL_SHAPE_INFO_ARG uint b, uint f, uin #define VALUE_BLOCK_READ(ptr, offset) BLOCK_READN(INPUT2_TYPE, 1, ptr, offset) #define SUBGROUPS_PER_WG (HEAD_SIZE * SG_SCALE_FACTOR / SUBGROUP_SIZE) +#if IS_KV_COMPRESSED +#if COMPRESSED_PER_HEAD + #define GET_COMPRESSION_INDEX(INPUT, b, f, y, x) GET_DATA_INDEX(INPUT, (b), (f), (y), (0)); +#else + #define GET_COMPRESSION_INDEX(INPUT, b, f, y, x) GET_DATA_INDEX(INPUT, (b), (0), (y), (0)); +#endif +#endif + #ifdef SDPA_STAGE_0 #if TARGET_SEQ_LEN_BLOCK_SIZE == 1 /* This version is used for 2nd token */ REQD_SUB_GROUP_SIZE(SUBGROUP_SIZE) +__attribute__((reqd_work_group_size(1, 1, HEAD_SIZE * SG_SCALE_FACTOR))) KERNEL(sdpa_opt)( OPTIONAL_SHAPE_INFO_ARG const __global INPUT0_TYPE* query_input, @@ -136,6 +145,10 @@ KERNEL(sdpa_opt)( const __global INPUT4_TYPE* scale, #endif __global OUTPUT_TYPE* output, +#if IS_KV_COMPRESSED + const __global KEY_COMPRESSION_SCALE_TYPE* key_scale, + const __global VALUE_COMPRESSION_SCALE_TYPE* val_scale, +#endif #ifdef BEAM_TABLE_TYPE const __global BEAM_TABLE_TYPE* beam_table, #endif @@ -149,7 +162,18 @@ KERNEL(sdpa_opt)( const uint b1_idx = batch_idx % NUM_HEADS; /* HEADS_NUM dim */ const uint target_seq_idx = get_global_id(1); const uint lid = get_local_id(2); + +#if SG_SCALE_FACTOR == 2 + const uint head_size_idx = lid % HEAD_SIZE; +#elif SG_SCALE_FACTOR == 1 const uint head_size_idx = lid; +#else + #error "sdpa_opt.cl: Unsupported scale factor" +#endif + +#if SUBGROUPS_PER_WG > SUBGROUP_SIZE + #error "sdpa_opt.cl: Number of subgroups per work group should be less than subgroup_size +#endif const uint 
sgid = get_sub_group_id(); const uint sglid = get_sub_group_local_id(); @@ -166,7 +190,7 @@ KERNEL(sdpa_opt)( // SLM for query inputs __local INPUT0_TYPE query_local[HEAD_SIZE * TARGET_SEQ_LEN_BLOCK_SIZE]; // SLM for intermediate QK results - __local OUTPUT_TYPE qk_local[SEQ_LEN_PARTITION_SIZE * TARGET_SEQ_LEN_BLOCK_SIZE]; + __local SOFTMAX_ACCUMULATOR_TYPE qk_local[SEQ_LEN_PARTITION_SIZE * TARGET_SEQ_LEN_BLOCK_SIZE]; // SLM buffers for SoftMax calculation and qk_max/qk_sums results aggregation across all WG __local SOFTMAX_ACCUMULATOR_TYPE qk_max_vals[SUBGROUPS_PER_WG * TARGET_SEQ_LEN_BLOCK_SIZE]; __local SOFTMAX_ACCUMULATOR_TYPE qk_sum_vals[SUBGROUPS_PER_WG * TARGET_SEQ_LEN_BLOCK_SIZE]; @@ -199,13 +223,19 @@ KERNEL(sdpa_opt)( uint query_offset = INPUT0_GET_INDEX(b0_idx, b1_idx, target_seq_idx, (sgid * SUBGROUP_SIZE)); const uint query_pitch = QUERY_STEP_LOCAL; #endif - for (uint seq_idx = 0; seq_idx < seq_idx_end; seq_idx++) { - #define QUERY_BLOCK_SIZE 1 +#if SG_SCALE_FACTOR == 2 + if (sgid < HEAD_SIZE / SUBGROUP_SIZE) { +#else + { +#endif + for (uint seq_idx = 0; seq_idx < seq_idx_end; seq_idx++) { + #define QUERY_BLOCK_SIZE 1 - INPUT0_TYPE val = BLOCK_READN(INPUT0_TYPE, QUERY_BLOCK_SIZE, query_input, query_offset); - query_local[query_local_offset] = val * scale_val; - query_local_offset += QUERY_STEP_LOCAL; - query_offset += query_pitch; + INPUT0_TYPE val = BLOCK_READN(INPUT0_TYPE, QUERY_BLOCK_SIZE, query_input, query_offset); + query_local[query_local_offset] = val * scale_val; + query_local_offset += QUERY_STEP_LOCAL; + query_offset += query_pitch; + } } #undef QUERY_BLOCK_SIZE #undef QUERY_STEP @@ -216,28 +246,45 @@ KERNEL(sdpa_opt)( // Main Gemm1 calculation loop // Each SG performs element-wise multiplications of Q[HEAD_SIZE]xK[HEAD_SIZE] values // HEAD_SIZE / SUBGROUPS_PER_WG times in the loop and saves the result to the qk_local SLM buffer - for (uint seq_len = sgid; seq_len < partition_seq_len; seq_len += (HEAD_SIZE / SUBGROUP_SIZE)) { -#ifdef 
INPUT1_DIMS_ORDER + for (uint seq_len = sgid; seq_len < partition_seq_len; seq_len += (HEAD_SIZE / SUBGROUP_SIZE) * SG_SCALE_FACTOR) { #ifdef BEAM_TABLE_TYPE const uint b_idx = beam_table[FUNC_CALL(get_bt_index_key)(OPTIONAL_SHAPE_INFO_TENSOR b0_idx, b1_idx, 0, 0, start_partition_idx + seq_len, 0)]; #else const uint b_idx = b0_idx; #endif - const uint key_offset = FUNC_CALL(get_input1_index)(OPTIONAL_SHAPE_INFO_TENSOR b_idx, b1_idx, 0, 0, start_partition_idx + seq_len, 0); + +#ifdef INPUT1_DIMS_ORDER + uint key_offset = FUNC_CALL(get_input1_index)(OPTIONAL_SHAPE_INFO_TENSOR b_idx, b1_idx, 0, 0, start_partition_idx + seq_len, 0); #else - const uint key_offset = INPUT1_GET_INDEX(b0_idx, b1_idx, start_partition_idx + seq_len, 0); + uint key_offset = INPUT1_GET_INDEX(b_idx, b1_idx, start_partition_idx + seq_len, 0); #endif - INPUT0_TYPE acc[TARGET_SEQ_LEN_BLOCK_SIZE] = {INPUT0_VAL_ZERO}; + SOFTMAX_ACCUMULATOR_TYPE acc[TARGET_SEQ_LEN_BLOCK_SIZE] = {SOFTMAX_ACCUMULATOR_VAL_ZERO}; +#if IS_KV_COMPRESSED + const uint comp_offset = GET_COMPRESSION_INDEX(KEY_COMPRESSION_SCALE, b_idx, b1_idx / BROADCAST_GROUP_SIZE, start_partition_idx + seq_len, 0); + KEY_COMPRESSION_SCALE_TYPE comp_scale = key_scale[comp_offset]; +#if USE_ASYMMETRIC_QUANTIZATION + KEY_COMPRESSION_SCALE_TYPE comp_zp = key_scale[comp_offset + 1]; +#endif +#endif uint head_idx_index = 0; #define KEY_BLOCK_SIZE 8 for (; head_idx_index + (KEY_BLOCK_SIZE * SUBGROUP_SIZE) <= HEAD_SIZE; head_idx_index += SUBGROUP_SIZE * KEY_BLOCK_SIZE) { #define KEY_BLOCK_READ(ptr, offset) BLOCK_READN(INPUT1_TYPE, KEY_BLOCK_SIZE, ptr, offset); #define KEY_BLOCK MAKE_VECTOR_TYPE(INPUT1_TYPE, KEY_BLOCK_SIZE) + #define KEY_BLOCK_UNCOMPRESSED MAKE_VECTOR_TYPE(KEY_COMPRESSION_SCALE_TYPE, KEY_BLOCK_SIZE) + #define TO_KEY_BLOCK_UNCOMPRESSED_TYPE(val) CAT(convert_, KEY_BLOCK_UNCOMPRESSED)(val) #define QUERY_BLOCK MAKE_VECTOR_TYPE(INPUT0_TYPE, KEY_BLOCK_SIZE) - KEY_BLOCK key_vals = KEY_BLOCK_READ(key_input, key_offset + head_idx_index); + 
KEY_BLOCK key_vec_packed = KEY_BLOCK_READ(key_input, key_offset + head_idx_index); +#if IS_KV_COMPRESSED && USE_ASYMMETRIC_QUANTIZATION + KEY_BLOCK_UNCOMPRESSED key_vals = (TO_KEY_BLOCK_UNCOMPRESSED_TYPE(key_vec_packed) - comp_zp) * comp_scale; +#elif IS_KV_COMPRESSED + KEY_BLOCK_UNCOMPRESSED key_vals = (TO_KEY_BLOCK_UNCOMPRESSED_TYPE(key_vec_packed)) * comp_scale; +#else + KEY_BLOCK key_vals = key_vec_packed; +#endif uint query_offset = head_idx_index + sglid; unroll_for (uint seq_idx = 0; seq_idx < TARGET_SEQ_LEN_BLOCK_SIZE; seq_idx++) { @@ -247,7 +294,7 @@ KERNEL(sdpa_opt)( } unroll_for(uint i = 0; i < KEY_BLOCK_SIZE; i++) { - acc[seq_idx] = mad(query_vals_reg[i], key_vals[i], acc[seq_idx]); + acc[seq_idx] = mad(TO_SOFTMAX_ACCUMULATOR_TYPE(query_vals_reg[i]), TO_SOFTMAX_ACCUMULATOR_TYPE(key_vals[i]), acc[seq_idx]); } query_offset += HEAD_SIZE; @@ -258,9 +305,18 @@ KERNEL(sdpa_opt)( for (; head_idx_index + (KEY_BLOCK_SIZE * SUBGROUP_SIZE) <= HEAD_SIZE; head_idx_index += SUBGROUP_SIZE * KEY_BLOCK_SIZE) { #define KEY_BLOCK_READ(ptr, offset) BLOCK_READN(INPUT1_TYPE, KEY_BLOCK_SIZE, ptr, offset); #define KEY_BLOCK MAKE_VECTOR_TYPE(INPUT1_TYPE, KEY_BLOCK_SIZE) + #define KEY_BLOCK_UNCOMPRESSED MAKE_VECTOR_TYPE(KEY_COMPRESSION_SCALE_TYPE, KEY_BLOCK_SIZE) + #define TO_KEY_BLOCK_UNCOMPRESSED_TYPE(val) CAT(convert_, KEY_BLOCK_UNCOMPRESSED)(val) #define QUERY_BLOCK MAKE_VECTOR_TYPE(INPUT0_TYPE, KEY_BLOCK_SIZE) - KEY_BLOCK key_vals = KEY_BLOCK_READ(key_input, key_offset + head_idx_index); + KEY_BLOCK key_vec_packed = KEY_BLOCK_READ(key_input, key_offset + head_idx_index); +#if IS_KV_COMPRESSED && USE_ASYMMETRIC_QUANTIZATION + KEY_BLOCK_UNCOMPRESSED key_vals = (TO_KEY_BLOCK_UNCOMPRESSED_TYPE(key_vec_packed) - comp_zp) * comp_scale; +#elif IS_KV_COMPRESSED + KEY_BLOCK_UNCOMPRESSED key_vals = (TO_KEY_BLOCK_UNCOMPRESSED_TYPE(key_vec_packed)) * comp_scale; +#else + KEY_BLOCK key_vals = key_vec_packed; +#endif uint query_offset = head_idx_index + sglid; unroll_for (uint seq_idx = 
0; seq_idx < TARGET_SEQ_LEN_BLOCK_SIZE; seq_idx++) { @@ -270,7 +326,7 @@ KERNEL(sdpa_opt)( } unroll_for(uint i = 0; i < KEY_BLOCK_SIZE; i++) { - acc[seq_idx] = mad(query_vals_reg[i], key_vals[i], acc[seq_idx]); + acc[seq_idx] = mad(TO_SOFTMAX_ACCUMULATOR_TYPE(query_vals_reg[i]), TO_SOFTMAX_ACCUMULATOR_TYPE(key_vals[i]), acc[seq_idx]); } query_offset += HEAD_SIZE; @@ -281,9 +337,18 @@ KERNEL(sdpa_opt)( for (; head_idx_index + (KEY_BLOCK_SIZE * SUBGROUP_SIZE) <= HEAD_SIZE; head_idx_index += SUBGROUP_SIZE * KEY_BLOCK_SIZE) { #define KEY_BLOCK_READ(ptr, offset) BLOCK_READN(INPUT1_TYPE, KEY_BLOCK_SIZE, ptr, offset); #define KEY_BLOCK MAKE_VECTOR_TYPE(INPUT1_TYPE, KEY_BLOCK_SIZE) + #define KEY_BLOCK_UNCOMPRESSED MAKE_VECTOR_TYPE(KEY_COMPRESSION_SCALE_TYPE, KEY_BLOCK_SIZE) + #define TO_KEY_BLOCK_UNCOMPRESSED_TYPE(val) CAT(convert_, KEY_BLOCK_UNCOMPRESSED)(val) #define QUERY_BLOCK MAKE_VECTOR_TYPE(INPUT0_TYPE, KEY_BLOCK_SIZE) - KEY_BLOCK key_vals = KEY_BLOCK_READ(key_input, key_offset + head_idx_index); + KEY_BLOCK key_vec_packed = KEY_BLOCK_READ(key_input, key_offset + head_idx_index); +#if IS_KV_COMPRESSED && USE_ASYMMETRIC_QUANTIZATION + KEY_BLOCK_UNCOMPRESSED key_vals = (TO_KEY_BLOCK_UNCOMPRESSED_TYPE(key_vec_packed) - comp_zp) * comp_scale; +#elif IS_KV_COMPRESSED + KEY_BLOCK_UNCOMPRESSED key_vals = (TO_KEY_BLOCK_UNCOMPRESSED_TYPE(key_vec_packed)) * comp_scale; +#else + KEY_BLOCK key_vals = key_vec_packed; +#endif uint query_offset = head_idx_index + sglid; unroll_for (uint seq_idx = 0; seq_idx < TARGET_SEQ_LEN_BLOCK_SIZE; seq_idx++) { @@ -293,7 +358,7 @@ KERNEL(sdpa_opt)( } unroll_for(uint i = 0; i < KEY_BLOCK_SIZE; i++) { - acc[seq_idx] = mad(query_vals_reg[i], key_vals[i], acc[seq_idx]); + acc[seq_idx] = mad(TO_SOFTMAX_ACCUMULATOR_TYPE(query_vals_reg[i]), TO_SOFTMAX_ACCUMULATOR_TYPE(key_vals[i]), acc[seq_idx]); } query_offset += HEAD_SIZE; @@ -304,9 +369,18 @@ KERNEL(sdpa_opt)( for (; head_idx_index + (KEY_BLOCK_SIZE * SUBGROUP_SIZE) <= HEAD_SIZE; head_idx_index += 
SUBGROUP_SIZE * KEY_BLOCK_SIZE) { #define KEY_BLOCK_READ(ptr, offset) BLOCK_READN(INPUT1_TYPE, KEY_BLOCK_SIZE, ptr, offset); #define KEY_BLOCK MAKE_VECTOR_TYPE(INPUT1_TYPE, KEY_BLOCK_SIZE) + #define KEY_BLOCK_UNCOMPRESSED MAKE_VECTOR_TYPE(KEY_COMPRESSION_SCALE_TYPE, KEY_BLOCK_SIZE) + #define TO_KEY_BLOCK_UNCOMPRESSED_TYPE(val) CAT(convert_, KEY_BLOCK_UNCOMPRESSED)(val) #define QUERY_BLOCK MAKE_VECTOR_TYPE(INPUT0_TYPE, KEY_BLOCK_SIZE) - KEY_BLOCK key_vals = KEY_BLOCK_READ(key_input, key_offset + head_idx_index); + KEY_BLOCK key_vec_packed = KEY_BLOCK_READ(key_input, key_offset + head_idx_index); +#if IS_KV_COMPRESSED && USE_ASYMMETRIC_QUANTIZATION + KEY_BLOCK_UNCOMPRESSED key_vals = (TO_KEY_BLOCK_UNCOMPRESSED_TYPE(key_vec_packed) - comp_zp) * comp_scale; +#elif IS_KV_COMPRESSED + KEY_BLOCK_UNCOMPRESSED key_vals = (TO_KEY_BLOCK_UNCOMPRESSED_TYPE(key_vec_packed)) * comp_scale; +#else + KEY_BLOCK key_vals = key_vec_packed; +#endif uint query_offset = head_idx_index + sglid; unroll_for (uint seq_idx = 0; seq_idx < TARGET_SEQ_LEN_BLOCK_SIZE; seq_idx++) { @@ -315,7 +389,7 @@ KERNEL(sdpa_opt)( query_vals_reg = query_local[query_offset + i * SUBGROUP_SIZE]; } - acc[seq_idx] = mad(query_vals_reg, key_vals, acc[seq_idx]); + acc[seq_idx] = mad(TO_SOFTMAX_ACCUMULATOR_TYPE(query_vals_reg), TO_SOFTMAX_ACCUMULATOR_TYPE(key_vals), acc[seq_idx]); query_offset += HEAD_SIZE; } } @@ -331,11 +405,11 @@ KERNEL(sdpa_opt)( // Wait until all SG finishes their calculations and apply scale and attention mask to the results barrier(CLK_LOCAL_MEM_FENCE); - INPUT0_TYPE qk_val[TARGET_SEQ_LEN_BLOCK_SIZE]; + SOFTMAX_ACCUMULATOR_TYPE qk_val[TARGET_SEQ_LEN_BLOCK_SIZE]; const uint seq_idx_end = 1; for (uint seq_idx = 0; seq_idx < seq_idx_end; seq_idx++) { // Iterate over all values QK values in SLM and apply scale and attention mask - for (uint seq_len = sgid * SUBGROUP_SIZE + sglid; seq_len < partition_seq_len; seq_len += (HEAD_SIZE)) { + for (uint seq_len = sgid * SUBGROUP_SIZE + sglid; seq_len < 
partition_seq_len; seq_len += (HEAD_SIZE * SG_SCALE_FACTOR)) { // Read value from SLM and apply scale qk_val[seq_idx] = qk_local[seq_idx * SEQ_LEN_PARTITION_SIZE + seq_len]; @@ -388,7 +462,7 @@ KERNEL(sdpa_opt)( SOFTMAX_ACCUMULATOR_TYPE exp_sum[TARGET_SEQ_LEN_BLOCK_SIZE] = {SOFTMAX_ACCUMULATOR_VAL_ZERO}; const uint qk_num_per_wi = CEIL_DIV(partition_seq_len, SUBGROUPS_PER_WG * SUBGROUP_SIZE); for (uint qk_idx = 0; qk_idx < qk_num_per_wi; qk_idx++) { - const uint local_data_idx = qk_idx * (SUBGROUPS_PER_WG * SUBGROUP_SIZE) + head_size_idx; + const uint local_data_idx = qk_idx * (SUBGROUPS_PER_WG * SUBGROUP_SIZE) + lid; if (local_data_idx < partition_seq_len) { for (uint seq_idx = 0; seq_idx < seq_idx_end; seq_idx++) { SOFTMAX_ACCUMULATOR_TYPE qk_new = native_exp(TO_SOFTMAX_ACCUMULATOR_TYPE(qk_local[seq_idx * SEQ_LEN_PARTITION_SIZE + local_data_idx]) - qk_max[seq_idx]); @@ -420,7 +494,7 @@ KERNEL(sdpa_opt)( // const SOFTMAX_ACCUMULATOR_TYPE inv_exp_sum = SOFTMAX_ACCUMULATOR_VAL_ONE / exp_sum[seq_idx]; for (uint qk_idx = 0; qk_idx < qk_num_per_wi; qk_idx++) { - const uint local_data_idx = qk_idx * (SUBGROUPS_PER_WG * SUBGROUP_SIZE) + sgid * SUBGROUP_SIZE + sglid; + const uint local_data_idx = qk_idx * (SUBGROUPS_PER_WG * SUBGROUP_SIZE) + lid; if (local_data_idx < partition_seq_len) { for (uint seq_idx = 0; seq_idx < seq_idx_end; seq_idx++) { SOFTMAX_ACCUMULATOR_TYPE qk_new = TO_SOFTMAX_ACCUMULATOR_TYPE(qk_local[seq_idx * SEQ_LEN_PARTITION_SIZE + local_data_idx]) / exp_sum[seq_idx]; @@ -434,7 +508,7 @@ KERNEL(sdpa_opt)( { // If the number of partitions is greater than 1, save exm_sums and max_logits to the temporary buffers // Use single WI in the WG, since all the WIs have the same value - if (num_of_partitions > 1 && head_size_idx == 0) { + if (num_of_partitions > 1 && lid == 0) { for (uint seq_idx = 0; seq_idx < seq_idx_end; seq_idx++) { const uint exp_sums_offset = b0_idx * (NUM_HEADS * TARGET_SEQ_LEN * num_of_partitions) + b1_idx * (TARGET_SEQ_LEN * 
num_of_partitions) + @@ -463,15 +537,32 @@ KERNEL(sdpa_opt)( #endif #endif - for (uint seq_len = 0; seq_len < partition_seq_len / SUBGROUP_SIZE; seq_len++) { +#if SG_SCALE_FACTOR > 1 + const uint seq_len_start = (sgid / (HEAD_SIZE / SUBGROUP_SIZE)) * (SEQ_LEN_PARTITION_SIZE / SG_SCALE_FACTOR / SUBGROUP_SIZE); + const uint seq_len_end = min(seq_len_start + (SEQ_LEN_PARTITION_SIZE / SG_SCALE_FACTOR / SUBGROUP_SIZE), partition_seq_len / SUBGROUP_SIZE); +#else + const uint seq_len_start = 0; + const uint seq_len_end = partition_seq_len / SUBGROUP_SIZE; +#endif + + for (uint seq_len = seq_len_start; seq_len < seq_len_end; seq_len++) { #ifdef BEAM_TABLE_TYPE - uint b_idx = beam_table[FUNC_CALL(get_bt_index_value)(OPTIONAL_SHAPE_INFO_TENSOR b0_idx, b1_idx, 0, 0, start_partition_idx + (seq_len * SUBGROUP_SIZE) + sglid, sgid * SUBGROUP_SIZE)]; + const uint b_idx = beam_table[FUNC_CALL(get_bt_index_value)(OPTIONAL_SHAPE_INFO_TENSOR b0_idx, b1_idx, 0, 0, start_partition_idx + (seq_len * SUBGROUP_SIZE) + sglid, sgid * SUBGROUP_SIZE)]; uint value_offset = FUNC_CALL(get_input2_index)(OPTIONAL_SHAPE_INFO_TENSOR b_idx, b1_idx, 0, 0, start_partition_idx + (seq_len * SUBGROUP_SIZE) + sglid, sgid * SUBGROUP_SIZE); #else + const uint b_idx = b0_idx; #ifdef INPUT2_DIMS_ORDER - uint value_offset = FUNC_CALL(get_input2_index)(OPTIONAL_SHAPE_INFO_TENSOR b0_idx, b1_idx, 0, 0, start_partition_idx + (seq_len * SUBGROUP_SIZE), head_size_idx); + uint value_offset = FUNC_CALL(get_input2_index)(OPTIONAL_SHAPE_INFO_TENSOR b_idx, b1_idx, 0, 0, start_partition_idx + (seq_len * SUBGROUP_SIZE), head_size_idx); #else - uint value_offset = INPUT2_GET_INDEX(b0_idx, b1_idx, start_partition_idx + (seq_len * SUBGROUP_SIZE), head_size_idx); + uint value_offset = INPUT2_GET_INDEX(b_idx, b1_idx, start_partition_idx + (seq_len * SUBGROUP_SIZE), head_size_idx); +#endif +#endif + +#if IS_KV_COMPRESSED + const uint comp_offset = GET_COMPRESSION_INDEX(VALUE_COMPRESSION_SCALE, b_idx, b1_idx / BROADCAST_GROUP_SIZE, 
start_partition_idx + (seq_len * SUBGROUP_SIZE) + sglid, 0); + VALUE_COMPRESSION_SCALE_TYPE comp_scale = val_scale[comp_offset]; +#if USE_ASYMMETRIC_QUANTIZATION + VALUE_COMPRESSION_SCALE_TYPE comp_zp = val_scale[comp_offset + 1]; #endif #endif @@ -482,9 +573,17 @@ KERNEL(sdpa_opt)( unroll_for (uint i = 0; i < SUBGROUP_SIZE; i++) { #ifdef BEAM_TABLE_TYPE - INPUT2_TYPE value_val = VALUE_BLOCK_READ(value_input, sub_group_broadcast(value_offset, i)); + const INPUT2_TYPE value_packed = VALUE_BLOCK_READ(value_input, sub_group_broadcast(value_offset, i)); #else - INPUT2_TYPE value_val = VALUE_BLOCK_READ(value_input, value_offset); + const INPUT2_TYPE value_packed = VALUE_BLOCK_READ(value_input, value_offset); +#endif + +#if IS_KV_COMPRESSED && USE_ASYMMETRIC_QUANTIZATION + VALUE_COMPRESSION_SCALE_TYPE value_val = (value_packed - sub_group_broadcast(comp_zp, i)) * sub_group_broadcast(comp_scale, i); +#elif IS_KV_COMPRESSED + VALUE_COMPRESSION_SCALE_TYPE value_val = (value_packed * sub_group_broadcast(comp_scale, i)); +#else + INPUT2_TYPE value_val = value_packed; #endif unroll_for (uint seq_idx = 0; seq_idx < TARGET_SEQ_LEN_BLOCK_SIZE; seq_idx++) { acc[seq_idx] = mad(sub_group_broadcast(qk_val[seq_idx], i), value_val, acc[seq_idx]); @@ -496,17 +595,30 @@ KERNEL(sdpa_opt)( } } - const uint seq_len_leftovers_start = (partition_seq_len / SUBGROUP_SIZE) * SUBGROUP_SIZE; - for (uint seq_len = seq_len_leftovers_start; seq_len < partition_seq_len; seq_len++) { -#ifdef INPUT2_DIMS_ORDER + +#if SG_SCALE_FACTOR > 1 + if (sgid >= HEAD_SIZE / SUBGROUP_SIZE) { +#endif + + for (uint seq_len = (partition_seq_len / SUBGROUP_SIZE) * SUBGROUP_SIZE; seq_len < partition_seq_len; seq_len++) { #ifdef BEAM_TABLE_TYPE const uint b_idx = beam_table[FUNC_CALL(get_bt_index_value)(OPTIONAL_SHAPE_INFO_TENSOR b0_idx, b1_idx, 0, 0, start_partition_idx + seq_len, head_size_idx)]; #else const uint b_idx = b0_idx; #endif + +#ifdef INPUT2_DIMS_ORDER const uint value_offset = 
FUNC_CALL(get_input2_index)(OPTIONAL_SHAPE_INFO_TENSOR b_idx, b1_idx, 0, 0, start_partition_idx + seq_len, head_size_idx); #else - const uint value_offset = INPUT2_GET_INDEX(b0_idx, b1_idx, start_partition_idx + seq_len, head_size_idx); + const uint value_offset = INPUT2_GET_INDEX(b_idx, b1_idx, start_partition_idx + seq_len, head_size_idx); +#endif + +#if IS_KV_COMPRESSED + const uint comp_offset = GET_COMPRESSION_INDEX(VALUE_COMPRESSION_SCALE, b_idx, b1_idx / BROADCAST_GROUP_SIZE, start_partition_idx + seq_len, 0); + VALUE_COMPRESSION_SCALE_TYPE comp_scale = val_scale[comp_offset]; +#if USE_ASYMMETRIC_QUANTIZATION + VALUE_COMPRESSION_SCALE_TYPE comp_zp = val_scale[comp_offset + 1]; +#endif #endif OUTPUT_TYPE qk_val[TARGET_SEQ_LEN_BLOCK_SIZE]; @@ -514,15 +626,42 @@ KERNEL(sdpa_opt)( qk_val[seq_idx] = qk_local[seq_idx * SEQ_LEN_PARTITION_SIZE + seq_len]; } - INPUT2_TYPE value_val = VALUE_BLOCK_READ(value_input, value_offset); + const INPUT2_TYPE value_packed = VALUE_BLOCK_READ(value_input, value_offset); +#if IS_KV_COMPRESSED && USE_ASYMMETRIC_QUANTIZATION + const VALUE_COMPRESSION_SCALE_TYPE value_val = (value_packed - comp_zp) * comp_scale; +#elif IS_KV_COMPRESSED + const VALUE_COMPRESSION_SCALE_TYPE value_val = (value_packed * comp_scale); +#else + const INPUT2_TYPE value_val = value_packed; +#endif unroll_for (uint seq_idx = 0; seq_idx < TARGET_SEQ_LEN_BLOCK_SIZE; seq_idx++) { acc[seq_idx] = mad(qk_val[seq_idx], value_val, acc[seq_idx]); } } +#if SG_SCALE_FACTOR > 1 + } // if (sgid >= HEAD_SIZE / SUBGROUP_SIZE) +#endif + +#if SG_SCALE_FACTOR > 1 + if ((partition_seq_len > (SEQ_LEN_PARTITION_SIZE / SG_SCALE_FACTOR)) || (partition_seq_len % SUBGROUP_SIZE != 0)) { + if (sgid >= HEAD_SIZE / SUBGROUP_SIZE) { + // Reuse query_local SLM to sum-up results between two groups of subgroups + query_local[head_size_idx] = acc[0]; + } + barrier(CLK_LOCAL_MEM_FENCE); + if (sgid < HEAD_SIZE / SUBGROUP_SIZE) { + acc[0] += query_local[head_size_idx]; + } + } +#endif + // If the 
number of partitions is greater than 1, save results to the temporary buffer; // otherwise, save results directly to the main output. +#if SG_SCALE_FACTOR > 1 + if (sgid < HEAD_SIZE / SUBGROUP_SIZE) { +#endif if (num_of_partitions > 1) { const uint seq_idx_end = 1; for (uint seq_idx = 0; seq_idx < seq_idx_end; seq_idx++) { @@ -542,6 +681,9 @@ KERNEL(sdpa_opt)( output[output_offset] = acc[seq_idx]; } } +#if SG_SCALE_FACTOR > 1 + } // if (sgid < HEAD_SIZE / SUBGROUP_SIZE) { +#endif } // Gemm2 calculation end } @@ -582,6 +724,12 @@ KERNEL(sdpa_opt)( #define ATTN_SCALE_BUFFER_ARG #endif +// Applying scales to query input improves the accuracy, but leads to performance drop for FP16 KV-cache case, +// so use it only for compressed version +#if IS_KV_COMPRESSED +#define APPLY_SCALES_TO_QUERY 1 +#endif + #define MASK_VECTOR_TYPE MAKE_VECTOR_TYPE(INPUT0_TYPE, TARGET_SEQ_LEN_BLOCK_SIZE) inline MASK_VECTOR_TYPE FUNC(load_attn_mask)(OPTIONAL_SHAPE_INFO_ARG @@ -683,6 +831,10 @@ KERNEL(sdpa_opt)( const __global ALIBI_TYPE* alibi_slopes, #endif __global OUTPUT_TYPE* output, +#if IS_KV_COMPRESSED + const __global KEY_COMPRESSION_SCALE_TYPE* key_scale, + const __global VALUE_COMPRESSION_SCALE_TYPE* val_scale, +#endif #ifdef BEAM_TABLE_TYPE const __global BEAM_TABLE_TYPE* beam_table, #endif @@ -733,9 +885,10 @@ KERNEL(sdpa_opt)( #if IS_PAGED_ATTENTION const uint block_start_pos = blocked_indexes_start[target_seq_dim]; const uint block_end_pos = blocked_indexes_end[target_seq_dim]; - - uint query_offset = block_start_pos * HEAD_SIZE * NUM_HEADS + num_heads_dim * HEAD_SIZE + head_size_idx; - const uint query_pitch = HEAD_SIZE * NUM_HEADS; + uint query_offset = INPUT0_OFFSET + + block_start_pos * (HEAD_SIZE * NUM_HEADS + INPUT0_PAD_BEFORE_FEATURE_NUM + INPUT0_PAD_AFTER_FEATURE_NUM) + + num_heads_dim * HEAD_SIZE + head_size_idx; + const uint query_pitch = (HEAD_SIZE * NUM_HEADS + INPUT0_PAD_BEFORE_FEATURE_NUM + INPUT0_PAD_AFTER_FEATURE_NUM); const uint cur_target_seq_len_size = 
block_end_pos - block_start_pos; #else @@ -751,12 +904,22 @@ KERNEL(sdpa_opt)( #endif uint query_local_offset = head_size_idx * TARGET_SEQ_LEN_BLOCK_SIZE; +#if APPLY_SCALES_TO_QUERY +#if HAS_SCALE_INPUT + const INPUT0_TYPE scale_val = *scale; +#else + const INPUT0_TYPE scale_val = TO_INPUT0_TYPE(STATIC_SCALE_VALUE); +#endif +#else + const INPUT0_TYPE scale_val = INPUT0_VAL_ONE; +#endif + if (cur_target_seq_len_size != TARGET_SEQ_LEN_BLOCK_SIZE) { if (sgid * SUBGROUP_SIZE < HEAD_SIZE) { for (uint seq_idx = 0; seq_idx < cur_target_seq_len_size; seq_idx++) { INPUT0_TYPE val = BLOCK_READN(INPUT0_TYPE, 1, query_input, query_offset); - slm_query[query_local_offset] = val; + slm_query[query_local_offset] = val * scale_val; query_offset += query_pitch; query_local_offset++; } @@ -767,7 +930,7 @@ KERNEL(sdpa_opt)( unroll_for (uint seq_idx = 0; seq_idx < (TARGET_SEQ_LEN_BLOCK_SIZE / SG_SCALE_FACTOR); seq_idx++) { INPUT0_TYPE val = BLOCK_READN(INPUT0_TYPE, 1, query_input, query_offset); - slm_query[query_local_offset] = val; + slm_query[query_local_offset] = val * scale_val; query_offset += query_pitch; query_local_offset++; } @@ -777,7 +940,7 @@ KERNEL(sdpa_opt)( unroll_for (uint seq_idx = 0; seq_idx < (TARGET_SEQ_LEN_BLOCK_SIZE / SG_SCALE_FACTOR); seq_idx++) { INPUT0_TYPE val = BLOCK_READN(INPUT0_TYPE, 1, query_input, query_offset); - slm_query[query_local_offset] = val; + slm_query[query_local_offset] = val * scale_val; query_offset += query_pitch; query_local_offset++; } @@ -788,7 +951,7 @@ KERNEL(sdpa_opt)( unroll_for (uint seq_idx = 0; seq_idx < (TARGET_SEQ_LEN_BLOCK_SIZE / SG_SCALE_FACTOR); seq_idx++) { INPUT0_TYPE val = BLOCK_READN(INPUT0_TYPE, 1, query_input, query_offset); - slm_query[query_local_offset] = val; + slm_query[query_local_offset] = val * scale_val; query_offset += query_pitch; query_local_offset++; } @@ -796,7 +959,7 @@ KERNEL(sdpa_opt)( unroll_for (uint seq_idx = 0; seq_idx < TARGET_SEQ_LEN_BLOCK_SIZE; seq_idx++) { INPUT0_TYPE val = 
BLOCK_READN(INPUT0_TYPE, 1, query_input, query_offset); - slm_query[query_local_offset] = val; + slm_query[query_local_offset] = val * scale_val; query_offset += query_pitch; query_local_offset++; } @@ -834,13 +997,17 @@ KERNEL(sdpa_opt)( const uint heads_dim = num_heads_dim; #endif #define KEY_SEQ_OFFSET subsequence_begins[gws_seq_indexes_correspondence[target_seq_dim]] - uint key_offset = KEY_SEQ_OFFSET * HEAD_SIZE * NUM_KV_HEADS + heads_dim * HEAD_SIZE + seq_len * HEAD_SIZE * NUM_KV_HEADS; - const uint key_pitch = HEAD_SIZE * NUM_KV_HEADS; + const uint key_pitch = (HEAD_SIZE * NUM_KV_HEADS + INPUT1_PAD_BEFORE_FEATURE_NUM + INPUT1_PAD_AFTER_FEATURE_NUM); + uint key_offset = INPUT1_OFFSET + + KEY_SEQ_OFFSET * key_pitch + + heads_dim * HEAD_SIZE + + seq_len * key_pitch; #else #ifdef BEAM_TABLE_TYPE const uint b_idx = beam_table[FUNC_CALL(get_bt_index_key)(OPTIONAL_SHAPE_INFO_TENSOR b0_idx, b1_idx, 0, 0, seq_len + sglid, 0)]; const uint key_offset = FUNC_CALL(get_input1_index)(OPTIONAL_SHAPE_INFO_TENSOR b_idx, b1_idx, 0, 0, seq_len + sglid, 0); #else + const uint b_idx = b0_idx; #ifdef INPUT1_DIMS_ORDER uint key_offset = FUNC_CALL(get_input1_index)(OPTIONAL_SHAPE_INFO_TENSOR b0_idx, b1_idx, 0, 0, seq_len, 0); uint key_offset_next_seq = FUNC_CALL(get_input1_index)(OPTIONAL_SHAPE_INFO_TENSOR b0_idx, b1_idx, 0, 0, seq_len + 1, 0); @@ -870,10 +1037,17 @@ KERNEL(sdpa_opt)( PA_BUFFERS); if (seq_len_calc_size >= SUBGROUP_SIZE) { +#if IS_KV_COMPRESSED + const uint comp_offset = GET_COMPRESSION_INDEX(KEY_COMPRESSION_SCALE, b_idx, b1_idx / BROADCAST_GROUP_SIZE, seq_len + sglid, 0); + KEY_COMPRESSION_SCALE_TYPE comp_scale = key_scale[comp_offset]; +#if USE_ASYMMETRIC_QUANTIZATION + KEY_COMPRESSION_SCALE_TYPE comp_zp = key_scale[comp_offset + 1]; +#endif +#endif __attribute__((opencl_unroll_hint(1))) for (uint head_idx_index = 0; head_idx_index < HEAD_SIZE; head_idx_index += SUBGROUP_SIZE) { #define KEY_BLOCK_READ(ptr, offset) BLOCK_READN(INPUT1_TYPE, 1, ptr, offset); - #define 
QUERY_VEC MAKE_VECTOR_TYPE(INPUT1_TYPE, TARGET_SEQ_LEN_BLOCK_SIZE) + #define QUERY_VEC MAKE_VECTOR_TYPE(INPUT0_TYPE, TARGET_SEQ_LEN_BLOCK_SIZE) QUERY_VEC queries_vec; uint query_local_offset = (head_idx_index * TARGET_SEQ_LEN_BLOCK_SIZE) + sglid; @@ -884,9 +1058,17 @@ KERNEL(sdpa_opt)( unroll_for (uint key_row_idx = 0; key_row_idx < TARGET_SEQ_LEN_BLOCK_SIZE; key_row_idx++) { #ifdef BEAM_TABLE_TYPE - INPUT1_TYPE key_vals = KEY_BLOCK_READ(key_input, sub_group_broadcast(key_offset, key_row_idx) + head_idx_index); + const INPUT1_TYPE key_packed = KEY_BLOCK_READ(key_input, sub_group_broadcast(key_offset, key_row_idx) + head_idx_index); +#else + const INPUT1_TYPE key_packed = KEY_BLOCK_READ(key_input, key_offset + key_row_idx * key_pitch + head_idx_index); +#endif + +#if IS_KV_COMPRESSED && USE_ASYMMETRIC_QUANTIZATION + KEY_COMPRESSION_SCALE_TYPE key_vals = (TO_KEY_COMPRESSION_SCALE_TYPE(key_packed) - sub_group_broadcast(comp_zp, key_row_idx)) * sub_group_broadcast(comp_scale, key_row_idx); +#elif IS_KV_COMPRESSED + KEY_COMPRESSION_SCALE_TYPE key_vals = (TO_KEY_COMPRESSION_SCALE_TYPE(key_packed) * sub_group_broadcast(comp_scale, key_row_idx)); #else - INPUT1_TYPE key_vals = KEY_BLOCK_READ(key_input, key_offset + key_row_idx * key_pitch + head_idx_index); + INPUT1_TYPE key_vals = key_packed; #endif unroll_for (uint i = 0; i < SUBGROUP_SIZE; i++) { @@ -895,12 +1077,29 @@ KERNEL(sdpa_opt)( } } } else if (seq_len_calc_size > 0) { +#if IS_KV_COMPRESSED + const uint comp_offset = GET_COMPRESSION_INDEX(KEY_COMPRESSION_SCALE, b_idx, b1_idx / BROADCAST_GROUP_SIZE, seq_len + min(sglid, (uint)seq_len_calc_size - 1), 0); + // const uint comp_offset = GET_COMPRESSION_INDEX(KEY_COMPRESSION_SCALE, b_idx, b1_idx / BROADCAST_GROUP_SIZE, seq_len + sglid, 0); + KEY_COMPRESSION_SCALE_TYPE comp_scale = key_scale[comp_offset]; +#if USE_ASYMMETRIC_QUANTIZATION + KEY_COMPRESSION_SCALE_TYPE comp_zp = key_scale[comp_offset + 1]; +#endif +#endif __attribute__((opencl_unroll_hint(1))) for (uint 
head_idx_index = 0; head_idx_index < HEAD_SIZE; head_idx_index += SUBGROUP_SIZE) { - #define KEY_BLOCK_READ(ptr, offset) BLOCK_READN(INPUT1_TYPE, 1, ptr, offset); - #define QUERY_VEC MAKE_VECTOR_TYPE(INPUT1_TYPE, TARGET_SEQ_LEN_BLOCK_SIZE) + #define KEY_BLOCK_READ(ptr, offset) BLOCK_READN(INPUT1_TYPE, 1, ptr, offset) + #define QUERY_VEC_TYPE MAKE_VECTOR_TYPE(INPUT0_TYPE, TARGET_SEQ_LEN_BLOCK_SIZE) +#if IS_KV_COMPRESSED + #define KEY_UNPACKED_TYPE KEY_COMPRESSION_SCALE_TYPE + #define KEY_UNPACKED_VEC_TYPE MAKE_VECTOR_TYPE(KEY_COMPRESSION_SCALE_TYPE, TARGET_SEQ_LEN_BLOCK_SIZE) + #define TO_KEY_UNPACKED_TYPE(val) TO_KEY_COMPRESSION_SCALE_TYPE(val) +#else + #define KEY_UNPACKED_TYPE INPUT1_TYPE + #define KEY_UNPACKED_VEC_TYPE MAKE_VECTOR_TYPE(INPUT1_TYPE, TARGET_SEQ_LEN_BLOCK_SIZE) + #define TO_KEY_UNPACKED_TYPE(val) TO_INPUT1_TYPE(val) +#endif - QUERY_VEC queries_vec; + QUERY_VEC_TYPE queries_vec; uint query_local_offset = (head_idx_index * TARGET_SEQ_LEN_BLOCK_SIZE) + sglid; unroll_for (uint q_row_idx = 0; q_row_idx < TARGET_SEQ_LEN_BLOCK_SIZE; q_row_idx++) { queries_vec[q_row_idx] = slm_query[query_local_offset]; @@ -908,27 +1107,37 @@ KERNEL(sdpa_opt)( } #ifndef LOAD_KEY_LEFTOVERS_IN_CALC_LOOP - QUERY_VEC key_vec = 0; + KEY_UNPACKED_VEC_TYPE key_vec = 0; unroll_for (uint key_row_idx = 0; key_row_idx < seq_len_calc_size; key_row_idx++) { - #ifdef BEAM_TABLE_TYPE - key_vec[key_row_idx] = KEY_BLOCK_READ(key_input, sub_group_broadcast(key_offset, key_row_idx) + head_idx_index); - #else - key_vec[key_row_idx] = KEY_BLOCK_READ(key_input, key_offset + key_row_idx * key_pitch + head_idx_index); - #endif +#ifdef BEAM_TABLE_TYPE + key_vec[key_row_idx] = TO_KEY_UNPACKED_TYPE(KEY_BLOCK_READ(key_input, sub_group_broadcast(key_offset, key_row_idx) + head_idx_index)); +#else + key_vec[key_row_idx] = TO_KEY_UNPACKED_TYPE(KEY_BLOCK_READ(key_input, key_offset + key_row_idx * key_pitch + head_idx_index)); +#endif + +#if IS_KV_COMPRESSED && USE_ASYMMETRIC_QUANTIZATION + 
key_vec[key_row_idx] = (key_vec[key_row_idx] - sub_group_broadcast(comp_zp, key_row_idx)) * sub_group_broadcast(comp_scale, key_row_idx); +#elif IS_KV_COMPRESSED + key_vec[key_row_idx] *= sub_group_broadcast(comp_scale, key_row_idx); +#endif } #endif unroll_for (uint key_row_idx = 0; key_row_idx < TARGET_SEQ_LEN_BLOCK_SIZE; key_row_idx++) { #ifdef LOAD_KEY_LEFTOVERS_IN_CALC_LOOP - #ifdef BEAM_TABLE_TYPE - INPUT1_TYPE key_vals = 0; - if (key_row_idx < seq_len_calc_size) - key_vals = KEY_BLOCK_READ(key_input, sub_group_broadcast(key_offset, key_row_idx) + head_idx_index); - #else - INPUT1_TYPE key_vals = 0; - if (key_row_idx < seq_len_calc_size) - key_vals = KEY_BLOCK_READ(key_input, key_offset + key_row_idx * key_pitch + head_idx_index); - #endif + KEY_UNPACKED_TYPE key_vals = 0; + if (key_row_idx < seq_len_calc_size) { +#ifdef BEAM_TABLE_TYPE + key_vals = TO_KEY_UNPACKED_TYPE(KEY_BLOCK_READ(key_input, sub_group_broadcast(key_offset, key_row_idx) + head_idx_index)); +#else + key_vals = TO_KEY_UNPACKED_TYPE(KEY_BLOCK_READ(key_input, key_offset + key_row_idx * key_pitch + head_idx_index)); +#endif + } +#if IS_KV_COMPRESSED && USE_ASYMMETRIC_QUANTIZATION + key_vals = (key_vals - sub_group_broadcast(comp_zp, key_row_idx)) * sub_group_broadcast(comp_scale, key_row_idx); +#elif IS_KV_COMPRESSED + key_vals *= sub_group_broadcast(comp_scale, key_row_idx); +#endif #else #define key_vals key_vec[key_row_idx] #endif @@ -941,12 +1150,14 @@ KERNEL(sdpa_opt)( { unroll_for (uint i = 0; i < TARGET_SEQ_LEN_BLOCK_SIZE; i++) { +#if !APPLY_SCALES_TO_QUERY #if HAS_SCALE_INPUT const OUTPUT_TYPE scale_val = *scale; #else const OUTPUT_TYPE scale_val = TO_OUTPUT_TYPE(STATIC_SCALE_VALUE); #endif qk_acc[i] *= scale_val; +#endif #ifdef HAS_ALIBI const int alibi_val = (1 - SOURCE_SEQ_LEN) + seq_len + i; @@ -1018,7 +1229,7 @@ KERNEL(sdpa_opt)( // QK*V calculation MAKE_VECTOR_TYPE(OUTPUT_TYPE, TARGET_SEQ_LEN_BLOCK_SIZE) acc_output_res = OUTPUT_VAL_ZERO; #if IS_PAGED_ATTENTION - const uint 
value_pitch = HEAD_SIZE * NUM_KV_HEADS; + const uint value_pitch = (HEAD_SIZE * NUM_KV_HEADS + INPUT2_PAD_BEFORE_FEATURE_NUM + INPUT2_PAD_AFTER_FEATURE_NUM); #else #ifdef INPUT2_DIMS_ORDER uint value_offset_base = FUNC_CALL(get_input2_index)(OPTIONAL_SHAPE_INFO_TENSOR b0_idx, b1_idx, 0, 0, 0, 0); @@ -1039,12 +1250,16 @@ KERNEL(sdpa_opt)( const uint heads_dim = num_heads_dim; #endif const uint value_seq_offset = subsequence_begins[gws_seq_indexes_correspondence[target_seq_dim]]; - uint value_offset = value_seq_offset * HEAD_SIZE * NUM_KV_HEADS + heads_dim * HEAD_SIZE + (start_partition_idx + (seq_len)) * HEAD_SIZE * NUM_KV_HEADS + head_size_idx; + uint value_offset = INPUT2_OFFSET + + value_seq_offset * value_pitch + + heads_dim * HEAD_SIZE + + (start_partition_idx + (seq_len)) * value_pitch + head_size_idx; #else #ifdef BEAM_TABLE_TYPE const uint b_idx = beam_table[FUNC_CALL(get_bt_index_value)(OPTIONAL_SHAPE_INFO_TENSOR b0_idx, b1_idx, 0, 0, start_partition_idx + (seq_len) + sglid, sgid * SUBGROUP_SIZE)]; const uint value_offset = FUNC_CALL(get_input2_index)(OPTIONAL_SHAPE_INFO_TENSOR b_idx, b1_idx, 0, 0, start_partition_idx + (seq_len) + sglid, sgid * SUBGROUP_SIZE); #else + const uint b_idx = b0_idx; #ifdef INPUT2_DIMS_ORDER uint value_offset = FUNC_CALL(get_input2_index)(OPTIONAL_SHAPE_INFO_TENSOR b0_idx, b1_idx, 0, 0, start_partition_idx + (seq_len), head_size_idx); #else @@ -1058,12 +1273,28 @@ KERNEL(sdpa_opt)( qk_val[seq_idx] = slm_qk_vals[seq_idx * SEQ_LEN_PARTITION_SIZE + seq_len + sglid]; } +#if IS_KV_COMPRESSED + const uint comp_offset = GET_COMPRESSION_INDEX(VALUE_COMPRESSION_SCALE, b_idx, b1_idx / BROADCAST_GROUP_SIZE, start_partition_idx + seq_len + sglid, 0); + VALUE_COMPRESSION_SCALE_TYPE comp_scale = val_scale[comp_offset]; +#if USE_ASYMMETRIC_QUANTIZATION + VALUE_COMPRESSION_SCALE_TYPE comp_zp = val_scale[comp_offset + 1]; +#endif +#endif unroll_for (uint i = 0; i < SUBGROUP_SIZE; i++) { #ifdef BEAM_TABLE_TYPE - INPUT2_TYPE value_val = 
VALUE_BLOCK_READ(value_input, sub_group_broadcast(value_offset, i)); + const INPUT2_TYPE value_packed = VALUE_BLOCK_READ(value_input, sub_group_broadcast(value_offset, i)); #else - INPUT2_TYPE value_val = VALUE_BLOCK_READ(value_input, value_offset); + const INPUT2_TYPE value_packed = VALUE_BLOCK_READ(value_input, value_offset); #endif + +#if IS_KV_COMPRESSED && USE_ASYMMETRIC_QUANTIZATION + VALUE_COMPRESSION_SCALE_TYPE value_val = (value_packed - sub_group_broadcast(comp_zp, i)) * sub_group_broadcast(comp_scale, i); +#elif IS_KV_COMPRESSED + VALUE_COMPRESSION_SCALE_TYPE value_val = (value_packed * sub_group_broadcast(comp_scale, i)); +#else + INPUT2_TYPE value_val = value_packed; +#endif + unroll_for (uint seq_idx = 0; seq_idx < TARGET_SEQ_LEN_BLOCK_SIZE; seq_idx++) { acc_output_res[seq_idx] = mad(sub_group_broadcast(qk_val[seq_idx], i), value_val, acc_output_res[seq_idx]); } @@ -1087,18 +1318,30 @@ KERNEL(sdpa_opt)( const uint heads_dim = num_heads_dim; #endif const uint value_seq_offset = subsequence_begins[gws_seq_indexes_correspondence[target_seq_dim]]; - uint value_offset = value_seq_offset * HEAD_SIZE * NUM_KV_HEADS + heads_dim * HEAD_SIZE + (start_partition_idx + (seq_len * SUBGROUP_SIZE)) * HEAD_SIZE * NUM_KV_HEADS + head_size_idx; + uint value_offset = INPUT2_OFFSET + + value_seq_offset * value_pitch + + heads_dim * HEAD_SIZE + + (start_partition_idx + (seq_len * SUBGROUP_SIZE)) * value_pitch + head_size_idx; #else #ifdef BEAM_TABLE_TYPE const uint b_idx = beam_table[FUNC_CALL(get_bt_index_value)(OPTIONAL_SHAPE_INFO_TENSOR b0_idx, b1_idx, 0, 0, start_partition_idx + (seq_len * SUBGROUP_SIZE) + sglid, sgid * SUBGROUP_SIZE)]; uint value_offset = FUNC_CALL(get_input2_index)(OPTIONAL_SHAPE_INFO_TENSOR b_idx, b1_idx, 0, 0, start_partition_idx + (seq_len * SUBGROUP_SIZE) + sglid, sgid * SUBGROUP_SIZE); #else + const uint b_idx = b0_idx; #ifdef INPUT2_DIMS_ORDER uint value_offset = FUNC_CALL(get_input2_index)(OPTIONAL_SHAPE_INFO_TENSOR b0_idx, b1_idx, 0, 0, 
start_partition_idx + (seq_len * SUBGROUP_SIZE), head_size_idx); #else uint value_offset = INPUT2_GET_INDEX(b0_idx, b1_idx, start_partition_idx + (seq_len * SUBGROUP_SIZE), head_size_idx); #endif #endif +#endif + +#if IS_KV_COMPRESSED + const uint comp_offset = GET_COMPRESSION_INDEX(VALUE_COMPRESSION_SCALE, b_idx, b1_idx / BROADCAST_GROUP_SIZE, start_partition_idx + (seq_len * SUBGROUP_SIZE) + sglid, 0); + VALUE_COMPRESSION_SCALE_TYPE comp_scale = val_scale[comp_offset]; +#if USE_ASYMMETRIC_QUANTIZATION + VALUE_COMPRESSION_SCALE_TYPE comp_zp = val_scale[comp_offset + 1]; +#endif #endif MAKE_VECTOR_TYPE(OUTPUT_TYPE, TARGET_SEQ_LEN_BLOCK_SIZE) qk_val; @@ -1108,9 +1351,17 @@ KERNEL(sdpa_opt)( unroll_for (uint i = 0; i < SUBGROUP_SIZE; i++) { #ifdef BEAM_TABLE_TYPE - INPUT2_TYPE value_val = VALUE_BLOCK_READ(value_input, sub_group_broadcast(value_offset, i)); + const INPUT2_TYPE value_packed = VALUE_BLOCK_READ(value_input, sub_group_broadcast(value_offset, i)); +#else + const INPUT2_TYPE value_packed = VALUE_BLOCK_READ(value_input, value_offset); +#endif + +#if IS_KV_COMPRESSED && USE_ASYMMETRIC_QUANTIZATION + VALUE_COMPRESSION_SCALE_TYPE value_val = (value_packed - sub_group_broadcast(comp_zp, i)) * sub_group_broadcast(comp_scale, i); +#elif IS_KV_COMPRESSED + VALUE_COMPRESSION_SCALE_TYPE value_val = (value_packed * sub_group_broadcast(comp_scale, i)); #else - INPUT2_TYPE value_val = VALUE_BLOCK_READ(value_input, value_offset); + INPUT2_TYPE value_val = value_packed; #endif unroll_for (uint seq_idx = 0; seq_idx < TARGET_SEQ_LEN_BLOCK_SIZE; seq_idx++) { acc_output_res[seq_idx] = mad(sub_group_broadcast(qk_val[seq_idx], i), value_val, acc_output_res[seq_idx]); @@ -1138,25 +1389,46 @@ KERNEL(sdpa_opt)( const uint heads_dim = num_heads_dim; #endif const uint value_seq_offset = subsequence_begins[gws_seq_indexes_correspondence[target_seq_dim]]; - uint value_offset = value_seq_offset * HEAD_SIZE * NUM_KV_HEADS + heads_dim * HEAD_SIZE + (start_partition_idx + 
seq_len_leftovers_start) * HEAD_SIZE * NUM_KV_HEADS + head_size_idx; + uint value_offset = INPUT2_OFFSET + + value_seq_offset * value_pitch + + heads_dim * HEAD_SIZE + + (start_partition_idx + seq_len_leftovers_start) * value_pitch + head_size_idx; #else #ifdef BEAM_TABLE_TYPE const uint b_idx = beam_table[FUNC_CALL(get_bt_index_value)(OPTIONAL_SHAPE_INFO_TENSOR b0_idx, b1_idx, 0, 0, start_partition_idx + seq_len_leftovers_start + sglid, sgid * SUBGROUP_SIZE)]; const uint value_offset = FUNC_CALL(get_input2_index)(OPTIONAL_SHAPE_INFO_TENSOR b_idx, b1_idx, 0, 0, start_partition_idx + seq_len_leftovers_start + sglid, sgid * SUBGROUP_SIZE); #else + const uint b_idx = b0_idx; #ifdef INPUT2_DIMS_ORDER uint value_offset = FUNC_CALL(get_input2_index)(OPTIONAL_SHAPE_INFO_TENSOR b0_idx, b1_idx, 0, 0, start_partition_idx + seq_len_leftovers_start, head_size_idx); #else uint value_offset = INPUT2_GET_INDEX(b0_idx, b1_idx, start_partition_idx + seq_len_leftovers_start, head_size_idx); #endif #endif +#endif + +#if IS_KV_COMPRESSED + const uint comp_offset = GET_COMPRESSION_INDEX(VALUE_COMPRESSION_SCALE, b_idx, b1_idx / BROADCAST_GROUP_SIZE, start_partition_idx + min(seq_len_leftovers_start + sglid, seq_len_end - 1), 0); + // const uint comp_offset = GET_COMPRESSION_INDEX(VALUE_COMPRESSION_SCALE, b_idx, b1_idx / BROADCAST_GROUP_SIZE, start_partition_idx + seq_len_leftovers_start + sglid, 0); + VALUE_COMPRESSION_SCALE_TYPE comp_scale = val_scale[comp_offset]; +#if USE_ASYMMETRIC_QUANTIZATION + VALUE_COMPRESSION_SCALE_TYPE comp_zp = val_scale[comp_offset + 1]; +#endif #endif for (uint seq_len_idx = 0; seq_len_idx < partition_seq_len - seq_len_leftovers_start; seq_len_idx++) { #ifdef BEAM_TABLE_TYPE - INPUT2_TYPE value_val = VALUE_BLOCK_READ(value_input, sub_group_broadcast(value_offset, seq_len_idx)); + const INPUT2_TYPE value_packed = VALUE_BLOCK_READ(value_input, sub_group_broadcast(value_offset, seq_len_idx)); +#else + const INPUT2_TYPE value_packed = 
VALUE_BLOCK_READ(value_input, value_offset); +#endif + +#if IS_KV_COMPRESSED && USE_ASYMMETRIC_QUANTIZATION + VALUE_COMPRESSION_SCALE_TYPE value_val = (value_packed - sub_group_broadcast(comp_zp, seq_len_idx)) * sub_group_broadcast(comp_scale, seq_len_idx); +#elif IS_KV_COMPRESSED + VALUE_COMPRESSION_SCALE_TYPE value_val = (value_packed * sub_group_broadcast(comp_scale, seq_len_idx)); #else - INPUT2_TYPE value_val = VALUE_BLOCK_READ(value_input, value_offset); + INPUT2_TYPE value_val = value_packed; #endif for (uint seq_idx = 0; seq_idx < TARGET_SEQ_LEN_BLOCK_SIZE; seq_idx++) { diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/sdpa_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/sdpa_ref.cl index 83e3c7c7e9fef1..682af11777012f 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/sdpa_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/sdpa_ref.cl @@ -112,6 +112,15 @@ inline uint FUNC(get_bt_index_value)(OPTIONAL_SHAPE_INFO_ARG uint b, uint f, uin #endif #define APPLY_SCALE_TO_QUERY 1 +#define HAS_KV_CACHE_ZP_INPUT USE_ASYMMETRIC_QUANTIZATION && !COMBINE_SCALES_AND_ZP + +#if IS_KV_COMPRESSED +#if COMPRESSED_PER_HEAD + #define GET_COMPRESSION_INDEX(INPUT, b, f, y, x) GET_DATA_INDEX(INPUT, (b), (f), (y), (0)); +#else + #define GET_COMPRESSION_INDEX(INPUT, b, f, y, x) GET_DATA_INDEX(INPUT, (b), (0), (y), (0)); +#endif +#endif KERNEL(sdpa_ref)( OPTIONAL_SHAPE_INFO_ARG @@ -125,6 +134,14 @@ KERNEL(sdpa_ref)( const __global INPUT4_TYPE* scale, #endif __global OUTPUT_TYPE* output, +#if IS_KV_COMPRESSED + const __global KEY_COMPRESSION_SCALE_TYPE* key_scale, + const __global VALUE_COMPRESSION_SCALE_TYPE* val_scale, +#if HAS_KV_CACHE_ZP_INPUT + const __global KEY_COMPRESSION_ZP_TYPE* key_zp, + const __global VALUE_COMPRESSION_ZP_TYPE* val_zp, +#endif +#endif #ifdef BEAM_TABLE_TYPE const __global BEAM_TABLE_TYPE* beam_table, #endif @@ -162,7 +179,24 @@ KERNEL(sdpa_ref)( #else INPUT0_TYPE q_val = 
query_input[query_offset]; #endif - INPUT1_TYPE k_val = key_input[key_offset]; + + INPUT1_TYPE k_val_packed = key_input[key_offset]; +#if IS_KV_COMPRESSED + const uint comp_offset = GET_COMPRESSION_INDEX(KEY_COMPRESSION_SCALE, b_idx, b1 / BROADCAST_GROUP_SIZE, s, 0); + KEY_COMPRESSION_SCALE_TYPE comp_scale = key_scale[comp_offset]; + +#if USE_ASYMMETRIC_QUANTIZATION && HAS_KV_CACHE_ZP_INPUT + KEY_COMPRESSION_SCALE_TYPE comp_zp = key_zp[comp_offset]; +#elif USE_ASYMMETRIC_QUANTIZATION + VALUE_COMPRESSION_SCALE_TYPE comp_zp = key_scale[comp_offset + 1]; +#else + KEY_COMPRESSION_SCALE_TYPE comp_zp = 0; +#endif + KEY_COMPRESSION_SCALE_TYPE k_val = ((k_val_packed - comp_zp) * comp_scale); + +#else + INPUT1_TYPE k_val = k_val_packed; +#endif acc += q_val * k_val; } @@ -236,7 +270,24 @@ KERNEL(sdpa_ref)( #endif uint value_offset = FUNC_CALL(get_input2_index)(OPTIONAL_SHAPE_INFO_TENSOR b_idx, b1, 0, 0, s, head_size_idx); - acc += tmp_buf[tmp_buf_offset] * value_input[value_offset]; + const INPUT2_TYPE value_packed = value_input[value_offset]; +#if IS_KV_COMPRESSED + const uint comp_offset = GET_COMPRESSION_INDEX(VALUE_COMPRESSION_SCALE, b_idx, b1 / BROADCAST_GROUP_SIZE, s, 0); + VALUE_COMPRESSION_SCALE_TYPE comp_scale = val_scale[comp_offset]; + +#if USE_ASYMMETRIC_QUANTIZATION && HAS_KV_CACHE_ZP_INPUT + VALUE_COMPRESSION_SCALE_TYPE comp_zp = val_zp[comp_offset]; +#elif USE_ASYMMETRIC_QUANTIZATION + VALUE_COMPRESSION_SCALE_TYPE comp_zp = val_scale[comp_offset + 1]; +#else + VALUE_COMPRESSION_SCALE_TYPE comp_zp = 0; +#endif + VALUE_COMPRESSION_SCALE_TYPE value = ((value_packed - comp_zp) * comp_scale); +#else + INPUT2_TYPE value = value_packed; +#endif + + acc += tmp_buf[tmp_buf_offset] * value; } uint output_offset = OUTPUT_GET_INDEX(b0, b1, target_seq_idx, head_size_idx); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_kv_cache.h 
b/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_kv_cache.h new file mode 100644 index 00000000000000..ac6870a37a1728 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_kv_cache.h @@ -0,0 +1,30 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "kernel_base_opencl.h" +#include "dynamic_quantize_kernel_ref.h" + +namespace kernel_selector { +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// dynamic_quantize_params +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +class DynamicQuantizeKernelKVCache : public KernelBaseOpenCL { +public: + DynamicQuantizeKernelKVCache() : KernelBaseOpenCL("dynamic_quantize_gpu_kv_cache") {} + virtual ~DynamicQuantizeKernelKVCache() {} + + virtual JitConstants GetJitConstants(const dynamic_quantize_params& params) const; + virtual CommonDispatchData SetDefault(const dynamic_quantize_params& params) const; + KernelsData GetKernelsData(const Params& params) const override; + KernelsPriority GetKernelsPriority(const Params& params) const override; + Datatype GetAccumulatorType(const dynamic_quantize_params& params) const; + ParamsKey GetSupportedKey() const override; + +protected: + bool Validate(const Params&) const override; + void GetUpdateDispatchDataFunc(KernelData& kd) const override; +}; +} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_opt.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_opt.cpp index 6a678770e85d72..52a648679499f2 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_opt.cpp +++ 
b/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_opt.cpp @@ -154,6 +154,20 @@ bool DynamicQuantizeKernelOpt::Validate(const Params& params) const { if (dq_params.inputs[0].GetPaddedVal() != 0 || dq_params.outputs[0].GetPaddedVal() != 0) return false; + if (dq_params.append_axis != -1) + return false; + + if (dq_params.group_sizes.back() != UINT64_MAX) + return false; + + // Allow only default scales order + const auto& scales_output_order = dq_params.scales_output_order; + if (!scales_output_order.empty()) { + for (size_t i = 0; i < scales_output_order.size(); i++) + if (scales_output_order[i] != i) + return false; + } + return true; } } // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_opt_kv_cache.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_opt_kv_cache.cpp new file mode 100644 index 00000000000000..d0c99484e3f52e --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_opt_kv_cache.cpp @@ -0,0 +1,285 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "dynamic_quantize_kernel_kv_cache.h" +#include "kernel_selector_utils.h" +#include + + +static constexpr size_t subgroup_size = 16; + +namespace kernel_selector { +static Tensor::NDims get_normalized_dims(const DataTensor& tensor) { + auto dims = tensor.GetDims(); + std::reverse(dims.begin(), dims.end()); + + return dims; +} + +static size_t get_elements_number_per_batch(const dynamic_quantize_params& params) { + const auto& group_sizes = params.group_sizes; + const auto& input_dims = get_normalized_dims(params.inputs[0]); + + size_t total_elements_number = 1; + for (size_t i = 0; i < group_sizes.size(); i++) { + if (group_sizes[i] != UINT64_MAX) { + total_elements_number *= input_dims[i].v; + } + } + + return total_elements_number; +} + 
+static size_t get_elements_number_per_group(const dynamic_quantize_params& params) { + const auto& group_sizes = params.group_sizes; + const auto& input_dims = get_normalized_dims(params.inputs[0]); + + size_t total_elements_number = 1; + for (size_t i = 0; i < group_sizes.size(); i++) { + if (group_sizes[i] == UINT64_MAX) { + total_elements_number *= input_dims[i].v; + } else { + total_elements_number *= group_sizes[i]; + } + } + + return total_elements_number; +} + +static std::string generate_dims_indexes_calculation(std::vector> dims) { + std::reverse(dims.begin(), dims.end()); // reorder dims in order from innermost to outermost dimensions + + auto generate_calc_function = [&](std::string data_type, std::string index_var, size_t dim_idx) { + std::string index_calc_str; + index_calc_str += "" + data_type + " " + dims[dim_idx].first + " = "; + index_calc_str += "(" + index_var + " / "; + index_calc_str += "(1"; + for (size_t i = 0; i < dim_idx; i++) { + index_calc_str += " * " + dims[i].second; + } + index_calc_str += ")) % " + dims[dim_idx].second + ";"; + + return index_calc_str; + }; + + std::stringstream indexes_calc_str; + for (size_t i = 0; i < dims.size(); i++) { + indexes_calc_str << generate_calc_function("uint", "data_idx", i); + } + + return indexes_calc_str.str(); +} + +static size_t get_per_iter_elements_number(const dynamic_quantize_params& params) { + const auto maxWorkGroupSize = params.engineInfo.maxWorkGroupSize; + const auto total_grouped_elements = get_elements_number_per_group(params); + + if (total_grouped_elements % maxWorkGroupSize == 0) + return maxWorkGroupSize; + + if (total_grouped_elements < maxWorkGroupSize) + return total_grouped_elements; + + return 0; +} + +ParamsKey DynamicQuantizeKernelKVCache::GetSupportedKey() const { + ParamsKey k; + k.EnableInputDataType(Datatype::F16); + k.EnableOutputDataType(Datatype::INT8); + k.EnableDifferentTypes(); + k.EnableAllInputLayout(); + k.EnableAllOutputLayout(); + k.EnableTensorOffset(); + 
k.EnableTensorPitches(); + k.EnableBatching(); + k.EnableDynamicShapesSupport(); + return k; +} + +JitConstants DynamicQuantizeKernelKVCache::GetJitConstants(const dynamic_quantize_params& params) const { + JitConstants jit = MakeBaseParamsJitConstants(params); + + const std::vector> default_dims = {{"b", "INPUT0_BATCH_NUM"}, + {"f", "INPUT0_FEATURE_NUM"}, + {"y", "INPUT0_SIZE_Y"}, + {"x", "INPUT0_SIZE_X"}}; + + const auto& group_sizes = params.group_sizes; + std::vector> batch_dims, grouped_dims; + for (size_t i = 0; i < group_sizes.size(); i++) { + if (group_sizes[i] == 1) { + batch_dims.push_back(default_dims[i]); + } else { + grouped_dims.push_back(default_dims[i]); + } + } + + const auto& input_dims = get_normalized_dims(params.inputs[0]); + const auto total_grouped_elements = get_elements_number_per_group(params); + const auto per_iter_elements_number = get_per_iter_elements_number(params); + const auto total_subgroups_number = total_grouped_elements / input_dims.back().v; + + // Drop the last dimensions, since it will be processed in the kernel's loop + grouped_dims.pop_back(); + + const bool append_mode = params.append_axis != -1; + std::pair append_axis_info = {}; + if (append_mode) { + jit.AddConstant(MakeJitConstant("APPEND_MODE", append_mode)); + jit.AddConstant(MakeJitConstant("APPEND_AXIS_NAME", default_dims[params.append_axis].first)); + } + + jit.AddConstant(MakeJitConstant("DECLARE_BATCHED_DIMS_INDEXES(data_idx)", generate_dims_indexes_calculation(batch_dims))); + jit.AddConstant(MakeJitConstant("DECLARE_GROUPED_DIMS_INDEXES(data_idx)", generate_dims_indexes_calculation(grouped_dims))); + jit.AddConstant(MakeJitConstant("SUBGROUPS_NUMBER", total_subgroups_number)); + + const auto iterations_number = total_grouped_elements / per_iter_elements_number; + + jit.AddConstant(MakeJitConstant("ITERATIONS_NUMBER", iterations_number)); + jit.AddConstant(MakeJitConstant("ASYMMETRIC_QUANTIZATION", params.use_asymmetric_quantization)); + 
jit.AddConstant(MakeJitConstant("GROUP_SCALES_WITH_ZP", params.combine_scales_and_zp)); + + bool rearrange_scales_order = false; + const auto& scales_output_order = params.scales_output_order; + if (!scales_output_order.empty()) { + for (size_t i = 0; i < scales_output_order.size(); i++) { + if (i != scales_output_order[i]) { + rearrange_scales_order = true; + break; + } + } + } + + if (rearrange_scales_order) { + const std::array default_dim_order = {'b', 'f', 'y', 'x'}; + std::stringstream ss; + for (size_t i = 0; i < scales_output_order.size(); i++) { + ss << default_dim_order[scales_output_order[i]]; + + if (i + 1 != scales_output_order.size()) + ss << ", "; + } + + jit.AddConstant(MakeJitConstant("SCALES_OUTPUT_ORDER", ss.str())); + } + + for (size_t i = 0; i < group_sizes.size(); i++) { + jit.AddConstant(MakeJitConstant("GROUP_SIZE_DIM" + std::to_string(i), group_sizes[i])); + } + + return jit; +} + +CommonDispatchData DynamicQuantizeKernelKVCache::SetDefault(const dynamic_quantize_params& params) const { + CommonDispatchData dispatchData; + + const auto& input_dims = get_normalized_dims(params.inputs[0]); + const auto total_batched_elements = get_elements_number_per_batch(params); + const auto total_grouped_elements = get_elements_number_per_group(params); + const auto total_subgroups_number = total_grouped_elements / input_dims.back().v; + + dispatchData.gws = {subgroup_size, total_subgroups_number, total_batched_elements}; + dispatchData.lws = {subgroup_size, total_subgroups_number, 1}; + + return dispatchData; +} + +void DynamicQuantizeKernelKVCache::GetUpdateDispatchDataFunc(KernelData& kd) const { + kd.update_dispatch_data_func = [this](const Params& params, KernelData& kd) { + const auto& prim_params = static_cast(params); + auto dispatchData = SetDefault(prim_params); + OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func"); + kd.kernels[0].params.workGroups.global = dispatchData.gws; + 
kd.kernels[0].params.workGroups.local = dispatchData.lws; + kd.kernels[0].skip_execution = false; + + if (prim_params.append_axis != -1) { + kd.kernels[0].params.scalars.clear(); + + ScalarDescriptor axis_offset; + axis_offset.t = ScalarDescriptor::Types::UINT32; + axis_offset.v.u32 = static_cast(prim_params.axis_offset); + kd.kernels[0].params.scalars.push_back(axis_offset); + } + }; +} + +KernelsData DynamicQuantizeKernelKVCache::GetKernelsData(const Params& params) const { + assert(params.GetType() == KernelType::DYNAMIC_QUANTIZE); + + if (!Validate(params)) + return {}; + + const dynamic_quantize_params& prim_params = static_cast(params); + auto dispatchData = SetDefault(prim_params); + + KernelData kd = KernelData::Default(params); + + auto cldnn_jit = GetJitConstants(prim_params); + auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, params); + auto jit = CreateJit(kernelName, cldnn_jit, entry_point); + + GetUpdateDispatchDataFunc(kd); + + auto& kernel = kd.kernels[0]; + FillCLKernelData(kernel, + dispatchData, + params.engineInfo, + kernelName, + jit, + entry_point, + EXE_MODE_DEFAULT, + false, + false, + 1, + GetFusedPrimitiveInputsCount(params), + static_cast(prim_params.outputs.size()), + prim_params.is_shape_agnostic); + + if (prim_params.append_axis != -1) + kernel.params.arguments.push_back({ArgumentDescriptor::Types::SCALAR, 0}); + + return {kd}; +} + +KernelsPriority DynamicQuantizeKernelKVCache::GetKernelsPriority(const Params& /*params*/) const { + return FORCE_PRIORITY_3; +} + +bool DynamicQuantizeKernelKVCache::Validate(const Params& params) const { + if (!KernelBaseOpenCL::Validate(params)) + return false; + + const auto& dq_params = static_cast(params); + + const auto& group_sizes = dq_params.group_sizes; + const auto& input_dims = get_normalized_dims(dq_params.inputs[0]); + const size_t non_compressed_dims_number = std::count(group_sizes.begin(), group_sizes.end(), 1); + + if (non_compressed_dims_number == group_sizes.size()) + 
return false; + + for (size_t i = 0; i < group_sizes.size(); i++) { + if (group_sizes[i] != 1 && input_dims[i].is_dynamic) { + return false; + } + } + + // Last dimension should be static, reduced by group_sizes configuration and divisible by 16 + if (group_sizes.back() == 1 || input_dims.back().is_dynamic || input_dims.back().v % subgroup_size != 0) + return false; + + // Limit the size of the innermost dimension + if (input_dims.back().v > 256) + return false; + + // In case of HEADS_NUM * HEAD_SIZE group size, check that it fits into the supported workgroup size limit + if (get_elements_number_per_group(dq_params) / input_dims.back().v >= params.engineInfo.maxWorkGroupSize / subgroup_size) + return false; + + return true; +} +} // namespace kernel_selector + diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.cpp index 3b214848e2f8ad..bd3d0f87cdc931 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.cpp @@ -26,6 +26,41 @@ JitConstants DynamicQuantizeKernelRef::GetJitConstants(const dynamic_quantize_pa jit.Merge(GetTensorFriendlyWorkGroupsJit(params.outputs[0])); + bool rearrange_scales = false; + const auto& scales_output_order = params.scales_output_order; + if (!scales_output_order.empty()) { + for (size_t i = 0; i < scales_output_order.size(); i++) { + if (i != scales_output_order[i]) { + rearrange_scales = true; + break; + } + } + } + + if (rearrange_scales) { + const std::array default_dim_order = {'b', 'f', 'y', 'x'}; + + std::stringstream ss; + for (size_t i = 0; i < scales_output_order.size(); i++) { + ss << default_dim_order[scales_output_order[i]]; + + if (i + 1 != scales_output_order.size()) + ss << ", "; + } + + 
jit.AddConstant(MakeJitConstant("SCALES_OUTPUT_ORDER", ss.str())); + } + + jit.AddConstant(MakeJitConstant("ASYMMETRIC_QUANTIZATION", params.use_asymmetric_quantization)); + jit.AddConstant(MakeJitConstant("GROUP_SCALES_WITH_ZP", params.combine_scales_and_zp)); + + auto group_sizes = params.group_sizes; + group_sizes.resize(std::min((size_t)4, group_sizes.size()), 1); + + for (size_t i = 0; i < group_sizes.size(); i++) { + jit.AddConstant(MakeJitConstant("GROUP_SIZE_DIM" + std::to_string(i), group_sizes[i])); + } + return jit; } @@ -34,7 +69,15 @@ CommonDispatchData DynamicQuantizeKernelRef::SetDefault(const dynamic_quantize_p CommonDispatchData dispatchData; OPENVINO_ASSERT(params.outputs[0].GetLayout() == DataLayout::bfyx, "It supports only 4d tensor"); - dispatchData.gws = {params.outputs[0].Batch().v * params.outputs[0].Feature().v, 1, 1}; + + auto group_sizes = params.group_sizes; + group_sizes.resize(std::min((size_t)4, group_sizes.size()), 1); + auto batch_size = group_sizes[0] == 1 ? params.outputs[0].Batch().v : 1; + auto feature_size = group_sizes[1] == 1 ? params.outputs[0].Feature().v : 1; + auto y_size = group_sizes[2] == 1 ? params.outputs[0].Y().v : 1; + auto x_size = group_sizes[3] == 1 ? 
params.outputs[0].X().v : 1; + + dispatchData.gws = {batch_size * feature_size, y_size, x_size}; dispatchData.lws = {1, 1, 1}; return dispatchData; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.h index c46b6b2685a940..d437d6ab6eb1f6 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.h @@ -13,6 +13,13 @@ namespace kernel_selector { struct dynamic_quantize_params : public base_params { dynamic_quantize_params() : base_params(KernelType::DYNAMIC_QUANTIZE) {} size_t fc_ifm_size = 0; + + int64_t append_axis = -1; + int64_t axis_offset = -1; + std::vector group_sizes; + std::vector scales_output_order; + bool use_asymmetric_quantization = false; + bool combine_scales_and_zp = false; }; class DynamicQuantizeKernelRef : public KernelBaseOpenCL { diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_selector.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_selector.cpp index 6ca9fbd2f5bd76..d38cf6ad2b4e52 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_selector.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_selector.cpp @@ -5,11 +5,13 @@ #include "dynamic_quantize_kernel_selector.h" #include "dynamic_quantize_kernel_ref.h" #include "dynamic_quantize_kernel_opt.h" +#include "dynamic_quantize_kernel_kv_cache.h" namespace kernel_selector { dynamic_quantize_kernel_selector::dynamic_quantize_kernel_selector() { Attach(); Attach(); + Attach(); } KernelsData dynamic_quantize_kernel_selector::GetBestKernels(const Params& params) const { diff --git 
a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp index 6604def1a69093..178e1ea405b6bb 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp @@ -8,6 +8,7 @@ #include #include "common_types.h" +static constexpr size_t lws_batches = 8; static constexpr size_t simd = 16; static constexpr size_t min_quantize_grp_size = 32; static constexpr size_t min_slm_size = 256; @@ -50,6 +51,17 @@ static std::pair get_output_aligned_bf_size(const fully_connecte return {output_b, output_f}; } +static bool is_weight_dyn_quantizable(const fully_connected_params& params) { + auto weight_type = params.weights.GetDType(); + if (weight_type == WeightsType::INT4 || weight_type == WeightsType::UINT4) + return true; + // UINT8 weight type is supported by FC dyn-quantize(with SLM). 
+ if (weight_type == WeightsType::UINT8) + return true; + + return false; +} + // DYNAMIC_QUANTIZE static size_t get_dynamic_quantize_group_size(const fully_connected_params& params) { auto dynamic_quantization_group_size = params.dynamic_quantization_group_size; @@ -91,7 +103,7 @@ static size_t get_dynamic_quantize_group_size(const fully_connected_params& para return 0; } -static bool should_dynamic_quantize(const fully_connected_params& params) { +static bool should_dynamic_quantize(const fully_connected_params& params, bool print_log = false) { size_t dynamic_quantization_group_size = get_dynamic_quantize_group_size(params); if (params.inputs[0].GetFirstElementOffset() != 0) @@ -110,11 +122,17 @@ static bool should_dynamic_quantize(const fully_connected_params& params) { const size_t scale_group_size = params.weights.IFM().v / params.decompression_scale.Feature().v; if ((scale_group_size % simd == 0) && (input_f % dynamic_quantization_group_size == 0) && (params.is_shape_agnostic || (params.inputs[0].Batch().v > 1 && input_b > min_slm_size)) && - params.inputs[0].GetDType() == Datatype::F16 && - (params.weights.GetDType() == WeightsType::INT4 || params.weights.GetDType() == WeightsType::UINT4)) { - GPU_DEBUG_TRACE_DETAIL << " Dynamic quantizing for FC : scale_group_size " << scale_group_size << ", Input (" << - kernel_selector::toString(params.inputs[0].GetDType()) << ", " << kernel_selector::toString(params.outputs[0].GetLayout()) << - ") B: " << params.inputs[0].Batch().v << ", F: " << params.inputs[0].Feature().v << ", Y: " << params.inputs[0].Y().v << std ::endl; + params.inputs[0].GetDType() == Datatype::F16 && is_weight_dyn_quantizable(params)) { + if (print_log) { + GPU_DEBUG_TRACE_DETAIL << " Dynamic quantizing for FC : scale_group_size: " << scale_group_size << + ", Dyn-quan group size: " << dynamic_quantization_group_size << + ", Type(I:" << kernel_selector::toString(params.inputs[0].GetDType()) << + ", O:" << 
kernel_selector::toString(params.outputs[0].GetDType()) << + ", W:" << kernel_selector::toString(params.weights.GetDType()) << + "), Format(W:" << kernel_selector::toString(params.weights.GetLayout()) << + ") B: " << params.inputs[0].Batch().v << ", F: " << params.inputs[0].Feature().v << + ", Y: " << params.inputs[0].Y().v << std ::endl; + } return true; } @@ -204,8 +222,9 @@ DeviceFeaturesKey FullyConnected_bf_tiled::get_required_device_features_key(cons } bool FullyConnected_bf_tiled::Validate(const Params& params) const { - if (!Parent::Validate(params)) + if (!Parent::Validate(params)) { return false; + } auto& fc_params = static_cast(params); auto& input = fc_params.inputs[0]; @@ -314,21 +333,21 @@ bool TuneParamsSelector::VerifyTuneParams(const fully_connected_params& params, if (tparams.tile_ofm * simd > 64) return false; - bool is_i4_u4 = (params.weights.GetDType() == WeightsType::INT4 || params.weights.GetDType() == WeightsType::UINT4); + bool is_dyn_quantable_type = is_weight_dyn_quantizable(params); if (tparams.kernel_type == FullyConnected_bf_tiled::KernelType::SLM) { const auto required_batch_alignment = 64; if (!params.is_shape_agnostic && (!IsAligned(output_b, required_batch_alignment) || output_b < min_slm_size)) return false; const auto required_tile_b = 8; - if ((tparams.tile_b != required_tile_b) && !is_i4_u4) + if ((tparams.tile_b != required_tile_b) && !is_dyn_quantable_type) return false; const auto required_tile_ofm = 2; if (tparams.tile_ofm != required_tile_ofm) return false; - if (params.weights.GetDType() != WeightsType::INT4 && params.weights.GetDType() != WeightsType::UINT4) + if (!is_dyn_quantable_type) return false; if (params.engineInfo.deviceType != dev_type::integrated_gpu) @@ -340,7 +359,7 @@ bool TuneParamsSelector::VerifyTuneParams(const fully_connected_params& params, return true; } - if (params.compressed && is_i4_u4) { + if (params.compressed && is_dyn_quantable_type) { if (!(tparams.tile_ofm == 2 || tparams.tile_ofm == 4)) 
return false; if (tparams.tile_ofm == 4 && tparams.outer_ofm == 2 && !is_suitable_outer_ofm(params, output_f)) @@ -382,11 +401,10 @@ FullyConnected_bf_tiled::GetAutoTuneParams(const fully_connected_params& params, while (max_tile_ofm * 2 * simd <= output_f && max_tile_ofm < 4) max_tile_ofm *= 2; - if (params.weights.GetDType() == WeightsType::UINT4 || params.weights.GetDType() == WeightsType::INT4) { + if (params.weights.GetDType() == WeightsType::UINT4 || params.weights.GetDType() == WeightsType::INT4 || + (is_weight_dyn_quantizable(params) && should_dynamic_quantize(params))) { + // Only 4bit weight type is fully optimized to use SLM. In default kernel, SLM is not applied to 8bit weight. if (!params.is_shape_agnostic && batch == 1) { - if (should_dynamic_quantize(params)) - return selector.Default(tune_params(1, 2, 4, 2, 1, 1, 1, EXE_MODE_DEFAULT)); - // Tuning for Meteor Lake if (is_weight_vertical(params, output_f)) { if (params.weights.GetLayout() == WeightsLayout::os_is_yx_osv32_isv2) { @@ -411,9 +429,11 @@ FullyConnected_bf_tiled::GetAutoTuneParams(const fully_connected_params& params, selector.Case(tune_params(16, 2, 2, 4, 1, 1, 1, EXE_MODE_DEFAULT, KernelType::SLM)) .Case(tune_params(16, 2, 1, 4, 1, 1, 1, EXE_MODE_DEFAULT, KernelType::SLM)); } + selector.Case(tune_params(8, 2, 2, 4, 1, 1, 1, EXE_MODE_DEFAULT, KernelType::SLM)) .Case(tune_params(8, 2, 1, 4, 1, 1, 1, EXE_MODE_DEFAULT, KernelType::SLM)); } + if (params.weights.GetLayout() == WeightsLayout::os_iyx_osv16) return selector.Default(tune_params(8, 1, 1, 4, 1, 1, 1, EXE_MODE_DEFAULT)); else if (params.weights.GetLayout() == WeightsLayout::os_is_yx_osv64_isv2) @@ -501,7 +521,6 @@ FullyConnected_bf_tiled::SetDefault(const fully_connected_params& params, int au auto batch_threads = threads.first; auto feature_threads = threads.second; - const size_t lws_batches = 8; const size_t aligned_batch = Align(batch_threads, lws_batches); // Each WG calculates 8x8 batches (TILE_B x LWS[2] size) const bool 
can_use_slm = tparams.kernel_type == KernelType::SLM; @@ -550,7 +569,6 @@ JitConstants FullyConnected_bf_tiled::GetJitConstants(const fully_connected_para WeightsType weights_dt = params.weights.GetDType(); if (weights_dt == WeightsType::UINT4 || weights_dt == WeightsType::INT4) { tile_k_ofm_packed /= 2; - jit.Merge(make_int4_packed_type_jit_constant("INT4_PACKED_TYPE", weights_dt, tile_k_ofm)); const size_t scale_group_size = params.weights.IFM().v / params.decompression_scale.Feature().v; // Do not use SCALE_POST_OP for SLM kernel, since it demonstrates worse performance @@ -581,7 +599,7 @@ JitConstants FullyConnected_bf_tiled::GetJitConstants(const fully_connected_para if (dispatchData.use_slm) { OPENVINO_ASSERT(dispatchData.tile_n == 2, "[GPU] Unsupported TILE_OFM size for SLM kernel configuration"); - OPENVINO_ASSERT(weights_dt == WeightsType::INT4 || weights_dt == WeightsType::UINT4, "[GPU] Unsupported FC weights type for SLM kernel configuration"); + OPENVINO_ASSERT(is_weight_dyn_quantizable(params), "[GPU] Unsupported FC weights type for SLM kernel configuration"); auto lws_batches = dispatchData.lws[2]; auto total_weights_elements = simd * dispatchData.tile_n * simd * dispatchData.tile_mk; // SIMD * TILE_OFM * SIMD * TILE_IFM @@ -608,15 +626,19 @@ JitConstants FullyConnected_bf_tiled::GetJitConstants(const fully_connected_para jit.AddConstant(MakeJitConstant("LWS_BATCHES", lws_batches)); jit.AddConstant(MakeJitConstant("FILTER_LOAD_ITERS", weights_load_iters)); - if (params.weights.GetLayout() == WeightsLayout::os_iyx_osv16) { - jit.AddConstant(MakeJitConstant("FILTER_ACTUAL_LOAD_BLOCK_SIZE", block_read_size / 2)); - jit.Merge(make_int4_packed_type_jit_constant("INT4_PACKED_TYPE_PRELOAD", params.weights.GetDType(), weights_elements_per_load / 2)); - } else if (params.weights.GetLayout() == WeightsLayout::os_is_yx_osv64_isv2) { - jit.AddConstant(MakeJitConstant("FILTER_ACTUAL_LOAD_BLOCK_SIZE", block_read_size / 2)); - 
jit.Merge(make_int4_packed_type_jit_constant("INT4_PACKED_TYPE_PRELOAD", params.weights.GetDType(), weights_elements_per_load / 2)); + if (weights_dt == WeightsType::INT4 || weights_dt == WeightsType::UINT4) { + if (params.weights.GetLayout() == WeightsLayout::os_iyx_osv16) { + jit.AddConstant(MakeJitConstant("FILTER_ACTUAL_LOAD_BLOCK_SIZE", block_read_size / 2)); + jit.Merge(make_int4_packed_type_jit_constant("INT4_PACKED_TYPE_PRELOAD", params.weights.GetDType(), weights_elements_per_load / 2)); + } else if (params.weights.GetLayout() == WeightsLayout::os_is_yx_osv64_isv2) { + jit.AddConstant(MakeJitConstant("FILTER_ACTUAL_LOAD_BLOCK_SIZE", block_read_size / 2)); + jit.Merge(make_int4_packed_type_jit_constant("INT4_PACKED_TYPE_PRELOAD", params.weights.GetDType(), weights_elements_per_load / 2)); + } else { + jit.AddConstant(MakeJitConstant("FILTER_ACTUAL_LOAD_BLOCK_SIZE", block_read_size)); + jit.Merge(make_int4_packed_type_jit_constant("INT4_PACKED_TYPE_PRELOAD", params.weights.GetDType(), weights_elements_per_load)); + } } else { jit.AddConstant(MakeJitConstant("FILTER_ACTUAL_LOAD_BLOCK_SIZE", block_read_size)); - jit.Merge(make_int4_packed_type_jit_constant("INT4_PACKED_TYPE_PRELOAD", params.weights.GetDType(), weights_elements_per_load)); } jit.AddConstant(MakeJitConstant("FILTER_LOAD_BLOCK_SIZE", block_read_size)); @@ -629,7 +651,6 @@ JitConstants FullyConnected_bf_tiled::GetJitConstants(const fully_connected_para if (should_dynamic_quantize(params)) { jit.AddConstant(MakeJitConstant("DYNAMIC_QUANTIZE", 1)); jit.AddConstant(MakeJitConstant("DQ_DECOMPRESSION_SCALE_POST_OP", 1)); - jit.AddConstant(MakeJitConstant("DQ_TYPE", "char")); jit.AddConstant(MakeJitConstant("QUANTIZE_GROUP_SIZE", quantize_grp_size)); } else { if (add_decompress_scale_post_op) @@ -637,6 +658,7 @@ JitConstants FullyConnected_bf_tiled::GetJitConstants(const fully_connected_para jit.AddConstant(MakeJitConstant("DYNAMIC_QUANTIZE", 0)); jit.AddConstant(MakeJitConstant("QUANTIZE_GROUP_SIZE", 
min_quantize_grp_size)); } + jit.AddConstant(MakeJitConstant("DQ_TYPE", "char")); jit.AddConstant(MakeJitConstant("IFM_SIZE", get_input_bf_size(params).second)); jit.AddConstant(MakeJitConstant("SIMD", simd)); @@ -659,9 +681,7 @@ JitConstants FullyConnected_bf_tiled::GetJitConstants(const fully_connected_para } auto max_tile_b_size = dispatchData.tile_m; - if (params.compressed && - params.is_shape_agnostic && - (weights_dt == WeightsType::UINT4 || weights_dt == WeightsType::INT4)) + if (params.compressed && params.is_shape_agnostic && is_weight_dyn_quantizable(params)) max_tile_b_size = std::max(max_tile_b_size, (uint32_t)8); jit.Merge(MakeConstantLoopUnrollJitConstants(max_tile_b_size)); @@ -772,8 +792,10 @@ void FullyConnected_bf_tiled::GetUpdateDispatchDataFunc(KernelData& kd) const { if (kd.internalBufferSizes[0] < input_size) { kd.internalBufferSizes.clear(); - kd.internalBufferSizes.push_back(input_size); // quantized input is char type - kd.internalBufferSizes.push_back(input_size / quantize_grp_size * 2); // de_quan_scale is half type + // quantized input is char type + kd.internalBufferSizes.push_back(input_size); + // half type of de_quan_scale and activation sum for each quantized group + kd.internalBufferSizes.push_back((input_size / quantize_grp_size) * 2 * 2); } kd.kernels[0].params.workGroups.global = {std::max((input_size / quantize_grp_size), (size_t)1), 1, 1}; @@ -800,7 +822,7 @@ KernelsData FullyConnected_bf_tiled::GetTunedKernelsDataByIndex(const Params &pa && (fc_params.weights.GetLayout() == WeightsLayout::oiyx || fc_params.weights.GetLayout() == WeightsLayout::os_is_yx_osv64_isv2) && (fc_params.weights.GetDType() == WeightsType::INT4 || fc_params.weights.GetDType() == WeightsType::UINT4) && is_weight_horizontal(fc_params, output_f)) { - // Large N + Small K case (horizontal weight) to use [osv64_isv2] + TILE_OFM 4 for batch 1 + // Large N + small K case (horizontal weight) to use [osv64_isv2] + TILE_OFM 4 for batch 1 weights_layout = 
WeightsLayout::os_is_yx_osv64_isv2; } else if (fc_params.compressed && fc_params.inputs[0].GetDType() == Datatype::F16 && (fc_params.weights.GetDType() == WeightsType::INT4 || fc_params.weights.GetDType() == WeightsType::UINT4) @@ -947,6 +969,8 @@ KernelsData FullyConnected_bf_tiled::GetMultiKernelsData(const Params ¶ms, auto& quan_kernel = kd.kernels[0]; DispatchData dyn_quan_dispatch = dispatchData; auto input_size = std::max(fc_params.inputs[0].PhysicalSize(), get_input_bf_size(fc_params).second); + if (!params.is_shape_agnostic) + input_size = std::max(input_size, Align(get_input_bf_size(fc_params).first, lws_batches) * get_input_bf_size(fc_params).second); dyn_quan_dispatch.gws = {input_size / quantize_grp_size, 1, 1}; dyn_quan_dispatch.lws = {16, 1, 1}; quan_kernel.params.workGroups.global = dyn_quan_dispatch.gws; @@ -958,7 +982,6 @@ KernelsData FullyConnected_bf_tiled::GetMultiKernelsData(const Params ¶ms, quan_cldnn_jit.AddConstant(MakeJitConstant("FC_KERNEL_DYNAMIC_QUANTIZE", 1)); auto quan_jit = CreateJit(kernelName, quan_cldnn_jit, quan_entry_point); - FillCLKernelData(quan_kernel, dyn_quan_dispatch, params.engineInfo, @@ -977,8 +1000,10 @@ KernelsData FullyConnected_bf_tiled::GetMultiKernelsData(const Params ¶ms, quan_kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0}); quan_kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); quan_kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1}); + // char type quantized input kd.internalBufferSizes.push_back(input_size); - kd.internalBufferSizes.push_back(input_size / quantize_grp_size * 2); + // half type of de_quan_scale and activation sum for each quantized group + kd.internalBufferSizes.push_back(input_size / quantize_grp_size * 2 * 2); kernel_number++; } kd.internalBufferDataType = Datatype::F16; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rope/rope_kernel_base.cpp 
b/src/plugins/intel_gpu/src/kernel_selector/kernels/rope/rope_kernel_base.cpp index a48632f6c45509..130c5a69d4262c 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/rope/rope_kernel_base.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rope/rope_kernel_base.cpp @@ -29,38 +29,15 @@ JitConstants RoPEKernelBase::GetJitConstants(const rope_params& params, RoPEKern if (params.slice_stop > params.slice_start) { jit.AddConstant(MakeJitConstant("ENABLE_SLICE", true)); - - auto f = toCodeString(params.inputs[0].Feature(), 1); - auto x = toCodeString(params.inputs[0].X(), 2); - auto y = toCodeString(params.inputs[0].Y(), 3); - - auto sliced_val = toCodeString(params.slice_stop - params.slice_start); - auto sliced_x = params.axis == 3 ? sliced_val : x; - auto sliced_y = params.axis == 2 ? sliced_val : y; - - jit.AddConstant(MakeJitConstant("SLICED_INPUT0_X_PITCH", 1)); - jit.AddConstant(MakeJitConstant("SLICED_INPUT0_Y_PITCH", sliced_x)); - jit.AddConstant(MakeJitConstant("SLICED_INPUT0_FEATURE_PITCH", sliced_x + "*" + sliced_y)); - jit.AddConstant(MakeJitConstant("SLICED_INPUT0_BATCH_PITCH", sliced_x + "*" + sliced_y + "*" + f)); - jit.AddConstant(MakeJitConstant("SLICED_INPUT0_OFFSET", 0)); jit.AddConstant(MakeJitConstant("SLICED_FROM_START", toCodeString(params.slice_start))); - if (params.axis == 2) { - jit.AddConstant(MakeJitConstant("SLICED_FROM_END", "(" + y + "-" + toCodeString(params.slice_stop) + ")")); - } else if (params.axis == 3) { - jit.AddConstant(MakeJitConstant("SLICED_FROM_END", "(" + x + "-" + toCodeString(params.slice_stop) + ")")); - } else { + if (params.axis != 2 && params.axis != 3) { OPENVINO_THROW("[GPU] Invalid axis value for RoPE operation"); } } if (params.transposed_input) { jit.AddConstant(MakeJitConstant("ENABLE_TRANSPOSE", true)); - jit.AddConstant(MakeJitConstant("TRANSPOSED_INPUT0_OFFSET", 0)); - jit.AddConstant(MakeJitConstant("TRANSPOSED_INPUT0_X_PITCH", 1)); - 
jit.AddConstant(MakeJitConstant("TRANSPOSED_INPUT0_Y_PITCH", "INPUT0_FEATURE_PITCH")); - jit.AddConstant(MakeJitConstant("TRANSPOSED_INPUT0_FEATURE_PITCH", "INPUT0_Y_PITCH")); - jit.AddConstant(MakeJitConstant("TRANSPOSED_INPUT0_BATCH_PITCH", "INPUT0_BATCH_PITCH")); } if (!params.is_chatglm && (params.inputs[1].has_dynamic_pad() || params.inputs[2].has_dynamic_pad())) { diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_base.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_base.cpp index 7556debd29df00..e2a538750d1615 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_base.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_base.cpp @@ -4,6 +4,7 @@ #include "sdpa_kernel_base.h" #include "kernel_selector_utils.h" +#include "intel_gpu/runtime/debug_configuration.hpp" namespace kernel_selector { @@ -73,6 +74,8 @@ JitConstants SDPAKernelBase::GetJitConstants(const sdpa_params& params) const { jit.AddConstant(MakeJitConstant("DO_BROADCAST_KEY_VALUE", GetBroadcastInputStr(params.inputs[0].GetDims().size(), params.conf.broadcast_axis, params.conf.group_size))); + } else { + jit.AddConstant(MakeJitConstant("BROADCAST_GROUP_SIZE", 1)); } jit.AddConstant(MakeJitConstant("IS_CAUSAL", params.conf.is_causal)); @@ -81,6 +84,21 @@ JitConstants SDPAKernelBase::GetJitConstants(const sdpa_params& params) const { jit.AddConstant(MakeJitConstant("HAS_SCALE_INPUT", params.inputs.size() > 4)); } + jit.AddConstant(MakeJitConstant("IS_KV_COMPRESSED", params.conf.is_kv_compressed)); + + if (params.conf.is_kv_compressed) { + jit.AddConstant(MakeJitConstant("USE_ASYMMETRIC_QUANTIZATION", params.conf.use_asymmetric_quantization)); + jit.AddConstant(MakeJitConstant("COMBINE_SCALES_AND_ZP", params.conf.combine_scales_and_zp)); + jit.AddConstant(MakeJitConstant("COMPRESSED_PER_HEAD", params.conf.per_head_quantization)); + jit.AddConstant(MakeJitConstant("KEY_COMPRESSION_SCALE", 
params.key_cache_comp_scale)); + jit.AddConstant(MakeJitConstant("VALUE_COMPRESSION_SCALE", params.value_cache_comp_scale)); + + if (params.conf.use_asymmetric_quantization && !params.conf.combine_scales_and_zp) { + jit.AddConstant(MakeJitConstant("KEY_COMPRESSION_ZP", params.key_cache_comp_zp)); + jit.AddConstant(MakeJitConstant("VALUE_COMPRESSION_ZP", params.value_cache_comp_zp)); + } + } + auto is_default_order = [](const std::vector& order) { for (size_t i = 0; i < order.size(); i++) if (order[i] != static_cast(i)) diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_base.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_base.h index 492e86ebcce5cc..493bd0acedea32 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_base.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_base.h @@ -88,6 +88,10 @@ struct sdpa_configuration { bool is_causal = false; bool has_alibi_input = false; + bool is_kv_compressed = false; + bool use_asymmetric_quantization = false; + bool combine_scales_and_zp = false; + bool per_head_quantization = false; // Paged Attention configuration bool is_paged_attention = false; @@ -110,6 +114,10 @@ struct sdpa_params : public base_params { int64_t indirect_axis = -1; DataTensor beam_table; + DataTensor key_cache_comp_scale; + DataTensor key_cache_comp_zp; + DataTensor value_cache_comp_scale; + DataTensor value_cache_comp_zp; sdpa_configuration conf; }; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_micro.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_micro.cpp index 974d4532c84e60..a6fa66f4799d3f 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_micro.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_micro.cpp @@ -344,6 +344,9 @@ bool SDPAKernelMicro::Validate(const Params& p) const { if (params.conf.head_size > 256) return false; + if 
(params.conf.is_kv_compressed) + return false; + return true; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_opt.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_opt.cpp index 6942e5f8ea4357..4e71064efbc895 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_opt.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_opt.cpp @@ -4,6 +4,7 @@ #include "sdpa_kernel_opt.h" #include "kernel_selector_utils.h" +#include "common_types.h" #include #include @@ -21,11 +22,16 @@ constexpr size_t subgroup_size = 16; } // namespace static size_t get_sg_number_scale_factor(const sdpa_params& sdpa_params, size_t kernel_type) { + const size_t optimal_scale_factor = 2; if (kernel_type == KernelsTypes::MULTI_TOKENS) { - const size_t optimal_scale_factor = 2; if (sdpa_params.conf.head_size * optimal_scale_factor <= sdpa_params.engineInfo.maxWorkGroupSize) { return optimal_scale_factor; } + } else if (kernel_type == KernelsTypes::SINGLE_TOKEN) { + if (sdpa_params.conf.head_size * optimal_scale_factor <= sdpa_params.engineInfo.maxWorkGroupSize && + sdpa_params.conf.head_size * optimal_scale_factor / subgroup_size <= subgroup_size) { + return optimal_scale_factor; + } } return 1; @@ -126,6 +132,7 @@ static std::string GetKernelName(std::string base_name, KernelsTypes type, const ParamsKey SDPAKernelOpt::GetSupportedKey() const { ParamsKey k; + k.EnableInputDataType(Datatype::INT8); k.EnableInputDataType(Datatype::F16); k.EnableInputDataType(Datatype::F32); k.EnableInputDataType(Datatype::INT32); @@ -154,6 +161,9 @@ bool SDPAKernelOpt::Validate(const Params& p) const { if (params.conf.head_size < 1 || params.conf.head_size % subgroup_size != 0) return false; + if (params.conf.use_asymmetric_quantization && !params.conf.combine_scales_and_zp) + return false; + return true; } @@ -233,10 +243,11 @@ CommonDispatchData SDPAKernelOpt::SetDefault(const sdpa_params& params, size_t k const 
size_t target_seq_len_block_size = kernel_idx == 1 ? get_target_seq_len_block_size() : 1; if (kernel_idx == KernelsTypes::SINGLE_TOKEN) { + const size_t sg_num_scale = get_sg_number_scale_factor(params, kernel_idx); dispatch_data.gws = { batch_size * heads_num, CeilDiv(target_seq_len, target_seq_len_block_size), - head_size * num_of_partitions }; - dispatch_data.lws = { 1, 1, head_size }; + head_size * num_of_partitions * sg_num_scale }; + dispatch_data.lws = { 1, 1, head_size * sg_num_scale }; } else if (kernel_idx == KernelsTypes::MULTI_TOKENS) { const size_t sg_num_scale = get_sg_number_scale_factor(params, kernel_idx); dispatch_data.gws = { batch_size * heads_num, @@ -309,8 +320,20 @@ KernelsData SDPAKernelOpt::GetKernelsData(const Params& params) const { static_cast(prim_params.outputs.size()), prim_params.is_shape_agnostic); - if (prim_params.indirect_axis != -1 && kernel_idx != KernelsTypes::FINALIZATION) - kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, static_cast(prim_params.inputs.size())}); + auto beam_table_idx = prim_params.inputs.size(); + if (prim_params.conf.is_kv_compressed && kernel_idx != KernelsTypes::FINALIZATION) { + auto key_cache_compression_scale_idx = static_cast(prim_params.inputs.size()); + auto value_cache_compression_scale_idx = static_cast(prim_params.inputs.size() + 1); + + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, key_cache_compression_scale_idx}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, value_cache_compression_scale_idx}); + + beam_table_idx += 2; + } + + if (prim_params.indirect_axis != -1 && kernel_idx != KernelsTypes::FINALIZATION) { + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, static_cast(beam_table_idx)}); + } kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1}); diff --git 
a/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_ref.cpp index 579c4bc06c17e2..0d551883b6c385 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_ref.cpp @@ -12,6 +12,7 @@ namespace kernel_selector { ParamsKey SDPAKernelRef::GetSupportedKey() const { ParamsKey k; k.EnableInputDataType(Datatype::F16); + k.EnableInputDataType(Datatype::INT8); k.EnableInputDataType(Datatype::F32); // beam table input k.EnableInputDataType(Datatype::INT32); @@ -74,8 +75,26 @@ KernelsData SDPAKernelRef::GetKernelsData(const Params& params) const { "", false, false, static_cast(prim_params.inputs.size()), GetFusedPrimitiveInputsCount(params), 1, prim_params.is_shape_agnostic); - if (prim_params.indirect_axis != -1) - kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, static_cast(prim_params.inputs.size())}); + auto beam_table_idx = prim_params.inputs.size(); + if (prim_params.conf.is_kv_compressed) { + auto key_cache_compression_scale_idx = static_cast(prim_params.inputs.size()); + auto value_cache_compression_scale_idx = static_cast(prim_params.inputs.size() + 1); + + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, key_cache_compression_scale_idx}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, value_cache_compression_scale_idx}); + + if (prim_params.conf.use_asymmetric_quantization && !prim_params.conf.combine_scales_and_zp) { + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, key_cache_compression_scale_idx + 2}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, value_cache_compression_scale_idx + 2}); + beam_table_idx += 2; + } + + beam_table_idx += 2; + } + + if (prim_params.indirect_axis != -1) { + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 
static_cast(beam_table_idx)}); + } kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); diff --git a/src/plugins/intel_gpu/src/plugin/common_utils.cpp b/src/plugins/intel_gpu/src/plugin/common_utils.cpp index 2daeb09c90a5bf..ddd6b5677adc45 100644 --- a/src/plugins/intel_gpu/src/plugin/common_utils.cpp +++ b/src/plugins/intel_gpu/src/plugin/common_utils.cpp @@ -83,9 +83,11 @@ void convert_and_copy(const void* src_ptr, ov::element::Type src_et, void* dst_p // For state conversions CASE(ov::element::f32, ov::element::f32, float, float); - CASE(ov::element::f16, ov::element::f16, ov::float16, ov::float16); CASE(ov::element::f32, ov::element::f16, float, ov::float16); CASE(ov::element::f16, ov::element::f32, ov::float16, float); + CASE(ov::element::f16, ov::element::f16, ov::float16, ov::float16); + CASE(ov::element::bf16, ov::element::f32, ov::bfloat16, float); + CASE(ov::element::bf16, ov::element::f16, ov::bfloat16, ov::float16); OPENVINO_THROW("[GPU] Unsupported element types combination for copy: ", src_et, " -> ", dst_et); } diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp index 15ff4447b4bafe..18e7a88fc42f3e 100644 --- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp +++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp @@ -5,6 +5,7 @@ #include "openvino/runtime/iplugin.hpp" #include "openvino/runtime/intel_gpu/properties.hpp" #include "openvino/runtime/internal_properties.hpp" +#include "openvino/util/weights_path.hpp" #include "intel_gpu/graph/serialization/binary_buffer.hpp" #include "intel_gpu/runtime/itt.hpp" @@ -169,14 +170,17 @@ std::shared_ptr CompiledModel::create_infer_request() co void CompiledModel::export_model(std::ostream& model) const { // If ov::CacheMode::OPTIMIZE_SIZE is set, do the export iff it's possible to do weightless caching // which requires the weights_path. 
- if (m_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE && - m_config.get_property(ov::weights_path).empty()) + ov::CacheMode cache_mode = m_config.get_property(ov::cache_mode); + std::string weights_path = m_config.get_property(ov::weights_path); + if (cache_mode == ov::CacheMode::OPTIMIZE_SIZE && + !ov::util::validate_weights_path(weights_path)) return; OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::export_model"); OPENVINO_ASSERT(!m_graphs.empty(), "[GPU] Model not loaded"); cldnn::BinaryOutputBuffer ob(model); + ob << cldnn::make_data(&cache_mode, sizeof(ov::CacheMode)); // Inputs { @@ -257,6 +261,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const { ov::PropertyName{ov::hint::num_requests.name(), PropertyMutability::RO}, ov::PropertyName{ov::hint::inference_precision.name(), PropertyMutability::RO}, ov::PropertyName{ov::hint::dynamic_quantization_group_size.name(), PropertyMutability::RO}, + ov::PropertyName{ov::hint::activations_scale_factor.name(), PropertyMutability::RO}, ov::PropertyName{ov::device::id.name(), PropertyMutability::RO}, ov::PropertyName{ov::execution_devices.name(), PropertyMutability::RO}, }; @@ -288,5 +293,13 @@ std::shared_ptr CompiledModel::create_sync_infer_request( return std::make_shared(std::static_pointer_cast(shared_from_this())); } + +void CompiledModel::release_memory() { +#ifdef ENABLE_ONEDNN_FOR_GPU + auto capacity = dnnl::get_primitive_cache_capacity(); + dnnl::set_primitive_cache_capacity(0); + dnnl::set_primitive_cache_capacity(capacity); +#endif +} } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/multi_tensor_variable_state.cpp b/src/plugins/intel_gpu/src/plugin/multi_tensor_variable_state.cpp index 7574b664b6b4b7..8173a29c1b35f8 100644 --- a/src/plugins/intel_gpu/src/plugin/multi_tensor_variable_state.cpp +++ b/src/plugins/intel_gpu/src/plugin/multi_tensor_variable_state.cpp @@ -152,5 +152,64 @@ VariableState::Ptr 
VariableStateIndirectKVCache::get_beam_table_state() const { return m_hidden_states[1]; } +VariableStateIndirectKVCacheCompressed::VariableStateIndirectKVCacheCompressed( + const VariableStateInfo& info, + std::shared_ptr context, + std::shared_ptr shape_predictor, + const std::vector& output_layouts, + size_t beam_idx, + size_t concat_idx, + bool has_zp_state = false) + : VariableStateIndirectKVCache(info, context, shape_predictor, beam_idx, concat_idx), + m_has_zp_state(has_zp_state) { + OPENVINO_ASSERT((has_zp_state && output_layouts.size() == 3) || + (!has_zp_state && output_layouts.size() == 2), + "[GPU] Unexpected number of output layouts for VariableStateIndirectKVCacheCompressed"); + + const auto compression_scale_layout = output_layouts[1]; + VariableStateInfo compression_scale_state_info(info.m_id + "/comp_scale", compression_scale_layout); + m_hidden_states.push_back(std::make_shared(compression_scale_state_info, context, shape_predictor)); + + if (has_zp_state) { + const auto compression_zp_layout = output_layouts[2]; + VariableStateInfo compression_zp_state_info(info.m_id + "/comp_zp", compression_zp_layout); + m_hidden_states.push_back(std::make_shared(compression_zp_state_info, context, shape_predictor)); + } + + OPENVINO_ASSERT((!m_has_zp_state && m_hidden_states.size() == 3) || (m_has_zp_state && m_hidden_states.size() == 4), + "[GPU] VariableStateIndirectKVCacheCompressed expects 3 or 4 internal states to be initialized, " + "actual number is ", m_hidden_states.size()); +} + +VariableState::Ptr VariableStateIndirectKVCacheCompressed::get_compression_scale_state() const { + return m_hidden_states[2]; +} + +void VariableStateIndirectKVCacheCompressed::set_compression_scale_layout(const cldnn::layout& new_layout) { + m_hidden_states[2]->set_layout(new_layout); +} + +VariableState::Ptr VariableStateIndirectKVCacheCompressed::get_compression_zp_state() const { + OPENVINO_ASSERT(m_has_zp_state); + return m_hidden_states[3]; +} + +void 
VariableStateIndirectKVCacheCompressed::set_compression_zp_layout(const cldnn::layout& new_layout) { + OPENVINO_ASSERT(m_has_zp_state); + m_hidden_states[3]->set_layout(new_layout); +} + +bool VariableStateIndirectKVCacheCompressed::has_zp_state() const { + return m_has_zp_state; +} + +void VariableStateIndirectKVCacheCompressed::set_state(const ov::SoPtr& state) { + OPENVINO_THROW("[GPU] set_state API is supported only when KV-cache compression is disabled"); +} + +ov::SoPtr VariableStateIndirectKVCacheCompressed::get_state() const { + OPENVINO_THROW("[GPU] get_state API is supported only when KV-cache compression is disabled"); +} + } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/ops/dynamic_quantize.cpp b/src/plugins/intel_gpu/src/plugin/ops/dynamic_quantize.cpp index 0373251e45c051..85f28cbd711678 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/dynamic_quantize.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/dynamic_quantize.cpp @@ -7,6 +7,7 @@ #include "intel_gpu/plugin/common_utils.hpp" #include "intel_gpu/primitives/dynamic_quantize.hpp" + namespace ov { namespace intel_gpu { @@ -15,15 +16,12 @@ static void CreateDynamicQuantizeOp(ProgramBuilder& p, const std::shared_ptrget_group_sizes(); - for (size_t i = 0; i < group_sizes.size() - 1; i++) - OPENVINO_ASSERT(group_sizes[i] == 1, "Not supported group size at ", i, ": ", group_sizes[i]); - - OPENVINO_ASSERT(group_sizes.back() == UINT64_MAX, "Not supported group size: ", group_sizes.back()); auto prim = cldnn::dynamic_quantize(primitive_name, - inputs[0], - op->get_group_sizes().back(), - get_output_data_types(op)); + inputs[0], + op->get_attrs()); + + prim.num_outputs = op->get_output_size(); + p.add_primitive(*op, prim); } diff --git a/src/plugins/intel_gpu/src/plugin/ops/kv_cache.cpp b/src/plugins/intel_gpu/src/plugin/ops/kv_cache.cpp index c2ee336e48bf06..251c7346db9209 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/kv_cache.cpp +++ 
b/src/plugins/intel_gpu/src/plugin/ops/kv_cache.cpp @@ -3,6 +3,7 @@ // #include "intel_gpu/op/kv_cache.hpp" +#include "intel_gpu/op/kv_cache_compressed.hpp" #include "intel_gpu/plugin/program_builder.hpp" #include "intel_gpu/plugin/common_utils.hpp" #include "intel_gpu/primitives/kv_cache.hpp" @@ -12,6 +13,7 @@ namespace ov { namespace op { namespace internal { using KVCache = ov::intel_gpu::op::KVCache; +using KVCacheCompressed = ov::intel_gpu::op::KVCacheCompressed; } // namespace internal } // namespace op } // namespace ov @@ -26,11 +28,31 @@ void CreateKVCacheOp(ProgramBuilder& p, const std::shared_ptrget_input_partial_shape(0).size(); auto prim = cldnn::kv_cache(layer_type_name_ID(op), - inputs, - op->get_variable()->get_info(), - ov::util::normalize(op->get_concat_axis(), rank), - ov::util::normalize(op->get_gather_axis(), rank), - op->get_indirect()); + inputs, + op->get_variable()->get_info(), + ov::util::normalize(op->get_concat_axis(), rank), + ov::util::normalize(op->get_gather_axis(), rank), + op->get_indirect()); + + prim.num_outputs = op->get_output_size(); + prim.output_data_types = get_output_data_types(op); + + p.add_primitive(*op, prim); +} + +void CreateKVCacheCompressedOp(ProgramBuilder& p, const std::shared_ptr& op) { + validate_inputs_count(op, {4, 5}); + auto inputs = p.GetInputInfo(op); + int64_t rank = op->get_input_partial_shape(0).size(); + auto prim = cldnn::kv_cache(layer_type_name_ID(op), + inputs, + op->get_variable()->get_info(), + ov::util::normalize(op->get_concat_axis(), rank), + ov::util::normalize(op->get_gather_axis(), rank), + op->get_indirect()); + + prim.compressed = true; + prim.quantization_attributes = op->get_quantization_attrs(); prim.num_outputs = op->get_output_size(); prim.output_data_types = get_output_data_types(op); @@ -41,6 +63,7 @@ void CreateKVCacheOp(ProgramBuilder& p, const std::shared_ptr& op) { - validate_inputs_count(op, {4, 5, 6}); auto inputs = p.GetInputInfo(op); auto layerName = 
layer_type_name_ID(op); bool is_causal = op->get_causal(); + const auto compression_inputs = op->get_compression_inputs_num(); + validate_inputs_count(op, {4 + compression_inputs, 5 + compression_inputs, 6 + compression_inputs}); + int64_t indirect_axis = op->get_indirect_axis(); auto sdpa_prim = cldnn::scaled_dot_product_attention(layerName, inputs, @@ -75,7 +77,9 @@ static void CreateIndirectSDPAOp(ProgramBuilder& p, const std::shared_ptrget_input0_transpose_order(), op->get_input1_transpose_order(), op->get_input2_transpose_order(), - op->get_output_transpose_order()); + op->get_output_transpose_order(), + op->get_quantization_attrs(), + op->get_kv_compressed()); p.add_primitive(*op, sdpa_prim); } diff --git a/src/plugins/intel_gpu/src/plugin/ops/split.cpp b/src/plugins/intel_gpu/src/plugin/ops/split.cpp index 32f0c9a7f36e1d..8a36cfa30995fa 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/split.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/split.cpp @@ -93,20 +93,25 @@ static void CreateCommonSplitOp(ProgramBuilder& p, const std::shared_ptrget_output_size(); i++) { const auto outPartialShape = op->get_output_partial_shape(i); if (outPartialShape.size() != start_offset.size()) { - OPENVINO_THROW("Invalid dimesions in split layer: ", op->get_friendly_name(), + OPENVINO_THROW("Invalid dimensions in split layer: ", op->get_friendly_name(), " output: ", op->get_output_tensor(i).get_any_name()); } for (size_t idx = 0; idx < input_pshape.size(); idx++) { if ((outPartialShape[idx].get_length() + static_cast(start_offset[idx])) > input_pshape[idx].get_length()) { - OPENVINO_THROW("Invalid dimesions in split layer: ", op->get_friendly_name(), + OPENVINO_THROW("Invalid dimensions in split layer: ", op->get_friendly_name(), " output: ", op->get_output_tensor(idx).get_any_name()); } } auto offsetTensor = tensor_from_dims(start_offset, 0); auto outTensor = tensor_from_dims(op->get_output_shape(i), 1); - auto cropPrim = cldnn::crop(get_layer_name(i), inputs[0], outTensor, 
offsetTensor); - p.add_primitive(*op, cropPrim); + + const auto& users = op->get_output_target_inputs(i); + // don't add crop primitive if port is not used by anyone + if (users.size() != 0) { + auto cropPrim = cldnn::crop(get_layer_name(i), inputs[0], outTensor, offsetTensor); + p.add_primitive(*op, cropPrim); + } for (size_t idx = 0; idx < input_pshape.size(); idx++) { if (outPartialShape[idx] != input_pshape[idx]) { diff --git a/src/plugins/intel_gpu/src/plugin/ops/variable.cpp b/src/plugins/intel_gpu/src/plugin/ops/variable.cpp index d655e297e4a2c6..a4354c51092ac8 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/variable.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/variable.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // + #include "intel_gpu/plugin/program_builder.hpp" #include "intel_gpu/plugin/common_utils.hpp" #include "openvino/core/type/element_type.hpp" @@ -9,6 +10,7 @@ #include "openvino/op/read_value.hpp" #include "transformations/rt_info/original_precision_attribute.hpp" #include "intel_gpu/op/read_value.hpp" +#include "intel_gpu/op/read_values.hpp" #include "intel_gpu/primitives/assign.hpp" #include "intel_gpu/primitives/read_value.hpp" @@ -16,6 +18,7 @@ namespace ov { namespace op { namespace internal { using ReadValue = ov::intel_gpu::op::ReadValue; +using ReadValues = ov::intel_gpu::op::ReadValues; } // namespace internal } // namespace op } // namespace ov @@ -39,7 +42,7 @@ void CreateVariableAccessPrimitive(ProgramBuilder &p, const std::shared_ptr CreateVariableAccessPrimitive(p, op, op->get_variable_id()); } +void CreateReadValuesOp(ProgramBuilder& p, const std::shared_ptr& op) { + std::vector variable_layouts; + for (size_t i = 0; i < op->get_output_size(); i++) { + const auto output_pshape = op->get_output_partial_shape(i); + const auto output_dtype = cldnn::element_type_to_data_type(op->get_output_element_type(i)); + const auto output_format = cldnn::format::get_default_format(output_pshape.size()); + 
variable_layouts.emplace_back(output_pshape, output_dtype, output_format); + } + + auto inputs = p.GetInputInfo(op); + auto user_specified_type = get_original_precision(op); + auto prim = cldnn::read_value{layer_type_name_ID(op), + inputs, + op->get_variable_id(), + variable_layouts, + user_specified_type}; + + p.add_primitive(*op, prim); +} + } // namespace REGISTER_FACTORY_IMPL(v3, Assign); @@ -89,6 +112,7 @@ REGISTER_FACTORY_IMPL(v6, Assign); REGISTER_FACTORY_IMPL(v3, ReadValue); REGISTER_FACTORY_IMPL(v6, ReadValue); REGISTER_FACTORY_IMPL(internal, ReadValue); +REGISTER_FACTORY_IMPL(internal, ReadValues); } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 9aba7ee1a117eb..7d010a9b590e2e 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -35,6 +35,7 @@ #include "openvino/runtime/performance_heuristics.hpp" #include "openvino/runtime/properties.hpp" #include "openvino/util/common_util.hpp" +#include "openvino/util/weights_path.hpp" #include "transformations/common_optimizations/dimension_tracking.hpp" #include "transformations/init_node_info.hpp" #include "transformations/rt_info/fused_names_attribute.hpp" @@ -330,8 +331,16 @@ std::shared_ptr Plugin::import_model(std::istream& model, cldnn::BinaryInputBuffer ib(model, context_impl->get_engine()); + ov::CacheMode cache_mode; + ib >> cldnn::make_data(&cache_mode, sizeof(ov::CacheMode)); + + if (cache_mode != config.get_property(ov::cache_mode)) { + return nullptr; + } + + std::string weights_path = config.get_property(ov::weights_path); if (config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE && - config.get_property(ov::weights_path).empty()) { + !ov::util::validate_weights_path(weights_path)) { return nullptr; } @@ -540,6 +549,7 @@ std::vector Plugin::get_caching_properties() const { ov::PropertyName{ov::hint::execution_mode.name(), 
PropertyMutability::RW}, ov::PropertyName{ov::hint::performance_mode.name(), PropertyMutability::RW}, ov::PropertyName{ov::hint::dynamic_quantization_group_size.name(), PropertyMutability::RW}, + ov::PropertyName{ov::hint::activations_scale_factor.name(), PropertyMutability::RW}, }; return caching_properties; @@ -585,7 +595,8 @@ std::vector Plugin::get_supported_properties() const { ov::PropertyName{ov::hint::inference_precision.name(), PropertyMutability::RW}, ov::PropertyName{ov::hint::enable_cpu_pinning.name(), PropertyMutability::RW}, ov::PropertyName{ov::device::id.name(), PropertyMutability::RW}, - ov::PropertyName{ov::hint::dynamic_quantization_group_size.name(), PropertyMutability::RW} + ov::PropertyName{ov::hint::dynamic_quantization_group_size.name(), PropertyMutability::RW}, + ov::PropertyName{ov::hint::activations_scale_factor.name(), PropertyMutability::RW} }; return supported_properties; diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp index 510d715e7ac805..899110872ba633 100644 --- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp +++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp @@ -305,12 +305,15 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptrorigin_op_name = op.get_friendly_name(); prim->origin_op_type_name = op.get_type_name(); - if (auto data_prim = dynamic_cast(prim.get())) { - auto rt_info = op.get_rt_info(); - auto weightless_cache_attr = rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static()); - if (weightless_cache_attr != rt_info.end()) { - data_prim->bin_offset = weightless_cache_attr->second.as().bin_offset; - data_prim->original_size = weightless_cache_attr->second.as().original_size; + if (this->m_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE) { + if (auto data_prim = dynamic_cast(prim.get())) { + auto rt_info = op.get_rt_info(); + auto weightless_cache_attr = 
rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static()); + if (weightless_cache_attr != rt_info.end()) { + data_prim->bin_offset = weightless_cache_attr->second.as().bin_offset; + data_prim->original_size = + weightless_cache_attr->second.as().original_size; + } } } diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp index 58e99e037fb931..985336b801b9d3 100644 --- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp @@ -8,6 +8,7 @@ #include "openvino/core/validation_util.hpp" #include "intel_gpu/primitives/kv_cache.hpp" +#include "intel_gpu/primitives/read_value.hpp" #include "intel_gpu/plugin/usm_host_tensor.hpp" #include "intel_gpu/plugin/sync_infer_request.hpp" #include "intel_gpu/plugin/remote_context.hpp" @@ -591,6 +592,12 @@ void SyncInferRequest::allocate_input(const ov::Output& port, si auto element_type = port.get_element_type(); m_user_inputs[input_idx] = { create_host_tensor(shape, element_type), TensorOwner::PLUGIN }; + if (element_type == ov::element::string) { + // In case the element type is string and input data is an empty string, + // it produces the segmentation fault unless the each element of tensor.data is initialized. 
+ auto data = m_user_inputs.at(input_idx).ptr->data(); + std::uninitialized_fill_n(data, m_user_inputs.at(input_idx).ptr->get_size(), std::string()); + } ov::ISyncInferRequest::set_tensor(port, m_user_inputs.at(input_idx).ptr); } @@ -646,19 +653,40 @@ void SyncInferRequest::allocate_states() { bool indirect_kv_cache = false; int64_t beam_axis = 0; int64_t concat_axis = 0; + bool compressed = false; + bool has_zp_state = false; auto kv_cache_shape = vi.second.m_layout.get_partial_shape(); + std::vector states_layouts; for (auto& p : state_prims) { if (auto kv_cache_prim = dynamic_cast(p)) { indirect_kv_cache = kv_cache_prim->indirect; beam_axis = ov::util::normalize(kv_cache_prim->gather_axis, kv_cache_shape.size()); concat_axis = ov::util::normalize(kv_cache_prim->concat_axis, kv_cache_shape.size()); + compressed = kv_cache_prim->compressed; + has_zp_state = kv_cache_prim->get_compression_zp_inputs_num() > 0; + } else if (auto read_value = dynamic_cast(p)) { + states_layouts = read_value->output_layouts; } } - if (indirect_kv_cache) { - m_variables.emplace(vi.first, std::make_shared(vi.second, m_context, m_shape_predictor, beam_axis, concat_axis)); + if (compressed) { + m_variables.emplace(vi.first, std::make_shared(vi.second, + m_context, + m_shape_predictor, + states_layouts, + beam_axis, + concat_axis, + has_zp_state)); + } else if (indirect_kv_cache) { + m_variables.emplace(vi.first, std::make_shared(vi.second, + m_context, + m_shape_predictor, + beam_axis, + concat_axis)); } else { - m_variables.emplace(vi.first, std::make_shared(vi.second, m_context, m_shape_predictor)); + m_variables.emplace(vi.first, std::make_shared(vi.second, + m_context, + m_shape_predictor)); } } } diff --git a/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp b/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp index 315a93190fdc90..0ed17f36c21d22 100644 --- 
a/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp @@ -57,13 +57,16 @@ ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon auto reshape_const_m = wrap_type(); auto reshape_m = wrap_type({mul_m, reshape_const_m}, reshape_3d_to_2d); + auto mul2_const_m = wrap_type(); + auto mul2_m = wrap_type({reshape_m, mul2_const_m}); + auto transpose_input = std::make_shared(OutputVector{reshape_m, mul_m}); auto transpose_const_m = wrap_type(); auto transpose_m = wrap_type({transpose_input, transpose_const_m}); auto data_m = any_input(); auto bias_m = any_input(); - auto weights_input_m = std::make_shared(ov::OutputVector{reshape_m, transpose_m, mul_m}); + auto weights_input_m = std::make_shared(ov::OutputVector{reshape_m, transpose_m, mul_m, mul2_m}); auto fully_connected_m = wrap_type({data_m, weights_input_m, bias_m}); ov::matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](ov::pass::pattern::Matcher& m) { @@ -131,6 +134,7 @@ ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon std::shared_ptr fc_input_zp = optional_zero_point; std::shared_ptr fc_input_bias = pattern_map.at(bias_m).get_node_shared_ptr(); std::vector> result_nodes = {}; + if (has_transpose) { const auto& transpose = pattern_map.at(transpose_m).get_node_shared_ptr(); std::shared_ptr transpose_const = pattern_map.at(transpose_const_m).get_node_shared_ptr(); @@ -151,6 +155,11 @@ ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon } } + if (pattern_map.count(mul2_m)) { + auto mul2_op_const = std::dynamic_pointer_cast(pattern_map.at(mul2_const_m).get_node_shared_ptr()); + fc_input_scale = ov::op::util::eltwise_fold(fc_input_scale, mul2_op_const).get_node_shared_ptr(); + } + std::shared_ptr new_fc = nullptr; if (with_zero_point) { new_fc = std::make_shared(fc_input_a, @@ -171,6 +180,7 @@ 
ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon new_fc->set_friendly_name(fc->get_friendly_name()); ov::copy_runtime_info(m.get_matched_nodes(), result_nodes); ov::replace_node(fc, new_fc); + return true; }; diff --git a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp index eb16213bcb936c..68328160a98f82 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp @@ -61,7 +61,14 @@ DynamicQuantizeFullyConnected::DynamicQuantizeFullyConnected(uint64_t group_size auto rank = m_fc->get_input_partial_shape(0).size(); std::vector shape_group_size(rank, 1); shape_group_size.back() = group_size; - auto dyn_quan = std::make_shared(m_data, shape_group_size, element::f16); + + ov::op::internal::DynamicQuantize::Attributes config; + config.quantization_dt = element::i8; + config.quantization_type = ov::op::internal::DynamicQuantize::QuantizationType::Symmetric; + config.scale_dt = element::f16; + config.group_sizes = shape_group_size; + + auto dyn_quan = std::make_shared(m_data, config); auto optional_w_zp = m_fc->get_input_size() > 4 ? 
m_fc->get_input_node_shared_ptr(4) : std::make_shared(); auto output_type = m_fc->get_output_type(); diff --git a/src/plugins/intel_gpu/src/plugin/transformations/fc_per_layer_scaling.cpp b/src/plugins/intel_gpu/src/plugin/transformations/fc_per_layer_scaling.cpp new file mode 100644 index 00000000000000..618578919d4024 --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/transformations/fc_per_layer_scaling.cpp @@ -0,0 +1,81 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "fc_per_layer_scaling.hpp" + +#include "intel_gpu/op/fully_connected_compressed.hpp" +#include "intel_gpu/op/placeholder.hpp" + +#include "openvino/op/multiply.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/pass/pattern/op/pattern.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "openvino/pass/pattern/op/or.hpp" +#include "transformations/utils/utils.hpp" + +namespace ov { +namespace intel_gpu { + +FullyConnectedPerLayerScaling::FullyConnectedPerLayerScaling(float scale_factor) { + using namespace ov::pass::pattern; + + auto data_m = any_input(); + auto weights_m = any_input(); + auto bias_m = any_input(); + auto fc_compressed_wo_zp_m = wrap_type({data_m, weights_m, bias_m, any_input()}, consumers_count(1)); + auto fc_compressed_w_zp_m = wrap_type({data_m, weights_m, bias_m, any_input(), any_input()}, consumers_count(1)); + auto fc_compressed_m = std::make_shared(OutputVector{fc_compressed_wo_zp_m, fc_compressed_w_zp_m}); + + ov::matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](Matcher& m) { + if (scale_factor == 0.f || scale_factor == 1.f) + return false; + auto fc = std::dynamic_pointer_cast(m.get_match_root()); + if (!fc || transformation_callback(fc)) + return false; + + const auto& pattern_map = m.get_pattern_value_map(); + const auto& data = pattern_map.at(data_m).get_node_shared_ptr(); + const auto& bias = pattern_map.at(bias_m).get_node_shared_ptr(); + + ov::Shape scale_const_shape = {1}; + 
std::vector scale_down_value = {(1.f / scale_factor)}; + std::vector scale_up_value = {scale_factor}; + std::shared_ptr scale_down_const_f16 = std::make_shared(ov::element::f16, scale_const_shape, scale_down_value); + std::shared_ptr scale_down_const_f32 = std::make_shared(ov::element::f32, scale_const_shape, scale_down_value); + std::shared_ptr scale_up_const_f16 = std::make_shared(ov::element::f16, scale_const_shape, scale_up_value); + std::shared_ptr scale_up_const_f32 = std::make_shared(ov::element::f32, scale_const_shape, scale_up_value); + + std::shared_ptr scale_down_const = (data->get_element_type() == ov::element::f16) ? scale_down_const_f16 : scale_down_const_f32; + auto scale_down = std::make_shared(data, scale_down_const); + scale_down->set_friendly_name(fc->get_friendly_name() + "_scale_down"); + ov::copy_runtime_info(fc, scale_down); + fc->input(0).replace_source_output(scale_down); + + // If FC has bias as input, scaling must be applied to bias as well + if (!std::dynamic_pointer_cast(bias)) { + std::shared_ptr bias_scale_down_const = (bias->get_element_type() == ov::element::f16) ? scale_down_const_f16 : scale_down_const_f32; + auto bias_scale_down = std::make_shared(bias, bias_scale_down_const); + bias_scale_down->set_friendly_name(fc->get_friendly_name() + "_bias_scale_down"); + ov::copy_runtime_info(fc, bias_scale_down); + fc->input(2).replace_source_output(bias_scale_down); + } + + auto target_inputs = fc->get_output_target_inputs(0); + std::shared_ptr scale_up_const = (fc->get_element_type() == ov::element::f16) ? 
scale_up_const_f16 : scale_up_const_f32; + auto scale_up = std::make_shared(fc, scale_up_const); + scale_up->set_friendly_name(fc->get_friendly_name() + "_scale_up"); + ov::copy_runtime_info(fc, scale_up); + for (auto& in : target_inputs) { + in.replace_source_output(scale_up); + } + + return true; + }; + + auto m = std::make_shared(fc_compressed_m, "FullyConnectedPerLayerScaling"); + this->register_matcher(m, callback); +} + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/transformations/fc_per_layer_scaling.hpp b/src/plugins/intel_gpu/src/plugin/transformations/fc_per_layer_scaling.hpp new file mode 100644 index 00000000000000..5c0d7d07f5b411 --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/transformations/fc_per_layer_scaling.hpp @@ -0,0 +1,19 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/graph_rewrite.hpp" + +namespace ov { +namespace intel_gpu { + +class FullyConnectedPerLayerScaling: public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("FullyConnectedPerLayerScaling", "0"); + FullyConnectedPerLayerScaling(float scale_factor); +}; + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_compression.cpp b/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_compression.cpp new file mode 100644 index 00000000000000..561822f9661109 --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_compression.cpp @@ -0,0 +1,292 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "kv_cache_compression.hpp" + +#include "intel_gpu/op/kv_cache.hpp" +#include "intel_gpu/op/kv_cache_compressed.hpp" +#include "intel_gpu/op/indirect_sdpa.hpp" +#include "intel_gpu/op/read_value.hpp" +#include "intel_gpu/op/read_values.hpp" +#include "intel_gpu/plugin/common_utils.hpp" +#include 
"intel_gpu/runtime/debug_configuration.hpp" +#include "ov_ops/dynamic_quantize.hpp" + +#include "openvino/core/node_vector.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/op/concat.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/gather.hpp" +#include "openvino/op/parameter.hpp" +#include "openvino/op/sink.hpp" +#include "openvino/op/transpose.hpp" +#include "openvino/op/scaled_dot_product_attention.hpp" +#include "openvino/pass/graph_rewrite.hpp" +#include "openvino/pass/pattern/op/label.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "openvino/pass/pattern/op/or.hpp" +#include "openvino/pass/visualize_tree.hpp" +#include "transformations/utils/utils.hpp" + +#include + +namespace ov { +namespace intel_gpu { + +namespace { +std::vector get_variable_infos(const ov::op::util::VariableInfo& data_variable_info, + const ov::op::internal::DynamicQuantize::Attributes& quantization_attrs) { + std::vector infos; + + // Add initial data variable info + infos.push_back(data_variable_info); + + // Infer DQ shapes + ov::op::internal::DynamicQuantize dq; + dq.set_attrs(quantization_attrs); + + auto dq_shapes = ov::op::internal::DynamicQuantize::shape_infer(&dq, {data_variable_info.data_shape}); + + const auto variable_id = data_variable_info.variable_id; + const auto scale_shape = dq_shapes[1]; + const auto scale_dt = quantization_attrs.scale_dt; + + // Add scales variable info + infos.push_back(ov::op::util::VariableInfo{scale_shape, scale_dt, variable_id}); + + if (quantization_attrs.quantization_type == ov::op::internal::DynamicQuantize::QuantizationType::Asymmetric && + quantization_attrs.output_storage_type == ov::op::internal::DynamicQuantize::OutputStorageType::Planar) { + // Add zero points variable info + const auto zp_dt = quantization_attrs.zp_dt; + infos.push_back(ov::op::util::VariableInfo{scale_shape, zp_dt, variable_id}); + } + + return infos; +} + +std::shared_ptr + 
update_past_read_value(std::shared_ptr past_rv_node, + const ov::op::internal::DynamicQuantize::Attributes& quantization_attrs) { + auto variable = past_rv_node->get_variable(); + variable->update_data_type(quantization_attrs.quantization_dt); + + auto variable_infos = get_variable_infos(past_rv_node->get_variable()->get_info(), quantization_attrs); + auto new_past_rv_node = std::make_shared(); + + if (past_rv_node->get_input_size() == 0) { + new_past_rv_node = std::make_shared(past_rv_node->get_variable(), variable_infos); + } else { + auto initializer_dq = std::make_shared(past_rv_node->get_input_node_shared_ptr(0), + quantization_attrs); + initializer_dq->set_friendly_name(past_rv_node->get_input_node_shared_ptr(0)->get_friendly_name() + "_dyn_quan"); + ov::copy_runtime_info(past_rv_node->get_input_node_shared_ptr(0), initializer_dq); + + OutputVector initializer_outputs = { initializer_dq->output(0), initializer_dq->output(1) }; + + if (quantization_attrs.quantization_type == ov::op::internal::DynamicQuantize::QuantizationType::Asymmetric && + quantization_attrs.output_storage_type == ov::op::internal::DynamicQuantize::OutputStorageType::Planar) + initializer_outputs.push_back(initializer_dq->output(2)); + + new_past_rv_node = std::make_shared(initializer_outputs, past_rv_node->get_variable(), variable_infos); + } + + ov::copy_runtime_info(past_rv_node, new_past_rv_node); + past_rv_node->output(0).replace(new_past_rv_node->output(0)); + + return new_past_rv_node; +} + +std::shared_ptr + update_kv_cache(std::shared_ptr past_rv_node, + std::shared_ptr kv_cache_node, + const ov::op::internal::DynamicQuantize::Attributes& quantization_attrs) { + OutputVector kv_cache_inputs = { past_rv_node->output(0), + kv_cache_node->get_input_node_shared_ptr(1), + kv_cache_node->get_input_node_shared_ptr(2), + past_rv_node->output(1) }; + + if (quantization_attrs.quantization_type == ov::op::internal::DynamicQuantize::QuantizationType::Asymmetric && + 
quantization_attrs.output_storage_type == ov::op::internal::DynamicQuantize::OutputStorageType::Planar) + kv_cache_inputs.push_back(past_rv_node->output(2)); + + auto new_kv_cache = std::make_shared(kv_cache_inputs, + kv_cache_node->get_variable(), + kv_cache_node->get_concat_axis(), + kv_cache_node->get_gather_axis(), + quantization_attrs); + + new_kv_cache->set_friendly_name(kv_cache_node->get_friendly_name()); + ov::copy_runtime_info(kv_cache_node, new_kv_cache); + + return new_kv_cache; +} +} // namespace + +class KVCacheCompressionMatcher : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("KVCacheCompressionMatcher", "0"); + KVCacheCompressionMatcher(ov::element::Type compression_dt); +}; + +KVCacheCompressionMatcher::KVCacheCompressionMatcher(ov::element::Type compression_dt) { + using namespace ov::pass::pattern; + + if (compression_dt != element::i8) + return; + + const auto quantization_type = ov::op::internal::DynamicQuantize::QuantizationType::Asymmetric; + const auto output_storage_type = ov::op::internal::DynamicQuantize::OutputStorageType::InterleavedScalesZP; + + bool combine_scales_and_zp = output_storage_type == ov::op::internal::DynamicQuantize::OutputStorageType::InterleavedScalesZP; + GPU_DEBUG_LOG << "KV-cache compression configuration: " + << "dt=" << compression_dt << ", " + << "asym=" << (quantization_type == ov::op::internal::DynamicQuantize::QuantizationType::Asymmetric) << ", " + << "single_buffer_for_scales_and_zp=" << combine_scales_and_zp << "\n"; + + auto query = any_input(); + + auto key_past = wrap_type(); + auto key_new_token = any_input(); + auto key_beam_idx = any_input(); + auto key_cache = wrap_type({key_past, key_new_token, key_beam_idx}); + + auto value_past = wrap_type(); + auto value_new_token = any_input(); + auto value_beam_idx = any_input(); + auto value_cache = wrap_type({value_past, value_new_token, value_beam_idx}); + + auto input_attn_mask = any_input(); + auto input_scale = any_input(); + auto 
input_beam_table = any_input(); + + auto sdpa_without_attn_mask_m = wrap_type({ query, key_cache, value_cache, input_beam_table }); + auto sdpa_with_attn_mask_m = wrap_type({ query, key_cache, value_cache, input_attn_mask, input_beam_table }); + auto sdpa_with_attn_mask_and_scale_m = + wrap_type({ query, key_cache, value_cache, input_attn_mask, input_scale, input_beam_table }); + + auto sdpa = std::make_shared(OutputVector{sdpa_without_attn_mask_m, sdpa_with_attn_mask_m, sdpa_with_attn_mask_and_scale_m}); + + ov::matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](ov::pass::pattern::Matcher& m) { + if (transformation_callback(m.get_match_root())) { + return false; + } + + const auto& pattern_map = m.get_pattern_value_map(); + + auto query_node = pattern_map.at(query).get_node_shared_ptr(); + + auto key_new_token_node = pattern_map.at(key_new_token).get_node_shared_ptr(); + auto key_cache_node = std::dynamic_pointer_cast(pattern_map.at(key_cache).get_node_shared_ptr()); + auto value_cache_node = std::dynamic_pointer_cast(pattern_map.at(value_cache).get_node_shared_ptr()); + auto sdpa_node = std::dynamic_pointer_cast(m.get_match_root()); + + auto key_past_rv_node = std::dynamic_pointer_cast(pattern_map.at(key_past).get_node_shared_ptr()); + auto value_past_rv_node = std::dynamic_pointer_cast(pattern_map.at(value_past).get_node_shared_ptr()); + + auto data_rank = key_cache_node->get_input_partial_shape(0).size(); + auto get_shape_group_sizes = [&](const std::vector& transposed_order) { + std::vector group_sizes(data_rank, 1); + std::vector order = transposed_order; + if (transposed_order.size() != data_rank) { + order.resize(data_rank); + std::iota(order.begin(), order.end(), 0); + } + + group_sizes[order[data_rank - 1]] = UINT64_MAX; + + return group_sizes; + }; + + // Reorder scales in static order: [batch, num_heads, seq_len, head_size] + auto get_scales_output_order = [&](const std::vector& transposed_order) { + std::vector 
scales_zp_output_order(data_rank); + scales_zp_output_order[0] = transposed_order[0]; + scales_zp_output_order[1] = transposed_order[1]; + scales_zp_output_order[2] = transposed_order[2]; + scales_zp_output_order[3] = transposed_order[3]; + + return scales_zp_output_order; + }; + + ov::op::internal::DynamicQuantize::Attributes config; + config.quantization_type = quantization_type; + config.group_sizes = get_shape_group_sizes(sdpa_node->get_input1_transpose_order()); + config.quantization_dt = element::i8; + config.scale_dt = query_node->get_output_element_type(0); + config.scales_zp_output_order = get_scales_output_order(sdpa_node->get_input1_transpose_order()); + config.output_storage_type = output_storage_type; + + if (config.quantization_type == ov::op::internal::DynamicQuantize::QuantizationType::Asymmetric) + config.zp_dt = query_node->get_output_element_type(0); + + key_past_rv_node = update_past_read_value(key_past_rv_node, config); + value_past_rv_node = update_past_read_value(value_past_rv_node, config); + + auto new_key_cache = update_kv_cache(key_past_rv_node, key_cache_node, config); + auto new_value_cache = update_kv_cache(value_past_rv_node, value_cache_node, config); + + OutputVector sdpa_inputs; + // Add Query, Key, Value, attention_mask, scale inputs + for (size_t i = 0; i < sdpa_node->get_input_size() - 1; i++) + sdpa_inputs.push_back(sdpa_node->get_input_node_shared_ptr(i)); + + // Replace Key and Value inputs with compressed ones + sdpa_inputs[1] = new_key_cache->output(0); + sdpa_inputs[2] = new_value_cache->output(0); + + // Add Key and Value compression scales + sdpa_inputs.push_back(new_key_cache->output(2)); + sdpa_inputs.push_back(new_value_cache->output(2)); + + // Add Key and Value compression zero points + if (config.quantization_type == ov::op::internal::DynamicQuantize::QuantizationType::Asymmetric && + config.output_storage_type == ov::op::internal::DynamicQuantize::OutputStorageType::Planar) { + 
sdpa_inputs.push_back(new_key_cache->output(3)); + sdpa_inputs.push_back(new_value_cache->output(3)); + } + + auto input0_transpose_order = sdpa_node->get_input0_transpose_order(); + auto input1_transpose_order = sdpa_node->get_input1_transpose_order(); + auto input2_transpose_order = sdpa_node->get_input2_transpose_order(); + auto output_transpose_order = sdpa_node->get_output_transpose_order(); + + auto new_sdpa = std::make_shared(sdpa_inputs, + new_key_cache->output(1), + sdpa_node->get_causal(), + sdpa_node->get_indirect_axis(), + input0_transpose_order, + input1_transpose_order, + input2_transpose_order, + output_transpose_order, + config, + sdpa_node->get_output_type()); + + new_key_cache->set_friendly_name(key_cache_node->get_friendly_name()); + ov::copy_runtime_info(key_cache_node, new_key_cache); + + new_value_cache->set_friendly_name(value_cache_node->get_friendly_name()); + ov::copy_runtime_info(value_cache_node, new_value_cache); + + new_sdpa->set_friendly_name(sdpa_node->get_friendly_name()); + ov::copy_runtime_info(sdpa_node, new_sdpa); + + ov::replace_node(sdpa_node, new_sdpa); + return true; + }; + + auto m = std::make_shared(sdpa, "KVCacheCompressionMatcher"); + this->register_matcher(m, callback); +} + +bool KVCacheCompression::run_on_model(const std::shared_ptr& m) { + return pass::GraphRewrite::run_on_model(m); +} + +KVCacheCompression::KVCacheCompression(ov::element::Type compression_dt) { + add_matcher(compression_dt); +} + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_compression.hpp b/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_compression.hpp new file mode 100644 index 00000000000000..1587021a03ed36 --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_compression.hpp @@ -0,0 +1,43 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/graph_rewrite.hpp" + 
+namespace ov { +namespace intel_gpu { + + +/// Add dynamic quantization node and fuse it with KV cache operation +/// +/// ┌───────────┐ ┌─────────────┐ ┌───────────┐ ┌─────────────┐ +/// │ New Key │ │ New Value │ │ New Key │ │ New Value │ +/// └──────┬────┘ └──────┬──────┘ └──────┬────┘ └──────┬──────┘ +/// │ │ │ │ +/// │ f16 │ f16 │ f16 │ f16 +/// │ │ ==> │ │ +/// ┌─────────┐ ┌────────┴─────────┐ ┌────────┴───────────┐ ┌─────────┐ ┌────────┴─────────┐ ┌────────┴───────────┐ +/// │ Query │ │ KV cache │ │ KV cache │ │ Query │ │ KV cache + DQ │ │ KV cache + DQ │ +/// | | | (Key) | (Value) | | | | (Key) | | (Value) | +/// └───┬─────┘ └────────┬─────────┘ └────────┬───────────┘ └────┬────┘ └────────┬─────────┘ └────────┬───────────┘ +/// │ │ │ │ │ │ +/// │ f16 │ f16 │ f16 │ f16 i8:data │ f16:scale i8:data │ f16:scale +/// │ │ │ │ │ │ +/// │ │ │ │ │ │ +/// │ ┌────┴───┐ │ │ ┌────┴───┐ │ +/// └─────────────┤ SDPA ├─────────────────┘ └─────────────┤ SDPA ├────────────────────┘ +/// └────────┘ └────────┘ + +class KVCacheCompression : public ov::pass::GraphRewrite { +public: + OPENVINO_RTTI("KVCacheCompression", "0"); + KVCacheCompression(ov::element::Type compression_dt); + + bool run_on_model(const std::shared_ptr& m) override; +}; + + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/transformations/op/indirect_sdpa.cpp b/src/plugins/intel_gpu/src/plugin/transformations/op/indirect_sdpa.cpp index 681c88119efd95..73e916064a0c1c 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/op/indirect_sdpa.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/op/indirect_sdpa.cpp @@ -25,30 +25,63 @@ IndirectSDPA::IndirectSDPA(const OutputVector& data_inputs, validate_and_infer_types(); } +IndirectSDPA::IndirectSDPA(const OutputVector& data_inputs, + const ov::Output& beam_table, + const bool is_causal, + const int64_t indirect_axis, + const std::vector& order_q, + const std::vector& order_k, + const std::vector& order_v, + 
const std::vector& order_out, + const QuantizationAttribute& quantization_attribute, + const ov::element::Type output_type) + : ov::intel_gpu::op::SDPA(data_inputs, is_causal, order_q, order_k, order_v, order_out, quantization_attribute, output_type) + , m_indirect_axis(indirect_axis) { + auto beam_table_idx = data_inputs.size(); + set_argument(beam_table_idx, beam_table); + validate_and_infer_types(); +} + std::shared_ptr IndirectSDPA::clone_with_new_inputs(const ov::OutputVector& new_args) const { check_new_args_count(this, new_args); // Exclude beam_table input OutputVector data_inputs(new_args.begin(), new_args.end() - 1); - return std::make_shared(data_inputs, - new_args.back(), - m_is_causal, - m_indirect_axis, - m_order_q, - m_order_k, - m_order_v, - m_order_out, - m_output_type); + if (m_compressed) { /* compressed node: clone through the quantization-aware constructor so the copy keeps m_compressed and its quantization attributes */ + return std::make_shared(data_inputs, + new_args.back(), + m_is_causal, + m_indirect_axis, + m_order_q, + m_order_k, + m_order_v, + m_order_out, + m_quantization_attrs, + m_output_type); + } else { /* plain node: clone through the constructor that takes no quantization attributes */ + return std::make_shared(data_inputs, + new_args.back(), + m_is_causal, + m_indirect_axis, + m_order_q, + m_order_k, + m_order_v, + m_order_out, + m_output_type); + } } void IndirectSDPA::validate_and_infer_types() { const auto input_size = get_input_size(); + + const auto compression_inputs = get_compression_inputs_num(); NODE_VALIDATION_CHECK(this, - input_size == 4 || input_size == 5 || input_size == 6, + input_size >= 4 + compression_inputs && input_size <= 6 + compression_inputs, "Number of inputs is incorrect. 
Current value is: ", input_size, - ", expected 4, 5 or 6."); + ", expected 4, 5 or 6 data inputs and ", compression_inputs, " KV-cache compression related inputs"); + std::vector input_shapes; for (size_t i = 0; i < input_size - 1; i++) { diff --git a/src/plugins/intel_gpu/src/plugin/transformations/op/kv_cache.cpp b/src/plugins/intel_gpu/src/plugin/transformations/op/kv_cache.cpp index a598e556a8f05d..12d961be6d337a 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/op/kv_cache.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/op/kv_cache.cpp @@ -3,6 +3,7 @@ // #include "intel_gpu/op/kv_cache.hpp" +#include "intel_gpu/op/kv_cache_compressed.hpp" #include "gather_shape_inference.hpp" #include "concat_shape_inference.hpp" #include "openvino/core/partial_shape.hpp" @@ -13,19 +14,28 @@ namespace ov { namespace intel_gpu { namespace op { -KVCache::KVCache(const Output& past, - const Output& new_token_data, - const Output& beam_idx, +KVCache::KVCache(const OutputVector& inputs, const std::shared_ptr& past_variable, + bool indirect, int64_t concat_axis, int64_t gather_axis, const ov::element::Type output_type) - : Op({past, new_token_data, beam_idx}) + : Op(inputs) , m_concat_axis(concat_axis) , m_gather_axis(gather_axis) - , m_indirect(true) + , m_indirect(indirect) , m_output_type(output_type) { m_variable = past_variable; +} + +KVCache::KVCache(const Output& past, + const Output& new_token_data, + const Output& beam_idx, + const std::shared_ptr& past_variable, + int64_t concat_axis, + int64_t gather_axis, + const ov::element::Type output_type) + : KVCache({past, new_token_data, beam_idx}, past_variable, true, concat_axis, gather_axis, output_type) { if (m_indirect) set_output_size(2); validate_and_infer_types(); @@ -36,11 +46,7 @@ KVCache::KVCache(const Output& past, const std::shared_ptr& past_variable, int64_t concat_axis, const ov::element::Type output_type) - : Op({past, new_token_data}) - , m_concat_axis(concat_axis) - , m_gather_axis(0) - , 
m_indirect(false) - , m_output_type(output_type) { + : KVCache({past, new_token_data}, past_variable, false, concat_axis, 0, output_type) { m_variable = past_variable; validate_and_infer_types(); } @@ -54,14 +60,23 @@ bool KVCache::visit_attributes(ov::AttributeVisitor& visitor) { } void KVCache::validate_and_infer_types() { - auto output_type = m_output_type == ov::element::undefined ? get_input_element_type(0) : m_output_type; + auto output_type = m_output_type; + if (m_output_type == ov::element::undefined) { + output_type = get_input_element_type(0); + } + std::vector input_shapes = {m_variable->get_info().data_shape, get_input_partial_shape(1)}; - if (get_output_size() == 2) + if (m_indirect) { input_shapes.push_back(get_input_partial_shape(2)); + } + auto shapes = shape_infer(this, input_shapes); - set_output_type(0, output_type, shapes[0]); + + size_t out_ports = 0; + set_output_type(out_ports++, output_type, shapes[0]); + if (m_indirect) { - set_output_type(1, get_input_element_type(2), shapes[1]); + set_output_type(out_ports++, get_input_element_type(2), shapes[1]); } } @@ -85,13 +100,13 @@ std::shared_ptr KVCache::clone_with_new_inputs(const ov::OutputVector& new } } -std::vector shape_infer(const KVCache* op, std::vector input_shapes) { +std::vector shape_infer(const KVCache* op, const std::vector& input_shapes) { std::vector out_shapes; out_shapes.resize(op->get_output_size()); const auto& gather_axis = op->get_gather_axis(); const auto& concat_axis = ov::util::normalize(op->get_concat_axis(), input_shapes[0].size()); - if (op->get_output_size() == 2) { + if (op->get_output_size() >= 2) { out_shapes[0] = input_shapes[0]; out_shapes[0][gather_axis] = input_shapes[2][0]; out_shapes[0][concat_axis] += input_shapes[1][concat_axis]; @@ -108,6 +123,87 @@ std::vector shape_infer(const KVCache* op, std::vector& past_variable, + int64_t concat_axis, + int64_t gather_axis, + const QuantizationAttrs& quantization_attrs, + const ov::element::Type output_type) + : 
KVCache(inputs, past_variable, true, concat_axis, gather_axis, output_type) + , m_compressed(true) + , m_quantization_attrs(quantization_attrs) { + OPENVINO_ASSERT(quantization_attrs.quantization_dt == ov::element::i8, + "[GPU] Only I8 data type is currently supported for KV-cache compression"); + + m_variable = past_variable; + size_t output_size = 3; + if (quantization_attrs.quantization_type == ov::op::internal::DynamicQuantize::QuantizationType::Asymmetric && + quantization_attrs.output_storage_type == ov::op::internal::DynamicQuantize::OutputStorageType::Planar) + output_size++; // add zp output + + set_output_size(output_size); + validate_and_infer_types(); +} + +void KVCacheCompressed::validate_and_infer_types() { + std::vector input_shapes = {m_variable->get_info().data_shape, get_input_partial_shape(1)}; + input_shapes.push_back(get_input_partial_shape(2)); + input_shapes.push_back(get_input_partial_shape(3)); + + if (m_quantization_attrs.quantization_type == ov::op::internal::DynamicQuantize::QuantizationType::Asymmetric && + m_quantization_attrs.output_storage_type == ov::op::internal::DynamicQuantize::OutputStorageType::Planar) + input_shapes.push_back(get_input_partial_shape(4)); + + auto shapes = shape_infer(this, input_shapes); + + size_t out_ports = 0; + set_output_type(out_ports++, m_quantization_attrs.quantization_dt, shapes[0]); + set_output_type(out_ports++, get_input_element_type(2), shapes[1]); + set_output_type(out_ports++, m_quantization_attrs.scale_dt, shapes[2]); + + if (m_quantization_attrs.quantization_type == ov::op::internal::DynamicQuantize::QuantizationType::Asymmetric && + m_quantization_attrs.output_storage_type == ov::op::internal::DynamicQuantize::OutputStorageType::Planar) { + set_output_type(out_ports++, m_quantization_attrs.zp_dt, shapes[3]); + } +} + +std::shared_ptr KVCacheCompressed::clone_with_new_inputs(const ov::OutputVector& new_args) const { + check_new_args_count(this, new_args); + return std::make_shared(new_args, + 
m_variable, + m_concat_axis, + m_gather_axis, + m_quantization_attrs, + m_output_type); +} + +std::vector shape_infer(const KVCacheCompressed* op, + const std::vector& input_shapes) { + std::vector out_shapes = shape_infer(static_cast(op), input_shapes); + + if (op->get_output_size() >= 3) { + ov::op::internal::DynamicQuantize dq_op; + dq_op.set_attrs(op->get_quantization_attrs()); + + auto quantized_data_shapes = + ov::op::internal::DynamicQuantize::shape_infer(&dq_op, { input_shapes[1] }); + + const auto scales_concat_axis = 2; + ov::PartialShape compression_scale_shape = input_shapes[3]; + compression_scale_shape[scales_concat_axis] += quantized_data_shapes[1][scales_concat_axis]; + out_shapes[2] = compression_scale_shape; + + // add zp output + if (quantized_data_shapes.size() == 3) { + ov::PartialShape compression_zp_shape = input_shapes[4]; + compression_zp_shape[scales_concat_axis] += quantized_data_shapes[2][scales_concat_axis]; + out_shapes[3] = compression_zp_shape; + } + } + + return out_shapes; +} + } // namespace op } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/transformations/op/read_value.cpp b/src/plugins/intel_gpu/src/plugin/transformations/op/read_value.cpp index 5438a6e2e695b5..6cd7f778c71b3b 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/op/read_value.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/op/read_value.cpp @@ -3,6 +3,7 @@ // #include "intel_gpu/op/read_value.hpp" +#include "intel_gpu/op/read_values.hpp" #include "openvino/core/partial_shape.hpp" namespace ov { @@ -28,16 +29,14 @@ bool ReadValue::visit_attributes(ov::AttributeVisitor& visitor) { return true; } -void ReadValue::validate_and_infer_types() { - OPENVINO_ASSERT(m_variable, "Variable is not initialized."); - const auto& variable_info = m_variable->get_info(); +void ReadValue::validate_and_infer_types(size_t output_idx, const ov::op::util::VariableInfo& variable_info) { const auto& variable_type = 
variable_info.data_type; const auto& variable_shape = variable_info.data_shape; // If no inputs provided, it means this ReadValue doesn't have initial subgraph. This is valid. - if (get_input_size() > 0) { - const auto& initial_type = get_input_element_type(0); - const auto& initial_shape = get_input_partial_shape(0); + if (get_input_size() > output_idx) { + const auto& initial_type = get_input_element_type(output_idx); + const auto& initial_shape = get_input_partial_shape(output_idx); // Variable's shape/type determine a permissible range of values for shape/type inferred from initial_subgraph. // If initial_subgraph is set, then we need to check that shape/type inferred from initial_subgraph @@ -64,19 +63,25 @@ void ReadValue::validate_and_infer_types() { // dynamic rank/type can be derived from the IRs generated via the prev versions of OV, // but dynamic rank/type are not supported in plugins, // so we are trying to fix them here using the rank/type of ReadValue 1st input, if it exists - if (get_input_size() > 0 && variable_info.data_shape.rank().is_dynamic() && - variable_info.data_type.is_dynamic()) { - set_output_type(0, initial_type, initial_shape); + if (variable_info.data_shape.rank().is_dynamic() && variable_info.data_type.is_dynamic()) { + set_output_type(output_idx, initial_type, initial_shape); return; } } - set_output_type(0, variable_type, variable_shape); + set_output_type(output_idx, variable_type, variable_shape); +} + +void ReadValue::validate_and_infer_types() { + OPENVINO_ASSERT(m_variable, "Variable is not initialized."); + const auto& variable_info = m_variable->get_info(); + + validate_and_infer_types(0, variable_info); } std::shared_ptr ReadValue::clone_with_new_inputs(const ov::OutputVector& new_args) const { check_new_args_count(this, new_args); - switch (new_args.size()) { + switch (new_args.size()) { case 0: return std::make_shared(m_variable); case 1: @@ -89,6 +94,62 @@ std::shared_ptr ReadValue::clone_with_new_inputs(const 
ov::OutputVector& n } } +ReadValues::ReadValues(const std::shared_ptr& variable, + const std::vector& internal_states_infos) + : ReadValue(variable) + , m_internal_states_infos(internal_states_infos) { + OPENVINO_ASSERT(!internal_states_infos.empty()); + set_output_size(internal_states_infos.size()); + validate_and_infer_types(); +} + +ReadValues::ReadValues(const std::vector>& variable_initializers, + const std::shared_ptr& variable, + const std::vector& internal_states_infos) + : ReadValue(variable_initializers, variable) + , m_internal_states_infos(internal_states_infos) { + OPENVINO_ASSERT(!internal_states_infos.empty()); + set_output_size(internal_states_infos.size()); + validate_and_infer_types(); +} + +bool ReadValues::visit_attributes(ov::AttributeVisitor& visitor) { + visitor.on_attribute("variable_id", m_variable); + + auto variable_info = m_variable->get_info(); + visitor.on_attribute("variable_type", variable_info.data_type); + visitor.on_attribute("variable_shape", variable_info.data_shape); + m_variable->update(variable_info); + return true; +} + +void ReadValues::validate_and_infer_types() { + OPENVINO_ASSERT(m_variable, "Variable is not initialized."); + + for (size_t i = 0; i < get_output_size(); i++) { + ReadValue::validate_and_infer_types(i, m_internal_states_infos[i]); + } +} + +std::shared_ptr ReadValues::clone_with_new_inputs(const ov::OutputVector& new_args) const { + check_new_args_count(this, new_args); + + OPENVINO_ASSERT(new_args.empty() || new_args.size() == m_internal_states_infos.size(), + "Unable to clone ReadValues op (name=", this->get_friendly_name(), "). ", + "Incorrect number of inputs. Expected: 0 or ", m_internal_states_infos.size(), ". 
", + "Actual: ", new_args.size(), "."); + + if (new_args.size() > 0) { + return std::make_shared(new_args, m_variable, m_internal_states_infos); + } else { + return std::make_shared(m_variable, m_internal_states_infos); + } +} + +std::vector ReadValues::get_all_internal_states_info() const { + return m_internal_states_infos; +} + } // namespace op } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/transformations/op/sdpa.cpp b/src/plugins/intel_gpu/src/plugin/transformations/op/sdpa.cpp index 57d2899e2b2e77..09513d99153a1f 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/op/sdpa.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/op/sdpa.cpp @@ -26,7 +26,29 @@ SDPA::SDPA(const OutputVector& inputs, , m_order_k(order_k) , m_order_v(order_v) , m_order_out(order_out) - , m_output_type(output_type) { + , m_output_type(output_type) + , m_compressed(false) { + set_arguments(inputs); + set_causal(is_causal); + validate_and_infer_types(); +} + +SDPA::SDPA(const OutputVector& inputs, + const bool is_causal, + const std::vector& order_q, + const std::vector& order_k, + const std::vector& order_v, + const std::vector& order_out, + const QuantizationAttribute& quantization_attrs, + const ov::element::Type output_type) + : m_is_causal(is_causal) + , m_order_q(order_q) + , m_order_k(order_k) + , m_order_v(order_v) + , m_order_out(order_out) + , m_output_type(output_type) + , m_compressed(true) + , m_quantization_attrs(quantization_attrs) { set_arguments(inputs); set_causal(is_causal); validate_and_infer_types(); @@ -46,11 +68,13 @@ std::shared_ptr SDPA::clone_with_new_inputs(const ov::OutputVector& ne void SDPA::validate_and_infer_types() { const auto input_size = get_input_size(); + + const auto compression_inputs = get_compression_inputs_num(); NODE_VALIDATION_CHECK(this, - input_size == 3 || input_size == 4 || input_size == 5, + input_size >= 3 + compression_inputs && input_size <= 5 + compression_inputs, "Number of 
inputs is incorrect. Current value is: ", input_size, - ", expected 3, 4 or 5."); + ", expected 3, 4 or 5 data inputs and ", compression_inputs, " KV-cache compression related inputs"); std::vector input_shapes; for (size_t i = 0; i < input_size; i++) { @@ -77,6 +101,19 @@ bool SDPA::visit_attributes(ov::AttributeVisitor &visitor) { return true; } +size_t SDPA::get_compression_inputs_num() const { + size_t compression_inputs = 0; + if (m_compressed) { + compression_inputs += 2; // 2 * scales + + if (m_quantization_attrs.quantization_type == ov::op::internal::DynamicQuantize::QuantizationType::Asymmetric && + m_quantization_attrs.output_storage_type == ov::op::internal::DynamicQuantize::OutputStorageType::Planar) + compression_inputs += 2; // 2 * zp + } + + return compression_inputs; +} + std::vector shape_infer(const SDPA* op, std::vector input_shapes, const std::vector& order_q, diff --git a/src/plugins/intel_gpu/src/plugin/transformations/optimize_subsequent_reshapes.cpp b/src/plugins/intel_gpu/src/plugin/transformations/optimize_subsequent_reshapes.cpp new file mode 100644 index 00000000000000..b87600ed36e347 --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/transformations/optimize_subsequent_reshapes.cpp @@ -0,0 +1,99 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "optimize_subsequent_reshapes.hpp" + +#include "openvino/core/rt_info.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/concat.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/pass/pattern/op/or.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "transformations/utils/utils.hpp" + +namespace ov { +namespace intel_gpu { + +OptimizeSubsequentReshapes::OptimizeSubsequentReshapes() { + using namespace ov::pass::pattern; + using ov::pass::pattern::op::Or; + + auto dynamic_batch_only = [](Output output) { + const auto& shape = output.get_partial_shape(); + + if (shape.rank().is_dynamic()) + return false; + + 
if (shape.size() <= 1) + return false; + + if (shape[0].is_static()) + return false; + + for (size_t i = 1; i < shape.size(); i++) + if (shape[i].is_dynamic()) + return false; + + return true; + }; + + auto first_reshape_data = any_input(dynamic_batch_only); + auto first_reshape_pattern = ov::pass::pattern::wrap_type(); + auto first_reshape = wrap_type({ first_reshape_data, first_reshape_pattern }, + ov::pass::pattern::all_of({ dynamic_batch_only, ov::pass::pattern::consumers_count(1) })); + + auto second_reshape_pattern = ov::pass::pattern::wrap_type(); + auto second_reshape = wrap_type({ first_reshape, second_reshape_pattern }, dynamic_batch_only); + + ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + + auto input_node = pattern_map.at(first_reshape_data).get_node_shared_ptr(); + auto first_reshape_node = pattern_map.at(first_reshape).get_node_shared_ptr(); + auto second_reshape_node = pattern_map.at(second_reshape).get_node_shared_ptr(); + + auto input_ps = first_reshape_node->input(0).get_partial_shape(); + auto first_reshape_ps = first_reshape_node->get_output_partial_shape(0); + auto second_reshape_ps = second_reshape_node->get_output_partial_shape(0); + + auto static_dims_product = [](ov::PartialShape& ps) { + int64_t total_dims = 1; + + for (auto& dim : ps) { + if (dim.is_static()) + total_dims *= dim.get_length(); + } + + return total_dims; + }; + + if (static_dims_product(input_ps) != static_dims_product(first_reshape_ps) || + static_dims_product(first_reshape_ps) != static_dims_product(second_reshape_ps)) + return false; + + std::vector new_pattern; + for (auto& dim : second_reshape_ps) { + if (dim.is_dynamic()) { + new_pattern.push_back(0); + } else { + new_pattern.push_back(dim.get_length()); + } + } + + auto new_pattern_const = std::make_shared(ov::element::i32, ov::Shape{new_pattern.size()}, new_pattern); + auto new_reshape = 
std::make_shared(first_reshape_node->input(0).get_source_output(), new_pattern_const, true); + new_reshape->set_friendly_name(second_reshape_node->get_friendly_name()); + + ov::replace_node(second_reshape_node, new_reshape); + copy_runtime_info(first_reshape_node, new_reshape); + + return true; + }; + + auto m = std::make_shared(second_reshape, "OptimizeSubsequentReshapes"); + this->register_matcher(m, callback); +} + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/transformations/optimize_subsequent_reshapes.hpp b/src/plugins/intel_gpu/src/plugin/transformations/optimize_subsequent_reshapes.hpp new file mode 100644 index 00000000000000..3a38bb92ad5167 --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/transformations/optimize_subsequent_reshapes.hpp @@ -0,0 +1,23 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/graph_rewrite.hpp" + +namespace ov { +namespace intel_gpu { + +/** + * @brief This pass looks for `Reshape [ dynamic dim, n static dims] -> Reshape [dynamic dim, n static dims]` patterns + * and replaces them with a single `Reshape [dynamic dim, n static dims]` operation. 
+ */ +class OptimizeSubsequentReshapes : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("OptimizeSubsequentReshapes", "0"); + OptimizeSubsequentReshapes(); +}; + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 4b72385663bf9d..158dee2ee7ac05 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -15,11 +15,8 @@ #include "intel_gpu/plugin/transformations_pipeline.hpp" #include "intel_gpu/runtime/debug_configuration.hpp" #include "intel_gpu/runtime/itt.hpp" -#include "low_precision/add.hpp" #include "low_precision/convolution.hpp" #include "low_precision/convolution_backprop_data.hpp" -#include "low_precision/fold_convert.hpp" -#include "low_precision/fuse_convert.hpp" #include "low_precision/group_convolution.hpp" #include "low_precision/low_precision.hpp" #include "low_precision/mat_mul.hpp" @@ -28,9 +25,7 @@ #include "low_precision/pull_reshape_through_dequantization.hpp" #include "low_precision/pull_transpose_through_dequantization.hpp" #include "low_precision/recurrent_cell.hpp" -#include "low_precision/rt_info/bias_attribute.hpp" #include "low_precision/strided_slice.hpp" -#include "low_precision/transpose.hpp" #include "openvino/core/deprecated.hpp" #include "openvino/core/type/element_type.hpp" #include "openvino/core/validation_util.hpp" @@ -51,7 +46,6 @@ #include "openvino/op/reshape.hpp" #include "openvino/op/rnn_cell.hpp" #include "openvino/op/rnn_sequence.hpp" -#include "openvino/op/scaled_dot_product_attention.hpp" #include "openvino/op/squeeze.hpp" #include "openvino/op/unsqueeze.hpp" #include "openvino/op/util/sub_graph_base.hpp" @@ -71,15 +65,18 @@ #include "plugin/transformations/move_fc_reshape_to_weights.hpp" #include "plugin/transformations/bcast_and_pad_zp_buffers.hpp" #include 
"plugin/transformations/print_model_statistics.hpp" +#include "plugin/transformations/fc_per_layer_scaling.hpp" #include "plugin/transformations/swiglu_fusion.hpp" #include "plugin/transformations/transpose_fusion.hpp" #include "plugin/transformations/indirect_kv_cache.hpp" +#include "plugin/transformations/kv_cache_compression.hpp" #include "plugin/transformations/convert_convolution.hpp" #include "plugin/transformations/unsqueeze_broadcast_reshape_matmul_fusion.hpp" #include "plugin/transformations/unsqueeze_broadcast_reshape_sdpa_fusion.hpp" #include "plugin/transformations/increase_position_ids_precision.hpp" #include "plugin/transformations/group_norm_composition.hpp" #include "plugin/transformations/dynamic_quantize_fully_connected.hpp" +#include "plugin/transformations/optimize_subsequent_reshapes.hpp" #include "transformations/common_optimizations/nop_elimination.hpp" #include "transformations/common_optimizations/rms_fusion.hpp" #include "transformations/common_optimizations/broadcast_elementwise_fusion.hpp" @@ -183,14 +180,29 @@ static bool is_decompression_multiply(const std::shared_ptr node if (all_has_types(consumers, { ov::op::v0::MatMul::get_type_info_static(), ov::op::v8::Gather::get_type_info_static() })) return true; + auto are_multiply_from_decompression = [&all_has_types](const ov::Input consumer) { + if (!cldnn::one_of(consumer.get_node()->get_type_info(), { ov::op::v1::Multiply::get_type_info_static() })) + return false; + const auto child_consumers = consumer.get_node()->get_output_target_inputs(0); + if (all_has_types(child_consumers, { ov::opset1::MatMul::get_type_info_static(), ov::op::v8::Gather::get_type_info_static() })) + return true; + return false; + }; - auto are_converts_from_decompression = [&all_has_types](const std::set>& consumers) { + auto are_converts_from_decompression = [&all_has_types, &are_multiply_from_decompression](const std::set>& consumers) { if (!all_has_types(consumers, { ov::opset1::Convert::get_type_info_static() 
})) return false; for (const auto& consumer : consumers) { const auto child_consumers = consumer.get_node()->get_output_target_inputs(0); - if (!all_has_types(child_consumers, { ov::opset1::MatMul::get_type_info_static(), ov::op::v8::Gather::get_type_info_static() })) + for (const auto& child_consumer : child_consumers) { + const auto& type_info = child_consumer.get_node()->get_type_info(); + if (cldnn::one_of(type_info, { ov::opset1::MatMul::get_type_info_static(), ov::op::v8::Gather::get_type_info_static() })) + continue; + if (are_multiply_from_decompression(child_consumer)) { + continue; + } return false; + } } return true; }; @@ -318,9 +330,13 @@ void TransformationsPipeline::apply(std::shared_ptr func) { // it expects to have the same data type for weights and zero points (apply it only for u8 data type, since other compression // types are not supported by oneDNN) manager.register_pass(supported_woq_types, !device_info.supports_immad); - pass_config->set_callback([&](const std::shared_ptr node) { - return !is_decompression_multiply(node); - }); + + // Need to check if transformations work correctly for mixed models with both compression and quantization at the same time. + if (!is_model_quantized) { + pass_config->set_callback([&](const std::shared_ptr node) { + return !is_decompression_multiply(node); + }); + } const bool keep_precision_sensitive_in_fp32_1 = true; const bool convert_input_output_precision = false; @@ -689,6 +705,12 @@ void TransformationsPipeline::apply(std::shared_ptr func) { auto lptPassConfig = lptManager.get_pass_config(); // quantized LSTMSequence / GPUSequence are not supported yet. 
Avoid extra transformation lptPassConfig->disable(); + lptPassConfig->set_callback([](const_node_ptr& node) -> bool { + if (const auto mulitply = std::dynamic_pointer_cast(node)) { + return !MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolution(mulitply); + } + return false; + }); lptPassConfig->set_callback([func, defaultPrecisions](const_node_ptr& node) -> bool { auto fillStaticChannel = [func](const ov::PartialShape& shape, size_t& channel) -> bool { const auto rank = shape.rank(); @@ -725,43 +747,6 @@ void TransformationsPipeline::apply(std::shared_ptr func) { || WeightableLayerTransformation::isAsymmetricOnWeights(node, defaultPrecisions); }); - lptPassConfig->set_callback([&](const_node_ptr& node) -> bool { - for (auto& user : node->get_users()) { - if (ov::is_type(user)) - return true; - } - - return false; - }); - - lptPassConfig->set_callback([](const_node_ptr& node) -> bool { - return ov::is_type(node) && !MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolution(node); - }); - - lptPassConfig->set_callback([](const_node_ptr& node) -> bool { - return ov::marked_as_bias(node); - }); - lptPassConfig->set_callback([](const_node_ptr& node) -> bool { - const auto& consumers = node->get_output_target_inputs(0); - if (consumers.size() == 1) { - const auto consumer = consumers.begin()->get_node()->shared_from_this(); - return ov::is_type(consumer) && is_decompression_multiply(consumer); - } - return false; - }); - lptPassConfig->set_callback([](const_node_ptr& node) -> bool { - if (ov::is_type(node)) { - return ov::is_type(node) && is_decompression_multiply(node); - } else if (ov::is_type(node)) { - const auto& consumers = node->get_output_target_inputs(0); - if (consumers.size() == 1) { - const auto consumer = consumers.begin()->get_node()->shared_from_this(); - return ov::is_type(consumer) && is_decompression_multiply(consumer); - } - } - return false; - }); - lptPassConfig->set_callback([&](const_node_ptr& node) -> 
bool { // disable MultiplyToGroupConvolution if Multiply with Constant can be fused @@ -878,6 +863,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); manager.register_pass(device_info.supports_immad); + manager.register_pass(config.get_property(ov::hint::activations_scale_factor)); if (!device_info.supports_immad) { manager.register_pass(); @@ -886,6 +872,10 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); + + auto kv_cache_compression_dt = config.get_property(ov::hint::kv_cache_precision); + manager.register_pass(kv_cache_compression_dt); + manager.register_pass(); // This pass should be done after asymmetric quantization matching as it can move zp subtraction upper in the graph @@ -901,6 +891,8 @@ void TransformationsPipeline::apply(std::shared_ptr func) { pass_config->disable(); pass_config->disable(); + manager.register_pass(); + manager.register_pass(); // This Validate is needed for proper data type propagation after applying IncreasePositionIdsPrecision pass manager.register_pass(); diff --git a/src/plugins/intel_gpu/src/plugin/variable_state.cpp b/src/plugins/intel_gpu/src/plugin/variable_state.cpp index 6b1c8d0cfc993f..2b7a26ba35a292 100644 --- a/src/plugins/intel_gpu/src/plugin/variable_state.cpp +++ b/src/plugins/intel_gpu/src/plugin/variable_state.cpp @@ -123,6 +123,12 @@ ov::element::Type VariableState::get_user_specified_type() const { } ov::SoPtr VariableState::get_state() const { + if (m_memory == nullptr) { + const auto& pshape = m_layout.get_partial_shape(); + const auto& shape = get_tensor_shape(pshape); + return m_context->create_host_tensor(get_user_specified_type(), shape); + } + auto tensor = m_context->create_host_tensor(get_user_specified_type(), m_memory->get_layout().get_shape()); convert_and_copy(m_memory, tensor._ptr.get(), m_context->get_engine().get_service_stream()); diff --git 
a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp index 5f943564d6f50e..5c3b3ee0c970f9 100644 --- a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp +++ b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp @@ -183,6 +183,7 @@ static void print_help_messages() { message_list.emplace_back("OV_GPU_DisableRuntimeSkipReorder", "Disable runtime skip reorder."); message_list.emplace_back("OV_GPU_DisablePrimitiveFusing", "Disable primitive fusing"); message_list.emplace_back("OV_GPU_DisableFakeAlignment", "Disable fake alignment"); + message_list.emplace_back("OV_GPU_KVCacheCompression", "Enable/Disable KV-cache compression"); message_list.emplace_back("OV_GPU_DynamicQuantizeLayersWithoutOnednn", "Enable Dynamic quantization for specified Fully connected layers only, " "separated by space. Support case-insensitive and regular expression. For example .*fully_connected.*"); message_list.emplace_back("OV_GPU_DynamicQuantizeGroupSize", "Specify a group size of dynamic quantization to enable " @@ -253,6 +254,7 @@ debug_configuration::debug_configuration() , disable_runtime_skip_reorder(0) , disable_primitive_fusing(0) , disable_fake_alignment(0) + , use_kv_cache_compression(-1) , dynamic_quantize_group_size(DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) , disable_horizontal_fc_fusion(0) { #ifdef GPU_DEBUG_CONFIG @@ -305,6 +307,7 @@ debug_configuration::debug_configuration() get_gpu_debug_env_var("DisableRuntimeSkipReorder", disable_runtime_skip_reorder); get_gpu_debug_env_var("DisablePrimitiveFusing", disable_primitive_fusing); get_gpu_debug_env_var("DisableFakeAlignment", disable_fake_alignment); + get_gpu_debug_env_var("KVCacheCompression", use_kv_cache_compression); get_gpu_debug_env_var("DynamicQuantizeGroupSize", dynamic_quantize_group_size); get_gpu_debug_env_var("DisableHorizontalFCFusion", disable_horizontal_fc_fusion); std::string dump_iteration_str; diff --git 
a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 7661444cc4fd7b..7c8e55cddfe593 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -58,8 +58,10 @@ void ExecutionConfig::set_default() { std::make_tuple(ov::cache_mode, ov::CacheMode::OPTIMIZE_SPEED), std::make_tuple(ov::cache_encryption_callbacks, EncryptionCallbacks{}), std::make_tuple(ov::hint::dynamic_quantization_group_size, 32), + std::make_tuple(ov::hint::kv_cache_precision, ov::element::undefined), std::make_tuple(ov::intel_gpu::hint::enable_kernels_reuse, false), std::make_tuple(ov::weights_path, ""), + std::make_tuple(ov::hint::activations_scale_factor, 0.f), // Legacy API properties std::make_tuple(ov::intel_gpu::nv12_two_inputs, false), @@ -209,6 +211,14 @@ void ExecutionConfig::apply_debug_options(const cldnn::device_info& info) { else set_property(ov::hint::dynamic_quantization_group_size(debug_config->dynamic_quantize_group_size)); } + + GPU_DEBUG_IF(debug_config->use_kv_cache_compression != -1) { + GPU_DEBUG_IF(debug_config->use_kv_cache_compression == 1) { + set_property(ov::hint::kv_cache_precision(ov::element::i8)); + } else { + set_property(ov::hint::kv_cache_precision(ov::element::undefined)); + } + } } void ExecutionConfig::apply_hints(const cldnn::device_info& info) { diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp index 88801b8b2b4e61..7ab48308cfeaf7 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp @@ -330,6 +330,14 @@ device_info init_device_info(const cl::Device& device, const cl::Context& contex ngen::Product product = {ngen::ProductFamily::Unknown, 0}; jit_generator::detectHWInfo(context.get(), device.get(), hw, product); info.arch = convert_ngen_arch(hw); + // We change the value of this flag 
to avoid OneDNN usage for the platforms unknown to OneDNN + // This is required to guarantee some level of forward compatibility for the new HW generations + // as OneDNN code generators are not generic and typically requires some updates for the new architectures + // Ideally, we shouldn't do that as OCL impls sometimes also check this flag, but in order to avoid that + // we need to ensure that graph transformations are not relying on this flag as indicator that onednn will be used + if (product.family == ngen::ProductFamily::Unknown) { + info.supports_immad = false; + } #else // ENABLE_ONEDNN_FOR_GPU info.arch = gpu_arch::unknown; #endif // ENABLE_ONEDNN_FOR_GPU diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache_sdpa.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache_sdpa.cpp index 68098937bb9c0a..16db9d89c28b4d 100644 --- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache_sdpa.cpp +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache_sdpa.cpp @@ -26,6 +26,7 @@ struct Params { bool with_mask; bool with_scale; bool causal; + bool compressed; size_t batch; ov::element::Type model_element_type; size_t num_iter; @@ -46,6 +47,9 @@ class SDPAWithKVCacheTest : public ::testing::Test, public ::testing::WithParamI ov::AnyMap properties = {ov::hint::inference_precision(ov::element::f16), ov::intel_gpu::hint::enable_sdpa_optimization(true)}; + if (p.compressed) + properties.emplace(ov::hint::kv_cache_precision(ov::element::i8)); + const size_t n_heads = 16; const size_t n_features = 64; const size_t context_size = 7; @@ -58,6 +62,7 @@ class SDPAWithKVCacheTest : public ::testing::Test, public ::testing::WithParamI const bool causal = p.causal; const bool with_mask = p.with_mask; const bool with_scale = p.with_scale; + const bool compressed = p.compressed; auto model = tests::make_llm_kv_cache_sdpa_pattern(ov::Dimension::dynamic(), n_heads, @@ -284,14 +289,17 @@ class 
SDPAWithKVCacheTest : public ::testing::Test, public ::testing::WithParamI compare_tensors({ref_results[0]}, {sdpa_out}); } - auto variables = infer_request.query_state(); - std::vector states; - for (auto& variable : variables) { - auto state = variable.get_state(); - ASSERT_EQ(state.get_element_type(), element_type); - states.push_back(state); + if (!compressed) { + auto variables = infer_request.query_state(); + std::vector states; + for (auto& variable : variables) { + auto state = variable.get_state(); + ASSERT_EQ(state.get_element_type(), element_type); + states.push_back(state); + } + compare_tensors({ref_k_cache, ref_v_cache}, states); } - compare_tensors({ref_k_cache, ref_v_cache}, states); + infer_request.reset_state(); } } @@ -310,6 +318,7 @@ class SDPAWithKVCacheTest : public ::testing::Test, public ::testing::WithParamI result << "mask=" << p.with_mask << "_"; result << "scale=" << p.with_scale << "_"; result << "causal=" << p.causal << "_"; + result << "compressed=" << p.compressed << ""; return result.str(); } }; @@ -324,11 +333,17 @@ std::vector get_test_params() { const bool with_mask = true; const bool with_scale = true; const bool causal = true; + const bool compressed = true; + + p.push_back({with_rearrange, !with_mask, !with_scale, !causal, !compressed, 1, ov::element::Type_t::f16, 10, 1, 1, {0, 1, 2, 3}}); + p.push_back({with_rearrange, with_mask, !with_scale, !causal, !compressed, 1, ov::element::Type_t::f16, 10, 4, 1, {0, 1, 2, 3}}); + p.push_back({with_rearrange, with_mask, !with_scale, !causal, !compressed, 1, ov::element::Type_t::f16, 10, 4, 1, {0, 2, 1, 3}}); + p.push_back({!with_rearrange, with_mask, !with_scale, !causal, !compressed, 1, ov::element::Type_t::f16, 10, 4, 1, {0, 2, 1, 3}}); - p.push_back({with_rearrange, !with_mask, !with_scale, !causal, 1, ov::element::Type_t::f16, 10, 1, 1, {0, 1, 2, 3}}); - p.push_back({with_rearrange, with_mask, !with_scale, !causal, 1, ov::element::Type_t::f16, 10, 4, 1, {0, 1, 2, 3}}); - 
p.push_back({with_rearrange, with_mask, !with_scale, !causal, 1, ov::element::Type_t::f16, 10, 4, 1, {0, 2, 1, 3}}); - p.push_back({!with_rearrange, with_mask, !with_scale, !causal, 1, ov::element::Type_t::f16, 10, 4, 1, {0, 2, 1, 3}}); + // Compressed + p.push_back({with_rearrange, with_mask, !with_scale, !causal, compressed, 1, ov::element::Type_t::f16, 10, 1, 1, {0, 1, 2, 3}}); + p.push_back({with_rearrange, with_mask, !with_scale, !causal, compressed, 1, ov::element::Type_t::f16, 10, 4, 1, {0, 2, 1, 3}}); + p.push_back({with_rearrange, with_mask, !with_scale, !causal, compressed, 1, ov::element::Type_t::f16, 10, 4, 1, {0, 1, 2, 3}}); return p; } diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp index fee6a02b6671d8..27c57aa072878d 100644 --- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp @@ -30,7 +30,7 @@ using ov::test::InputShape; * \ / * Multiply * | - * Data(F32) Transpose(optional) + * Data(F32) Transpose(optional) or Multiply(optional) * \ / * Matmul * | @@ -56,6 +56,7 @@ using MatmulWeightsDecompressionParams = std::tuple; @@ -70,6 +71,7 @@ class MatmulWeightsDecompression : public testing::WithParamInterface +#include +#include +#include + +#include "permute_inst.h" +#include "program_wrapper.h" + +#include +#include + +using namespace cldnn; +using namespace ::tests; + +namespace skip_permute_tests { + +struct skip_permute_params { + layout input_layout_static; + std::vector permute_order; + bool expected_result1; + bool expected_result2; +}; + +class skip_permute_at_runtime_test : public testing::TestWithParam {}; + +TEST_P(skip_permute_at_runtime_test, runtime_skip) { + auto p = GetParam(); + auto& engine = get_test_engine(); + auto rank = 
p.input_layout_static.get_partial_shape().size(); + auto input_layout_dynamic = layout {ov::PartialShape::dynamic(rank), data_types::f16, format::get_default_format(rank)}; + topology topology(input_layout("input", input_layout_dynamic), + permute("permute", input_info("input"), p.permute_order), + reorder("reorder", input_info("permute"), format::get_default_format(rank), data_types::f32)); + + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + config.set_property(ov::intel_gpu::optimize_data(true)); + + network network(engine, topology, config); + auto permute_inst = network.get_primitive("permute"); + ASSERT_EQ(permute_inst->can_be_optimized(), p.expected_result1); + + auto input_mem = engine.allocate_memory(p.input_layout_static); + network.set_input_data("input", input_mem); + auto outputs = network.execute(); + outputs.begin()->second.get_memory(); + + ASSERT_EQ(permute_inst->can_be_optimized(), p.expected_result2); +} + +INSTANTIATE_TEST_SUITE_P(smoke, skip_permute_at_runtime_test, + testing::ValuesIn(std::vector { + { layout{ov::PartialShape{8,8}, data_types::f16, format::bfyx}, {1, 0}, true, false }, + { layout{ov::PartialShape{8,1}, data_types::f16, format::bfyx}, {1, 0}, true, true }, + { layout{ov::PartialShape{8, 2, 8}, data_types::f16, format::bfyx}, {2, 1, 0}, true, false }, + { layout{ov::PartialShape{8, 2, 1}, data_types::f16, format::bfyx}, {2, 1, 0}, true, false }, + { layout{ov::PartialShape{1, 12, 1}, data_types::f16, format::bfyx}, {2, 1, 0}, true, true }, + { layout{ov::PartialShape{2, 3, 1, 14}, data_types::f16, format::bfyx}, {1, 0, 2, 3}, true, false }, + { layout{ov::PartialShape{1, 3, 1, 14}, data_types::f16, format::bfyx}, {1, 0, 2, 3}, true, true }, + { layout{ov::PartialShape{20, 1, 30, 1}, data_types::f16, format::bfyx}, {1, 0, 3, 2}, true, true}, + { layout{ov::PartialShape{12, 3, 1, 14}, data_types::f16, format::bfyx}, {0, 2, 1, 3}, true, true }, + { 
layout{ov::PartialShape{12, 3, 2, 14}, data_types::f16, format::bfyx}, {0, 2, 1, 3}, true, false }, + { layout{ov::PartialShape{12, 1, 1, 14}, data_types::f16, format::bfyx}, {0, 3, 1, 2}, true, true }, + { layout{ov::PartialShape{12, 1, 1, 14}, data_types::f16, format::bfyx}, {0, 3, 1, 2}, true, true }, + { layout{ov::PartialShape{1, 1, 1, 14}, data_types::f16, format::bfyx}, {0, 3, 1, 2}, true, true }, + { layout{ov::PartialShape{1, 3, 2, 14}, data_types::f16, format::bfyx}, {0, 3, 1, 2}, true, false }, + { layout{ov::PartialShape{1, 1, 4, 14}, data_types::f16, format::bfyx}, {2, 0, 1, 3}, true, true }, + { layout{ov::PartialShape{1, 4, 4, 1}, data_types::f16, format::bfyx}, {2, 0, 1, 3}, true, false }, + { layout{ov::PartialShape{1, 10, 1, 1, 11}, data_types::f16, format::bfzyx}, {3, 2, 1, 0, 4}, true, true }, + { layout{ov::PartialShape{1, 10, 2, 1, 10}, data_types::f16, format::bfzyx}, {3, 2, 1, 0, 4}, true, false }, + { layout{ov::PartialShape{1, 4, 1, 3, 4, 1}, data_types::f16, format::bfwzyx}, {0, 2, 1, 3, 5, 4}, true, true }, + { layout{ov::PartialShape{1, 4, 2, 3, 4, 1}, data_types::f16, format::bfwzyx}, {0, 2, 1, 3, 5, 4}, true, false }, + { layout{ov::PartialShape{1, 1, 1, 1, 4, 1}, data_types::f16, format::bfwzyx}, {4, 5, 2, 3, 0, 1}, true, true }, + { layout{ov::PartialShape{1, 1, 1, 1, 4, 2}, data_types::f16, format::bfwzyx}, {4, 5, 2, 3, 0, 1}, true, true }, + { layout{ov::PartialShape{1, 1, 3, 1, 4, 2}, data_types::f16, format::bfwzyx}, {4, 5, 2, 3, 0, 1}, true, false }, + })); +} // skip permute tests diff --git a/src/plugins/intel_gpu/tests/unit/dynamic_execution/stateful_model.cpp b/src/plugins/intel_gpu/tests/unit/dynamic_execution/stateful_model.cpp index 4b24fb996b3f3f..105963d1b09d73 100644 --- a/src/plugins/intel_gpu/tests/unit/dynamic_execution/stateful_model.cpp +++ b/src/plugins/intel_gpu/tests/unit/dynamic_execution/stateful_model.cpp @@ -197,7 +197,7 @@ TEST(stateful_model, check_dynamic_pad_for_kv_cache) { auto input_kv_lay = 
layout{info.data_shape, info.data_type, format::bfyx}; topology topology(input_layout("beam_idx", input_beam_idx_lay), input_layout("present", input_present_lay), - read_value("kv_cache", std::vector{}, info.variable_id, input_kv_lay), + read_value("kv_cache", std::vector{}, info.variable_id, {input_kv_lay}), gather("gather", input_info("kv_cache"), input_info("beam_idx"), @@ -224,7 +224,7 @@ TEST(stateful_model, check_dynamic_pad_for_kv_cache) { auto pad = tensor(0); pad.batch[0] = 1; - + { std::vector dynamic_pad_mask; const auto& dynamic_pad_dims = read_value_inst->get_output_layout(0).data_padding._dynamic_dims_mask; diff --git a/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp index baed5400181130..659ccaf9d8a723 100644 --- a/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp @@ -455,8 +455,8 @@ TEST_P(gemm_2in_dynamic_add, add) { } INSTANTIATE_TEST_SUITE_P(fusings_gpu, gemm_2in_dynamic_add, ::testing::ValuesIn(std::vector{ - gemm_test_params{ CASE_GEMM_2IN_FP16_3D_1, 4, 5, "", broadcast_kinds::batch, eltwise_mode::sum }, - gemm_test_params{ CASE_GEMM_2IN_FP16_3D_1, 4, 5, "", broadcast_kinds::feature, eltwise_mode::sum }, + gemm_test_params{ CASE_GEMM_2IN_FP16_3D_1, 4, 4, "gemm_tiled_opt", broadcast_kinds::batch, eltwise_mode::sum }, + gemm_test_params{ CASE_GEMM_2IN_FP16_3D_1, 4, 4, "gemm_tiled_opt", broadcast_kinds::feature, eltwise_mode::sum }, })); class gemm_2in_act_scale_quantize_i8 : public GemmFusingTest {}; diff --git a/src/plugins/intel_gpu/tests/unit/shape_infer/read_value_si_test.cpp b/src/plugins/intel_gpu/tests/unit/shape_infer/read_value_si_test.cpp index 194bc0244f86f0..2000d826ddfad6 100644 --- a/src/plugins/intel_gpu/tests/unit/shape_infer/read_value_si_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/shape_infer/read_value_si_test.cpp @@ -32,7 +32,7 @@ TEST_P(read_value_test, shape_infer) 
{ auto& engine = get_test_engine(); - const auto variable_layout = p.input_layout; + const std::vector variable_layout = {p.input_layout}; auto input_layout_prim = std::make_shared("input", p.input_layout); auto inputs = std::vector{ input_info("input") }; diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp index 4155ac0b420e66..4f9c31064e9026 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp @@ -10134,6 +10134,113 @@ TEST(convolution_gpu_onednn, quantized_onednn_convolution_u8s8f32_weights_zp) { } } +TEST(convolution_gpu_onednn, support_activation_zero_points_for_i32) { + auto& engine = get_test_engine(); + if (!engine.get_device_info().supports_immad) + return; + + auto in_layout = layout { ov::PartialShape::dynamic(4), data_types::u8, format::bfyx }; + auto input = engine.allocate_memory({ data_types::u8, format::bfyx, { 1, 2, 5, 4 } }); + auto weights = engine.allocate_memory({ data_types::i8, format::bfyx, { 3, 2, 3, 3 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 3, 1, 1 } }); + auto a_zp = engine.allocate_memory({ data_types::i32, format::bfyx, { 1, 3, 1, 1 } }); + auto w_zp = engine.allocate_memory({ data_types::u8, format::bfyx, { 1, 1, 1, 1 } }); + + set_values(input, { 1, 2, 3, 4, 5, + 2, 2, 3, 4, 6, + 3, 3, 3, 5, 1, + 1, 1, 1, 1, 1, + + 1, 2, 3, 4, 5, + 2, 2, 3, 4, 6, + 3, 3, 3, 5, 1, + 1, 1, 1, 1, 1 }); + + set_values(weights, { 1, 2, -1, + -2, 1, 2, + 9, 7, -1, + + 9, 0, -4, + -1, 3, 2, + 0, 2, 5, + + 1, 2, -1, + -2, 1, 2, + 9, 7, -1, + + 9, 0, -4, + -1, 3, 2, + 0, 2, 5, + + 1, 2, -1, + -2, 1, 2, + 9, 7, -1, + + 9, 0, -4, + -1, 3, 2, + 0, 2, 5 }); + set_values(a_zp, { 2, 5, 5 }); + set_values(w_zp, { 2 }); + set_values(biases, { 1.0f, -8.0f, -8.0f }); + + VVVF output_vec = { + { + { 2.0f, -5.0f, -20.0f }, + { 
12.0f, 26.0f, -10.0f } + }, + { + { -7.0f, -14.0f, -29.0f }, + { 3.0f, 17.0f, -19.0f } + }, + { + { -7.0f, -14.0f, -29.0f }, + { 3.0f, 17.0f, -19.0f } + } }; + + topology topology( + input_layout("input", in_layout), + data("weights", weights), + data("biases", biases), + data("a_zp", a_zp), + data("w_zp", w_zp), + convolution("conv", input_info("input"), "weights", "biases", "w_zp", "a_zp", "", 1, + { 2, 2 }, { 1, 1 }, { 0, 0 }, { 1, 2 }, false, data_types::f32), + reorder("out", input_info("conv"), format::bfyx, data_types::f32)); + + ExecutionConfig config = get_test_default_config(engine); + ov::intel_gpu::ImplementationDesc conv_impl = { format::bfyx, "", impl_types::onednn }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", conv_impl }})); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + config.set_property(ov::intel_gpu::optimize_data(true)); + + network network(engine, topology, config); + network.set_input_data("input", input); + + auto outputs = network.execute(); + ASSERT_EQ(outputs.begin()->first, "out"); + + auto output_memory = outputs.at("out").get_memory(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); + + auto output_layout = output_memory->get_layout(); + int y_size = output_layout.spatial(1); + int x_size = output_layout.spatial(0); + int f_size = output_layout.feature(); + int b_size = output_layout.batch(); + ASSERT_EQ(output_layout.format, format::bfyx); + ASSERT_EQ(y_size, 2); + ASSERT_EQ(x_size, 3); + ASSERT_EQ(f_size, 3); + ASSERT_EQ(b_size, 1); + for (int f = 0; f < f_size; f++) + for (int y = 0; y < y_size; ++y) { + for (int x = 0; x < x_size; ++x) { + ASSERT_NEAR(output_vec[f][y][x], ((float)output_ptr[f * y_size * x_size + y * x_size + x]), 1e-5f) << + " x="<< x << " y=" << y << " f=" << f; + } + } +} + TEST(convolution_gpu_onednn, has_proper_synchronization) { auto& engine = get_test_engine(); if (!engine.get_device_info().supports_immad) diff 
--git a/src/plugins/intel_gpu/tests/unit/test_cases/dynamic_quantize_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/dynamic_quantize_gpu_test.cpp index c1686e359e91a0..5a78360eb1f6d8 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/dynamic_quantize_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/dynamic_quantize_gpu_test.cpp @@ -22,35 +22,53 @@ using namespace cldnn; using namespace ::tests; +using QuantizationType = ov::op::internal::DynamicQuantize::QuantizationType; class dynamic_quantization_gpu_tests: public ::testing::Test { public: - void test_dynamic_quantization(bool is_caching_test, bool is_dynamic, int batch = 1, int ifm = 1024) { + void test_dynamic_quantization(bool is_caching_test, + const ov::PartialShape& input_shape, + const ov::Shape& data_shape, + const QuantizationType quantization_type = QuantizationType::Symmetric, + const std::string& impl_name = "") { tests::random_generator rg(GET_SUITE_NAME); auto& engine = get_test_engine(); - long int batch_num = batch; - long int ifm_num = ifm; - - bool is_4d = true; - - auto input_ps = is_4d ? ov::PartialShape{ batch_num, 1, 1, ifm_num } : ov::PartialShape{ batch_num, ifm_num}; - auto dyn_input_ps = is_4d ? ov::PartialShape{ -1, 1, 1, ifm_num } : ov::PartialShape{ -1, ifm_num}; + auto input_ps = data_shape; + auto dyn_input_ps = input_shape; + auto scales_ps = ov::PartialShape::dynamic(dyn_input_ps.size()); auto input_mem = engine.allocate_memory({ input_ps, data_types::f32, format::bfyx }); + auto group_sizes = std::vector(dyn_input_ps.size(), 1); + group_sizes.back() = UINT64_MAX; - auto input_data = rg.generate_random_1d(batch_num * ifm_num, -16.0f, 16.0f); + auto input_data = rg.generate_random_1d(ov::shape_size(data_shape), -16.0f, 16.0f); set_values(input_mem, input_data); - auto in_layout_f32 = is_dynamic ? 
layout{ dyn_input_ps, data_types::f32, format::bfyx } - : layout{ input_ps, data_types::f32, format::bfyx }; + auto in_layout_f32 = input_shape.is_dynamic() ? layout{ dyn_input_ps, data_types::f32, format::bfyx } + : layout{ input_ps, data_types::f32, format::bfyx }; + + auto in_layout = input_shape.is_dynamic() ? layout{ dyn_input_ps, data_types::f16, format::bfyx } + : layout{ input_ps, data_types::f16, format::bfyx }; - auto in_layout = is_dynamic ? layout{ dyn_input_ps, data_types::f16, format::bfyx } - : layout{ input_ps, data_types::f16, format::bfyx }; + dynamic_quantize::Attributes dq_config; + dq_config.quantization_type = quantization_type; + dq_config.quantization_dt = data_types::i8; + dq_config.scale_dt = data_types::f16; + dq_config.zp_dt = data_types::undefined; + dq_config.group_sizes = group_sizes; + dq_config.scales_zp_output_order = { 0, 1, 2, 3 }; + dq_config.output_storage_type = ov::op::internal::DynamicQuantize::OutputStorageType::Planar; + + if (quantization_type == QuantizationType::Asymmetric) { + dq_config.zp_dt = data_types::f16; + dq_config.output_storage_type = ov::op::internal::DynamicQuantize::OutputStorageType::InterleavedScalesZP; + } auto reorder_1 = reorder("reorder_1", input_info("input"), layout{ input_ps, data_types::f16, format::bfyx }); - auto dyn_quan_prim = dynamic_quantize("dyn_quan_prim", input_info("reorder_1"), 32, {data_types::f16, data_types::i8}); - auto reorder_2 = reorder("reorder_2", input_info("dyn_quan_prim"), layout{ input_ps, data_types::f16, format::bfyx }); + auto dyn_quan_prim = dynamic_quantize("dyn_quan_prim", input_info("reorder_1"), dq_config); + auto reorder_data = reorder("reorder_data", input_info("dyn_quan_prim", 0), layout{ input_ps, data_types::f16, format::bfyx }); + auto reorder_scale = reorder("reorder_scale", input_info("dyn_quan_prim", 1), layout{ scales_ps, data_types::f16, format::bfyx }); // Implemented dynamic quantize kernel auto get_ref_results = [&]() { @@ -58,7 +76,8 @@ class 
dynamic_quantization_gpu_tests: public ::testing::Test { input_layout("input", in_layout_f32), reorder_1, dyn_quan_prim, - reorder_2 + reorder_data, + reorder_scale ); auto config = get_test_default_config(engine); @@ -83,13 +102,18 @@ class dynamic_quantization_gpu_tests: public ::testing::Test { input_layout("input", in_layout_f32), reorder_1, dyn_quan_prim, - reorder_2 + reorder_data ); auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); + if (impl_name != "") { + ov::intel_gpu::ImplementationDesc dyn_quan_impl_desc = { format::bfyx, impl_name, impl_types::ocl }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"dyn_quan_prim", dyn_quan_impl_desc} })); + } + network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input_mem); @@ -118,37 +142,69 @@ class dynamic_quantization_gpu_tests: public ::testing::Test { }; TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_large_size) { - this->test_dynamic_quantization(false, false, 2048, 4096); + this->test_dynamic_quantization(false, {11, 1, 1, 4096}, {2048, 1, 1, 4096}); } TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_large_size_dynamic) { - this->test_dynamic_quantization(false, true, 2048, 4096); + this->test_dynamic_quantization(false, {-1, 1, 1, 4096}, {2048, 1, 1, 4096}); } TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_small_size) { - this->test_dynamic_quantization(false, false, 64, 4096); + this->test_dynamic_quantization(false, {1, 1, 1, 4096}, {64, 1, 1, 4096}); } TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_single_batch) { - this->test_dynamic_quantization(false, false, 1, 4096); + this->test_dynamic_quantization(false, {-1, 1, 1, 4096}, {1, 1, 1, 4096}); } TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_ref_only) { - 
this->test_dynamic_quantization(false, false, 16, 33); + this->test_dynamic_quantization(false, {-1, 1, 1, 33}, {16, 1, 1, 33}); } TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_ref_only_dynamic) { - this->test_dynamic_quantization(false, true, 16, 33); + this->test_dynamic_quantization(false, {1, 1, 1, 33}, {16, 1, 1, 33}); } TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_invalid) { - this->test_dynamic_quantization(false, false, 16, 7); + this->test_dynamic_quantization(false, {-1, 1, 1, 7}, {16, 1, 1, 7}); } TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_unaligned) { - this->test_dynamic_quantization(false, false, 16, 32); + this->test_dynamic_quantization(false, {-1, 1, 1, 32}, {16, 1, 1, 32}); } TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_unaligned_dynamic) { - this->test_dynamic_quantization(false, true, 16, 32); + this->test_dynamic_quantization(false, {1, 1, 1, 32}, {16, 1, 1, 32}); +} + +TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_kv_cache) { + this->test_dynamic_quantization(false, {-1, 8, -1, 96}, {1, 8, 1, 96}, QuantizationType::Symmetric, "dynamic_quantize_gpu_kv_cache"); +} + +TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_kv_cache_batched) { + this->test_dynamic_quantization(false, {-1, 4, -1, 64}, {1, 4, 35, 64}, QuantizationType::Symmetric, "dynamic_quantize_gpu_kv_cache"); +} + +TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_kv_cache_reordered) { + this->test_dynamic_quantization(false, {-1, -1, 8, 96}, {1, 1, 8, 96}, QuantizationType::Symmetric, "dynamic_quantize_gpu_kv_cache"); +} + +TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_kv_cache_batched_reordered) { + this->test_dynamic_quantization(false, {-1, -1, 4, 64}, {1, 35, 4, 64}, QuantizationType::Symmetric, "dynamic_quantize_gpu_kv_cache"); +} + +TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_kv_cache_asym) { + this->test_dynamic_quantization(false, {-1, 8, -1, 96}, {1, 8, 1, 96}, 
QuantizationType::Asymmetric, "dynamic_quantize_gpu_kv_cache"); +} + +TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_kv_cache_batched_asym) { + this->test_dynamic_quantization(false, {-1, 4, -1, 64}, {1, 4, 35, 64}, QuantizationType::Asymmetric, "dynamic_quantize_gpu_kv_cache"); +} + +TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_kv_cache_reordered_asym) { + this->test_dynamic_quantization(false, {-1, -1, 8, 96}, {1, 1, 8, 96}, QuantizationType::Asymmetric, "dynamic_quantize_gpu_kv_cache"); +} + +TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_kv_cache_batched_reordered_asym) { + this->test_dynamic_quantization(false, {-1, -1, 4, 64}, {1, 35, 4, 64}, QuantizationType::Asymmetric, "dynamic_quantize_gpu_kv_cache"); } diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp index eed9760348f669..6bf44a31add0f4 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp @@ -1589,7 +1589,7 @@ class fully_connected_gpu_tests: public ::testing::Test { count++; OPENVINO_ASSERT(abs_diff < 256); } - GPU_DEBUG_LOG << "---> count: " << count << ", max_diff:" << max_diff << ", avg_diff: " << (avg/count) << std::endl; + std::cout << "---> count: " << count << ", max_diff:" << max_diff << ", avg_diff: " << (avg/count) << std::endl; } void test_compressed_int4_scale(bool is_caching_test, bool is_dynamic, long int batch_num, long int scales_group_size = 128, bool is_wei_dyn = false) { @@ -2903,7 +2903,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); - ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bf_tiled", impl_types::ocl }; + ov::intel_gpu::ImplementationDesc fc_impl_desc = { 
format::bfyx, "fully_connected_gpu_bfyx_ref", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); config.set_property(ov::hint::dynamic_quantization_group_size(0)); @@ -2965,11 +2965,137 @@ class fully_connected_gpu_tests: public ::testing::Test { max_diff = abs_diff; avg += abs_diff; count++; - OPENVINO_ASSERT(abs_diff < 5); + OPENVINO_ASSERT(abs_diff < 6); } GPU_DEBUG_LOG << "---> count: " << count << ", max_diff:" << max_diff << ", avg_diff: " << (avg/count) << std::endl; OPENVINO_ASSERT((avg/count) < 0.5); } + + void test_compressed_int8_scale_dyn_quan_weight_u8(bool is_dynamic, int batch = 1, int ifm = 512, int ofm = 2048, + int quantize_group_size = 32, int scales_group_size = 128, + bool is_wzp_test = false, bool is_wzp_scalar = false) { + tests::random_generator rg(GET_SUITE_NAME); + auto& engine = get_test_engine(); + + if (engine.get_device_info().dev_type == device_type::discrete_gpu) + GTEST_SKIP(); + + long int batch_num = batch; + long int ifm_num = ifm; + long int ofm_num = ofm; + long int wzp_num = is_wzp_scalar ? 
1 : ofm_num; + + auto input_ps = ov::PartialShape{ batch_num, 1, ifm_num }; + auto input_mem = engine.allocate_memory({ input_ps, data_types::f16, format::bfyx }); + + auto weights_mem = engine.allocate_memory({ {ofm_num, ifm_num}, data_types::u8, format::bfyx }); + auto scale_mem = engine.allocate_memory({ {ofm_num, ifm_num / scales_group_size}, data_types::f16, format::fbyx }); + auto dcomp_zp_mem = engine.allocate_memory({ {wzp_num, 1}, data_types::u8, format::bfyx }); + + + auto input_data = rg.generate_random_1d(batch_num * ifm_num, -2.f, 2.f); + set_values(input_mem, input_data); + + auto weigths_data = rg.generate_random_1d(ofm_num * ifm_num, 0, 4); + set_values(weights_mem, weigths_data); + + auto scale_data = rg.generate_random_1d(ofm_num * ifm_num / scales_group_size, -2.f, 2.f); + set_values(scale_mem, scale_data); + + if (is_wzp_test) { + auto zp_data = rg.generate_random_1d(wzp_num, 0, 2); + set_values(dcomp_zp_mem, zp_data); + } + + auto in_layout = is_dynamic ? layout{ ov::PartialShape{ -1, -1, -1 }, data_types::f16, format::bfyx } + : layout{ input_ps, data_types::f16, format::bfyx }; + + auto dcomp_zp_name = is_wzp_test ? "wzp" : ""; + auto fc_prim = fully_connected("fc_prim", input_info("input"), "weights", "", "scale", dcomp_zp_name, data_types::f16, 3, 2); + + if (is_wzp_test) { + fc_prim.compressed_weights = true; + fc_prim.decompression_zero_point = is_wzp_test ? 
"wzp" : ""; + } + + // Implemented dynamic quantize kernel + auto get_ref_results = [&]() { + topology topo; + topo.add(input_layout("input", in_layout)); + topo.add(data("weights", weights_mem)); + topo.add(data("scale", scale_mem)); + topo.add(data("wzp", dcomp_zp_mem)); + topo.add(fc_prim); + + auto config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bf_tiled", impl_types::ocl }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); + + network network(engine, topo, config); + network.set_input_data("input", input_mem); + + auto outputs = network.execute(); + OPENVINO_ASSERT(outputs.size() == 1); + OPENVINO_ASSERT(outputs.begin()->first == "fc_prim"); + + auto output_layout = outputs.begin()->second.get_layout(); + auto output_mem = outputs.begin()->second.get_memory(); + + return engine.reinterpret_buffer(*output_mem, output_layout); + }; + + topology topology( + input_layout("input", in_layout), + data("weights", weights_mem), + data("scale", scale_mem), + data("wzp", dcomp_zp_mem), + fc_prim + ); + + auto config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::hint::dynamic_quantization_group_size(quantize_group_size)); + + network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), false); + + if (is_dynamic && !engine.get_device_info().supports_immad) { + auto inst = network->get_primitive("fc_prim"); + auto impl = inst->get_impl(); + ASSERT_TRUE(impl != NULL); + auto kernel_num = (is_dynamic) ? 3 : 2; + kernel_num = (quantize_group_size < 32) ? 
2 : kernel_num; + ASSERT_EQ(impl->get_kernels().size(), size_t(kernel_num)); + } + + network->set_input_data("input", input_mem); + + auto outputs = network->execute(); + ASSERT_EQ(outputs.size(), size_t(1)); + ASSERT_EQ(outputs.begin()->first, "fc_prim"); + + auto output_mem = outputs.begin()->second.get_memory(); + cldnn::mem_lock output_ptr (output_mem, get_test_stream()); + + auto ref_output_mem = get_ref_results(); + cldnn::mem_lock output_ptr_ref (ref_output_mem, get_test_stream()); + + size_t count = 0; + float max_diff = 0.f; + float avg = 0.f; + for (size_t i = 0; i < output_ptr_ref.size(); ++i) { + auto abs_diff = std::abs((float)output_ptr_ref[i] - (float)output_ptr[i]); + if (max_diff < abs_diff) + max_diff = abs_diff; + avg += abs_diff; + count++; + OPENVINO_ASSERT(abs_diff < 8); + } + GPU_DEBUG_LOG << "---> count: " << count << ", max_diff:" << max_diff << ", avg_diff: " << (avg/count) << std::endl; + OPENVINO_ASSERT((avg/count) < 0.8); + } }; using shared_dims = std::tuple; @@ -4064,6 +4190,34 @@ TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dynamic_quantize_wzp_sta this->test_compressed_int4_scale_dyn_quan_weight_i4(false, 320, 1024, 1024, 32, 32, true); } +TEST_F(fully_connected_gpu_tests, compressed_int8_scale_dynamic_quantize_wzp_128_large) { + this->test_compressed_int8_scale_dyn_quan_weight_u8(true, 320, 4096, 4096, 128, 128, true); +} + +TEST_F(fully_connected_gpu_tests, compressed_int8_scale_dynamic_quantize_wzp_32_ifm_1024) { + this->test_compressed_int8_scale_dyn_quan_weight_u8(true, 320, 1024, 1024, 32, 32, true); +} + +TEST_F(fully_connected_gpu_tests, compressed_int8_scale_dynamic_quantize_wzp_32_ifm_2048) { + this->test_compressed_int8_scale_dyn_quan_weight_u8(true, 320, 2048, 2048, 32, 32, true); +} + +TEST_F(fully_connected_gpu_tests, compressed_int8_scale_dynamic_quantize_wzp_32_ifm_4096) { + this->test_compressed_int8_scale_dyn_quan_weight_u8(true, 320, 4096, 4096, 32, 32, true); +} + +TEST_F(fully_connected_gpu_tests, 
compressed_int8_scale_dynamic_quantize_wzp_32_large_unaligned) { + this->test_compressed_int8_scale_dyn_quan_weight_u8(true, 310, 1024, 1024, 32, 32, true); +} + +TEST_F(fully_connected_gpu_tests, compressed_int8_scale_dynamic_quantize_wzp_128_small) { + this->test_compressed_int8_scale_dyn_quan_weight_u8(true, 16, 1024, 1024, 128, 128, true); +} + +TEST_F(fully_connected_gpu_tests, compressed_int8_scale_dynamic_quantize_wzp_128_single) { + this->test_compressed_int8_scale_dyn_quan_weight_u8(true, 1, 1024, 1024, 128, 128, true); +} + TEST_F(fully_connected_gpu_tests, compressed_scale_bias) { this->test_compressed_scale_bias(false); } diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp index 3b41f44050e527..df493544624b64 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp @@ -473,6 +473,9 @@ class gemm_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + ov::intel_gpu::ImplementationDesc gemm_impl = { format::bfyx, "", impl_types::ocl }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"gemm_ref", gemm_impl} })); + network network(engine, topology, config); network.set_input_data("input1", input1_mem); network.set_input_data("input2", input2_mem); @@ -498,6 +501,10 @@ class gemm_gpu_tests: public ::testing::Test { ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + + ov::intel_gpu::ImplementationDesc gemm_impl = { format::bfyx, "", impl_types::ocl }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"gemm", gemm_impl} })); + network::ptr network = get_network(engine, topology, 
config, get_test_stream_ptr(), is_caching_test); network->set_input_data("input1", input1_mem); network->set_input_data("input2", input2_mem); @@ -1246,10 +1253,12 @@ class gemm_gpu_tests: public ::testing::Test { network->set_input_data("input0", input0_mem); network->set_input_data("input1", input1_mem); - auto inst = network->get_primitive("gemm"); - auto impl = inst->get_impl(); - ASSERT_TRUE(impl != nullptr); - ASSERT_TRUE(impl->is_dynamic() == is_input_dynamic); + if (!engine.get_device_info().supports_immad) { + auto inst = network->get_primitive("gemm"); + auto impl = inst->get_impl(); + ASSERT_TRUE(impl != nullptr); + ASSERT_TRUE(impl->is_dynamic() == is_input_dynamic); + } auto outputs = network->execute(); @@ -1533,10 +1542,12 @@ class gemm_gpu_tests: public ::testing::Test { network->set_input_data("input0", input0_mem); network->set_input_data("input1", input1_mem); - auto inst = network->get_primitive("gemm"); - auto impl = inst->get_impl(); - ASSERT_TRUE(impl != nullptr); - ASSERT_TRUE(impl->is_dynamic() == is_input_dynamic); + if (!engine.get_device_info().supports_immad) { + auto inst = network->get_primitive("gemm"); + auto impl = inst->get_impl(); + ASSERT_TRUE(impl != nullptr); + ASSERT_TRUE(impl->is_dynamic() == is_input_dynamic); + } auto outputs = network->execute(); @@ -2853,8 +2864,10 @@ class gemm_onednn: public ::testing::Test { auto inst = network->get_primitive("gemm"); auto impl = inst->get_impl(); - ASSERT_TRUE(impl != nullptr); - ASSERT_TRUE(impl->is_dynamic()); + if (!engine.get_device_info().supports_immad) { + ASSERT_TRUE(impl != nullptr); + ASSERT_TRUE(impl->is_dynamic()); + } auto outputs = network->execute(); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/variable.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/variable.cpp index 3bf1a771512ae8..59e31547602252 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/variable.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/variable.cpp @@ -35,7 +35,7 @@ struct 
variable_test : public ::testing::TestWithParam> { topology topology; topology.add(input_layout("input", input_data->get_layout())); - topology.add(read_value{"read_value", { input_info("input") }, "v0", variable_layout}); + topology.add(read_value{"read_value", { input_info("input") }, "v0", { variable_layout }}); topology.add(eltwise{"sum", { input_info("input"), input_info("read_value") }, eltwise_mode::sum, {}, variable_layout.data_type}); topology.add(assign{"assign", { input_info("sum") }, "v0", variable_layout}); @@ -129,7 +129,7 @@ void test_exception_on_wrong_layout(bool is_caching_test) { topology topology; topology.add(input_layout("input", input_data->get_layout())); - topology.add(read_value{"read_value", { input_info("input") }, "v0", variable_layout}); + topology.add(read_value{"read_value", { input_info("input") }, "v0", { variable_layout }}); topology.add(input_layout("wrong_input", wrong_input_data->get_layout())); topology.add(assign{"assign", { input_info("wrong_input") }, "v0", wrong_layout}); @@ -218,14 +218,14 @@ void test_variables_are_preserved_across_inferences(bool is_caching_test) { topology.add(assign{"assign_2", { input_info("input_2") }, "v2", variable_layout}); topology.add(data("dummy1", dummy1)); - topology.add(read_value{"read_value_1", { input_info("dummy1") }, "v1", variable_layout}); - topology.add(read_value{"read_value_2", { input_info("dummy1") }, "v2", variable_layout}); + topology.add(read_value{"read_value_1", { input_info("dummy1") }, "v1", { variable_layout }}); + topology.add(read_value{"read_value_2", { input_info("dummy1") }, "v2", { variable_layout }}); topology.add(eltwise{"sum", { input_info("read_value_1"), input_info("read_value_2") }, eltwise_mode::sum, {}, variable_layout.data_type}); topology.add(assign{"assign_result", { input_info("sum") }, "v_result", variable_layout}); topology.add(data("dummy2", dummy2)); - topology.add(read_value{"read_result", { input_info("dummy2") }, "v_result", variable_layout}); + 
topology.add(read_value{"read_result", { input_info("dummy2") }, "v_result", { variable_layout }}); cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); diff --git a/src/plugins/intel_gpu/tests/unit/transformations/fc_per_layer_scaling_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/fc_per_layer_scaling_test.cpp new file mode 100644 index 00000000000000..2d2f21b57d7152 --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/transformations/fc_per_layer_scaling_test.cpp @@ -0,0 +1,117 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include "common_test_utils/graph_comparator.hpp" +#include "common_test_utils/ov_test_utils.hpp" + +#include +#include + +#include "openvino/op/constant.hpp" +#include "openvino/op/parameter.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/pass/manager.hpp" + +#include +#include "plugin/transformations/fc_per_layer_scaling.hpp" +#include "intel_gpu/op/placeholder.hpp" +#include "intel_gpu/op/fully_connected_compressed.hpp" + +using namespace testing; +using namespace ov::intel_gpu; + +namespace ov { +namespace test { +namespace intel_gpu { + +TEST_F(TransformationTestsF, FullyConnectedPerLayerScalingTest1) { + float scale_factor = 2.f; + { + auto input = std::make_shared(ov::element::f16, ov::PartialShape{ -1, 16 }); + auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 }); + auto no_bias = std::make_shared(); + auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 32, 1 }, { 1 }); + auto zp_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 32, 1 }, { 1 }); + auto fc_compressed = std::make_shared(input, weights_const, no_bias, scale_const, zp_const); + auto convert = std::make_shared(fc_compressed, ov::element::f32); + auto result = std::make_shared(convert); + + model = 
std::make_shared(ov::ResultVector{result}, ov::ParameterVector{input}); + manager.register_pass(scale_factor); + } + { + auto input = std::make_shared(ov::element::f16, ov::PartialShape{ -1, 16 }); + auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 }); + auto no_bias = std::make_shared(); + auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 32, 1 }, { 1 }); + auto zp_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 32, 1 }, { 1 }); + auto scale_down_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 1 }, { 1.f / scale_factor }); + auto scale_down = std::make_shared(input, scale_down_const); + auto fc_compressed = std::make_shared(scale_down, weights_const, no_bias, scale_const, zp_const); + auto scale_up_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 1 }, { scale_factor }); + auto scale_up = std::make_shared(fc_compressed, scale_up_const); + auto convert = std::make_shared(scale_up, ov::element::f32); + auto result = std::make_shared(convert); + + model_ref = std::make_shared(ov::ResultVector{result}, ov::ParameterVector{input}); + comparator.enable(FunctionsComparator::ATTRIBUTES); + } +} + +TEST_F(TransformationTestsF, FullyConnectedPerLayerScalingTest2) { + float scale_factor = 2.f; + { + auto input = std::make_shared(ov::element::f16, ov::PartialShape{ -1, 16 }); + auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 }); + auto bias = std::make_shared(ov::element::f16, ov::Shape{ 1, 32 }); + auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 32, 1 }, { 1 }); + auto zp_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 32, 1 }, { 1 }); + auto fc_compressed = std::make_shared(input, weights_const, bias, scale_const, zp_const); + auto convert = std::make_shared(fc_compressed, ov::element::f32); + auto result = std::make_shared(convert); + + model = 
std::make_shared(ov::ResultVector{result}, ov::ParameterVector{input}); + manager.register_pass(scale_factor); + } + { + auto input = std::make_shared(ov::element::f16, ov::PartialShape{ -1, 16 }); + auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 }); + auto bias = std::make_shared(ov::element::f16, ov::Shape{ 1, 32 }); + auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 32, 1 }, { 1 }); + auto zp_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 32, 1 }, { 1 }); + auto scale_down_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 1 }, { 1.f / scale_factor }); + auto scale_down = std::make_shared(input, scale_down_const); + auto bias_scale_down_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 1 }, { 1.f / scale_factor }); + auto bias_scale_down = std::make_shared(bias, scale_down_const); + auto fc_compressed = std::make_shared(scale_down, weights_const, bias_scale_down, scale_const, zp_const); + auto scale_up_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 1 }, { scale_factor }); + auto scale_up = std::make_shared(fc_compressed, scale_up_const); + auto convert = std::make_shared(scale_up, ov::element::f32); + auto result = std::make_shared(convert); + + model_ref = std::make_shared(ov::ResultVector{result}, ov::ParameterVector{input}); + comparator.enable(FunctionsComparator::ATTRIBUTES); + } +} + +TEST_F(TransformationTestsF, FullyConnectedPerLayerScalingTest3) { + { + auto input = std::make_shared(ov::element::f16, ov::PartialShape{ -1, 16 }); + auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 }); + auto no_bias = std::make_shared(); + auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 32, 1 }, { 1 }); + auto zp_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 32, 1 }, { 1 }); + auto fc_compressed = std::make_shared(input, weights_const, 
no_bias, scale_const, zp_const); + auto convert = std::make_shared(fc_compressed, ov::element::f32); + auto result = std::make_shared(convert); + + model = std::make_shared(ov::ResultVector{result}, ov::ParameterVector{input}); + manager.register_pass(1.f); + } +} + +} // namespace intel_gpu +} // namespace test +} // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_gpu/tests/unit/transformations/kv_cache_compression.cpp b/src/plugins/intel_gpu/tests/unit/transformations/kv_cache_compression.cpp new file mode 100644 index 00000000000000..67123f1d84cfe7 --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/transformations/kv_cache_compression.cpp @@ -0,0 +1,344 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_test_utils/ov_test_utils.hpp" + +#include "openvino/core/model.hpp" +#include "openvino/pass/manager.hpp" +#include "openvino/op/abs.hpp" +#include "openvino/op/broadcast.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/gather.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/op/unsqueeze.hpp" + +#include "intel_gpu/op/read_value.hpp" +#include "intel_gpu/op/read_values.hpp" +#include "intel_gpu/op/kv_cache.hpp" +#include "intel_gpu/op/kv_cache_compressed.hpp" +#include "intel_gpu/op/indirect_sdpa.hpp" + +#include "plugin/transformations/kv_cache_compression.hpp" + +using namespace testing; +using namespace ov::intel_gpu; + +namespace ov { +namespace test { +namespace intel_gpu { + +TEST_F(TransformationTestsF, KVCacheCompression) { + bool causal = false; + bool with_mask = true; + bool with_scale = true; + size_t concat_axis = 2; + size_t gather_axis = 0; + ov::element::Type_t element_type = ov::element::f16; + std::vector qkv_order = {0, 1, 2, 3}; + std::shared_ptr mask = nullptr; + std::shared_ptr scale = nullptr; + ov::PartialShape input_shape = ov::PartialShape{1, 32, -1, 80}; + + { + auto query = std::make_shared(element_type, input_shape); + auto 
beam_idx = std::make_shared(ov::element::i32, ov::PartialShape{1}); + + auto key_variable = std::make_shared(ov::op::util::VariableInfo{{1, 32, -1, 80}, ov::element::f16, "v0"}); + auto key_current = std::make_shared(ov::element::f16, input_shape); + auto key_past = std::make_shared(key_variable); + auto key_cache = std::make_shared(key_past, key_current, beam_idx, key_variable, concat_axis, gather_axis); + + auto value_variable = std::make_shared(ov::op::util::VariableInfo{{1, 32, -1, 80}, ov::element::f16, "v1"}); + auto value_current = std::make_shared(ov::element::f16, input_shape); + auto value_past = std::make_shared(value_variable); + auto value_cache = std::make_shared(value_past, value_current, beam_idx, value_variable, concat_axis, gather_axis); + + ov::ParameterVector params{ beam_idx, query, key_current, value_current }; + + if (with_mask) { + auto attn_mask = std::make_shared(element_type, ov::PartialShape::dynamic(4)); + mask = attn_mask; + params.push_back(attn_mask); + } + + if (with_mask && with_scale) { + auto scale_input = std::make_shared(element_type, ov::PartialShape{1}); + scale = scale_input; + params.push_back(scale_input); + } + + ov::OutputVector sdpa_inputs = { query, key_cache->output(0), value_cache->output(0) }; + + if (mask) { + sdpa_inputs.push_back(mask); + } + + if (scale) { + sdpa_inputs.push_back(scale); + } + + std::shared_ptr sdpa = nullptr; + sdpa = std::make_shared(sdpa_inputs, + key_cache->output(1), + causal, + gather_axis, + qkv_order, + qkv_order, + qkv_order, + ov::intel_gpu::op::SDPA::default_order(4)); + + auto result = std::make_shared(sdpa); + + ov::ResultVector results{ result }; + + model = std::make_shared(results, params); + manager.register_pass(ov::element::i8); + } + { + ov::op::internal::DynamicQuantize::Attributes dq_config; + dq_config.quantization_type = ov::op::internal::DynamicQuantize::QuantizationType::Asymmetric; + dq_config.quantization_dt = ov::element::i8; + dq_config.scale_dt = ov::element::f16; 
+ dq_config.zp_dt = ov::element::f16; + dq_config.group_sizes = { 1, 1, 1, UINT64_MAX }; + dq_config.scales_zp_output_order = { 0, 1, 2, 3 }; + dq_config.output_storage_type = ov::op::internal::DynamicQuantize::OutputStorageType::InterleavedScalesZP; + + auto query = std::make_shared(element_type, input_shape); + auto beam_idx = std::make_shared(ov::element::i32, ov::PartialShape{1}); + + auto key_variable = std::make_shared(ov::op::util::VariableInfo{{1, 32, -1, 80}, ov::element::f16, "v0"}); + auto key_current = std::make_shared(ov::element::f16, input_shape); + auto key_past_variable_infos = { ov::op::util::VariableInfo{{1, 32, -1, 80}, ov::element::i8, "v0"}, + ov::op::util::VariableInfo{{1, 32, -1, 2}, ov::element::f16, "v0"} }; + auto key_past_compressed = std::make_shared(key_variable, key_past_variable_infos); + auto key_cache_inputs = ov::OutputVector{ key_past_compressed->output(0), key_current, beam_idx, key_past_compressed->output(1) }; + auto key_cache = std::make_shared(key_cache_inputs, + key_variable, + concat_axis, + gather_axis, + dq_config); + + auto value_variable = std::make_shared(ov::op::util::VariableInfo{{1, 32, -1, 80}, ov::element::f16, "v1"}); + auto value_current = std::make_shared(ov::element::f16, input_shape); + auto value_past_variable_infos = { ov::op::util::VariableInfo{{1, 32, -1, 80}, ov::element::i8, "v1"}, + ov::op::util::VariableInfo{{1, 32, -1, 2}, ov::element::f16, "v1"} }; + auto value_past_compressed = std::make_shared(value_variable, value_past_variable_infos); + auto value_cache_inputs = ov::OutputVector{ value_past_compressed->output(0), value_current, beam_idx, value_past_compressed->output(1) }; + auto value_cache = std::make_shared(value_cache_inputs, + value_variable, + concat_axis, + gather_axis, + dq_config); + + ov::ParameterVector params{ beam_idx, query, key_current, value_current }; + + if (with_mask) { + auto attn_input = std::make_shared(element_type, ov::PartialShape::dynamic(4)); + mask = attn_input; + 
params.push_back(attn_input); + } + + if (with_mask && with_scale) { + auto scale_input = std::make_shared(element_type, ov::PartialShape{1}); + scale = scale_input; + params.push_back(scale_input); + } + + ov::OutputVector sdpa_inputs = { query, key_cache->output(0), value_cache->output(0) }; + if (mask) { + sdpa_inputs.push_back(mask); + } + + if (scale) { + sdpa_inputs.push_back(scale); + } + + sdpa_inputs.push_back(key_cache->output(2)); + sdpa_inputs.push_back(value_cache->output(2)); + + std::shared_ptr sdpa = nullptr; + sdpa = std::make_shared(sdpa_inputs, + key_cache->output(1), + causal, + gather_axis, + qkv_order, + qkv_order, + qkv_order, + ov::intel_gpu::op::SDPA::default_order(4), + dq_config); + + auto result = std::make_shared(sdpa); + + ov::ResultVector results{ result }; + + model_ref = std::make_shared(results, params); + } +} + +TEST_F(TransformationTestsF, KVCacheCompressionWithInitializers) { + bool causal = false; + bool with_mask = true; + bool with_scale = true; + size_t concat_axis = 2; + size_t gather_axis = 0; + ov::element::Type_t element_type = ov::element::f16; + std::vector qkv_order = {0, 1, 2, 3}; + std::shared_ptr mask = nullptr; + std::shared_ptr scale = nullptr; + ov::PartialShape input_shape = ov::PartialShape{1, 32, -1, 80}; + + { + auto query = std::make_shared(element_type, input_shape); + auto beam_idx = std::make_shared(ov::element::i32, ov::PartialShape{1}); + + auto key_variable_initializer = std::make_shared(ov::element::f16, input_shape); + auto key_current = std::make_shared(ov::element::f16, input_shape); + auto key_variable = std::make_shared(ov::op::util::VariableInfo{{1, 32, -1, 80}, ov::element::f16, "v0"}); + auto key_past = std::make_shared(key_variable_initializer, key_variable); + auto key_cache = std::make_shared(key_past, key_current, beam_idx, key_variable, concat_axis, gather_axis); + + auto value_variable_initializer = std::make_shared(ov::element::f16, input_shape); + auto value_variable = 
std::make_shared(ov::op::util::VariableInfo{{1, 32, -1, 80}, ov::element::f16, "v1"}); + auto value_current = std::make_shared(ov::element::f16, input_shape); + auto value_past = std::make_shared(value_variable_initializer, value_variable); + auto value_cache = std::make_shared(value_past, value_current, beam_idx, value_variable, concat_axis, gather_axis); + + ov::ParameterVector params{ beam_idx, query, key_current, value_current, key_variable_initializer, value_variable_initializer }; + + if (with_mask) { + auto attn_mask = std::make_shared(element_type, ov::PartialShape::dynamic(4)); + mask = attn_mask; + params.push_back(attn_mask); + } + + if (with_mask && with_scale) { + auto scale_input = std::make_shared(element_type, ov::PartialShape{1}); + scale = scale_input; + params.push_back(scale_input); + } + + ov::OutputVector sdpa_inputs = { query, key_cache->output(0), value_cache->output(0) }; + + if (mask) { + sdpa_inputs.push_back(mask); + } + + if (scale) { + sdpa_inputs.push_back(scale); + } + + std::shared_ptr sdpa = nullptr; + sdpa = std::make_shared(sdpa_inputs, + key_cache->output(1), + causal, + gather_axis, + qkv_order, + qkv_order, + qkv_order, + ov::intel_gpu::op::SDPA::default_order(4)); + + auto result = std::make_shared(sdpa); + + ov::ResultVector results{ result }; + + model = std::make_shared(results, params); + manager.register_pass(ov::element::i8); + } + { + ov::op::internal::DynamicQuantize::Attributes dq_config; + dq_config.quantization_type = ov::op::internal::DynamicQuantize::QuantizationType::Asymmetric; + dq_config.quantization_dt = ov::element::i8; + dq_config.scale_dt = ov::element::f16; + dq_config.zp_dt = ov::element::f16; + dq_config.group_sizes = { 1, 1, 1, UINT64_MAX }; + dq_config.scales_zp_output_order = { 0, 1, 2, 3 }; + dq_config.output_storage_type = ov::op::internal::DynamicQuantize::OutputStorageType::InterleavedScalesZP; + + auto query = std::make_shared(element_type, input_shape); + auto beam_idx = 
std::make_shared(ov::element::i32, ov::PartialShape{1}); + + auto key_past_variable_infos = { ov::op::util::VariableInfo{{1, 32, -1, 80}, ov::element::i8, "v0"}, + ov::op::util::VariableInfo{{1, 32, -1, 2}, ov::element::f16, "v0"} }; + auto key_current = std::make_shared(ov::element::f16, input_shape); + auto key_variable_initializer = std::make_shared(ov::element::f16, input_shape); + auto key_variable = std::make_shared(ov::op::util::VariableInfo{{1, 32, -1, 80}, ov::element::f16, "v0"}); + + auto key_initializer_dq = + std::make_shared(key_variable_initializer, dq_config); + auto key_past_initializers = ov::OutputVector{ key_initializer_dq->output(0), key_initializer_dq->output(1) }; + auto key_past_compressed = std::make_shared(key_past_initializers, key_variable, key_past_variable_infos); + auto key_cache_inputs = ov::OutputVector{ key_past_compressed->output(0), key_current, beam_idx, key_past_compressed->output(1) }; + auto key_cache = std::make_shared(key_cache_inputs, + key_variable, + concat_axis, + gather_axis, + dq_config); + + auto value_past_variable_infos = { ov::op::util::VariableInfo{{1, 32, -1, 80}, ov::element::i8, "v1"}, + ov::op::util::VariableInfo{{1, 32, -1, 2}, ov::element::f16, "v1"} }; + + auto value_current = std::make_shared(ov::element::f16, input_shape); + auto value_variable_initializer = std::make_shared(ov::element::f16, input_shape); + auto value_variable = std::make_shared(ov::op::util::VariableInfo{{1, 32, -1, 80}, ov::element::f16, "v1"}); + + auto value_initializer_dq = + std::make_shared(value_variable_initializer, dq_config); + auto value_past_initializers = ov::OutputVector{ value_initializer_dq->output(0), value_initializer_dq->output(1) }; + auto value_past_compressed = std::make_shared(value_past_initializers, value_variable, value_past_variable_infos); + auto value_cache_inputs = ov::OutputVector{ value_past_compressed->output(0), value_current, beam_idx, value_past_compressed->output(1) }; + auto value_cache = 
std::make_shared(value_cache_inputs, + value_variable, + concat_axis, + gather_axis, + dq_config); + + ov::ParameterVector params{ beam_idx, query, key_current, value_current, key_variable_initializer, value_variable_initializer }; + + if (with_mask) { + auto attn_input = std::make_shared(element_type, ov::PartialShape::dynamic(4)); + mask = attn_input; + params.push_back(attn_input); + } + + if (with_mask && with_scale) { + auto scale_input = std::make_shared(element_type, ov::PartialShape{1}); + scale = scale_input; + params.push_back(scale_input); + } + + ov::OutputVector sdpa_inputs = { query, key_cache->output(0), value_cache->output(0) }; + if (mask) { + sdpa_inputs.push_back(mask); + } + + if (scale) { + sdpa_inputs.push_back(scale); + } + + sdpa_inputs.push_back(key_cache->output(2)); + sdpa_inputs.push_back(value_cache->output(2)); + + std::shared_ptr sdpa = nullptr; + sdpa = std::make_shared(sdpa_inputs, + key_cache->output(1), + causal, + gather_axis, + qkv_order, + qkv_order, + qkv_order, + ov::intel_gpu::op::SDPA::default_order(4), + dq_config); + + auto result = std::make_shared(sdpa); + + ov::ResultVector results{ result }; + + model_ref = std::make_shared(results, params); + } +} + +} // namespace intel_gpu +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_gpu/tests/unit/transformations/optimize_subsequent_reshapes_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/optimize_subsequent_reshapes_test.cpp new file mode 100644 index 00000000000000..732a14be03bf39 --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/transformations/optimize_subsequent_reshapes_test.cpp @@ -0,0 +1,97 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include + +#include "openvino/pass/manager.hpp" +#include "openvino/core/model.hpp" +#include "openvino/core/coordinate_diff.hpp" +#include "openvino/core/type/element_type.hpp" +#include "openvino/op/constant.hpp" +#include 
"openvino/op/reshape.hpp" + +#include +#include +#include + +#include "common_test_utils/ov_test_utils.hpp" + +using namespace testing; +using namespace ov::intel_gpu; + +TEST_F(TransformationTestsF, OptimizeSubsequentReshapes1) { + { + auto input = std::make_shared(ov::element::i64, ov::PartialShape{ -1, 1, 4096 }); + auto first_reshape_pattern = std::make_shared(ov::element::i32, ov::Shape{4}, std::vector{ 0, 0, 32, 128 }); + auto first_reshape = std::make_shared(input, first_reshape_pattern, true); + + auto second_reshape_pattern = std::make_shared(ov::element::i32, ov::Shape{2}, std::vector{ 0, -1 }); + auto second_reshape = std::make_shared(first_reshape, second_reshape_pattern, true); + auto result = std::make_shared(second_reshape); + + model = std::make_shared(ov::NodeVector{ result }, ov::ParameterVector{ input }); + manager.register_pass(); + } + { + auto input = std::make_shared(ov::element::i64, ov::PartialShape{ -1, 1, 4096 }); + auto reshape_pattern = std::make_shared(ov::element::i32, ov::Shape{2}, std::vector{ 0, 4096 }); + auto reshape = std::make_shared(input, reshape_pattern, true); + auto result = std::make_shared(reshape); + + model_ref = std::make_shared(ov::NodeVector{ result }, ov::ParameterVector{ input }); + } + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, OptimizeSubsequentReshapes2) { + { + auto input = std::make_shared(ov::element::i64, ov::PartialShape{ -1, 1, 4096 }); + auto first_reshape_pattern = std::make_shared(ov::element::i32, ov::Shape{4}, std::vector{ 0, 0, 32, 128 }); + auto first_reshape = std::make_shared(input, first_reshape_pattern, true); + + auto second_reshape_pattern = std::make_shared(ov::element::i32, ov::Shape{4}, std::vector{ 0, 32, 1, 0 }); + auto second_reshape = std::make_shared(first_reshape, second_reshape_pattern, true); + auto result = std::make_shared(second_reshape); + + model = std::make_shared(ov::NodeVector{ result }, ov::ParameterVector{ input }); + 
manager.register_pass(); + } + { + auto input = std::make_shared(ov::element::i64, ov::PartialShape{ -1, 1, 4096 }); + auto reshape_pattern = std::make_shared(ov::element::i32, ov::Shape{4}, std::vector{ 0, 32, 1, 128 }); + auto reshape = std::make_shared(input, reshape_pattern, true); + auto result = std::make_shared(reshape); + + model_ref = std::make_shared(ov::NodeVector{ result }, ov::ParameterVector{ input }); + } + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, OptimizeSubsequentReshapes3) { + { + auto input = std::make_shared(ov::element::i64, ov::PartialShape{ -1, 32, 1, 128 }); + auto first_reshape_pattern = std::make_shared(ov::element::i32, ov::Shape{4}, std::vector{ 0, 1, 32, 0 }); + auto first_reshape = std::make_shared(input, first_reshape_pattern, true); + + auto second_reshape_pattern = std::make_shared(ov::element::i32, ov::Shape{2}, std::vector{ 0, -1 }); + auto second_reshape = std::make_shared(first_reshape, second_reshape_pattern, true); + auto result = std::make_shared(second_reshape); + + model = std::make_shared(ov::NodeVector{ result }, ov::ParameterVector{ input }); + manager.register_pass(); + } + { + auto input = std::make_shared(ov::element::i64, ov::PartialShape{ -1, 32, 1, 128 }); + auto reshape_pattern = std::make_shared(ov::element::i32, ov::Shape{2}, std::vector{ 0, 4096 }); + auto reshape = std::make_shared(input, reshape_pattern, true); + auto result = std::make_shared(reshape); + + model_ref = std::make_shared(ov::NodeVector{ result }, ov::ParameterVector{ input }); + } + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} diff --git a/src/plugins/intel_gpu/thirdparty/onednn_gpu b/src/plugins/intel_gpu/thirdparty/onednn_gpu index e99a84e4914a81..1722066ad4c0f1 160000 --- a/src/plugins/intel_gpu/thirdparty/onednn_gpu +++ b/src/plugins/intel_gpu/thirdparty/onednn_gpu @@ -1 +1 @@ -Subproject commit e99a84e4914a818c64165a4b52785f606e405c2b +Subproject commit 
1722066ad4c0f15495f2d0fcbe9deb2bfd188c36 diff --git a/src/plugins/intel_npu/README.md b/src/plugins/intel_npu/README.md index b7508c68704e32..980faa71a15937 100644 --- a/src/plugins/intel_npu/README.md +++ b/src/plugins/intel_npu/README.md @@ -78,7 +78,7 @@ There is currently no support for multiple devices, which means only one level-z ### Inference pipeline -The result of the model compilation is represented through a NetworkDescription. This model description is passed by the plugin to the driver to create a level zero graph instance and obtain a graph handle that can later be used to execute multiple inferences in parallel for the same model. Since the same model instance is shared across all subsequent inference objects, this initialization step is performed by default right after the model is compiled and it can be postponed until the creation of the first inference request through the use of an environment variable: "IE_NPU_CREATE_EXECUTOR" (IE_NPU_CREATE_EXECUTOR=0 to postpone the initialization). +The result of the model compilation is represented through an IGraph object, which contains a valid level zero graph handle that can later be used to execute multiple inferences in parallel for the same model. By default, weights are loaded into the NPU memory right after the model is compiled, but this step can be postponed until the creation of the first inference request through the use of an internal NPU property: "NPU_DEFER_WEIGHTS_LOAD". 
Users can create one or more inference requests for a compiled model using OpenVINO API: diff --git a/src/plugins/intel_npu/cmake/features.cmake b/src/plugins/intel_npu/cmake/features.cmake index 0dde0f9d67f6e5..7d34c52c6d1292 100644 --- a/src/plugins/intel_npu/cmake/features.cmake +++ b/src/plugins/intel_npu/cmake/features.cmake @@ -4,29 +4,10 @@ ov_option(ENABLE_MLIR_COMPILER "Enable compilation of npu_mlir_compiler libraries" ON) -ov_option(ENABLE_NPU_RUNTIME_COMMON "Enable compilation of npu runtime common libraries" ON) +ov_option(ENABLE_NPU_PLUGIN_ENGINE "Enable compilation of NPU plugin engine" ON) -# if ENABLE_ZEROAPI_BACKEND=ON, it adds the ze_loader dependency for driver compiler -ov_dependent_option(ENABLE_NPU_PLUGIN_ENGINE "Enable compilation of NPU plugin engine" ON "ENABLE_NPU_RUNTIME_COMMON" OFF) - -ov_dependent_option(ENABLE_ZEROAPI_BACKEND "Enable zero-api as a plugin backend" ON "ENABLE_NPU_RUNTIME_COMMON;ENABLE_NPU_PLUGIN_ENGINE" OFF) - -ov_dependent_option(ENABLE_DRIVER_COMPILER_ADAPTER "Enable NPU Compiler inside driver" ON "ENABLE_ZEROAPI_BACKEND" OFF) - -if((NOT ENABLE_NPU_PLUGIN_ENGINE OR NOT ENABLE_NPU_RUNTIME_COMMON) AND ENABLE_TESTS) - message(FATAL_ERROR "Tests depends on npu plugin engine and npu runtime common libraries!") -endif() - -if((NOT ENABLE_NPU_PLUGIN_ENGINE OR NOT ENABLE_NPU_RUNTIME_COMMON) AND ENABLE_ZEROAPI_BACKEND) - message(FATAL_ERROR "Zero backend depends on npu plugin engine and npu common libraries!") -endif() - -if(NOT ENABLE_ZEROAPI_BACKEND AND ENABLE_DRIVER_COMPILER_ADAPTER) - message(FATAL_ERROR "Compiler adapter depends on zero backend to use same context!") -endif() - -if(NOT BUILD_SHARED_LIBS AND NOT ENABLE_MLIR_COMPILER AND NOT ENABLE_DRIVER_COMPILER_ADAPTER) - message(FATAL_ERROR "No compiler found for static build!") +if(NOT ENABLE_NPU_PLUGIN_ENGINE AND ENABLE_TESTS) + message(FATAL_ERROR "Tests depends on npu plugin engine!") endif() ov_dependent_option(ENABLE_IMD_BACKEND "Enable InferenceManagerDemo based 
NPU AL backend" OFF "NOT WIN32;NOT CMAKE_CROSSCOMPILING" OFF) diff --git a/src/plugins/intel_npu/src/CMakeLists.txt b/src/plugins/intel_npu/src/CMakeLists.txt index 5530eb1f3e59e5..f5d1fd5b41226c 100644 --- a/src/plugins/intel_npu/src/CMakeLists.txt +++ b/src/plugins/intel_npu/src/CMakeLists.txt @@ -9,18 +9,9 @@ add_subdirectory(utils) add_subdirectory(al) -if (ENABLE_NPU_RUNTIME_COMMON) +if (ENABLE_NPU_PLUGIN_ENGINE) add_subdirectory(common) -endif() - -if(ENABLE_DRIVER_COMPILER_ADAPTER AND ENABLE_ZEROAPI_BACKEND) - add_subdirectory(compiler) -endif() - -if(ENABLE_ZEROAPI_BACKEND) + add_subdirectory(compiler_adapter) add_subdirectory(backend) -endif() - -if (ENABLE_NPU_PLUGIN_ENGINE) add_subdirectory(plugin) endif() diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/config/runtime.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/config/runtime.hpp index d52c25f6a3e6a5..510ab7fc43b0c8 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/config/runtime.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/config/runtime.hpp @@ -131,6 +131,38 @@ struct CREATE_EXECUTOR final : OptionBase { } }; +// +// DEFER_WEIGHTS_LOAD +// + +struct DEFER_WEIGHTS_LOAD final : OptionBase { + static std::string_view key() { + return ov::intel_npu::defer_weights_load.name(); + } + + static int64_t defaultValue() { + return false; + } + + static constexpr std::string_view getTypeName() { + return "bool"; + } + +#ifdef NPU_PLUGIN_DEVELOPER_BUILD + static std::string_view envVar() { + return "OV_NPU_DEFER_WEIGHTS_LOAD"; + } +#endif + + static bool isPublic() { + return false; + } + + static OptionMode mode() { + return OptionMode::RunTime; + } +}; + // // NUM_STREAMS // diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/icompiler.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/icompiler.hpp index e0a02f12aa2e17..53696396603d9a 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/icompiler.hpp +++ 
b/src/plugins/intel_npu/src/al/include/intel_npu/icompiler.hpp @@ -6,128 +6,12 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include - #include "intel_npu/config/config.hpp" -#include "openvino/core/partial_shape.hpp" -#include "openvino/core/type/element_type.hpp" -#include "openvino/runtime/common.hpp" +#include "intel_npu/network_metadata.hpp" #include "openvino/runtime/profiling_info.hpp" namespace intel_npu { -/** - * @brief A helper structure used for storing metadata corresponding to one input/output entry. - */ -struct IODescriptor { - /** - * @brief The name of the input/output assigned by the compiler. - * @details This value may differ from other name attributes: - * - The compiler could have created additional inputs/outputs (e.g. for representing states). These are not - * found in the original IR model. - * - The compiler may append indices to names in the case where duplicate names are found. - * @note The prefixes introduced by the compiler in order to differentiate the special cases (e.g. states and shape - * tensors) were removed prior to initializing this field. - */ - std::string nameFromCompiler; - - ov::element::Type precision; - - ov::PartialShape shapeFromCompiler; - - /** - * @brief If set to "true", the current object describes a buffer which may be used for altering a state tensor. - * @details This flag is set if the compiler prefixed the name using a "read value" prefix. The state input and - * state output descriptors are also tied using the "relatedDescriptorIndex" attribute. - */ - bool isStateInput = false; - - /** - * @brief If set to "true", the current object describes a buffer which reflects the value of a state tensor. - * @details This flag is set if the compiler prefixed the name using an "assign" prefix. The state input and - * state output descriptors are also tied using the "relatedDescriptorIndex" attribute. 
- */ - bool isStateOutput = false; - - /** - * @brief If set to "true", the buffer of the tensor described here contains as value the shape of the referenced - * tensor. - * @details This flag is set if the compiler prefixed the name using a "shape" prefix. - * - * The referenced tensor bears the same name ("nameFromCompiler"), but its "isShapeTensor" value is set to - * "false". The two descriptors are also tied using the "relatedDescriptorIndex" attribute. - */ - bool isShapeTensor = false; - - /** - * @brief Points towards a related descriptor. - * @details The related descriptors are defined by (state input, state output) or (dynamic tensor, shape tensor) - * pairs. - */ - std::optional relatedDescriptorIndex; - - /** - * @brief The friendly name of the node extracted from the IR model. - * @details In some cases, this field is required for constructing a dummy model which uses the same input/output - * metadata as the original IR model. - * - * This field may be empty if the I/O entry is not found in the original IR model (i.e. the entry was added by the - * compiler). - */ - std::string nodeFriendlyName; - - /** - * @brief The names of the output tensors extracted from the IR model. - * @details In some cases, this field is required for constructing a dummy model which uses the same input/output - * metadata as the original IR model. - * - * This field may be empty if the I/O entry is not found in the original IR model (i.e. the entry was added by the - * compiler). - */ - std::unordered_set outputTensorNames; - - /** - * @brief The shape extracted from the IR model. - * @details The values may differ from the ones found in "shapeFromCompiler" if batching is to be handled by the - * plugin. - * - * This field may be empty if the I/O entry is not found in the original IR model (i.e. the entry was added - * by the compiler). 
- */ - std::optional shapeFromIRModel = std::nullopt; -}; - -struct NetworkMetadata final { - std::string name; - - std::vector inputs; - std::vector outputs; - std::vector profilingOutputs; - - size_t numStreams = 1; - - // Used primarily in the CID path to pass the level zero graph handle from compiler to the backend executor - void* graphHandle = nullptr; - - /** - * @brief Binds the (state input, state output) and (dynamic tensor, shape tensor) pairs using the - * "relatedDescriptorIndex" attribute. - * @details For state inputs, the "relatedDescriptorIndex" value is set to the index of the output which bears the - * same name. The reverse is also applied. - * - * For shape tensors, the lookup is performed in the same container (inputs or outputs). The value is once again set - * to the index of the entry which bears the same name. - */ - void bindRelatedDescriptors(); - -}; // namespace intel_npu - /** * @struct NetworkDescription * @brief The object returned by the compiler @@ -138,7 +22,6 @@ struct NetworkDescription final { NetworkDescription(std::vector&& compiledNetwork, NetworkMetadata&& metadata) : compiledNetwork(std::move(compiledNetwork)), metadata(std::move(metadata)) {} - NetworkDescription(NetworkMetadata&& metadata) : metadata(std::move(metadata)) {} // Force move semantics to prevent blob copies NetworkDescription(const NetworkDescription&) = delete; NetworkDescription(NetworkDescription&&) = default; @@ -151,32 +34,6 @@ struct NetworkDescription final { NetworkMetadata metadata; }; -/** - * @struct CompiledNetwork - * @brief Custom container for compiled network, used for export - * @var CompiledNetwork::data - * Pointer to the address of compiled network - * @var CompiledNetwork:size - * Size of the compiled network - * @var CompiledNetwork::ownedStorage - * Plugin owned compiled network storage that is required in case of a driver that - * doesn't support graph extension 1.7, as in this case plugin must create a copy of the compiled network. 
- * @note It's unsafe to store either data or size outside of the compiled network object as its destructor - * would release the owning container - */ - -struct CompiledNetwork { - const uint8_t* data; - size_t size; - CompiledNetwork(const uint8_t* data, size_t size, std::vector storage) - : data(data), - size(size), - ownedStorage(std::move(storage)) {} - -private: - std::vector ownedStorage; -}; - /** * @interface ICompiler * @brief An interface to be implemented by a concrete compiler to provide @@ -184,12 +41,6 @@ struct CompiledNetwork { */ class ICompiler : public std::enable_shared_from_this { public: - /** - * @brief Returns the maximum OpenVino opset version supported by the compiler - * @return opset version e.g. 11 for opset11 - */ - virtual uint32_t getSupportedOpsetVersion() const = 0; - /** * @brief Transforms a network from the OpenVINO model representation to a format executable * by a NPU device @@ -216,8 +67,6 @@ class ICompiler : public std::enable_shared_from_this { * @param config a reference to NPUConfig containing plugin config options * Note: compilation options will be ignored, * since the network is already compiled - * @param netName a reference to the string describing network name - * to be used for creating network description * @return a shared pointer on an object implementing NetworkDescription interface */ virtual NetworkMetadata parse(const std::vector& network, const Config& config) const = 0; @@ -226,15 +75,6 @@ class ICompiler : public std::enable_shared_from_this { const std::vector& network, const Config& config) const = 0; - // Driver compiler can use this to release graphHandle, if we do not have executor - virtual void release([[maybe_unused]] std::shared_ptr networkDescription){}; - - virtual CompiledNetwork getCompiledNetwork(const NetworkDescription& networkDescription) { - return CompiledNetwork(networkDescription.compiledNetwork.data(), - networkDescription.compiledNetwork.size(), - 
networkDescription.compiledNetwork); - } - protected: virtual ~ICompiler() = default; }; diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/network_metadata.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/network_metadata.hpp new file mode 100644 index 00000000000000..b7a78b3dfd43e1 --- /dev/null +++ b/src/plugins/intel_npu/src/al/include/intel_npu/network_metadata.hpp @@ -0,0 +1,127 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +// Compiler Interface + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "intel_npu/config/config.hpp" +#include "openvino/core/partial_shape.hpp" +#include "openvino/core/type/element_type.hpp" +#include "openvino/runtime/common.hpp" + +namespace intel_npu { + +/** + * @brief A helper structure used for storing metadata corresponding to one input/output entry. + */ +struct IODescriptor { + /** + * @brief The name of the input/output assigned by the compiler. + * @details This value may differ from other name attributes: + * - The compiler could have created additional inputs/outputs (e.g. for representing states). These are not + * found in the original IR model. + * - The compiler may append indices to names in the case where duplicate names are found. + * @note The prefixes introduced by the compiler in order to differentiate the special cases (e.g. states and shape + * tensors) were removed prior to initializing this field. + */ + std::string nameFromCompiler; + + ov::element::Type precision; + + ov::PartialShape shapeFromCompiler; + + /** + * @brief If set to "true", the current object describes a buffer which may be used for altering a state tensor. + * @details This flag is set if the compiler prefixed the name using a "read value" prefix. The state input and + * state output descriptors are also tied using the "relatedDescriptorIndex" attribute. 
+ */ + bool isStateInput = false; + + /** + * @brief If set to "true", the current object describes a buffer which reflects the value of a state tensor. + * @details This flag is set if the compiler prefixed the name using an "assign" prefix. The state input and + * state output descriptors are also tied using the "relatedDescriptorIndex" attribute. + */ + bool isStateOutput = false; + + /** + * @brief If set to "true", the buffer of the tensor described here contains as value the shape of the referenced + * tensor. + * @details This flag is set if the compiler prefixed the name using a "shape" prefix. + * + * The referenced tensor bears the same name ("nameFromCompiler"), but its "isShapeTensor" value is set to + * "false". The two descriptors are also tied using the "relatedDescriptorIndex" attribute. + */ + bool isShapeTensor = false; + + /** + * @brief Points towards a related descriptor. + * @details The related descriptors are defined by (state input, state output) or (dynamic tensor, shape tensor) + * pairs. + */ + std::optional relatedDescriptorIndex; + + /** + * @brief The friendly name of the node extracted from the IR model. + * @details In some cases, this field is required for constructing a dummy model which uses the same input/output + * metadata as the original IR model. + * + * This field may be empty if the I/O entry is not found in the original IR model (i.e. the entry was added by the + * compiler). + */ + std::string nodeFriendlyName; + + /** + * @brief The names of the output tensors extracted from the IR model. + * @details In some cases, this field is required for constructing a dummy model which uses the same input/output + * metadata as the original IR model. + * + * This field may be empty if the I/O entry is not found in the original IR model (i.e. the entry was added by the + * compiler). + */ + std::unordered_set outputTensorNames; + + /** + * @brief The shape extracted from the IR model. 
+ * @details The values may differ from the ones found in "shapeFromCompiler" if batching is to be handled by the + * plugin. + * + * This field may be empty if the I/O entry is not found in the original IR model (i.e. the entry was added + * by the compiler). + */ + std::optional shapeFromIRModel = std::nullopt; +}; + +struct NetworkMetadata final { + std::string name; + + std::vector inputs; + std::vector outputs; + std::vector profilingOutputs; + + size_t numStreams = 1; + + /** + * @brief Binds the (state input, state output) and (dynamic tensor, shape tensor) pairs using the + * "relatedDescriptorIndex" attribute. + * @details For state inputs, the "relatedDescriptorIndex" value is set to the index of the output which bears the + * same name. The reverse is also applied. + * + * For shape tensors, the lookup is performed in the same container (inputs or outputs). The value is once again set + * to the index of the entry which bears the same name. + */ + void bindRelatedDescriptors(); + +}; // namespace intel_npu + +} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp index 0c5a04ce0c0d83..d8fabee177b2b9 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp @@ -351,6 +351,13 @@ static constexpr ov::Property batch_mode{"NPU_BATCH_MODE"}; */ static constexpr ov::Property create_executor{"NPU_CREATE_EXECUTOR"}; +/** + * @brief [Only for NPU Plugin] + * Type: boolean, default is false + * This option allows to omit loading the weights until inference is created + */ +static constexpr ov::Property defer_weights_load{"NPU_DEFER_WEIGHTS_LOAD"}; + /** * @brief Read-only property to get the name of used backend */ diff --git a/src/plugins/intel_npu/src/al/src/config/runtime.cpp 
b/src/plugins/intel_npu/src/al/src/config/runtime.cpp index 10f9b4a7c7222b..759956b6f597df 100644 --- a/src/plugins/intel_npu/src/al/src/config/runtime.cpp +++ b/src/plugins/intel_npu/src/al/src/config/runtime.cpp @@ -21,6 +21,7 @@ void intel_npu::registerRunTimeOptions(OptionsDesc& desc) { desc.add(); desc.add(); desc.add(); + desc.add(); desc.add(); desc.add(); desc.add(); diff --git a/src/plugins/intel_npu/src/backend/CMakeLists.txt b/src/plugins/intel_npu/src/backend/CMakeLists.txt index 01465a8179dc24..5a1585c0a63073 100644 --- a/src/plugins/intel_npu/src/backend/CMakeLists.txt +++ b/src/plugins/intel_npu/src/backend/CMakeLists.txt @@ -25,7 +25,6 @@ target_link_libraries(${TARGET_NAME} PRIVATE openvino::npu_al openvino::npu_common - openvino_npu_zero_result_parser ze_loader ) @@ -33,31 +32,3 @@ target_link_libraries(${TARGET_NAME} # targets install # ov_install_static_lib(${TARGET_NAME} ${NPU_INTERNAL_COMPONENT}) - -if(TARGET ze_loader) - if(NOT BUILD_SHARED_LIBS) - # Support link of static runtime in case system does not have ze_loader - install(TARGETS ze_loader EXPORT OpenVINOTargets - RUNTIME DESTINATION ${OV_CPACK_RUNTIMEDIR} COMPONENT ${NPU_PLUGIN_COMPONENT} - ARCHIVE DESTINATION ${OV_CPACK_ARCHIVEDIR} COMPONENT ${NPU_PLUGIN_COMPONENT} - LIBRARY DESTINATION ${OV_CPACK_LIBRARYDIR} COMPONENT ${NPU_PLUGIN_COMPONENT}) - - install(TARGETS utils EXPORT OpenVINOTargets - RUNTIME DESTINATION ${OV_CPACK_RUNTIMEDIR} COMPONENT ${NPU_PLUGIN_COMPONENT} - ARCHIVE DESTINATION ${OV_CPACK_ARCHIVEDIR} COMPONENT ${NPU_PLUGIN_COMPONENT} - LIBRARY DESTINATION ${OV_CPACK_LIBRARYDIR} COMPONENT ${NPU_PLUGIN_COMPONENT}) - - # export to local tree to build against static build tree - export(TARGETS ze_loader NAMESPACE openvino:: - APPEND FILE "${CMAKE_BINARY_DIR}/OpenVINOTargets.cmake") - - export(TARGETS utils NAMESPACE openvino:: - APPEND FILE "${CMAKE_BINARY_DIR}/OpenVINOTargets.cmake") - endif() - - # Support tests to run with ze_loader - install(TARGETS ze_loader - RUNTIME 
DESTINATION tests COMPONENT tests EXCLUDE_FROM_ALL - LIBRARY DESTINATION tests COMPONENT tests EXCLUDE_FROM_ALL) -endif() - diff --git a/src/plugins/intel_npu/src/backend/include/zero_backend.hpp b/src/plugins/intel_npu/src/backend/include/zero_backend.hpp index 68e4f9434418a6..038c7c1d2d9bf9 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_backend.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_backend.hpp @@ -9,7 +9,7 @@ #include "intel_npu/common/npu.hpp" #include "intel_npu/utils/logger/logger.hpp" -#include "zero_init.hpp" +#include "intel_npu/utils/zero/zero_init.hpp" namespace intel_npu { class ZeroEngineBackend final : public IEngineBackend { @@ -29,15 +29,14 @@ class ZeroEngineBackend final : public IEngineBackend { bool isCommandQueueExtSupported() const override; bool isLUIDExtSupported() const override; + const std::shared_ptr& getInitStruct() const; + void* getContext() const override; - void* getDriverHandle() const; - void* getDeviceHandle() const; - ze_graph_dditable_ext_curr_t& getGraphDdiTable() const; void updateInfo(const Config& config) override; private: - std::shared_ptr _instance; + std::shared_ptr _initStruct; std::map> _devices{}; Logger _logger; diff --git a/src/plugins/intel_npu/src/backend/include/zero_device.hpp b/src/plugins/intel_npu/src/backend/include/zero_device.hpp index e87a602613a92a..50f0d28ed210cd 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_device.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_device.hpp @@ -10,9 +10,9 @@ #include "intel_npu/common/icompiled_model.hpp" #include "intel_npu/common/npu.hpp" #include "intel_npu/utils/logger/logger.hpp" +#include "intel_npu/utils/zero/zero_init.hpp" +#include "intel_npu/utils/zero/zero_types.hpp" #include "openvino/runtime/intel_npu/remote_properties.hpp" -#include "zero_init.hpp" -#include "zero_types.hpp" namespace intel_npu { @@ -20,9 +20,6 @@ class ZeroDevice : public IDevice { public: ZeroDevice(const std::shared_ptr& 
initStructs); - std::shared_ptr createExecutor(const std::shared_ptr& networkDescription, - const Config& config) override; - std::string getName() const override; std::string getFullDeviceName() const override; Uuid getUuid() const override; @@ -36,7 +33,6 @@ class ZeroDevice : public IDevice { ov::device::Type getDeviceType() const override; std::shared_ptr createInferRequest(const std::shared_ptr& compiledModel, - const std::shared_ptr& executor, const Config& config) override; void updateInfo(const Config& config) override { log.setLevel(config.get()); @@ -76,8 +72,6 @@ class ZeroDevice : public IDevice { {ov::element::u8, 0.f}, {ov::element::i8, 0.f}}; - uint32_t _group_ordinal; - Logger log; }; } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/backend/include/zero_executor.hpp b/src/plugins/intel_npu/src/backend/include/zero_executor.hpp deleted file mode 100644 index eeb96defc16441..00000000000000 --- a/src/plugins/intel_npu/src/backend/include/zero_executor.hpp +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright (C) 2018-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include - -#include - -#include "intel_npu/common/npu.hpp" -#include "intel_npu/utils/logger/logger.hpp" -#include "openvino/runtime/properties.hpp" -#include "zero_init.hpp" -#include "zero_wrappers.hpp" - -namespace intel_npu { - -class ZeroExecutor final : public IExecutor { -public: - ZeroExecutor(const std::shared_ptr& initStructs, - const std::shared_ptr& networkDescription, - const Config& config, - const uint32_t& group_ordinal); - - ZeroExecutor(const ZeroExecutor&) = delete; - ZeroExecutor& operator=(const ZeroExecutor&) = delete; - - ~ZeroExecutor() override; - - struct ArgumentDescriptor { - ze_graph_argument_properties_3_t info; - uint32_t idx; - }; - - void setArgumentValue(uint32_t argi_, const void* argv_) const; - void setWorkloadType(const ov::WorkloadType workloadType) const override; - void mutexLock() const; - void 
mutexUnlock() const; - inline ze_graph_handle_t graph() const { - return _graph; - } - inline std::shared_ptr getInitStructs() const { - return _initStructs; - } - inline const std::shared_ptr& getNetworkDesc() const { - return _networkDesc; - } - inline const std::shared_ptr& getCommandQueue() const { - return _command_queues; - } - inline const uint32_t& get_group_ordinal() const { - return _group_ordinal; - } - inline const std::vector& get_input_descriptors() const { - return _input_descriptors; - } - inline const std::vector& get_output_descriptors() const { - return _output_descriptors; - } - -private: - void initialize_graph_through_command_list() const; - - const Config _config; - Logger _logger; - - const std::shared_ptr _initStructs; - std::shared_ptr _networkDesc; - - ze_graph_dditable_ext_curr_t& _graph_ddi_table_ext; - - const uint32_t _group_ordinal; - - ze_graph_handle_t _graph = nullptr; - - std::vector _input_descriptors; - std::vector _output_descriptors; - - std::shared_ptr _command_queues; - - mutable std::mutex _mutex; -}; - -} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/backend/include/zero_host_tensor.hpp b/src/plugins/intel_npu/src/backend/include/zero_host_tensor.hpp index 52000930e2a751..a214c8e2cb2b5d 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_host_tensor.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_host_tensor.hpp @@ -5,8 +5,8 @@ #pragma once #include "intel_npu/config/config.hpp" +#include "intel_npu/utils/zero/zero_init.hpp" #include "openvino/runtime/itensor.hpp" -#include "zero_init.hpp" #include "zero_remote_tensor.hpp" namespace intel_npu { diff --git a/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp b/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp index 48aad52010a4c2..31248b582250da 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp @@ -11,19 +11,17 @@ 
#include "intel_npu/common/sync_infer_request.hpp" #include "intel_npu/utils/logger/logger.hpp" #include "intel_npu/utils/zero/zero_utils.hpp" -#include "zero_executor.hpp" +#include "intel_npu/utils/zero/zero_wrappers.hpp" #include "zero_pipeline.hpp" #include "zero_profiling.hpp" #include "zero_remote_tensor.hpp" -#include "zero_wrappers.hpp" namespace intel_npu { class ZeroInferRequest final : public SyncInferRequest { public: - explicit ZeroInferRequest(const std::shared_ptr& backendPtr, + explicit ZeroInferRequest(const std::shared_ptr& initStructs, const std::shared_ptr& compiledModel, - const std::shared_ptr& executor, const Config& config); ov::SoPtr get_tensor(const ov::Output& port) const override; @@ -85,8 +83,7 @@ class ZeroInferRequest final : public SyncInferRequest { std::vector>& get_input_tensors_data(size_t index) const; const std::shared_ptr _initStructs; - const std::shared_ptr _executorPtr; - const ZeroExecutor* _executor; + const std::shared_ptr _graph; const Config _config; Logger _logger; diff --git a/src/plugins/intel_npu/src/backend/include/zero_memory.hpp b/src/plugins/intel_npu/src/backend/include/zero_memory.hpp index 6ecbde0d546110..992f409b86a928 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_memory.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_memory.hpp @@ -11,7 +11,7 @@ #include #include "intel_npu/utils/logger/logger.hpp" -#include "zero_init.hpp" +#include "intel_npu/utils/zero/zero_init.hpp" namespace { diff --git a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp index 62c8481d28ac1a..92a473a9fc412c 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp @@ -4,11 +4,11 @@ #pragma once +#include "intel_npu/common/igraph.hpp" #include "intel_npu/utils/zero/zero_utils.hpp" -#include "zero_executor.hpp" +#include "intel_npu/utils/zero/zero_wrappers.hpp" 
#include "zero_memory.hpp" #include "zero_profiling.hpp" -#include "zero_wrappers.hpp" namespace intel_npu { @@ -21,13 +21,15 @@ struct TensorData { struct Pipeline { public: Pipeline(const Config& config, - const std::shared_ptr& executorPtr, + const std::shared_ptr& initStructs, + const std::shared_ptr& graph, zeroProfiling::ProfilingPool& profiling_pool, zeroProfiling::ProfilingQuery& profiling_query, std::shared_ptr npu_profiling, const std::vector>>& inputTensorsData, const std::vector>& outputTensorsData, - const size_t numberOfCommandLists); + size_t numberOfCommandLists, + uint32_t group_ordinal); Pipeline(const Pipeline&) = delete; Pipeline& operator=(const Pipeline&) = delete; @@ -42,8 +44,7 @@ struct Pipeline { protected: const Config _config; - const ZeroExecutor* _executor; - CommandQueue& _command_queue; + std::shared_ptr _command_queue; std::vector> _command_lists; std::vector> _fences; EventPool _event_pool; diff --git a/src/plugins/intel_npu/src/backend/include/zero_profiling.hpp b/src/plugins/intel_npu/src/backend/include/zero_profiling.hpp index 505a7f0185e135..17e263a7aaf620 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_profiling.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_profiling.hpp @@ -12,8 +12,8 @@ #include "intel_npu/config/compiler.hpp" #include "intel_npu/utils/logger/logger.hpp" +#include "intel_npu/utils/zero/zero_types.hpp" #include "openvino/runtime/profiling_info.hpp" -#include "zero_types.hpp" namespace intel_npu { namespace zeroProfiling { diff --git a/src/plugins/intel_npu/src/backend/include/zero_remote_tensor.hpp b/src/plugins/intel_npu/src/backend/include/zero_remote_tensor.hpp index 0211bd5bd08962..5b08643704b651 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_remote_tensor.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_remote_tensor.hpp @@ -9,8 +9,8 @@ #include #include "intel_npu/common/remote_tensor.hpp" +#include "intel_npu/utils/zero/zero_init.hpp" #include 
"openvino/runtime/intel_npu/remote_properties.hpp" -#include "zero_init.hpp" namespace intel_npu { diff --git a/src/plugins/intel_npu/src/backend/src/zero_backend.cpp b/src/plugins/intel_npu/src/backend/src/zero_backend.cpp index d74692400b0d90..55aaad102e8b8f 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_backend.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_backend.cpp @@ -14,31 +14,31 @@ namespace intel_npu { ZeroEngineBackend::ZeroEngineBackend(const Config& config) : _logger("ZeroEngineBackend", Logger::global().level()) { _logger.debug("ZeroEngineBackend - initialize started"); - _instance = std::make_shared(); + _initStruct = std::make_shared(); - auto device = std::make_shared(_instance); + auto device = std::make_shared(_initStruct); _devices.emplace(std::make_pair(device->getName(), device)); _logger.debug("ZeroEngineBackend - initialize completed"); } uint32_t ZeroEngineBackend::getDriverVersion() const { - return _instance->getDriverVersion(); + return _initStruct->getDriverVersion(); } uint32_t ZeroEngineBackend::getGraphExtVersion() const { - return _instance->getGraphDdiTable().version(); + return _initStruct->getGraphDdiTable().version(); } bool ZeroEngineBackend::isBatchingSupported() const { - return _instance->isExtensionSupported(std::string(ZE_GRAPH_EXT_NAME_1_6), ZE_MAKE_VERSION(1, 6)); + return _initStruct->isExtensionSupported("ZE_extension_graph_1_6", ZE_MAKE_VERSION(1, 6)); } bool ZeroEngineBackend::isCommandQueueExtSupported() const { - return _instance->isExtensionSupported(std::string(ZE_COMMAND_QUEUE_NPU_EXT_NAME), ZE_MAKE_VERSION(1, 0)); + return _initStruct->isExtensionSupported(std::string(ZE_COMMAND_QUEUE_NPU_EXT_NAME), ZE_MAKE_VERSION(1, 0)); } bool ZeroEngineBackend::isLUIDExtSupported() const { - return _instance->isExtensionSupported(std::string(ZE_DEVICE_LUID_EXT_NAME), ZE_MAKE_VERSION(1, 0)); + return _initStruct->isExtensionSupported(std::string(ZE_DEVICE_LUID_EXT_NAME), ZE_MAKE_VERSION(1, 0)); } 
ZeroEngineBackend::~ZeroEngineBackend() = default; @@ -69,19 +69,11 @@ const std::vector ZeroEngineBackend::getDeviceNames() const { } void* ZeroEngineBackend::getContext() const { - return _instance->getContext(); + return _initStruct->getContext(); } -void* ZeroEngineBackend::getDriverHandle() const { - return _instance->getDriver(); -} - -void* ZeroEngineBackend::getDeviceHandle() const { - return _instance->getDevice(); -} - -ze_graph_dditable_ext_curr_t& ZeroEngineBackend::getGraphDdiTable() const { - return _instance->getGraphDdiTable(); +const std::shared_ptr& ZeroEngineBackend::getInitStruct() const { + return _initStruct; } void ZeroEngineBackend::updateInfo(const Config& config) { diff --git a/src/plugins/intel_npu/src/backend/src/zero_device.cpp b/src/plugins/intel_npu/src/backend/src/zero_device.cpp index ac60e4741947bd..6e16dde3b120bf 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_device.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_device.cpp @@ -7,7 +7,6 @@ #include "intel_npu/common/itt.hpp" #include "intel_npu/utils/zero/zero_api.hpp" #include "intel_npu/utils/zero/zero_utils.hpp" -#include "zero_executor.hpp" #include "zero_host_tensor.hpp" #include "zero_infer_request.hpp" #include "zero_remote_tensor.hpp" @@ -64,38 +63,6 @@ ZeroDevice::ZeroDevice(const std::shared_ptr& initStructs device_gops[ov::element::i8] = gops; device_gops[ov::element::f16] = 0.5f * gops; } - - std::vector command_group_properties; - uint32_t command_queue_group_count = 0; - // Discover all command queue groups - THROW_ON_FAIL_FOR_LEVELZERO( - "zeDeviceGetCommandQueueGroupProperties", - zeDeviceGetCommandQueueGroupProperties(_initStructs->getDevice(), &command_queue_group_count, nullptr)); - - log.debug("ZeroDevice::ZeroDevice - resize command_queue_group_count"); - command_group_properties.resize(command_queue_group_count); - - for (auto& prop : command_group_properties) { - prop.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_GROUP_PROPERTIES; - prop.pNext = 
nullptr; - } - - THROW_ON_FAIL_FOR_LEVELZERO("zeDeviceGetCommandQueueGroupProperties", - zeDeviceGetCommandQueueGroupProperties(_initStructs->getDevice(), - &command_queue_group_count, - command_group_properties.data())); - - // Find the corresponding command queue group. - log.debug("ZeroDevice::ZeroDevice - findGroupOrdinal"); - _group_ordinal = zeroUtils::findGroupOrdinal(command_group_properties, device_properties); - log.debug("ZeroDevice::ZeroDevice - init completed"); -} - -std::shared_ptr ZeroDevice::createExecutor( - const std::shared_ptr& networkDescription, - const Config& config) { - OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Device::createExecutor"); - return std::make_shared(_initStructs, networkDescription, config, _group_ordinal); } std::string ZeroDevice::getName() const { @@ -162,12 +129,30 @@ uint64_t ZeroDevice::getAllocMemSize() const { } uint64_t ZeroDevice::getTotalMemSize() const { +#define LEGACY_MAX_MEM_ALLOC_SIZE_BYTES (2147483648) // 2GB in base-2 + ze_graph_memory_query_t query{}; ze_result_t result = _graph_ddi_table_ext.pfnQueryContextMemory(_initStructs->getContext(), ZE_GRAPH_QUERY_MEMORY_DDR, &query); THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnQueryContextMemory", result, _graph_ddi_table_ext); - return query.total; + // For drivers with graph_extension < 1.9 we report fixed 2GB max allocation size (old drivers don't support more) + // For drivers with graph_extension > 1.9 we report the value they return + if (_initStructs->isExtensionSupported(std::string(ZE_GRAPH_EXT_NAME), ZE_MAKE_VERSION(1, 9))) { + // we are safe here, can return the value directly from driver + return query.total; + } +#if defined(_WIN32) || defined(__CYGWIN__) + // Special case for windows drivers with graph_extension v 1.8 + if (_initStructs->isExtensionSupported(std::string("ZE_extension_graph_1_8"), ZE_MAKE_VERSION(1, 8))) { + // query here returns total system memory in KB, which we need to + // divide by 2 (OS limitation) and convert to bytes + 
return (query.total << 9); + } +#endif + + // Default for older drivers: return 2GB + return LEGACY_MAX_MEM_ALLOC_SIZE_BYTES; } ov::device::PCIInfo ZeroDevice::getPciInfo() const { @@ -187,9 +172,8 @@ ov::device::Type ZeroDevice::getDeviceType() const { std::shared_ptr ZeroDevice::createInferRequest( const std::shared_ptr& compiledModel, - const std::shared_ptr& executor, const Config& config) { - return std::make_shared(_initStructs, compiledModel, executor, config); + return std::make_shared(_initStructs, compiledModel, config); } ov::SoPtr ZeroDevice::createRemoteTensor(std::shared_ptr context, diff --git a/src/plugins/intel_npu/src/backend/src/zero_executor.cpp b/src/plugins/intel_npu/src/backend/src/zero_executor.cpp deleted file mode 100644 index 32da2b2e0e4189..00000000000000 --- a/src/plugins/intel_npu/src/backend/src/zero_executor.cpp +++ /dev/null @@ -1,187 +0,0 @@ -// Copyright (C) 2018-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "zero_executor.hpp" - -#include - -#include -#include -#include -#include - -#include "intel_npu/common/itt.hpp" -#include "intel_npu/config/common.hpp" -#include "intel_npu/prefix.hpp" -#include "intel_npu/utils/zero/zero_utils.hpp" -#include "openvino/runtime/properties.hpp" -#include "ze_command_queue_npu_ext.h" -#include "zero_device.hpp" - -using namespace intel_npu; - -ZeroExecutor::ZeroExecutor(const std::shared_ptr& initStructs, - const std::shared_ptr& networkDescription, - const Config& config, - const uint32_t& group_ordinal) - : _config(config), - _logger("Graph", _config.get()), - _initStructs(initStructs), - _networkDesc(networkDescription), - _graph_ddi_table_ext(_initStructs->getGraphDdiTable()), - _group_ordinal(group_ordinal), - _command_queues{std::make_shared(_initStructs->getDevice(), - _initStructs->getContext(), - zeroUtils::toZeQueuePriority(_config.get()), - _initStructs->getCommandQueueDdiTable(), - _config, - group_ordinal)} { - 
_logger.debug("ZeroExecutor::ZeroExecutor - create graph"); - OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_GRAPH, itt::domains::LevelZeroBackend, "Executor::ZeroExecutor", "graphCreate"); - - // _graph is a nullptr for CIP path, a new handle will be obtained from the driver based on the given - // compiledNetwork _graph gets (reuses) graphHandle from the compiler for CID path - if (_networkDesc->metadata.graphHandle == nullptr) { - _logger.debug("create graph handle on executor"); - ze_graph_desc_t desc{ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, - nullptr, - ZE_GRAPH_FORMAT_NATIVE, - _networkDesc->compiledNetwork.size(), - _networkDesc->compiledNetwork.data(), - nullptr}; - ze_result_t result = - _graph_ddi_table_ext.pfnCreate(_initStructs->getContext(), _initStructs->getDevice(), &desc, &_graph); - THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnCreate", result, _graph_ddi_table_ext); - - } else { - _logger.debug("reuse graph handle created from compiler"); - _graph = static_cast(_networkDesc->metadata.graphHandle); - } - - OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "pfnGetProperties"); - _logger.debug("performing pfnGetProperties"); - ze_graph_properties_t props{}; - props.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES; - - ze_result_t result = _graph_ddi_table_ext.pfnGetProperties(_graph, &props); - THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnGetProperties", result, _graph_ddi_table_ext); - - auto targetDriverExtVersion = _graph_ddi_table_ext.version(); - if (targetDriverExtVersion <= ZE_GRAPH_EXT_VERSION_1_1) { - OPENVINO_THROW("Incompatibility between the NPU plugin and driver! 
The driver version is too old, please " - "update the driver version"); - } - - OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "pfnGetArgumentProperties3"); - _logger.debug("performing pfnGetArgumentProperties3"); - for (uint32_t index = 0; index < props.numGraphArgs; ++index) { - ze_graph_argument_properties_3_t arg3{}; - arg3.stype = ZE_STRUCTURE_TYPE_GRAPH_ARGUMENT_PROPERTIES; - ze_result_t result = _graph_ddi_table_ext.pfnGetArgumentProperties3(_graph, index, &arg3); - THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnGetArgumentProperties3", result, _graph_ddi_table_ext); - - if (arg3.type == ZE_GRAPH_ARGUMENT_TYPE_INPUT) { - _input_descriptors.push_back(ArgumentDescriptor{arg3, index}); - } else { - _output_descriptors.push_back(ArgumentDescriptor{arg3, index}); - } - } - - if (_graph_ddi_table_ext.version() < ZE_GRAPH_EXT_VERSION_1_8) { - initialize_graph_through_command_list(); - } else { - ze_graph_properties_2_t properties = {}; - properties.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES; - _graph_ddi_table_ext.pfnGetProperties2(_graph, &properties); - - if (properties.initStageRequired & ZE_GRAPH_STAGE_INITIALIZE) { - OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "pfnGraphInitialize"); - _graph_ddi_table_ext.pfnGraphInitialize(_graph); - } - - if (properties.initStageRequired & ZE_GRAPH_STAGE_COMMAND_LIST_INITIALIZE) { - initialize_graph_through_command_list(); - } - } - - if (config.has()) { - setWorkloadType(config.get()); - } -} - -void ZeroExecutor::initialize_graph_through_command_list() const { - OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_GRAPH, - itt::domains::LevelZeroBackend, - "Executor::ZeroExecutor", - "initialize_graph_through_command_list"); - - _logger.debug("ZeroExecutor::ZeroExecutor init start - create graph_command_list"); - OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Executor::ZeroExecutor"); - CommandList graph_command_list(_initStructs->getDevice(), - _initStructs->getContext(), - _graph_ddi_table_ext, - _config, - _group_ordinal); - 
_logger.debug("ZeroExecutor::ZeroExecutor - create graph_command_queue"); - CommandQueue graph_command_queue(_initStructs->getDevice(), - _initStructs->getContext(), - ZE_COMMAND_QUEUE_PRIORITY_NORMAL, - _initStructs->getCommandQueueDdiTable(), - _config, - _group_ordinal); - _logger.debug("ZeroExecutor::ZeroExecutor - create fence"); - Fence fence(graph_command_queue, _config); - - OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "appendGraphInitialize"); - _logger.debug("ZeroExecutor::ZeroExecutor - performing appendGraphInitialize"); - graph_command_list.appendGraphInitialize(_graph); - _logger.debug("ZeroExecutor::ZeroExecutor - closing graph command list"); - graph_command_list.close(); - - OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "queue_execute"); - _logger.debug("ZeroExecutor::ZeroExecutor - performing executeCommandList"); - graph_command_queue.executeCommandList(graph_command_list, fence); - _logger.debug("ZeroExecutor::ZeroExecutor - performing hostSynchronize"); - fence.hostSynchronize(); - _logger.debug("ZeroExecutor::ZeroExecutor - hostSynchronize completed"); -} - -void ZeroExecutor::setWorkloadType(const ov::WorkloadType workloadType) const { - ze_command_queue_workload_type_t zeWorkloadType; - switch (workloadType) { - case ov::WorkloadType::DEFAULT: - zeWorkloadType = ze_command_queue_workload_type_t::ZE_WORKLOAD_TYPE_DEFAULT; - break; - case ov::WorkloadType::EFFICIENT: - zeWorkloadType = ze_command_queue_workload_type_t::ZE_WORKLOAD_TYPE_BACKGROUND; - break; - default: - OPENVINO_THROW("Unknown value for WorkloadType!"); - } - - _command_queues->setWorkloadType(zeWorkloadType); -} - -void ZeroExecutor::setArgumentValue(uint32_t argi_, const void* argv_) const { - ze_result_t result = _graph_ddi_table_ext.pfnSetArgumentValue(_graph, argi_, argv_); - if (ZE_RESULT_SUCCESS != result) { - THROW_ON_FAIL_FOR_LEVELZERO_EXT("zeGraphSetArgumentValue", result, _graph_ddi_table_ext); - } -} - -void ZeroExecutor::mutexLock() const { - _mutex.lock(); -} - -void 
ZeroExecutor::mutexUnlock() const { - _mutex.unlock(); -} - -ZeroExecutor::~ZeroExecutor() { - _logger.debug("~ZeroExecutor() - pfnDestroy _graph "); - auto result = _graph_ddi_table_ext.pfnDestroy(_graph); - if (ZE_RESULT_SUCCESS != result) { - _logger.error("_graph_ddi_table_ext.pfnDestroy failed %#X", uint64_t(result)); - } -} diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp index dd2629372dc7d8..1c5ceecfac1961 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp @@ -31,8 +31,7 @@ constexpr bool OUTPUT = false; * @param ioDescriptor The OpenVINO API specific I/O descriptor which shall be compared. * @param zeDescriptor The Level Zero specific structure used for comparison. */ -void check_level_zero_attributes_match(const IODescriptor& ioDescriptor, - const ZeroExecutor::ArgumentDescriptor& zeDescriptor) { +void check_level_zero_attributes_match(const IODescriptor& ioDescriptor, const ArgumentDescriptor& zeDescriptor) { std::string zeDescriptorName = zeDescriptor.info.name; if (isStateInputName(zeDescriptorName)) { @@ -158,38 +157,35 @@ std::optional ZeroInferRequest::get_batch_size(const NetworkMetadata& me //------------------------------------------------------------------------------ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& initStructs, const std::shared_ptr& compiledModel, - const std::shared_ptr& executor, const Config& config) : SyncInferRequest(compiledModel, config), _initStructs(initStructs), - _executorPtr(executor), - _executor(static_cast(_executorPtr.get())), + _graph(compiledModel->get_graph()), _config(config), _logger("ZeroInferRequest", config.get()), _levelZeroInputTensors(_metadata.inputs.size(), std::vector>(1, nullptr)), _levelZeroOutputTensors(_metadata.outputs.size(), nullptr), _inputTensorsData(_metadata.inputs.size(), std::vector>(1, 
std::nullopt)), _outputTensorsData(_metadata.outputs.size(), std::nullopt), - _profilingPool(_executor->graph(), zeroProfiling::POOL_SIZE, _executor->getInitStructs()->getProfilingDdiTable()), - _profilingQuery(0, - _executor->getInitStructs()->getDevice(), - _executor->getInitStructs()->getProfilingDdiTable()) { + _profilingPool(static_cast(_graph->get_handle()), + zeroProfiling::POOL_SIZE, + _initStructs->getProfilingDdiTable()), + _profilingQuery(0, _initStructs->getDevice(), _initStructs->getProfilingDdiTable()) { _logger.debug("ZeroInferRequest::ZeroInferRequest - SyncInferRequest"); - const std::vector& executorInputDescriptors = _executor->get_input_descriptors(); - const std::vector& executorOutputDescriptors = - _executor->get_output_descriptors(); + const std::vector& executorInputDescriptors = _graph->get_input_descriptors(); + const std::vector& executorOutputDescriptors = _graph->get_output_descriptors(); auto proftype = config.get(); if (proftype == ov::intel_npu::ProfilingType::INFER) { _logger.debug("ZeroInferRequest::ZeroInferRequest - profiling type == ov::intel_npu::ProfilingType::INFER"); - _npuProfiling = std::make_shared(_executor->getInitStructs()->getContext(), - _executor->getInitStructs()->getDevice(), + _npuProfiling = std::make_shared(_initStructs->getContext(), + _initStructs->getDevice(), _config.get()); } _properties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; THROW_ON_FAIL_FOR_LEVELZERO("zeDeviceGetProperties", - zeDeviceGetProperties(_executor->getInitStructs()->getDevice(), &_properties)); + zeDeviceGetProperties(_initStructs->getDevice(), &_properties)); _outputAllocator = std::make_shared(_initStructs); _inputAllocator = @@ -278,17 +274,24 @@ void ZeroInferRequest::create_pipeline() { _levelZeroOutputTensors.at(outputIndex)->get_byte_size()}); } + // Find the corresponding command queue group. 
+ _logger.debug("ZeroDevice::ZeroDevice - findGroupOrdinal"); + auto groupOrdinal = zeroUtils::findGroupOrdinal(_initStructs->getDevice(), _properties); + _logger.debug("ZeroDevice::ZeroDevice - init completed"); + _logger.debug("ZeroInferRequest::create_pipeline - constructing pipeline"); - // Construct pipeline + // Construct pipeline _pipeline = std::make_unique(_config, - _executorPtr, + _initStructs, + _graph, _profilingPool, _profilingQuery, _npuProfiling, _inputTensorsData, _outputTensorsData, - _numberOfCommandLists); + _numberOfCommandLists, + groupOrdinal); _logger.debug("ZeroInferRequest::create_pipeline - SyncInferRequest completed"); } @@ -338,8 +341,8 @@ void ZeroInferRequest::set_tensor_data(const std::shared_ptr tensor OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "updateCommandList"); _pipeline->updateCommandList(*tensorsData, - isInput ? _executor->get_input_descriptors().at(index).idx - : _executor->get_output_descriptors().at(index).idx); + isInput ? _graph->get_input_descriptors().at(index).idx + : _graph->get_output_descriptors().at(index).idx); } } } @@ -370,9 +373,9 @@ void ZeroInferRequest::set_remote_tensor_data(const std::shared_ptrupdateCommandList(*tensorsData, - isInput ? _executor->get_input_descriptors().at(index).idx - : _executor->get_output_descriptors().at(index).idx); + _pipeline->updateCommandList( + *tensorsData, + isInput ? 
_graph->get_input_descriptors().at(index).idx : _graph->get_output_descriptors().at(index).idx); } } @@ -390,13 +393,17 @@ void ZeroInferRequest::set_tensor(const ov::Output& port, const if (foundPort.is_input()) { if (get_user_input(foundPort.idx)._ptr == tensor._ptr) { // Got set_tensor with the same object - do nothing + _logger.debug("ZeroInferRequest::set_tensor - got the same tensor, do nothing"); return; } if (is_batched_input(foundPort.idx)) { // resize vector size to 1 if set_tensor is called after set_tensors get_input_tensors_data(foundPort.idx).resize(1); + get_input_tensors_data(foundPort.idx).shrink_to_fit(); get_level_zero_inputs(foundPort.idx).resize(1); + get_level_zero_inputs(foundPort.idx).shrink_to_fit(); get_user_inputs(foundPort.idx).resize(1); + get_user_inputs(foundPort.idx).shrink_to_fit(); } get_user_input(foundPort.idx) = tensor; @@ -485,7 +492,7 @@ void ZeroInferRequest::set_tensors(const ov::Output& port, if (_pipelineIsCreated) { OV_ITT_TASK_NEXT(SET_TENSORS, "updateCommandList"); _pipeline->updateCommandList(*get_input_tensor_data(foundPort.idx, i), - _executor->get_input_descriptors().at(foundPort.idx).idx, + _graph->get_input_descriptors().at(foundPort.idx).idx, i); } } @@ -537,14 +544,16 @@ void ZeroInferRequest::infer_async() { _logger.debug("InferRequest::infer_async started"); OV_ITT_TASK_CHAIN(ZERO_INFER, itt::domains::LevelZeroBackend, "infer_async", "start"); - _executor->mutexLock(); - if (!_pipelineIsCreated) { - OV_ITT_TASK_NEXT(ZERO_INFER, "create_pipeline"); - create_pipeline(); + { + std::lock_guard lock(_graph->get_mutex()); - _pipelineIsCreated = true; + if (!_pipelineIsCreated) { + OV_ITT_TASK_NEXT(ZERO_INFER, "create_pipeline"); + create_pipeline(); + + _pipelineIsCreated = true; + } } - _executor->mutexUnlock(); size_t inputIndex = 0; for (const auto& userTensor : _userInputTensors) { @@ -740,12 +749,9 @@ std::vector ZeroInferRequest::get_profiling_info() const { if (compilerType == 
ov::intel_npu::CompilerType::MLIR) { // For plugin compiler retreive raw profiling data from backend and delegate // processing to the compiler - const auto& networkDesc = compiledModel.get_network_description(); - const auto& compiler = compiledModel.get_compiler(); - const auto& blob = networkDesc->compiledNetwork; auto profData = get_raw_profiling_data(); _logger.debug("InferRequest::get_profiling_info complete with compiler->process_profiling_output()."); - return compiler->process_profiling_output(profData, blob, compilerConfig); + return _graph->process_profiling_output(profData, compilerConfig); } else { auto proftype = _config.get(); if (proftype == ov::intel_npu::ProfilingType::INFER) { diff --git a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp index 009eee6541e8ef..34eb71eaf112f7 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp @@ -11,25 +11,25 @@ #include "intel_npu/prefix.hpp" #include "intel_npu/utils/logger/logger.hpp" #include "intel_npu/utils/zero/zero_api.hpp" -#include "zero_types.hpp" +#include "intel_npu/utils/zero/zero_types.hpp" namespace intel_npu { Pipeline::Pipeline(const Config& config, - const std::shared_ptr& executorPtr, + const std::shared_ptr& initStructs, + const std::shared_ptr& graph, zeroProfiling::ProfilingPool& profiling_pool, zeroProfiling::ProfilingQuery& profiling_query, std::shared_ptr npu_profiling, const std::vector>>& inputTensorsData, const std::vector>& outputTensorsData, - const size_t numberOfCommandLists) + size_t numberOfCommandLists, + uint32_t group_ordinal) : _config(config), - _executor(static_cast(executorPtr.get())), - _command_queue(*_executor->getCommandQueue()), - _event_pool{_executor->getInitStructs()->getDevice(), - _executor->getInitStructs()->getContext(), - numberOfCommandLists ? 
static_cast(numberOfCommandLists) : 1, - _config}, + _command_queue(graph->get_command_queue()), + _event_pool{initStructs->getDevice(), + initStructs->getContext(), + numberOfCommandLists ? static_cast(numberOfCommandLists) : 1}, _npu_profiling(std::move(npu_profiling)), _logger("Pipeline", _config.get()) { OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Zero_infer_request::Pipeline::Pipeline"); @@ -45,38 +45,37 @@ Pipeline::Pipeline(const Config& config, _logger.debug("Pipeline - emplace_back _event_pool and _command_queue"); for (size_t i = 0; i < numberOfCommandLists; i++) { _command_lists.emplace_back( - std::make_unique(_executor->getInitStructs()->getDevice(), - _executor->getInitStructs()->getContext(), - _executor->getInitStructs()->getGraphDdiTable(), - _config, - _executor->get_group_ordinal(), - _executor->getInitStructs()->getMutableCommandListVersion() ? true : false)); - _events.emplace_back(std::make_unique(_event_pool.handle(), static_cast(i), _config)); - _fences.emplace_back(std::make_unique(_command_queue, _config)); + std::make_unique(initStructs->getDevice(), + initStructs->getContext(), + initStructs->getGraphDdiTable(), + group_ordinal, + initStructs->getMutableCommandListVersion() ? 
true : false)); + _events.emplace_back(std::make_unique(_event_pool.handle(), static_cast(i))); + _fences.emplace_back(std::make_unique(*_command_queue)); } for (size_t i = 0; i < numberOfCommandLists; i++) { size_t ioIndex = 0; - for (const auto& desc : _executor->get_input_descriptors()) { + for (const auto& desc : graph->get_input_descriptors()) { if (inputTensorsData.at(ioIndex).size() > 1) { - _executor->setArgumentValue(desc.idx, inputTensorsData.at(ioIndex).at(i)->mem); + graph->set_argument_value(desc.idx, inputTensorsData.at(ioIndex).at(i)->mem); ++ioIndex; continue; } - _executor->setArgumentValue(desc.idx, - static_cast(inputTensorsData.at(ioIndex).at(0)->mem) + - (i * inputTensorsData.at(ioIndex).at(0)->size) / numberOfCommandLists); + graph->set_argument_value(desc.idx, + static_cast(inputTensorsData.at(ioIndex).at(0)->mem) + + (i * inputTensorsData.at(ioIndex).at(0)->size) / numberOfCommandLists); ++ioIndex; } ioIndex = 0; - for (const auto& desc : _executor->get_output_descriptors()) { - _executor->setArgumentValue(desc.idx, - static_cast(outputTensorsData.at(ioIndex)->mem) + - (i * outputTensorsData.at(ioIndex)->size) / numberOfCommandLists); + for (const auto& desc : graph->get_output_descriptors()) { + graph->set_argument_value(desc.idx, + static_cast(outputTensorsData.at(ioIndex)->mem) + + (i * outputTensorsData.at(ioIndex)->size) / numberOfCommandLists); ++ioIndex; } @@ -86,7 +85,8 @@ Pipeline::Pipeline(const Config& config, _command_lists.at(i)->appendNpuTimestamp(reinterpret_cast(_npu_profiling->npu_ts_infer_start)); } - _command_lists.at(i)->appendGraphExecute(_executor->graph(), profiling_query.getHandle()); + _command_lists.at(i)->appendGraphExecute(static_cast(graph->get_handle()), + profiling_query.getHandle()); /// append timestamp command if feature was activated if (_npu_profiling != nullptr) { @@ -108,11 +108,11 @@ void Pipeline::push() { _logger.debug("Pipeline - push() started"); for (size_t i = 0; i < _command_lists.size(); ++i) { 
- OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_PUSH, itt::domains::LevelZeroBackend, "Pipeline", "push"); + OV_ITT_TASK_CHAIN(ZERO_PIPELINE_IP_PUSH, itt::domains::LevelZeroBackend, "Pipeline", "push"); if (sync_output_with_fences_) { - _command_queue.executeCommandList(*_command_lists.at(i), *_fences.at(i)); + _command_queue->executeCommandList(*_command_lists.at(i), *_fences.at(i)); } else { - _command_queue.executeCommandList(*_command_lists.at(i)); + _command_queue->executeCommandList(*_command_lists.at(i)); } } @@ -121,7 +121,7 @@ void Pipeline::push() { void Pipeline::pull() { _logger.debug("Pipeline - pull() started"); - OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_PULL, itt::domains::LevelZeroBackend, "Pipeline", "pull"); + OV_ITT_TASK_CHAIN(ZERO_PIPELINE_IP_PULL, itt::domains::LevelZeroBackend, "Pipeline", "pull"); for (size_t i = 0; i < _command_lists.size(); ++i) { if (sync_output_with_fences_) { diff --git a/src/plugins/intel_npu/src/common/CMakeLists.txt b/src/plugins/intel_npu/src/common/CMakeLists.txt index 2d1f5d9cbb39ea..1aa93cce1bc291 100644 --- a/src/plugins/intel_npu/src/common/CMakeLists.txt +++ b/src/plugins/intel_npu/src/common/CMakeLists.txt @@ -20,7 +20,7 @@ target_link_libraries(${TARGET_NAME} PUBLIC openvino::npu_al openvino::npu_logger_utils - openvino::runtime::dev + openvino::npu_zero_utils ) set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO}) diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiled_model.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiled_model.hpp index eb6a3de57e41fc..19023a1fca883f 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiled_model.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiled_model.hpp @@ -7,8 +7,8 @@ #include #include +#include "intel_npu/common/igraph.hpp" #include "intel_npu/config/common.hpp" -#include "intel_npu/icompiler.hpp" #include 
"openvino/runtime/icompiled_model.hpp" namespace intel_npu { @@ -17,17 +17,10 @@ class ICompiledModel : public ov::ICompiledModel { public: using ov::ICompiledModel::ICompiledModel; - virtual const std::shared_ptr& get_network_description() const = 0; + virtual const std::shared_ptr& get_graph() const = 0; virtual const Config& get_config() const = 0; - // Compiler is used for post-processing profiling data when using PERF_COUNT property - virtual const ov::SoPtr& get_compiler() const = 0; - - const NetworkMetadata& get_network_metadata() const { - return get_network_description()->metadata; - } - protected: std::shared_ptr shared_from_this() const { return std::dynamic_pointer_cast(ov::ICompiledModel::shared_from_this()); diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp new file mode 100644 index 00000000000000..51c4a4cf26eafd --- /dev/null +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp @@ -0,0 +1,103 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include "intel_npu/network_metadata.hpp" +#include "intel_npu/utils/zero/zero_utils.hpp" +#include "intel_npu/utils/zero/zero_wrappers.hpp" +#include "openvino/runtime/profiling_info.hpp" + +namespace intel_npu { + +class IGraph : public std::enable_shared_from_this { +public: + IGraph(ze_graph_handle_t handle, NetworkMetadata metadata, std::optional> blob) + : _handle(handle), + _metadata(std::move(metadata)) { + if (blob.has_value()) { + _blob = std::move(*blob); + } + } + + virtual void export_blob(std::ostream& stream) const = 0; + + virtual std::vector process_profiling_output(const std::vector& profData, + const Config& config) const = 0; + + virtual void set_argument_value(uint32_t argi, const void* argv) const = 0; + + virtual void initialize(const Config& config) = 0; + + virtual 
~IGraph() = default; + + const NetworkMetadata& get_metadata() const { + return _metadata; + } + + ze_graph_handle_t get_handle() const { + return _handle; + } + + void update_network_name(std::string_view name) { + _metadata.name = name; + } + + inline const std::vector& get_input_descriptors() const { + return _input_descriptors; + } + + inline const std::vector& get_output_descriptors() const { + return _output_descriptors; + } + + inline const std::shared_ptr& get_command_queue() const { + return _command_queue; + } + + void set_workload_type(const ov::WorkloadType workloadType) const { + if (_command_queue == nullptr) { + return; + } + + ze_command_queue_workload_type_t zeWorkloadType; + switch (workloadType) { + case ov::WorkloadType::DEFAULT: + zeWorkloadType = ze_command_queue_workload_type_t::ZE_WORKLOAD_TYPE_DEFAULT; + break; + case ov::WorkloadType::EFFICIENT: + zeWorkloadType = ze_command_queue_workload_type_t::ZE_WORKLOAD_TYPE_BACKGROUND; + break; + default: + OPENVINO_THROW("Unknown value for WorkloadType!"); + } + + _command_queue->setWorkloadType(zeWorkloadType); + } + + std::mutex& get_mutex() { + return _mutex; + } + +protected: + ze_graph_handle_t _handle = nullptr; + NetworkMetadata _metadata; + + std::vector _input_descriptors; + std::vector _output_descriptors; + + std::shared_ptr _command_queue; + + // Used to protect zero pipeline creation in the graph. 
The pipeline should be created only once per graph when the + // first inference starts running + std::mutex _mutex; + + std::vector _blob; +}; + +} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp index 8c1eb57fe34fc3..b34f2deee6c61e 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp @@ -7,9 +7,9 @@ #include #include "intel_npu/common/icompiled_model.hpp" +#include "intel_npu/common/igraph.hpp" #include "intel_npu/common/sync_infer_request.hpp" #include "intel_npu/config/config.hpp" -#include "intel_npu/icompiler.hpp" #include "openvino/runtime/intel_npu/remote_properties.hpp" #include "openvino/runtime/iremote_context.hpp" #include "openvino/runtime/properties.hpp" @@ -54,11 +54,14 @@ class IEngineBackend : public std::enable_shared_from_this { //------------------------------------------------------------------------------ -class IExecutor { +class ICompilerAdapter { public: - virtual ~IExecutor() = default; + virtual std::shared_ptr compile(const std::shared_ptr& model, + const Config& config) const = 0; + virtual std::shared_ptr parse(std::vector network, const Config& config) const = 0; + virtual ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const = 0; - virtual void setWorkloadType(const ov::WorkloadType workloadType) const = 0; + virtual ~ICompilerAdapter() = default; }; //------------------------------------------------------------------------------ @@ -67,10 +70,6 @@ class IDevice : public std::enable_shared_from_this { public: using Uuid = ov::device::UUID; - virtual std::shared_ptr createExecutor( - const std::shared_ptr& networkDescription, - const Config& config) = 0; - virtual std::string getName() const = 0; virtual std::string getFullDeviceName() const = 0; virtual Uuid getUuid() const; @@ -85,7 
+84,6 @@ class IDevice : public std::enable_shared_from_this { virtual std::shared_ptr createInferRequest( const std::shared_ptr& compiledModel, - const std::shared_ptr& executor, const Config& config) = 0; virtual void updateInfo(const Config& config) = 0; diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/sync_infer_request.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/sync_infer_request.hpp index 99f9ce7cb0eb28..788ce87136a04d 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/sync_infer_request.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/sync_infer_request.hpp @@ -5,8 +5,9 @@ #pragma once #include "intel_npu/common/icompiled_model.hpp" +#include "intel_npu/common/igraph.hpp" #include "intel_npu/common/variable_state.hpp" -#include "intel_npu/icompiler.hpp" +#include "intel_npu/network_metadata.hpp" #include "openvino/runtime/iinfer_request.hpp" #include "openvino/runtime/iplugin.hpp" diff --git a/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp b/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp index 0ae0832fe29d72..0eeefccf43906d 100644 --- a/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp +++ b/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp @@ -21,7 +21,7 @@ namespace intel_npu { SyncInferRequest::SyncInferRequest(const std::shared_ptr& compiledModel, const Config& config) : _compiledModel(compiledModel), - _metadata(compiledModel->get_network_metadata()), + _metadata(compiledModel->get_graph()->get_metadata()), _logger("SyncInferRequest", config.get()), _userInputTensors(_metadata.inputs.size(), std::vector>(1, {nullptr})), _userOutputTensors(_metadata.outputs.size(), {nullptr}) { diff --git a/src/plugins/intel_npu/src/compiler/include/driver_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler/include/driver_compiler_adapter.hpp deleted file mode 100644 index addd9ca5308c65..00000000000000 --- 
a/src/plugins/intel_npu/src/compiler/include/driver_compiler_adapter.hpp +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright (C) 2018-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include - -#include "intel_npu/common/npu.hpp" -#include "intel_npu/icompiler.hpp" -#include "intel_npu/utils/logger/logger.hpp" - -namespace intel_npu { -namespace driverCompilerAdapter { - -/** - * @brief Adapter for Compiler in driver - * @details Wrap compiler in driver calls and do preliminary actions (like opset conversion) - */ -class LevelZeroCompilerAdapter final : public ICompiler { -public: - LevelZeroCompilerAdapter(std::shared_ptr iEngineBackend); - - uint32_t getSupportedOpsetVersion() const override final; - - NetworkDescription compile(const std::shared_ptr& model, - const Config& config) const override final; - - ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override final; - - NetworkMetadata parse(const std::vector& network, const Config& config) const override final; - - std::vector process_profiling_output(const std::vector& profData, - const std::vector& network, - const Config& config) const override final; - - void release(std::shared_ptr networkDescription) override; - - CompiledNetwork getCompiledNetwork(const NetworkDescription& networkDescription) override; - -private: - /** - * @brief Separate externals calls to separate class - */ - std::shared_ptr apiAdapter; - Logger _logger; -}; - -} // namespace driverCompilerAdapter -} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp b/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp deleted file mode 100644 index 658f138a72c102..00000000000000 --- a/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp +++ /dev/null @@ -1,196 +0,0 @@ -// Copyright (C) 2018-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include - 
-#include -#include - -#include "intel_npu/icompiler.hpp" -#include "intel_npu/utils/logger/logger.hpp" -#include "intel_npu/utils/zero/zero_api.hpp" -#include "zero_executor.hpp" - -namespace intel_npu { -namespace driverCompilerAdapter { - -using SerializedIR = std::pair>; - -#define NotSupportQuery(T) (std::is_same::value) - -// ext version == 1.3 && 1.4, support API (pfnQueryNetworkCreate, pfnQueryNetworkDestroy, -// pfnQueryNetworkGetSupportedLayers) -#define SupportAPIGraphQueryNetworkV1(T) \ - (std::is_same::value || std::is_same::value) - -// ext version >= 1.5, support API (pfnCreate2, pfnQueryNetworkCreate2, pfnQueryContextMemory) -#define SupportAPIGraphQueryNetworkV2(T) ((!NotSupportQuery(T) && !SupportAPIGraphQueryNetworkV1(T))) - -// For ext version >= 1.5, pfnCreate2 api is avaible -#define NotSupportGraph2(T) \ - (std::is_same::value || std::is_same::value || \ - std::is_same::value) - -// A bug inside the driver makes the "pfnGraphGetArgumentMetadata" call not safe for use prior to -// "ze_graph_dditable_ext_1_6_t". 
-// See: E#117498 -#define NotSupportArgumentMetadata(T) \ - (std::is_same::value || std::is_same::value || \ - std::is_same::value || std::is_same::value) - -#define UseCopyForNativeBinary(T) \ - (std::is_same::value || std::is_same::value || \ - std::is_same::value || std::is_same::value || \ - std::is_same::value) - -/** - * Adapter to use CiD through ZeroAPI - */ -template -class LevelZeroCompilerInDriver final : public ICompiler { -public: - LevelZeroCompilerInDriver(ze_driver_handle_t driverHandle, - ze_device_handle_t deviceHandle, - ze_context_handle_t zeContext, - ze_graph_dditable_ext_curr_t& graph_ddi_table_ext); - LevelZeroCompilerInDriver(const LevelZeroCompilerInDriver&) = delete; - LevelZeroCompilerInDriver& operator=(const LevelZeroCompilerInDriver&) = delete; - ~LevelZeroCompilerInDriver() override; - - uint32_t getSupportedOpsetVersion() const override final; - - ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; - - NetworkDescription compile(const std::shared_ptr& model, - const Config& config) const override final; - - ze_result_t seriazlideIRModelAndCreateGraph(const std::shared_ptr& model, - const Config& config, - ze_device_graph_properties_t deviceGraphProperties, - ze_graph_handle_t& graphHandle) const; - - NetworkMetadata parse(const std::vector& network, const Config& config) const override final; - - std::vector process_profiling_output(const std::vector& profData, - const std::vector& network, - const Config& config) const override final { - OPENVINO_THROW("Profiling post-processing is not implemented."); - } - - template = true> - std::unordered_set getQueryResultFromSupportedLayers( - ze_result_t result, - ze_graph_query_network_handle_t& hGraphQueryNetwork) const; - - /** - * @brief Serialize input / output information to string format. 
- * @details Format: - * --inputs_precisions="0: [1:]" - * --inputs_layouts="0: [1:]" - * --outputs_precisions="0:" - * --outputs_layouts="0:" - * - * For older compiler versions, the name of the inputs/outputs may be used instead of their indices. - * - * Since the layout information is no longer an important part of the metadata values when using the 2.0 OV - * API, the layout fields shall be filled with default values in order to assure the backward compatibility - * with the driver. - */ - static std::string serializeIOInfo(const std::shared_ptr& model, const bool useIndices); - - void release(std::shared_ptr networkDescription) override; - - CompiledNetwork getCompiledNetwork(const NetworkDescription& networkDescription) override; - -private: - NetworkMetadata getNetworkMeta(ze_graph_handle_t graphHandle) const; - - SerializedIR serializeIR(const std::shared_ptr& model, - ze_graph_compiler_version_info_t compilerVersion) const; - std::string serializeConfig(const Config& config, ze_graph_compiler_version_info_t& compilerVersion) const; - - template = true> - void getMetadata(ze_graph_dditable_ext_curr_t& graphDdiTableExt, - ze_graph_handle_t graphHandle, - uint32_t index, - std::vector& inputs, - std::vector& outputs) const; - - template = true> - void getMetadata(ze_graph_dditable_ext_curr_t& graphDdiTableExt, - ze_graph_handle_t graphHandle, - uint32_t index, - std::vector& inputs, - std::vector& outputs) const; - - template = true> - void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt, - ze_graph_handle_t graphHandle, - std::vector& blob, - const uint8_t*& blobPtr, - size_t& blobSize) const; - - template = true> - void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt, - ze_graph_handle_t graphHandle, - std::vector& /* unusedBlob */, - const uint8_t*& blobPtr, - size_t& blobSize) const; - - template = true> - ze_result_t seriazlideIRModelAndQueryNetworkCreateV2(const std::shared_ptr& model, - const Config& config, - 
ze_device_graph_properties_t deviceGraphProperties, - const ze_device_handle_t& _deviceHandle, - ze_graph_query_network_handle_t& hGraphQueryNetwork) const; - - // ext version >= 1.5, support API (pfnCreate2, pfnQueryNetworkCreate2, pfnQueryContextMemory) - template = true> - std::unordered_set queryImpl(const std::shared_ptr& model, - const Config& config) const; - - template = true> - ze_result_t seriazlideIRModelAndQueryNetworkCreateV1(const std::shared_ptr& model, - const Config& config, - ze_device_graph_properties_t deviceGraphProperties, - const ze_device_handle_t& _deviceHandle, - ze_graph_query_network_handle_t& hGraphQueryNetwork) const; - - // ext version == 1.3 && 1.4, support API (pfnQueryNetworkCreate, pfnQueryNetworkDestroy, - // pfnQueryNetworkGetSupportedLayers) - template = true> - std::unordered_set queryImpl(const std::shared_ptr& model, - const Config& config) const; - - // For ext version < 1.3 - template = true> - std::unordered_set queryImpl(const std::shared_ptr& model, - const Config& config) const; - - template = true> - ze_result_t createGraph(const ze_graph_format_t& format, - const SerializedIR& serializedIR, - const std::string& buildFlags, - const uint32_t& flags, - ze_graph_handle_t* graph) const; - - template = true> - ze_result_t createGraph(const ze_graph_format_t& format, - const SerializedIR& serializedIR, - const std::string& buildFlags, - const uint32_t& flags, - ze_graph_handle_t* graph) const; - -private: - ze_driver_handle_t _driverHandle = nullptr; - ze_device_handle_t _deviceHandle = nullptr; - ze_context_handle_t _context = nullptr; - - ze_graph_dditable_ext_curr_t& _graphDdiTableExt; - Logger _logger; -}; - -} // namespace driverCompilerAdapter -} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp deleted file mode 100644 index 84bca75106483d..00000000000000 --- 
a/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright (C) 2018-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "driver_compiler_adapter.hpp" - -#include "graph_transformations.hpp" -#include "intel_npu/config/common.hpp" -#include "intel_npu/utils/zero/zero_api.hpp" -#include "intel_npu/utils/zero/zero_result.hpp" -#include "ze_intel_npu_uuid.h" -#include "zero_backend.hpp" -#include "zero_compiler_in_driver.hpp" -#include "zero_init.hpp" - -namespace intel_npu { -namespace driverCompilerAdapter { - -LevelZeroCompilerAdapter::LevelZeroCompilerAdapter(std::shared_ptr iEngineBackend) - : _logger("LevelZeroCompilerAdapter", Logger::global().level()) { - _logger.debug("initialize LevelZeroCompilerAdapter start"); - - std::shared_ptr zeroBackend = nullptr; - zeroBackend = std::dynamic_pointer_cast(iEngineBackend); - if (!zeroBackend) { - OPENVINO_THROW("LevelZeroCompilerAdapter init failed to cast zeroBackend, zeroBackend is a nullptr"); - } - - ze_context_handle_t zeContext = static_cast(zeroBackend->getContext()); - ze_driver_handle_t driverHandle = static_cast(zeroBackend->getDriverHandle()); - ze_device_handle_t deviceHandle = static_cast(zeroBackend->getDeviceHandle()); - ze_graph_dditable_ext_curr_t& graph_ddi_table_ext = zeroBackend->getGraphDdiTable(); - - uint32_t graphExtVersion = graph_ddi_table_ext.version(); - - if (driverHandle == nullptr) { - OPENVINO_THROW("LevelZeroCompilerAdapter failed to get properties about zeDriver"); - } - - _logger.info("LevelZeroCompilerAdapter creating adapter using graphExtVersion"); - - switch (graphExtVersion) { - case ZE_GRAPH_EXT_VERSION_1_3: - apiAdapter = std::make_shared>(driverHandle, - deviceHandle, - zeContext, - graph_ddi_table_ext); - break; - case ZE_GRAPH_EXT_VERSION_1_4: - apiAdapter = std::make_shared>(driverHandle, - deviceHandle, - zeContext, - graph_ddi_table_ext); - break; - case ZE_GRAPH_EXT_VERSION_1_5: - 
apiAdapter = std::make_shared>(driverHandle, - deviceHandle, - zeContext, - graph_ddi_table_ext); - break; - case ZE_GRAPH_EXT_VERSION_1_6: - apiAdapter = std::make_shared>(driverHandle, - deviceHandle, - zeContext, - graph_ddi_table_ext); - break; - case ZE_GRAPH_EXT_VERSION_1_7: - apiAdapter = std::make_shared>(driverHandle, - deviceHandle, - zeContext, - graph_ddi_table_ext); - break; - case ZE_GRAPH_EXT_VERSION_1_8: - apiAdapter = std::make_shared>(driverHandle, - deviceHandle, - zeContext, - graph_ddi_table_ext); - break; - default: - apiAdapter = std::make_shared>(driverHandle, - deviceHandle, - zeContext, - graph_ddi_table_ext); - break; - } - - _logger.info("initialize LevelZeroCompilerAdapter complete, using graphExtVersion: %d.%d", - ZE_MAJOR_VERSION(graphExtVersion), - ZE_MINOR_VERSION(graphExtVersion)); -} - -uint32_t LevelZeroCompilerAdapter::getSupportedOpsetVersion() const { - return apiAdapter->getSupportedOpsetVersion(); -} - -NetworkDescription LevelZeroCompilerAdapter::compile(const std::shared_ptr& model, - const Config& config) const { - _logger.debug("compile start"); - return apiAdapter->compile(model, config); -} - -ov::SupportedOpsMap LevelZeroCompilerAdapter::query(const std::shared_ptr& model, - const Config& config) const { - _logger.debug("query start"); - return apiAdapter->query(model, config); -} - -NetworkMetadata LevelZeroCompilerAdapter::parse(const std::vector& network, const Config& config) const { - _logger.debug("parse start"); - return apiAdapter->parse(network, config); -} - -std::vector LevelZeroCompilerAdapter::process_profiling_output(const std::vector&, - const std::vector&, - const Config&) const { - OPENVINO_THROW("Profiling post-processing is not implemented."); -} - -void LevelZeroCompilerAdapter::release(std::shared_ptr networkDescription) { - _logger.info("release - using adapter to release networkDescription"); - apiAdapter->release(std::move(networkDescription)); -} - -CompiledNetwork 
LevelZeroCompilerAdapter::getCompiledNetwork(const NetworkDescription& networkDescription) { - _logger.info("getCompiledNetwork - using adapter to perform getCompiledNetwork(networkDescription)"); - return apiAdapter->getCompiledNetwork(networkDescription); -} - -} // namespace driverCompilerAdapter -} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp deleted file mode 100644 index 47bc7611d132c3..00000000000000 --- a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp +++ /dev/null @@ -1,1081 +0,0 @@ -// Copyright (C) 2018-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "zero_compiler_in_driver.hpp" - -#include -#include - -#include "graph_transformations.hpp" -#include "intel_npu/common/itt.hpp" -#include "intel_npu/config/common.hpp" -#include "intel_npu/config/compiler.hpp" -#include "intel_npu/config/runtime.hpp" -#include "intel_npu/prefix.hpp" -#include "intel_npu/utils/zero/zero_result.hpp" -#include "openvino/core/model.hpp" - -namespace { - -constexpr std::string_view INPUTS_PRECISIONS_KEY = "--inputs_precisions"; -constexpr std::string_view INPUTS_LAYOUTS_KEY = "--inputs_layouts"; -constexpr std::string_view OUTPUTS_PRECISIONS_KEY = "--outputs_precisions"; -constexpr std::string_view OUTPUTS_LAYOUTS_KEY = "--outputs_layouts"; - -//