diff --git a/.github/actions/cache/package-lock.json b/.github/actions/cache/package-lock.json index 3c3887ba4b29c3..d452b53dccbc22 100644 --- a/.github/actions/cache/package-lock.json +++ b/.github/actions/cache/package-lock.json @@ -4283,9 +4283,9 @@ } }, "node_modules/cross-spawn": { - "version": "7.0.6", - "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", - "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", + "version": "7.0.3", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", + "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", "dev": true, "license": "MIT", "dependencies": { diff --git a/.github/actions/install_ov_wheels/action.yml b/.github/actions/install_ov_wheels/action.yml deleted file mode 100644 index 82c03aeb4e4f2c..00000000000000 --- a/.github/actions/install_ov_wheels/action.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: 'Find and install OpenVINO Python wheels' -description: 'Finds the OpenVINO Python wheels suitable for the "python3" executable and installs them' -inputs: - wheels-dir-path: - description: 'Path to the directory in which wheels are located' - required: true - wheels-to-install: - description: 'List of wheel names to install in the form of "openvino openvino_tokenizers"' -runs: - using: 'composite' - steps: - - name: Install OpenVINO Python wheels (Windows) - shell: pwsh - if: runner.os == 'Windows' - run: | - # Get the Python version - $pyVersion = python3 -c "import sys; print(f'{sys.version_info.major}{sys.version_info.minor}')" - - foreach ($wheel in $("${{ inputs.wheels-to-install }}" -split ' ')) { - # Search for the python-specific wheel version and install it if exists - $wheelPath = Get-ChildItem -Path ${{ inputs.wheels-dir-path }} -Filter "$wheel-*cp$pyVersion*.whl" | Select-Object -First 1 - if ($wheelPath) { - python3 -m pip install 
$wheelPath.FullName - } else { - # If the python-specific version does not exist, install by name only - $wheelPathByName = Get-ChildItem -Path ${{ inputs.wheels-dir-path }} -Filter "$wheel-*.whl" | Select-Object -First 1 - python3 -m pip install $wheelPathByName.FullName - } - } - - - name: Install OpenVINO Python wheels (Linux and macOS) - shell: bash - if: runner.os != 'Windows' - run: | - py_version=$(python3 -c "import sys; print(f'{sys.version_info.major}{sys.version_info.minor}')") - for wheel in ${{ inputs.wheels-to-install }}; do - echo "Installing the ${wheel} wheel" - - # Search for the python-specific wheel version and install it if exists - wheel_path=$(find ${{ inputs.wheels-dir-path }} -name "$wheel-*cp$py_version*.whl") - echo "Wheel path: ${wheel_path}" - if [ -n "${wheel_path}" ]; then - python3 -m pip install $wheel_path - else - # If the python-specific version does not exist, install by name only - python3 -m pip install ${{ inputs.wheels-dir-path }}/$wheel-*.whl - fi - done diff --git a/.github/actions/openvino_provider/action.yml b/.github/actions/openvino_provider/action.yml index a17986f35d3723..dd1078bb0d4353 100644 --- a/.github/actions/openvino_provider/action.yml +++ b/.github/actions/openvino_provider/action.yml @@ -177,7 +177,7 @@ runs: else ov_package_url=$(curl -s ${{ inputs.nightly_package_source }} | jq -r '.${{ inputs.platform }}_${{ inputs.arch }}') fi - cd ${{ inputs.install_dir || github.workspace }} + cd ${{ inputs.install_dir || env.GITHUB_WORKSPACE }} package_basename=$(basename $ov_package_url) wget $ov_package_url --progress=bar:force:noscroll -O $package_basename package_folder=${package_basename%.*} @@ -196,7 +196,7 @@ runs: uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 with: name: ${{ steps.openvino_s3_download.outputs.ov_artifact_name }} - path: ${{ github.workspace }}/${{ steps.openvino_s3_download.outputs.ov_package_path }} + path: ${{ 
steps.openvino_s3_download.outputs.ov_package_path }} if-no-files-found: 'error' - name: Get wheel diff --git a/.github/actions/openvino_provider/get_s3_package.py b/.github/actions/openvino_provider/get_s3_package.py index 02ea99cb2f3403..df253a422421ec 100644 --- a/.github/actions/openvino_provider/get_s3_package.py +++ b/.github/actions/openvino_provider/get_s3_package.py @@ -54,10 +54,6 @@ def main(product, version_pattern, platform, arch, folder): matching_files = filter_files_by_criteria(all_files, product, version_pattern, platform, arch, folder) if matching_files: logger.info(f"Matching packages: {sorted(matching_files)}") - if len(matching_files) > 1: - custom_release_build_pattern = fr".*/{version_pattern}/(linux_|windows_|macos_).*/.*" - # Exclude custom release builds, if any, from matches - matching_files = [file for file in matching_files if not re.search(custom_release_build_pattern, file)] package_url = f"https://storage.openvinotoolkit.org{sorted(matching_files)[-1]}" logger.info(f"Returning package URL: {package_url}") action_utils.set_github_output("package_url", package_url) diff --git a/.github/actions/setup_python/action.yml b/.github/actions/setup_python/action.yml index d1290508ab778f..96968f55636df9 100644 --- a/.github/actions/setup_python/action.yml +++ b/.github/actions/setup_python/action.yml @@ -22,6 +22,7 @@ inputs: runs: using: 'composite' steps: + - name: Check if Python is already installed (Linux) if: ${{ runner.os == 'Linux' }} shell: bash @@ -53,11 +54,13 @@ runs: with: python-version: ${{ inputs.version }} - - if: ${{ runner.os == 'macOS' || runner.os == 'Windows' || (runner.os == 'Linux' && runner.arch != 'ARM64' && steps.check_python.outputs.installed == 'false') }} + - if: ${{ runner.os == 'macOS' || runner.os == 'Windows' || (runner.os == 'Linux' && runner.arch != 'ARM64' && steps.check_python.outputs.installed == 'false' ) }} name: Setup Python ${{ inputs.version }} uses: 
actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: ${{ inputs.version }} + env: + PIP_CACHE_DIR: ${{ inputs.self-hosted-runner == 'true' && inputs.pip-cache-path || '' }} - if: ${{ inputs.should-setup-pip-paths == 'true' && runner.os != 'Windows' }} name: Setup pip variables (cache and install path) diff --git a/.github/components.yml b/.github/components.yml index f0764d50d9f812..8de51a2ced3343 100644 --- a/.github/components.yml +++ b/.github/components.yml @@ -110,6 +110,7 @@ IR_FE: ONNX_FE: revalidate: + - MO - OVC - ONNX_RT build: @@ -118,6 +119,7 @@ ONNX_FE: PDPD_FE: revalidate: + - MO - OVC build: - CPU @@ -125,6 +127,7 @@ PDPD_FE: TF_FE: revalidate: + - MO - OVC build: - CPU @@ -133,6 +136,7 @@ TF_FE: TFL_FE: revalidate: + - MO - OVC build: - CPU @@ -140,14 +144,15 @@ TFL_FE: PyTorch_FE: revalidate: + - MO - OVC build: - CPU - Python_API - - TOKENIZERS # PyTorch_FE tests depend on tokenizers build JAX_FE: revalidate: + - MO - OVC build: - CPU @@ -165,10 +170,10 @@ C_API: Python_API: revalidate: - samples + - MO - OVC - tools - TF_FE - - docs_snippets build: - CPU - HETERO @@ -221,6 +226,14 @@ OVC: - Python_API - TOKENIZERS # TF_FE tests depends on tokenizers build +MO: + revalidate: + - PyTorch_FE + - TF_FE + build: + - Python_API + - TOKENIZERS # TF_FE tests depends on tokenizers build + tools: build: - CPU @@ -229,12 +242,6 @@ tools: docs: revalidate: [] build: [] - -docs_snippets: - revalidate: - - docs_snippets - build: - - Python_API licensing: revalidate: [] diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 5eac7709e32703..359ff683c9b22a 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -141,18 +141,3 @@ updates: - "mryzhov" - "ilya-lavrenov" open-pull-requests-limit: 3 - - # Docker images - - package-ecosystem: docker - directory: "/" - schedule: - interval: "daily" - time: "09:00" - timezone: "Asia/Dubai" - allow: - - dependency-type: "direct" - assignees: - - "akashchi" - 
- "mryzhov" - - "ilya-lavrenov" - open-pull-requests-limit: 3 \ No newline at end of file diff --git a/.github/dockerfiles/docker_tag b/.github/dockerfiles/docker_tag index 37b0ae41c049c4..3783a7e8d5600a 100644 --- a/.github/dockerfiles/docker_tag +++ b/.github/dockerfiles/docker_tag @@ -1 +1 @@ -pr-28142 +pr-27430 diff --git a/.github/dockerfiles/ov_build/fedora_29/Dockerfile b/.github/dockerfiles/ov_build/fedora_29/Dockerfile index 0b9911ac707b13..e5f400e2915e9c 100644 --- a/.github/dockerfiles/ov_build/fedora_29/Dockerfile +++ b/.github/dockerfiles/ov_build/fedora_29/Dockerfile @@ -3,11 +3,7 @@ FROM ${REGISTRY}/library/fedora:29 USER root -# dnf configuration -RUN echo "timeout=60" >> /etc/dnf/dnf.conf && \ - echo "retries=10" >> /etc/dnf/dnf.conf - -RUN dnf update -y && dnf install -y \ +RUN yum update -y && yum install -y \ git \ curl \ python3 \ diff --git a/.github/dockerfiles/ov_build/ubuntu_22_04_riscv/Dockerfile b/.github/dockerfiles/ov_build/ubuntu_22_04_riscv/Dockerfile index 8b955def2aec00..5911016b37d008 100644 --- a/.github/dockerfiles/ov_build/ubuntu_22_04_riscv/Dockerfile +++ b/.github/dockerfiles/ov_build/ubuntu_22_04_riscv/Dockerfile @@ -62,13 +62,10 @@ RUN echo deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy main restricte RUN dpkg --add-architecture riscv64 && \ apt-get update -o Dir::Etc::sourcelist=/etc/apt/sources.list.d/riscv64-sources.list && \ - apt-get install -y --no-install-recommends libpython3-dev:riscv64 && \ - apt-get install libgomp1:riscv64 && \ - apt-get install libatomic1:riscv64 + apt-get install -y --no-install-recommends libpython3-dev:riscv64 # Setup pip ENV PIP_VERSION="24.0" RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ python3 get-pip.py --no-cache-dir pip==${PIP_VERSION} && \ rm -f get-pip.py - diff --git a/.github/dockerfiles/ov_build/ubuntu_22_04_riscv_xuantie/Dockerfile b/.github/dockerfiles/ov_build/ubuntu_22_04_riscv_xuantie/Dockerfile deleted file mode 100644 index 
6887f83b0256c1..00000000000000 --- a/.github/dockerfiles/ov_build/ubuntu_22_04_riscv_xuantie/Dockerfile +++ /dev/null @@ -1,102 +0,0 @@ -ARG REGISTRY="docker.io" -FROM ${REGISTRY}/library/ubuntu:22.04 - -USER root - -# APT configuration -RUN echo 'Acquire::Retries "10";' > /etc/apt/apt.conf && \ - echo 'APT::Get::Assume-Yes "true";' >> /etc/apt/apt.conf && \ - echo 'APT::Get::Fix-Broken "true";' >> /etc/apt/apt.conf && \ - echo 'APT::Get::no-install-recommends "true";' >> /etc/apt/apt.conf - -ENV DEBIAN_FRONTEND="noninteractive" \ - TZ="Europe/London" - -RUN apt-get update && \ - apt-get install software-properties-common && \ - add-apt-repository --yes --no-update ppa:git-core/ppa && \ - add-apt-repository --yes --no-update ppa:deadsnakes/ppa && \ - apt-get update && \ - # install compilers to build OpenVINO for RISC-V 64 - apt-get install gcc-riscv64-linux-gnu g++-riscv64-linux-gnu && \ - apt-get install \ - curl \ - git \ - cmake \ - ccache \ - ninja-build \ - fdupes \ - patchelf \ - ca-certificates \ - gpg-agent \ - tzdata \ - # parallel gzip - pigz \ - # Python \ - python3-dev \ - python3-pip \ - python3-venv \ - python3-distutils \ - # Compilers - gcc \ - g++ \ - # xuantie-gnu-toolchain build dependencies - autoconf \ - automake \ - autotools-dev \ - libmpc-dev \ - libmpfr-dev\ - libgmp-dev \ - gawk \ - build-essential \ - bison \ - flex \ - texinfo \ - gperf \ - libtool \ - patchutils \ - bc \ - zlib1g-dev \ - libexpat-dev \ - && \ - rm -rf /var/lib/apt/lists/* - -# Install RISC-V native debian packages -RUN echo deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy main restricted > riscv64-sources.list && \ - echo deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy-updates main restricted >> riscv64-sources.list && \ - echo deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy universe >> riscv64-sources.list && \ - echo deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy-updates universe >> riscv64-sources.list && \ - echo deb [arch=amd64] 
http://archive.ubuntu.com/ubuntu/ jammy multiverse >> riscv64-sources.list && \ - echo deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy-updates multiverse >> riscv64-sources.list && \ - echo deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy-backports main restricted universe multiverse >> riscv64-sources.list && \ - echo deb [arch=amd64] http://security.ubuntu.com/ubuntu/ jammy-security main restricted >> riscv64-sources.list && \ - echo deb [arch=amd64] http://security.ubuntu.com/ubuntu/ jammy-security universe >> riscv64-sources.list && \ - echo deb [arch=amd64] http://security.ubuntu.com/ubuntu/ jammy-security multiverse >> riscv64-sources.list && \ - echo deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ jammy main >> riscv64-sources.list && \ - echo deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ jammy universe >> riscv64-sources.list && \ - echo deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ jammy-updates main >> riscv64-sources.list && \ - echo deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ jammy-security main >> riscv64-sources.list && \ - mv riscv64-sources.list /etc/apt/sources.list.d/ - -RUN dpkg --add-architecture riscv64 && \ - apt-get update -o Dir::Etc::sourcelist=/etc/apt/sources.list.d/riscv64-sources.list && \ - apt-get install -y --no-install-recommends libpython3-dev:riscv64 - -# build xuintie toolchain -ARG XUANTIE_VERSION="V2.8.1" -ARG XUANTIE_REPO="https://github.com/XUANTIE-RV/xuantie-gnu-toolchain" -ARG XUINTIE_PATH="/opt/riscv" -ARG XUINTIE_TMP_PATH="/tmp/xuantie" -ARG XUINTIE_SRC="/tmp/xuantie/src" - -RUN mkdir -p ${XUINTIE_TMP_PATH} && cd ${XUINTIE_TMP_PATH} && \ - git clone --branch ${XUANTIE_VERSION} --depth 1 ${XUANTIE_REPO} ${XUINTIE_SRC} && cd ${XUINTIE_SRC} && \ - ./configure --prefix=${XUINTIE_PATH} --disable-gdb && \ - make linux -j$(nproc) && make install && \ - rm -rf ${XUINTIE_TMP_PATH} - -# Setup pip -ENV PIP_VERSION="24.0" -RUN curl https://bootstrap.pypa.io/get-pip.py -o 
get-pip.py && \ - python3 get-pip.py --no-cache-dir pip==${PIP_VERSION} && \ - rm -f get-pip.py diff --git a/.github/dockerfiles/ov_test/debian_10_py310/Dockerfile b/.github/dockerfiles/ov_test/debian_10_py310/Dockerfile deleted file mode 100644 index e7dbadf5a414ba..00000000000000 --- a/.github/dockerfiles/ov_test/debian_10_py310/Dockerfile +++ /dev/null @@ -1,76 +0,0 @@ -ARG REGISTRY="docker.io" -FROM ${REGISTRY}/library/debian:10.13 - -USER root - -# APT configuration -RUN echo 'Acquire::Retries "10";' > /etc/apt/apt.conf && \ - echo 'APT::Get::Assume-Yes "true";' >> /etc/apt/apt.conf && \ - echo 'APT::Get::Fix-Broken "true";' >> /etc/apt/apt.conf && \ - echo 'APT::Get::no-install-recommends "true";' >> /etc/apt/apt.conf - -ENV DEBIAN_FRONTEND="noninteractive" \ - TZ="Europe/London" - -RUN apt-get update && \ - apt-get install \ - git \ - libc6-dev \ - # parallel gzip - pigz \ - # Python - python3 \ - python3-pip \ - python3-dev \ - python3-venv \ - python3-distutils \ - # To build Python 3.10 from source - build-essential \ - libffi-dev \ - libgdbm-dev \ - libc6-dev \ - libssl-dev \ - zlib1g-dev \ - libbz2-dev \ - libreadline-dev \ - libsqlite3-dev \ - libncurses5-dev \ - libncursesw5-dev \ - xz-utils \ - tk-dev \ - libxml2-dev \ - libxmlsec1-dev \ - liblzma-dev \ - wget \ - curl \ - && \ - rm -rf /var/lib/apt/lists/* - -# Install openvino dependencies -ADD scripts/install_dependencies/install_openvino_dependencies.sh /install_openvino_dependencies.sh -RUN chmod +x /install_openvino_dependencies.sh && \ - /install_openvino_dependencies.sh && \ - rm -rf /var/lib/apt/lists/* - -# Setup Python 3.10 -RUN wget https://www.python.org/ftp/python/3.10.9/Python-3.10.9.tar.xz - -RUN tar -xf Python-3.10.9.tar.xz && \ - cd Python-3.10.9 && \ - ./configure --enable-optimizations && \ - make -j 8 && \ - make altinstall - -# Setup pip -ENV PIP_VERSION="24.0" -RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ - python3.10 get-pip.py --no-cache-dir 
pip==${PIP_VERSION} && \ - rm -f get-pip.py - -# Use Python 3.10 as default instead of Python 3.7 -# Using venv here 'cause other methods to switch the default Python on Ubuntu 20 break both system and wheels build -RUN python3.10 -m venv venv -ENV PATH="/venv/bin:$PATH" - -ENV PIP_CACHE_DIR=/mount/caches/pip/linux/${PIP_VERSION} -ENV PIP_INSTALL_PATH=/venv/lib/python3.10/site-packages diff --git a/.github/dockerfiles/ov_test/fedora_33/Dockerfile b/.github/dockerfiles/ov_test/fedora_33/Dockerfile index 4c5b2037e60578..c059c82c7d3cf2 100644 --- a/.github/dockerfiles/ov_test/fedora_33/Dockerfile +++ b/.github/dockerfiles/ov_test/fedora_33/Dockerfile @@ -3,20 +3,10 @@ FROM ${REGISTRY}/library/fedora:33 USER root -# dnf configuration -RUN echo "timeout=60" >> /etc/dnf/dnf.conf && \ - echo "retries=10" >> /etc/dnf/dnf.conf - -RUN dnf update -y && dnf install -y \ +RUN yum update -y && yum install -y \ git \ curl \ - python3 \ - findutils \ - ocl-icd \ - ocl-icd-devel \ - # parallel gzip - pigz \ - xz + python3 # Install Node ENV NODE_VERSION=21.7.3 diff --git a/.github/dockerfiles/ov_test/ubuntu_20_04_x64_py313/Dockerfile b/.github/dockerfiles/ov_test/ubuntu_20_04_x64_py313/Dockerfile deleted file mode 100644 index b6b99f81305dee..00000000000000 --- a/.github/dockerfiles/ov_test/ubuntu_20_04_x64_py313/Dockerfile +++ /dev/null @@ -1,52 +0,0 @@ -ARG REGISTRY="docker.io" -FROM ${REGISTRY}/library/ubuntu:20.04 - -USER root - -# APT configuration -RUN echo 'Acquire::Retries "10";' > /etc/apt/apt.conf && \ - echo 'APT::Get::Assume-Yes "true";' >> /etc/apt/apt.conf && \ - echo 'APT::Get::Fix-Broken "true";' >> /etc/apt/apt.conf && \ - echo 'APT::Get::no-install-recommends "true";' >> /etc/apt/apt.conf - -ENV DEBIAN_FRONTEND="noninteractive" \ - TZ="Europe/London" - -RUN apt-get update && \ - apt-get install software-properties-common && \ - add-apt-repository --yes --no-update ppa:git-core/ppa && \ - add-apt-repository --yes --no-update ppa:deadsnakes/ppa && \ - apt-get update 
&& \ - apt-get install \ - curl \ - git \ - gpg-agent \ - tzdata \ - # parallel gzip - pigz \ - # Python - python3.13-dev \ - python3.13-venv \ - && \ - rm -rf /var/lib/apt/lists/* - -# Install openvino dependencies -ADD scripts/install_dependencies/install_openvino_dependencies.sh /install_openvino_dependencies.sh -RUN chmod +x /install_openvino_dependencies.sh && \ - /install_openvino_dependencies.sh && \ - rm -rf /var/lib/apt/lists/* - -# Setup pip -ENV PIP_VERSION="24.0" -RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ - python3 get-pip.py --no-cache-dir pip==${PIP_VERSION} && \ - python3.13 get-pip.py --no-cache-dir pip==${PIP_VERSION} && \ - rm -f get-pip.py - -# Use Python 3.13 as default instead of Python 3.8 -# Using venv here 'cause other methods to switch the default Python on Ubuntu 20 break both system and wheels build -RUN python3.13 -m venv venv -ENV PATH="/venv/bin:$PATH" - -ENV PIP_CACHE_DIR=/mount/caches/pip/linux/${PIP_VERSION} -ENV PIP_INSTALL_PATH=/venv/lib/python3.13/site-packages diff --git a/.github/dockerfiles/ov_test/ubuntu_22_04_riscv_xuantie/Dockerfile b/.github/dockerfiles/ov_test/ubuntu_22_04_riscv_xuantie/Dockerfile deleted file mode 100644 index a9304f4affec09..00000000000000 --- a/.github/dockerfiles/ov_test/ubuntu_22_04_riscv_xuantie/Dockerfile +++ /dev/null @@ -1,56 +0,0 @@ -ARG REGISTRY="docker.io" -FROM ${REGISTRY}/library/ubuntu:22.04 - -USER root - -# APT configuration -RUN echo 'Acquire::Retries "10";' > /etc/apt/apt.conf && \ - echo 'APT::Get::Assume-Yes "true";' >> /etc/apt/apt.conf && \ - echo 'APT::Get::Fix-Broken "true";' >> /etc/apt/apt.conf && \ - echo 'APT::Get::no-install-recommends "true";' >> /etc/apt/apt.conf - -ENV DEBIAN_FRONTEND="noninteractive" \ - TZ="Europe/London" - -RUN apt-get update && \ - apt-get install software-properties-common && \ - add-apt-repository --yes --no-update ppa:git-core/ppa && \ - add-apt-repository --yes --no-update ppa:deadsnakes/ppa && \ - apt-get update && \ - 
apt-get install \ - curl \ - git \ - ninja-build \ - ca-certificates \ - gpg-agent \ - tzdata \ - # parallel gzip - pigz \ - # Compilers - gcc \ - g++ \ - # qemu build dependencies - gcc-riscv64-linux-gnu \ - g++-riscv64-linux-gnu \ - libc6-riscv64-cross \ - build-essential \ - pkg-config \ - libglib2.0-dev \ - libpixman-1-dev \ - zlib1g-dev \ - && \ - rm -rf /var/lib/apt/lists/* - -# build xuintie qemu emulator only -ARG XUANTIE_VERSION="V2.8.1" -ARG XUANTIE_REPO="https://github.com/XUANTIE-RV/xuantie-gnu-toolchain" -ARG XUINTIE_PATH="/opt/riscv" -ARG XUINTIE_TMP_PATH="/tmp/xuantie" -ARG XUINTIE_SRC="/tmp/xuantie/src" - -RUN mkdir -p ${XUINTIE_TMP_PATH} && cd ${XUINTIE_TMP_PATH} && \ - git clone --branch ${XUANTIE_VERSION} --depth 1 ${XUANTIE_REPO} ${XUINTIE_SRC} && \ - cd ${XUINTIE_SRC} && git submodule update --init -- qemu && \ - cd ${XUINTIE_SRC}/qemu && ./configure --prefix=${XUINTIE_PATH} --interp-prefix=/usr/riscv64-linux-gnu --target-list=riscv64-linux-user && \ - make -j$(nproc) && make install && \ - rm -rf ${XUINTIE_TMP_PATH} diff --git a/.github/github_org_control/config.json b/.github/github_org_control/config.json index 52a29f2790481b..7fc23b7888c170 100644 --- a/.github/github_org_control/config.json +++ b/.github/github_org_control/config.json @@ -36,6 +36,7 @@ "openvino-tf-frontend-maintainers": "category: TF FE", "openvino-onnx-frontend-maintainers": "category: ONNX FE", "openvino-ie-tests-maintainers": "category: IE Tests", + "openvino-mo-maintainers": "category: MO", "openvino-ovc-maintainers": "category: OVC", "openvino-ngraph-maintainers": "category: Core", "openvino-scripts-maintainers": "category: build", diff --git a/.github/labeler.yml b/.github/labeler.yml index cb05d3dea36960..e9b2acb26c9072 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -57,12 +57,10 @@ 'category: docs': - '**/*.md' - any: ['docs/**/*', - '!docs/snippets/**/*', - '!docs/articles_en/assets/snippets/**/*'] + '!docs/snippets/**/*'] 'category: 
docs_snippets': - 'docs/snippets/**/*' -- 'docs/articles_en/assets/snippets/**/*' 'category: extensions': - 'src/core/include/openvino/core/extension.hpp' diff --git a/.github/scripts/workflow_rerun/errors_to_look_for.json b/.github/scripts/workflow_rerun/errors_to_look_for.json index d8fe6ac2df03d2..b1074fa43e7200 100644 --- a/.github/scripts/workflow_rerun/errors_to_look_for.json +++ b/.github/scripts/workflow_rerun/errors_to_look_for.json @@ -8,7 +8,7 @@ "ticket": 135715 }, { - "error_text": "error: RPC failed", + "error_text": "GnuTLS recv error", "ticket": 131918 }, { @@ -82,29 +82,5 @@ { "error_text": "Upload progress stalled", "ticket": 152933 - }, - { - "error_text": "because the GET request got Content-Type", - "ticket": 158400 - }, - { - "error_text": "Unable to make request:", - "ticket": 158401 - }, - { - "error_text": "Failed to make request", - "ticket": 158401 - }, - { - "error_text": "Failure when receiving data from the peer", - "ticket": 159323 - }, - { - "error_text": "HTTP response code said error", - "ticket": 159398 - }, - { - "error_text": "download failed after attempts", - "ticket": 159547 } ] \ No newline at end of file diff --git a/.github/workflows/android_arm64.yml b/.github/workflows/android_arm64.yml index b760d9746d7842..e0954871f4b51e 100644 --- a/.github/workflows/android_arm64.yml +++ b/.github/workflows/android_arm64.yml @@ -25,7 +25,6 @@ jobs: steps: - name: checkout action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: .github/actions/smart-ci @@ -55,7 +54,6 @@ jobs: steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 - uses: ./.github/actions/handle_docker id: handle_docker @@ -101,7 +99,6 @@ jobs: steps: - name: Clone OpenVINO uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: path: 'openvino' @@ -120,7 +117,6 @@ jobs: - name: Clone vcpkg 
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: repository: 'microsoft/vcpkg' ref: ${{ env.VCPKG_VERSION }} diff --git a/.github/workflows/android_x64.yml b/.github/workflows/android_x64.yml index efd14541010730..b0b46c662abdbb 100644 --- a/.github/workflows/android_x64.yml +++ b/.github/workflows/android_x64.yml @@ -28,7 +28,6 @@ jobs: steps: - name: checkout action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: .github/actions/smart-ci @@ -58,7 +57,6 @@ jobs: steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 - uses: ./.github/actions/handle_docker id: handle_docker @@ -100,14 +98,12 @@ jobs: steps: - name: Clone OpenVINO uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: path: 'openvino' submodules: 'true' - name: Clone OpenVINO GenAI uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: repository: 'openvinotoolkit/openvino.genai' path: ${{ env.OPENVINO_GENAI_REPO }} diff --git a/.github/workflows/build_doc.yml b/.github/workflows/build_doc.yml index c0dac9816598e1..8c78375e61769c 100644 --- a/.github/workflows/build_doc.yml +++ b/.github/workflows/build_doc.yml @@ -19,7 +19,6 @@ jobs: steps: - name: Clone OpenVINO uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: submodules: 'true' lfs: 'true' diff --git a/.github/workflows/check_pr_commits.yml b/.github/workflows/check_pr_commits.yml index 91d6a2a497a8cd..f7f66be299876c 100644 --- a/.github/workflows/check_pr_commits.yml +++ b/.github/workflows/check_pr_commits.yml @@ -10,7 +10,6 @@ jobs: steps: - name: Clone OpenVINO uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 - name: Install dependencies run: python3 -m pip install -r 
./.github/github_org_control/requirements.txt diff --git a/.github/workflows/cleanup_caches.yml b/.github/workflows/cleanup_caches.yml index c3aac30ccd4379..3fc69b21374093 100644 --- a/.github/workflows/cleanup_caches.yml +++ b/.github/workflows/cleanup_caches.yml @@ -4,7 +4,7 @@ on: schedule: # at 00:00 on the 1st day of every month - cron: '0 0 1 * *' - + permissions: read-all jobs: @@ -49,7 +49,6 @@ jobs: steps: - name: Checkout cach action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: .github/actions/cache @@ -61,8 +60,8 @@ jobs: cache-path: ${{ env.CCACHE_PATH }} recursive: true key: '.' - - + + Cleanup_ccache_win: name: Cleanup Windows ccache runs-on: 'aks-win-4-cores-8gb' @@ -72,7 +71,6 @@ jobs: steps: - name: Checkout cach action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: .github/actions/cache diff --git a/.github/workflows/code_snippets.yml b/.github/workflows/code_snippets.yml index 5916f91447abc9..9337fdff4b2905 100644 --- a/.github/workflows/code_snippets.yml +++ b/.github/workflows/code_snippets.yml @@ -29,7 +29,6 @@ jobs: steps: - name: Clone OpenVINO uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: submodules: 'true' diff --git a/.github/workflows/code_style.yml b/.github/workflows/code_style.yml index 3969da2b97c5a1..a70d2641cb57f3 100644 --- a/.github/workflows/code_style.yml +++ b/.github/workflows/code_style.yml @@ -15,7 +15,6 @@ jobs: pull-requests: write steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: submodules: 'true' @@ -39,36 +38,6 @@ jobs: level: warning fail_on_error: true - clang-format-aarch64: - runs-on: ubuntu-22.04 - if: ${{ github.repository_owner == 'openvinotoolkit' }} - permissions: - pull-requests: write - steps: - - uses: 
actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - submodules: 'true' - - - name: Install clang-format-15 and cross-compilation dependencies - run: | - sudo apt update - sudo apt --assume-yes install binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu scons clang-format-15 - - # Run cmake with -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT in order to enable codestyle check for ITT collector - - name: CMake configure - run: cmake -DENABLE_CLANG_FORMAT=ON -DENABLE_TESTS=ON -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT -DCMAKE_TOOLCHAIN_FILE=cmake/arm64.toolchain.cmake -B build_arm64 - - - name: Create code style diff - run: cmake --build build_arm64 --target clang_format_fix_all -j8 - - - name: suggester / clang-format - if: startsWith(github.event_name, 'pull_request') - uses: reviewdog/action-suggester@db4abb16fbaabe386831e5addb7be1485d0d63d3 # v1.18.0 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - level: warning - fail_on_error: true - ShellCheck: runs-on: ubuntu-22.04 if: ${{ github.repository_owner == 'openvinotoolkit' }} @@ -76,7 +45,6 @@ jobs: pull-requests: write steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: submodules: 'true' @@ -109,7 +77,6 @@ jobs: if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: submodules: 'true' diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index fd6a029abfaa67..cde1b9cf67e2fc 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -33,7 +33,6 @@ jobs: - name: Clone OpenVINO uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: submodules: 'true' diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index 52ac10c9a6882a..5a08ec084dadac 100644 --- 
a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -35,7 +35,6 @@ jobs: steps: - name: checkout action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: .github/actions/smart-ci @@ -64,7 +63,6 @@ jobs: steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 - uses: ./.github/actions/handle_docker id: handle_docker @@ -100,7 +98,6 @@ jobs: steps: - name: Clone OpenVINO uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: path: ${{ env.OPENVINO_REPO }} submodules: 'true' @@ -108,7 +105,6 @@ jobs: - name: Clone OpenVINO Contrib uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: repository: 'openvinotoolkit/openvino_contrib' path: ${{ env.OPENVINO_CONTRIB_REPO }} diff --git a/.github/workflows/debian_10_arm.yml b/.github/workflows/debian_10_arm.yml index 20b1daa0c5dc8d..73426222253adb 100644 --- a/.github/workflows/debian_10_arm.yml +++ b/.github/workflows/debian_10_arm.yml @@ -25,7 +25,6 @@ jobs: steps: - name: checkout action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: .github/actions/smart-ci @@ -50,7 +49,7 @@ jobs: Docker: needs: Smart_CI if: "!needs.smart_ci.outputs.skip_workflow" - runs-on: aks-linux-4-cores-8gb-arm-docker-build + runs-on: aks-linux-16-cores-arm-docker-build container: image: openvinogithubactions.azurecr.io/docker_build:0.2 volumes: @@ -60,7 +59,6 @@ jobs: steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 - uses: ./.github/actions/handle_docker id: handle_docker @@ -77,7 +75,7 @@ jobs: if: "!needs.smart_ci.outputs.skip_workflow" uses: ./.github/workflows/job_build_linux.yml with: - runner: 'aks-linux-16-cores-32gb-arm' + runner: 'aks-linux-16-cores-arm' 
container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.debian_10_arm }}", "volumes": ["/mount:/mount"], "options": "-e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING"}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} event-name: ${{ github.event_name }} @@ -106,7 +104,7 @@ jobs: needs: [ Docker, Build, Smart_CI ] uses: ./.github/workflows/job_cxx_unit_tests.yml with: - runner: 'aks-linux-8-cores-16gb-arm' + runner: 'aks-linux-16-cores-arm' image: ${{ fromJSON(needs.docker.outputs.images).ov_test.debian_10_arm }} affected-components: ${{ needs.smart_ci.outputs.affected_components }} os: 'debian_10' @@ -118,8 +116,6 @@ jobs: needs: [ Docker, Build, Smart_CI ] uses: ./.github/workflows/job_cpu_functional_tests.yml with: - # Additional investigation needed why CPU functional tests are failing on v6 VM size's version, - # so leave it as it is for now runner: 'aks-linux-16-cores-arm' image: ${{ fromJSON(needs.docker.outputs.images).ov_test.debian_10_arm }} python-version: '3.7' diff --git a/.github/workflows/dependency_review.yml b/.github/workflows/dependency_review.yml index 690c789cb65222..59a1eaa6e1c26f 100644 --- a/.github/workflows/dependency_review.yml +++ b/.github/workflows/dependency_review.yml @@ -10,7 +10,6 @@ jobs: steps: - name: Clone OpenVINO uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 - name: Dependency Review uses: actions/dependency-review-action@72eb03d02c7872a771aacd928f3123ac62ad6d3a # v4.3.3 diff --git a/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml b/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml index 5ed82e8330778c..ba458da5d3ec1a 100644 --- a/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml +++ b/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml @@ -33,7 +33,6 @@ jobs: steps: - name: checkout action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: 
.github/actions/smart-ci @@ -67,7 +66,6 @@ jobs: steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 - uses: ./.github/actions/handle_docker id: handle_docker @@ -112,7 +110,6 @@ jobs: steps: - name: Clone OpenVINO uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: path: ${{ env.OPENVINO_REPO }} submodules: 'true' @@ -299,7 +296,6 @@ jobs: - name: Fetch setup_python action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: | .github/actions/setup_python/action.yml diff --git a/.github/workflows/export_workflow_metrics.yml b/.github/workflows/export_workflow_metrics.yml deleted file mode 100644 index aef00244f8175b..00000000000000 --- a/.github/workflows/export_workflow_metrics.yml +++ /dev/null @@ -1,67 +0,0 @@ -name: Export workflow metrics - -on: - workflow_run: - workflows: - - "Android ARM64 with vcpkg" - - "Android x64" - - "Cleanup caches" - - "Coverity (Ubuntu 20.04, Python 3.11)" - - "Debian 10 ARM" - - "Fedora 29 (RHEL 8.4), Python 3.9" - - "Linux (Ubuntu 20.04, Python 3.9)" - - "Linux (Ubuntu 22.04, Python 3.11)" - - "Linux (Ubuntu 24.04, Python 3.12)" - - "Linux ARM64 (Ubuntu 20.04, Python 3.11)" - - "Linux Static CC (Ubuntu 22.04, Python 3.11, Clang)" - - "Linux RISC-V with Conan (Ubuntu 22.04, Python 3.10)" - - "Linux (Ubuntu 22.04, Python 3.11, Intel DPC\\+\\+ Compiler)" - - "Linux CPU Plugin Snippets with LIBXSMM (Ubuntu 20.04)" - - "Linux Sanitizers (Ubuntu 20.04, Python 3.9)" - - "macOS (Python 3.11)" - - "macOS ARM64 (Python 3.11)" - - "Manylinux 2014" - - "Webassembly" - - "Windows (VS 2019, Python 3.11, Release)" - - "Windows (VS 2019, Python 3.11, Debug)" - - "Windows Conditional Compilation (VS 2022, Python 3.11)" - - "Rerun Workflow with Known Errors" - types: - - completed - -permissions: read-all - -jobs: - export-workflow-metrics: - name: Export finished 
workflow metrics - runs-on: aks-linux-2-cores-8gb-stats - if: ${{ github.repository_owner == 'openvinotoolkit' }} - - steps: - - name: Checkout - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 - with: - sparse-checkout: '.github' - - - name: Install deps - run: | - pip3 install -r .github/scripts/requirements.txt - # dependency review action has these as an exception - # yet it still complains, so install them here - pip3 install PyGithub==2.2.0 psycopg2-binary==2.9.9 - - - name: Send metrics to SQL database - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - RUN_ID: ${{ github.event.workflow_run.id }} - PGHOST: ${{ secrets.METRICS_DATABASE_HOST }} - PGUSER: ${{ secrets.METRICS_DATABASE_USERNAME }} - PGPASSWORD: ${{ secrets.METRICS_DATABASE_PASSWORD }} - PGDATABASE: ${{ secrets.METRICS_DATABASE_NAME }} - PGPORT: 5432 - LOGLEVEL: "${{ runner.debug == '1' && 'DEBUG' || 'INFO' }}" - run: | - python3 .github/scripts/collect_github_metrics.py \ - --run-id ${{ github.event.workflow_run.id }} \ - --repository-name ${GITHUB_REPOSITORY} diff --git a/.github/workflows/fedora_29.yml b/.github/workflows/fedora_29.yml index 6d128f33fca274..f3b101327f76dc 100644 --- a/.github/workflows/fedora_29.yml +++ b/.github/workflows/fedora_29.yml @@ -25,7 +25,6 @@ jobs: steps: - name: checkout action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: .github/actions/smart-ci @@ -60,7 +59,6 @@ jobs: steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 - uses: ./.github/actions/handle_docker id: handle_docker @@ -131,10 +129,10 @@ jobs: # install previous release version mv /tmp/openvino-2023.repo /etc/yum.repos.d - dnf install -y openvino + yum install -y openvino # install current version - dnf install --allowerasing -y *.rpm + yum install --allowerasing -y *.rpm working-directory: ${{ env.RPM_PACKAGES_DIR }} 
- name: Test RPM packages diff --git a/.github/workflows/files_size.yml b/.github/workflows/files_size.yml index c263afed1fe465..2768e731b6578b 100644 --- a/.github/workflows/files_size.yml +++ b/.github/workflows/files_size.yml @@ -13,7 +13,6 @@ jobs: if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 - name: git ls-tree run: git ls-tree -r -t -l --full-name HEAD | sort -n -r -k 4 diff --git a/.github/workflows/job_build_linux.yml b/.github/workflows/job_build_linux.yml index c56de5872cc2df..3964f049be2abb 100644 --- a/.github/workflows/job_build_linux.yml +++ b/.github/workflows/job_build_linux.yml @@ -92,7 +92,6 @@ jobs: steps: - name: Clone OpenVINO uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: path: ${{ env.OPENVINO_REPO }} submodules: 'true' @@ -108,7 +107,6 @@ jobs: - name: Clone OpenVINO Contrib uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: repository: 'openvinotoolkit/openvino_contrib' path: ${{ env.OPENVINO_CONTRIB_REPO }} diff --git a/.github/workflows/job_build_windows.yml b/.github/workflows/job_build_windows.yml index d5d42ffcfea8d2..8a39f348d824c3 100644 --- a/.github/workflows/job_build_windows.yml +++ b/.github/workflows/job_build_windows.yml @@ -21,11 +21,6 @@ on: description: 'A string of options passed to CMake' type: string required: true - build-additional-python-wheels: - description: 'Whether to build additional, i.e., non-system Python wheels. 
Should have Python 3.9-3.12 installed' - type: boolean - required: false - default: false permissions: read-all @@ -60,14 +55,12 @@ jobs: steps: - name: Clone OpenVINO uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: path: 'openvino' submodules: 'true' - name: Clone OpenVINO Contrib uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: repository: 'openvinotoolkit/openvino_contrib' path: 'openvino_contrib' @@ -164,7 +157,8 @@ jobs: run: echo SSL_CERT_FILE=$(python3 -m certifi) >> $env:GITHUB_ENV - name: CMake configure - run: cmake -S ${{ env.OPENVINO_REPO }} -B ${{ env.BUILD_DIR }} ${{ inputs.cmake-options }} + run: | + cmake -S ${{ env.OPENVINO_REPO }} -B ${{ env.BUILD_DIR }} ${{ inputs.cmake-options }} - name: Clean ccache stats run: '& ccache --zero-stats' @@ -182,62 +176,6 @@ jobs: cmake --install . --config ${{ env.CMAKE_BUILD_TYPE }} --prefix ${{ env.INSTALL_TEST_DIR }} --component tests working-directory: ${{ env.BUILD_DIR }} - # Setup additional Python versions for wheels building - - name: Setup Python 3.9 - if: ${{ inputs.build-additional-python-wheels }} - uses: ./openvino/.github/actions/setup_python - with: - version: '3.9' - pip-cache-path: ${{ env.PIP_CACHE_PATH }} - should-setup-pip-paths: 'true' - self-hosted-runner: 'true' - - # Setup additional Python versions for wheels building - - name: Setup Python 3.10 - if: ${{ inputs.build-additional-python-wheels }} - uses: ./openvino/.github/actions/setup_python - with: - version: '3.10' - pip-cache-path: ${{ env.PIP_CACHE_PATH }} - should-setup-pip-paths: 'true' - self-hosted-runner: 'true' - - # Setup additional Python versions for wheels building - - name: Setup Python 3.12 - if: ${{ inputs.build-additional-python-wheels }} - uses: ./openvino/.github/actions/setup_python - with: - version: '3.12' - pip-cache-path: ${{ env.PIP_CACHE_PATH }} - should-setup-pip-paths: 'true' - self-hosted-runner: 'true' - 
- - name: Build additional Python wheels - if: ${{ inputs.build-additional-python-wheels }} - run: | - $pyVersions = '3.9', '3.10', '3.12' - foreach ($pyVersion in $pyVersions) { - $pyBuildDir = "${{ github.workspace }}/py$pyVersion" - New-Item -ItemType Directory -Path "$pyBuildDir" -Force - - $pythonCommand = "py -$pyVersion -c `"import sys; print(f'{sys.executable}')`"" - $pythonExecutablePath = & cmd /c $pythonCommand - - $pipVersion = & $pythonExecutablePath -c "import pip; print(pip.__version__)" - Write-Host "Using pip version: $pipVersion for $pyVersion" - $env:PIP_CACHE_DIR="${{ env.PIP_CACHE_PATH }}/$pipVersion" - - & $pythonExecutablePath -m pip install -r ${{ env.OPENVINO_REPO }}/src/bindings/python/wheel/requirements-dev.txt - - cmake -DPython3_EXECUTABLE="$pythonExecutablePath" -DOpenVINODeveloperPackage_DIR=${{ env.BUILD_DIR }} -S ${{ env.OPENVINO_REPO }}/src/bindings/python -B "$pyBuildDir" && - cmake --build "$pyBuildDir" --parallel --config ${{ env.CMAKE_BUILD_TYPE }} && - cmake --install "$pyBuildDir" --config ${{ env.CMAKE_BUILD_TYPE }} --prefix ${{ env.INSTALL_WHEELS_DIR }} --component python_wheels - if ($LASTEXITCODE -ne 0) { - Write-Host "Failed to build Python wheels for Python $pyVersion" - exit 1 - } - } - - name: Pack Artifacts run: | $file = Get-ChildItem -Path "${{ env.INSTALL_DIR }}" @@ -282,7 +220,7 @@ jobs: uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_wheels - path: ${{ env.INSTALL_WHEELS_DIR }}/wheels/*.whl + path: ${{ env.BUILD_DIR }}/wheels/*.whl if-no-files-found: 'error' - name: Upload openvino tests package diff --git a/.github/workflows/job_cpu_functional_tests.yml b/.github/workflows/job_cpu_functional_tests.yml index 568c33d39e307b..0366ec47ff437e 100644 --- a/.github/workflows/job_cpu_functional_tests.yml +++ b/.github/workflows/job_cpu_functional_tests.yml @@ -72,7 +72,6 @@ jobs: - name: Fetch setup_python action uses: 
actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: | .github/actions/setup_python/action.yml diff --git a/.github/workflows/job_jax_layer_tests.yml b/.github/workflows/job_jax_layer_tests.yml deleted file mode 100644 index 25f171060f43be..00000000000000 --- a/.github/workflows/job_jax_layer_tests.yml +++ /dev/null @@ -1,133 +0,0 @@ -name: JAX Layer Tests - -on: - workflow_call: - inputs: - runner: - description: 'Machine on which the tests would run' - type: string - required: true - container: - description: 'JSON to be converted to the value of the "container" configuration for the job' - type: string - required: false - default: '{"image": null}' - affected-components: - description: 'Components that are affected by changes in the commit defined by the Smart CI Action' - type: string - required: true - python-version: - description: 'Python version to setup. E.g., "3.11"' - type: string - required: true - -permissions: read-all - -env: - PIP_CACHE_PATH_LINUX: /mount/caches/pip/linux - PIP_CACHE_PATH_WIN: "C:\\mount\\caches\\pip\\win" - -jobs: - JAX_Layer_Tests: - name: JAX Layer Tests - timeout-minutes: 40 - runs-on: ${{ inputs.runner }} - container: ${{ fromJSON(inputs.container) }} - defaults: - run: - shell: ${{ contains(inputs.runner, 'win') && 'pwsh' || 'bash' }} - env: - DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input - OPENVINO_REPO: ${{ github.workspace }}/openvino - INSTALL_DIR: ${{ github.workspace }}/install - INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests - INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels - LAYER_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/layer_tests - steps: - - name: Download OpenVINO artifacts (tarballs) - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 - with: - pattern: openvino_[tests]* - path: ${{ env.INSTALL_DIR }} - merge-multiple: true - - - name: Download OpenVINO 
artifacts (wheels) - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 - with: - pattern: openvino_[wheels]* - path: ${{ env.INSTALL_WHEELS_DIR }} - merge-multiple: true - - # Needed as ${{ github.workspace }} is not working correctly when using Docker - - name: Setup Variables - if: runner.os != 'Windows' - run: | - echo "OPENVINO_REPO=$GITHUB_WORKSPACE/openvino" >> "$GITHUB_ENV" - echo "INSTALL_DIR=$GITHUB_WORKSPACE/install" >> "$GITHUB_ENV" - echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV" - echo "INSTALL_WHEELS_DIR=$GITHUB_WORKSPACE/install/wheels" >> "$GITHUB_ENV" - echo "LAYER_TESTS_INSTALL_DIR=$GITHUB_WORKSPACE/install/tests/layer_tests" >> "$GITHUB_ENV" - - - name: Install OpenVINO dependencies (mac) - if: runner.os == 'macOS' - run: brew install pigz - - - name: Extract OpenVINO packages (Linux, macOS) - if: runner.os != 'Windows' - run: | - pigz -dc openvino_tests.tar.gz | tar -xf - -C ${INSTALL_DIR} - working-directory: ${{ env.INSTALL_DIR }} - - - name: Extract OpenVINO artifacts (Windows) - if: runner.os == 'Windows' - run: | - Expand-Archive openvino_tests.zip -DestinationPath ${{ env.INSTALL_DIR }} - working-directory: ${{ env.INSTALL_DIR }} - - - name: Fetch setup_python and install wheels actions - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 - with: - sparse-checkout: | - .github/actions/setup_python/action.yml - .github/actions/install_ov_wheels/action.yml - sparse-checkout-cone-mode: false - path: 'openvino' - - - name: Setup Python ${{ inputs.python-version }} - uses: ./openvino/.github/actions/setup_python - with: - version: ${{ inputs.python-version }} - pip-cache-path: ${{ runner.os == 'Linux' && env.PIP_CACHE_PATH_LINUX || env.PIP_CACHE_PATH_WIN }} - should-setup-pip-paths: ${{ runner.os != 'macOS' }} - self-hosted-runner: ${{ runner.os != 'macOS' }} - - - name: Install OpenVINO Python wheels - uses: 
./openvino/.github/actions/install_ov_wheels - with: - wheels-dir-path: ${{ env.INSTALL_WHEELS_DIR }} - wheels-to-install: 'openvino' - - - name: Install JAX Layer tests dependencies - run: | - # jax test requirements - python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/requirements_jax - - - name: JAX Layer Tests - if: ${{ fromJSON(inputs.affected-components).JAX_FE.test && runner.arch != 'ARM64' }} # Ticket: 126287, 142196 - run: python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/jax_tests ${PARALLEL} -m precommit_jax_fe --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-jax.xml - env: - TEST_DEVICE: CPU - TEST_PRECISION: FP16 - JAX_TRACE_MODE: JAXPR - PARALLEL: ${{ runner.os == 'Windows' && ' ' || '-n logical'}} - - - name: Upload Test Results - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 - if: ${{ !cancelled() }} - with: - name: test-results-python-jax-layers - path: | - ${{ env.INSTALL_TEST_DIR }}/TEST*.html - ${{ env.INSTALL_TEST_DIR }}/TEST*.xml - if-no-files-found: 'warn' diff --git a/.github/workflows/job_jax_models_tests.yml b/.github/workflows/job_jax_models_tests.yml index 57eb07a83aa423..43fa8f2a7f1740 100644 --- a/.github/workflows/job_jax_models_tests.yml +++ b/.github/workflows/job_jax_models_tests.yml @@ -65,7 +65,6 @@ jobs: - name: Fetch setup_python action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: | .github/actions/setup_python/action.yml @@ -89,7 +88,7 @@ jobs: - name: Install JAX tests requirements for precommit run: | - python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/requirements_jax + python3 -m pip install -r ${MODEL_HUB_TESTS_INSTALL_DIR}/jax/requirements.txt - name: JAX/Flax Models Tests from Hugging Face if: ${{ inputs.model_scope == 'precommit' || inputs.model_scope == 'nightly' }} diff --git a/.github/workflows/job_onnx_runtime.yml b/.github/workflows/job_onnx_runtime.yml index 92f86511e99e4a..df50c4f3e2ad3c 100644 --- 
a/.github/workflows/job_onnx_runtime.yml +++ b/.github/workflows/job_onnx_runtime.yml @@ -64,7 +64,6 @@ jobs: - name: Fetch ONNX runtime version and skip tests list uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: | src/frontends/onnx/tests/ci_utils/onnxruntime @@ -79,7 +78,6 @@ jobs: - name: Clone ONNX Runtime uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: repository: 'microsoft/onnxruntime' path: ${{ env.ONNX_RUNTIME_REPO }} diff --git a/.github/workflows/job_openvino_js.yml b/.github/workflows/job_openvino_js.yml index fd04d8842daae7..ecb278fdb54ca3 100644 --- a/.github/workflows/job_openvino_js.yml +++ b/.github/workflows/job_openvino_js.yml @@ -33,7 +33,6 @@ jobs: steps: - name: Fetch OpenVINO JS sources uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: | src/bindings/js diff --git a/.github/workflows/job_python_api_tests.yml b/.github/workflows/job_python_api_tests.yml deleted file mode 100644 index e12001cd67afba..00000000000000 --- a/.github/workflows/job_python_api_tests.yml +++ /dev/null @@ -1,151 +0,0 @@ -name: Python API tests - -on: - workflow_call: - inputs: - runner: - description: 'Machine on which the tests would run' - type: string - required: true - container: - description: 'JSON to be converted to the value of the "container" configuration for the job' - type: string - required: false - default: '{"image": null}' - affected-components: - description: 'Components that are affected by changes in the commit defined by the Smart CI Action' - type: string - required: true - python-version: - description: 'Python version to setup. 
E.g., "3.11"' - type: string - required: true - -permissions: read-all - -env: - PIP_CACHE_PATH: /mount/caches/pip/linux - -jobs: - Python_Unit_Tests: - name: Python API tests - if: ${{ fromJSON(inputs.affected-components).Python_API.test || fromJSON(inputs.affected-components).docs_snippets.test }} - timeout-minutes: 30 - runs-on: ${{ inputs.runner }} - container: ${{ fromJSON(inputs.container) }} - defaults: - run: - shell: bash - env: - DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input - OPENVINO_REPO: ${{ github.workspace }}/openvino - INSTALL_DIR: ${{ github.workspace }}/install - INSTALL_TEST_DIR: ${{ github.workspace }}/install/openvino_tests - INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/openvino_wheels - steps: - - name: Download OpenVINO artifacts (tarballs and wheels) - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 - with: - pattern: openvino_@(wheels|tests) - path: ${{ env.INSTALL_DIR }} - - # Needed as ${{ github.workspace }} is not working correctly when using Docker - - name: Setup Variables - run: | - echo "OPENVINO_REPO=$GITHUB_WORKSPACE/openvino" >> "$GITHUB_ENV" - echo "INSTALL_DIR=$GITHUB_WORKSPACE/install" >> "$GITHUB_ENV" - echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/openvino_tests" >> "$GITHUB_ENV" - echo "INSTALL_WHEELS_DIR=$GITHUB_WORKSPACE/install/openvino_wheels" >> "$GITHUB_ENV" - - - name: Install OpenVINO dependencies (mac) - if: runner.os == 'macOS' - run: brew install pigz - - - name: Extract OpenVINO packages - run: pigz -dc openvino_tests.tar.gz | tar -xf - -C ${INSTALL_TEST_DIR} - working-directory: ${{ env.INSTALL_TEST_DIR }} - - - name: Fetch setup_python and install wheels actions - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - timeout-minutes: 15 - with: - sparse-checkout: | - .github/actions/setup_python/action.yml - .github/actions/install_ov_wheels/action.yml - sparse-checkout-cone-mode: false - path: 'action_root' - - - 
name: Setup Python ${{ inputs.python-version }} - uses: ./action_root/.github/actions/setup_python - with: - version: ${{ inputs.python-version }} - pip-cache-path: ${{ runner.os == 'Linux' && env.PIP_CACHE_PATH || '' }} - should-setup-pip-paths: ${{ runner.os == 'Linux' }} - self-hosted-runner: ${{ runner.os == 'Linux' }} - - # - # Tests - # - - name: Install OpenVINO Python wheels - uses: ./action_root/.github/actions/install_ov_wheels - with: - wheels-dir-path: ${{ env.INSTALL_WHEELS_DIR }} - wheels-to-install: 'openvino' - - - name: Install Python API tests dependencies - run: python3 -m pip install -r ${INSTALL_TEST_DIR}/tests/bindings/python/requirements_test.txt - - # - # Tests - # - - - name: Python API Tests - if: fromJSON(inputs.affected-components).Python_API.test - run: | - # for 'template' extension - export LD_LIBRARY_PATH=${INSTALL_TEST_DIR}/tests/:$LD_LIBRARY_PATH - python3 -m pytest -sv ${INSTALL_TEST_DIR}/tests/pyopenvino \ - --junitxml=${INSTALL_TEST_DIR}/TEST-Pyngraph.xml \ - --ignore=${INSTALL_TEST_DIR}/tests/pyopenvino/tests/test_utils/test_utils.py - - - name: Python API Tests -- numpy<2.0.0 - if: fromJSON(inputs.affected-components).Python_API.test - run: | - python3 -m pip uninstall -y numpy - python3 -m pip install "numpy~=1.26.0" - python3 -m pip install -r ${INSTALL_TEST_DIR}/tests/bindings/python/requirements_test.txt - # for 'template' extension - export LD_LIBRARY_PATH=${INSTALL_TEST_DIR}/tests/:$LD_LIBRARY_PATH - python3 -m pytest -sv ${INSTALL_TEST_DIR}/tests/pyopenvino \ - --junitxml=${INSTALL_TEST_DIR}/TEST-Pyngraph_new_numpy.xml \ - --ignore=${INSTALL_TEST_DIR}/tests/pyopenvino/tests/test_utils/test_utils.py - - - name: Clone API snippets - if: ${{ runner.os != 'macOS' && fromJSON(inputs.affected-components).docs_snippets.test }} - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - timeout-minutes: 15 - with: - sparse-checkout: docs/articles_en/assets/snippets - path: ${{ env.OPENVINO_REPO }} - submodules: 
'false' - - - name: Docs Python snippets - if: ${{ runner.os != 'macOS' && fromJSON(inputs.affected-components).docs_snippets.test }} - run: | - # torch, onnx - python3 -m pip install -r ${INSTALL_TEST_DIR}/tests/python/preprocess/torchvision/requirements.txt -r ${INSTALL_TEST_DIR}/tests/requirements_onnx - # to find 'snippets' module in docs - export PYTHONPATH=${OPENVINO_REPO}/docs/articles_en/assets - # for 'template' extension - export LD_LIBRARY_PATH=${INSTALL_TEST_DIR}/tests/:$LD_LIBRARY_PATH - python3 ${OPENVINO_REPO}/docs/articles_en/assets/snippets/main.py - - - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 - if: ${{ !cancelled() }} - with: - name: test-results-python-api-${{ inputs.python-version }} - path: | - ${{ env.INSTALL_TEST_DIR }}/TEST*.html - ${{ env.INSTALL_TEST_DIR }}/TEST*.xml - if-no-files-found: 'warn' diff --git a/.github/workflows/job_python_unit_tests.yml b/.github/workflows/job_python_unit_tests.yml index b7c538cd58e6da..8075f3299fe063 100644 --- a/.github/workflows/job_python_unit_tests.yml +++ b/.github/workflows/job_python_unit_tests.yml @@ -65,23 +65,21 @@ jobs: echo "INSTALL_DIR=$GITHUB_WORKSPACE/install" >> "$GITHUB_ENV" echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV" echo "LAYER_TESTS_INSTALL_DIR=$GITHUB_WORKSPACE/install/tests/layer_tests" >> "$GITHUB_ENV" - echo "INSTALL_WHEELS_DIR=$GITHUB_WORKSPACE/install/wheels" >> "$GITHUB_ENV" - name: Install OpenVINO dependencies (mac) if: runner.os == 'macOS' run: brew install pigz - name: Extract OpenVINO packages - run: pigz -dc openvino_tests.tar.gz | tar -xf - -C ${INSTALL_DIR} + run: | + pigz -dc openvino_tests.tar.gz | tar -xf - -C ${INSTALL_DIR} working-directory: ${{ env.INSTALL_DIR }} - - name: Fetch setup_python and install wheels actions + - name: Fetch setup_python action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: | 
.github/actions/setup_python/action.yml - .github/actions/install_ov_wheels/action.yml sparse-checkout-cone-mode: false path: 'action_root' @@ -94,10 +92,11 @@ jobs: self-hosted-runner: ${{ runner.os == 'Linux' }} - name: Install OpenVINO Python wheels - uses: ./action_root/.github/actions/install_ov_wheels - with: - wheels-dir-path: ${{ env.INSTALL_WHEELS_DIR }} - wheels-to-install: 'openvino' + run: | + # Install the core OV wheel + python3 -m pip install ./openvino-*.whl + + working-directory: ${{ env.INSTALL_WHEELS_DIR }} - name: Install Python API tests dependencies run: | @@ -122,6 +121,15 @@ jobs: # Tests # + - name: Python API Tests + if: ${{ fromJSON(inputs.affected-components).Python_API.test }} + run: | + # for 'template' extension + export LD_LIBRARY_PATH=${INSTALL_TEST_DIR}:$LD_LIBRARY_PATH + python3 -m pytest -sv ${INSTALL_TEST_DIR}/pyopenvino \ + --junitxml=${INSTALL_TEST_DIR}/TEST-Pyngraph.xml \ + --ignore=${INSTALL_TEST_DIR}/pyopenvino/tests/test_utils/test_utils.py + - name: Python ONNX operators tests if: (fromJSON(inputs.affected-components).Python_API.test || fromJSON(inputs.affected-components).ONNX_FE.test) && @@ -133,11 +141,11 @@ jobs: --ignore=${INSTALL_TEST_DIR}/onnx/test_python/test_zoo_models.py - name: OVC unit tests - if: fromJSON(inputs.affected-components).OVC.test + if: fromJSON(inputs.affected-components).MO.test run: python3 -m pytest -s ${INSTALL_TEST_DIR}/ovc/unit_tests --junitxml=${INSTALL_TEST_DIR}/TEST-OpenVinoConversion.xml - name: OVC Python API Tests - if: fromJSON(inputs.affected-components).OVC.test + if: fromJSON(inputs.affected-components).MO.test run: | # Import 'test_utils' installed in '/tests/python/openvino' export PYTHONPATH=${INSTALL_TEST_DIR}/python @@ -162,6 +170,14 @@ jobs: export LD_LIBRARY_PATH=${PIP_INSTALL_PATH}/openvino/libs:$LD_LIBRARY_PATH python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/py_frontend_tests --junitxml=${INSTALL_TEST_DIR}/TEST-test_py_fontend.xml + - name: JAX Layer Tests - JAX FE + if: ${{ 
fromJSON(inputs.affected-components).JAX_FE.test && runner.arch != 'ARM64' && runner.os != 'macOS' }} + run: python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/jax_tests/ -m precommit_jax_fe --junitxml=${INSTALL_TEST_DIR}/TEST-jax_fe.xml + env: + TEST_DEVICE: CPU + TEST_PRECISION: FP16 + JAX_TRACE_MODE: JAXPR + - name: TensorFlow Lite Layer Tests - TFL FE if: fromJSON(inputs.affected-components).TFL_FE.test run: python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/tensorflow_lite_tests/ -n logical --junitxml=${INSTALL_TEST_DIR}/TEST-tfl_fe.xml @@ -169,6 +185,35 @@ jobs: TEST_DEVICE: CPU TEST_PRECISION: FP16 + - name: Clone API snippets + if: runner.os != 'macOS' + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + sparse-checkout: docs/articles_en/assets/snippets + path: ${{ env.OPENVINO_REPO }} + submodules: 'false' + + - name: Docs Python snippets + if: runner.os != 'macOS' + run: | + # to find 'snippets' module in docs + export PYTHONPATH=${OPENVINO_REPO}/docs/articles_en/assets + # for 'template' extension + export LD_LIBRARY_PATH=${INSTALL_TEST_DIR}:$LD_LIBRARY_PATH + python3 ${OPENVINO_REPO}/docs/articles_en/assets/snippets/main.py + + - name: Python API Tests -- numpy>=2.0.0 + if: ${{ fromJSON(inputs.affected-components).Python_API.test }} + run: | + python3 -m pip uninstall -y numpy + python3 -m pip install "numpy>=2.0.0,<2.2.0" + python3 -m pip install -r ${INSTALL_TEST_DIR}/bindings/python/requirements_test.txt + # for 'template' extension + export LD_LIBRARY_PATH=${INSTALL_TEST_DIR}:$LD_LIBRARY_PATH + python3 -m pytest -sv ${INSTALL_TEST_DIR}/pyopenvino \ + --junitxml=${INSTALL_TEST_DIR}/TEST-Pyngraph.xml \ + --ignore=${INSTALL_TEST_DIR}/pyopenvino/tests/test_utils/test_utils.py + - name: Upload Test Results uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} diff --git a/.github/workflows/job_pytorch_layer_tests.yml b/.github/workflows/job_pytorch_layer_tests.yml index 
9a9abaf72ade62..b0eba0a278e582 100644 --- a/.github/workflows/job_pytorch_layer_tests.yml +++ b/.github/workflows/job_pytorch_layer_tests.yml @@ -65,7 +65,6 @@ jobs: echo "OPENVINO_REPO=$GITHUB_WORKSPACE/openvino" >> "$GITHUB_ENV" echo "INSTALL_DIR=$GITHUB_WORKSPACE/install" >> "$GITHUB_ENV" echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV" - echo "INSTALL_WHEELS_DIR=$GITHUB_WORKSPACE/install/wheels" >> "$GITHUB_ENV" echo "LAYER_TESTS_INSTALL_DIR=$GITHUB_WORKSPACE/install/tests/layer_tests" >> "$GITHUB_ENV" - name: Install OpenVINO dependencies (mac) @@ -84,13 +83,11 @@ jobs: Expand-Archive openvino_tests.zip -DestinationPath ${{ env.INSTALL_DIR }} working-directory: ${{ env.INSTALL_DIR }} - - name: Fetch setup_python and install wheels actions + - name: Fetch setup_python action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: | .github/actions/setup_python/action.yml - .github/actions/install_ov_wheels/action.yml sparse-checkout-cone-mode: false path: 'openvino' @@ -102,11 +99,20 @@ jobs: should-setup-pip-paths: ${{ runner.os != 'macOS' }} self-hosted-runner: ${{ runner.os != 'macOS' }} - - name: Install OpenVINO Python wheels - uses: ./openvino/.github/actions/install_ov_wheels - with: - wheels-dir-path: ${{ env.INSTALL_WHEELS_DIR }} - wheels-to-install: 'openvino' + - name: Install OpenVINO Python wheels (Linux and macOS) + if: runner.os != 'Windows' + run: | + # Install the core OV wheel + python3 -m pip install ./openvino-*.whl + working-directory: ${{ env.INSTALL_WHEELS_DIR }} + + - name: Install OpenVINO Python wheels (Windows) + if: runner.os == 'Windows' + run: | + # Find and install the core OV wheel + $ovCoreWheelPath=Get-ChildItem -Path . 
-Filter openvino-*.whl | % { $_.FullName } + python3 -m pip install "$ovCoreWheelPath" + working-directory: ${{ env.INSTALL_WHEELS_DIR }} - name: Install Pytorch Layer tests dependencies run: | diff --git a/.github/workflows/job_pytorch_models_tests.yml b/.github/workflows/job_pytorch_models_tests.yml index af304b18a5688f..d52b819981d821 100644 --- a/.github/workflows/job_pytorch_models_tests.yml +++ b/.github/workflows/job_pytorch_models_tests.yml @@ -78,7 +78,6 @@ jobs: - name: Fetch setup_python action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: | .github/actions/setup_python/action.yml diff --git a/.github/workflows/job_samples_tests.yml b/.github/workflows/job_samples_tests.yml index 07fc17b797592e..e144aa0cfb95aa 100644 --- a/.github/workflows/job_samples_tests.yml +++ b/.github/workflows/job_samples_tests.yml @@ -54,7 +54,6 @@ jobs: echo "INSTALL_DIR=$GITHUB_WORKSPACE/install" >> "$GITHUB_ENV" echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV" echo "BUILD_DIR=$GITHUB_WORKSPACE/build" >> "$GITHUB_ENV" - echo "INSTALL_WHEELS_DIR=$GITHUB_WORKSPACE/install/wheels" >> "$GITHUB_ENV" - name: Install OpenVINO dependencies (mac) if: runner.os == 'macOS' @@ -66,13 +65,13 @@ jobs: pigz -dc openvino_tests.tar.gz | tar -xf - -C ${INSTALL_DIR} working-directory: ${{ env.INSTALL_DIR }} - - name: Fetch setup_python and install wheels actions + - name: Fetch setup_python action + # Python is already installed on Ubuntu within Dockerfile + if: runner.os != 'Linux' uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: | .github/actions/setup_python/action.yml - .github/actions/install_ov_wheels/action.yml sparse-checkout-cone-mode: false path: 'openvino' @@ -114,12 +113,6 @@ jobs: # Tests # - - name: Install OpenVINO Python wheels - uses: ./openvino/.github/actions/install_ov_wheels - with: - wheels-dir-path: ${{ 
env.INSTALL_WHEELS_DIR }} - wheels-to-install: 'openvino' - - name: Samples tests if: fromJSON(inputs.affected-components).samples.test run: | @@ -129,7 +122,7 @@ jobs: export SHARE=$INSTALL_TEST_DIR/smoke_tests/samples_smoke_tests_data # Install Python benchmark_app by installing openvino-*.whl - python3 -m pip install --ignore-installed PyYAML -r $INSTALL_TEST_DIR/smoke_tests/requirements.txt + python3 -m pip install --ignore-installed PyYAML -r $INSTALL_TEST_DIR/smoke_tests/requirements.txt $INSTALL_WHEELS_DIR/openvino-*.whl export LD_LIBRARY_PATH=${IE_APP_PATH}:$LD_LIBRARY_PATH source ${INSTALL_DIR}/setupvars.sh diff --git a/.github/workflows/job_tensorflow_layer_tests.yml b/.github/workflows/job_tensorflow_layer_tests.yml index fb905f8ec4820b..0de1708527739a 100644 --- a/.github/workflows/job_tensorflow_layer_tests.yml +++ b/.github/workflows/job_tensorflow_layer_tests.yml @@ -66,7 +66,6 @@ jobs: echo "INSTALL_DIR=$GITHUB_WORKSPACE/install" >> "$GITHUB_ENV" echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV" echo "LAYER_TESTS_INSTALL_DIR=$GITHUB_WORKSPACE/install/tests/layer_tests" >> "$GITHUB_ENV" - echo "INSTALL_WHEELS_DIR=$GITHUB_WORKSPACE/install/wheels" >> "$GITHUB_ENV" - name: Install OpenVINO dependencies (mac) if: runner.os == 'macOS' @@ -84,13 +83,11 @@ jobs: Expand-Archive openvino_tests.zip -DestinationPath ${{ env.INSTALL_DIR }} working-directory: ${{ env.INSTALL_DIR }} - - name: Fetch setup_python and install wheels actions + - name: Fetch setup_python action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: | .github/actions/setup_python/action.yml - .github/actions/install_ov_wheels/action.yml sparse-checkout-cone-mode: false path: 'openvino' @@ -102,11 +99,27 @@ jobs: should-setup-pip-paths: ${{ runner.os != 'macOS' }} self-hosted-runner: ${{ runner.os != 'macOS' }} - - name: Install OpenVINO Python wheels - uses: 
./openvino/.github/actions/install_ov_wheels - with: - wheels-dir-path: ${{ env.INSTALL_WHEELS_DIR }} - wheels-to-install: 'openvino openvino_tokenizers' + - name: Install OpenVINO Python wheels (Linux and macOS) + if: runner.os != 'Windows' + run: | + # Install the core OV wheel + python3 -m pip install ./openvino-*.whl + + # Install the core OV Tokenizers wheel + python3 -m pip install ./openvino_tokenizers-*.whl + working-directory: ${{ env.INSTALL_WHEELS_DIR }} + + - name: Install OpenVINO Python wheels (Windows) + if: runner.os == 'Windows' + run: | + # Find and install the core OV wheel + $ovCoreWheelPath=Get-ChildItem -Path . -Filter openvino-*.whl | % { $_.FullName } + python3 -m pip install "$ovCoreWheelPath" + + # Find and install the core OV Tokenizers wheel + $ovCoreWheelPath=Get-ChildItem -Path . -Filter openvino_tokenizers-*.whl | % { $_.FullName } + python3 -m pip install "$ovCoreWheelPath" + working-directory: ${{ env.INSTALL_WHEELS_DIR }} - name: Install Python Layer tests dependencies run: | diff --git a/.github/workflows/job_tensorflow_models_tests.yml b/.github/workflows/job_tensorflow_models_tests.yml index de5cf95484256a..5321beb8703de1 100644 --- a/.github/workflows/job_tensorflow_models_tests.yml +++ b/.github/workflows/job_tensorflow_models_tests.yml @@ -70,7 +70,6 @@ jobs: - name: Fetch setup_python action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: | .github/actions/setup_python/action.yml diff --git a/.github/workflows/job_tokenizers.yml b/.github/workflows/job_tokenizers.yml index 89d572885b1abe..4b84bee25c78f4 100644 --- a/.github/workflows/job_tokenizers.yml +++ b/.github/workflows/job_tokenizers.yml @@ -54,16 +54,13 @@ jobs: echo "INSTALL_DIR=$GITHUB_WORKSPACE/install" >> "$GITHUB_ENV" echo "OPENVINO_TOKENIZERS_REPO=$GITHUB_WORKSPACE/openvino_tokenizers" >> "$GITHUB_ENV" echo "EXTENSION_BUILD_DIR=$GITHUB_WORKSPACE/build" >> "$GITHUB_ENV" - echo 
"INSTALL_WHEELS_DIR=$GITHUB_WORKSPACE/install/wheels" >> "$GITHUB_ENV" - - name: checkout actions + - name: checkout action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: | .github/actions/setup_python .github/actions/cache - .github/actions/install_ov_wheels/action.yml install_build_dependencies.sh - name: Install OpenVINO dependencies (mac) @@ -80,7 +77,6 @@ jobs: - name: Clone OpenVINO Tokenizers uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: repository: 'openvinotoolkit/openvino_tokenizers' path: ${{ env.OPENVINO_TOKENIZERS_REPO }} @@ -97,11 +93,22 @@ jobs: # Dependencies # - - name: Install OpenVINO Python wheels - uses: ./.github/actions/install_ov_wheels - with: - wheels-dir-path: ${{ env.INSTALL_WHEELS_DIR }} - wheels-to-install: 'openvino' + - name: Install OpenVINO Python wheel (Linux and macOS) + if: runner.os != 'Windows' + run: | + # Find and install wheel + wheel_name=$(find . -name 'openvino-*.whl') + python3 -m pip install $wheel_name + working-directory: ${{ env.INSTALL_WHEELS_DIR }} + + + - name: Install OpenVINO Python wheel (Windows) + if: runner.os == 'Windows' + run: | + # Find and install wheel + $ovCoreWheelPath=Get-ChildItem -Path . 
-Filter openvino-*.whl | % { $_.FullName } + python3 -m pip install "$ovCoreWheelPath" + working-directory: ${{ env.INSTALL_WHEELS_DIR }} # # Build diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index 063b920eed80e9..00f3a321e0dd1f 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -27,7 +27,6 @@ jobs: steps: - name: Checkout Labeller Script uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: '.github' diff --git a/.github/workflows/linux_arm64.yml b/.github/workflows/linux_arm64.yml index ca1ca6e056e23d..10de6867c7d0e2 100644 --- a/.github/workflows/linux_arm64.yml +++ b/.github/workflows/linux_arm64.yml @@ -29,7 +29,6 @@ jobs: steps: - name: checkout action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: .github/actions/smart-ci @@ -54,7 +53,7 @@ jobs: Docker: needs: Smart_CI if: "!needs.smart_ci.outputs.skip_workflow" - runs-on: aks-linux-4-cores-8gb-arm-docker-build + runs-on: aks-linux-16-cores-arm-docker-build container: image: openvinogithubactions.azurecr.io/docker_build:0.2 volumes: @@ -64,7 +63,6 @@ jobs: steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 - uses: ./.github/actions/handle_docker id: handle_docker @@ -80,7 +78,7 @@ jobs: needs: [ Docker, Smart_CI ] uses: ./.github/workflows/job_build_linux.yml with: - runner: 'aks-linux-16-cores-32gb-arm' + runner: 'aks-linux-16-cores-arm' container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"], "options": "-e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING"}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} event-name: ${{ github.event_name }} @@ -99,7 +97,6 @@ jobs: -DENABLE_SYSTEM_OPENCL=ON \ -DCMAKE_VERBOSE_MAKEFILE=ON \ 
-DCPACK_GENERATOR=TGZ \ - -DENABLE_WHEEL=ON \ -DCMAKE_COMPILE_WARNING_AS_ERROR=ON Debian_Packages: @@ -108,7 +105,7 @@ jobs: if: ${{ 'false' }} uses: ./.github/workflows/job_debian_packages.yml with: - runner: 'aks-linux-4-cores-8gb-arm' + runner: 'aks-linux-16-cores-arm' image: 'openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04' Samples: @@ -116,7 +113,7 @@ jobs: if: fromJSON(needs.smart_ci.outputs.affected_components).samples uses: ./.github/workflows/job_samples_tests.yml with: - runner: 'aks-linux-8-cores-16gb-arm' + runner: 'aks-linux-16-cores-arm' container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"]}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} @@ -126,7 +123,7 @@ jobs: if: fromJSON(needs.smart_ci.outputs.affected_components).JS_API uses: ./.github/workflows/job_openvino_js.yml with: - runner: 'aks-linux-4-cores-8gb-arm' + runner: 'aks-linux-16-cores-arm' container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_20_04_arm64 }}"}' ONNX_Runtime: @@ -136,7 +133,7 @@ jobs: needs: [ Build, Smart_CI, Docker ] uses: ./.github/workflows/job_onnx_runtime.yml with: - runner: 'aks-linux-16-cores-32gb-arm' + runner: 'aks-linux-16-cores-arm' container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"], "options": "-e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING"}' sccache-azure-key-prefix: 'ubuntu20_aarch64_onnxruntime' @@ -145,7 +142,7 @@ jobs: needs: [ Build, Docker, Smart_CI ] uses: ./.github/workflows/job_tokenizers.yml with: - runner: 'aks-linux-8-cores-16gb-arm' + runner: 'aks-linux-16-cores-arm' shell: bash container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"]}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} @@ -157,7 +154,7 @@ jobs: needs: [ Build, Docker, 
Smart_CI ] uses: ./.github/workflows/job_cxx_unit_tests.yml with: - runner: 'aks-linux-8-cores-16gb-arm' + runner: 'aks-linux-16-cores-arm' image: ${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }} affected-components: ${{ needs.smart_ci.outputs.affected_components }} os: 'ubuntu_20_04' @@ -166,16 +163,6 @@ jobs: name: Python unit tests needs: [ Build, Docker, Smart_CI ] uses: ./.github/workflows/job_python_unit_tests.yml - with: - runner: 'aks-linux-8-cores-16gb-arm' - container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"]}' - affected-components: ${{ needs.smart_ci.outputs.affected_components }} - python-version: '3.11' - - Python_API_Tests: - name: Python API tests - needs: [ Docker, Build, Smart_CI ] - uses: ./.github/workflows/job_python_api_tests.yml with: runner: 'aks-linux-16-cores-arm' container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"]}' @@ -187,7 +174,7 @@ jobs: needs: [ Build, Docker, Smart_CI, Openvino_tokenizers ] uses: ./.github/workflows/job_tensorflow_layer_tests.yml with: - runner: 'aks-linux-16-cores-32gb-arm' + runner: 'aks-linux-16-cores-arm' container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"]}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' @@ -197,17 +184,7 @@ jobs: needs: [ Build, Docker, Smart_CI ] uses: ./.github/workflows/job_pytorch_layer_tests.yml with: - runner: 'aks-linux-16-cores-32gb-arm' - container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"]}' - affected-components: ${{ needs.smart_ci.outputs.affected_components }} - python-version: '3.11' - - JAX_Layer_Tests: - name: JAX Layer Tests - needs: [ Build, Docker, Smart_CI ] - uses: ./.github/workflows/job_jax_layer_tests.yml - with: 
- runner: 'aks-linux-16-cores-32gb-arm' + runner: 'aks-linux-16-cores-arm' container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"]}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' @@ -218,8 +195,6 @@ jobs: needs: [ Build, Docker, Smart_CI ] uses: ./.github/workflows/job_cpu_functional_tests.yml with: - # Additional investigation needed why CPU functional tests are failing on v6 VM size's version, - # so leave it as it is for now runner: 'aks-linux-16-cores-arm' image: ${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }} python-version: '3.11' @@ -232,7 +207,7 @@ jobs: needs: [ Build, Docker, Smart_CI, Openvino_tokenizers] uses: ./.github/workflows/job_tensorflow_models_tests.yml with: - runner: 'aks-linux-16-cores-32gb-arm' + runner: 'aks-linux-16-cores-arm' container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }}"}' model_scope: 'precommit' @@ -243,7 +218,7 @@ jobs: needs: [ Build, Docker, Smart_CI ] uses: ./.github/workflows/job_pytorch_models_tests.yml with: - runner: 'aks-linux-16-cores-32gb-arm' + runner: 'aks-linux-16-cores-arm' container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }}"}' model_scope: 'precommit' diff --git a/.github/workflows/linux_conditional_compilation.yml b/.github/workflows/linux_conditional_compilation.yml index f198e64f7ad2ed..27f54da6ecdc60 100644 --- a/.github/workflows/linux_conditional_compilation.yml +++ b/.github/workflows/linux_conditional_compilation.yml @@ -30,7 +30,6 @@ jobs: steps: - name: checkout action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: .github/actions/smart-ci @@ -65,7 +64,6 @@ jobs: steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 - uses: 
./.github/actions/handle_docker id: handle_docker @@ -112,14 +110,12 @@ jobs: steps: - name: Clone OpenVINO uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: path: ${{ env.OPENVINO_REPO }} submodules: 'true' - name: Clone test models uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: repository: 'openvinotoolkit/testdata' path: ${{ env.MODELS_PATH }} @@ -216,6 +212,7 @@ jobs: tar -cvf - \ tests/ov_cpu_func_tests \ tests/libopenvino_template_extension.so \ + tests/libze_loader.so* \ tests/libhwloc* \ tests/libtbb* \ tests/functional_test_utils/layer_tests_summary/* \ @@ -286,14 +283,12 @@ jobs: steps: - name: Clone OpenVINO uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: path: ${{ env.OPENVINO_REPO }} submodules: 'true' - name: Clone test models uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: repository: 'openvinotoolkit/testdata' path: ${{ env.MODELS_PATH }} diff --git a/.github/workflows/linux_riscv.yml b/.github/workflows/linux_riscv.yml index 389579ee5c5808..85b0db8c36294e 100644 --- a/.github/workflows/linux_riscv.yml +++ b/.github/workflows/linux_riscv.yml @@ -29,7 +29,6 @@ jobs: steps: - name: checkout action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: .github/actions/smart-ci @@ -65,7 +64,6 @@ jobs: steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 - uses: ./.github/actions/handle_docker id: handle_docker @@ -105,7 +103,6 @@ jobs: steps: - name: Clone OpenVINO uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: path: 'openvino' @@ -139,6 +136,7 @@ jobs: git submodule update --init -- ${OPENVINO_REPO}/thirdparty/gflags git submodule update --init -- 
${OPENVINO_REPO}/thirdparty/telemetry git submodule update --init -- ${OPENVINO_REPO}/src/plugins/intel_cpu + git submodule update --init -- ${OPENVINO_REPO}/thirdparty/open_model_zoo git submodule update --init -- ${OPENVINO_REPO}/thirdparty/flatbuffers/flatbuffers popd diff --git a/.github/workflows/linux_riscv_xuantie_dev_cpu.yml b/.github/workflows/linux_riscv_xuantie_dev_cpu.yml deleted file mode 100644 index 25282f95bae2e4..00000000000000 --- a/.github/workflows/linux_riscv_xuantie_dev_cpu.yml +++ /dev/null @@ -1,250 +0,0 @@ -name: Linux RISC-V with Xuantie (Ubuntu 22.04, Python 3.10) -on: - workflow_dispatch: - inputs: - testFilter: - description: 'Filter for google tests' - required: true - default: '*smoke_AdaPoolAvg4DLayoutTest*' - pull_request: - paths: - - '.github/workflows/linux_riscv_xuantie_dev_cpu.yml' - -concurrency: - # github.ref is not unique in post-commit - group: ${{ github.event_name == 'push' && github.run_id || github.ref }}-linux-riscv-xuantie - cancel-in-progress: true - -permissions: read-all - -env: - TARGET_BRANCH: ${{ github.base_ref || github.event.merge_group.base_ref || github.ref }} - GTEST_FILTER: ${{ github.event_name == 'workflow_dispatch' && inputs.testFilter || '*smoke_AdaPoolAvg4DLayoutTest*' }} - RISCV_TOOLCHAIN_ROOT: /opt/riscv - -jobs: - Smart_CI: - runs-on: ubuntu-latest - outputs: - affected_components: "${{ steps.smart_ci.outputs.affected_components }}" - changed_components: "${{ steps.smart_ci.outputs.changed_components }}" - skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" - target_branch: ${{ steps.set_target_branch.outputs.target_branch }} - steps: - - name: checkout action - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 - with: - sparse-checkout: .github/actions/smart-ci - - - name: Get affected components - id: smart_ci - uses: ./.github/actions/smart-ci - with: - repository: ${{ github.repository }} - pr: ${{ github.event.number }} - commit_sha: ${{ 
github.sha }} - ref_name: ${{ github.ref_name }} - component_pattern: "category: (.*)" - repo_token: ${{ secrets.GITHUB_TOKEN }} - skip_when_only_listed_labels_set: 'docs' - skip_when_only_listed_files_changed: '*.md,*.rst,*.png,*.jpg,*.svg,*/layer_tests_summary/*,*/conformance/*' - - - name: Get target branch - id: set_target_branch - run: | - echo "target_branch=${TARGET_BRANCH#refs/heads/}" >> $GITHUB_OUTPUT - shell: bash - - Docker: - needs: Smart_CI - runs-on: aks-linux-4-cores-16gb-docker-build - if: "!needs.smart_ci.outputs.skip_workflow" - container: - image: openvinogithubactions.azurecr.io/docker_build:0.2 - volumes: - - /mount:/mount - outputs: - images: "${{ steps.handle_docker.outputs.images }}" - steps: - - name: Checkout - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 - - - uses: ./.github/actions/handle_docker - id: handle_docker - with: - images: | - ov_build/ubuntu_22_04_riscv_xuantie - ov_test/ubuntu_22_04_riscv_xuantie - registry: 'openvinogithubactions.azurecr.io' - dockerfiles_root_dir: '.github/dockerfiles' - changed_components: ${{ needs.smart_ci.outputs.changed_components }} - - Build: - name: Build (RVV 0.7.1) - needs: [Smart_CI, Docker] - timeout-minutes: 150 - defaults: - run: - shell: bash - runs-on: aks-linux-16-cores-32gb - container: - image: ${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_22_04_riscv_xuantie }} - volumes: - - /mount:/mount - - ${{ github.workspace }}:${{ github.workspace }} - env: - CMAKE_BUILD_TYPE: 'Release' - CMAKE_GENERATOR: 'Ninja' - CMAKE_CXX_COMPILER_LAUNCHER: ccache - CMAKE_C_COMPILER_LAUNCHER: ccache - OPENVINO_REPO: ${{ github.workspace }}/openvino - BUILD_DIR: ${{ github.workspace }}/build - INSTALL_DIR: ${{ github.workspace }}/openvino_install - INSTALL_TEST_DIR: ${{ github.workspace }}/tests_install - CCACHE_REMOTE_DIR: /mount/caches/ccache/ubuntu22_riscv64/xuantie/${{ needs.Smart_CI.outputs.target_branch }} - CCACHE_DIR: ${{ github.workspace 
}}/ccache - CCACHE_TEMPDIR: ${{ github.workspace }}/ccache_temp - CCACHE_MAXSIZE: 2G - - steps: - - name: Clone OpenVINO - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 - with: - path: ${{ env.OPENVINO_REPO }} - submodules: 'true' - - # - # Print system info - # - - - name: System info - uses: ./openvino/.github/actions/system_info - - - name: Setup ccache - id: ccache_restore - uses: ./openvino/.github/actions/cache - with: - save-always: ${{ github.event_name == 'push' && 'true' || 'false' }} - cleanup-always: ${{ github.event_name == 'push' && 'true' || 'false' }} - cache-size: 10 - max-cache-size: 50 - cache-path: ${{ env.CCACHE_REMOTE_DIR }} - path: ${{ env.CCACHE_DIR }} - key: ${{ runner.os }}-${{ runner.arch }}-ccache-${{ github.sha }} - restore-keys: | - ${{ runner.os }}-${{ runner.arch }}-ccache - - # - # Build - # - - name: Clean ccache stats - run: ccache --zero-stats - - - name: CMake - Configure - run: > - cmake -G "${{ env.CMAKE_GENERATOR }}" - -DENABLE_CPPLINT=OFF - -DENABLE_INTEL_GPU=OFF - -DENABLE_INTEL_NPU=OFF - -DENABLE_SAMPLES=OF - -DENABLE_NCC_STYLE=OFF - -DENABLE_PYTHON=OFF - -DENABLE_TESTS=ON - -DENABLE_STRICT_DEPENDENCIES=OFF - -DCMAKE_VERBOSE_MAKEFILE=ON - -DENABLE_WHEEL=OFF - -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF - -DCMAKE_TOOLCHAIN_FILE=${OPENVINO_REPO}/cmake/toolchains/riscv64-071-xuantie-gnu.toolchain.cmake - -S ${OPENVINO_REPO} - -B ${BUILD_DIR} - - - name: Cmake - Build - run: cmake --build ${BUILD_DIR} --parallel $(nproc) - - - name: Show ccache stats and cleanup - run: | - ccache --show-stats - ccache --cleanup - - - name: Cmake install - OpenVINO - run: | - cmake --install . --config ${{ env.CMAKE_BUILD_TYPE }} --prefix ${{ env.INSTALL_DIR }} - cmake --install . 
--config ${{ env.CMAKE_BUILD_TYPE }} --prefix ${{ env.INSTALL_TEST_DIR }} --component tests - working-directory: ${{ env.BUILD_DIR }} - - - name: Pack openvino_package - run: tar -cvf - * | pigz > ${{ env.BUILD_DIR }}/openvino_package.tar.gz - working-directory: ${{ env.INSTALL_DIR }} - - - name: Pack openvino_tests - run: tar -cvf - * | pigz > ${{ env.BUILD_DIR }}/openvino_tests.tar.gz - working-directory: ${{ env.INSTALL_TEST_DIR }} - - - name: Upload openvino package - if: ${{ always() }} - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 - with: - name: openvino_package - path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz - if-no-files-found: 'error' - - - name: Upload openvino tests package - if: ${{ always() }} - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 - with: - name: openvino_tests - path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz - if-no-files-found: 'error' - - CPU_Functional_Tests: - name: CPU functional tests - timeout-minutes: 30 - needs: [Smart_CI, Docker, Build] - runs-on: aks-linux-4-cores-16gb - container: - image: ${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_22_04_riscv_xuantie }} - volumes: - - ${{ github.workspace }}:${{ github.workspace }} - defaults: - run: - shell: bash - env: - INSTALL_DIR: ${{ github.workspace }}/install - INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests - - steps: - - name: Download OpenVINO artifacts (tarballs) - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 - with: - pattern: openvino_[package|tests]* - path: ${{ env.INSTALL_DIR }} - merge-multiple: true - - - name: Extract OpenVINO packages and tests - run: | - pigz -dc openvino_package.tar.gz | tar -xvf - -C ${INSTALL_DIR} - pigz -dc openvino_tests.tar.gz | tar -xvf - -C ${INSTALL_DIR} - working-directory: ${{ env.INSTALL_DIR }} - - - name: Intel CPU plugin func tests (parallel) - run: | - source ${{ env.INSTALL_DIR }}/setupvars.sh - ${{ 
env.RISCV_TOOLCHAIN_ROOT}}/bin/qemu-riscv64 -cpu c910v ${{ env.INSTALL_TEST_DIR }}/ov_cpu_func_tests --gtest_print_time=1 --gtest_filter=${{ env.GTEST_FILTER }} - timeout-minutes: 25 - - Overall_Status: - name: ci/gha_overall_status_linux_riscv - needs: [Smart_CI, Build, CPU_Functional_Tests] - if: ${{ always() }} - runs-on: ubuntu-latest - steps: - - name: Check status of all jobs - if: >- - ${{ - contains(needs.*.result, 'failure') || - contains(needs.*.result, 'cancelled') - }} - run: exit 1 diff --git a/.github/workflows/linux_sanitizers.yml b/.github/workflows/linux_sanitizers.yml index cf8e1642fa5f51..4bb597d83fadc8 100644 --- a/.github/workflows/linux_sanitizers.yml +++ b/.github/workflows/linux_sanitizers.yml @@ -25,7 +25,6 @@ jobs: steps: - name: checkout action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: .github/actions/smart-ci @@ -54,7 +53,6 @@ jobs: steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 - uses: ./.github/actions/handle_docker id: handle_docker @@ -110,14 +108,12 @@ jobs: steps: - name: Clone OpenVINO uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: path: ${{ env.OPENVINO_REPO }} submodules: 'true' - name: Clone OpenVINO Contrib uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: repository: 'openvinotoolkit/openvino_contrib' path: ${{ env.OPENVINO_CONTRIB_REPO }} @@ -285,7 +281,6 @@ jobs: - name: Fetch Sanitizer Suppression Lists uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: | tests/sanitizers/lsan/suppressions.txt diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 0fbc20cf19594b..c587c5ad7323b3 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -43,7 +43,6 @@ jobs: steps: - name: 
checkout action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: .github/actions/smart-ci @@ -84,14 +83,12 @@ jobs: steps: - name: Clone OpenVINO uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: path: 'openvino' submodules: 'true' - name: Clone OpenVINO Contrib uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: repository: 'openvinotoolkit/openvino_contrib' path: 'openvino_contrib' @@ -154,7 +151,6 @@ jobs: -DENABLE_CPPLINT=OFF \ -DENABLE_NCC_STYLE=OFF \ -DENABLE_TESTS=ON \ - -DENABLE_WHEEL=OFF \ -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF \ -DENABLE_STRICT_DEPENDENCIES=OFF \ -DCMAKE_CXX_COMPILER_LAUNCHER=${{ env.CMAKE_CXX_COMPILER_LAUNCHER }} \ @@ -172,6 +168,7 @@ jobs: run: | cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR }} -P ${{ env.BUILD_DIR }}/cmake_install.cmake cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_TEST_DIR }} -DCOMPONENT=tests -P ${{ env.BUILD_DIR }}/cmake_install.cmake + cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_WHEELS_DIR }} -DCOMPONENT=python_wheels -P ${{ env.BUILD_DIR }}/cmake_install.cmake - name: Pack Artifacts run: | @@ -182,48 +179,6 @@ jobs: tar -cvf - * | pigz > ${{ env.BUILD_DIR }}/openvino_tests.tar.gz popd - # Setup additional Python versions for wheels building - - name: Setup Python 3.9 - uses: ./openvino/.github/actions/setup_python - with: - version: "3.9" - should-setup-pip-paths: 'false' - self-hosted-runner: 'false' - - - name: Setup Python 3.10 - uses: ./openvino/.github/actions/setup_python - with: - version: "3.10" - should-setup-pip-paths: 'false' - self-hosted-runner: 'false' - - - name: Setup Python 3.12 - uses: ./openvino/.github/actions/setup_python - with: - version: "3.12" - should-setup-pip-paths: 'false' - self-hosted-runner: 'false' - - - name: Build additional Python wheels - run: | - for py_version in "3.9" "3.10" "3.11" "3.12" - do - 
python_exec_path=$(python$py_version -c "import sys; print(sys.executable)") - $python_exec_path -m pip install -r ${{ env.OPENVINO_REPO }}/src/bindings/python/wheel/requirements-dev.txt - - cmake -DPython3_EXECUTABLE=$python_exec_path -DENABLE_WHEEL=ON -DOpenVINODeveloperPackage_DIR=${{ env.BUILD_DIR }} -S ${{ env.OPENVINO_REPO }}/src/bindings/python -B ${{ github.workspace }}/py$py_version - cmake --build ${{ github.workspace }}/py$py_version --parallel - cmake --install ${{ github.workspace }}/py$py_version --config ${{ env.CMAKE_BUILD_TYPE }} --prefix ${{ env.INSTALL_WHEELS_DIR }} --component python_wheels - done - - # Setup Python 3.11 as the default one - - name: Setup Python ${{ env.PYTHON_VERSION }} - uses: ./openvino/.github/actions/setup_python - with: - version: ${{ env.PYTHON_VERSION }} - should-setup-pip-paths: 'false' - self-hosted-runner: 'false' - - name: Cmake & Build - OpenVINO Contrib run: | cmake \ @@ -244,7 +199,6 @@ jobs: cmake --build ${{ env.BUILD_DIR }} --parallel $(nproc) cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR_JS }} -P ${{ env.BUILD_DIR }}/cmake_install.cmake - # # Upload build artifacts # @@ -256,7 +210,7 @@ jobs: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz if-no-files-found: 'error' - + - name: Upload openvino wheels uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: @@ -316,19 +270,6 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} os: 'mac_13' - Python_API_Tests: - name: Python API tests - needs: [ Build, Smart_CI ] - uses: ./.github/workflows/job_python_api_tests.yml - strategy: - fail-fast: false - matrix: - python-version: [ '3.9', '3.10', '3.11', '3.12' ] - with: - runner: 'macos-13' - affected-components: ${{ needs.smart_ci.outputs.affected_components }} - python-version: ${{ matrix.python-version }} - Python_Unit_Tests: name: Python unit tests needs: [ Build, Smart_CI ] @@ -356,15 +297,6 @@ jobs: affected-components: ${{ 
needs.smart_ci.outputs.affected_components }} python-version: '3.11' - JAX_Layer_Tests: - name: JAX Layer Tests - needs: [ Build, Smart_CI ] - uses: ./.github/workflows/job_jax_layer_tests.yml - with: - runner: 'macos-13' - affected-components: ${{ needs.smart_ci.outputs.affected_components }} - python-version: '3.11' - CPU_Functional_Tests: name: CPU functional tests # if: fromJSON(needs.smart_ci.outputs.affected_components).CPU.test diff --git a/.github/workflows/mac_arm64.yml b/.github/workflows/mac_arm64.yml index b60daefa442c83..0708a844fe6b8b 100644 --- a/.github/workflows/mac_arm64.yml +++ b/.github/workflows/mac_arm64.yml @@ -43,7 +43,6 @@ jobs: steps: - name: checkout action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: .github/actions/smart-ci @@ -84,14 +83,12 @@ jobs: steps: - name: Clone OpenVINO uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: path: 'openvino' submodules: 'true' - name: Clone OpenVINO Contrib uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: repository: 'openvinotoolkit/openvino_contrib' path: 'openvino_contrib' @@ -154,7 +151,6 @@ jobs: -DENABLE_CPPLINT=OFF \ -DENABLE_NCC_STYLE=OFF \ -DENABLE_TESTS=ON \ - -DENABLE_WHEEL=OFF \ -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF \ -DENABLE_STRICT_DEPENDENCIES=OFF \ -DCMAKE_CXX_COMPILER_LAUNCHER=${{ env.CMAKE_CXX_COMPILER_LAUNCHER }} \ @@ -172,6 +168,7 @@ jobs: run: | cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR }} -P ${{ env.BUILD_DIR }}/cmake_install.cmake cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_TEST_DIR }} -DCOMPONENT=tests -P ${{ env.BUILD_DIR }}/cmake_install.cmake + cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_WHEELS_DIR }} -DCOMPONENT=python_wheels -P ${{ env.BUILD_DIR }}/cmake_install.cmake - name: Pack Artifacts run: | @@ -183,48 +180,6 @@ jobs: tar -cvf - * | pigz > ${{ env.BUILD_DIR }}/openvino_tests.tar.gz 
popd - # Setup additional Python versions for wheels building - - name: Setup Python 3.9 - uses: ./openvino/.github/actions/setup_python - with: - version: "3.9" - should-setup-pip-paths: 'false' - self-hosted-runner: 'false' - - - name: Setup Python 3.10 - uses: ./openvino/.github/actions/setup_python - with: - version: "3.10" - should-setup-pip-paths: 'false' - self-hosted-runner: 'false' - - - name: Setup Python 3.12 - uses: ./openvino/.github/actions/setup_python - with: - version: "3.12" - should-setup-pip-paths: 'false' - self-hosted-runner: 'false' - - - name: Build additional Python wheels - run: | - for py_version in "3.9" "3.10" "3.11" "3.12" - do - python_exec_path=$(python$py_version -c "import sys; print(sys.executable)") - $python_exec_path -m pip install -r ${{ env.OPENVINO_REPO }}/src/bindings/python/wheel/requirements-dev.txt - - cmake -DPython3_EXECUTABLE=$python_exec_path -DENABLE_WHEEL=ON -DOpenVINODeveloperPackage_DIR=${{ env.BUILD_DIR }} -S ${{ env.OPENVINO_REPO }}/src/bindings/python -B ${{ github.workspace }}/py$py_version - cmake --build ${{ github.workspace }}/py$py_version --parallel - cmake --install ${{ github.workspace }}/py$py_version --config ${{ env.CMAKE_BUILD_TYPE }} --prefix ${{ env.INSTALL_WHEELS_DIR }} --component python_wheels - done - - # Setup Python 3.11 as the default one - - name: Setup Python ${{ env.PYTHON_VERSION }} - uses: ./openvino/.github/actions/setup_python - with: - version: ${{ env.PYTHON_VERSION }} - should-setup-pip-paths: 'false' - self-hosted-runner: 'false' - - name: Cmake & Build - OpenVINO Contrib run: | cmake \ @@ -324,19 +279,6 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' - Python_API_Tests: - name: Python API tests - needs: [ Build, Smart_CI ] - uses: ./.github/workflows/job_python_api_tests.yml - strategy: - fail-fast: false - matrix: - python-version: [ '3.9', '3.10', '3.11', '3.12' ] - with: - runner: 'macos-13-xlarge' - affected-components: 
${{ needs.smart_ci.outputs.affected_components }} - python-version: ${{ matrix.python-version }} - TensorFlow_Layer_Tests: name: TensorFlow Layer Tests needs: [ Build, Smart_CI, Openvino_tokenizers ] @@ -355,15 +297,6 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' - JAX_Layer_Tests: - name: JAX Layer Tests - needs: [ Build, Smart_CI ] - uses: ./.github/workflows/job_jax_layer_tests.yml - with: - runner: 'macos-13-xlarge' - affected-components: ${{ needs.smart_ci.outputs.affected_components }} - python-version: '3.11' - CPU_Functional_Tests: name: CPU functional tests if: fromJSON(needs.smart_ci.outputs.affected_components).CPU.test diff --git a/.github/workflows/manylinux_2014.yml b/.github/workflows/manylinux_2014.yml index d6b3daa12abb57..bd5da965226a50 100644 --- a/.github/workflows/manylinux_2014.yml +++ b/.github/workflows/manylinux_2014.yml @@ -28,7 +28,6 @@ jobs: steps: - name: checkout action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: .github/actions/smart-ci @@ -63,7 +62,6 @@ jobs: steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 - uses: ./.github/actions/handle_docker id: handle_docker @@ -71,11 +69,6 @@ jobs: images: | ov_build/ubuntu_22_04_x64_docker ov_build/manylinux2014_x86_64 - ov_test/ubuntu_20_04_x64_py313 - ov_test/ubuntu_22_04_x64 - ov_test/ubuntu_24_04_x64 - ov_test/fedora_33 - ov_test/debian_10_py310 registry: 'openvinogithubactions.azurecr.io' dockerfiles_root_dir: '.github/dockerfiles' changed_components: ${{ needs.smart_ci.outputs.changed_components }} @@ -99,7 +92,6 @@ jobs: OPENVINO_REPO: ${{ github.workspace }}/src INSTALL_DIR: ${{ github.workspace }}/install/openvino INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels - INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests BUILD_DIR: ${{ github.workspace }}/build DOCKER_CONFIG: 
"/mount/.docker" CMAKE_CXX_COMPILER_LAUNCHER: sccache @@ -115,7 +107,6 @@ jobs: steps: - name: Clone OpenVINO uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: path: ${{ env.OPENVINO_REPO }} submodules: 'true' @@ -144,7 +135,6 @@ jobs: -v ${{ env.OPENVINO_REPO }}:/work/src \ -v ov_build_cache:/work/build \ -v ${{ env.INSTALL_DIR }}:/work/install \ - -v ${{ env.INSTALL_TEST_DIR }}:/work/api_tests \ -e SCCACHE_AZURE_BLOB_CONTAINER \ -e SCCACHE_AZURE_CONNECTION_STRING \ -e SCCACHE_SERVER_PORT \ @@ -158,18 +148,16 @@ jobs: -w /work/src \ ${{ fromJSON(needs.docker.outputs.images).ov_build.manylinux2014_x86_64 }} \ /bin/bash -c " - python3.12 -m pip install -r /work/src/src/bindings/python/wheel/requirements-dev.txt - cmake -DPython3_EXECUTABLE=/usr/local/bin/python3.12 -DENABLE_CPPLINT=OFF -DENABLE_NCC_STYLE=OFF -DENABLE_TESTS=ON -DCMAKE_VERBOSE_MAKEFILE=ON -DENABLE_OV_TF_FRONTEND=OFF -DENABLE_OV_TF_LITE_FRONTEND=OFF -DENABLE_OV_PADDLE_FRONTEND=OFF -DENABLE_OV_PYTORCH_FRONTEND=ON -DENABLE_OV_JAX_FRONTEND=OFF -DENABLE_OV_ONNX_FRONTEND=ON -DENABLE_PYTHON=ON -DENABLE_WHEEL=ON -S /work/src -B /work/build && + cmake -DENABLE_CPPLINT=OFF -DENABLE_NCC_STYLE=OFF -DCMAKE_VERBOSE_MAKEFILE=ON -DENABLE_PYTHON=OFF -DENABLE_WHEEL=OFF -S /work/src -B /work/build && cmake --build /work/build --parallel $(nproc) --config ${{ env.CMAKE_BUILD_TYPE }} && cmake --install /work/build --config ${{ env.CMAKE_BUILD_TYPE }} --prefix /work/install - cmake --install /work/build --config ${{ env.CMAKE_BUILD_TYPE }} --prefix /work/api_tests --component tests " - name: Pack Artifacts run: mkdir -p ${{ env.BUILD_DIR }} && tar -cvf - * | pigz > ${{ env.BUILD_DIR }}/openvino_package.tar.gz working-directory: ${{ env.INSTALL_DIR }} - - name: Build Python API (Python 3.9-3.13) + - name: Build Python API(Python 3.9-3.13) run: | SUPPORTED_PYTHON_VERSIONS=("39" "310" "311" "312" "313") for PY_VER in "${SUPPORTED_PYTHON_VERSIONS[@]}"; do @@ -202,10 +190,6 @@ 
jobs: " done - - name: Pack openvino_tests - run: tar -cvf - * | pigz > ${{ env.BUILD_DIR }}/openvino_tests.tar.gz - working-directory: ${{ env.INSTALL_TEST_DIR }} - # # Upload build artifacts # @@ -224,15 +208,7 @@ jobs: name: openvino_wheels path: ${{ env.INSTALL_WHEELS_DIR }}/wheels/*.whl if-no-files-found: 'error' - - - name: Upload openvino tests package - if: ${{ always() }} - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 - with: - name: openvino_tests - path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz - if-no-files-found: 'error' - + - name: Store artifacts to a shared drive id: store_artifacts if: ${{ always() }} @@ -244,34 +220,10 @@ jobs: ${{ env.INSTALL_WHEELS_DIR }}/wheels storage_dir: ${{ env.PRODUCT_TYPE }} storage_root: ${{ env.ARTIFACTS_SHARE }} - - Python_API_Tests: - name: Python API tests - needs: [ Docker, Build, Smart_CI ] - uses: ./.github/workflows/job_python_api_tests.yml - strategy: - fail-fast: false - matrix: - include: - - python-version: "3.9" - image: ${{ fromJSON(needs.docker.outputs.images).ov_test.fedora_33 }} - - python-version: "3.10" - image: ${{ fromJSON(needs.docker.outputs.images).ov_test.debian_10_py310 }} - - python-version: "3.11" - image: ${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_22_04_x64 }} - - python-version: "3.12" - image: ${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_24_04_x64 }} - - python-version: "3.13" - image: ${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_x64_py313 }} - with: - runner: 'aks-linux-4-cores-16gb' - container: '{"image": "${{ matrix.image }}", "volumes": ["/mount:/mount"]}' - affected-components: ${{ needs.smart_ci.outputs.affected_components }} - python-version: ${{ matrix.python-version }} - + Overall_Status: name: ci/gha_overall_status_manylinux2014 - needs: [Smart_CI, Build, Python_API_Tests] + needs: [Smart_CI, Build] if: ${{ always() }} runs-on: ubuntu-latest steps: diff --git a/.github/workflows/ovc.yml 
b/.github/workflows/ovc.yml index 3e7dedf50ad51b..4d69563a741d3a 100644 --- a/.github/workflows/ovc.yml +++ b/.github/workflows/ovc.yml @@ -20,7 +20,6 @@ jobs: steps: - name: Clone OpenVINO uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 - name: Setup Python uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 diff --git a/.github/workflows/py_checks.yml b/.github/workflows/py_checks.yml index dcf0932df8024e..caed37eee89056 100644 --- a/.github/workflows/py_checks.yml +++ b/.github/workflows/py_checks.yml @@ -29,7 +29,6 @@ jobs: steps: - name: Clone OpenVINO uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 - name: Setup Python uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 diff --git a/.github/workflows/send_workflows_to_opentelemetry.yml b/.github/workflows/send_workflows_to_opentelemetry.yml new file mode 100644 index 00000000000000..ba38d6a9f90fed --- /dev/null +++ b/.github/workflows/send_workflows_to_opentelemetry.yml @@ -0,0 +1,68 @@ +name: Export workflow metrics (BETA) + +on: + workflow_run: + workflows: + - Android ARM64 with vcpkg + - Android x64 + - Documentation + - Cleanup PIP caches + - Code snippets + - Code Style + - Code coverage + - Coverity (Ubuntu 20.04, Python 3.11) + - Debian 10 ARM + - Fedora 29 (RHEL 8.4), Python 3.9 + - Linux (Ubuntu 20.04, Python 3.9) + - Linux (Ubuntu 22.04, Python 3.11) + - Linux (Ubuntu 24.04, Python 3.12) + - Linux ARM64 (Ubuntu 20.04, Python 3.11) + - Linux Static CC (Ubuntu 22.04, Python 3.11, Clang) + - Linux RISC-V with Conan (Ubuntu 22.04, Python 3.10) + - macOS (Python 3.11) + - macOS ARM64 (Python 3.11) + - MO + - Python API Checks + - Webassembly + - Windows (VS 2019, Python 3.11, Release) + - Windows (VS 2019, Python 3.11, Debug) + - Windows Conditional Compilation (VS 2022, Python 3.11) + - Rerun Workflow with Known Errors + types: + - completed + +permissions: 
read-all + +jobs: + otel-export-trace: + name: Export finished workflow metrics + runs-on: aks-linux-2-cores-8gb + if: ${{ github.repository_owner == 'openvinotoolkit' }} + + steps: + - name: Checkout + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + sparse-checkout: '.github' + + - name: Install deps + run: | + pip3 install -r .github/scripts/requirements.txt + # dependency review action has these as an exception + # yet it still complains, so install them here + pip3 install PyGithub==2.2.0 psycopg2-binary==2.9.9 + + - name: Send metrics to SQL database + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + RUN_ID: ${{ github.event.workflow_run.id }} + PGHOST: ${{ secrets.METRICS_DATABASE_HOST }} + PGUSER: ${{ secrets.METRICS_DATABASE_USERNAME }} + PGPASSWORD: ${{ secrets.METRICS_DATABASE_PASSWORD }} + PGDATABASE: ${{ secrets.METRICS_DATABASE_NAME }} + PGPORT: 5432 + LOGLEVEL: "${{ runner.debug == '1' && 'DEBUG' || 'INFO' }}" + run: | + python3 .github/scripts/collect_github_metrics.py \ + --run-id ${{ github.event.workflow_run.id }} \ + --repository-name ${GITHUB_REPOSITORY} diff --git a/.github/workflows/ubuntu_20.yml b/.github/workflows/ubuntu_20.yml index 19760ff2551773..63a1fab87d566f 100644 --- a/.github/workflows/ubuntu_20.yml +++ b/.github/workflows/ubuntu_20.yml @@ -31,7 +31,6 @@ jobs: steps: - name: checkout action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: .github/actions/smart-ci @@ -66,7 +65,6 @@ jobs: steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 - uses: ./.github/actions/handle_docker id: handle_docker @@ -100,7 +98,6 @@ jobs: -DENABLE_SYSTEM_OPENCL=ON \ -DCMAKE_VERBOSE_MAKEFILE=ON \ -DCPACK_GENERATOR=TGZ \ - -DENABLE_WHEEL=ON \ -DCMAKE_COMPILE_WARNING_AS_ERROR=ON CXX_Unit_Tests: diff --git a/.github/workflows/ubuntu_22.yml b/.github/workflows/ubuntu_22.yml index 
e5c7d25003de1e..f4caec8b2458a0 100644 --- a/.github/workflows/ubuntu_22.yml +++ b/.github/workflows/ubuntu_22.yml @@ -33,7 +33,6 @@ jobs: steps: - name: checkout action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: .github/actions/smart-ci @@ -68,7 +67,6 @@ jobs: steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 - uses: ./.github/actions/handle_docker id: handle_docker @@ -103,7 +101,6 @@ jobs: -DENABLE_SYSTEM_OPENCL=ON \ -DCMAKE_VERBOSE_MAKEFILE=ON \ -DCPACK_GENERATOR=TGZ \ - -DENABLE_WHEEL=ON \ -DCMAKE_COMPILE_WARNING_AS_ERROR=ON Debian_Packages: @@ -187,7 +184,6 @@ jobs: - name: Fetch setup_python action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: | .github/actions/setup_python/action.yml @@ -304,16 +300,6 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' - Python_API_Tests: - name: Python API tests - needs: [ Docker, Build, Smart_CI ] - uses: ./.github/workflows/job_python_api_tests.yml - with: - runner: 'aks-linux-4-cores-16gb' - container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_22_04_x64 }}", "volumes": ["/mount:/mount"]}' - affected-components: ${{ needs.smart_ci.outputs.affected_components }} - python-version: '3.11' - TensorFlow_Layer_Tests: name: TensorFlow Layer Tests needs: [ Docker, Build, Smart_CI, Openvino_tokenizers ] @@ -334,16 +320,6 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' - JAX_Layer_Tests: - name: JAX Layer Tests - needs: [ Docker, Build, Smart_CI ] - uses: ./.github/workflows/job_jax_layer_tests.yml - with: - runner: 'aks-linux-4-cores-16gb' - container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_22_04_x64 }}", "volumes": ["/mount:/mount"]}' - affected-components: 
${{ needs.smart_ci.outputs.affected_components }} - python-version: '3.11' - CPU_Functional_Tests: name: CPU functional tests if: fromJSON(needs.smart_ci.outputs.affected_components).CPU.test @@ -484,7 +460,6 @@ jobs: - name: Clone OpenVINO Contrib uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: repository: 'openvinotoolkit/openvino_contrib' path: ${{ env.OPENVINO_CONTRIB_REPO }} diff --git a/.github/workflows/ubuntu_22_dpcpp.yml b/.github/workflows/ubuntu_22_dpcpp.yml index ad11a31f7403bf..48230155f7e903 100644 --- a/.github/workflows/ubuntu_22_dpcpp.yml +++ b/.github/workflows/ubuntu_22_dpcpp.yml @@ -21,7 +21,6 @@ jobs: steps: - name: checkout action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: .github/actions/smart-ci @@ -56,7 +55,6 @@ jobs: steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 - uses: ./.github/actions/handle_docker id: handle_docker diff --git a/.github/workflows/ubuntu_24.yml b/.github/workflows/ubuntu_24.yml index beac15bfbda97d..d874e06a189232 100644 --- a/.github/workflows/ubuntu_24.yml +++ b/.github/workflows/ubuntu_24.yml @@ -28,7 +28,6 @@ jobs: steps: - name: checkout action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: .github/actions/smart-ci @@ -63,7 +62,6 @@ jobs: steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 - uses: ./.github/actions/handle_docker id: handle_docker @@ -136,16 +134,6 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.12' - Python_API_Tests: - name: Python API tests - needs: [ Docker, Build, Smart_CI ] - uses: ./.github/workflows/job_python_api_tests.yml - with: - runner: 'aks-linux-4-cores-16gb' - container: '{"image": "${{ 
fromJSON(needs.docker.outputs.images).ov_test.ubuntu_24_04_x64 }}", "volumes": ["/mount:/mount"]}' - affected-components: ${{ needs.smart_ci.outputs.affected_components }} - python-version: '3.12' - Pytorch_Layer_Tests: name: Pytorch Layer Tests needs: [ Docker, Build, Smart_CI ] @@ -156,16 +144,6 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.12' - JAX_Layer_Tests: - name: JAX Layer Tests - needs: [ Docker, Build, Smart_CI ] - uses: ./.github/workflows/job_jax_layer_tests.yml - with: - runner: 'aks-linux-4-cores-16gb' - container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_24_04_x64 }}", "volumes": ["/mount:/mount"]}' - affected-components: ${{ needs.smart_ci.outputs.affected_components }} - python-version: '3.12' - TensorFlow_Layer_Tests: name: TensorFlow Layer Tests needs: [ Docker, Build, Smart_CI, Openvino_tokenizers ] diff --git a/.github/workflows/webassembly.yml b/.github/workflows/webassembly.yml index 350df3113b0f3a..45d6c9ce98317a 100644 --- a/.github/workflows/webassembly.yml +++ b/.github/workflows/webassembly.yml @@ -25,7 +25,6 @@ jobs: steps: - name: checkout action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: .github/actions/smart-ci @@ -60,7 +59,6 @@ jobs: steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 - uses: ./.github/actions/handle_docker id: handle_docker @@ -94,7 +92,6 @@ jobs: steps: - name: Clone OpenVINO uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: path: 'openvino' submodules: 'true' diff --git a/.github/workflows/windows_conditional_compilation.yml b/.github/workflows/windows_conditional_compilation.yml index 0f965eabd3c1ad..6a5846b514dbd7 100644 --- a/.github/workflows/windows_conditional_compilation.yml +++ b/.github/workflows/windows_conditional_compilation.yml 
@@ -31,7 +31,6 @@ jobs: steps: - name: checkout action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: .github/actions/smart-ci @@ -75,14 +74,12 @@ jobs: steps: - name: Clone OpenVINO uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: path: 'openvino' submodules: 'true' - name: Clone test models uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: repository: 'openvinotoolkit/testdata' path: 'testdata' @@ -240,7 +237,7 @@ jobs: Compress-Archive @compress $compress = @{ - Path = "${{ env.OPENVINO_REPO }}/bin/intel64/${{ env.CMAKE_BUILD_TYPE }}/ov_cpu_func_tests.exe", "${{ env.OPENVINO_REPO }}/bin/intel64/${{ env.CMAKE_BUILD_TYPE }}/openvino_template_extension.dll", "${{ env.OPENVINO_REPO }}/src/tests/test_utils/functional_test_utils/layer_tests_summary", "${{ env.INSTALL_DIR }}/runtime/3rdparty/tbb" + Path = "${{ env.OPENVINO_REPO }}/bin/intel64/${{ env.CMAKE_BUILD_TYPE }}/ov_cpu_func_tests.exe", "${{ env.BUILD_DIR }}/bin/${{ env.CMAKE_BUILD_TYPE }}/ze_loader.dll", "${{ env.OPENVINO_REPO }}/bin/intel64/${{ env.CMAKE_BUILD_TYPE }}/openvino_template_extension.dll", "${{ env.OPENVINO_REPO }}/src/tests/test_utils/functional_test_utils/layer_tests_summary", "${{ env.INSTALL_DIR }}/runtime/3rdparty/tbb" CompressionLevel = "Optimal" DestinationPath = "${{ env.BUILD_DIR }}/openvino_tests.zip" } @@ -286,14 +283,12 @@ jobs: steps: - name: Clone OpenVINO uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: path: 'openvino' submodules: 'true' - name: Clone test models uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: repository: 'openvinotoolkit/testdata' path: 'testdata' @@ -375,7 +370,6 @@ jobs: - name: Fetch setup_python action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - 
timeout-minutes: 15 with: sparse-checkout: | .github/actions/setup_python/action.yml diff --git a/.github/workflows/windows_vs2019_debug.yml b/.github/workflows/windows_vs2019_debug.yml index 4fcdc6b58b79d1..68a99055f5bdb8 100644 --- a/.github/workflows/windows_vs2019_debug.yml +++ b/.github/workflows/windows_vs2019_debug.yml @@ -27,7 +27,6 @@ jobs: steps: - name: checkout action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: .github/actions/smart-ci diff --git a/.github/workflows/windows_vs2019_release.yml b/.github/workflows/windows_vs2019_release.yml index 5708b529f25acc..1b218cdf7d430b 100644 --- a/.github/workflows/windows_vs2019_release.yml +++ b/.github/workflows/windows_vs2019_release.yml @@ -29,7 +29,6 @@ jobs: steps: - name: checkout action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: .github/actions/smart-ci @@ -60,11 +59,9 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} build-type: 'Release' target-branch: ${{ needs.smart_ci.outputs.target_branch }} - build-additional-python-wheels: true cmake-options: |- -G "Ninja Multi-Config" ` -DENABLE_PYTHON=ON ` - -DENABLE_WHEEL=ON ` -DENABLE_CPPLINT=OFF ` -DBUILD_SHARED_LIBS=ON ` -DENABLE_TESTS=ON ` @@ -72,7 +69,7 @@ jobs: -DENABLE_STRICT_DEPENDENCIES=OFF ` -DCMAKE_DISABLE_FIND_PACKAGE_PkgConfig=ON ` -DCUSTOM_OPERATIONS="calculate_grid;complex_mul;fft;grid_sample;sparse_conv;sparse_conv_transpose" ` - -DOPENVINO_EXTRA_MODULES="${env:OPENVINO_CONTRIB_REPO}/modules/custom_operations;${env:OPENVINO_CONTRIB_REPO}/modules/java_api" + -DOPENVINO_EXTRA_MODULES="${env:OPENVINO_CONTRIB_REPO }}/modules/custom_operations;${env:OPENVINO_CONTRIB_REPO}/modules/java_api" Samples: needs: [ Build, Smart_CI ] @@ -111,13 +108,11 @@ jobs: Expand-Archive openvino_tests.zip -DestinationPath . 
working-directory: ${{ env.INSTALL_DIR }} - - name: Fetch setup_python and install wheels actions + - name: Fetch setup_python action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: | .github/actions/setup_python/action.yml - .github/actions/install_ov_wheels/action.yml sparse-checkout-cone-mode: false path: 'openvino' @@ -140,16 +135,11 @@ jobs: run: | & ${{ env.SAMPLES_INSTALL_DIR }}/c/build_samples.ps1 -InstallDirectory ${{ env.INSTALL_DIR }} -BuildDirectory ${{ env.BUILD_DIR }}/c_samples - # Install Python benchmark_app by installing openvino-*.whl - - name: Install OpenVINO Python wheels - uses: ./openvino/.github/actions/install_ov_wheels - with: - wheels-dir-path: ${{ env.INSTALL_WHEELS_DIR }} - wheels-to-install: 'openvino' - - name: Samples tests run: | - python3 -m pip install --ignore-installed PyYAML -r ./tests/smoke_tests/requirements.txt + # Install Python benchmark_app by installing openvino-*.whl + $ovCoreWheelPath=Get-ChildItem -Path ./wheels -Filter openvino-*.whl | % { $_.FullName } + python3 -m pip install --ignore-installed PyYAML -r ./tests/smoke_tests/requirements.txt "$ovCoreWheelPath" . "./setupvars.ps1" $Env:PYTHONCOERCECLOCALE="warn" python3 -bb -W error -X dev -X warn_default_encoding -m pytest ./tests/smoke_tests --numprocesses auto @@ -187,7 +177,6 @@ jobs: steps: - name: Fetch OpenVINO JS sources uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: | src/bindings/js @@ -281,16 +270,15 @@ jobs: merge-multiple: true - name: Extract OpenVINO packages - run: Expand-Archive openvino_tests.zip -DestinationPath . + run: | + Expand-Archive openvino_tests.zip -DestinationPath . 
working-directory: ${{ env.INSTALL_DIR }} - - name: Fetch setup_python and install wheels actions + - name: Fetch setup_python action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: | .github/actions/setup_python/action.yml - .github/actions/install_ov_wheels/action.yml sparse-checkout-cone-mode: false path: 'openvino' @@ -303,10 +291,12 @@ jobs: self-hosted-runner: 'true' - name: Install OpenVINO Python wheels - uses: ./openvino/.github/actions/install_ov_wheels - with: - wheels-dir-path: ${{ env.INSTALL_WHEELS_DIR }} - wheels-to-install: 'openvino' + run: | + # Find and install the core OV wheel + $ovCoreWheelPath=Get-ChildItem -Path . -Filter openvino-*.whl | % { $_.FullName } + python3 -m pip install "$ovCoreWheelPath" + + working-directory: ${{ env.INSTALL_WHEELS_DIR }} - name: Install Python API tests dependencies run: | @@ -329,7 +319,9 @@ jobs: run: echo SSL_CERT_FILE=$(python3 -m certifi) >> $env:GITHUB_ENV - name: Install Python Layer tests dependencies - run: python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt + run: | + # layer test requirements + python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt - name: Python API Tests #if: fromJSON(needs.smart_ci.outputs.affected_components).Python_API.test # Ticket: 127101 @@ -358,7 +350,7 @@ jobs: --ignore=${{ env.INSTALL_TEST_DIR }}/onnx/test_python/test_zoo_models.py - name: OVC Python API Tests - if: fromJSON(needs.smart_ci.outputs.affected_components).OVC.test + if: fromJSON(needs.smart_ci.outputs.affected_components).MO.test shell: cmd run: | :: Used for 'test_utils' installed in '\python\openvino\test_utils' @@ -377,7 +369,7 @@ jobs: python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/py_frontend_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-test_py_fontend.xml - name: OVC unit tests - if: fromJSON(needs.smart_ci.outputs.affected_components).OVC.test + if: 
fromJSON(needs.smart_ci.outputs.affected_components).MO.test shell: cmd run: python3 -m pytest -s ${{ env.INSTALL_TEST_DIR }}/ovc/unit_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-OpenVinoConversion.xml @@ -385,102 +377,10 @@ jobs: uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: - name: test-results-python-unittests + name: test-results-python path: ${{ env.INSTALL_TEST_DIR }}/TEST*.xml if-no-files-found: 'error' - Python_API_Tests: - name: OpenVINO Python API Tests - if: fromJSON(needs.smart_ci.outputs.affected_components).Python_API.test - needs: [ Build, Smart_CI ] - timeout-minutes: 35 - strategy: - fail-fast: false - matrix: - python-version: ["3.9", "3.10", "3.11", "3.12"] - defaults: - run: - shell: pwsh - runs-on: aks-win-8-cores-16gb - env: - OPENVINO_REPO: "${{ github.workspace }}\\openvino" - INSTALL_DIR: "${{ github.workspace }}\\install" - INSTALL_TEST_DIR: "${{ github.workspace }}\\install\\tests" - INSTALL_WHEELS_DIR: "${{ github.workspace }}\\install\\wheels" - PYTHON_STATIC_ARGS: -m "not dynamic_library and not template_plugin" - steps: - - name: Download OpenVINO artifacts (tarballs) - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 - with: - pattern: openvino_[tests]* - path: ${{ env.INSTALL_DIR }} - merge-multiple: true - - - name: Download OpenVINO artifacts (wheels) - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 - with: - pattern: openvino_[wheels]* - path: ${{ env.INSTALL_WHEELS_DIR }} - merge-multiple: true - - - name: Extract OpenVINO packages - run: Expand-Archive openvino_tests.zip -DestinationPath . 
- working-directory: ${{ env.INSTALL_DIR }} - - - name: Fetch setup_python and install wheels actions - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 - with: - sparse-checkout: | - .github/actions/setup_python/action.yml - .github/actions/install_ov_wheels/action.yml - sparse-checkout-cone-mode: false - path: 'openvino' - - - name: Setup Python ${{ matrix.python-version }} - uses: ./openvino/.github/actions/setup_python - with: - version: ${{ matrix.python-version }} - pip-cache-path: ${{ env.PIP_CACHE_PATH }} - should-setup-pip-paths: 'false' - self-hosted-runner: 'true' - - - name: Install OpenVINO Python wheels - uses: ./openvino/.github/actions/install_ov_wheels - with: - wheels-dir-path: ${{ env.INSTALL_WHEELS_DIR }} - wheels-to-install: 'openvino' - - - name: Install Python API tests dependencies - run: python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/bindings/python/requirements_test.txt - - - name: Python API Tests - shell: cmd - run: | - set PYTHONPATH=${{ env.INSTALL_TEST_DIR }};%PYTHONPATH% - python3 -m pytest -sv ${{ env.INSTALL_TEST_DIR }}/pyopenvino ${{ env.PYTHON_STATIC_ARGS }} --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-Pyngraph.xml --ignore=${{ env.INSTALL_TEST_DIR }}/pyopenvino/tests/test_utils/test_utils.py - - - name: Python API Tests -- numpy>=2.0.0 - shell: cmd - run: | - python3 -m pip uninstall -y numpy - python3 -m pip install "numpy>=2.0.0,<2.1.0" - python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/bindings/python/requirements_test.txt - # for 'template' extension - set PYTHONPATH=${{ env.INSTALL_TEST_DIR }};%PYTHONPATH% - set PATH=${{ env.INSTALL_TEST_DIR }};%PATH% - python3 -m pytest -sv ${{ env.INSTALL_TEST_DIR }}/pyopenvino --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-Pyngraph_new_numpy.xml --ignore=${{ env.INSTALL_TEST_DIR }}/pyopenvino/tests/test_utils/test_utils.py - - - name: Upload Test Results - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 
- if: ${{ !cancelled() }} - with: - name: test-results-python-${{ matrix.python-version }} - path: | - ${{ env.INSTALL_TEST_DIR }}/TEST*.html - ${{ env.INSTALL_TEST_DIR }}/TEST*.xml - if-no-files-found: 'error' - TensorFlow_Layer_Tests: name: TensorFlow Layer Tests needs: [ Build, Smart_CI, Openvino_tokenizers ] @@ -499,15 +399,6 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' - JAX_Layer_Tests: - name: JAX Layer Tests - needs: [ Build, Smart_CI ] - uses: ./.github/workflows/job_jax_layer_tests.yml - with: - runner: 'aks-win-8-cores-16gb' - affected-components: ${{ needs.smart_ci.outputs.affected_components }} - python-version: '3.11' - CXX_Unit_Tests: name: C++ unit tests needs: [ Build, Smart_CI ] @@ -558,7 +449,6 @@ jobs: - name: Fetch setup_python action uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: | .github/actions/setup_python/action.yml diff --git a/.github/workflows/workflow_rerunner.yml b/.github/workflows/workflow_rerunner.yml index 535101ec943264..55ecc2500635b1 100644 --- a/.github/workflows/workflow_rerunner.yml +++ b/.github/workflows/workflow_rerunner.yml @@ -29,7 +29,7 @@ jobs: name: Rerun Workflow # Run only for the failed workflows in openvinotoolkit org if: ${{ github.event.workflow_run.conclusion == 'failure' && github.repository_owner == 'openvinotoolkit' }} - runs-on: aks-linux-2-cores-8gb-stats + runs-on: aks-linux-2-cores-8gb permissions: actions: write contents: read @@ -38,7 +38,6 @@ jobs: steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: '.github/scripts/workflow_rerun' @@ -70,11 +69,10 @@ jobs: rerunner_tests: name: Rerunner Tests if: ${{ github.event_name == 'pull_request' && github.repository_owner == 'openvinotoolkit' }} - runs-on: aks-linux-2-cores-8gb-stats + runs-on: aks-linux-2-cores-8gb steps: - name: Checkout 
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 with: sparse-checkout: '.github/scripts/workflow_rerun' lfs: true @@ -96,9 +94,9 @@ jobs: working-directory: ${{ github.workspace }}/.github/scripts/workflow_rerun run: | export PYTHONPATH=${{ github.workspace }}/.github/scripts/workflow_rerun:${{ github.workspace }}/.github/scripts:$PYTHONPATH - + # Need to get a run id with successful status for log analyzing # cannot lock a run id as logs get deleted after some time run_id=$(python3 -c "from github import Github, Auth; import os; github=Github(auth=Auth.Token(token=os.environ.get('GITHUB_TOKEN'))); repo = github.get_repo('${GITHUB_REPOSITORY}'); run_id = repo.get_workflow_runs(status='success')[0].id; print(run_id)") - + python3 rerunner.py --repository-name ${GITHUB_REPOSITORY} --run-id $run_id --dry-run diff --git a/.github/workflows/workflows_scans.yml b/.github/workflows/workflows_scans.yml deleted file mode 100644 index 1a3d091544e784..00000000000000 --- a/.github/workflows/workflows_scans.yml +++ /dev/null @@ -1,39 +0,0 @@ -name: GitHub Actions Workflows Scans -on: - workflow_dispatch: {} - push: - paths: - - '.github/workflows/**' - branches: - - 'master' - - 'releases/**' - pull_request: - paths: - - '.github/workflows/**' - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -permissions: read-all - -jobs: - semgrep: - name: github_actions_workflows_scan/semgrep - runs-on: ubuntu-latest - if: ${{ github.repository_owner == 'openvinotoolkit' }} - - container: - image: semgrep/semgrep - - steps: - - name: Checkout - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - timeout-minutes: 15 - with: - submodules: 'false' - sparse-checkout: .github/workflows - - - name: Semgrep scan - run: | - semgrep scan --error -j 8 --config "p/github-actions" .github/workflows/* diff --git a/.github/workflows/workflows_to_track.txt 
b/.github/workflows/workflows_to_track.txt deleted file mode 100644 index ef3bb633ed7737..00000000000000 --- a/.github/workflows/workflows_to_track.txt +++ /dev/null @@ -1,75 +0,0 @@ -==> ./stale_prs_and_issues.yml <== -name: 'Close stale issues and PRs' -==> ./build_doc.yml <== -name: Documentation -==> ./ovc.yml <== -name: OVC -==> ./ubuntu_22.yml <== -name: Linux (Ubuntu 22.04, Python 3.11) -==> ./windows_conditional_compilation.yml <== -name: Windows Conditional Compilation (VS 2022, Python 3.11) -==> ./send_workflows_to_opentelemetry.yml <== -name: Export workflow metrics (BETA) -==> ./ubuntu_22_dpcpp.yml <== -name: Linux (Ubuntu 22.04, Python 3.11, Intel DPC++ Compiler) -==> ./coverage.yml <== -name: Code coverage -==> ./linux_conditional_compilation.yml <== -name: Linux Static CC (Ubuntu 22.04, Python 3.11, Clang) -==> ./workflows_scans.yml <== -name: GitHub Actions Workflows Scans -==> ./check_pr_commits.yml <== -name: PR Commits -==> ./windows_vs2019_debug.yml <== -name: Windows (VS 2019, Python 3.11, Debug) -==> ./files_size.yml <== -name: Files Size -==> ./cleanup_caches.yml <== -name: Cleanup caches -==> ./mac.yml <== -name: macOS (Python 3.11) -==> ./merge_queue_stub.yml <== -==> ./debian_10_arm.yml <== -name: Debian 10 ARM -==> ./android_arm64.yml <== -name: Android ARM64 with vcpkg -==> ./code_style.yml <== -name: Code Style -==> ./manylinux_2014.yml <== -name: Manylinux 2014 -==> ./linux_arm64.yml <== -name: Linux ARM64 (Ubuntu 20.04, Python 3.11) -==> ./dev_cpu_linux_snippets_libxsmm.yml <== -name: Linux CPU Plugin Snippets with LIBXSMM (Ubuntu 20.04) -==> ./labeler.yml <== -name: "Pull Request Labeler" -==> ./mac_arm64.yml <== -name: macOS ARM64 (Python 3.11) -==> ./dependency_review.yml <== -name: 'Dependency Review' -==> ./fedora_29.yml <== -name: Fedora 29 (RHEL 8.4), Python 3.9 -==> ./code_snippets.yml <== -name: Code snippets -==> ./ubuntu_20.yml <== -name: Linux (Ubuntu 20.04, Python 3.9) -==> ./linux_riscv.yml <== -name: Linux RISC-V with 
Conan (Ubuntu 22.04, Python 3.10) -==> ./android_x64.yml <== -name: Android x64 -==> ./workflow_rerunner.yml <== -name: Rerun Workflow with Known Errors -==> ./linux_sanitizers.yml <== -name: Linux Sanitizers (Ubuntu 20.04, Python 3.9) -==> ./py_checks.yml <== -name: Python API Checks -==> ./webassembly.yml <== -name: Webassembly -==> ./ubuntu_24.yml <== -name: Linux (Ubuntu 24.04, Python 3.12) -==> ./assign_issue.yml <== -name: Take Issue -==> ./windows_vs2019_release.yml <== -name: Windows (VS 2019, Python 3.11, Release) -==> ./coverity.yml <== -name: Coverity (Ubuntu 20.04, Python 3.11) diff --git a/.gitignore b/.gitignore index 3ce289a4a2abf7..2815d16cf28392 100644 --- a/.gitignore +++ b/.gitignore @@ -60,6 +60,17 @@ coverage .npm # Artifacts +/tools/mo/*.bin +/tools/mo/*.xml +/tools/mo/*.json +/tools/mo/*.so +/tools/mo/*.txt +/tools/mo/*.pb +/tools/mo/*.pbtxt +/tools/mo/!CMakeLists.txt +/tools/mo/*.mapping +/tools/mo/*.dat +/tools/mo/*.svg /src/plugins/intel_cpu/tools/commit_slider/*.json /src/plugins/intel_cpu/tools/commit_slider/slider_cache/* /src/plugins/intel_cpu/thirdparty/ComputeLibrary/build/* diff --git a/.gitmodules b/.gitmodules index 884ba551c9b269..5feb7458da1801 100644 --- a/.gitmodules +++ b/.gitmodules @@ -52,6 +52,9 @@ [submodule "thirdparty/onednn_gpu"] path = src/plugins/intel_gpu/thirdparty/onednn_gpu url = https://github.com/oneapi-src/oneDNN.git +[submodule "tools/pot/thirdparty/open_model_zoo"] + path = thirdparty/open_model_zoo + url = https://github.com/openvinotoolkit/open_model_zoo.git [submodule "thirdparty/json/nlohmann_json"] path = thirdparty/json/nlohmann_json url = https://github.com/nlohmann/json.git diff --git a/README.md b/README.md index 7e9b173530de61..695f84f1628118 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,6 @@
-

-Open-source software toolkit for optimizing and deploying deep learning models. -

- -

- DocumentationBlogKey FeaturesTutorialsIntegrationsBenchmarksGenerative AI -

- [![PyPI Status](https://badge.fury.io/py/openvino.svg)](https://badge.fury.io/py/openvino) [![Anaconda Status](https://anaconda.org/conda-forge/openvino/badges/version.svg)](https://anaconda.org/conda-forge/openvino) [![brew Status](https://img.shields.io/homebrew/v/openvino)](https://formulae.brew.sh/formula/openvino) @@ -18,14 +10,14 @@ Open-source software toolkit for optimizing and deploying deep learning models. [![brew Downloads](https://img.shields.io/homebrew/installs/dy/openvino)](https://formulae.brew.sh/formula/openvino)
+Welcome to OpenVINO™, an open-source software toolkit for optimizing and deploying deep learning models. - **Inference Optimization**: Boost deep learning performance in computer vision, automatic speech recognition, generative AI, natural language processing with large and small language models, and many other common tasks. -- **Flexible Model Support**: Use models trained with popular frameworks such as PyTorch, TensorFlow, ONNX, Keras, PaddlePaddle, and JAX/Flax. Directly integrate models built with transformers and diffusers from the Hugging Face Hub using Optimum Intel. Convert and deploy models without original frameworks. +- **Flexible Model Support**: Use models trained with popular frameworks such as TensorFlow, PyTorch, ONNX, Keras, and PaddlePaddle. Convert and deploy models without original frameworks. - **Broad Platform Compatibility**: Reduce resource demands and efficiently deploy on a range of platforms from edge to cloud. OpenVINO™ supports inference on CPU (x86, ARM), GPU (OpenCL capable, integrated and discrete) and AI accelerators (Intel NPU). - **Community and Ecosystem**: Join an active community contributing to the enhancement of deep learning performance across various domains. -Check out the [OpenVINO Cheat Sheet](https://docs.openvino.ai/2024/_static/download/OpenVINO_Quick_Start_Guide.pdf) and [Key Features](https://docs.openvino.ai/2024/about-openvino/key-features.html) for a quick reference. - +Check out the [OpenVINO Cheat Sheet](https://docs.openvino.ai/2024/_static/download/OpenVINO_Quick_Start_Guide.pdf) for a quick reference. 
## Installation @@ -48,8 +40,6 @@ Learn how to optimize and deploy popular models with the [OpenVINO Notebooks](ht - [Multimodal assistant with LLaVa and OpenVINO](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot-genai.ipynb) - [Automatic speech recognition using Whisper and OpenVINO](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/whisper-asr-genai/whisper-asr-genai.ipynb) -Discover more examples in the [OpenVINO Samples (Python & C++)](https://docs.openvino.ai/2024/learn-openvino/openvino-samples.html) and [Notebooks (Python)](https://docs.openvino.ai/2024/learn-openvino/interactive-tutorials-python.html). - Here are easy-to-follow code examples demonstrating how to run PyTorch and TensorFlow model inference using OpenVINO: **PyTorch Model** @@ -96,43 +86,25 @@ data = np.random.rand(1, 224, 224, 3) output = compiled_model({0: data}) ``` -OpenVINO supports the CPU, GPU, and NPU [devices](https://docs.openvino.ai/2024/openvino-workflow/running-inference/inference-devices-and-modes.html) and works with models from PyTorch, TensorFlow, ONNX, TensorFlow Lite, PaddlePaddle, and JAX/Flax [frameworks](https://docs.openvino.ai/2024/openvino-workflow/model-preparation.html). It includes [APIs](https://docs.openvino.ai/2024/api/api_reference.html) in C++, Python, C, NodeJS, and offers the GenAI API for optimized model pipelines and performance. - -## Generative AI with OpenVINO - -Get started with the OpenVINO GenAI [installation](https://docs.openvino.ai/2024/get-started/install-openvino/install-openvino-genai.html) and refer to the [detailed guide](https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide/genai-guide.html) to explore the capabilities of Generative AI using OpenVINO. 
- -Learn how to run LLMs and GenAI with [Samples](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples) in the [OpenVINO™ GenAI repo](https://github.com/openvinotoolkit/openvino.genai). See GenAI in action with Jupyter notebooks: [LLM-powered Chatbot](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/llm-chatbot/README.md) and [LLM Instruction-following pipeline](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/llm-question-answering/README.md). - -## Documentation - -[User documentation](https://docs.openvino.ai/) contains detailed information about OpenVINO and guides you from installation through optimizing and deploying models for your AI applications. - -[Developer documentation](./docs/dev/index.md) focuses on the OpenVINO architecture and describes [building](./docs/dev/build.md) and [contributing](./CONTRIBUTING.md) processes. +OpenVINO also supports CPU, GPU, and NPU devices and works with models in TensorFlow, PyTorch, ONNX, TensorFlow Lite, PaddlePaddle model formats. +With OpenVINO you can do automatic performance enhancements at runtime customized to your hardware (preserving model accuracy), including: +asynchronous execution, batch processing, tensor fusion, load balancing, dynamic inference parallelism, automatic BF16 conversion, and more. ## OpenVINO Ecosystem -### OpenVINO Tools - +- [🤗Optimum Intel](https://github.com/huggingface/optimum-intel) - a simple interface to optimize Transformers and Diffusers models. - [Neural Network Compression Framework (NNCF)](https://github.com/openvinotoolkit/nncf) - advanced model optimization techniques including quantization, filter pruning, binarization, and sparsity. - [GenAI Repository](https://github.com/openvinotoolkit/openvino.genai) and [OpenVINO Tokenizers](https://github.com/openvinotoolkit/openvino_tokenizers) - resources and tools for developing and optimizing Generative AI applications. 
- [OpenVINO™ Model Server (OVMS)](https://github.com/openvinotoolkit/model_server) - a scalable, high-performance solution for serving models optimized for Intel architectures. - [Intel® Geti™](https://geti.intel.com/) - an interactive video and image annotation tool for computer vision use cases. -### Integrations - -- [🤗Optimum Intel](https://github.com/huggingface/optimum-intel) - grab and use models leveraging OpenVINO within the Hugging Face API. -- [Torch.compile](https://docs.openvino.ai/2024/openvino-workflow/torch-compile.html) - use OpenVINO for Python-native applications by JIT-compiling code into optimized kernels. -- [OpenVINO LLMs inference and serving with vLLM​](https://docs.vllm.ai/en/stable/getting_started/openvino-installation.html) - enhance vLLM's fast and easy model serving with the OpenVINO backend. -- [OpenVINO Execution Provider for ONNX Runtime](https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html) - use OpenVINO as a backend with your existing ONNX Runtime code. -- [LlamaIndex](https://docs.llamaindex.ai/en/stable/examples/llm/openvino/) - build context-augmented GenAI applications with the LlamaIndex framework and enhance runtime performance with OpenVINO. -- [LangChain](https://python.langchain.com/docs/integrations/llms/openvino/) - integrate OpenVINO with the LangChain framework to enhance runtime performance for GenAI applications. - Check out the [Awesome OpenVINO](https://github.com/openvinotoolkit/awesome-openvino) repository to discover a collection of community-made AI projects based on OpenVINO! -## Performance +## Documentation + +[User documentation](https://docs.openvino.ai/) contains detailed information about OpenVINO and guides you from installation through optimizing and deploying models for your AI applications. 
-Explore [OpenVINO Performance Benchmarks](https://docs.openvino.ai/2024/about-openvino/performance-benchmarks.html) to discover the optimal hardware configurations and plan your AI deployment based on verified data. +[Developer documentation](./docs/dev/index.md) focuses on how OpenVINO [components](./docs/dev/index.md#openvino-components) work and describes [building](./docs/dev/build.md) and [contributing](./CONTRIBUTING.md) processes. ## Contribution and Support @@ -146,8 +118,9 @@ You can ask questions and get support on: * The [`openvino`](https://stackoverflow.com/questions/tagged/openvino) tag on Stack Overflow\*. -## Resources +## Additional Resources +* [Product Page](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html) * [Release Notes](https://docs.openvino.ai/2024/about-openvino/release-notes-openvino.html) * [OpenVINO Blog](https://blog.openvino.ai/) * [OpenVINO™ toolkit on Medium](https://medium.com/@openvino) @@ -172,3 +145,4 @@ By contributing to the project, you agree to the license and copyright terms the --- \* Other names and brands may be claimed as the property of others. + diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index ae0bcde8793e5d..260e45b89778d0 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -4,6 +4,7 @@ cmake_policy(SET CMP0054 NEW) +# TODO: fix it, outside of source dir MO cannot find TBB dependency ov_set_temp_directory(TEMP "${CMAKE_SOURCE_DIR}") ## Intel OMP package diff --git a/cmake/developer_package/api_validator/api_validator.cmake b/cmake/developer_package/api_validator/api_validator.cmake index a2e7ab53052f84..ff6b0fbaa061e9 100644 --- a/cmake/developer_package/api_validator/api_validator.cmake +++ b/cmake/developer_package/api_validator/api_validator.cmake @@ -21,16 +21,16 @@ can't find Windows SDK version. 
Try to use vcvarsall.bat script") endif() endif() - # check that PROGRAMFILES_ENV is defined, because in case of cross-compilation for Windows we don't have such variable set(PROGRAMFILES_ENV "ProgramFiles\(X86\)") + + # check that PROGRAMFILES_ENV is defined, because in case of cross-compilation for Windows + # we don't have such variable if(DEFINED ENV{${PROGRAMFILES_ENV}}) file(TO_CMAKE_PATH $ENV{${PROGRAMFILES_ENV}} PROGRAMFILES) set(WDK_PATHS "${PROGRAMFILES}/Windows Kits/10/bin/${CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION}/x64" "${PROGRAMFILES}/Windows Kits/10/bin/x64") - endif() - if(WDK_PATHS) message(STATUS "Trying to find apivalidator in: ") foreach(wdk_path IN LISTS WDK_PATHS) message(" * ${wdk_path}") @@ -90,10 +90,7 @@ endfunction() set(VALIDATED_TARGETS "" CACHE INTERNAL "") -# -# ov_add_api_validator_post_build_step(TARGET ) -# -function(ov_add_api_validator_post_build_step) +function(_ov_add_api_validator_post_build_step) if((NOT ONECORE_API_VALIDATOR) OR (WINDOWS_STORE OR WINDOWS_PHONE)) return() endif() @@ -215,3 +212,10 @@ function(ov_add_api_validator_post_build_step) list(APPEND VALIDATED_TARGETS ${API_VALIDATOR_TARGETS}) set(VALIDATED_TARGETS "${VALIDATED_TARGETS}" CACHE INTERNAL "" FORCE) endfunction() + +# +# ov_add_api_validator_post_build_step(TARGET ) +# +function(ov_add_api_validator_post_build_step) + _ov_add_api_validator_post_build_step(${ARGN}) +endfunction() diff --git a/cmake/developer_package/compile_flags/os_flags.cmake b/cmake/developer_package/compile_flags/os_flags.cmake index e70de45b9416b1..fdfd7211c8e815 100644 --- a/cmake/developer_package/compile_flags/os_flags.cmake +++ b/cmake/developer_package/compile_flags/os_flags.cmake @@ -4,7 +4,6 @@ include(ProcessorCount) include(CheckCXXCompilerFlag) -include(CheckCXXSourceCompiles) # # ov_disable_deprecated_warnings() @@ -92,50 +91,6 @@ macro(ov_dev_package_no_errors) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ov_c_cxx_dev_no_errors}") endmacro() -# -# 
ov_check_compiler_supports_sve(flags) -# -# Checks whether CXX compiler for passed language supports SVE code compilation -# -macro(ov_check_compiler_supports_sve flags) - # Code to compile - set(SVE_CODE " - #include - int main() { - svfloat64_t a; - a = svdup_n_f64(0); - return 0; - }") - - # Save the current state of required flags - set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS}) - - # Set the flags necessary for compiling the test code with SVE support - set(CMAKE_REQUIRED_FLAGS "${CMAKE_CXX_FLAGS_INIT} ${flags}") - - # Check if the source code compiles with the given flags for C++ - CHECK_CXX_SOURCE_COMPILES("${SVE_CODE}" CXX_HAS_SVE) - - # If the compilation test is successful, set appropriate variables indicating support - if(CXX_HAS_SVE) - set(CXX_SVE_FOUND TRUE CACHE BOOL "SVE available on host") - set(CXX_SVE_FOUND TRUE CACHE BOOL "CXX SVE support") - set(CXX_SVE_FLAGS "${flags}" CACHE STRING "CXX SVE flags") - endif() - - # Restore the original state of required flags - set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE}) - - # If the compilation test fails, indicate that the support is not found - if(NOT CXX_SVE_FOUND) - set(CXX_SVE_FOUND FALSE CACHE BOOL "CXX SVE support") - set(CXX_SVE_FLAGS "" CACHE STRING "CXX SVE flags") - endif() - - # Mark the variables as advanced to hide them in the default CMake GUI - mark_as_advanced(CXX_SVE_FOUND CXX_SVE_FLAGS) -endmacro() - # # ov_sse42_optimization_flags() # @@ -234,12 +189,12 @@ endmacro() # macro(ov_arm_neon_fp16_optimization_flags flags) if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel" OR CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID} for arm64 platform") + message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}") elseif(ANDROID) if(ANDROID_ABI STREQUAL "arm64-v8a") set(${flags} -march=armv8.2-a+fp16 -Wno-unused-command-line-argument) else() - message(WARNING "ARM64 fp16 is not supported by Android armv7") + message(WARNING "fp16 
is not supported by Android armv7") endif() elseif(AARCH64) set(${flags} -O2 -march=armv8.2-a+fp16) @@ -247,52 +202,9 @@ macro(ov_arm_neon_fp16_optimization_flags flags) list(APPEND ${flags} -ftree-vectorize) endif() elseif(ARM) - message(WARNING "ARM64 fp16 is not supported by 32-bit ARM") + message(WARNING "fp16 is not supported by 32-bit ARM") else() - message(WARNING "ARM64 fp16 is not supported by architecture ${CMAKE_SYSTEM_PROCESSOR}") - endif() -endmacro() - -# -# ov_arm_sve_optimization_flags() -# -macro(ov_arm_sve_optimization_flags flags) - # Check for compiler SVE support - ov_check_compiler_supports_sve("-march=armv8-a+sve") - - if(OV_COMPILER_IS_INTEL_LLVM) - message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}") - elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - # nothing should be required here - elseif(ANDROID) - if(ANDROID_ABI STREQUAL "arm64-v8a") - set(${flags} -Wno-unused-command-line-argument) - if(CXX_SVE_FOUND) - list(APPEND ${flags} -march=armv8-a+sve) - else() - message(WARNING "SVE is not supported on this Android ABI: ${ANDROID_ABI}") - endif() - else() - message(WARNING "SVE is not supported on this Android ABI: ${ANDROID_ABI}") - endif() - else() - if(AARCH64) - set(${flags} -O2) - - # Add flag for SVE if supported - if(CXX_SVE_FOUND) - list(APPEND ${flags} -march=armv8-a+sve) - endif() - if(NOT CMAKE_CL_64) - list(APPEND ${flags} -ftree-vectorize) - endif() - - set(${flags} ${${flags}}) - elseif(ARM) - message(WARNING "SVE is not supported on 32-bit ARM architectures.") - else() - message(WARNING "SVE is not supported by architecture ${CMAKE_SYSTEM_PROCESSOR}") - endif() + message(WARNING "fp16 is not supported by architecture ${CMAKE_SYSTEM_PROCESSOR}") endif() endmacro() @@ -390,7 +302,13 @@ endif() # to allows to override CMAKE_CXX_STANDARD from command line if(NOT DEFINED CMAKE_CXX_STANDARD) - set(CMAKE_CXX_STANDARD 17) + if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + set(CMAKE_CXX_STANDARD 14) + 
elseif(OV_COMPILER_IS_INTEL_LLVM) + set(CMAKE_CXX_STANDARD 17) + else() + set(CMAKE_CXX_STANDARD 11) + endif() endif() if(NOT DEFINED CMAKE_CXX_EXTENSIONS) @@ -449,12 +367,6 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") # Build with multiple processes ov_add_compiler_flags(/MP) - # Workaround for an MSVC compiler issue in some versions of Visual Studio 2022. - # The issue involves a null dereference to a mutex. For details, refer to link https://github.com/microsoft/STL/wiki/Changelog#vs-2022-1710 - if(MSVC AND MSVC_VERSION GREATER_EQUAL 1930) - ov_add_compiler_flags(/D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR) - endif() - if(AARCH64 AND NOT MSVC_VERSION LESS 1930) # otherwise, _ARM64_EXTENDED_INTRINSICS is defined, which defines 'mvn' macro ov_add_compiler_flags(/D_ARM64_DISTINCT_NEON_TYPES) diff --git a/cmake/developer_package/cross_compile/cross_compiled_disp_gen.cmake b/cmake/developer_package/cross_compile/cross_compiled_disp_gen.cmake index fc7f07ea9b6bfe..c33d64635eb10b 100644 --- a/cmake/developer_package/cross_compile/cross_compiled_disp_gen.cmake +++ b/cmake/developer_package/cross_compile/cross_compiled_disp_gen.cmake @@ -18,7 +18,6 @@ set(_CPU_CHECK_ANY "true") set(_CPU_CHECK_SSE42 "with_cpu_x86_sse42()") set(_CPU_CHECK_AVX "with_cpu_x86_avx()") set(_CPU_CHECK_NEON_FP16 "with_cpu_neon_fp16()") -set(_CPU_CHECK_SVE "with_cpu_sve()") set(_CPU_CHECK_AVX2 "with_cpu_x86_avx2()") set(_CPU_CHECK_AVX512F "with_cpu_x86_avx512f()") @@ -40,16 +39,16 @@ function(_generate_dispatcher) ") - foreach(_namespace IN LISTS PARENT_NAMESPACES) + foreach(_namespace ${PARENT_NAMESPACES}) string(APPEND DISP_CONTENT "namespace ${_namespace} {\n") endforeach() - foreach(_func_name IN LISTS XARCH_FUNC_NAMES) + foreach(_func_name ${XARCH_FUNC_NAMES}) _find_signature_in_file(${XARCH_API_HEADER} ${_func_name} SIGNATURE) _generate_call_line_from_signature("${SIGNATURE}" CALL_LINE) - foreach(_arch IN LISTS XARCH_SET) + foreach(_arch ${XARCH_SET}) string(APPEND DISP_CONTENT "namespace 
${_arch} {\n ${SIGNATURE}\; \n}\n") endforeach() @@ -57,7 +56,7 @@ function(_generate_dispatcher) string(APPEND DISP_CONTENT "namespace ${XARCH_CURRENT_NAMESPACE} {\n\n${SIGNATURE} {\n") - foreach(_arch IN LISTS XARCH_SET) + foreach(_arch ${XARCH_SET}) string(APPEND DISP_CONTENT " if (${_CPU_CHECK_${_arch}}) {\n return ${_arch}::${CALL_LINE}\;\n }\n") endforeach() @@ -65,7 +64,7 @@ function(_generate_dispatcher) string(APPEND DISP_CONTENT "}\n\n}\n") endforeach() - foreach(_namespace IN LISTS PARENT_NAMESPACES) + foreach(_namespace ${PARENT_NAMESPACES}) string(APPEND DISP_CONTENT "} // namespace ${_namespace}\n") endforeach() diff --git a/cmake/developer_package/cross_compile/cross_compiled_func.cmake b/cmake/developer_package/cross_compile/cross_compiled_func.cmake index 08e6e230189260..1e92fe3bfdaf8c 100644 --- a/cmake/developer_package/cross_compile/cross_compiled_func.cmake +++ b/cmake/developer_package/cross_compile/cross_compiled_func.cmake @@ -3,24 +3,14 @@ # ## list of available instruction sets -set(_AVAILABLE_ARCHS_LIST ANY SSE42 AVX AVX2 AVX512F NEON_FP16 SVE) +set(_ARCH_LIST ANY SSE42 AVX AVX2 AVX512F NEON_FP16) -if(ENABLE_SVE) - list(APPEND _ENABLED_ARCHS_LIST SVE) -endif() -if(ENABLE_NEON_FP16) - list(APPEND _ENABLED_ARCHS_LIST NEON_FP16) -endif() -if(ENABLE_AVX512F) - list(APPEND _ENABLED_ARCHS_LIST AVX512F) -endif() -if(ENABLE_AVX2) - list(APPEND _ENABLED_ARCHS_LIST AVX2) -endif() -if(ENABLE_SSE42) - list(APPEND _ENABLED_ARCHS_LIST SSE42) -endif() -list(APPEND _ENABLED_ARCHS_LIST ANY) +set(_ACCEPTED_ARCHS_ANY "^(ANY)$") +set(_ACCEPTED_ARCHS_SSE42 "^(ANY|SSE42)$") +set(_ACCEPTED_ARCHS_AVX "^(ANY|SSE42|AVX)$") +set(_ACCEPTED_ARCHS_AVX2 "^(ANY|SSE42|AVX|AVX2)$") +set(_ACCEPTED_ARCHS_AVX512F "^(ANY|SSE42|AVX|AVX2|AVX512F)$") +set(_ACCEPTED_ARCHS_NEON_FP16 "^(ANY|NEON_FP16)$") ## Arch specific definitions set(_DEFINE_ANY "") @@ -29,24 +19,12 @@ set(_DEFINE_AVX "HAVE_AVX" ${_DEFINE_SSE42}) set(_DEFINE_AVX2 "HAVE_AVX2" ${_DEFINE_AVX}) set(_DEFINE_AVX512F 
"HAVE_AVX512F" ${_DEFINE_AVX2}) set(_DEFINE_NEON_FP16 "HAVE_NEON_FP16" ${_DEFINE_ANY}) -set(_DEFINE_SVE "HAVE_SVE" ${_DEFINE_SVE}) ## Arch specific compile options -if(ENABLE_AVX512F) - ov_avx512_optimization_flags(_FLAGS_AVX512F) -endif() -if(ENABLE_AVX2) - ov_avx2_optimization_flags(_FLAGS_AVX2) -endif() -if(ENABLE_SSE42) - ov_sse42_optimization_flags(_FLAGS_SSE42) -endif() -if(ENABLE_NEON_FP16) - ov_arm_neon_fp16_optimization_flags(_FLAGS_NEON_FP16) -endif() -if(ENABLE_SVE) - ov_arm_sve_optimization_flags(_FLAGS_SVE) -endif() +ov_avx512_optimization_flags(_FLAGS_AVX512F) +ov_avx2_optimization_flags (_FLAGS_AVX2) +ov_sse42_optimization_flags (_FLAGS_SSE42) +ov_arm_neon_fp16_optimization_flags(_FLAGS_NEON_FP16) set(_FLAGS_AVX "") ## TBD is not defined for OV project yet set(_FLAGS_ANY "") ## @@ -94,15 +72,18 @@ function(cross_compiled_file TARGET) message(FATAL_ERROR "Unknown argument: " ${X_UNPARSED_ARGUMENTS}) endif() if((NOT TARGET) OR (NOT X_NAME) OR (NOT X_NAMESPACE) OR (NOT X_API) OR (NOT X_ARCH)) - message(FATAL_ERROR "Missed arguments in 'cross_compiled_file'") + message(FATAL_ERROR "Missed arguments") endif() + _currently_requested_top_arch(TOP_ARCH) + set(_CURRENT_ARCH_FILTER "${_ACCEPTED_ARCHS_${TOP_ARCH}}") + ## format: ARCH1 ARCH2 ARCH3 ... 
- foreach(_it IN LISTS X_ARCH) - if(_it IN_LIST _AVAILABLE_ARCHS_LIST) + foreach(_it ${X_ARCH}) + if (_it IN_LIST _ARCH_LIST) ## that is arch ID set(_arch ${_it}) - if(_arch IN_LIST _ENABLED_ARCHS_LIST) + if(_arch MATCHES ${_CURRENT_ARCH_FILTER}) # make non/less-optimized version coming first list(INSERT _CUR_ARCH_SET 0 ${_arch}) list(APPEND _FULL_ARCH_SET ${_arch}) @@ -111,11 +92,8 @@ function(cross_compiled_file TARGET) ## that is source file name set(_src_name ${_it}) _remove_source_from_target(${TARGET} ${_src_name}) - - if(_CUR_ARCH_SET) - _clone_source_to_target(${TARGET} ${_src_name} "${_CUR_ARCH_SET}") - unset(_CUR_ARCH_SET) - endif() + _clone_source_to_target(${TARGET} ${_src_name} "${_CUR_ARCH_SET}") + set(_CUR_ARCH_SET "") endif() endforeach() @@ -200,6 +178,25 @@ function(_add_dispatcher_to_target TARGET HEADER FUNC_NAME NAMESPACE ARCH_SET) target_sources(${TARGET} PRIVATE ${DISPATCHER_SOURCE}) endfunction() +####################################### +# +# Return currently requested ARCH id +# +function(_currently_requested_top_arch VAR) + if(ENABLE_NEON_FP16) + set(RES NEON_FP16) + elseif(ENABLE_AVX512F) + set(RES AVX512F) + elseif(ENABLE_AVX2) + set(RES AVX2) + elseif(ENABLE_SSE42) + set(RES SSE42) + else() + set(RES ANY) + endif() + set (${VAR} "${RES}" PARENT_SCOPE) +endfunction() + ##################################### # # Utils to handle with cmake target diff --git a/cmake/developer_package/features.cmake b/cmake/developer_package/features.cmake index 90ac7532fb4b28..8d1f3696c6759c 100644 --- a/cmake/developer_package/features.cmake +++ b/cmake/developer_package/features.cmake @@ -4,7 +4,6 @@ include(options) include(target_flags) -include(compile_flags/os_flags) if(WIN32) set (CPACK_GENERATOR "ZIP" CACHE STRING "Cpack generator for OpenVINO") @@ -42,8 +41,6 @@ ov_option (ENABLE_THREAD_SANITIZER "enable checking data races via ThreadSanitiz ov_dependent_option (ENABLE_COVERAGE "enable code coverage" OFF "CMAKE_COMPILER_IS_GNUCXX OR 
OV_COMPILER_IS_CLANG" OFF) -ov_dependent_option (ENABLE_API_VALIDATOR "Enables API Validator usage" ON "WIN32" OFF) - # Defines CPU capabilities ov_dependent_option (ENABLE_SSE42 "Enable SSE4.2 optimizations" ON "X86_64 OR (X86 AND NOT EMSCRIPTEN)" OFF) @@ -52,9 +49,7 @@ ov_dependent_option (ENABLE_AVX2 "Enable AVX2 optimizations" ON "X86_64 OR (X86 ov_dependent_option (ENABLE_AVX512F "Enable AVX512 optimizations" ON "X86_64 OR (X86 AND NOT EMSCRIPTEN)" OFF) -ov_dependent_option (ENABLE_NEON_FP16 "Enable ARM FP16 optimizations" ON "AARCH64" OFF) - -ov_dependent_option (ENABLE_SVE "Enable SVE optimizations" ON "AARCH64" OFF) +ov_dependent_option(ENABLE_NEON_FP16 "Enable ARM FP16 optimizations" ON "AARCH64" OFF) # Type of build, we add this as an explicit option to default it to ON get_property(BUILD_SHARED_LIBS_DEFAULT GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS) @@ -109,11 +104,3 @@ if(ENABLE_AVX512F) set(ENABLE_AVX512F OFF CACHE BOOL "" FORCE) endif() endif() - -if(ENABLE_SVE) - ov_check_compiler_supports_sve("-march=armv8-a+sve") - - if(NOT CXX_HAS_SVE) - set(ENABLE_SVE OFF CACHE BOOL "" FORCE) - endif() -endif() \ No newline at end of file diff --git a/cmake/features.cmake b/cmake/features.cmake index e5132be08707cb..f12810adf86075 100644 --- a/cmake/features.cmake +++ b/cmake/features.cmake @@ -208,6 +208,8 @@ ov_dependent_option(ENABLE_JS "Enables JS API building" ${ENABLE_JS_DEFAULT} "NO ov_option(ENABLE_OPENVINO_DEBUG "Enable output for OPENVINO_DEBUG statements" OFF) +ov_dependent_option (ENABLE_API_VALIDATOR "Enables API Validator usage" ON "WIN32" OFF) + if(NOT BUILD_SHARED_LIBS AND ENABLE_OV_TF_FRONTEND) set(FORCE_FRONTENDS_USE_PROTOBUF ON) else() diff --git a/cmake/packaging/debian.cmake b/cmake/packaging/debian.cmake index c82dca0364b463..59b312963c180d 100644 --- a/cmake/packaging/debian.cmake +++ b/cmake/packaging/debian.cmake @@ -99,7 +99,6 @@ macro(ov_cpack_settings) 2024.3.0 2024.4.0 2024.5.0 - 2024.6.0 ) 
ov_check_conflicts_versions(conflicting_versions) diff --git a/cmake/packaging/rpm.cmake b/cmake/packaging/rpm.cmake index 6e9d535d41cfff..a4a63c35858bf9 100644 --- a/cmake/packaging/rpm.cmake +++ b/cmake/packaging/rpm.cmake @@ -87,7 +87,6 @@ macro(ov_cpack_settings) 2024.3.0 2024.4.0 2024.5.0 - 2024.6.0 ) ov_check_conflicts_versions(conflicting_versions) diff --git a/cmake/templates/OpenVINODeveloperPackageConfig.cmake.in b/cmake/templates/OpenVINODeveloperPackageConfig.cmake.in index 8fcd840f64132d..22eb432a97400a 100644 --- a/cmake/templates/OpenVINODeveloperPackageConfig.cmake.in +++ b/cmake/templates/OpenVINODeveloperPackageConfig.cmake.in @@ -2,10 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 # -if(OpenVINODeveloperPackage_FOUND) - return() -endif() - @PACKAGE_INIT@ include(CMakeFindDependencyMacro) diff --git a/cmake/templates/OpenVINODeveloperPackageConfigRelocatable.cmake.in b/cmake/templates/OpenVINODeveloperPackageConfigRelocatable.cmake.in index f1a02ebdff0177..be8fc1fa802e23 100644 --- a/cmake/templates/OpenVINODeveloperPackageConfigRelocatable.cmake.in +++ b/cmake/templates/OpenVINODeveloperPackageConfigRelocatable.cmake.in @@ -2,10 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 # -if(OpenVINODeveloperPackage_FOUND) - return() -endif() - @PACKAGE_INIT@ include(CMakeFindDependencyMacro) diff --git a/docs/RELEASE.MD b/docs/RELEASE.MD deleted file mode 100644 index b345431f3f2bcf..00000000000000 --- a/docs/RELEASE.MD +++ /dev/null @@ -1,29 +0,0 @@ -# OpenVINO Release Management -The process described below reflects the approach to managing OpenVINO releases. - -## Release Milestones -- Planning -- Execution (development of new features) -- Stabilization (Feature Freeze, Code Freeze milestones) -- Validation -- Distribution - -### Planning -This phase takes 2-4 weeks and involves scoping the backlog, prioritizing it, analyzing, and making commitments by developers for timelines specified by the release manager. 
- -### Execution (development of new features) -- [OpenVINO Contributing Guide](https://github.com/openvinotoolkit/openvino/blob/master/CONTRIBUTING.md) -- [Code Contribution Guide](https://docs.openvino.ai/2024/about-openvino/contributing/code-contribution-guide.html) -- [OpenVINO First Good Issue](https://github.com/openvinotoolkit/openvino/issues/17502) - -### Stabilization (Feature Freeze, Code Freeze milestones) -- **Feature Freeze**: This milestone ensures that no new features are added to the software after a certain point. This allows the development team to focus on stabilizing and refining the existing features, fixing bugs, and improving performance without the risk of introducing new issues. -- **Code Freeze**: This milestone marks the point where no new code changes are allowed except for critical bug fixes. This helps in ensuring that the final product is stable and reliable, as it minimizes the risk of last-minute changes that could introduce new bugs or instability. - -### Release Validation -- This is a continuous process executed on a regular basis with cadence based on testing type: nightly, bi-weekly, weekly. -- After Code Freeze, the testing team can perform final regression testing to ensure that recent changes have not introduced new bugs and that the software meets the required quality standards. - -### Distribution -- OpenVINO has different types of build distribution: Regular releases, Long-Term Support, Pre-release releases, Nightly builds. Read more here: [OpenVINO Release Policy](https://docs.openvino.ai/2024/about-openvino/release-notes-openvino/release-policy.html) -- Different distribution channels are supported. 
Explore different options here: [OpenVINO Download](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html) diff --git a/docs/articles_en/about-openvino/additional-resources/glossary.rst b/docs/articles_en/about-openvino/additional-resources/glossary.rst index 6120b0c9018a54..9aba2b395525c2 100644 --- a/docs/articles_en/about-openvino/additional-resources/glossary.rst +++ b/docs/articles_en/about-openvino/additional-resources/glossary.rst @@ -38,6 +38,7 @@ Acronyms and Abbreviations LRN Local Response Normalization mAP Mean Average Precision Intel® OneDNN Intel® OneAPI Deep Neural Network Library + `mo` Command-line tool for model conversion, CLI for ``tools.mo.convert_model`` (legacy) MVN Mean Variance Normalization NCDHW Number of images, Channels, Depth, Height, Width NCHW Number of images, Channels, Height, Width diff --git a/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst b/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst index 3bb46116ee1748..c80dc388568004 100644 --- a/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst +++ b/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst @@ -31,6 +31,11 @@ OpenVINO offers the option of running automated inference with the following inf | :doc:`Automatic Batching <../../openvino-workflow/running-inference/inference-devices-and-modes/automatic-batching>`: | automatically groups inference requests to improve device utilization. +| :doc:`(LEGACY) Multi-device Inference <./../../documentation/legacy-features/multi-device>`: +| executes inference on multiple devices. Currently, this mode is considered a legacy + solution. Using Automatic Device Selection instead is advised. 
+ + Feature Support and API Coverage ################################# @@ -47,6 +52,7 @@ Feature Support and API Coverage :doc:`Preprocessing acceleration <../../openvino-workflow/running-inference/optimize-inference/optimize-preprocessing>` Yes Yes No :doc:`Stateful models <../../openvino-workflow/running-inference/stateful-models>` Yes Yes Yes :doc:`Extensibility <../../documentation/openvino-extensibility>` Yes Yes No + :doc:`(LEGACY) Multi-device execution <./../../documentation/legacy-features/multi-device>` Yes Yes Partial ======================================================================================================================================== ======= ========== =========== @@ -77,7 +83,7 @@ For setting up a relevant configuration, refer to the :doc:`Integrate with Customer Application <../../openvino-workflow/running-inference/integrate-openvino-with-your-application>` topic (step 3 "Configure input and output"). -.. dropdown:: Device support across OpenVINO 2024.6 distributions +.. dropdown:: Device support across OpenVINO 2024.5 distributions =============== ========== ====== =============== ======== ============ ========== ========== ========== Device Archives PyPI APT/YUM/ZYPPER Conda Homebrew vcpkg Conan npm diff --git a/docs/articles_en/about-openvino/compatibility-and-support/supported-models.rst b/docs/articles_en/about-openvino/compatibility-and-support/supported-models.rst index f4ec275491fa32..d877cb1768d44d 100644 --- a/docs/articles_en/about-openvino/compatibility-and-support/supported-models.rst +++ b/docs/articles_en/about-openvino/compatibility-and-support/supported-models.rst @@ -6,14 +6,16 @@ models from OpenVINO-supported frameworks may also work properly but have not be **AI Models that run on Intel® Core Ultra™ Processors with OpenVINO™ toolkit:** -.. data-table:: +.. raw:: html + + + + +.. 
csv-table:: :class: modeldata stripe :name: supportedModelsTable :header-rows: 1 :file: ../../_static/download/supported_models.csv - :data-column-hidden: [] - :data-order: [[ 0, "asc" ]] - :data-page-length: 10 | Marked cells indicate models that passed inference with no errors. Empty cells indicate diff --git a/docs/articles_en/about-openvino/compatibility-and-support/supported-operations.rst b/docs/articles_en/about-openvino/compatibility-and-support/supported-operations.rst index 1bd8f5dae7c634..d27f7626391f46 100644 --- a/docs/articles_en/about-openvino/compatibility-and-support/supported-operations.rst +++ b/docs/articles_en/about-openvino/compatibility-and-support/supported-operations.rst @@ -41,36 +41,27 @@ Data as of OpenVINO 2024.4, 18 Oct. 2024. .. tab-item:: PyTorch - .. data-table:: + .. csv-table:: :class: modeldata stripe - :name: TensorFlow_ops_v1 + :name: TensorFlow ops :header-rows: 1 :file: ../../_static/conformance_files/pytorch_ops.csv - :data-column-hidden: [] - :data-order: [[ 0, "asc" ]] - :data-page-length: 10 .. tab-item:: TensorFlow - .. data-table:: + .. csv-table:: :class: modeldata stripe - :name: TensorFlow_ops_v2 + :name: TensorFlow ops :header-rows: 1 :file: ../../_static/conformance_files/tensorflow_ops.csv - :data-column-hidden: [] - :data-order: [[ 0, "asc" ]] - :data-page-length: 10 .. tab-item:: PaddlePaddle - .. data-table:: + .. csv-table:: :class: modeldata stripe - :name: Paddle_ops + :name: Paddle ops :header-rows: 1 :file: ../../_static/conformance_files/paddlepaddle_ops.csv - :data-column-hidden: [] - :data-order: [[ 0, "asc" ]] - :data-page-length: 10 .. tab-item:: ONNX diff --git a/docs/articles_en/about-openvino/performance-benchmarks.rst b/docs/articles_en/about-openvino/performance-benchmarks.rst index 4262ec6b2b3732..8a58dc27df1f83 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks.rst @@ -64,7 +64,7 @@ implemented in your solutions. 
Click the buttons below to see the chosen benchma :outline: :expand: - :material-regular:`bar_chart;1.4em` OVMS for GenAI + :material-regular:`bar_chart;1.4em` OVMS for GenAI (coming soon) @@ -132,21 +132,21 @@ For a listing of all platforms and configurations used for testing, refer to the .. grid-item:: - .. button-link:: ../_static/benchmarks_files/OV-2024.6-platform_list.pdf + .. button-link:: ../_static/benchmarks_files/OV-2024.5-platform_list.pdf :color: primary :outline: :expand: :material-regular:`download;1.5em` Click for Hardware Platforms [PDF] - .. button-link:: ../_static/benchmarks_files/OV-2024.6-system-info-detailed.xlsx + .. button-link:: ../_static/benchmarks_files/OV-2024.5-system-info-detailed.xlsx :color: primary :outline: :expand: :material-regular:`download;1.5em` Click for Configuration Details [XLSX] - .. button-link:: ../_static/benchmarks_files/OV-2024.6-Performance-Data.xlsx + .. button-link:: ../_static/benchmarks_files/OV-2024.5-Performance-Data.xlsx :color: primary :outline: :expand: @@ -160,10 +160,10 @@ For a listing of all platforms and configurations used for testing, refer to the **Disclaimers** * Intel® Distribution of OpenVINO™ toolkit performance results are based on release - 2024.6, as of December 18, 2024. + 2024.5, as of November 20, 2024. * OpenVINO Model Server performance results are based on release - 2024.5, as of November 20, 2024. + 2024.4, as of Sept. 30, 2024. The results may not reflect all publicly available updates. 
Intel technologies' features and benefits depend on system configuration and may require enabled hardware, software, or service diff --git a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst index 83581d465df92e..5697fcbf6e4d74 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst @@ -3,44 +3,42 @@ Most Efficient Large Language Models for AI PC This page is regularly updated to help you identify the best-performing LLMs on the Intel® Core™ Ultra processor family and AI PCs. -The current data is as of OpenVINO 2024.4, 20 Nov. 2024. +The current data is as of OpenVINO 2024.4, 24 Oct. 2024. The tables below list the key performance indicators for inference on built-in GPUs. +.. raw:: html + + + + .. tab-set:: .. tab-item:: 9-288V - .. data-table:: + .. csv-table:: :class: modeldata stripe - :name: supportedModelsTable_V1 + :name: supportedModelsTableOv :header-rows: 1 :file: ../../_static/benchmarks_files/llm_models_9-288V.csv - :data-column-hidden: [3,4,6] - :data-order: [[ 0, "asc" ]] - :data-page-length: 10 .. tab-item:: 7-268V - .. data-table:: + .. csv-table:: :class: modeldata stripe - :name: supportedModelsTable_V2 + :name: supportedModelsTableOv :header-rows: 1 :file: ../../_static/benchmarks_files/llm_models_7-258V.csv - :data-column-hidden: [3,4,6] - :data-order: [[ 0, "asc" ]] .. tab-item:: 7-155H - .. data-table:: + .. csv-table:: :class: modeldata stripe - :name: supportedModelsTable_V3 + :name: supportedModelsTableOv :header-rows: 1 :file: ../../_static/benchmarks_files/llm_models_7-155H.csv - :data-column-hidden: [3,4,6] - :data-order: [[ 0, "asc" ]] ..
grid:: 1 1 2 2 diff --git a/docs/articles_en/about-openvino/performance-benchmarks/getting-performance-numbers.rst b/docs/articles_en/about-openvino/performance-benchmarks/getting-performance-numbers.rst index 9ba82690b00395..936f1145a6b3b0 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/getting-performance-numbers.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/getting-performance-numbers.rst @@ -103,7 +103,7 @@ General considerations Some image pre-processing can be baked into OpenVINO IR and accelerated accordingly. For more information, refer to - :doc:`Preprocessing API <../../openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/preprocessing-api-details>`. + :doc:`Embedding Pre-processing <../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-embedding-preprocessing-computation>` and :doc:`General Runtime Optimizations <../../openvino-workflow/running-inference/optimize-inference/general-optimizations>`. @@ -192,7 +192,7 @@ execution breakdown. For example, the table below is part of performance counters for :doc:`CPU inference <../../openvino-workflow/running-inference/inference-devices-and-modes/cpu-device>`. -of a TensorFlow implementation of ResNet-50. +of a `TensorFlow implementation of ResNet-50 `__ Keep in mind that since the device is CPU, the ``realTime`` wall clock and the ``cpu`` time layers are the same. Information about layer precision is also stored in the performance counters. 
diff --git a/docs/articles_en/about-openvino/performance-benchmarks/model-accuracy-int8-fp32.rst b/docs/articles_en/about-openvino/performance-benchmarks/model-accuracy-int8-fp32.rst index 78c947fb64cb07..e87733a1445356 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/model-accuracy-int8-fp32.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/model-accuracy-int8-fp32.rst @@ -1,12 +1,10 @@ Model Accuracy ============== - - -The following two tables present the absolute accuracy drop calculated as the accuracy difference -between OV-accuracy and the original frame work accuracy for FP32, and the same for INT8, BF16 and -FP16 representations of a model on three platform architectures. The third table presents the GenAI model accuracies as absolute accuracy values. Please also refer to notes below -the table for more information. +between OV-accuracy and the original framework accuracy for FP32, and the same for INT8, BF16, +and FP16 representations of a model on three platform architectures. The third table presents +the GenAI model accuracies as absolute accuracy values. Refer to notes below the table for more +information. * A - Intel® Core™ i9-9000K (AVX2), INT8 and FP32 * B - Intel® Xeon® 6338, (VNNI), INT8 and FP32 @@ -34,31 +32,31 @@ the table for more information. * - efficientdet-d0 - COCO2017_detection_91cl - coco_precision - - - + - -0.84% + - -0.59% - -0.59% - - - -0.55% * - mask_rcnn_resnet50_atrous_coco - COCO2017_detection_91cl_bkgr - coco_orig_precision - -0.10% - -0.04% - - + - 0.07% - -0.01% * - mobilenet-v2 - ImageNet2012 - accuracy @ top1 - - + - - -0.97% - -0.98% - -0.95% * - resnet-50 - ImageNet2012 - accuracy @ top1 - - - - 0.97% - - 0.94% - - 0.95% + - 0.74% + - 0.76% + - 0.74% + - 0.82% * - ssd-resnet34-1200 - COCO2017_detection_80cl_bkgr - map @@ -69,17 +67,18 @@ the table for more information. 
* - ssd-mobilenet-v1-coco - COCO2017_detection_80cl_bkgr - coco-precision - - + - -2.94% + - -0.28% - -0.28% - - - -0.26% * - yolo_v8n - COCO2017_detection_80cl - map - - -0.11% - - -0.05% - - - - + - -0.01% + - -0.04% + - -0.07% + - 0.05% + .. list-table:: Model Accuracy for BF16, FP32 and FP16 (FP16: Flex-170 only. BF16: Xeon(R) 8480+ only) :header-rows: 1 @@ -102,15 +101,15 @@ the table for more information. * - efficientdet-d0 - COCO2017_detection_91cl - coco_precision - - - 0.01% - - 0.00% - 0.01% + - 0.01% + - 0.00% - 0.00% * - mask_rcnn_resnet50_atrous_coco - COCO2017_detection_91cl_bkgr - coco_orig_precision - - + - -0.01% - -0.01% - -0.01% - 0.05% @@ -136,8 +135,8 @@ the table for more information. - map - 0.02% - 0.02% - - 0.01% - 0.02% + - -0.01% - 0.02% * - ssd-mobilenet-v1-coco - COCO2017_detection_80cl_bkgr @@ -155,9 +154,10 @@ the table for more information. - 0.01% - 0.05% - 0.00% + .. list-table:: Model Accuracy for VNNI-FP16, VNNI-INT4, AMX-FP16 and MTL-INT4 (Core Ultra iGPU) :header-rows: 1 - + * - OpenVINO™ Model name - dataset - Metric Name @@ -168,59 +168,59 @@ the table for more information. * - chatGLM4 - Wikiset - ppl - - - - - - - - + - + - + - + - * - Gemma-2-9B - Wikitext - ppl - - + - - 1.57 - 1.57 - - + - * - Llama-2-7b-chat - Wikiset - ppl - - - - 1.59 + - + - - 1.59 - - + - * - Llama-3-8b - Wikiset - ppl - 1.45 - 1.48 - 1.45 - - + - * - Llama-3.2-3b-instruct - Wikiset - ppl - 1.60 - 1.62 - - 1.62 - - + - 1.17 + - * - Mistral-7b - Wikitext - ppl - 1.48 - 1.49 - 1.48 - - + - * - Phi3-mini-4k-instruct - Wikitext - ppl + - 1.52 - 1.55 - - 1.55 - - 1.55 - - + - 1.52 + - 1.56 * - Qwen-2-7B - Wikitext - ppl - 1.52 - 1.53 - 1.52 - - + - 1.56 Notes: For all accuracy metrics a "-", (minus sign), indicates an accuracy drop. 
For perplexity (ppl) the values do not indicate a deviation from a reference but are the actual measured diff --git a/docs/articles_en/about-openvino/performance-benchmarks/performance-benchmarks-faq.rst b/docs/articles_en/about-openvino/performance-benchmarks/performance-benchmarks-faq.rst index 5495711bc0054a..0f70c93e9c8b96 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/performance-benchmarks-faq.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/performance-benchmarks-faq.rst @@ -15,7 +15,13 @@ Performance Information F.A.Q. .. dropdown:: Where can I find the models used in the performance benchmarks? - All models used are published on `Hugging Face `__. + All models used are included in the GitHub repository of + :doc:`Open Model Zoo <../../documentation/legacy-features/model-zoo>`. + + .. important:: + + Due to the deprecation of Open Model Zoo, models in the OpenVINO IR format are now + published on `Hugging Face `__. .. dropdown:: Will there be any new models added to the list used for benchmarking? @@ -29,7 +35,7 @@ Performance Information F.A.Q. open-source tool within the Intel® Distribution of OpenVINO™ toolkit called :doc:`benchmark_app <../../learn-openvino/openvino-samples/benchmark-tool>`. - For diffusers (Stable-Diffusion) and foundational models (aka LLMs) please use the OpenVINO GenAI + For diffusers (Stable-Diffusion) and foundational models (aka LLMs) please use the OpenVINO GenAI opensource repo `OpenVINO GenAI tools/llm_bench `__ For a simple instruction on testing performance, see the :doc:`Getting Performance Numbers Guide `. @@ -87,6 +93,30 @@ Performance Information F.A.Q. 
- BERT - question / answer - 128 + * - `efficientdet-d0 `__ + - Efficientdet + - classification + - 512x512 + * - `mask_rcnn_resnet50_atrous_coco `__ + - Mask R-CNN ResNet 50 Atrous + - object instance segmentation + - 800x1365 + * - `mobilenet-v2 `__ + - Mobilenet V2 PyTorch + - classification + - 224x224 + * - `resnet-50 `__ + - ResNet-50_v1_ILSVRC-2012 + - classification + - 224x224 + * - `ssd-mobilenet-v1-coco `__ + - ssd-mobilenet-V1-coco onnx model + - object detection + - 300x300 + * - `ssd-resnet34-1200-onnx `__ + - ssd-resnet34 onnx model + - object detection + - 1200x1200 * - `yolov8n `__ - Yolov8nano - object detection diff --git a/docs/articles_en/about-openvino/release-notes-openvino.rst b/docs/articles_en/about-openvino/release-notes-openvino.rst index bf475159380dff..343c9e780f05dc 100644 --- a/docs/articles_en/about-openvino/release-notes-openvino.rst +++ b/docs/articles_en/about-openvino/release-notes-openvino.rst @@ -16,408 +16,359 @@ OpenVINO Release Notes -2024.6 - 18 December 2024 +2024.5 - 20 November 2024 ############################# :doc:`System Requirements <./release-notes-openvino/system-requirements>` | :doc:`Release policy <./release-notes-openvino/release-policy>` | :doc:`Installation Guides <./../get-started/install-openvino>` + + What's new +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -* OpenVINO 2024.6 release includes updates for enhanced stability and improved LLM performance. -* Introduced support for Intel® Arc™ B-Series Graphics (formerly known as Battlemage). -* Implemented optimizations to improve the inference time and LLM performance on NPUs. -* Improved LLM performance with GenAI API optimizations and bug fixes. +* More GenAI coverage and framework integrations to minimize code changes. + + * New models supported: Llama 3.2 (1B & 3B), Gemma 2 (2B & 9B), and YOLO11. 
+ * LLM support on NPU: Llama 3 8B, Llama 2 7B, Mistral-v0.2-7B, Qwen2-7B-Instruct and Phi-3 + Mini-Instruct. + * Noteworthy notebooks added: Sam2, Llama3.2, Llama3.2 - Vision, Wav2Lip, Whisper, and Llava. + * Preview: support for Flax, a high-performance Python neural network library based on JAX. + Its modular design allows for easy customization and accelerated inference on GPUs. + +* Broader Large Language Model (LLM) support and more model compression techniques. + + * Optimizations for built-in GPUs on Intel® Core™ Ultra Processors (Series 1) and Intel® Arc™ + Graphics include KV Cache compression for memory reduction along with improved usability, + and model load time optimizations to improve first token latency for LLMs. + * Dynamic quantization was enabled to improve first token latency for LLMs on built-in + Intel® GPUs without impacting accuracy on Intel® Core™ Ultra Processors (Series 1). Second + token latency will also improve for large batch inference. + * A new method to generate synthetic text data is implemented in the Neural Network + Compression Framework (NNCF). This will allow LLMs to be compressed more accurately using + data-aware methods without datasets. Coming soon: This feature will soon be accessible via + Optimum Intel on Hugging Face. + +* More portability and performance to run AI at the edge, in the cloud, or locally. + + * Support for + `Intel® Xeon® 6 Processors with P-cores `__ + (formerly codenamed Granite Rapids) and + `Intel® Core™ Ultra 200V series processors `__ + (formerly codenamed Arrow Lake-S). + * Preview: GenAI API enables multimodal AI deployment with support for multimodal pipelines + for improved contextual awareness, transcription pipelines for easy audio-to-text + conversions, and image generation pipelines for streamlined text-to-visual conversions. 
+ * Speculative decoding feature added to the GenAI API for improved performance and efficient + text generation using a small draft model that is periodically corrected by the full-size + model. + * Preview: LoRA adapters are now supported in the GenAI API for developers to quickly and + efficiently customize image and text generation models for specialized tasks. + * The GenAI API now also supports LLMs on NPU allowing developers to specify NPU as the + target device, specifically for WhisperPipeline (for whisper-base, whisper-medium, and + whisper-small) and LLMPipeline (for Llama 3 8B, Llama 2 7B, Mistral-v0.2-7B, + Qwen2-7B-Instruct and Phi-3 Mini-instruct). Use driver version 32.0.100.3104 or later for + best performance. + +Now deprecated +----------------------------------------------------------------------------------------------- +* Python 3.8 is no longer supported. OpenVINO™ Runtime +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -CPU Device Plugin +Common ----------------------------------------------------------------------------------------------- -* KV cache now uses asymmetric 8-bit unsigned integer (U8) as the default precision, reducing - memory stress for LLMs and increasing their performance. This option can be controlled by - model meta data. -* Quality and accuracy has been improved for selected models with several bug fixes. +* NumPy 2.x has been adopted for all currently supported components, including NNCF. +* A new constant constructor has been added, enabling constants to be created from a data pointer + as shared memory. Additionally, it can take ownership of a shared, or other, object, avoiding + a two-step process to wrap memory into ``ov::Tensor``. +* Files are now read via the async ReadFile API, reducing the bottleneck for LLM model load + times on GPU.
+* CPU implementation of SliceScatter operator is now available, used for models such as Gemma, + supporting increased LLM performance. -GPU Device Plugin ------------------------------------------------------------------------------------------------ -* Device memory copy optimizations have been introduced for inference with **Intel® Arc™ B-Series - Graphics** (formerly known as Battlemage). Since it does not utilize L2 cache for copying memory - between the device and host, a dedicated `copy` operation is used, if inputs or results are - not expected in the device memory. -* ChatGLM4 inference on GPU has been optimized. - -NPU Device Plugin +CPU Device Plugin ----------------------------------------------------------------------------------------------- -* LLM performance and inference time has been improved with memory optimizations. - - - - - -OpenVINO.GenAI -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -* The encrypted_model_causal_lm sample is now available, showing how to decrypt a model. - - - - -Other Changes and Known Issues -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -Jupyter Notebooks ------------------------------ - -* `Visual-language assistant with GLM-Edge-V and OpenVINO `__ -* `Local AI and OpenVINO `__ -* `Multimodal understanding and generation with Janus and OpenVINO `__ - - - - - - - - - - - - -Previous 2024 releases -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -.. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -.. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - - -.. dropdown:: 2024.5 - 20 November 2024 - :animate: fade-in-slide-down - :color: secondary - - **What's new** - - * More GenAI coverage and framework integrations to minimize code changes. 
- - * New models supported: Llama 3.2 (1B & 3B), Gemma 2 (2B & 9B), and YOLO11. - * LLM support on NPU: Llama 3 8B, Llama 2 7B, Mistral-v0.2-7B, Qwen2-7B-Instruct and Phi-3 - Mini-Instruct. - * Noteworthy notebooks added: Sam2, Llama3.2, Llama3.2 - Vision, Wav2Lip, Whisper, and Llava. - * Preview: support for Flax, a high-performance Python neural network library based on JAX. - Its modular design allows for easy customization and accelerated inference on GPUs. - - * Broader Large Language Model (LLM) support and more model compression techniques. - - * Optimizations for built-in GPUs on Intel® Core™ Ultra Processors (Series 1) and Intel® Arc™ - Graphics include KV Cache compression for memory reduction along with improved usability, - and model load time optimizations to improve first token latency for LLMs. - * Dynamic quantization was enabled to improve first token latency for LLMs on built-in - Intel® GPUs without impacting accuracy on Intel® Core™ Ultra Processors (Series 1). Second - token latency will also improve for large batch inference. - * A new method to generate synthetic text data is implemented in the Neural Network - Compression Framework (NNCF). This will allow LLMs to be compressed more accurately using - data-aware methods without datasets. Coming soon: This feature will soon be accessible via - Optimum Intel on Hugging Face. - - * More portability and performance to run AI at the edge, in the cloud, or locally. - - * Support for - `Intel® Xeon® 6 Processors with P-cores `__ - (formerly codenamed Granite Rapids) and - `Intel® Core™ Ultra 200V series processors `__ - (formerly codenamed Arrow Lake-S). - * Preview: GenAI API enables multimodal AI deployment with support for multimodal pipelines - for improved contextual awareness, transcription pipelines for easy audio-to-text - conversions, and image generation pipelines for streamlined text-to-visual conversions. 
- * Speculative decoding feature added to the GenAI API for improved performance and efficient - text generation using a small draft model that is periodically corrected by the full-size - model. - * Preview: LoRA adapters are now supported in the GenAI API for developers to quickly and - efficiently customize image and text generation models for specialized tasks. - * The GenAI API now also supports LLMs on NPU allowing developers to specify NPU as the - target device, specifically for WhisperPipeline (for whisper-base, whisper-medium, and - whisper-small) and LLMPipeline (for Llama 3 8B, Llama 2 7B, Mistral-v0.2-7B, - Qwen2-7B-Instruct and Phi-3 Mini-instruct). Use driver version 32.0.100.3104 or later for - best performance. - - *Now deprecated* - - * Python 3.8 is no longer supported: - +* Gold support of the Intel® Xeon® 6 platform with P-cores (formerly code name Granite Rapids) + has been reached. +* Support of Intel® Core™ Ultra 200V series processors (formerly codenamed Arrow Lake-S) has + been implemented. +* LLM performance has been further improved with Rotary Position Embedding optimization; Query, + Key, and Value; and multi-layer perceptron fusion optimization. +* FP16 support has been extended with SDPA and PagedAttention, improving performance of LLM via + both native APIs and the vLLM integration. +* Models with LoRA adapters are now supported. - **OpenVINO™ Runtime** - - *Common* - - * Numpy 2.x has been adopted for all currently supported components, including NNCF. - * A new constant constructor has been added, enabling constants to be created from data pointer - as shared memory. Additionally, it can take ownership of a shared, or other, object, avoiding - a two-step process to wrap memory into ``ov::Tensor``. - * Asynchronous file reading with mmap library has been implemented, reducing loading times for - model files, especially for LLMs. 
- * CPU implementation of SliceScatter operator is now available, used for models such as Gemma, - supporting increased LLM performance. - - - *CPU Device Plugin* - - * Gold support of the Intel® Xeon® 6 platform with P-cores (formerly code name Granite Rapids) - has been reached. - * Support of Intel® Core™ Ultra 200V series processors (formerly codenamed Arrow Lake-S) has - been implemented. - * LLM performance has been further improved with Rotary Position Embedding optimization; Query, - Key, and Value; and multi-layer perceptron fusion optimization. - * FP16 support has been extended with SDPA and PagedAttention, improving performance of LLM via - both native APIs and the vLLM integration. - * Models with LoRA adapters are now supported. +GPU Device Plugin +----------------------------------------------------------------------------------------------- - *GPU Device Plugin* - - * The KV cache INT8 compression mechanism is now available for all supported GPUs. It enables a - significant reduction in memory consumption, increasing performance with a minimal impact to - accuracy (it affects systolic devices slightly more than non-systolic ones). The feature is - activated by default for non-systolic devices. - * LoRA adapters are now functionally supported on GPU. - * A new feature of GPU weightless blob caching enables caching model structure only and reusing - the weights from the original model file. Use the new OPTIMIZE_SIZE property to activate. - * Dynamic quantization with INT4 and INT8 precisions has been implemented and enabled by - default on Intel® Core™ Ultra platforms, improving LLM first token latency. - - - *NPU Device Plugin* - - * Models retrieved from the OpenVINO cache have a smaller memory footprint now. The plugin - releases the cached model (blob) after weights are loaded in NPU regions. Model export is not - available in this scenario. Memory consumption is reduced during inference execution with one - blob size. 
This optimization requires the latest NPU driver: 32.0.100.3104. - * A driver bug for ``ov::intel_npu::device_total_mem_size`` has been fixed. The plugin will now - report 2GB as the maximum allocatable memory for any driver that does not support graph - extension 1.8. Even if older drivers report a larger amount of memory to be available, memory - allocation would fail when 2GB are exceeded. Plugin reports the number that driver exposes - for any driver that supports graph extension 1.8 (or newer). - * A new API is used to initialize the model (available in graph extension 1.8). - * Inference request set_tensors is now supported. - * ``ov::device::LUID`` is now exposed on Windows. - * LLM-related improvements have been implemented in terms of both memory usage and performance. - * AvgPool and MaxPool operator support has been extended, adding support for more PyTorch models. - - * NOTE: for systems based on Intel® Core™ Ultra Processors Series 2, more than 16GB of RAM may - be required to use larger models, such as Llama-2-7B, Mistral-0.2-7B, and Qwen-2-7B - (exceeding 4B parameters) with prompt sizes over 1024 tokens. +* The KV cache INT8 compression mechanism is now available for all supported GPUs. It enables a + significant reduction in memory consumption, increasing performance with a minimal impact to + accuracy (it affects systolic devices slightly more than non-systolic ones). The feature is + activated by default for non-systolic devices. +* LoRA adapters are now functionally supported on GPU. +* A new feature of GPU weightless blob caching enables caching model structure only and reusing + the weights from the original model file. Use the new OPTIMIZE_SIZE property to activate. +* Dynamic quantization with INT4 and INT8 precisions has been implemented and enabled by + default on Intel® Core™ Ultra platforms, improving LLM first token latency. 
- *OpenVINO Python API* +NPU Device Plugin +----------------------------------------------------------------------------------------------- - * Constant now can be created from openvino.Tensor. - * The “release_memory” method has been added for a compiled model, improving control over - memory consumption. +* Models retrieved from the OpenVINO cache have a smaller memory footprint now. The plugin + releases the cached model (blob) after weights are loaded in NPU regions. Model export is not + available in this scenario. Memory consumption is reduced during inference execution with one + blob size. This optimization requires the latest NPU driver: 32.0.100.3104. +* A driver bug for ``ov::intel_npu::device_total_mem_size`` has been fixed. The plugin will now + report 2GB as the maximum allocatable memory for any driver that does not support graph + extension 1.8. Even if older drivers report a larger amount of memory to be available, memory + allocation would fail when 2GB are exceeded. Plugin reports the number that driver exposes + for any driver that supports graph extension 1.8 (or newer). +* A new API is used to initialize the model (available in graph extension 1.8). +* Inference request set_tensors is now supported. +* ``ov::device::LUID`` is now exposed on Windows. +* LLM-related improvements have been implemented in terms of both memory usage and performance. +* AvgPool and MaxPool operator support has been extended, adding support for more PyTorch models. + +* NOTE: for systems based on Intel® Core™ Ultra Processors Series 2, more than 16GB of RAM may + be required to use larger models, such as Llama-2-7B, Mistral-0.2-7B, and Qwen-2-7B + (exceeding 4B parameters) with prompt sizes over 1024 tokens. + + +OpenVINO Python API +----------------------------------------------------------------------------------------------- +* Constant now can be created from openvino.Tensor. 
+* The “release_memory” method has been added for a compiled model, improving control over + memory consumption. - *OpenVINO Node.js API* - * Querying the best device to perform inference of a model with specific operations - is now available in JavaScript API. - * Contribution guidelines have been improved to make it easier for developers to contribute. - * Testing scope has been extended by inference in end-to-end tests. - * JavaScript API samples have been improved for readability and ease of running. +OpenVINO Node.js API +----------------------------------------------------------------------------------------------- +* Querying the best device to perform inference of a model with specific operations + is now available in JavaScript API. +* Contribution guidelines have been improved to make it easier for developers to contribute. +* Testing scope has been extended by inference in end-to-end tests. +* JavaScript API samples have been improved for readability and ease of running. - *TensorFlow Framework Support* - * TensorFlow 2.18.0, Keras 3.6.0, NumPy 2.0.2 in Python 3.12, and NumPy 1.26.4 in other Python - versions have been added to validation. - * Out-of-the-box conversion with static ranks has been improved by devising a new shape for - Switch-Merge condition sub-graphs. - * Complex type for the following operations is now supported: ExpandDims, Pack, Prod, Rsqrt, - ScatterNd, Sub. - * The following issues have been fixed: +TensorFlow Framework Support +----------------------------------------------------------------------------------------------- - * the corner case with one element in LinSpace to avoid division by zero, - * support FP16 and FP64 input types for LeakyRelu, - * support non-i32/i64 output index type for ArgMin/Max operations. +* TensorFlow 2.18.0, Keras 3.6.0, NumPy 2.0.2 in Python 3.12, and NumPy 1.26.4 in other Python + versions have been added to validation. 
+* Out-of-the-box conversion with static ranks has been improved by devising a new shape for + Switch-Merge condition sub-graphs. +* Complex type for the following operations is now supported: ExpandDims, Pack, Prod, Rsqrt, + ScatterNd, Sub. +* The following issues have been fixed: + * the corner case with one element in LinSpace to avoid division by zero, + * support FP16 and FP64 input types for LeakyRelu, + * support non-i32/i64 output index type for ArgMin/Max operations. - *PyTorch Framework Support* - * PyTorch version 2.5 is now supported. - * OpenVINO Model Converter (OVC) now supports TorchScript and ExportedProgram saved on a drive. - * The issue of aten.index.Tensor conversion for indices with “None” values has been fixed, - helping to support the HF Stable Diffusion model in ExportedProgram format. +PyTorch Framework Support +----------------------------------------------------------------------------------------------- +* PyTorch version 2.5 is now supported. +* OpenVINO Model Converter (OVC) now supports TorchScript and ExportedProgram saved on a drive. +* The issue of aten.index.Tensor conversion for indices with “None” values has been fixed, + helping to support the HF Stable Diffusion model in ExportedProgram format. - *ONNX Framework Support* - * ONNX version 1.17.0 is now used. - * Customers' models with DequantizeLinear-21, com.microsoft.MatMulNBits, and - com.microsoft.QuickGelu operations are now supported. +ONNX Framework Support +----------------------------------------------------------------------------------------------- - *JAX/Flax Framework Support* +* ONNX version 1.17.0 is now used. +* Customers' models with DequantizeLinear-21, com.microsoft.MatMulNBits, and + com.microsoft.QuickGelu operations are now supported. - * JAX 0.4.35 and Flax 0.10.0 has been added to validation. - * jax._src.core.ClosedJaxpr object conversion is now supported. 
- * Vision Transformer from google-research/vision_transformer is now supported - (with support for 37 new operations). +JAX/Flax Framework Support +----------------------------------------------------------------------------------------------- +* JAX 0.4.35 and Flax 0.10.0 have been added to validation. +* jax._src.core.ClosedJaxpr object conversion is now supported. +* Vision Transformer from google-research/vision_transformer is now supported + (with support for 37 new operations). - **OpenVINO Model Server** - * The OpenAI API text embedding endpoint has been added, enabling OVMS to be used as a building - block for AI applications like RAG. - `(read more) `__ - * The rerank endpoint has been added based on Cohere API, enabling easy similarity detection - between a query and a set of documents. It is one of the building blocks for AI applications - like RAG and makes integration with frameworks such as langchain easy. - `(read more) `__ - * The following improvements have been done to LLM text generation: +OpenVINO Model Server ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - * The ``echo`` sampling parameter together with ``logprobs`` in the ``completions`` endpoint - is now supported. - * Performance has been increased on both CPU and GPU. - * Throughput in high-concurrency scenarios has been increased with dynamic_split_fuse for GPU. - * Testing coverage and stability has been improved. - * The procedure for service deployment and model repository preparation has been simplified. +* The OpenAI API text embedding endpoint has been added, enabling OVMS to be used as a building + block for AI applications like RAG. + `(read more) `__ +* The rerank endpoint has been added based on Cohere API, enabling easy similarity detection + between a query and a set of documents. It is one of the building blocks for AI applications + like RAG and makes integration with frameworks such as langchain easy.
+ `(read more) `__ +* The following improvements have been made to LLM text generation: + + * The ``echo`` sampling parameter together with ``logprobs`` in the ``completions`` endpoint + is now supported. + * Performance has been increased on both CPU and GPU. + * Throughput in high-concurrency scenarios has been increased with dynamic_split_fuse for GPU. + * Testing coverage and stability have been improved. + * The procedure for service deployment and model repository preparation has been simplified. + +* An experimental version of a Windows binary package - native model server for Windows OS - is + available. This release includes a set of limitations and has limited test coverage. It is + intended for testing, while the production-ready release is expected with 2025.0. All feedback + is welcome. + + +Neural Network Compression Framework ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - * An experimental version of a Windows binary package - native model server for Windows OS - is - available. This release includes a set of limitations and has limited tests coverage. It is - intended for testing, while the production-ready release is expected with 2025.0. All feedback - is welcome. +* A new nncf.data.generate_text_data() method has been added for generating a synthetic dataset + for LLM compression. This approach helps to compress LLMs more accurately in situations when + the dataset is not available or not sufficient. + `See our example `__ + for more information about the usage. +* Support of data-free and data-aware weight compression methods - nncf.compress_weights() - + has been extended with NF4 per-channel quantization, making compressed LLMs more accurate and + faster on NPU. +* Caching of computed statistics in nncf.compress_weights() is now available, significantly + reducing compression time when performing compression of the same LLM multiple times, with + different compression parameters.
To enable it, set the advanced ``statistics_path`` parameter + of nncf.compress_weights() to the desired file path location. +* The ``backup_mode`` optional parameter has been added to nncf.compress_weights(), for + specifying the data type for embeddings, convolutions, and last linear layers during 4-bit + weight compression. Available options are INT8_ASYM (default), INT8_SYM, and NONE (retains + the original floating-point precision of the model weights). In certain situations, + non-default value might give better accuracy of compressed LLMs. +* Preview support is now available for optimizing models in Torch + `FX format `__, nncf.quantize(), and + nncf.compress_weights() methods. After optimization such models can be directly executed + via torch.compile(compressed_model, backend="openvino"). For more details, see + `INT8 quantization example `__. +* Memory consumption of data-aware weight compression methods - nncf.compress_weights() – has + been reduced significantly, with some variation depending on the model and method. +* Support for the following has changed: + + * NumPy 2 added + * PyTorch upgraded to 2.5.1 + * ONNX upgraded to 1.17 + * Python 3.8 discontinued + + + +OpenVINO Tokenizers ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +* Several operations have been introduced and optimized. +* Conversion parameters and environment info have been added to ``rt_info``, improving + reproducibility and debugging. - **Neural Network Compression Framework** - * A new nncf.data.generate_text_data() method has been added for generating a synthetic dataset - for LLM compression. This approach helps to compress LLMs more accurately in situations when - the dataset is not available or not sufficient. - `See our example `__ - for more information about the usage. 
- * Support of data-free and data-aware weight compression methods - nncf.compress_weights() - - has been extended with NF4 per-channel quantization, making compressed LLMs more accurate and - faster on NPU. - * Caching of computed statistics in nncf.compress_weights() is now available, significantly - reducing compression time when performing compression of the same LLM multiple times, with - different compression parameters. To enable it, set the advanced ``statistics_path`` parameter - of nncf.compress_weights() to the desired file path location. - * The ``backup_mode`` optional parameter has been added to nncf.compress_weights(), for - specifying the data type for embeddings, convolutions, and last linear layers during 4-bit - weight compression. Available options are INT8_ASYM (default), INT8_SYM, and NONE (retains - the original floating-point precision of the model weights). In certain situations, - non-default value might give better accuracy of compressed LLMs. - * Preview support is now available for optimizing models in Torch - `FX format `__, nncf.quantize(), and - nncf.compress_weights() methods. After optimization such models can be directly executed - via torch.compile(compressed_model, backend="openvino"). For more details, see - `INT8 quantization example `__. - * Memory consumption of data-aware weight compression methods - nncf.compress_weights() – has - been reduced significantly, with some variation depending on the model and method. - * Support for the following has changed: - - * NumPy 2 added - * PyTorch upgraded to 2.5.1 - * ONNX upgraded to 1.17 - * Python 3.8 discontinued +OpenVINO.GenAI ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +* The following has been added: - **OpenVINO Tokenizers** + * LoRA adapter for the LLMPipeline. + * Text2ImagePipeline with LoRA adapter and text2image samples. 
+ * VLMPipeline and visual_language_chat sample for text generation models with text and image + inputs. + * WhisperPipeline and whisper_speech_recognition sample. - * Several operations have been introduced and optimized. - * Conversion parameters and environment info have been added to ``rt_info``, improving - reproducibility and debugging. +* speculative_decoding_lm has been moved to LLMPipeline based implementation and is now + installed as part of the package. +* On NPU, a set of pipelines has been enabled: WhisperPipeline (for whisper-base, + whisper-medium, and whisper-small), LLMPipeline (for Llama 3 8B, Llama 2 7B, Mistral-v0.2-7B, + Qwen2-7B-Instruct, and Phi-3 Mini-instruct). Use driver version 32.0.100.3104 or later for + best performance. - **OpenVINO.GenAI** - * The following has been added: - * LoRA adapter for the LLMPipeline. - * Text2ImagePipeline with LoRA adapter and text2image samples. - * VLMPipeline and visual_language_chat sample for text generation models with text and image - inputs. - * WhisperPipeline and whisper_speech_recognition sample. +Other Changes and Known Issues ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - * speculative_decoding_lm has been moved to LLMPipeline based implementation and is now - installed as part of the package. - * On NPU, a set of pipelines has been enabled: WhisperPipeline (for whisper-base, - whisper-medium, and whisper-small), LLMPipeline (for Llama 3 8B, Llama 2 7B, Mistral-v0.2-7B, - Qwen2-7B-Instruct, and Phi-3 Mini-instruct). Use driver version 32.0.100.3104 or later for - best performance. 
+Jupyter Notebooks +----------------------------- +* `Text-to-Image generation using OpenVINO GenAI `__ +* `Multi LoRA Image Generation `__ +* `Virtual Try-on using OpenVINO and CatVTON `__ +* `Visual Language Assistant using OpenVINO GenAI `__ +* `Speech recognition using OpenVINO GenAI `__ +* `YoloV11 `__ +* `Llama-3.2-vision `__ +* `Pixtral `__ +* `Segment Anything 2 `__ +* `Video Lips-sync using Wav2Lip `__ +* `Convert JAX to OpenVINO tutorial `__ +Known Issues +----------------------------- +| **Component: CPU Plugin** +| ID: 155898 +| Description: +| When using a new version of Transformers to convert some LLMs + (GPT-J/GPT-NeoX or falcon-7b), the inference accuracy may be impacted on 4th or 5th + generation of Intel® Xeon® processors, due to model structure update triggering inference + precision difference in part of the model. The workaround is to use Transformers version + 4.44.2 or lower. - **Other Changes and Known Issues** +| **Component: GPU Plugin** +| ID: 154583 +| Description: +| LLM accuracy can be low especially on non-systolic platforms like Intel® Core™ Ultra. When + facing the low accuracy issue, the user needs to manually set a config ACTIVATION_SCALING_FACTOR + with a value of 8.0 in the compile_model() function. From the next release, scaling factor + value will be automatically applied through updated IR. - *Jupyter Notebooks* +| **Component: GenAI** +| ID: 156437, 148933 +| Description: +| When using Python GenAI APIs, if ONNX 1.17.0 or later is installed, you may encounter the + error “DLL load failed while importing onnx_cpp2py_export: A dynamic link library (DLL) + initialization routine failed.” It is due to the ONNX dependency issue + `onnx/onnx#6267 `__. + Install + `Microsoft Visual C++ Redistributable `__ + latest supported downloads to fix the issue.
- * `Text-to-Image generation using OpenVINO GenAI `__ - * `Multi LoRA Image Generation `__ - * `Virtual Try-on using OpenVINO and CatVTON `__ - * `Visual Language Assistant using OpenVINO GenAI `__ - * `Speech recognition using OpenVINO GenAI `__ - * `YoloV11 `__ - * `Llama-3.2-vision `__ - * `Pixtral `__ - * `Segment Anything 2 `__ - * `Video Lips-sync using Wav2Lip `__ - * `Convert JAX to OpenVINO tutorial `__ +| **Component: GenAI** +| ID: 156944 +| Description: +| There were backward incompatible changes resulting in different text generated by LLMs like + Mistralai/Mistral-7B-Instruct-v0.2 and TinyLlama/TinyLlama-1.1B-Chat-v1.0 when using a + tokenizer converted by an older version of openvino_tokenizers. A way to resolve the issue is to convert + tokenizer and detokenizer models using the latest openvino_tokenizers. - *Known Issues* - | **Component: CPU Plugin** - | ID: 155898 - | Description: - | Description: When using new version of Transformer version to convert some of LLMs - (GPT-J/GPT-NeoX or falcon-7b), the inference accuracy may be impacted on 4th or 5th - generation of Intel® Xeon® processors, due to model structure update triggering inference - precision difference in part of the model. The workaround is to use transformer version of - 4.44.2 or lower. - - | **Component: GPU Plugin** - | ID: 154583 - | Description: - | LLM accuracy can be low especially on non-systolic platforms like Intel® Core™ Ultra. When - facing the low accuracy issue, user needs to manually set a config ACTIVATION_SCALING_FACOTR - with a value of 8.0 in the compile_model() function. From the next release, scaling factor - value will be automatically applied through updated IR.
- | **Component: GenAI** - | ID: 156437, 148933 - | Description: - | When using Python GenAI APIs, if ONNX 17.0 and later is installed, it may encounter the - error “DLL load failed while importing onnx_cpp2py_export: A dynamic link library (DLL) - initialization routine failed.” It is due to the ONNX dependency issue - `onnx/onnx#6267 `__, - Install - `Microsoft Visual C++ Redistributable `__ - latest supported downloads to fix the issue. - - | **Component: GenAI** - | ID: 156944 - | Description: - | There were backward incompatible changes resulting in different text generated by LLMs like - Mistralai/Mistral-7B-Instruct-v0.2 and TinyLlama/TinyLlama-1.1B-Chat-v1.0 when using a - tokenizer converted by older openvino_tolenizers. A way to resolve the issue is to convert - tokenizer and detokenizer models using the latest openvino_tokenizers. +Previous 2024 releases ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +.. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +.. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -521,20 +472,20 @@ Previous 2024 releases *NPU Device Plugin* - * `Remote Tensor API `__ - is now supported. - * You can now query the available number of tiles (ov::intel_npu::max_tiles) and force a - specific number of tiles to be used by the model, per inference request - (ov::intel_npu::tiles). **Note:** ov::intel_npu::tiles overrides the default number of tiles - selected by the compiler based on performance hints (ov::hint::performance_mode). Any tile - number other than 1 may be a problem for cross platform compatibility, if not tested - explicitly versus the max_tiles value. - * You can now bypass the model caching mechanism in the driver - (ov::intel_npu::bypass_umd_caching). Read more about driver and OpenVINO caching. 
- * Memory footprint at model execution has been reduced by one blob (compiled model) size. - For execution, the plugin no longer retrieves the compiled model from the driver, it uses the - level zero graph handle directly, instead. The compiled model is now retrieved from the driver - only during the export method. +* `Remote Tensor API `__ + is now supported. +* You can now query the available number of tiles (ov::intel_npu::max_tiles) and force a + specific number of tiles to be used by the model, per inference request + (ov::intel_npu::tiles). **Note:** ov::intel_npu::tiles overrides the default number of tiles + selected by the compiler based on performance hints (ov::hint::performance_mode). Any tile + number other than 1 may be a problem for cross platform compatibility, if not tested + explicitly versus the max_tiles value. +* You can now bypass the model caching mechanism in the driver + (ov::intel_npu::bypass_umd_caching). Read more about driver and OpenVINO caching. +* Memory footprint at model execution has been reduced by one blob (compiled model) size. + For execution, the plugin no longer retrieves the compiled model from the driver, it uses the + level zero graph handle directly, instead. The compiled model is now retrieved from the driver + only during the export method. *OpenVINO Python API* @@ -641,7 +592,7 @@ Previous 2024 releases * New samples and pipelines are now available: * An example IterableStreamer implementation in - `multinomial_causal_lm/python sample `__ + `multinomial_causal_lm/python sample `__ * GenAI compilation is now available as part of OpenVINO via the –DOPENVINO_EXTRA_MODULES CMake option. @@ -1669,7 +1620,7 @@ Deprecation And Support Using deprecated features and components is not advised. They are available to enable a smooth transition to new solutions and will be discontinued in the future. To keep using discontinued features, you will have to revert to the last LTS OpenVINO version supporting them. 
-For more details, refer to the `OpenVINO Legacy Features and Components __` +For more details, refer to the :doc:`OpenVINO Legacy Features and Components <../documentation/legacy-features>` page. Discontinued in 2024 @@ -1727,7 +1678,7 @@ Deprecated and to be removed in the future * Model Optimizer will be discontinued with OpenVINO 2025.0. Consider using the :doc:`new conversion methods <../openvino-workflow/model-preparation/convert-model-to-ir>` instead. For more details, see the - `model conversion transition guide `__. + :doc:`model conversion transition guide <../documentation/legacy-features/transition-legacy-conversion-api>`. * OpenVINO property Affinity API will be discontinued with OpenVINO 2025.0. It will be replaced with CPU binding configurations (``ov::hint::enable_cpu_pinning``). * OpenVINO Model Server components: @@ -1735,9 +1686,6 @@ Deprecated and to be removed in the future * “auto shape” and “auto batch size” (reshaping a model in runtime) will be removed in the future. OpenVINO's dynamic shape models are recommended instead. -* Starting with 2025.0 MacOS x86 will no longer be recommended for use due to the discontinuation - of validation. Full support will be removed later in 2025. - * A number of notebooks have been deprecated. For an up-to-date listing of available notebooks, refer to the `OpenVINO™ Notebook index (openvinotoolkit.github.io) `__. @@ -1759,6 +1707,10 @@ Deprecated and to be removed in the future * See alternative: `Machine Translation Python* Demo `__ + * `Open Model Zoo Tools Tutorial `__ + + * No alternatives, demonstrates deprecated tools. + * `Super Resolution with OpenVINO™ `__ * See alternative: `Super Resolution with PaddleGAN and OpenVINO `__ @@ -1859,4 +1811,6 @@ Copyright © 2024, Intel Corporation. All rights reserved. For more complete information about compiler optimizations, see our Optimization Notice. -Performance varies by use, configuration and other factors. 
\ No newline at end of file +Performance varies by use, configuration and other factors. + + diff --git a/docs/articles_en/about-openvino/release-notes-openvino/release-policy.rst b/docs/articles_en/about-openvino/release-notes-openvino/release-policy.rst index 34107c60b73139..44ca052ee8e7b9 100644 --- a/docs/articles_en/about-openvino/release-notes-openvino/release-policy.rst +++ b/docs/articles_en/about-openvino/release-notes-openvino/release-policy.rst @@ -179,7 +179,7 @@ Additional Information * Binary distribution: * Download from `OpenVINO storage `__ - * `pypi.org `__ + * `pypi.org `__ * `DockerHub* `__ diff --git a/docs/articles_en/about-openvino/release-notes-openvino/system-requirements.rst b/docs/articles_en/about-openvino/release-notes-openvino/system-requirements.rst index 79a9f63821c16f..a12cacf8402953 100644 --- a/docs/articles_en/about-openvino/release-notes-openvino/system-requirements.rst +++ b/docs/articles_en/about-openvino/release-notes-openvino/system-requirements.rst @@ -37,7 +37,7 @@ CPU * Ubuntu 20.04 long-term support (LTS), 64-bit (Kernel 5.15+) * macOS 12.6 and above, 64-bit and ARM64 * CentOS 7 - * Red Hat Enterprise Linux (RHEL) 8 and 9, 64-bit + * Red Hat Enterprise Linux 9.3-9.4, 64-bit * openSUSE Tumbleweed, 64-bit and ARM64 * Ubuntu 20.04 ARM64 @@ -65,7 +65,7 @@ GPU * Ubuntu 22.04 long-term support (LTS), 64-bit * Ubuntu 20.04 long-term support (LTS), 64-bit * CentOS 7 - * Red Hat Enterprise Linux (RHEL) 8 and 9, 64-bit + * Red Hat Enterprise Linux 9.3-9.4, 64-bit .. 
tab-item:: Additional considerations diff --git a/docs/articles_en/assets/images/MO_connection_example_1.svg b/docs/articles_en/assets/images/MO_connection_example_1.svg new file mode 100644 index 00000000000000..9e975041032891 --- /dev/null +++ b/docs/articles_en/assets/images/MO_connection_example_1.svg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd1e2d8f82ce07f5d463d6480293935443785979fe16b555cd8e60fb2f253928 +size 55232 diff --git a/docs/articles_en/assets/images/MO_conversion_pipeline.svg b/docs/articles_en/assets/images/MO_conversion_pipeline.svg new file mode 100644 index 00000000000000..e0448b06dda139 --- /dev/null +++ b/docs/articles_en/assets/images/MO_conversion_pipeline.svg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db6f798882e0301f0cf83f1eba90560b5151266612fef2bc5f16a12cf192f0a0 +size 128446 diff --git a/docs/articles_en/assets/images/MO_graph_after_extractors.svg b/docs/articles_en/assets/images/MO_graph_after_extractors.svg new file mode 100644 index 00000000000000..7ee1ebe7c1761a --- /dev/null +++ b/docs/articles_en/assets/images/MO_graph_after_extractors.svg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9d5ee3d23d232fc10072189c0bf18d76f5d5d7217091d81a1ac465d129c034e +size 88648 diff --git a/docs/articles_en/assets/images/MO_graph_after_loader.svg b/docs/articles_en/assets/images/MO_graph_after_loader.svg new file mode 100644 index 00000000000000..380db77679be7f --- /dev/null +++ b/docs/articles_en/assets/images/MO_graph_after_loader.svg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e882e25b5117e4d17a3b94944f58470c0337fafa5afc2ec6aa01f498c442c5f3 +size 73933 diff --git a/docs/articles_en/assets/images/MO_graph_before_partial_inference.svg b/docs/articles_en/assets/images/MO_graph_before_partial_inference.svg new file mode 100644 index 00000000000000..b312a0314b0b55 --- /dev/null +++ 
b/docs/articles_en/assets/images/MO_graph_before_partial_inference.svg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7799a6c30352fa74d7d98f993d9ad7b148d975d96778762df410d69133abf8a8 +size 158171 diff --git a/docs/articles_en/assets/images/MO_ports_example_1.svg b/docs/articles_en/assets/images/MO_ports_example_1.svg new file mode 100644 index 00000000000000..778ee6fd3ecb7a --- /dev/null +++ b/docs/articles_en/assets/images/MO_ports_example_1.svg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8340d5ca434fe74d19f397c1acd0c92b4ad3b16a563975dc1603a6bf8ef03eb6 +size 55262 diff --git a/docs/articles_en/assets/images/MO_ports_example_2.svg b/docs/articles_en/assets/images/MO_ports_example_2.svg new file mode 100644 index 00000000000000..288ce970b3664f --- /dev/null +++ b/docs/articles_en/assets/images/MO_ports_example_2.svg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aed3820019aa5b9d4741c146bd4596e6850ea714e6e44fefe6cccf4707e5f152 +size 55270 diff --git a/docs/articles_en/assets/images/MO_transformations_graph.svg b/docs/articles_en/assets/images/MO_transformations_graph.svg new file mode 100644 index 00000000000000..093365f92a8e8d --- /dev/null +++ b/docs/articles_en/assets/images/MO_transformations_graph.svg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edbc2911e5aa5a672d8ebaf82b3d06f6915e44b8760ac18f88fba1d2e99fddd6 +size 349693 diff --git a/docs/articles_en/assets/images/deploy_encrypted_model.svg b/docs/articles_en/assets/images/deploy_encrypted_model.svg index fa897731b54fef..61d0dbe710994e 100644 --- a/docs/articles_en/assets/images/deploy_encrypted_model.svg +++ b/docs/articles_en/assets/images/deploy_encrypted_model.svg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:454a531a9b2d2883ac9a6beb01ce7ecdd7ec69ea2c68d63b39b65f3780c957fe -size 54772 +oid sha256:6f802b1396fafdc8a80c03c4931d4b6290cc10451961ddba5edcef1c8227833b +size 44097 diff 
--git a/docs/articles_en/assets/images/genai_main_diagram.svg b/docs/articles_en/assets/images/genai_main_diagram.svg deleted file mode 100644 index b01cbd827acb3c..00000000000000 --- a/docs/articles_en/assets/images/genai_main_diagram.svg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:07ce964e115f1e3942cdf381f44b4dc6d466df62c70396306a4f241fb07ea3ed -size 392244 diff --git a/docs/articles_en/assets/images/training_extensions_framework.png b/docs/articles_en/assets/images/training_extensions_framework.png index b518aa584a96fc..3cbbac7fdbfba8 100644 --- a/docs/articles_en/assets/images/training_extensions_framework.png +++ b/docs/articles_en/assets/images/training_extensions_framework.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4c8069733dbd51ff2bd47b47e7d2a7083dac55d9faf66dfb61b897d65eb0a545 -size 47828 +oid sha256:2b3932d0cf0071c629e1013f3e17a9f8abda800eb01c50b3e826a42127e42da7 +size 48770 diff --git a/docs/articles_en/assets/snippets/lpt_intel_cpu_plugin.cpp b/docs/articles_en/assets/snippets/lpt_intel_cpu_plugin.cpp index 76e6d60b8e3e90..d9e41bc77eec17 100644 --- a/docs/articles_en/assets/snippets/lpt_intel_cpu_plugin.cpp +++ b/docs/articles_en/assets/snippets/lpt_intel_cpu_plugin.cpp @@ -38,7 +38,7 @@ auto defaultPrecisions = useLpt ? 
ov::pass::low_precision::precision_set::get_int8_support() : std::vector{}; if (useLpt) { // disable constant folding on dequantization subgraphs so they can be processed by LPT - manager.register_pass(defaultPrecisions); + manager.register_pass(defaultPrecisions); } // OpenVINO common transformations happen here diff --git a/docs/articles_en/assets/snippets/ov_caching.cpp b/docs/articles_en/assets/snippets/ov_caching.cpp index f3113438e20642..aa08a739261b81 100644 --- a/docs/articles_en/assets/snippets/ov_caching.cpp +++ b/docs/articles_en/assets/snippets/ov_caching.cpp @@ -90,41 +90,6 @@ auto compiled = core.compile_model(model, device, config); // Step 5: } } -void part5() { - std::string modelPath = "/tmp/myModel.xml"; - std::string device = "GPU"; - ov::Core core; // Step 1: create ov::Core object - bool hasGPU = false; // Step 1a: Check if GPU is available - auto devices = core.get_available_devices(); - for (auto&& supported : devices) { - hasGPU |= supported.find(device) != std::string::npos; - } - if(!hasGPU) { - return; - } - core.set_property(ov::cache_dir("/path/to/cache/dir")); // Step 1b: Enable caching -//! [ov:caching:part5] -static const char codec_key[] = {0x30, 0x60, 0x70, 0x02, 0x04, 0x08, 0x3F, 0x6F, 0x72, 0x74, 0x78, 0x7F}; -auto codec_xor = [&](const std::string& source_str) { - auto key_size = sizeof(codec_key); - int key_idx = 0; - std::string dst_str = source_str; - for (char& c : dst_str) { - c ^= codec_key[key_idx % key_size]; - key_idx++; - } - return dst_str; -}; -auto compiled = core.compile_model(modelPath, - device, - ov::cache_encryption_callbacks(ov::EncryptionCallbacks{codec_xor, codec_xor}), - ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE)); // Step 5: Compile model -//! [ov:caching:part5] - if (!compiled) { - throw std::runtime_error("error"); - } -} - int main() { try { part0(); @@ -132,8 +97,7 @@ int main() { part2(); part3(); part4(); - part5(); } catch (...) 
{ } return 0; -} +} \ No newline at end of file diff --git a/docs/articles_en/assets/snippets/ov_caching.py b/docs/articles_en/assets/snippets/ov_caching.py index b4534ebcd2d9c3..57bd72f3f9b80b 100644 --- a/docs/articles_en/assets/snippets/ov_caching.py +++ b/docs/articles_en/assets/snippets/ov_caching.py @@ -59,21 +59,3 @@ def decrypt_base64(src): model = core.read_model(model=model_path) compiled_model = core.compile_model(model=model, device_name=device_name, config=config_cache) # ! [ov:caching:part4] - -# ! [ov:caching:part5] -import base64 - -def encrypt_base64(src): - return base64.b64encode(bytes(src, "utf-8")) - -def decrypt_base64(src): - return base64.b64decode(bytes(src, "utf-8")) - -core = ov.Core() -if "GPU" in core.available_devices: - core.set_property({props.cache_dir: path_to_cache_dir}) - config_cache = {} - config_cache["CACHE_ENCRYPTION_CALLBACKS"] = [encrypt_base64, decrypt_base64] - config_cache["CACHE_MODE"] = "OPTIMIZE_SIZE" - compiled_model = core.compile_model(model=model_path, device_name='GPU', config=config_cache) -# ! [ov:caching:part5] diff --git a/docs/articles_en/documentation.rst b/docs/articles_en/documentation.rst index c1dd34f5373429..5be7bb9dbc30fb 100644 --- a/docs/articles_en/documentation.rst +++ b/docs/articles_en/documentation.rst @@ -13,6 +13,7 @@ Documentation API Reference OpenVINO IR format and Operation Sets + Legacy Features Tool Ecosystem OpenVINO Extensibility OpenVINO™ Security diff --git a/docs/articles_en/documentation/legacy-features.rst b/docs/articles_en/documentation/legacy-features.rst new file mode 100644 index 00000000000000..2457d28cf24c15 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features.rst @@ -0,0 +1,130 @@ +Legacy Features and Components +============================== + +.. meta:: + :description: A list of deprecated OpenVINO™ components. + +.. 
toctree:: + :maxdepth: 1 + :hidden: + + OpenVINO Development Tools package + Model Optimizer / Conversion API + Open Model ZOO + legacy-features/multi-device + + +Since OpenVINO has grown very rapidly in recent years, a number of its features +and components have been replaced by other solutions. Some of them are still +supported to assure OpenVINO users are given enough time to adjust their projects, +before the features are fully discontinued. + +This section will give you an overview of these major changes and tell you how +you can proceed to get the best experience and results with the current OpenVINO +offering. + + +| **OpenVINO Development Tools Package** +| *New solution:* OpenVINO Runtime includes all supported components +| *Old solution:* discontinuation planned for OpenVINO 2025.0 +| +| OpenVINO Development Tools used to be the OpenVINO package with tools for + advanced operations on models, such as Model conversion API, Benchmark Tool, + Accuracy Checker, Annotation Converter, Post-Training Optimization Tool, + and Open Model Zoo tools. Most of these tools have been either removed, + replaced by other solutions, or moved to the OpenVINO Runtime package. +| :doc:`See how to install Development Tools ` + + +| **Model Optimizer / Conversion API** +| *New solution:* Direct model support and OpenVINO Converter (OVC) +| *Old solution:* Legacy Conversion API discontinuation planned for OpenVINO 2025.0 +| +| The role of Model Optimizer and later the Conversion API was largely reduced + when all major model frameworks became supported directly. For converting model + files explicitly, it has been replaced with a more light-weight and efficient + solution, the OpenVINO Converter (launched with OpenVINO 2023.1). 
+| :doc:`See how to use OVC <../openvino-workflow/model-preparation>` +| :doc:`See how to transition from the legacy solution ` + + +| **Open Model ZOO** +| *New solution:* users are encouraged to use public model repositories +| *Old solution:* discontinuation planned for OpenVINO 2025.0 +| +| Open Model ZOO provided a collection of models prepared for use with OpenVINO, + and a small set of tools enabling a level of automation for the process. + Since the tools have been mostly replaced by other solutions and several + other model repositories have recently grown in size and popularity, + Open Model ZOO will no longer be maintained. You may still use its resources + until they are fully removed. +| :doc:`See the Open Model ZOO documentation ` +| `Check the OMZ GitHub project `__ +| As for public model databases, `Hugging Face `__ has + become the recommended model source for OpenVINO. + + +| **Multi-Device Execution** +| *New solution:* Automatic Device Selection +| *Old solution:* Legacy Multi-Device Execution discontinuation planned for OpenVINO 2025.0 +| +| The behavior and results of the Multi-Device Execution mode are covered by the ``CUMULATIVE_THROUGHPUT`` + option of the Automatic Device Selection. The only difference is that ``CUMULATIVE_THROUGHPUT`` uses + the devices specified by AUTO, which means that adding devices manually is not mandatory, + while with MULTI, the devices had to be specified before the inference. +| :doc:`Check the Automatic Device Selection <../openvino-workflow/running-inference/inference-devices-and-modes/auto-device-selection>` +| :doc:`Check the legacy solution ` + +Discontinued: +############# + +.. 
dropdown:: Apache MXNet, Caffe, and Kaldi model formats + + | *New solution:* conversion to ONNX via external tools + | *Old solution:* model support discontinued with OpenVINO 2024.0 + | `The last version supporting Apache MXNet, Caffe, and Kaldi model formats `__ + | :doc:`See the currently supported frameworks <../openvino-workflow/model-preparation>` + +.. dropdown:: Post-training Optimization Tool (POT) + + | *New solution:* Neural Network Compression Framework (NNCF) now offers the same functionality + | *Old solution:* POT discontinued with OpenVINO 2024.0 + | :doc:`See how to use NNCF for model optimization <../openvino-workflow/model-optimization>` + | `Check the NNCF GitHub project, including documentation `__ + +.. dropdown:: Inference API 1.0 + + | *New solution:* API 2.0 launched in OpenVINO 2022.1 + | *Old solution:* discontinued with OpenVINO 2024.0 + | `2023.2 is the last version supporting API 1.0 `__ + +.. dropdown:: Compile tool + + | *New solution:* the tool is no longer needed + | *Old solution:* discontinued with OpenVINO 2023.0 + | If you need to compile a model for inference on a specific device, use the following script: + + .. tab-set:: + + .. tab-item:: Python + :sync: py + + .. doxygensnippet:: docs/articles_en/assets/snippets/export_compiled_model.py + :language: python + :fragment: [export_compiled_model] + + .. tab-item:: C++ + :sync: cpp + + .. doxygensnippet:: docs/articles_en/assets/snippets/export_compiled_model.cpp + :language: cpp + :fragment: [export_compiled_model] + +.. dropdown:: TensorFlow integration (OVTF) + + | *New solution:* Direct model support and OpenVINO Converter (OVC) + | *Old solution:* discontinued in OpenVINO 2023.0 + | + | OpenVINO now features native TensorFlow support, with no need for explicit model + conversion. 
+ diff --git a/docs/articles_en/documentation/legacy-features/install-dev-tools.rst b/docs/articles_en/documentation/legacy-features/install-dev-tools.rst new file mode 100644 index 00000000000000..4b0160e11c9082 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/install-dev-tools.rst @@ -0,0 +1,259 @@ +Install OpenVINO™ Development Tools +===================================== + + +.. meta:: + :description: Learn how to install OpenVINO™ Development Tools on Windows, + Linux, and macOS operating systems, using a PyPi package. + +OpenVINO Development Tools is a set of utilities that make it easy to develop and +optimize models and applications for OpenVINO. It provides the following tools: + +* Model conversion API +* Benchmark Tool +* Accuracy Checker and Annotation Converter +* Model Downloader and other Open Model Zoo tools + +The instructions on this page show how to install OpenVINO Development Tools. If you are a +Python developer, it only takes a few simple steps to install the tools with PyPI. If you +are developing in C/C++, OpenVINO Runtime must be installed separately before installing +OpenVINO Development Tools. + +In both cases, Python 3.9 - 3.12 needs to be installed on your system before starting. + +.. note:: + + From the 2022.1 release, the OpenVINO™ Development Tools can only be installed via PyPI. + +.. _python_developers: + +For Python Developers +##################### + +If you are a Python developer, follow the steps in the +:ref:`Installing OpenVINO Development Tools ` section on this page to +install it. Installing OpenVINO Development Tools will also install OpenVINO Runtime as +a dependency, so you don’t need to install OpenVINO Runtime separately. This option is +recommended for new users. + +.. 
_cpp_developers: + +For C/C++ Developers +####################### + +If you are a C/C++ developer, you must first install OpenVINO Runtime separately to set +up the C/C++ libraries, sample code, and dependencies for building applications with +OpenVINO. These files are not included with the PyPI distribution. See the +:doc:`Selector Tool <../../get-started/install-openvino>` page to install OpenVINO Runtime +from an archive file for your operating system. + +Once OpenVINO Runtime is installed, you may install OpenVINO Development Tools for access +to tools like ``mo``, Model Downloader, Benchmark Tool, and other utilities that will help +you optimize your model and develop your application. Follow the steps in the +:ref:`Installing OpenVINO Development Tools ` section on this page +to install it. + +.. _install_dev_tools: + +Installing OpenVINO™ Development Tools +###################################### + +Follow these step-by-step instructions to install OpenVINO Development Tools on your computer. +There are two options to install OpenVINO Development Tools: installation into an existing +environment with a deep learning framework that was used for model training or creation; +or installation into a new environment. + +Installation into an Existing Environment with the Source Deep Learning Framework ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +To install OpenVINO Development Tools (see the :ref:`Install the Package ` +section of this article) into an existing environment with the deep learning framework used +for the model training or creation, run the following command: + +.. 
code-block:: sh + + pip install openvino-dev + + +Installation in a New Environment ++++++++++++++++++++++++++++++++++ + +If you do not have an environment with a deep learning framework for the input model or you +encounter any compatibility issues between OpenVINO and your version of deep learning +framework, you may install OpenVINO Development Tools with validated versions of +frameworks into a new environment. + +Step 1. Set Up Python Virtual Environment +----------------------------------------- + +Create a virtual Python environment to avoid dependency conflicts. To create a virtual +environment, use the following command: + +.. tab-set:: + + .. tab-item:: Windows + :sync: windows + + .. code-block:: sh + + python -m venv openvino_env + + .. tab-item:: Linux and macOS + :sync: linux-and-macos + + .. code-block:: sh + + python3 -m venv openvino_env + + + +Step 2. Activate Virtual Environment +------------------------------------ + +Activate the newly created Python virtual environment by issuing this command: + +.. tab-set:: + + .. tab-item:: Windows + :sync: windows + + .. code-block:: sh + + openvino_env\Scripts\activate + + .. tab-item:: Linux and macOS + :sync: linux-and-macos + + .. code-block:: sh + + source openvino_env/bin/activate + +.. important:: + + The above command must be re-run every time a new command terminal window is opened. + + +Step 3. Set Up and Update PIP to the Highest Version +---------------------------------------------------- + +Make sure `pip` is installed in your environment and upgrade it to the latest version by +issuing the following command: + +.. code-block:: sh + + python -m pip install --upgrade pip + + +.. _install_the_package: + +Step 4. Install the Package +--------------------------- + +To install and configure the components of the development package together with validated +versions of specific frameworks, use the commands below. + +.. 
code-block:: sh + + pip install openvino-dev[extras] + + +where the ``extras`` parameter specifies the source deep learning framework for the input model +and is one or more of the following values separated with ",": ``onnx``, ``pytorch``, +``tensorflow``, ``tensorflow2``. + +For example, to install and configure dependencies required for working with TensorFlow 2.x +and ONNX models, use the following command: + +.. code-block:: sh + + pip install openvino-dev[tensorflow2,onnx] + + +.. note:: + + Model conversion API support for TensorFlow 1.x environment has been deprecated. Use the + ``tensorflow2`` parameter to install a TensorFlow 2.x environment that can convert both + TensorFlow 1.x and 2.x models. If your model isn't compatible with the TensorFlow 2.x + environment, use the ``tensorflow`` parameter to install the TensorFlow 1.x environment. + The TF 1.x environment is provided only for legacy compatibility reasons. + +For more details on the openvino-dev PyPI package, see +`pypi.org <https://pypi.org/project/openvino-dev/>`__ . + +Step 5. Test the Installation +------------------------------ + +To verify the package is properly installed, run the command below (this may take a few seconds): + +.. code-block:: sh + + mo -h + +You will see the help message for ``mo`` if installation finished successfully. If you get an +error, refer to the :doc:`Troubleshooting Guide <../../get-started/troubleshooting-install-config>` +for possible solutions. + +Congratulations! You finished installing OpenVINO Development Tools with C/C++ capability. +Now you can start exploring OpenVINO's functionality through example C/C++ applications. +See the "What's Next?" section to learn more! + +What's Next? +############ + +Learn more about OpenVINO and use it in your own application by trying out some of these examples! + +Get started with Python ++++++++++++++++++++++++ + +.. 
image:: ../../assets/images/get_started_with_python.gif + :width: 400 + +Try the `Python Quick Start Example <../../notebooks/vision-monodepth-with-output.html>`__ +to estimate depth in a scene using an OpenVINO monodepth model in a Jupyter Notebook +inside your web browser. + +Visit the :doc:`Tutorials <../../learn-openvino/interactive-tutorials-python>` page for more +Jupyter Notebooks to get you started with OpenVINO, such as: + +* `OpenVINO Python API Tutorial <../../notebooks/openvino-api-with-output.html>`__ +* `Basic image classification program with Hello Image Classification <../../notebooks/hello-world-with-output.html>`__ +* `Convert a PyTorch model and use it for image background removal <../../notebooks/vision-background-removal-with-output.html>`__ + +Get started with C++ +++++++++++++++++++++ + +.. image:: ../../assets/images/get_started_with_cpp.jpg + :width: 400 + + +Try the :doc:`C++ Quick Start Example <../../learn-openvino/openvino-samples/get-started-demos>` +for step-by-step instructions on building and running a basic image classification C++ application. + +Visit the :doc:`Samples <../../learn-openvino/openvino-samples>` page for other C++ +example applications to get you started with OpenVINO, such as: + +* :doc:`Basic object detection with the Hello Reshape SSD C++ sample <../../learn-openvino/openvino-samples/hello-reshape-ssd>` +* :doc:`Object classification sample <../../learn-openvino/openvino-samples/hello-classification>` + +Learn OpenVINO Development Tools +++++++++++++++++++++++++++++++++ + +* Explore a variety of pre-trained deep learning models in the + :doc:`Open Model Zoo ` and deploy them in demo applications to see how they work. + + .. important:: + + Due to the deprecation of Open Model Zoo, models in the OpenVINO IR format are now + published on `Hugging Face `__. + +* Want to import a model from another framework and optimize its performance with OpenVINO? + Visit the :doc:`Convert a Model ` page. 
+* Accelerate your model's speed even further with quantization and other compression techniques + using :doc:`Neural Network Compression Framework (NNCF) <../../openvino-workflow/model-optimization-guide/quantizing-models-post-training>`. +* Benchmark your model's inference speed with one simple command using the + :doc:`Benchmark Tool <../../learn-openvino/openvino-samples/benchmark-tool>`. + +Additional Resources +#################### + +- `Intel® Distribution of OpenVINO™ toolkit home page `__ diff --git a/docs/articles_en/documentation/legacy-features/model-zoo.rst b/docs/articles_en/documentation/legacy-features/model-zoo.rst new file mode 100644 index 00000000000000..4b761e6c7df831 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/model-zoo.rst @@ -0,0 +1,31 @@ +Model Zoo +========= + +.. _model zoo: + +.. note:: + + Since the deprecation of Open Model Zoo, OpenVINO has significantly extended its presence on the + `Hugging Face `__ model repository. It is currently + the recommended source of optimized OpenVINO IR models. + +Open Model Zoo for OpenVINO™ toolkit delivers a wide variety of free, pre-trained deep learning +models and demo applications that provide full application templates to help you implement deep +learning in Python, C++, or OpenCV Graph API (G-API). + +Models, demos and full documentation are available in the +`Open Model Zoo GitHub repo `__ +and licensed under Apache License Version 2.0. + +Browse through over 200 neural network models, both +`public `__ and from +`Intel `__, and pick the right one for your solution. +Types include object detection, classification, image segmentation, handwriting recognition, +text to speech, pose estimation, and others. The Intel models have already been converted +to work with OpenVINO™ toolkit, while public models can easily be converted using the +:doc:`OpenVINO Model Conversion API <../../openvino-workflow/model-preparation>` utility. 
+ +Open Model Zoo offers a +`comprehensive set of demos `__ that you can adapt for implementing specific deep +learning scenarios in your applications. + diff --git a/docs/articles_en/documentation/legacy-features/multi-device.rst b/docs/articles_en/documentation/legacy-features/multi-device.rst new file mode 100644 index 00000000000000..594f496287d714 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/multi-device.rst @@ -0,0 +1,155 @@ +Multi-device execution +====================== + + +.. meta:: + :description: The Multi-Device execution mode in OpenVINO Runtime assigns + multiple available computing devices to particular inference + requests to execute in parallel. + +.. danger:: + + The Multi-device execution mode described here has been **deprecated**. + + Its functionality is now fully covered by the :ref:`CUMULATIVE_THROUGHPUT ` + option of the :doc:`Automatic Device Selection <../../openvino-workflow/running-inference/inference-devices-and-modes/auto-device-selection>` mode. + This way, all available devices in the system can be used without the need to specify them. + +How MULTI Works +#################### + +The Multi-Device execution mode, or MULTI for short, acts as a "virtual" or a "proxy" device, which does not bind to a specific type of hardware. Instead, it assigns available computing devices to particular inference requests, which are then executed in parallel. + +The potential gains from using Multi-Device execution are: + +* improved throughput from using multiple devices at once, +* increase in performance stability due to multiple devices sharing inference workload. + +Importantly, the Multi-Device mode does not change the application logic, so it does not require you to explicitly compile the model on every device or create and balance inference requests. It appears to use a typical device but internally handles the actual hardware. 
+ +Note that the performance increase in this mode comes from utilizing multiple devices at once. This means that you need to provide the devices with enough inference requests to keep them busy, otherwise you will not benefit much from using MULTI. + + +Using the Multi-Device Mode +########################### + +Following the OpenVINO™ naming convention, the Multi-Device mode is assigned the label of “MULTI.” The only configuration option available for it is a prioritized list of devices to use: + + ++----------------------------+---------------------------------+------------------------------------------------------------+ +| Property | Property values | Description | ++============================+=================================+============================================================+ +| | | MULTI: | | Specifies the devices available for selection. | +| | | comma-separated, no spaces | | The device sequence will be taken as priority | ++----------------------------+---------------------------------+ | from high to low. | +| ``ov::device::priorities`` | | device names | | Priorities can be set directly as a string. | +| | | comma-separated, no spaces | | ++----------------------------+---------------------------------+------------------------------------------------------------+ + + +Specifying the device list explicitly is required by MULTI, as it defines the devices available for inference and sets their priorities. + +Note that OpenVINO™ Runtime enables you to use “GPU” as an alias for “GPU.0” in function calls. +More details on enumerating devices can be found in :doc:`Inference Devices and Modes <../../openvino-workflow/running-inference/inference-devices-and-modes>`. + +The following commands are accepted by the API: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_multi.py + :language: python + :fragment: [MULTI_0] + + .. tab-item:: C++ + :sync: cpp + + .. 
doxygensnippet:: docs/articles_en/assets/snippets/MULTI0.cpp + :language: cpp + :fragment: [part0] + + +To check what devices are present in the system, you can use the Device API. For information on how to do it, check :doc:`Query device properties and configuration <../../openvino-workflow/running-inference/inference-devices-and-modes/query-device-properties>`. + + +Configuring Individual Devices and Creating the Multi-Device On Top ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +As mentioned previously, executing inference with MULTI may be set up by configuring individual devices before creating the "MULTI" device on top. It may be considered for performance reasons. + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_multi.py + :language: python + :fragment: [MULTI_4] + + .. tab-item:: C++ + :sync: cpp + + .. doxygensnippet:: docs/articles_en/assets/snippets/MULTI4.cpp + :language: cpp + :fragment: [part4] + + +Alternatively, you can combine all the individual device settings into a single config file and load it for MULTI to parse. See the code example in the next section. + +Querying the Optimal Number of Inference Requests ++++++++++++++++++++++++++++++++++++++++++++++++++ + +When using MULTI, you don't need to sum over included devices yourself, you can query the optimal number of requests directly, +using the :doc:`configure devices <../../openvino-workflow/running-inference/inference-devices-and-modes/query-device-properties>` property: + +.. tab-set:: + + .. tab-item:: C++ + + .. 
doxygensnippet:: docs/articles_en/assets/snippets/MULTI5.cpp + :language: cpp + :fragment: [part5] + + +Using the Multi-Device with OpenVINO Samples and Benchmarking Performance +######################################################################### + +To see how the Multi-Device execution is used in practice and test its performance, take a look at OpenVINO's Benchmark Application which presents the optimal performance of the plugin without the need for additional settings, like the number of requests or CPU threads. +Here is an example command to evaluate performance of CPU + GPU: + +.. code-block:: sh + + ./benchmark_app -d MULTI:CPU,GPU -m <model> -i <input> -niter 1000 + + +For more information, refer to the :doc:`Benchmark Tool <../../learn-openvino/openvino-samples/benchmark-tool>` article. + + +.. note:: + + You can keep using the FP16 IR without converting it to FP32, even if some of the listed devices do not support it. The conversion will be done automatically for you. + + No demos are yet fully optimized for MULTI, by means of supporting the ``ov::optimal_number_of_infer_requests`` property, using the GPU streams/throttling, and so on. + + +Performance Considerations for the Multi-Device Execution +######################################################### + +For best performance when using the MULTI execution mode you should consider a few recommendations: + +- MULTI usually performs best when the fastest device is specified first in the device candidate list. This is particularly important when the request-level parallelism is not sufficient (e.g. the number of requests is not enough to saturate all devices). +- Just like with any throughput-oriented execution mode, it is highly recommended to query the optimal number of inference requests directly from the instance of the ``ov::CompiledModel``. Refer to the code of the previously mentioned ``benchmark_app`` for more details. 
+- Execution on certain device combinations, for example CPU+GPU, performs better with certain knobs. Refer to the ``benchmark_app`` code for details. One specific example is disabling GPU driver polling, which in turn requires multiple GPU streams to balance out slower communication of inference completion from the device to the host. +- The MULTI logic always attempts to save on copying data between device-agnostic and user-facing inference requests, and device-specific 'worker' requests that are being actually scheduled behind the scene. To facilitate the copy savings, it is recommended to run the requests in the order in which they were created. +- While performance of accelerators combines well with MULTI, the CPU+GPU execution may introduce certain performance issues. It is due to the devices sharing some resources, like power or bandwidth. Enabling the GPU throttling hint, which saves a CPU thread for CPU inference, is an example of a recommended solution addressing this issue. + + +Additional Resources +#################### + +- :doc:`Inference Devices and Modes <../../openvino-workflow/running-inference/inference-devices-and-modes>` +- :doc:`Automatic Device Selection <../../openvino-workflow/running-inference/inference-devices-and-modes/auto-device-selection>` + + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api.rst new file mode 100644 index 00000000000000..e031c10e7e4e08 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api.rst @@ -0,0 +1,863 @@ +Transition from Legacy Conversion API +===================================== + + +.. meta:: + :description: Transition guide from MO / mo.convert_model() to OVC / ov.convert_model(). + +.. 
toctree:: + :maxdepth: 1 + :hidden: + + transition-legacy-conversion-api/legacy-conversion-api + transition-legacy-conversion-api/legacy-model-optimizer-extensibility + +In the 2023.1 OpenVINO release OpenVINO Model Converter was introduced with the corresponding +Python API: ``openvino.convert_model`` method. ``ovc`` and ``openvino.convert_model`` represent +a lightweight alternative of ``mo`` and ``openvino.tools.mo.convert_model`` which are considered +legacy API now. In this article, all the differences between ``mo`` and ``ovc`` are summarized +and the transition guide from the legacy API to the new API is provided. + +Parameters Comparison +##################### + +The comparison of parameters between ov.convert_model() / OVC and mo.convert_model() / MO. + +.. list-table:: + :widths: 20 25 55 + :header-rows: 1 + + * - mo.convert_model() / MO + - ov.convert_model() / OVC + - Differences description + * - input_model + - input_model + - Along with model object or path to input model ov.convert_model() accepts list of model parts, for example, the path to TensorFlow weights plus the path to TensorFlow checkpoint. OVC tool accepts an unnamed input model. + * - output_dir + - output_model + - output_model in OVC tool sets both output model name and output directory. + * - model_name + - output_model + - output_model in OVC tool sets both output model name and output directory. + * - input + - input + - ov.convert_model() accepts tuples for setting multiple parameters. OVC tool 'input' does not have type setting and freezing functionality. ov.convert_model() does not allow input cut. + * - output + - output + - ov.convert_model() does not allow output cut. + * - input_shape + - N/A + - Not available in ov.convert_model() / OVC. Can be replaced by ``input`` parameter. + * - example_input + - example_input + - No differences. + * - batch + - N/A + - Not available in ov.convert_model() / OVC. Can be replaced by model reshape functionality. See details below. 
+ * - mean_values + - N/A + - Not available in ov.convert_model() / OVC. Can be replaced by functionality from ``PrePostProcessor``. See details below. + * - scale_values + - N/A + - Not available in ov.convert_model() / OVC. Can be replaced by functionality from ``PrePostProcessor``. See details below. + * - scale + - N/A + - Not available in ov.convert_model() / OVC. Can be replaced by functionality from ``PrePostProcessor``. See details below. + * - reverse_input_channels + - N/A + - Not available in ov.convert_model() / OVC. Can be replaced by functionality from ``PrePostProcessor``. See details below. + * - source_layout + - N/A + - Not available in ov.convert_model() / OVC. Can be replaced by functionality from ``PrePostProcessor``. See details below. + * - target_layout + - N/A + - Not available in ov.convert_model() / OVC. Can be replaced by functionality from ``PrePostProcessor``. See details below. + * - layout + - N/A + - Not available in ov.convert_model() / OVC. Can be replaced by functionality from ``PrePostProcessor``. See details below. + * - compress_to_fp16 + - compress_to_fp16 + - OVC provides 'compress_to_fp16' for command line tool only, as compression is performed during saving a model to IR (Intermediate Representation). + * - extensions + - extension + - No differences. + * - transform + - N/A + - Not available in ov.convert_model() / OVC. Can be replaced by functionality from ``PrePostProcessor``. See details below. + * - transformations_config + - N/A + - Not available in ov.convert_model() / OVC. + * - static_shape + - N/A + - Not available in ov.convert_model() / OVC. + * - freeze_placeholder_with_value + - N/A + - Not available in ov.convert_model() / OVC. + * - use_legacy_frontend + - N/A + - Not available in ov.convert_model() / OVC. + * - use_legacy_frontend + - N/A + - Not available in ov.convert_model() / OVC. 
+ * - silent + - verbose + - OVC / ov.convert_model provides 'verbose' parameter instead of 'silent' for printing of detailed conversion information if 'verbose' is set to True. + * - log_level + - N/A + - Not available in ov.convert_model() / OVC. + * - version + - version + - N/A + * - progress + - N/A + - Not available in ov.convert_model() / OVC. + * - stream_output + - N/A + - Not available in ov.convert_model() / OVC. + * - share_weights + - share_weights + - No differences. + * - framework + - N/A + - Not available in ov.convert_model() / OVC. + * - help / -h + - help / -h + - OVC provides help parameter only in command line tool. + * - example_output + - output + - OVC / ov.convert_model 'output' parameter includes capabilities of MO 'example_output' parameter. + * - input_model_is_text + - N/A + - Not available in ov.convert_model() / OVC. + * - input_checkpoint + - input_model + - All supported model formats can be passed to 'input_model'. + * - input_meta_graph + - input_model + - All supported model formats can be passed to 'input_model'. + * - saved_model_dir + - input_model + - All supported model formats can be passed to 'input_model'. + * - saved_model_tags + - N/A + - Not available in ov.convert_model() / OVC. + * - tensorflow_custom_operations_config_update + - N/A + - Not available in ov.convert_model() / OVC. + * - tensorflow_object_detection_api_pipeline_config + - N/A + - Not available in ov.convert_model() / OVC. + * - tensorboard_logdir + - N/A + - Not available in ov.convert_model() / OVC. + * - tensorflow_custom_layer_libraries + - N/A + - Not available in ov.convert_model() / OVC. + * - input_symbol + - N/A + - Not available in ov.convert_model() / OVC. + * - nd_prefix_name + - N/A + - Not available in ov.convert_model() / OVC. + * - pretrained_model_name + - N/A + - Not available in ov.convert_model() / OVC. + * - save_params_from_nd + - N/A + - Not available in ov.convert_model() / OVC. 
+ * - legacy_mxnet_model + - N/A + - Not available in ov.convert_model() / OVC. + * - enable_ssd_gluoncv + - N/A + - Not available in ov.convert_model() / OVC. + * - input_proto + - N/A + - Not available in ov.convert_model() / OVC. + * - caffe_parser_path + - N/A + - Not available in ov.convert_model() / OVC. + * - k + - N/A + - Not available in ov.convert_model() / OVC. + * - disable_omitting_optional + - N/A + - Not available in ov.convert_model() / OVC. + * - enable_flattening_nested_params + - N/A + - Not available in ov.convert_model() / OVC. + * - counts + - N/A + - Not available in ov.convert_model() / OVC. + * - remove_output_softmax + - N/A + - Not available in ov.convert_model() / OVC. + * - remove_memory + - N/A + - Not available in ov.convert_model() / OVC. + +Transition from Legacy API to New API +############################################################################ + +mo.convert_model() provides a wide range of preprocessing parameters. Most of these parameters have analogs in OVC or can be replaced with functionality from ``ov.PrePostProcessor`` class. +Here is the guide to transition from legacy model preprocessing to new API preprocessing. + + +``input_shape`` +################ + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: py + :force: + + from openvino.tools import mo + + ov_model = mo.convert_model(model, input_shape=[[1, 3, 100, 100],[1]]) + + - .. code-block:: py + :force: + + import openvino as ov + + ov_model = ov.convert_model(model, input=[[1, 3, 100, 100],[1]]) + + .. tab-item:: CLI + :sync: cli + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: sh + :force: + + mo --input_model MODEL_NAME --input_shape [1,3,100,100],[1] --output_dir OUTPUT_DIR + + - .. code-block:: sh + :force: + + ovc MODEL_NAME --input [1,3,100,100],[1] --output_model OUTPUT_MODEL + +``batch`` +########## + +.. 
tab-set:: + + .. tab-item:: Python + :sync: py + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: py + :force: + + from openvino.tools import mo + + ov_model = mo.convert_model(model, batch=2) + + - .. code-block:: py + :force: + + import openvino as ov + + ov_model = ov.convert_model(model) + input_shape = ov_model.inputs[0].partial_shape + input_shape[0] = 2 # batch size + ov_model.reshape(input_shape) + + .. tab-item:: CLI + :sync: cli + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: sh + :force: + + mo --input_model MODEL_NAME --batch 2 --output_dir OUTPUT_DIR + + - Not available in OVC tool. Switch to the **Python** tab. + +``mean_values`` +################ + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: py + :force: + + from openvino.tools import mo + + ov_model = mo.convert_model(model, mean_values=[0.5, 0.5, 0.5]) + + - .. code-block:: py + :force: + + import openvino as ov + + ov_model = ov.convert_model(model) + + prep = ov.preprocess.PrePostProcessor(ov_model) + prep.input(input_name).tensor().set_layout(ov.Layout("NHWC")) + prep.input(input_name).preprocess().mean([0.5, 0.5, 0.5]) + ov_model = prep.build() + + There is currently no heuristic for automatic detection of the channel to which mean, scale or reverse channels should be applied. ``Layout`` needs to be explicitly specified with "C" channel. For example "NHWC", "NCHW", "?C??". See also :doc:`Layout API overview <../../openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/layout-api-overview>`. + + .. tab-item:: CLI + :sync: cli + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: sh + :force: + + mo --input_model MODEL_NAME --mean_values [0.5,0.5,0.5] --output_dir OUTPUT_DIR + + - Not available in OVC tool. Switch to the **Python** tab. 
+ +``scale_values`` +################# + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: py + :force: + + from openvino.tools import mo + + ov_model = mo.convert_model(model, scale_values=[255., 255., 255.]) + + - .. code-block:: py + :force: + + import openvino as ov + + ov_model = ov.convert_model(model) + + prep = ov.preprocess.PrePostProcessor(ov_model) + prep.input(input_name).tensor().set_layout(ov.Layout("NHWC")) + prep.input(input_name).preprocess().scale([255., 255., 255.]) + ov_model = prep.build() + + There is currently no heuristic for automatic detection of the channel to which mean, scale or reverse channels should be applied. ``Layout`` needs to be explicitly specified with "C" channel. For example "NHWC", "NCHW", "?C??". See also :doc:`Layout API overview <../../openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/layout-api-overview>`. + + .. tab-item:: CLI + :sync: cli + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: sh + :force: + + mo --input_model MODEL_NAME --scale_values [255,255,255] --output_dir OUTPUT_DIR + + - Not available in OVC tool. Switch to the **Python** tab. + +``reverse_input_channels`` +########################### + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: py + :force: + + from openvino.tools import mo + + ov_model = mo.convert_model(model, reverse_input_channels=True) + + - .. 
code-block:: py + :force: + + import openvino as ov + + ov_model = ov.convert_model(model) + + prep = ov.preprocess.PrePostProcessor(ov_model) + prep.input(input_name).tensor().set_layout(ov.Layout("NHWC")) + prep.input(input_name).preprocess().reverse_channels() + ov_model = prep.build() + + There is currently no heuristic for automatic detection of the channel to which mean, scale or reverse channels should be applied. ``Layout`` needs to be explicitly specified with "C" channel. For example "NHWC", "NCHW", "?C??". See also :doc:`Layout API overview <../../openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/layout-api-overview>`. + + .. tab-item:: CLI + :sync: cli + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: sh + :force: + + mo --input_model MODEL_NAME --reverse_input_channels --output_dir OUTPUT_DIR + + - Not available in OVC tool. Switch to the **Python** tab. + +``source_layout`` +################## + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: py + :force: + + import openvino as ov + from openvino.tools import mo + + ov_model = mo.convert_model(model, source_layout={input_name: ov.Layout("NHWC")}) + + - .. code-block:: py + :force: + + import openvino as ov + + ov_model = ov.convert_model(model) + + prep = ov.preprocess.PrePostProcessor(ov_model) + prep.input(input_name).model().set_layout(ov.Layout("NHWC")) + ov_model = prep.build() + + .. tab-item:: CLI + :sync: cli + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: sh + :force: + + mo --input_model MODEL_NAME --source_layout input_name(NHWC) --output_dir OUTPUT_DIR + + - Not available in OVC tool. Switch to the **Python** tab. + +``target_layout`` +################## + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. 
code-block:: py + :force: + + import openvino as ov + from openvino.tools import mo + + ov_model = mo.convert_model(model, target_layout={input_name: ov.Layout("NHWC")}) + + - .. code-block:: py + :force: + + import openvino as ov + + ov_model = ov.convert_model(model) + + prep = ov.preprocess.PrePostProcessor(ov_model) + prep.input(input_name).tensor().set_layout(ov.Layout("NHWC")) + ov_model = prep.build() + + .. tab-item:: CLI + :sync: cli + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: sh + :force: + + mo --input_model MODEL_NAME --target_layout input_name(NHWC) --output_dir OUTPUT_DIR + + - Not available in OVC tool. Switch to the **Python** tab. + +``layout`` +########### + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: py + :force: + + from openvino.tools import mo + + ov_model = mo.convert_model(model, layout={input_name: mo.LayoutMap("NCHW", "NHWC")}) + + - .. code-block:: py + :force: + + import openvino as ov + + ov_model = ov.convert_model(model) + + prep = ov.preprocess.PrePostProcessor(ov_model) + prep.input(input_name).model().set_layout(ov.Layout("NCHW")) + prep.input(input_name).tensor().set_layout(ov.Layout("NHWC")) + ov_model = prep.build() + + .. tab-item:: CLI + :sync: cli + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: sh + :force: + + mo --input_model MODEL_NAME --layout "input_name(NCHW->NHWC)" --output_dir OUTPUT_DIR + + - Not available in OVC tool. Switch to the **Python** tab. + +``transform`` +############## + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. 
code-block:: py + :force: + + from openvino.tools import mo + + ov_model = mo.convert_model(model, transform=[('LowLatency2', {'use_const_initializer': False}), 'Pruning', ('MakeStateful', {'param_res_names': {'input_name': 'output_name'}})]) + + - .. code-block:: py + :force: + + import openvino as ov + from openvino._offline_transformations import apply_low_latency_transformation, apply_pruning_transformation, apply_make_stateful_transformation + + ov_model = ov.convert_model(model) + apply_low_latency_transformation(model, use_const_initializer=False) + apply_pruning_transformation(model) + apply_make_stateful_transformation(model, param_res_names={'input_name': 'output_name'}) + + .. tab-item:: CLI + :sync: cli + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: sh + :force: + + mo --input_model MODEL_NAME --transform LowLatency2[use_const_initializer=False],Pruning,MakeStateful[param_res_names={'input_name':'output_name'}] --output_dir OUTPUT_DIR + + - Not available in OVC tool. Switch to the **Python** tab. + +Cutting Off Parts of a Model +############################ + +Performing surgery by cutting model inputs and outputs from a model is no longer available in the new conversion API. Instead, we recommend performing the cut in the original framework. +Below are examples of model cutting of TensorFlow protobuf, TensorFlow SavedModel, and ONNX formats with the legacy conversion API, compared to achieving the same cut with tools provided by the TensorFlow and ONNX frameworks. +For PyTorch, TensorFlow 2 Keras, and PaddlePaddle, we recommend changing the original model code to perform the model cut. + +Note: This guide does not cover cutting a model by the input port of an operation, which the MO tool supports through the ``input`` and ``output`` options, for example, ``--input 1:name_op``. + +``PyTorch`` +########### + +Model cut for PyTorch is not available in legacy API.
+ +When it is needed to remove a whole module from the model it is possible to replace such modules with `Identity`. Below is the example of removing `conv1` and `bn1` modules at the input and `fc` module at the output of the resnet50 model. + +.. code-block:: py + :force: + + import openvino as ov + import torch + import torchvision + from torch.nn import Identity + + # Load pretrained model + model = torchvision.models.resnet50(weights='DEFAULT') + + # input cut + model.conv1 = Identity() + model.bn1 = Identity() + + # output cut + model.fc = Identity() + + # convert and compile the model + ov_model = ov.convert_model(model, input=([-1,64,-1,-1], torch.float32)) + compiled_model = ov.compile_model(ov_model) + +When it is needed to remove one or more outputs from the model it is possible to create a wrapper for the model and only output the needed output. Below is the example of removing second output from the model. + +.. code-block:: py + :force: + + import openvino as ov + import torch + + # Example of model with multiple outputs + class Model(torch.nn.Module): + def __init__(self): + super(Model, self).__init__() + self.linear1 = torch.nn.Linear(100, 200) + self.activation1 = torch.nn.ReLU() + self.linear2 = torch.nn.Linear(200, 10) + self.activation2 = torch.nn.Sigmoid() + + def forward(self, x): + x = self.linear1(x) + x = self.activation1(x) + y = self.linear2(x) + y = self.activation2(y) + return x, y + + # New model, where some outputs are cut + class CutModel(torch.nn.Module): + def __init__(self): + super(CutModel, self).__init__() + self.model = Model() + + def forward(self, x): + + # get first output + x, _ = self.model(x) + + return x + + # Model with output cut + cut_model = CutModel() + + # convert and compile the model + ov_model = ov.convert_model(cut_model, input=([-1,-1,-1], torch.float32)) + compiled_model = ov.compile_model(ov_model) + + +``TensorFlow protobuf format / tf.Graph / tf.GraphDef`` 
+####################################################### + +Legacy API. + +.. code-block:: py + :force: + + import openvino as ov + import openvino.tools.mo as mo + + import tensorflow as tf + + def load_graph(model_path): + graph_def = tf.compat.v1.GraphDef() + with open(model_path, "rb") as f: + graph_def.ParseFromString(f.read()) + with tf.compat.v1.Graph().as_default() as graph: + tf.graph_util.import_graph_def(graph_def, name="") + return graph + + # Load TF model + graph = load_graph("/path_to_model/HugeCTR.pb") + + # Convert the model with input and output cut + input_name = "concat" + output_name = "MatVec_3/Squeeze" + ov_model = mo.convert_model(graph, input=(input_name, [-1, -1]), output=output_name) + + # Compile the model + compiled_model = ov.compile_model(ov_model) + +Model cut in original FW. + +.. code-block:: py + :force: + + import openvino as ov + import tensorflow as tf + + from tensorflow.python.tools.strip_unused_lib import strip_unused + + def load_graph(model_path): + graph_def = tf.compat.v1.GraphDef() + with open(model_path, "rb") as f: + graph_def.ParseFromString(f.read()) + with tf.compat.v1.Graph().as_default() as graph: + tf.graph_util.import_graph_def(graph_def, name="") + return graph + + # Load TF model + graph = load_graph("/path_to_model/HugeCTR.pb") + + # Cut the model + input_name = "concat" + output_name = "MatVec_3/Squeeze" + graph_def = graph.as_graph_def() + new_graph_def = strip_unused(graph_def, [input_name], [output_name], tf.float32.as_datatype_enum) + + # Convert and compile model + ov_model = ov.convert_model(new_graph_def, input=[-1, -1]) + cmp_model = ov.compile_model(ov_model) + + +``TensorFlow SavedModel format`` +################################ + +Model cut for SavedModel format is not available in legacy API. + +Example of model cut in original FW. + +.. 
code-block:: py + :force: + + import openvino as ov + import tensorflow_hub as hub + + import tensorflow as tf + from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2 + from tensorflow.python.tools.strip_unused_lib import strip_unused + + # Load TF model + model = hub.load("https://tfhub.dev/svampeatlas/vision/embedder/fungi_V2/1?tf-hub-format=compressed") + + # Convert model to GraphDef + model_func = model.signatures["default"] + frozen_func = convert_variables_to_constants_v2(model_func) + graph_def = frozen_func.graph.as_graph_def() + + # Cut the model + input_name = 'InceptionV4/InceptionV4/Conv2d_2b_3x3/Relu' + output_name = 'InceptionV4/InceptionV4/Mixed_7c/concat' + new_graph_def = strip_unused(graph_def, [input_name], [output_name], tf.float32.as_datatype_enum) + + # Convert and compile the model + ov_model = ov.convert_model(new_graph_def) + compiled_model = ov.compile_model(ov_model) + + +``ONNX`` +######## + + +Legacy API. + +.. code-block:: py + :force: + + import openvino as ov + import openvino.tools.mo as mo + + input_path = "/path_to_model/yolov8x.onnx" + + # Convert model and perform input and output cut + input_name = "/model.2/Concat_output_0" + output_name = "/model.22/Concat_3_output_0" + ov_model = mo.convert_model(input_path, input=input_name, output=output_name) + + # Compile model + ov.compile_model(ov_model) + +Model cut in original FW. + +.. 
code-block:: py + :force: + + import onnx + import openvino as ov + + input_path = "/path_to_model/yolov8x.onnx" + + # Cut the model + input_name = "/model.2/Concat_output_0" + output_name = "/model.22/Concat_3_output_0" + cut_model_path = "/path_to_model/yolov8x_cut.onnx" + onnx.utils.extract_model(input_path, cut_model_path, [input_name], [output_name]) + + # Convert model + ov_model = ov.convert_model(cut_model_path) + + # Compile model + ov.compile_model(ov_model) + + +Supported Frameworks in MO vs OVC +################################# + +ov.convert_model() and OVC tool support conversion from PyTorch, TF, TF Lite, ONNX, PaddlePaddle. + + + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api.rst new file mode 100644 index 00000000000000..5302c7912995f6 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api.rst @@ -0,0 +1,188 @@ +Legacy Conversion API +===================== + + +.. toctree:: + :maxdepth: 1 + :hidden: + + Setting Input Shapes + Troubleshooting Reshape Errors + Cutting Off Parts of a Model + Embedding Preprocessing Computation + Compressing a Model to FP16 + Convert Models Represented as Python Objects + Model Optimizer Frequently Asked Questions + Supported Model Formats + +.. meta:: + :description: Model conversion (MO) furthers the transition between training and + deployment environments, it adjusts deep learning models for + optimal execution on target devices. + +.. note:: + This part of the documentation describes a legacy approach to model conversion. Starting with OpenVINO 2023.1, a simpler alternative API for model conversion is available: ``openvino.convert_model`` and OpenVINO Model Converter ``ovc`` CLI tool. 
Refer to :doc:`Model preparation <../../../openvino-workflow/model-preparation>` for more details. If you are still using ``openvino.tools.mo.convert_model`` or the ``mo`` CLI tool, you can still refer to this documentation. However, consider checking the :doc:`transition guide <../transition-legacy-conversion-api>` to learn how to migrate from the legacy conversion API to the new one. Depending on the model topology, the new API can be a better option for you. + +To convert a model to OpenVINO model format (``ov.Model``), you can use the following command: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + from openvino.tools.mo import convert_model + ov_model = convert_model(INPUT_MODEL) + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + mo --input_model INPUT_MODEL + + +If the out-of-the-box conversion (only the ``input_model`` parameter is specified) is not successful, use the parameters mentioned below to override input shapes and cut the model: + +- ``input`` and ``input_shape`` - the model conversion API parameters used to override original input shapes for model conversion, + + For more information about the parameters, refer to the :doc:`Setting Input Shapes ` guide. + +- ``input`` and ``output`` - the model conversion API parameters used to define new inputs and outputs of the converted model to cut off unwanted parts (such as unsupported operations and training sub-graphs), + + For a more detailed description, refer to the :doc:`Cutting Off Parts of a Model ` guide. + +- ``mean_values``, ``scale_values``, ``layout`` - the parameters used to insert additional input pre-processing sub-graphs into the converted model, + + For more details, see the :doc:`Embedding Preprocessing Computation ` article.
+ +- ``compress_to_fp16`` - a compression parameter in ``mo`` command-line tool, which allows generating IR with constants (for example, weights for convolutions and matrix multiplications) compressed to ``FP16`` data type. + + For more details, refer to the :doc:`Compression of a Model to FP16 ` guide. + +To get the full list of conversion parameters, run the following command: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + from openvino.tools.mo import convert_model + ov_model = convert_model(help=True) + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + mo --help + + +Examples of model conversion parameters +####################################### + +Below is a list of separate examples for different frameworks and model conversion parameters: + +1. Launch model conversion for a TensorFlow MobileNet model in the binary protobuf format: + + .. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + from openvino.tools.mo import convert_model + ov_model = convert_model("MobileNet.pb") + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + mo --input_model MobileNet.pb + + + Launch model conversion for a TensorFlow BERT model in the SavedModel format with three inputs. Specify input shapes explicitly where the batch size and the sequence length equal 2 and 30 respectively: + + .. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + from openvino.tools.mo import convert_model + ov_model = convert_model("BERT", input_shape=[[2,30],[2,30],[2,30]]) + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + mo --saved_model_dir BERT --input_shape [2,30],[2,30],[2,30] + + + For more information, refer to the :doc:`Converting a TensorFlow Model ` guide. + +2. Launch model conversion for an ONNX OCR model and specify new output explicitly: + + .. tab-set:: + + .. tab-item:: Python + :sync: py + + .. 
code-block:: py + :force: + + from openvino.tools.mo import convert_model + ov_model = convert_model("ocr.onnx", output="probabilities") + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + mo --input_model ocr.onnx --output probabilities + + + For more information, refer to the :doc:`Converting an ONNX Model ` guide. + + .. note:: + + PyTorch models must be exported to the ONNX format before conversion into IR. More information can be found in :doc:`Converting a PyTorch Model `. + +3. Launch model conversion for a PaddlePaddle UNet model and apply mean-scale normalization to the input: + + .. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + from openvino.tools.mo import convert_model + ov_model = convert_model("unet.pdmodel", mean_values=[123,117,104], scale=255) + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + mo --input_model unet.pdmodel --mean_values [123,117,104] --scale 255 + + + For more information, refer to the :doc:`Converting a PaddlePaddle Model ` guide. + +- To get conversion recipes for specific TensorFlow, ONNX, and PyTorch models, refer to the :doc:`Model Conversion Tutorials `. +- For more information about IR, see :doc:`Deep Learning Network Intermediate Representation and Operation Sets in OpenVINO™ <../../openvino-ir-format/operation-sets>`. 
+- For more information about support of neural network models trained with various frameworks, see :doc:`OpenVINO Extensibility Mechanism <../../openvino-extensibility>`. + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-compressing-model-to-fp16.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-compressing-model-to-fp16.rst new file mode 100644 index 00000000000000..c9e93036a3a7c2 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-compressing-model-to-fp16.rst @@ -0,0 +1,53 @@ +[LEGACY] Compressing a Model to FP16 +============================================= + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Conversion Parameters <../../../../openvino-workflow/model-preparation/conversion-parameters>` article. + +By default, when IR is saved, all relevant floating-point weights are compressed to the ``FP16`` data type during model conversion. +It results in creating a "compressed ``FP16`` model", which occupies about half of +the original space in the file system. The compression may introduce a minor drop in accuracy, +but it is negligible for most models. +If the accuracy drop is significant, you can disable compression explicitly. + +To disable compression, use the ``compress_to_fp16=False`` option: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + from openvino.runtime import save_model + ov_model = save_model(INPUT_MODEL, compress_to_fp16=False) + + ..
tab-item:: CLI + :sync: cli + + .. code-block:: sh + + mo --input_model INPUT_MODEL --compress_to_fp16=False + + +For details on how plugins handle compressed ``FP16`` models, see +:doc:`Inference Devices and Modes <../../../../openvino-workflow/running-inference/inference-devices-and-modes>`. + +.. note:: + + ``FP16`` compression is sometimes used as the initial step for ``INT8`` quantization. + Refer to the :doc:`Post-training optimization <../../../../openvino-workflow/model-optimization-guide/quantizing-models-post-training>` guide for more + information about that. + + +.. note:: + + Some large models (larger than a few GB) when compressed to ``FP16`` may consume an overly large amount of RAM on the loading + phase of the inference. If that is the case for your model, try to convert it without compression: + ``convert_model(INPUT_MODEL, compress_to_fp16=False)`` or ``convert_model(INPUT_MODEL)`` + + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-convert-models-as-python-objects.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-convert-models-as-python-objects.rst new file mode 100644 index 00000000000000..4921dc6bfa221f --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-convert-models-as-python-objects.rst @@ -0,0 +1,150 @@ +[LEGACY] Convert Models Represented as Python Objects +============================================================= + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. 
The guide on the new and recommended method can be found in the :doc:`Model Preparation <../../../../openvino-workflow/model-preparation>` article. + +Model conversion API is represented by ``convert_model()`` method in openvino.tools.mo namespace. ``convert_model()`` is compatible with types from openvino.runtime, like PartialShape, Layout, Type, etc. + +``convert_model()`` has the ability available from the command-line tool, plus the ability to pass Python model objects, such as a PyTorch model or TensorFlow Keras model directly, without saving them into files and without leaving the training environment (Jupyter Notebook or training scripts). In addition to input models consumed directly from Python, ``convert_model`` can take OpenVINO extension objects constructed directly in Python for easier conversion of operations that are not supported in OpenVINO. + +.. note:: + + Model conversion can be performed only when you install + :doc:`the development tools <../../../legacy-features/install-dev-tools>`, which provide + both the ``convert_model()`` method and ``mo`` command-line tool. + The functionality from this article is applicable for ``convert_model()`` only and it is + not present in command-line tool. + + +``convert_model()`` returns an openvino.runtime.Model object which can be compiled and inferred or serialized to IR. + +Example of converting a PyTorch model directly from memory: + +.. code-block:: py + :force: + + import torchvision + from openvino.tools.mo import convert_model + + model = torchvision.models.resnet50(weights='DEFAULT') + ov_model = convert_model(model) + +The following types are supported as an input model for ``convert_model()``: + +* PyTorch - ``torch.nn.Module``, ``torch.jit.ScriptModule``, ``torch.jit.ScriptFunction``. Refer to the :doc:`Converting a PyTorch Model <[legacy]-supported-model-formats/[legacy]-convert-pytorch>` article for more details. 
+* TensorFlow / TensorFlow 2 / Keras - ``tf.keras.Model``, ``tf.keras.layers.Layer``, ``tf.compat.v1.Graph``, ``tf.compat.v1.GraphDef``, ``tf.Module``, ``tf.function``, ``tf.compat.v1.session``, ``tf.train.checkpoint``. Refer to the :doc:`Converting a TensorFlow Model <[legacy]-supported-model-formats/[legacy]-convert-tensorflow>` article for more details. + +``convert_model()`` accepts all parameters available in the MO command-line tool. Parameters can be specified by Python classes or string analogs, similar to the command-line tool. + +Example of using native Python classes to set ``input_shape``, ``mean_values`` and ``layout``: + +.. code-block:: py + :force: + + from openvino.runtime import PartialShape, Layout + from openvino.tools.mo import convert_model + + ov_model = convert_model(model, input_shape=PartialShape([1,3,100,100]), mean_values=[127, 127, 127], layout=Layout("NCHW")) + +Example of using strings for setting ``input_shape``, ``mean_values`` and ``layout``: + +.. code-block:: py + :force: + + from openvino.runtime import Layout + from openvino.tools.mo import convert_model + + ov_model = convert_model(model, input_shape="[1,3,100,100]", mean_values="[127,127,127]", layout="NCHW") + + +The ``input`` parameter can be set by a ``tuple`` with a name, shape, and type. The input name of the type string is required in the tuple. The shape and type are optional. +The shape can be a ``list`` or ``tuple`` of dimensions (``int`` or ``openvino.runtime.Dimension``), or ``openvino.runtime.PartialShape``, or ``openvino.runtime.Shape``. The type can be of numpy type or ``openvino.runtime.Type``. + +Example of using a tuple in the ``input`` parameter to cut a model: + +.. code-block:: py + :force: + + from openvino.tools.mo import convert_model + + ov_model = convert_model(model, input=("input_name", [3], np.float32)) + +For complex cases, when a value needs to be set in the ``input`` parameter, the ``InputCutInfo`` class can be used. 
``InputCutInfo`` accepts four parameters: ``name``, ``shape``, ``type``, and ``value``. + +``InputCutInfo("input_name", [3], np.float32, [0.5, 2.1, 3.4])`` is equivalent to ``InputCutInfo(name="input_name", shape=[3], type=np.float32, value=[0.5, 2.1, 3.4])``. + +Supported types for ``InputCutInfo``: + +* name: ``string``. +* shape: ``list`` or ``tuple`` of dimensions (``int`` or ``openvino.runtime.Dimension``), ``openvino.runtime.PartialShape``, ``openvino.runtime.Shape``. +* type: ``numpy type``, ``openvino.runtime.Type``. +* value: ``numpy.ndarray``, ``list`` of numeric values, ``bool``. + +Example of using ``InputCutInfo`` to freeze an input with a value: + +.. code-block:: py + :force: + + from openvino.tools.mo import convert_model, InputCutInfo + + ov_model = convert_model(model, input=InputCutInfo("input_name", [3], np.float32, [0.5, 2.1, 3.4])) + +To set parameters for models with multiple inputs, use a ``list`` of parameters. +Parameters supporting ``list``: + +* input +* input_shape +* layout +* source_layout +* target_layout +* mean_values +* scale_values + +Example of using lists to set shapes, types and layout for multiple inputs: + +.. code-block:: py + :force: + + from openvino.runtime import Layout + from openvino.tools.mo import convert_model, LayoutMap + + ov_model = convert_model(model, input=[("input1", [1,3,100,100], np.float32), ("input2", [1,3,100,100], np.float32)], layout=[Layout("NCHW"), LayoutMap("NCHW", "NHWC")]) + +``layout``, ``source_layout`` and ``target_layout`` accept an ``openvino.runtime.Layout`` object or ``string``. + +Example of using the ``Layout`` class to set the layout of a model input: + +.. code-block:: py + :force: + + from openvino.runtime import Layout + from openvino.tools.mo import convert_model + + ov_model = convert_model(model, source_layout=Layout("NCHW")) + +To set both source and destination layouts in the ``layout`` parameter, use the ``LayoutMap`` class.
``LayoutMap`` accepts two parameters: ``source_layout`` and ``target_layout``. + +``LayoutMap("NCHW", "NHWC")`` is equivalent to ``LayoutMap(source_layout="NCHW", target_layout="NHWC")``. + +Example of using the ``LayoutMap`` class to change the layout of a model input: + +.. code-block:: py + :force: + + from openvino.tools.mo import convert_model, LayoutMap + + ov_model = convert_model(model, layout=LayoutMap("NCHW", "NHWC")) + +Example of using the ``serialize`` method to save the converted model to OpenVINO IR: + +.. code-block:: py + :force: + + from openvino.runtime import serialize + + serialize(ov_model, "model.xml") + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-cutting-parts-of-a-model.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-cutting-parts-of-a-model.rst new file mode 100644 index 00000000000000..0406602a6e51fa --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-cutting-parts-of-a-model.rst @@ -0,0 +1,585 @@ +[LEGACY] Cutting Off Parts of a Model +================================================ + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + +Sometimes, it is necessary to remove parts of a model when converting it to OpenVINO IR. This chapter describes how to do it, using model conversion API parameters. Model cutting applies mostly to TensorFlow models, which is why TensorFlow will be used in this chapter's examples, but it may be also useful for other frameworks. 
+ +Purpose of Model Cutting +######################## + +The following examples are the situations when model cutting is useful or even required: + +* A model has pre- or post-processing parts that cannot be translated to existing OpenVINO operations. +* A model has a training part that is convenient to be kept in the model but not used during inference. +* A model is too complex to be converted at once, because it contains a lot of unsupported operations that cannot be easily implemented as custom layers. +* A problem occurs with model conversion or inference in OpenVINO™ Runtime. To identify the issue, limit the conversion scope by iterative search for problematic areas in the model. +* A single custom layer or a combination of custom layers is isolated for debugging purposes. + +.. note:: + + Internally, when you run model conversion API, it loads the model, goes through the topology, and tries to find each layer type in a list of known layers. Custom layers are layers that are not included in the list. If your topology contains such layers, model conversion API classifies them as custom. + +Model conversion API parameters +############################### + +Model conversion API provides ``input`` and ``output`` command-line options to specify new entry and exit nodes, while ignoring the rest of the model: + +* ``input`` option accepts a list of layer names of the input model that should be treated as new entry points to the model. See the full list of accepted types for input on :doc:`Model Conversion Python API <[legacy]-convert-models-as-python-objects>` page. +* ``output`` option accepts a list of layer names of the input model that should be treated as new exit points from the model. + +The ``input`` option is also required in some cases unrelated to model cutting.
For example, when the model contains several inputs and ``input_shape`` or ``mean_values`` options are used, the ``input`` option specifies the order of input nodes for correct mapping between multiple items provided in ``input_shape`` and ``mean_values`` and the inputs in the model. + +Model cutting is illustrated with the Inception V1 model, found in the ``models/research/slim`` repository. To proceed with this chapter, make sure you do the necessary steps to :doc:`prepare the model for model conversion <[legacy]-setting-input-shapes>`. + +Default Behavior without input and output +######################################### + +The input model is converted as a whole if neither ``input`` nor ``output`` command line options are used. All ``Placeholder`` operations in a TensorFlow graph are automatically identified as entry points. The ``Input`` layer type is generated for each of them. All nodes that have no consumers are automatically identified as exit points. + +For Inception_V1, there is one ``Placeholder``: input. If the model is viewed in TensorBoard, the input operation is easy to find: + +.. image:: ../../../../assets/images/inception_v1_std_input.svg + :alt: Placeholder in Inception V1 + +``Reshape`` is the only output operation, which is enclosed in a nested name scope of ``InceptionV1/Logits/Predictions``, under the full name of ``InceptionV1/Logits/Predictions/Reshape_1``. + +In TensorBoard, along with some of its predecessors, it looks as follows: + +.. image:: ../../../../assets/images/inception_v1_std_output.svg + :alt: TensorBoard with predecessors + +Convert this model to ``ov.Model``: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + from openvino.tools.mo import convert_model + ov_model = convert_model("inception_v1.pb", batch=1) + + .. tab-item:: CLI + :sync: cli + + .. 
code-block:: sh + + mo --input_model inception_v1.pb -b 1 --output_dir + + +``ov.Model`` can be serialized with the ``ov.serialize()`` method to Intermediate Representation which can be used for model structure exploring. +In IR, the structure of a model has the following layers: + +.. code-block:: xml + :force: + + + + + 1 + 3 + 224 + 224 + + + + + +The ``input`` layer is converted from the TensorFlow graph ``Placeholder`` operation ``input`` and has the same name. + +The ``-b`` option is used here for conversion to override a possible undefined batch size (coded as -1 in TensorFlow models). If a model was frozen with a defined batch size, you may omit this option in all the examples. + +The last layer in the model is ``InceptionV1/Logits/Predictions/Reshape_1``, which matches an output operation in the TensorFlow graph: + +.. code-block:: xml + :force: + + + + + + 1 + 1001 + + + + + 1 + 1001 + + + + + +Due to automatic identification of inputs and outputs, providing the ``input`` and ``output`` options to convert the whole model is not required. The following commands are equivalent for the Inception V1 model: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + from openvino.tools.mo import convert_model + ov_model = convert_model("inception_v1.pb", batch=1) + + ov_model = convert_model("inception_v1.pb", batch=1, input="input", output="InceptionV1/Logits/Predictions/Reshape_1") + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + mo --input_model inception_v1.pb -b 1 --output_dir + + mo --input_model inception_v1.pb -b 1 --input input --output InceptionV1/Logits/Predictions/Reshape_1 --output_dir + + +The Intermediate Representations are identical for both conversions. The same is true if the model has multiple inputs and/or outputs. + +Model Cutting +#################### + +Now, consider how to cut some parts of the model off. 
This chapter describes the first convolution block ``InceptionV1/InceptionV1/Conv2d_1a_7x7`` of the Inception V1 model to illustrate cutting: + +.. image:: ../../../../assets/images/inception_v1_first_block.svg + :alt: Inception V1 first convolution block + +Cutting at the End +++++++++++++++++++++ + +If you want to cut your model at the end, you have the following options: + +1. The following command cuts off the rest of the model after the ``InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu``, making this node the last in the model: + + .. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + from openvino.tools.mo import convert_model + ov_model = convert_model("inception_v1.pb", batch=1, output="InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu") + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + mo --input_model inception_v1.pb -b 1 --output=InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu --output_dir + + + The resulting Intermediate Representation has three layers: + + .. code-block:: xml + :force: + + + + + + + ... + + + + + + ... + + + ... + + + + + + + + + ... + + + ... + + + + + + + + + + + As shown in the TensorBoard picture, the original model has more nodes than its Intermediate Representation. Model conversion, using ``convert_model()``, consists of a set of model transformations, including fusing of batch normalization ``InceptionV1/InceptionV1/Conv2d_1a_7x7/BatchNorm`` with convolution ``InceptionV1/InceptionV1/Conv2d_1a_7x7/convolution``, which is why it is not present in the final model. This is not an effect of the ``output`` option, it is the typical behavior of model conversion API for batch normalizations and convolutions. The effect of the ``output`` is that the ``ReLU`` layer becomes the last one in the converted model. + +2. The following command cuts the edge that comes from 0 output port of the ``InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu`` and the rest of the model, making this node the last one in the model: + + .. 
tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + from openvino.tools.mo import convert_model + ov_model = convert_model("inception_v1.pb", batch=1, output="InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu:0") + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + mo --input_model inception_v1.pb -b 1 --output InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu:0 --output_dir + + + The resulting Intermediate Representation has three layers, which are the same as in the previous case: + + .. code-block:: xml + :force: + + + + + + + ... + + + + + + ... + + + ... + + + + + + + + + ... + + + ... + + + + + + + + + + + This type of cutting is useful for cutting multiple output edges. + +3. The following command cuts the edge that comes to 0 input port of the ``InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu`` and the rest of the model including ``InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu``, deleting this node and making the previous node ``InceptionV1/InceptionV1/Conv2d_1a_7x7/Conv2D`` the last in the model: + + .. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + from openvino.tools.mo import convert_model + ov_model = convert_model("inception_v1.pb", batch=1, output="0:InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu") + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + mo --input_model inception_v1.pb -b 1 --output=0:InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu --output_dir + + + The resulting Intermediate Representation has two layers, which are the same as the first two layers in the previous case: + + .. code-block:: xml + :force: + + + + + + + ... + + + + + + ... + + + ... + + + + + + + + + + + + + +Cutting from the Beginning +++++++++++++++++++++++++++ + +If you want to go further and cut the beginning of the model, leaving only the ``ReLU`` layer, you have the following options: + +1. Use the following parameters, where ``input`` and ``output`` specify the same node in the graph: + + .. tab-set:: + + .. 
tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + from openvino.tools.mo import convert_model + ov_model = convert_model("inception_v1.pb", batch=1, output="InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu", input="InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu") + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + mo --input_model=inception_v1.pb -b 1 --output InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu --input InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu --output_dir + + + The resulting Intermediate Representation looks as follows: + + .. code-block:: xml + :force: + + + + + + + ... + + + + + ... + + + ... + + + + + + + + + + ``Input`` layer is automatically created to feed the layer that is converted from the node specified in ``input``, which is ``InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu`` in this case. ``convert_model()`` does not replace the ``ReLU`` node by the ``Input`` layer. It produces such ``ov.Model`` to make the node the first executable node in the final Intermediate Representation. Therefore, model conversion creates enough ``Inputs`` to feed all input ports of the node that is passed in ``input``. + + Even though ``input_shape`` is not specified in the command line, the shapes for layers are inferred from the beginning of the original TensorFlow model to the point, at which the new input is defined. It has the same shape ``[1,64,112,112]`` as the model converted as a whole or without cutting off the beginning. + +2. Cut the edge incoming to layer by port number. To specify the incoming port, use the following notation ``input=port:input_node``. To cut everything before ``ReLU`` layer, cut the edge incoming to port 0 of ``InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu`` node: + + .. tab-set:: + + .. tab-item:: Python + :sync: py + + .. 
code-block:: py + :force: + + from openvino.tools.mo import convert_model + ov_model = convert_model("inception_v1.pb", batch=1, input="0:InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu", output="InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu") + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + mo --input_model inception_v1.pb -b 1 --input 0:InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu --output InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu --output_dir + + + The resulting Intermediate Representation looks as follows: + + .. code-block:: xml + :force: + + + + + + + ... + + + + + ... + + + ... + + + + + + + + + + ``Input`` layer is automatically created to feed the layer that is converted from the node specified in ``input``, which is ``InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu`` in this case. ``convert_model()`` does not replace the ``ReLU`` node by the ``Input`` layer, it produces such ``ov.Model`` to make the node be the first executable node in the final Intermediate Representation. Therefore, ``convert_model()`` creates enough ``Inputs`` to feed all input ports of the node that is passed in ``input``. + + Even though ``input_shape`` is not specified in the command line, the shapes for layers are inferred from the beginning of the original TensorFlow model to the point, at which the new input is defined. It has the same shape ``[1,64,112,112]`` as the model converted as a whole or without cutting off the beginning. + +3. Cut edge outcoming from layer by port number. To specify the outcoming port, use the following notation ``input=input_node:port``. To cut everything before ``ReLU`` layer, cut edge from ``InceptionV1/InceptionV1/Conv2d_1a_7x7/BatchNorm/batchnorm/add_1`` node to ``ReLU``: + + .. tab-set:: + + .. tab-item:: Python + :sync: py + + .. 
code-block:: py + :force: + + from openvino.tools.mo import convert_model + ov_model = convert_model("inception_v1.pb", batch=1, input="InceptionV1/InceptionV1/Conv2d_1a_7x7/BatchNorm/batchnorm/add_1:0", output="InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu") + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + mo --input_model inception_v1.pb -b 1 --input InceptionV1/InceptionV1/Conv2d_1a_7x7/BatchNorm/batchnorm/add_1:0 --output InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu --output_dir + + + The resulting Intermediate Representation looks as follows: + + .. code-block:: xml + :force: + + + + + + + ... + + + + + ... + + + ... + + layer> + + + + + + + +Inputs with Multiple Input Ports +################################ + +There are operations that contain more than one input port. In the example considered here, the convolution ``InceptionV1/InceptionV1/Conv2d_1a_7x7/convolution`` is such operation. When ``input_shape`` is not provided, a new ``Input`` layer is created for each dynamic input port for the node. If a port is evaluated to a constant blob, this constant remains in the model and a corresponding input layer is not created. TensorFlow convolution used in this model contains two ports: + +* port 0: input tensor for convolution (dynamic) +* port 1: convolution weights (constant) + +Following this behavior, ``convert_model()`` creates an ``Input`` layer for port 0 only, leaving port 1 as a constant. Thus, the result of: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + from openvino.tools.mo import convert_model + ov_model = convert_model("inception_v1.pb", batch=1, input="InceptionV1/InceptionV1/Conv2d_1a_7x7/convolution") + + .. tab-item:: CLI + :sync: cli + + .. 
code-block:: sh + + mo --input_model inception_v1.pb -b 1 --input InceptionV1/InceptionV1/Conv2d_1a_7x7/convolution --output_dir + + +is identical to the result of conversion of the model as a whole, because this convolution is the first executable operation in Inception V1. + +Different behavior occurs when ``input_shape`` is also used as an attempt to override the input shape: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + from openvino.tools.mo import convert_model + ov_model = convert_model("inception_v1.pb", input="InceptionV1/InceptionV1/Conv2d_1a_7x7/convolution", input_shape=[1,224,224,3]) + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + mo --input_model inception_v1.pb --input=InceptionV1/InceptionV1/Conv2d_1a_7x7/convolution --input_shape [1,224,224,3] --output_dir + + +An error occurs (for more information, see the :ref:`Model Conversion FAQ `): + +.. code-block:: sh + + [ ERROR ] Node InceptionV1/InceptionV1/Conv2d_1a_7x7/convolution has more than 1 input and input shapes were provided. + Try not to provide input shapes or specify input port with PORT:NODE notation, where PORT is an integer. + For more information, see FAQ #30 + +When ``input_shape`` is specified and the node contains multiple input ports, you need to provide an input port index together with an input node name. The input port index is specified in front of the node name with ``:`` as a separator (``PORT:NODE``). In this case, the port index 0 of the node ``InceptionV1/InceptionV1/Conv2d_1a_7x7/convolution`` should be specified as ``0:InceptionV1/InceptionV1/Conv2d_1a_7x7/convolution``. + +The correct command line is: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + from openvino.tools.mo import convert_model + ov_model = convert_model("inception_v1.pb", input="0:InceptionV1/InceptionV1/Conv2d_1a_7x7/convolution", input_shape=[1,224,224,3]) + + .. tab-item:: CLI + :sync: cli + + .. 
code-block:: sh + + mo --input_model inception_v1.pb --input 0:InceptionV1/InceptionV1/Conv2d_1a_7x7/convolution --input_shape=[1,224,224,3] --output_dir + + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-embedding-preprocessing-computation.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-embedding-preprocessing-computation.rst new file mode 100644 index 00000000000000..1e1fe61e717eb3 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-embedding-preprocessing-computation.rst @@ -0,0 +1,253 @@ +[LEGACY] Embedding Preprocessing Computation +===================================================== + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Conversion Parameters <../../../../openvino-workflow/model-preparation/conversion-parameters>` article. + +Input data for inference can be different from the training dataset and requires +additional preprocessing before inference. To accelerate the whole pipeline including +preprocessing and inference, model conversion API provides special parameters such as ``mean_values``, +``scale_values``, ``reverse_input_channels``, and ``layout``. + +Based on these parameters, model conversion API generates OpenVINO IR with additionally inserted sub-graphs +to perform the defined preprocessing. This preprocessing block can perform mean-scale +normalization of input data, reverting data along channel dimension, and changing +the data layout. 
See the following sections for details on the parameters, or the +:doc:`Overview of Preprocessing API <../../../../openvino-workflow/running-inference/optimize-inference/optimize-preprocessing>` +for the same functionality in OpenVINO Runtime. + +Specifying Layout +################# + +You may need to set input layouts, as it is required by some preprocessing, for +example, setting a batch, applying mean or scales, and reversing input channels (BGR<->RGB). + +Layout defines the meaning of dimensions in shape and can be specified for both +inputs and outputs. Some preprocessing requires to set input layouts, for example, +setting a batch, applying mean or scales, and reversing input channels (BGR<->RGB). + +For the layout syntax, check the :doc:`Layout API overview <../../../../openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/layout-api-overview>`. +To specify the layout, you can use the ``layout`` option followed by the layout value. + +For example, the following command specifies the ``NHWC`` layout for a Tensorflow +``nasnet_large`` model that was exported to the ONNX format: + + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + from openvino.tools.mo import convert_model + ov_model = convert_model("tf_nasnet_large.onnx", layout="nhwc") + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + mo --input_model tf_nasnet_large.onnx --layout nhwc + + +Additionally, if a model has more than one input or needs both input and output +layouts specified, you need to provide the name of each input or output to apply the layout. + +For example, the following command specifies the layout for an ONNX ``Yolo v3 Tiny`` +model with its first input ``input_1`` in ``NCHW`` layout and second input ``image_shape`` +having two dimensions: batch and size of the image expressed as the ``N?`` layout: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. 
code-block:: py + :force: + + from openvino.tools.mo import convert_model + ov_model = convert_model("yolov3-tiny.onnx", layout={"input_1": "nchw", "image_shape": "n?"}) + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + mo --input_model yolov3-tiny.onnx --layout "input_1(nchw),image_shape(n?)" + + +Changing Model Layout +##################### + +Changing the model layout may be necessary if it differs from the one presented by input data. +Use either ``layout`` or ``source_layout`` with ``target_layout`` to change the layout. + +For example, for the same ``nasnet_large`` model mentioned previously, you can use +the following commands to provide data in the ``NCHW`` layout: + + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + from openvino.tools.mo import convert_model + ov_model = convert_model("tf_nasnet_large.onnx", source_layout="nhwc", target_layout="nchw") + + ov_model = convert_model("tf_nasnet_large.onnx", layout="nhwc->nchw") + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + mo --input_model tf_nasnet_large.onnx --source_layout nhwc --target_layout nchw + + mo --input_model tf_nasnet_large.onnx --layout "nhwc->nchw" + + +Again, if a model has more than one input or needs both input and output layouts +specified, you need to provide the name of each input or output to apply the layout. + +For example, to provide data in the ``NHWC`` layout for the ``Yolo v3 Tiny`` model +mentioned earlier, use the following commands: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + from openvino.tools.mo import convert_model + ov_model = convert_model("yolov3-tiny.onnx", source_layout={"input_1": "nchw", "image_shape": "n?"}, target_layout={"input_1": "nhwc"}) + + ov_model = convert_model("yolov3-tiny.onnx", layout={"input_1": "nchw->nhwc", "image_shape": "n?"}) + + .. tab-item:: CLI + :sync: cli + + .. 
code-block:: sh + + mo --input_model yolov3-tiny.onnx --source_layout "input_1(nchw),image_shape(n?)" --target_layout "input_1(nhwc)" + + mo --input_model yolov3-tiny.onnx --layout "input_1(nchw->nhwc),image_shape(n?)" + + +Specifying Mean and Scale Values +################################ + +Neural network models are usually trained with the normalized input data. This +means that the input data values are converted to be in a specific range, for example, +``[0, 1]`` or ``[-1, 1]``. Sometimes, the mean values (mean images) are subtracted +from the input data values as part of the preprocessing. + +There are two cases of how the input data preprocessing is implemented. + +* The input preprocessing operations are a part of a model. + + In this case, the application does not perform a separate preprocessing step: + everything is embedded into the model itself. ``convert_model()`` will generate the + ov.Model with required preprocessing operations, and no ``mean`` and + ``scale`` parameters are required. +* The input preprocessing operations are not a part of a model and the preprocessing + is performed within the application which feeds the model with input data. + + In this case, information about mean/scale values should be provided to ``convert_model()`` + to embed it to the generated ``ov.Model``. + +Model conversion API represented by ``convert_model()`` provides command-line parameters +to specify the values: ``mean_values``, ``scale_values``, ``scale``. Using these parameters, +model conversion API embeds the corresponding preprocessing block for mean-value +normalization of the input data and optimizes this block so that the preprocessing +takes negligible time for inference. + +For example, the following command runs model conversion for the PaddlePaddle UNet +model and applies mean-scale normalization to the input data: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. 
code-block:: py + :force: + + from openvino.tools.mo import convert_model + ov_model = convert_model("unet.pdmodel", mean_values=[123,117,104], scale=255) + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + mo --input_model unet.pdmodel --mean_values [123,117,104] --scale 255 + + +Reversing Input Channels +######################## + +Sometimes, input images for your application can be of the RGB (or BGR) format +and the model is trained on images of the BGR (or RGB) format, which is in the +opposite order of color channels. In this case, it is important to preprocess the +input images by reversing the color channels before inference. + +To embed this preprocessing step into ``ov.Model``, model conversion API provides the +``reverse_input_channels`` command-line parameter to shuffle the color channels. + +The ``reverse_input_channels`` parameter can be used to preprocess the model +input in the following cases: + +* Only one dimension in the input shape has a size equal to ``3``. +* One dimension has an undefined size and is marked as ``C`` channel using ``layout`` parameters. + +Using the ``reverse_input_channels`` parameter, model conversion API embeds the corresponding +preprocessing block for reversing the input data along channel dimension and optimizes +this block so that the preprocessing takes only negligible time for inference. + +For example, the following command launches model conversion for the TensorFlow AlexNet +model and embeds the ``reverse_input_channels`` preprocessing block into OpenVINO IR: + + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + from openvino.tools.mo import convert_model + ov_model = convert_model("alexnet.pb", reverse_input_channels=True) + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + mo --input_model alexnet.pb --reverse_input_channels + + +.. 
note:: + + If both mean and scale values are specified, the mean is subtracted first and + then the scale is applied regardless of the order of options in the command-line. + Input values are *divided* by the scale value(s). If the ``reverse_input_channels`` + option is also used, ``reverse_input_channels`` will be applied first, then ``mean`` + and after that ``scale``. The data flow in the model looks as follows: + ``Parameter -> ReverseInputChannels -> Mean apply-> Scale apply -> the original body of the model``. + +Additional Resources +#################### + +* :doc:`Overview of Preprocessing API <../../../../openvino-workflow/running-inference/optimize-inference/optimize-preprocessing>` + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-model-optimizer-faq.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-model-optimizer-faq.rst new file mode 100644 index 00000000000000..f035101d715e9b --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-model-optimizer-faq.rst @@ -0,0 +1,947 @@ +[LEGACY] Model Optimizer Frequently Asked Questions +=========================================================== + + +.. important:: + + All of the issues below refer to :doc:`legacy functionalities <../legacy-model-optimizer-extensibility>`. + +If your question is not covered by the topics below, use the +`OpenVINO Support page `__, +where you can participate in a free forum discussion. + +.. warning:: + + Note that OpenVINO support for Apache MXNet, Caffe, and Kaldi has been discontinued. + +.. _question-1: + +Q1. What does the message "[ ERROR ]: Current caffe.proto does not contain field" mean? 
+##################################################################################################################################################### + +**A:** Internally, Model Optimizer uses a protobuf library to parse and load Caffe models. This library requires a file grammar and a generated parser. For a Caffe fallback, Model Optimizer uses a Caffe-generated parser for a Caffe-specific ``.proto`` file (which is usually located in the ``src/caffe/proto`` directory). Make sure that you install exactly the same version of Caffe (with Python interface) as that was used to create the model. + +If you just want to experiment with Model Optimizer and test a Python extension for working with your custom +layers without building Caffe, add the layer description to the ``caffe.proto`` file and generate a parser for it. + +For example, to add the description of the ``CustomReshape`` layer, which is an artificial layer not present in any ``caffe.proto`` files: + +1. Add the following lines to the ``caffe.proto`` file: + + .. code-block:: shell + + package mo_caffe; // To avoid conflict with Caffe system, it is highly recommended to specify different package name. + ... + message LayerParameter { + // Other layers parameters description. + ... + optional CustomReshapeParameter custom_reshape_param = 546; // 546 - ID is any number not present in caffe.proto. + } + // The lines from here to the end of the file are describing contents of this parameter. + message CustomReshapeParameter { + optional BlobShape shape = 1; // Just use the same parameter type as some other Caffe layers. + } + + +2. Generate a new parser: + + .. code-block:: shell + + cd /openvino/tools/mo/front/caffe/proto + python3 generate_caffe_pb2.py --input_proto /src/caffe/proto/caffe.proto + + + where ``PATH_TO_CUSTOM_CAFFE`` is the path to the root directory of custom Caffe. + +3. Now, Model Optimizer is able to load the model into memory and start working with your extensions if there are any. 
+ + However, since your model has custom layers, you must register them as custom. To learn more about it, refer to the :doc:`[Legacy] Custom Layers in Model Optimizer <../legacy-model-optimizer-extensibility>`. + +.. _question-2: + +Q2. How do I create a bare caffemodel, if I have only prototxt? +##################################################################################################################################################### + +**A:** You need the Caffe Python interface. In this case, do the following: + +.. code-block:: shell + + python3 + import caffe + net = caffe.Net('/my_net.prototxt', caffe.TEST) + net.save('/my_net.caffemodel') + + +.. _question-3: + +Q3. What does the message "[ ERROR ]: Unable to create ports for node with id" mean? +##################################################################################################################################################### + +**A:** Most likely, Model Optimizer does not know how to infer output shapes of some layers in the given topology. +To lessen the scope, compile the list of layers that are custom for Model Optimizer: present in the topology, +absent in the :doc:`list of supported operations <../../../../about-openvino/compatibility-and-support/supported-operations>` for the target framework. +Then, refer to available options in the corresponding section in the :doc:`[Legacy] Custom Layers in Model Optimizer <../legacy-model-optimizer-extensibility>` page. + +.. _question-7: + +Q7. What does the message "Invalid proto file: there is neither 'layer' nor 'layers' top-level messages" mean? +##################################################################################################################################################### + +**A:** The structure of any Caffe topology is described in the ``caffe.proto`` file of any Caffe version. 
For example, the following ``.proto`` file in Model Optimizer is used by default: ``mo/front/caffe/proto/my_caffe.proto``, with the structure: + +.. code-block:: sh + + message NetParameter { + // ... some other parameters + // The layers that make up the net. Each of their configurations, including + // connectivity and behavior, is specified as a LayerParameter. + repeated LayerParameter layer = 100; // ID 100 so layers are printed last. + // DEPRECATED: use 'layer' instead. + repeated V1LayerParameter layers = 2; + } + + +This means that any topology should contain layers as top-level structures in ``prototxt``. For example, see the `LeNet topology `__. + +.. _question-8: + +Q8. What does the message "Old-style inputs (via 'input_dims') are not supported. Please specify inputs via 'input_shape'" mean? +##################################################################################################################################################### + +**A:** The structure of any Caffe topology is described in the ``caffe.proto`` file for any Caffe version. For example, the following ``.proto`` file in Model Optimizer is used by default: ``mo/front/caffe/proto/my_caffe.proto``, with the structure: + +.. code-block:: sh + + message NetParameter { + + optional string name = 1; // consider giving the network a name + // DEPRECATED. See InputParameter. The input blobs to the network. + repeated string input = 3; + // DEPRECATED. See InputParameter. The shape of the input blobs. + repeated BlobShape input_shape = 8; + // 4D input dimensions -- deprecated. Use "input_shape" instead. + // If specified, for each input blob there should be four + // values specifying the num, channels, height and width of the input blob. + // Thus, there should be a total of (4 * #input) numbers. + repeated int32 input_dim = 4; + // ... other parameters + } + + +Therefore, the input layer of the provided model must be specified in one of the following styles: + +* + + .. 
code-block:: sh + + input: "data" + input_shape + { + dim: 1 + dim: 3 + dim: 227 + dim: 227 + } + + +* + + .. code-block:: sh + + input: "data" + input_shape + { + dim: 1 + dim: 3 + dim: 600 + dim: 1000 + } + input: "im_info" + input_shape + { + dim: 1 + dim: 3 + } + +* + + .. code-block:: sh + + layer + { + name: "data" + type: "Input" + top: "data" + input_param {shape: {dim: 1 dim: 3 dim: 600 dim: 1000}} + } + layer + { + name: "im_info" + type: "Input" + top: "im_info" + input_param {shape: {dim: 1 dim: 3}} + } + +* + + .. code-block:: sh + + input: "data" + input_dim: 1 + input_dim: 3 + input_dim: 500 + + +However, if your model contains more than one input, Model Optimizer is able to convert the model with inputs specified in one of the first three forms in the above list. The 4th form is not supported for multi-input topologies. + +.. _question-9: + +Q9. What does the message "Mean file for topologies with multiple inputs is not supported" mean? +##################################################################################################################################################### + +**A:** Model Optimizer does not support mean file processing for topologies with more than one input. In this case, you need to perform preprocessing of the inputs for a generated Intermediate Representation in OpenVINO Runtime to perform subtraction for every input of your multi-input model. See the :doc:`Overview of Preprocessing <../../../../openvino-workflow/running-inference/optimize-inference/optimize-preprocessing>` for details. + +.. _question-11: + +Q11. What does the message "Invalid prototxt file: value error" mean? +##################################################################################################################################################### + +**A:** There are multiple reasons why Model Optimizer does not accept a Caffe topology. See FAQs :ref:`#7 ` and :ref:`#20 `. + +.. _question-12: + +Q12. 
What does the message "Error happened while constructing caffe.Net in the Caffe fallback function" mean? +##################################################################################################################################################### + +**A:** Model Optimizer tried to infer a specified layer via the Caffe framework. However, it cannot construct a net using the Caffe Python interface. Make sure that your ``caffemodel`` and ``prototxt`` files are correct. To ensure that the problem is not in the ``prototxt`` file, see FAQ :ref:`#2 `. + +.. _question-13: + +Q13. What does the message "Cannot infer shapes due to exception in Caffe" mean? +##################################################################################################################################################### + +**A:** Model Optimizer tried to infer a custom layer via the Caffe framework, but the model could not be inferred using Caffe. This might happen if you try to convert the model with some noise weights and biases, which conflict with layers that have dynamic shapes. You should write your own extension for every custom layer your topology might have. For more details, refer to the :doc:`[Legacy] Model Optimizer Extensibility <../legacy-model-optimizer-extensibility>` page. + +.. _question-14: + +Q14. What does the message "Cannot infer shape for node {} because there is no Caffe available. Please register python infer function for op or use Caffe for shape inference" mean? +#################################################################################################################################################################################### + +**A:** Your model contains a custom layer and you have correctly registered it with the ``CustomLayersMapping.xml`` file. These steps are required to offload shape inference of the custom layer with the help of the system Caffe. However, Model Optimizer could not import a Caffe package. 
Make sure that you have built Caffe with a ``pycaffe`` target and added it to the ``PYTHONPATH`` environment variable. At the same time, it is highly recommended to avoid dependency on Caffe and write your own Model Optimizer extension for your custom layer. For more information, refer to FAQ :ref:`#44 `. + +.. _question-15: + +Q15. What does the message "Framework name can not be deduced from the given options. Use --framework to choose one of Caffe, TensorFlow, MXNet" mean? +###################################################################################################################################################### + +**A:** You have run Model Optimizer without a flag ``--framework caffe|tf``. Model Optimizer tries to deduce the framework by the extension of input model file (``.pb`` for TensorFlow, ``.caffemodel`` for Caffe, ``.params`` for Apache MXNet). Your input model might have a different extension and you need to explicitly set the source framework. For example, use ``--framework caffe``. + +.. _question-16: + +Q16. What does the message "Input shape is required to convert MXNet model. Please provide it with --input_shape" mean? +##################################################################################################################################################### + +**A:** Input shape was not provided. That is mandatory for converting an MXNet model to the OpenVINO Intermediate Representation, because MXNet models do not contain information about input shapes. Use the ``--input_shape`` flag to specify it. For more information about using the ``--input_shape``, refer to FAQ :ref:`#56 `. + +.. _question-17: + +.. _question-18: + +.. _question-19: + +Q19. What does the message "Both --scale and --scale_values are defined. Specify either scale factor or scale values per input channels" mean? 
+##################################################################################################################################################### + +**A:** The ``--scale`` option sets a scaling factor for all channels, while ``--scale_values`` sets a scaling factor per each channel. Using both of them simultaneously produces ambiguity, so you must use only one of them. For more information, refer to the **Using Framework-Agnostic Conversion Parameters** section: for :doc:`Converting a TensorFlow Model <[legacy]-supported-model-formats/[legacy]-convert-tensorflow>`. + +.. _question-20: + +Q20. What does the message "Cannot find prototxt file: for Caffe please specify --input_proto - a protobuf file that stores topology and --input_model that stores pre-trained weights" mean? +############################################################################################################################################################################################## + +**A:** Model Optimizer cannot find a ``.prototxt`` file for a specified model. By default, it must be located in the same directory as the input model with the same name (except extension). If any of these conditions is not satisfied, use ``--input_proto`` to specify the path to the ``.prototxt`` file. + +.. _question-21: + +.. _question-22: + +Q22. What does the message "Failed to create directory .. . Permission denied!" mean? +##################################################################################################################################################### + +**A:** Model Optimizer cannot create a directory specified via ``--output_dir``. Make sure that you have enough permissions to create the specified directory. + +.. _question-23: + +Q23. What does the message "Discovered data node without inputs and value" mean? 
+##################################################################################################################################################### + +**A:** One of the layers in the specified topology might not have inputs or values. Make sure that the provided ``caffemodel`` and ``protobuf`` files are correct. + +.. _question-24: + +Q24. What does the message "Part of the nodes was not translated to IE. Stopped" mean? +##################################################################################################################################################### + +**A:** Some of the operations are not supported by OpenVINO Runtime and cannot be translated to OpenVINO Intermediate Representation. You can extend Model Optimizer by allowing generation of new types of operations and implement these operations in the dedicated OpenVINO plugins. For more information, refer to the :doc:`OpenVINO Extensibility Mechanism <../../../openvino-extensibility>` guide. + +.. _question-25: + +Q25. What does the message "While creating an edge from .. to .. : node name is undefined in the graph. Check correctness of the input model" mean? +##################################################################################################################################################### + +**A:** Model Optimizer cannot build a graph based on a specified model. Most likely, it is incorrect. + +.. _question-26: + +Q26. What does the message "Node does not exist in the graph" mean? +##################################################################################################################################################### + +**A:** You might have specified an output node via the ``--output`` flag that does not exist in a provided model. Make sure that the specified output is correct and this node exists in the current model. + +.. _question-27: + +Q27. What does the message "--input parameter was provided. Other inputs are needed for output computation. 
Provide more inputs or choose another place to cut the net" mean? +############################################################################################################################################################################## + +**A:** Most likely, Model Optimizer tried to cut the model by a specified input. However, other inputs are needed. + +.. _question-28: + +Q28. What does the message "Placeholder node does not have an input port, but input port was provided" mean? +##################################################################################################################################################### + +**A:** You might have specified a placeholder node with an input node, while the placeholder node does not have it in the model. + +.. _question-29: + +Q29. What does the message "Port index is out of number of available input ports for node" mean? +##################################################################################################################################################### + +**A:** This error occurs when an incorrect input port is specified with the ``--input`` command line argument. When using ``--input``, you may optionally specify an input port in the form: ``X:node_name``, where ``X`` is an integer index of the input port starting from 0 and ``node_name`` is the name of a node in the model. This error occurs when the specified input port ``X`` is not in the range 0..(n-1), where n is the number of input ports for the node. Specify a correct port index, or do not use it if it is not needed. + +.. _question-30: + +Q30. What does the message "Node has more than 1 input and input shapes were provided. Try not to provide input shapes or specify input port with PORT:NODE notation, where PORT is an integer" mean? 
+###################################################################################################################################################################################################### + +**A:** This error occurs when an incorrect combination of the ``--input`` and ``--input_shape`` command line options is used. Using both ``--input`` and ``--input_shape`` is valid only if ``--input`` points to the ``Placeholder`` node, a node with one input port or ``--input`` has the form ``PORT:NODE``, where ``PORT`` is an integer port index of input for node ``NODE``. Otherwise, the combination of ``--input`` and ``--input_shape`` is incorrect. + + +.. _question-31: + +Q31. What does the message "Input port > 0 in --input is not supported if --input_shape is not provided. Node: NAME_OF_THE_NODE. Omit port index and all input ports will be replaced by placeholders. Or provide --input_shape" mean? +####################################################################################################################################################################################################################################### + +**A:** When using the ``PORT:NODE`` notation for the ``--input`` command line argument and ``PORT`` > 0, you should specify ``--input_shape`` for this input. This is a limitation of the current Model Optimizer implementation. + +.. note:: This message is no longer relevant, because the limitation on the input port index for model truncation has been resolved. + +.. _question-32: + +Q32. What does the message "No or multiple placeholders in the model, but only one shape is provided, cannot set it" mean? +##################################################################################################################################################### + +**A:** You might have provided only one shape for the placeholder, while there are none or multiple inputs in the model. Make sure that you have provided the correct data for placeholder nodes. + +.. 
_question-33: + +Q33. What does the message "The amount of input nodes for port is not equal to 1" mean? +##################################################################################################################################################### + +**A:** This error occurs when the ``SubgraphMatch.single_input_node`` function is used for an input port that supplies more than one node in a sub-graph. The ``single_input_node`` function can be used only for ports that have a single consumer inside the matching sub-graph. When multiple nodes are connected to the port, use the ``input_nodes`` function or ``node_by_pattern`` function instead of ``single_input_node``. For more details, refer to the **Graph Transformation Extensions** section in the :doc:`[Legacy] Model Optimizer Extensibility <../legacy-model-optimizer-extensibility/[legacy]-model-optimizer-extensions/[legacy]-graph-transformation-extensions>` guide. + +.. _question-34: + +Q34. What does the message "Output node for port has already been specified" mean? +##################################################################################################################################################### + +**A:** This error occurs when the ``SubgraphMatch._add_output_node`` function is called manually from the user's extension code. This is an internal function, and you should not call it directly. + +.. _question-35: + +Q35. What does the message "Unsupported match kind.... Match kinds "points" or "scope" are supported only" mean? +##################################################################################################################################################### + +**A:** While using a configuration file to implement a TensorFlow front replacement extension, an incorrect match kind was used. Only ``points`` or ``scope`` match kinds are supported. For more details, refer to the :doc:`[Legacy] Model Optimizer Extensibility <../legacy-model-optimizer-extensibility>` guide. + +.. 
_question-36: + +Q36. What does the message "Cannot write an event file for the TensorBoard to directory" mean? +##################################################################################################################################################### + +**A:** Model Optimizer tried to write an event file in the specified directory but failed to do that. That could happen when the specified directory does not exist or you do not have permissions to write in it. + +.. _question-37: + +Q37. What does the message "There is no registered 'infer' function for node with op = .. . Please implement this function in the extensions" mean? +##################################################################################################################################################### + +**A:** Most likely, you tried to extend Model Optimizer with a new primitive, but you did not specify an infer function. For more information on extensions, see the :doc:`OpenVINO Extensibility Mechanism <../../../openvino-extensibility>` guide. + +.. _question-38: + +Q38. What does the message "Stopped shape/value propagation at node" mean? +##################################################################################################################################################### + +**A:** Model Optimizer cannot infer shapes or values for the specified node. It can happen because of the following reasons: a bug exists in the custom shape infer function, the node inputs have incorrect values/shapes, or the input shapes are incorrect. + +.. _question-39: + +Q39. What does the message "The input with shape .. does not have the batch dimension" mean? +##################################################################################################################################################### + +**A:** Batch dimension is the first dimension in the shape and it should be equal to 1 or undefined. 
In your case, it is neither equal to 1 nor undefined, which is why the ``-b`` shortcut produces undefined and unspecified behavior. To resolve the issue, specify full shapes for each input with the ``--input_shape`` option. Run Model Optimizer with the ``--help`` option to learn more about the notation for input shapes. + +.. _question-40: + +Q40. What does the message "Not all output shapes were inferred or fully defined for node" mean? +##################################################################################################################################################### + +**A:** Most likely, the shape is not defined (partially or fully) for the specified node. You can use ``--input_shape`` with positive integers to override model input shapes. + +.. _question-41: + +Q41. What does the message "Shape for tensor is not defined. Can not proceed" mean? +##################################################################################################################################################### + +**A:** This error occurs when the ``--input`` command-line option is used to cut a model and ``--input_shape`` is not used to override shapes for a node, so a shape for the node cannot be inferred by Model Optimizer. You need to help Model Optimizer by specifying shapes with ``--input_shape`` for each node specified with the ``--input`` command-line option. + +.. _question-42: + +Q42. What does the message "Module TensorFlow was not found. Please install TensorFlow 1.2 or higher" mean? +##################################################################################################################################################### + +**A:** To convert TensorFlow models with Model Optimizer, TensorFlow 1.2 or newer must be installed. For more information on prerequisites, see the :doc:`Configuring Model Optimizer <../legacy-conversion-api>` guide. + +.. _question-43: + +Q43. 
What does the message "Cannot read the model file: it is incorrect TensorFlow model file or missing" mean? +##################################################################################################################################################### + +**A:** The model file should contain a frozen TensorFlow graph in the text or binary format. Make sure that ``--input_model_is_text`` is provided for a model in the text format. By default, a model is interpreted as binary file. + +.. _question-44: + +Q44. What does the message "Cannot pre-process TensorFlow graph after reading from model file. File is corrupt or has unsupported format" mean? +##################################################################################################################################################### + +**A:** Most likely, there is a problem with the specified file for the model. The file exists, but it has an invalid format or is corrupted. + +.. _question-45: + +Q45. What does the message "Found custom layer. Model Optimizer does not support this layer. Please, register it in CustomLayersMapping.xml or implement extension" mean? +########################################################################################################################################################################## + +**A:** This means that the layer ``{layer_name}`` is not supported in Model Optimizer. You will find a list of all unsupported layers in the corresponding section. You should implement the extensions for this layer. See :doc:`OpenVINO Extensibility Mechanism <../../../openvino-extensibility>` for more information. + +.. _question-46: + +Q46. What does the message "Custom replacement configuration file does not exist" mean? 
+##################################################################################################################################################### + +**A:** A path to the custom replacement configuration file was provided with the ``--transformations_config`` flag, but the file could not be found. Make sure the specified path is correct and the file exists. + +.. _question-47: + +Q47. What does the message "Extractors collection have case insensitive duplicates" mean? +##################################################################################################################################################### + +**A:** When extending Model Optimizer with new primitives, keep in mind that their names are case-insensitive. Most likely, another operation with the same name is already defined. For more information, see the :doc:`OpenVINO Extensibility Mechanism <../../../openvino-extensibility>` guide. + +.. _question-48: + +Q48. What does the message "Input model name is not in an expected format, cannot extract iteration number" mean? +##################################################################################################################################################### + +**A:** Model Optimizer cannot load an MXNet model in the specified file format. Make sure you use the ``.json`` or ``.param`` format. + +.. _question-49: + +Q49. What does the message "Cannot convert type of placeholder because not all of its outputs are 'Cast' to float operations" mean? +##################################################################################################################################################### + +**A:** There are models where ``Placeholder`` has the UINT8 type and the first operation after it is 'Cast', which casts the input to FP32. Model Optimizer detected that the ``Placeholder`` has the UINT8 type, but the next operation is not 'Cast' to float. Model Optimizer does not support such a case. 
Make sure you change the model to have ``Placeholder`` for FP32. + +.. _question-50: + +Q50. What does the message "Data type is unsupported" mean? +##################################################################################################################################################### + +**A:** Model Optimizer cannot read the value with the specified data type. Currently, the following types are supported: bool, float16, float32, double, int8, int16, int32, int64, uint8, uint16, uint32, uint64, str. + +.. _question-51: + +Q51. What does the message "No node with name ..." mean? +##################################################################################################################################################### + +**A:** Model Optimizer tried to access a node that does not exist. This could happen if you have incorrectly specified placeholder, input or output node name. + +.. _question-52: + +Q52. What does the message "Module MXNet was not found. Please install MXNet 1.0.0" mean? +##################################################################################################################################################### + +**A:** To convert MXNet models with Model Optimizer, Apache MXNet 1.0.0 must be installed. For more information about prerequisites, see the :doc:`Configuring Model Optimizer <../legacy-conversion-api>` guide. + +.. _question-53: + +Q53. What does the message "The following error happened while loading MXNet model .." mean? +##################################################################################################################################################### + +**A:** Most likely, there is a problem with loading of the MXNet model. Make sure the specified path is correct, the model exists and is not corrupted, and you have sufficient permissions to work with it. + +.. _question-54: + +Q54. What does the message "The following error happened while processing input shapes: .." mean? 
+##################################################################################################################################################### + +**A:** Make sure inputs are defined and have correct shapes. You can use ``--input_shape`` with positive integers to override model input shapes. + +.. _question-55: + +Q55. What does the message "Attempt to register of custom name for the second time as class. Note that custom names are case-insensitive" mean? +##################################################################################################################################################### + +**A:** When extending Model Optimizer with new primitives, keep in mind that their names are case-insensitive. Most likely, another operation with the same name is already defined. For more information, see the :doc:`OpenVINO Extensibility Mechanism <../../../openvino-extensibility>` guide. + +.. _question-56: + +Q56. What does the message "Both --input_shape and --batch were provided. Please, provide only one of them" mean? +##################################################################################################################################################### + +**A:** Specifying the batch and the input shapes at the same time is not supported. You must specify a desired batch as the first value of the input shape. + +.. _question-57: + +Q57. What does the message "Input shape .. cannot be parsed" mean? +##################################################################################################################################################### + +**A:** The specified input shape cannot be parsed. Define it in one of the following ways: + +* + + .. code-block:: shell + + mo --input_model .caffemodel --input_shape (1,3,227,227) + +* + + .. code-block:: shell + + mo --input_model .caffemodel --input_shape [1,3,227,227] + +* In case of multi input topology you should also specify inputs: + + .. 
code-block:: shell + + mo --input_model /path-to/your-model.caffemodel --input data,rois --input_shape (1,3,227,227),(1,6,1,1) + + +Keep in mind that there is no space between and inside the brackets for input shapes. + +.. _question-58: + +Q58. What does the message "Please provide input layer names for input layer shapes" mean? +##################################################################################################################################################### + +**A:** When specifying input shapes for several layers, you must provide names for inputs, whose shapes will be overwritten. Additional information for ``--input_shape`` is in FAQ :ref:`#56 `. + +.. _question-59: + +Q59. What does the message "Values cannot be parsed" mean? +##################################################################################################################################################### + +**A:** Mean values for the given parameter cannot be parsed. It should be a string with a list of mean values. For example, in '(1,2,3)', 1 stands for the RED channel, 2 for the GREEN channel, 3 for the BLUE channel. + +.. _question-60: + +Q60. What does the message ".. channels are expected for given values" mean? +##################################################################################################################################################### + +**A:** The number of channels and the number of given values for mean values do not match. The shape should be defined as '(R,G,B)' or '[R,G,B]'. The shape should not contain undefined dimensions (? or -1). The order of values is as follows: (value for a RED channel, value for a GREEN channel, value for a BLUE channel). + +.. _question-61: + +Q61. What does the message "You should specify input for each mean value" mean? 
+##################################################################################################################################################### + +**A:** Most likely, you didn't specify inputs using ``--mean_values``. Specify inputs with the ``--input`` flag. For usage examples, refer to the FAQ :ref:`#62 `. + +.. _question-62: + +Q62. What does the message "You should specify input for each scale value" mean? +##################################################################################################################################################### + +**A:** Most likely, you didn't specify inputs using ``--scale_values``. Specify inputs with the ``--input`` flag. For usage examples, refer to the FAQ :ref:`#63 `. + +.. _question-63: + +Q63. What does the message "Number of inputs and mean values does not match" mean? +##################################################################################################################################################### + +**A:** The number of specified mean values and the number of inputs must be equal. + +.. _question-64: + +Q64. What does the message "Number of inputs and scale values does not match" mean? +##################################################################################################################################################### + +**A:** The number of specified scale values and the number of inputs must be equal. + +.. _question-65: + +Q65. What does the message "No class registered for match kind ... Supported match kinds are .. " mean? +##################################################################################################################################################### + +**A:** A replacement defined in the configuration file for sub-graph replacement, using node names patterns or start/end nodes, has the ``match_kind`` attribute. The attribute may have only one of the values: ``scope`` or ``points``. 
If a different value is provided, this error is displayed. + +.. _question-66: + +Q66. What does the message "No instance(s) is(are) defined for the custom replacement" mean? +##################################################################################################################################################### + +**A:** A replacement defined in the configuration file for sub-graph replacement, using node names patterns or start/end nodes, has the ``instances`` attribute. This attribute is mandatory. This error will occur if the attribute is missing. For more details, refer to the **Graph Transformation Extensions** section in the :doc:`[Legacy] Model Optimizer Extensibility <../legacy-model-optimizer-extensibility>` guide. + +.. _question-67: + +Q67. What does the message "The instance must be a single dictionary for the custom replacement with id .." mean? +##################################################################################################################################################### + +**A:** A replacement defined in the configuration file for sub-graph replacement, using start/end nodes, has the ``instances`` attribute. For this type of replacement, the instance must be defined with a dictionary with two keys ``start_points`` and ``end_points``. Values for these keys are lists with the start and end node names, respectively. For more details, refer to the **Graph Transformation Extensions** section in the :doc:`[Legacy] Model Optimizer Extensibility <../legacy-model-optimizer-extensibility/[legacy]-model-optimizer-extensions/[legacy]-graph-transformation-extensions>` guide. + +.. _question-68: + +Q68. What does the message "No instances are defined for replacement with id .. " mean? +##################################################################################################################################################### + +**A:** A replacement for the specified id is not defined in the configuration file. 
For more information, refer to the FAQ :ref:`#65 `. + +.. _question-69: + +Q69. What does the message "Custom replacements configuration file .. does not exist" mean? +##################################################################################################################################################### + +**A:** The path to a custom replacement configuration file was provided with the ``--transformations_config`` flag, but it cannot be found. Make sure the specified path is correct and the file exists. + +.. _question-70: + +Q70. What does the message "Failed to parse custom replacements configuration file .." mean? +##################################################################################################################################################### + +**A:** The file for custom replacement configuration provided with the ``--transformations_config`` flag cannot be parsed. In particular, it should have a valid JSON structure. For more details, refer to the `JSON Schema Reference `__ page. + +.. _question-71: + +Q71. What does the message "One of the custom replacements in the configuration file .. does not contain attribute 'id'" mean? +##################################################################################################################################################### + +**A:** Every custom replacement should declare a set of mandatory attributes and their values. For more details, refer to FAQ :ref:`#71 `. + +.. _question-72: + +Q72. What does the message "File .. validation failed" mean? +##################################################################################################################################################### + +**A:** The file for custom replacement configuration provided with the ``--transformations_config`` flag cannot pass validation. Make sure you have specified ``id``, ``instances``, and ``match_kind`` for all the patterns. + +.. _question-73: + +Q73. 
What does the message "Cannot update the file .. because it is broken" mean? +##################################################################################################################################################### + +**A:** The custom replacement configuration file provided with the ``--tensorflow_custom_operations_config_update`` cannot be parsed. Make sure that the file is correct and refer to FAQ :ref:`#68 `, :ref:`#69 `, :ref:`#70 `, and :ref:`#71 `. + +.. _question-74: + +Q74. What does the message "End node .. is not reachable from start nodes: .." mean? +##################################################################################################################################################### + +**A:** This error occurs when you try to make a sub-graph match. It is detected that between the start and end nodes that were specified as inputs/outputs for the subgraph to find, there are nodes marked as outputs but there is no path from them to the input nodes. Make sure the subgraph you want to match does actually contain all the specified output nodes. + +.. _question-75: + +Q75. What does the message "Sub-graph contains network input node .." mean? +##################################################################################################################################################### + +**A:** The start or end node for the sub-graph replacement using start/end nodes is specified incorrectly. Model Optimizer finds internal nodes of the sub-graph strictly "between" the start and end nodes, and then adds all input nodes to the sub-graph (and the inputs of their inputs, etc.) for these "internal" nodes. This error reports that Model Optimizer reached input node during this phase. This means that the start/end points are specified incorrectly in the configuration file. 
For more details, refer to the **Graph Transformation Extensions** section in the :doc:`[Legacy] Model Optimizer Extensibility <../legacy-model-optimizer-extensibility/[legacy]-model-optimizer-extensions/[legacy]-graph-transformation-extensions>` guide. + +.. _question-76: + +Q76. What does the message "... elements of ... were clipped to infinity while converting a blob for node [...] to ..." mean? +##################################################################################################################################################### + +**A:** This message may appear when the ``--compress_to_fp16`` command-line option is used. This option implies compression of all the model weights, biases, and other constant values to FP16. If a value of a constant is out of the range of valid FP16 values, the value is converted to positive or negative infinity. It may lead to incorrect results of inference or may not be a problem, depending on the model. The number of such elements and the total number of elements in the constant value are printed out together with the name of the node, where this value is used. + +.. _question-77: + +Q77. What does the message "... elements of ... were clipped to zero while converting a blob for node [...] to ..." mean? +##################################################################################################################################################### + +**A:** This message may appear when the ``--compress_to_fp16`` command-line option is used. This option implies conversion of all blobs in the model to FP16. If a value in the blob is so close to zero that it cannot be represented as a valid FP16 value, it is converted to a true zero FP16 value. Depending on the model, it may lead to incorrect results of inference or may not be a problem. The number of such elements and the total number of elements in the blob are printed out together with the name of the node, where this blob is used. + +.. _question-78: + +Q78. 
What does the message "The amount of nodes matched pattern ... is not equal to 1" mean? +##################################################################################################################################################### + +**A:** This error occurs when the ``SubgraphMatch.node_by_pattern`` function is used with a pattern that does not uniquely identify a single node in a sub-graph. Try to extend the pattern string to make unambiguous match to a single sub-graph node. For more details, refer to the **Graph Transformation Extensions** section in the :doc:`[Legacy] Model Optimizer Extensibility <../legacy-model-optimizer-extensibility/[legacy]-model-optimizer-extensions/[legacy]-graph-transformation-extensions>` guide. + +.. _question-79: + +Q79. What does the message "The topology contains no "input" layers" mean? +##################################################################################################################################################### + +**A:** Your Caffe topology ``.prototxt`` file is intended for training. Model Optimizer expects a deployment-ready ``.prototxt`` file. To fix the problem, prepare a deployment-ready ``.prototxt`` file. Preparation of a deploy-ready topology usually results in removing ``data`` layer(s), adding ``input`` layer(s), and removing loss layer(s). + +.. _question-80: + +Q80. What does the message "Warning: please expect that Model Optimizer conversion might be slow" mean? +##################################################################################################################################################### + +**A:** You are using an unsupported Python version. Use only versions 3.4 - 3.6 for the C++ ``protobuf`` implementation that is supplied with OpenVINO toolkit. You can still boost the conversion speed by building the protobuf library from sources. 
For complete instructions about building ``protobuf`` from sources, see the appropriate section in the :doc:`Converting a Model to Intermediate Representation <../legacy-conversion-api>` guide. + +.. _question-81: + +Q81. What does the message "Arguments --nd_prefix_name, --pretrained_model_name and --input_symbol should be provided. Please provide all or do not use any." mean? +#################################################################################################################################################################### + +**A:** This error occurs if you did not provide the ``--nd_prefix_name``, ``--pretrained_model_name``, and ``--input_symbol`` parameters. +Model Optimizer requires both ``.params`` and ``.nd`` model files to merge into the result file (``.params``). +Topology description (``.json`` file) should be prepared (merged) in advance and provided with the ``--input_symbol`` parameter. + +If you add additional layers and weights that are in ``.nd`` files to your model, Model Optimizer can build a model +from one ``.params`` file and two additional ``.nd`` files (``*_args.nd``, ``*_auxs.nd``). +To do that, provide both CLI options or do not pass them if you want to convert an MXNet model without additional weights. + +.. _question-82: + +Q82. What does the message "You should specify input for mean/scale values" mean? +##################################################################################################################################################### + +**A:** When the model has multiple inputs and you want to provide mean/scale values, you need to pass those values for each input. More specifically, the number of passed values should be the same as the number of inputs of the model. +For more information, refer to the :doc:`Converting a Model to Intermediate Representation <[legacy]-setting-input-shapes>` guide. + +.. _question-83: + +Q83. What does the message "Input with name ... not found!" mean? 
+##################################################################################################################################################### + +**A:** When you pass the mean/scale values and specify the names of input layers of the model, you might have used a name that does not correspond to any input layer. Make sure that you list only names of the input layers of your model when passing values with the ``--input`` option. +For more information, refer to the :doc:`Converting a Model to Intermediate Representation <[legacy]-setting-input-shapes>` guide. + +.. _question-84: + +Q84. What does the message "Specified input json ... does not exist" mean? +##################################################################################################################################################### + +**A:** Most likely, the ``.json`` file does not exist or has a name that does not match the notation of Apache MXNet. Make sure the file exists and has a correct name. + +.. _question-85: + +Q85. What does the message "Unsupported Input model file type ... Model Optimizer support only .params and .nd files format" mean? +##################################################################################################################################################### + +**A:** Model Optimizer for Apache MXNet supports only the ``.params`` and ``.nd`` file formats. Most likely, you specified an unsupported file format in ``--input_model``. + +.. _question-86: + +Q86. What does the message "Operation ... not supported. Please register it as custom op" mean? +##################################################################################################################################################### + +**A:** Model Optimizer tried to load the model that contains some unsupported operations. +If you want to convert a model that contains unsupported operations, you need to prepare an extension for all such operations. 
+For more information, refer to the :doc:`OpenVINO Extensibility Mechanism <../../../openvino-extensibility>` guide. + +.. _question-87: + +Q87. What does the message "Can not register Op ... Please, call function 'register_caffe_python_extractor' with parameter 'name'" mean? +##################################################################################################################################################### + +**A:** This error appears if the class of implementation of ``Op`` for Python Caffe layer could not be used by Model Optimizer. Python layers should be handled differently compared to ordinary Caffe layers. + +In particular, you need to call the function ``register_caffe_python_extractor`` and pass ``name`` as the second argument of the function. +The name should be the combination of the module name and the layer name, joined by a dot. + +For example, your topology contains this layer with type ``Python``: + +.. code-block:: py + :force: + + layer { + name: 'proposal' + type: 'Python' + ... + python_param { + module: 'rpn.proposal_layer' + layer: 'ProposalLayer' + param_str: "'feat_stride': 16" + } + } + + +The first step is to implement an extension for this layer in Model Optimizer as a descendant of the ``Op`` class: + +.. code-block:: py + :force: + + class ProposalPythonExampleOp(Op): + op = 'Proposal' + + def __init__(self, graph: nx.MultiDiGraph, attrs: dict): + ... + + +It is mandatory to call two functions right after the implementation of that class: + +.. code-block:: py + :force: + + class ProposalPythonExampleOp(Op): + ... 
+ + register_caffe_python_extractor(ProposalPythonExampleOp, 'rpn.proposal_layer.ProposalLayer') + Op.excluded_classes.append(ProposalPythonExampleOp) + + +Note that the first call ``register_caffe_python_extractor(ProposalPythonExampleOp, 'rpn.proposal_layer.ProposalLayer')`` registers an extension of the layer in Model Optimizer, which will be found by the specific name (mandatory to join module name and layer name): ``rpn.proposal_layer.ProposalLayer``. + +The second call prevents Model Optimizer from using this extension as if it is an extension for +a layer with type ``Proposal``. Otherwise, this layer can be chosen as an implementation of extension that can lead to potential issues. +For more information, refer to the :doc:`OpenVINO Extensibility Mechanism <../../../openvino-extensibility>` guide. + +.. _question-88: + +Q88. What does the message "Model Optimizer is unable to calculate output shape of Memory node .." mean? +##################################################################################################################################################### + +**A:** Model Optimizer supports only ``Memory`` layers, in which ``input_memory`` goes before ``ScaleShift`` or the ``FullyConnected`` layer. +This error message means that in your model the layer after input memory is not of the ``ScaleShift`` or ``FullyConnected`` type. +This is a known limitation. + +.. _question-89: + +Q89. What do the messages "File ... does not appear to be a Kaldi file (magic number does not match)", "Kaldi model should start with tag" mean? +######################################################################################################################################################### + +**A:** These error messages mean that Model Optimizer does not support your Kaldi model, because the ``checksum`` of the model is not +16896 (the model should start with this number), or the model file does not contain the ```` tag as a starting one. 
+Make sure that you provide a path to a true Kaldi model and try again. + +.. _question-90: + +Q90. What do the messages "Expect counts file to be one-line file." or "Expect counts file to contain list of integers" mean? +##################################################################################################################################################### + +**A:** These messages mean that the counts file you passed does not consist of exactly one line. The counts file should start with +``[`` and end with ``]``, and integer values should be separated by spaces between those brackets. + +.. _question-91: + +Q91. What does the message "Model Optimizer is not able to read Kaldi model .." mean? +##################################################################################################################################################### + +**A:** There are multiple reasons why Model Optimizer does not accept a Kaldi topology, including: +the file is not available or does not exist. Refer to FAQ :ref:`#88 <question-88>`. + +.. _question-92: + +Q92. What does the message "Model Optimizer is not able to read counts file .." mean? +##################################################################################################################################################### + +**A:** There are multiple reasons why Model Optimizer does not accept a counts file, including: +the file is not available or does not exist. Refer to FAQ :ref:`#89 <question-89>`. + +.. _question-93: + +Q93. What does the message "For legacy MXNet models Model Optimizer does not support conversion of old MXNet models (trained with 1.0.0 version of MXNet and lower) with custom layers." mean? 
+############################################################################################################################################################################################### + +**A:** This message means that if you have a model with custom layers and its JSON file has been generated with Apache MXNet version +lower than 1.0.0, Model Optimizer does not support such topologies. If you want to convert it, you have to rebuild +MXNet with unsupported layers or generate a new JSON file with Apache MXNet version 1.0.0 or higher. You also need to implement +an OpenVINO extension to use custom layers. +For more information, refer to the :doc:`OpenVINO Extensibility Mechanism <../../../openvino-extensibility>` guide. + +.. _question-94: + +Q94. What does the message "Expected token ``</ParallelComponent>``, has ``...``" mean? +##################################################################################################################################################### + +**A:** This error message means that Model Optimizer does not support your Kaldi model, because the Net contains ``ParallelComponent`` that does not end with the ``</ParallelComponent>`` tag. +Make sure that you provide a path to a true Kaldi model and try again. + +.. _question-95: + +.. _question-96: + +.. _question-97: + +Q97. What does the message "Graph contains a cycle. Can not proceed .." mean? +##################################################################################################################################################### + +**A:** Model Optimizer supports only straightforward models without cycles. + +There are multiple ways to avoid cycles: + +For TensorFlow: + +* :doc:`Convert models, created with TensorFlow Object Detection API <[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-object-detection>` + +For all frameworks: + +1. :doc:`Replace cycle containing Sub-graph in Model Optimizer [Legacy Solution] <../legacy-model-optimizer-extensibility>` +2. 
See :doc:`OpenVINO Extensibility Mechanism <../../../openvino-extensibility>` + +or + +* Edit the model in its original framework to exclude cycle. + +.. _question-98: + +.. _question-99: + +.. _question-100: + +Q100. What does the message "Interp layer shape inference function may be wrong, please, try to update layer shape inference function in the file (extensions/ops/interp.op at the line ...)." mean? +#################################################################################################################################################################################################### + +**A:** There are many flavors of Caffe framework, and most layers in them are implemented identically. +However, there are exceptions. For example, the output value of layer Interp is calculated differently in Deeplab-Caffe and classic Caffe. Therefore, if your model contains layer Interp and the conversion of your model has failed, modify the ``interp_infer`` function in the ``extensions/ops/interp.op`` file according to the comments in the file. + +.. _question-101: + +Q101. What does the message "Mean/scale values should ..." mean? +##################################################################################################################################################### + +**A:** It means that your mean/scale values have a wrong format. Specify mean/scale values in the form of ``layer_name(val1,val2,val3)``. +You need to specify values for each input of the model. For more information, refer to the :doc:`Converting a Model to Intermediate Representation <[legacy]-setting-input-shapes>` guide. + +.. _question-102: + +Q102. What does the message "Operation _contrib_box_nms is not supported ..." mean? 
+##################################################################################################################################################### + +**A:** It means that you are trying to convert a topology that contains the ``_contrib_box_nms`` operation which is not supported directly. However, the sub-graph of operations including ``_contrib_box_nms`` could be replaced with the DetectionOutput layer if your topology is one of the ``gluoncv`` topologies. Specify the ``--enable_ssd_gluoncv`` command-line parameter for Model Optimizer to enable this transformation. + +.. _question-103: + +Q103. What does the message "ModelOptimizer is not able to parse "\*.caffemodel" mean? +##################################################################################################################################################### + +**A:** If a ``*.caffemodel`` file exists and is correct, the error occurred possibly because of the use of Python protobuf implementation. In some cases, error messages may appear during model parsing, for example: "``utf-8`` codec can't decode byte 0xe0 in position 4: invalid continuation byte in field: mo_caffe.SpatialTransformerParameter.transform_type". You can either use a newer Python version (3.8 - 3.11) or build the ``cpp`` implementation of ``protobuf`` yourself for your version of Python. For the complete instructions about building ``protobuf`` from sources, see the appropriate section in the :doc:`Converting Models with Model Optimizer <../legacy-conversion-api>` guide. + +.. _question-104: + +.. _question-105: + +Q105. What does the message "The IR preparation was executed by the legacy MO path. ..." mean? +##################################################################################################################################################### + +**A:** For the models in ONNX format, there are two available paths of IR conversion. 
+The old one is handled by the old Python implementation, while the new one uses new C++ frontends. +Starting from the 2022.1 version, the default IR conversion path for ONNX models is processed using the new ONNX frontend. +Certain features, such as ``--extensions`` and ``--transformations_config``, are not yet fully supported on the new frontends. +The new frontends support only paths to shared libraries (.dll and .so) for ``--extensions``. They support JSON configurations with defined library fields for ``--transformations_config``. +Inputs freezing (enabled by ``--freeze_placeholder_with_value`` or ``--input`` arguments) is not supported by the new frontends. +The IR conversion falls back to the old path if a user does not select any expected path of conversion explicitly (with ``--use_new_frontend`` or ``--use_legacy_frontend`` MO arguments) and unsupported pre-defined scenario is detected on the new frontend path. + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-setting-input-shapes.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-setting-input-shapes.rst new file mode 100644 index 00000000000000..9e445742278568 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-setting-input-shapes.rst @@ -0,0 +1,156 @@ +[LEGACY] Setting Input Shapes +==================================== + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. 
The guide on the new and recommended method can be found in the :doc:`Setting Input Shapes <../../../../openvino-workflow/model-preparation/setting-input-shapes>` article. + +With model conversion API you can increase your model's efficiency by providing an additional shape definition, with these two parameters: `input_shape` and `static_shape`. + + +.. meta:: + :description: Learn how to increase the efficiency of a model with MO by providing an additional shape definition with the input_shape and static_shape parameters. + + +Specifying input_shape parameter +################################ + +``convert_model()`` supports conversion of models with dynamic input shapes that contain undefined dimensions. +However, if the shape of data is not going to change from one inference request to another, +it is recommended to set up static shapes (when all dimensions are fully defined) for the inputs. +Doing it at this stage, instead of during inference in runtime, can be beneficial in terms of performance and memory consumption. +To set up static shapes, model conversion API provides the ``input_shape`` parameter. +For more information on input shapes under runtime, refer to the :doc:`Changing input shapes <../../../../openvino-workflow/running-inference/changing-input-shape>` guide. +To learn more about dynamic shapes in runtime, refer to the :doc:`Dynamic Shapes <../../../../openvino-workflow/running-inference/dynamic-shapes>` guide. + +The OpenVINO Runtime API may present certain limitations in inferring models with undefined dimensions on some hardware. +In this case, the ``input_shape`` parameter and the :doc:`reshape method <../../../../openvino-workflow/running-inference/changing-input-shape>` can help to resolve undefined dimensions. + +For example, run model conversion for the TensorFlow MobileNet model with the single input +and specify the input shape of ``[2,300,300,3]``: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. 
code-block:: py + :force: + + from openvino.tools.mo import convert_model + ov_model = convert_model("MobileNet.pb", input_shape=[2,300,300,3]) + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + mo --input_model MobileNet.pb --input_shape [2,300,300,3] + + +If a model has multiple inputs, ``input_shape`` must be used in conjunction with ``input`` parameter. +The ``input`` parameter contains a list of input names, for which shapes in the same order are defined via ``input_shape``. +For example, launch model conversion for the ONNX OCR model with a pair of inputs ``data`` and ``seq_len`` +and specify shapes ``[3,150,200,1]`` and ``[3]`` for them: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + from openvino.tools.mo import convert_model + ov_model = convert_model("ocr.onnx", input=["data","seq_len"], input_shape=[[3,150,200,1],[3]]) + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + mo --input_model ocr.onnx --input data,seq_len --input_shape [3,150,200,1],[3] + + +Alternatively, specify input shapes, using the ``input`` parameter as follows: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + from openvino.tools.mo import convert_model + ov_model = convert_model("ocr.onnx", input=[("data",[3,150,200,1]),("seq_len",[3])]) + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + mo --input_model ocr.onnx --input data[3,150,200,1],seq_len[3] + + +The ``input_shape`` parameter allows overriding original input shapes to ones compatible with a given model. +Dynamic shapes, i.e. with dynamic dimensions, can be replaced in the original model with static shapes for the converted model, and vice versa. +The dynamic dimension can be marked in model conversion API parameter as ``-1`` or ``?``. +For example, launch model conversion for the ONNX OCR model and specify dynamic batch dimension for inputs: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. 
code-block:: py + :force: + + from openvino.tools.mo import convert_model + ov_model = convert_model("ocr.onnx", input=["data","seq_len"], input_shape=[[-1,150,200,1],[-1]]) + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + mo --input_model ocr.onnx --input data,seq_len --input_shape [-1,150,200,1],[-1] + + +To optimize memory consumption for models with undefined dimensions in run-time, model conversion API provides the capability to define boundaries of dimensions. +The boundaries of undefined dimension can be specified with ellipsis. +For example, launch model conversion for the ONNX OCR model and specify a boundary for the batch dimension: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + from openvino.tools.mo import convert_model + from openvino.runtime import Dimension + ov_model = convert_model("ocr.onnx", input=["data","seq_len"], input_shape=[[Dimension(1,3),150,200,1],[Dimension(1,3)]]) + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + mo --input_model ocr.onnx --input data,seq_len --input_shape [1..3,150,200,1],[1..3] + + +Practically, some models are not ready for input shapes change. +In this case, a new input shape cannot be set via model conversion API. +For more information about shapes, follow the :doc:`inference troubleshooting <[legacy]-troubleshooting-reshape-errors>` +and :ref:`ways to relax shape inference flow ` guides. 
+ +Additional Resources +#################### + +* :doc:`Convert a Model <../legacy-conversion-api>` +* :doc:`Cutting Off Parts of a Model <[legacy]-cutting-parts-of-a-model>` + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats.rst new file mode 100644 index 00000000000000..fb9f41c755d4fb --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats.rst @@ -0,0 +1,598 @@ +[LEGACY] Supported Model Formats +===================================== + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Supported Model Formats <../../../../openvino-workflow/model-preparation>` article. + +.. toctree:: + :maxdepth: 1 + :hidden: + + Converting a TensorFlow Model <[legacy]-supported-model-formats/[legacy]-convert-tensorflow> + Converting an ONNX Model <[legacy]-supported-model-formats/[legacy]-convert-onnx> + Converting a PyTorch Model <[legacy]-supported-model-formats/[legacy]-convert-pytorch> + Converting a TensorFlow Lite Model <[legacy]-supported-model-formats/[legacy]-convert-tensorflow-lite> + Converting a PaddlePaddle Model <[legacy]-supported-model-formats/[legacy]-convert-paddle> + Model Conversion Tutorials <[legacy]-supported-model-formats/[legacy]-conversion-tutorials> + +.. meta:: + :description: Learn about supported model formats and the methods used to convert, read, and compile them in OpenVINO™. 
+ + +**OpenVINO IR (Intermediate Representation)** - the proprietary and default format of OpenVINO, benefiting from the full extent of its features. All other supported model formats, as listed below, are converted to :doc:`OpenVINO IR <../../../openvino-ir-format>` to enable inference. Consider storing your model in this format to minimize first-inference latency, perform model optimization, and, in some cases, save space on your drive. + +**PyTorch, TensorFlow, ONNX, and PaddlePaddle** - can be used with OpenVINO Runtime API directly, +which means you do not need to save them as OpenVINO IR before including them in your application. +OpenVINO can read, compile, and convert them automatically, as part of its pipeline. + +In the Python API, these options are provided as three separate methods: +``read_model()``, ``compile_model()``, and ``convert_model()``. +The ``convert_model()`` method enables you to perform additional adjustments +to the model, such as setting shapes, changing model input types or layouts, +cutting parts of the model, freezing inputs, etc. For a detailed description +of the conversion process, see the +:doc:`model conversion guide <../legacy-conversion-api>`. + +Here are code examples of how to use these methods with different model formats: + +.. tab-set:: + + .. tab-item:: PyTorch + :sync: torch + + .. tab-set:: + + .. tab-item:: Python + :sync: py + + * The ``convert_model()`` method: + + This is the only method applicable to PyTorch models. + + .. dropdown:: List of supported formats: + + * **Python objects**: + + * ``torch.nn.Module`` + * ``torch.jit.ScriptModule`` + * ``torch.jit.ScriptFunction`` + + .. 
code-block:: py + :force: + + import openvino + import torchvision + from openvino.tools.mo import convert_model + core = openvino.Core() + + model = torchvision.models.resnet50(weights='DEFAULT') + ov_model = convert_model(model) + compiled_model = core.compile_model(ov_model, "AUTO") + + For more details on conversion, refer to the + :doc:`guide <[legacy]-supported-model-formats/[legacy]-convert-pytorch>` + and an example `tutorial `__ + on this topic. + + .. tab-item:: TensorFlow + :sync: tf + + .. tab-set:: + + .. tab-item:: Python + :sync: py + + * The ``convert_model()`` method: + + When you use the ``convert_model()`` method, you have more control and you can specify additional adjustments for ``ov.Model``. The ``read_model()`` and ``compile_model()`` methods are easier to use, however, they do not have such capabilities. With ``ov.Model`` you can choose to optimize, compile and run inference on it or serialize it into a file for subsequent use. + + .. dropdown:: List of supported formats: + + * **Files**: + + * SavedModel - ```` or ``.pb`` + * Checkpoint - ``.pb`` or ``.pbtxt`` + * MetaGraph - ``.meta`` + + * **Python objects**: + + * ``tf.keras.Model`` + * ``tf.keras.layers.Layer`` + * ``tf.Module`` + * ``tf.compat.v1.Graph`` + * ``tf.compat.v1.GraphDef`` + * ``tf.function`` + * ``tf.compat.v1.session`` + * ``tf.train.checkpoint`` + + .. code-block:: py + :force: + + import openvino + from openvino.tools.mo import convert_model + + core = openvino.Core() + ov_model = convert_model("saved_model.pb") + compiled_model = core.compile_model(ov_model, "AUTO") + + For more details on conversion, refer to the + :doc:`guide <[legacy]-supported-model-formats/[legacy]-convert-tensorflow>` + and an example `tutorial `__ + on this topic. + + * The ``read_model()`` and ``compile_model()`` methods: + + .. dropdown:: List of supported formats: + + * **Files**: + + * SavedModel - ```` or ``.pb`` + * Checkpoint - ``.pb`` or ``.pbtxt`` + * MetaGraph - ``.meta`` + + .. 
code-block:: py + :force: + + ov_model = core.read_model("saved_model.pb") + compiled_model = core.compile_model(ov_model, "AUTO") + + For a guide on how to run inference, see how to + :doc:`Integrate OpenVINO™ with Your Application <../../../../openvino-workflow/running-inference/integrate-openvino-with-your-application>`. + + .. tab-item:: C++ + :sync: cpp + + * The ``compile_model()`` method: + + .. dropdown:: List of supported formats: + + * **Files**: + + * SavedModel - ```` or ``.pb`` + * Checkpoint - ``.pb`` or ``.pbtxt`` + * MetaGraph - ``.meta`` + + .. code-block:: cpp + + ov::CompiledModel compiled_model = core.compile_model("saved_model.pb", "AUTO"); + + For a guide on how to run inference, see how to + :doc:`Integrate OpenVINO™ with Your Application <../../../../openvino-workflow/running-inference/integrate-openvino-with-your-application>`. + + .. tab-item:: C + :sync: c + + * The ``compile_model()`` method: + + .. dropdown:: List of supported formats: + + * **Files**: + + * SavedModel - ```` or ``.pb`` + * Checkpoint - ``.pb`` or ``.pbtxt`` + * MetaGraph - ``.meta`` + + .. code-block:: c + + ov_compiled_model_t* compiled_model = NULL; + ov_core_compile_model_from_file(core, "saved_model.pb", "AUTO", 0, &compiled_model); + + For a guide on how to run inference, see how to + :doc:`Integrate OpenVINO™ with Your Application <../../../../openvino-workflow/running-inference/integrate-openvino-with-your-application>`. + + .. tab-item:: CLI + :sync: cli + + You can use ``mo`` command-line tool to convert a model to IR. The obtained IR can then be read by ``read_model()`` and inferred. + + .. code-block:: sh + + mo --input_model .pb + + For details on the conversion, refer to the + :doc:`article <[legacy]-supported-model-formats/[legacy]-convert-tensorflow>`. + + .. tab-item:: TensorFlow Lite + :sync: tflite + + .. tab-set:: + + .. 
tab-item:: Python + :sync: py + + * The ``convert_model()`` method: + + When you use the ``convert_model()`` method, you have more control and you can specify additional adjustments for ``ov.Model``. The ``read_model()`` and ``compile_model()`` methods are easier to use, however, they do not have such capabilities. With ``ov.Model`` you can choose to optimize, compile and run inference on it or serialize it into a file for subsequent use. + + .. dropdown:: List of supported formats: + + * **Files**: + + * ``.tflite`` + + .. code-block:: py + :force: + + import openvino + from openvino.tools.mo import convert_model + + core = openvino.Core() + ov_model = convert_model(".tflite") + compiled_model = core.compile_model(ov_model, "AUTO") + + For more details on conversion, refer to the + :doc:`guide <[legacy]-supported-model-formats/[legacy]-convert-tensorflow>` + and an example `tutorial `__ + on this topic. + + + * The ``read_model()`` method: + + .. dropdown:: List of supported formats: + + * **Files**: + + * ``.tflite`` + + .. code-block:: py + :force: + + import openvino + + core = openvino.Core() + ov_model = core.read_model(".tflite") + compiled_model = core.compile_model(ov_model, "AUTO") + + * The ``compile_model()`` method: + + .. dropdown:: List of supported formats: + + * **Files**: + + * ``.tflite`` + + .. code-block:: py + :force: + + import openvino + + core = openvino.Core() + compiled_model = core.compile_model(".tflite", "AUTO") + + For a guide on how to run inference, see how to + :doc:`Integrate OpenVINO™ with Your Application <../../../../openvino-workflow/running-inference/integrate-openvino-with-your-application>`. + + + .. tab-item:: C++ + :sync: cpp + + * The ``compile_model()`` method: + + .. dropdown:: List of supported formats: + + * **Files**: + + * ``.tflite`` + + .. 
code-block:: cpp + + ov::CompiledModel compiled_model = core.compile_model(".tflite", "AUTO"); + + For a guide on how to run inference, see how to + :doc:`Integrate OpenVINO™ with Your Application <../../../../openvino-workflow/running-inference/integrate-openvino-with-your-application>`. + + .. tab-item:: C + :sync: c + + * The ``compile_model()`` method: + + .. dropdown:: List of supported formats: + + * **Files**: + + * ``.tflite`` + + .. code-block:: c + + ov_compiled_model_t* compiled_model = NULL; + ov_core_compile_model_from_file(core, ".tflite", "AUTO", 0, &compiled_model); + + For a guide on how to run inference, see how to + :doc:`Integrate OpenVINO™ with Your Application <../../../../openvino-workflow/running-inference/integrate-openvino-with-your-application>`. + + .. tab-item:: CLI + :sync: cli + + * The ``convert_model()`` method: + + You can use ``mo`` command-line tool to convert a model to IR. The obtained IR can then be read by ``read_model()`` and inferred. + + .. dropdown:: List of supported formats: + + * **Files**: + + * ``.tflite`` + + .. code-block:: sh + + mo --input_model .tflite + + For details on the conversion, refer to the + :doc:`article <[legacy]-supported-model-formats/[legacy]-convert-tensorflow-lite>`. + + .. tab-item:: ONNX + :sync: onnx + + .. tab-set:: + + .. tab-item:: Python + :sync: py + + * The ``convert_model()`` method: + + When you use the ``convert_model()`` method, you have more control and you can specify additional adjustments for ``ov.Model``. The ``read_model()`` and ``compile_model()`` methods are easier to use, however, they do not have such capabilities. With ``ov.Model`` you can choose to optimize, compile and run inference on it or serialize it into a file for subsequent use. + + .. dropdown:: List of supported formats: + + * **Files**: + + * ``.onnx`` + + .. 
code-block:: py + :force: + + import openvino + from openvino.tools.mo import convert_model + + core = openvino.Core() + ov_model = convert_model(".onnx") + compiled_model = core.compile_model(ov_model, "AUTO") + + For more details on conversion, refer to the + :doc:`guide <[legacy]-supported-model-formats/[legacy]-convert-onnx>` + and an example `tutorial `__ + on this topic. + + + * The ``read_model()`` method: + + .. dropdown:: List of supported formats: + + * **Files**: + + * ``.onnx`` + + .. code-block:: py + :force: + + import openvino + core = openvino.Core() + + ov_model = core.read_model(".onnx") + compiled_model = core.compile_model(ov_model, "AUTO") + + * The ``compile_model()`` method: + + .. dropdown:: List of supported formats: + + * **Files**: + + * ``.onnx`` + + .. code-block:: py + :force: + + import openvino + core = openvino.Core() + + compiled_model = core.compile_model(".onnx", "AUTO") + + For a guide on how to run inference, see how to :doc:`Integrate OpenVINO™ with Your Application <../../../../openvino-workflow/running-inference/integrate-openvino-with-your-application>`. + + + .. tab-item:: C++ + :sync: cpp + + * The ``compile_model()`` method: + + .. dropdown:: List of supported formats: + + * **Files**: + + * ``.onnx`` + + .. code-block:: cpp + + ov::CompiledModel compiled_model = core.compile_model(".onnx", "AUTO"); + + For a guide on how to run inference, see how to :doc:`Integrate OpenVINO™ with Your Application <../../../../openvino-workflow/running-inference/integrate-openvino-with-your-application>`. + + .. tab-item:: C + :sync: c + + * The ``compile_model()`` method: + + .. dropdown:: List of supported formats: + + * **Files**: + + * ``.onnx`` + + .. code-block:: c + + ov_compiled_model_t* compiled_model = NULL; + ov_core_compile_model_from_file(core, ".onnx", "AUTO", 0, &compiled_model); + + For details on the conversion, refer to the :doc:`article <[legacy]-supported-model-formats/[legacy]-convert-onnx>` + + .. 
tab-item:: CLI + :sync: cli + + * The ``convert_model()`` method: + + You can use ``mo`` command-line tool to convert a model to IR. The obtained IR can then be read by ``read_model()`` and inferred. + + .. dropdown:: List of supported formats: + + * **Files**: + + * ``.onnx`` + + .. code-block:: sh + + mo --input_model .onnx + + For details on the conversion, refer to the + :doc:`article <[legacy]-supported-model-formats/[legacy]-convert-onnx>` + + .. tab-item:: PaddlePaddle + :sync: pdpd + + .. tab-set:: + + .. tab-item:: Python + :sync: py + + * The ``convert_model()`` method: + + When you use the ``convert_model()`` method, you have more control and you can specify additional adjustments for ``ov.Model``. The ``read_model()`` and ``compile_model()`` methods are easier to use, however, they do not have such capabilities. With ``ov.Model`` you can choose to optimize, compile and run inference on it or serialize it into a file for subsequent use. + + .. dropdown:: List of supported formats: + + * **Files**: + + * ``.pdmodel`` + + * **Python objects**: + + * ``paddle.hapi.model.Model`` + * ``paddle.fluid.dygraph.layers.Layer`` + * ``paddle.fluid.executor.Executor`` + + .. code-block:: py + :force: + + import openvino + from openvino.tools.mo import convert_model + + core = openvino.Core() + ov_model = convert_model(".pdmodel") + compiled_model = core.compile_model(ov_model, "AUTO") + + For more details on conversion, refer to the + :doc:`guide <[legacy]-supported-model-formats/[legacy]-convert-paddle>` + and an example `tutorial `__ + on this topic. + + * The ``read_model()`` method: + + .. dropdown:: List of supported formats: + + * **Files**: + + * ``.pdmodel`` + + .. code-block:: py + :force: + + import openvino + core = openvino.Core() + + ov_model = core.read_model(".pdmodel") + compiled_model = core.compile_model(ov_model, "AUTO") + + * The ``compile_model()`` method: + + .. dropdown:: List of supported formats: + + * **Files**: + + * ``.pdmodel`` + + .. 
code-block:: py + :force: + + import openvino + core = openvino.Core() + + compiled_model = core.compile_model(".pdmodel", "AUTO") + + For a guide on how to run inference, see how to + :doc:`Integrate OpenVINO™ with Your Application <../../../../openvino-workflow/running-inference/integrate-openvino-with-your-application>`. + + .. tab-item:: C++ + :sync: cpp + + * The ``compile_model()`` method: + + .. dropdown:: List of supported formats: + + * **Files**: + + * ``.pdmodel`` + + .. code-block:: cpp + + ov::CompiledModel compiled_model = core.compile_model(".pdmodel", "AUTO"); + + For a guide on how to run inference, see how to + :doc:`Integrate OpenVINO™ with Your Application <../../../../openvino-workflow/running-inference/integrate-openvino-with-your-application>`. + + .. tab-item:: C + :sync: c + + * The ``compile_model()`` method: + + .. dropdown:: List of supported formats: + + * **Files**: + + * ``.pdmodel`` + + .. code-block:: c + + ov_compiled_model_t* compiled_model = NULL; + ov_core_compile_model_from_file(core, ".pdmodel", "AUTO", 0, &compiled_model); + + For a guide on how to run inference, see how to + :doc:`Integrate OpenVINO™ with Your Application <../../../../openvino-workflow/running-inference/integrate-openvino-with-your-application>`. + + .. tab-item:: CLI + :sync: cli + + * The ``convert_model()`` method: + + You can use ``mo`` command-line tool to convert a model to IR. The obtained IR can then be read by ``read_model()`` and inferred. + + .. dropdown:: List of supported formats: + + * **Files**: + + * ``.pdmodel`` + + .. code-block:: sh + + mo --input_model .pdmodel + + For details on the conversion, refer to the + :doc:`article <[legacy]-supported-model-formats/[legacy]-convert-paddle>`. + + +As OpenVINO support for **MXNet, Caffe, and Kaldi formats** has been **discontinued**, converting these legacy formats +to OpenVINO IR or ONNX before running inference should be considered the default path for use with OpenVINO. + +.. 
note:: + + If you want to keep working with the legacy formats the old way, refer to a previous + `OpenVINO LTS version and its documentation `__ . + + OpenVINO versions of 2023 are mostly compatible with the old instructions, + through a deprecated MO tool, installed with the deprecated OpenVINO Developer Tools package. + + `OpenVINO 2023.0 `__ is the last + release officially supporting the MO conversion process for the legacy formats. + + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials.rst new file mode 100644 index 00000000000000..5fbe486a20960a --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials.rst @@ -0,0 +1,59 @@ +[LEGACY] Model Conversion Tutorials +==================================================== + + +.. 
toctree:: + :maxdepth: 1 + :hidden: + + [legacy]-conversion-tutorials/convert-tensorflow-attention-ocr + [legacy]-conversion-tutorials/convert-tensorflow-bert + [legacy]-conversion-tutorials/convert-tensorflow-crnn + [legacy]-conversion-tutorials/convert-tensorflow-deep-speech + [legacy]-conversion-tutorials/convert-tensorflow-efficient-det + [legacy]-conversion-tutorials/convert-tensorflow-face-net + [legacy]-conversion-tutorials/convert-tensorflow-gnmt + [legacy]-conversion-tutorials/convert-tensorflow-language-1b + [legacy]-conversion-tutorials/convert-tensorflow-ncf + [legacy]-conversion-tutorials/convert-tensorflow-object-detection + [legacy]-conversion-tutorials/convert-tensorflow-retina-net + [legacy]-conversion-tutorials/convert-tensorflow-slim-library + [legacy]-conversion-tutorials/convert-tensorflow-wide-and-deep-family + [legacy]-conversion-tutorials/convert-tensorflow-xlnet + [legacy]-conversion-tutorials/convert-tensorflow-yolo + [legacy]-conversion-tutorials/convert-onnx-faster-r-cnn + [legacy]-conversion-tutorials/convert-onnx-gpt-2 + [legacy]-conversion-tutorials/convert-onnx-mask-r-cnn + [legacy]-conversion-tutorials/convert-pytorch-bert-ner + [legacy]-conversion-tutorials/convert-pytorch-cascade-rcnn-r-101 + [legacy]-conversion-tutorials/convert-pytorch-f3-net + [legacy]-conversion-tutorials/convert-pytorch-quartz-net + [legacy]-conversion-tutorials/convert-pytorch-rcan + [legacy]-conversion-tutorials/convert-pytorch-rnn-t + [legacy]-conversion-tutorials/convert-pytorch-yolact + + +.. meta:: + :description: Get to know conversion methods for specific TensorFlow, ONNX, and PyTorch models. + + +.. danger:: + + The code described in the tutorials has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. 
The guide on the new and recommended method can be found in the :doc:`Python tutorials <../../../../../learn-openvino/interactive-tutorials-python>`. + +This section provides a set of tutorials that demonstrate conversion methods for specific +TensorFlow, ONNX, and PyTorch models. Note that these instructions do not cover all use +cases and may not reflect your particular needs. +Before studying the tutorials, try to convert the model out-of-the-box by specifying only the +``--input_model`` parameter in the command line. + +.. note:: + + Apache MXNet, Caffe, and Kaldi are no longer directly supported by OpenVINO. + +You will find a collection of :doc:`Python tutorials <../../../../../learn-openvino/interactive-tutorials-python>` written for running on Jupyter notebooks +that provide an introduction to the OpenVINO™ toolkit and explain how to use the Python API and tools for +optimized deep learning inference. + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-faster-r-cnn.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-faster-r-cnn.rst new file mode 100644 index 00000000000000..7880b261c80b81 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-faster-r-cnn.rst @@ -0,0 +1,41 @@ +Converting an ONNX Faster R-CNN Model +===================================== + + +.. meta:: + :description: Learn how to convert a Faster R-CNN model + from ONNX to the OpenVINO Intermediate Representation. + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. 
It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Python tutorials <../../../../../../learn-openvino/interactive-tutorials-python>`. + +The instructions below are applicable **only** to the Faster R-CNN model converted to the ONNX file format from the `maskrcnn-benchmark model `__: + +1. Download the pretrained model file from `onnx/models `__ (commit-SHA: 8883e49e68de7b43e263d56b9ed156dfa1e03117). + +2. Generate the Intermediate Representation of the model, by changing your current working directory to the model conversion API installation directory, and running model conversion with the following parameters: + + .. code-block:: sh + + mo \ + --input_model FasterRCNN-10.onnx \ + --input_shape [1,3,800,800] \ + --input 0:2 \ + --mean_values [102.9801,115.9465,122.7717] \ + --transformations_config front/onnx/faster_rcnn.json + + + Be aware that the height and width specified with the ``input_shape`` command line parameter + could be different. For more information about supported input image dimensions and + required pre- and post-processing steps, refer to the + `Faster R-CNN article `__. + +3. Interpret the outputs of the generated IR: class indices, probabilities and box coordinates. 
Below are the outputs from the ``DetectionOutput`` layer: + + * class indices + * probabilities + * box coordinates + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-gpt-2.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-gpt-2.rst new file mode 100644 index 00000000000000..4c10c941c7fb47 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-gpt-2.rst @@ -0,0 +1,34 @@ +Converting an ONNX GPT-2 Model +============================== + + +.. meta:: + :description: Learn how to convert a pre-trained GPT-2 + model from ONNX to the OpenVINO Intermediate Representation. + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Python tutorials <../../../../../../learn-openvino/interactive-tutorials-python>`. + +`Public pre-trained GPT-2 model `__ is a large +transformer-based language model with a simple objective: predict the next word, given all of the previous words within some text. + +Downloading the Pre-Trained Base GPT-2 Model +############################################ + +To download the model, go to `this model `__, and press **Download**. + +To download the model and sample test data, go to `this model `__, and press **Download**. 
+ +Converting an ONNX GPT-2 Model to IR +#################################### + +Generate the Intermediate Representation of the model GPT-2 by running model conversion with the following parameters: + +.. code-block:: sh + + mo --input_model gpt2-10.onnx --input_shape [X,Y,Z] --output_dir <OUTPUT_MODEL_DIR> + + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-mask-r-cnn.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-mask-r-cnn.rst new file mode 100644 index 00000000000000..6158f5bdcb59ed --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-mask-r-cnn.rst @@ -0,0 +1,41 @@ +Converting an ONNX Mask R-CNN Model +=================================== + + +.. meta:: + :description: Learn how to convert a pre-trained Mask + R-CNN model from ONNX to the OpenVINO Intermediate Representation. + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Python tutorials <../../../../../../learn-openvino/interactive-tutorials-python>`. + +The instructions below are applicable **only** to the Mask R-CNN model converted to the ONNX file format from the `maskrcnn-benchmark model `__. + +1. Download the pretrained model file from `onnx/models `__ (commit-SHA: 8883e49e68de7b43e263d56b9ed156dfa1e03117). + +2. 
Generate the Intermediate Representation of the model by changing your current working directory to the model conversion API installation directory and running model conversion with the following parameters: + + .. code-block:: sh + + mo \ + --input_model mask_rcnn_R_50_FPN_1x.onnx \ + --input "0:2" \ + --input_shape [1,3,800,800] \ + --mean_values [102.9801,115.9465,122.7717] \ + --transformations_config front/onnx/mask_rcnn.json + + + Be aware that the height and width specified with the ``input_shape`` command line parameter could be different. For more information about supported input image dimensions and required pre- and post-processing steps, refer to the `documentation `__. + +3. Interpret the outputs of the generated IR file: masks, class indices, probabilities and box coordinates: + + * masks + * class indices + * probabilities + * box coordinates + +The first one is a layer with the name ``6849/sink_port_0``, and the rest are outputs from the ``DetectionOutput`` layer. + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-bert-ner.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-bert-ner.rst new file mode 100644 index 00000000000000..e89d21f28c66c4 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-bert-ner.rst @@ -0,0 +1,76 @@ +Converting a PyTorch BERT-NER Model +=================================== + + +.. meta:: + :description: Learn how to convert a BERT-NER model + from PyTorch to the OpenVINO Intermediate Representation. + +.. 
danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Python tutorials <../../../../../../learn-openvino/interactive-tutorials-python>`. + +The goal of this article is to present a step-by-step guide on how to convert PyTorch BERT-NER model to OpenVINO IR. First, you need to download the model and convert it to ONNX. + + +Downloading and Converting the Model to ONNX +############################################ + +To download a pretrained model or train the model yourself, refer +to the `instructions `__ in the +BERT-NER model repository. The model with configuration files is stored in the ``out_base`` directory. + +To convert the model to ONNX format, create and run the following script in the root +directory of the model repository. If you download the pretrained model, you need +to download `bert.py `__ to run the script. +The instructions were tested with the commit-SHA: ``e5be564156f194f1becb0d82aeaf6e762d9eb9ed``. + +.. 
code-block:: py + :force: + + import torch + + from bert import Ner + + ner = Ner("out_base") + + input_ids, input_mask, segment_ids, valid_positions = ner.preprocess('Steve went to Paris') + input_ids = torch.tensor([input_ids], dtype=torch.long, device=ner.device) + input_mask = torch.tensor([input_mask], dtype=torch.long, device=ner.device) + segment_ids = torch.tensor([segment_ids], dtype=torch.long, device=ner.device) + valid_ids = torch.tensor([valid_positions], dtype=torch.long, device=ner.device) + + ner_model, tknizr, model_config = ner.load_model("out_base") + + with torch.no_grad(): + logits = ner_model(input_ids, segment_ids, input_mask, valid_ids) + torch.onnx.export(ner_model, + (input_ids, segment_ids, input_mask, valid_ids), + "bert-ner.onnx", + input_names=['input_ids', 'segment_ids', 'input_mask', 'valid_ids'], + output_names=['output'], + dynamic_axes={ + "input_ids": {0: "batch_size"}, + "segment_ids": {0: "batch_size"}, + "input_mask": {0: "batch_size"}, + "valid_ids": {0: "batch_size"}, + "output": {0: "output"} + }, + opset_version=11, + ) + + +The script generates ONNX model file ``bert-ner.onnx``. + +Converting an ONNX BERT-NER model to IR +####################################### + +.. code-block:: sh + + mo --input_model bert-ner.onnx --input "input_mask[1,128],segment_ids[1,128],input_ids[1,128]" + + +where ``1`` is ``batch_size`` and ``128`` is ``sequence_length``. 
+ diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-cascade-rcnn-r-101.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-cascade-rcnn-r-101.rst new file mode 100644 index 00000000000000..a61ca5e79f1c30 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-cascade-rcnn-r-101.rst @@ -0,0 +1,51 @@ +Converting a PyTorch Cascade RCNN R-101 Model +============================================= + + +.. meta:: + :description: Learn how to convert a Cascade RCNN R-101 + model from PyTorch to the OpenVINO Intermediate Representation. + + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Python tutorials <../../../../../../learn-openvino/interactive-tutorials-python>`. + +The goal of this article is to present a step-by-step guide on how to convert a PyTorch Cascade RCNN R-101 model to OpenVINO IR. First, you need to download the model and convert it to ONNX. + +Downloading and Converting Model to ONNX +######################################## + +* Clone the `repository `__ : + + .. code-block:: sh + + git clone https://github.com/open-mmlab/mmdetection + cd mmdetection + + + .. note:: + + To set up an environment, refer to the `instructions `__. + +* Download the pre-trained `model `__. The model is also available `here `__. 
+ +* To convert the model to ONNX format, use this `script `__. + + .. code-block:: sh + + python3 tools/deployment/pytorch2onnx.py configs/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco.py cascade_rcnn_r101_fpn_1x_coco_20200317-0b6a2fbf.pth --output-file cascade_rcnn_r101_fpn_1x_coco.onnx + + +The script generates ONNX model file ``cascade_rcnn_r101_fpn_1x_coco.onnx`` in the directory ``tools/deployment/``. If required, specify the model name or output directory, using ``--output-file /.onnx``. + +Converting an ONNX Cascade RCNN R-101 Model to OpenVINO IR +########################################################## + +.. code-block:: sh + + mo --input_model cascade_rcnn_r101_fpn_1x_coco.onnx --mean_values [123.675,116.28,103.53] --scale_values [58.395,57.12,57.375] + + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-f3-net.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-f3-net.rst new file mode 100644 index 00000000000000..d1391cfb1519ba --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-f3-net.rst @@ -0,0 +1,55 @@ +Converting a PyTorch F3Net Model +================================ + + +.. meta:: + :description: Learn how to convert a F3Net model + from PyTorch to the OpenVINO Intermediate Representation. + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. 
The guide on the new and recommended method can be found in the :doc:`Python tutorials <../../../../../../learn-openvino/interactive-tutorials-python>`. + +`F3Net `__ : Fusion, Feedback and Focus for Salient Object Detection + +Cloning the F3Net Repository +############################ + +To clone the repository, run the following command: + +.. code-block:: sh + + git clone http://github.com/weijun88/F3Net.git + + +Downloading and Converting the Model to ONNX +############################################ + +To download the pretrained model or train the model yourself, refer to the +`instructions `__ in the F3Net model repository. First, convert the model to ONNX format. Create and run the following Python script in the ``src`` directory of the model repository: + +.. code-block:: py + :force: + + import torch + from dataset import Config + from net import F3Net + + cfg = Config(mode='test', snapshot=<path_to_checkpoint_dir>) + net = F3Net(cfg) + image = torch.zeros([1, 3, 352, 352]) + torch.onnx.export(net, image, 'f3net.onnx', export_params=True, do_constant_folding=True, opset_version=11) + + +The script generates the ONNX model file ``f3net.onnx``. The model conversion was tested with the commit-SHA: ``eecace3adf1e8946b571a4f4397681252f9dc1b8``. + +Converting an ONNX F3Net Model to IR +#################################### + +.. 
code-block:: sh + + mo --input_model <MODEL_DIR>/f3net.onnx + + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-quartz-net.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-quartz-net.rst new file mode 100644 index 00000000000000..f1ee885dae0b26 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-quartz-net.rst @@ -0,0 +1,61 @@ +Converting a PyTorch QuartzNet Model +==================================== + + +.. meta:: + :description: Learn how to convert a QuartzNet model + from PyTorch to the OpenVINO Intermediate Representation. + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Python tutorials <../../../../../../learn-openvino/interactive-tutorials-python>`. + +`NeMo project `__ provides the QuartzNet model. + +Downloading the Pre-trained QuartzNet Model +########################################### + +To download the pre-trained model, refer to the `NeMo Speech Models Catalog `__. +Here are the instructions on how to obtain QuartzNet in ONNX format. + +1. Install the NeMo toolkit, using the `instructions `__. + +2. Run the following code: + + .. 
code-block:: py + :force: + + import nemo + import nemo.collections.asr as nemo_asr + + quartznet = nemo_asr.models.EncDecCTCModel.from_pretrained(model_name="QuartzNet15x5Base-En") + # Export QuartzNet model to ONNX format + quartznet.decoder.export('decoder_qn.onnx') + quartznet.encoder.export('encoder_qn.onnx') + quartznet.export('qn.onnx') + + + This code produces 3 ONNX model files: ``encoder_qn.onnx``, ``decoder_qn.onnx``, ``qn.onnx``. + They are the ``encoder``, ``decoder``, and combined ``decoder(encoder(x))`` models, respectively. + +Converting an ONNX QuartzNet model to IR +######################################## + +If using a combined model: + +.. code-block:: sh + + mo --input_model /qn.onnx --input_shape [B,64,X] + +If using separate models: + +.. code-block:: sh + + mo --input_model /encoder_qn.onnx --input_shape [B,64,X] + mo --input_model /decoder_qn.onnx --input_shape [B,1024,Y] + + +Where shape is determined by the audio file Mel-Spectrogram length: ``B`` - batch dimension, ``X`` - dimension based on the input length, ``Y`` - determined by encoder output, usually ``X / 2``. + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-rcan.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-rcan.rst new file mode 100644 index 00000000000000..7e9fb7b5717cbd --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-rcan.rst @@ -0,0 +1,49 @@ +Converting a PyTorch RCAN Model +=============================== + + +.. meta:: + :description: Learn how to convert an RCAN model + from PyTorch to the OpenVINO Intermediate Representation. + +.. 
danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Python tutorials <../../../../../../learn-openvino/interactive-tutorials-python>`. + +`RCAN `__ : Image Super-Resolution Using Very Deep Residual Channel Attention Networks + +Downloading and Converting the Model to ONNX +############################################ + +To download the pre-trained model or train the model yourself, refer to the `instruction `__ in the RCAN model repository. First, convert the model to ONNX format. Create and run the script with the following content in the root +directory of the model repository: + +.. code-block:: py + :force: + + from argparse import Namespace + + import torch + + from RCAN_TestCode.code.model.rcan import RCAN + + config = Namespace(n_feats=64, n_resblocks=4, n_resgroups=2, reduction=16, scale=[2], data_train='DIV2K', res_scale=1, + n_colors=3, rgb_range=255) + net = RCAN(config) + net.eval() + dummy_input = torch.randn(1, 3, 360, 640) + torch.onnx.export(net, dummy_input, 'RCAN.onnx') + + +The script generates the ONNX model file ``RCAN.onnx``. More information about model parameters (``n_resblocks``, ``n_resgroups``, and others) and their different values can be found in the model repository. The model conversion was tested with the commit-SHA: ``3339ebc59519c3bb2b5719b87dd36515ec7f3ba7``. + +Converting an ONNX RCAN Model to IR +################################### + +.. 
code-block:: sh + + mo --input_model RCAN.onnx + + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-rnn-t.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-rnn-t.rst new file mode 100644 index 00000000000000..ad646568aed598 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-rnn-t.rst @@ -0,0 +1,137 @@ +Converting a PyTorch RNN-T Model +================================ + + +.. meta:: + :description: Learn how to convert a RNN-T model + from PyTorch to the OpenVINO Intermediate Representation. + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Python tutorials <../../../../../../learn-openvino/interactive-tutorials-python>`. + +This guide covers conversion of RNN-T model from `MLCommons `__ repository. Follow +the instructions below to export a PyTorch model into ONNX, before converting it to IR: + +**Step 1**. Clone RNN-T PyTorch implementation from MLCommons repository (revision r1.0). Make a shallow clone to pull +only RNN-T model without full repository. If you already have a full repository, skip this and go to **Step 2**: + +.. code-block:: sh + + git clone -b r1.0 -n https://github.com/mlcommons/inference rnnt_for_openvino --depth 1 + cd rnnt_for_openvino + git checkout HEAD speech_recognition/rnnt + + +**Step 2**. 
If you already have a full clone of the MLCommons inference repository, create a folder for +the pretrained PyTorch model, where conversion into IR will take place. You will also need to specify the path to +your full clone at **Step 5**. Skip this step if you have a shallow clone. + +.. code-block:: sh + + mkdir rnnt_for_openvino + cd rnnt_for_openvino + + +**Step 3**. Download pre-trained weights for PyTorch implementation from `here `__. +For UNIX-like systems, you can use ``wget``: + +.. code-block:: sh + + wget https://zenodo.org/record/3662521/files/DistributedDataParallel_1576581068.9962234-epoch-100.pt + + +The link was taken from ``setup.sh`` in the ``speech_recognition/rnnt`` subfolder. You will get exactly the same weights as +if you were following the `guide `__. + +**Step 4**. Install required Python packages: + +.. code-block:: sh + + pip3 install torch toml + + +**Step 5**. Export RNN-T model into ONNX, using the script below. Copy the code below into a file named +``export_rnnt_to_onnx.py`` and run it in the current directory ``rnnt_for_openvino``: + +.. note:: + + If you already have a full clone of the MLCommons inference repository, you need + to specify the ``mlcommons_inference_path`` variable. + +.. 
code-block:: py + :force: + + import toml + import torch + import sys + + + def load_and_migrate_checkpoint(ckpt_path): + checkpoint = torch.load(ckpt_path, map_location="cpu") + migrated_state_dict = {} + for key, value in checkpoint['state_dict'].items(): + key = key.replace("joint_net", "joint.net") + migrated_state_dict[key] = value + del migrated_state_dict["audio_preprocessor.featurizer.fb"] + del migrated_state_dict["audio_preprocessor.featurizer.window"] + return migrated_state_dict + + + mlcommons_inference_path = './' # specify the relative path to the MLCommons inference repository + checkpoint_path = 'DistributedDataParallel_1576581068.9962234-epoch-100.pt' + config_toml = 'speech_recognition/rnnt/pytorch/configs/rnnt.toml' + config = toml.load(config_toml) + rnnt_vocab = config['labels']['labels'] + sys.path.insert(0, mlcommons_inference_path + 'speech_recognition/rnnt/pytorch') + + from model_separable_rnnt import RNNT + + model = RNNT(config['rnnt'], len(rnnt_vocab) + 1, feature_config=config['input_eval']) + model.load_state_dict(load_and_migrate_checkpoint(checkpoint_path)) + + seq_length, batch_size, feature_length = 157, 1, 240 + inp = torch.randn([seq_length, batch_size, feature_length]) + feature_length = torch.LongTensor([seq_length]) + x_padded, x_lens = model.encoder(inp, feature_length) + torch.onnx.export(model.encoder, (inp, feature_length), "rnnt_encoder.onnx", opset_version=12, + input_names=['input', 'feature_length'], output_names=['x_padded', 'x_lens'], + dynamic_axes={'input': {0: 'seq_len', 1: 'batch'}}) + + symbol = torch.LongTensor([[20]]) + hidden = torch.randn([2, batch_size, 320]), torch.randn([2, batch_size, 320]) + g, hidden = model.prediction.forward(symbol, hidden) + torch.onnx.export(model.prediction, (symbol, hidden), "rnnt_prediction.onnx", opset_version=12, + input_names=['symbol', 'hidden_in_1', 'hidden_in_2'], + output_names=['g', 'hidden_out_1', 'hidden_out_2'], + dynamic_axes={'symbol': {0: 'batch'}, 
'hidden_in_1': {1: 'batch'}, 
'hidden_in_2': {1: 'batch'}}) + + f = torch.randn([batch_size, 1, 1024]) + model.joint.forward(f, g) + torch.onnx.export(model.joint, (f, g), "rnnt_joint.onnx", opset_version=12, + input_names=['0', '1'], output_names=['result'], dynamic_axes={'0': {0: 'batch'}, '1': {0: 'batch'}}) + + +.. code-block:: sh + + python3 export_rnnt_to_onnx.py + + +After completing this step, the files ``rnnt_encoder.onnx``, ``rnnt_prediction.onnx``, and ``rnnt_joint.onnx`` will be saved in the current directory. + +**Step 6**. Run the conversion commands: + +.. code-block:: sh + + mo --input_model rnnt_encoder.onnx --input "input[157,1,240],feature_length->157" + mo --input_model rnnt_prediction.onnx --input "symbol[1,1],hidden_in_1[2,1,320],hidden_in_2[2,1,320]" + mo --input_model rnnt_joint.onnx --input "0[1,1,1024],1[1,1,320]" + + +.. note:: + + The hardcoded value for sequence length = 157 was taken from the MLCommons, but conversion to IR preserves network :doc:`reshapeability <../../../../../../openvino-workflow/running-inference/changing-input-shape>`. Therefore, input shapes can be changed manually to any value during either conversion or inference. + + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-yolact.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-yolact.rst new file mode 100644 index 00000000000000..0eacbd6c5b0bf9 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-yolact.rst @@ -0,0 +1,222 @@ +Converting a PyTorch YOLACT Model +================================= + + +.. 
meta:: + :description: Learn how to convert a YOLACT model + from PyTorch to the OpenVINO Intermediate Representation. + + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Python tutorials <../../../../../../learn-openvino/interactive-tutorials-python>`. + +You Only Look At CoefficienTs (YOLACT) is a simple, fully convolutional model for real-time instance segmentation. +The PyTorch implementation is publicly available in `this GitHub repository `__. +The YOLACT++ model is not supported, because it uses deformable convolutional layers that cannot be represented in ONNX format. + +.. _patch-file-yolact: + +Creating a Patch File +##################### + +Before converting the model, create a patch file for the repository. +The patch modifies the framework code by adding a special command-line argument to the framework options. The argument enables inference graph dumping: + +1. Go to a writable directory and create a ``YOLACT_onnx_export.patch`` file. +2. Copy the following diff code to the file: + + .. 
code-block:: console + + From 76deb67d4f09f29feda1a633358caa18335d9e9f Mon Sep 17 00:00:00 2001 + From: "OpenVINO" + Date: Fri, 12 Mar 2021 00:27:35 +0300 + Subject: [PATCH] Add export to ONNX + + --- + eval.py | 5 ++++- + utils/augmentations.py | 7 +++++-- + yolact.py | 29 +++++++++++++++++++---------- + 3 files changed, 28 insertions(+), 13 deletions(-) + + diff --git a/eval.py b/eval.py + index 547bc0a..bde0680 100644 + --- a/eval.py + +++ b/eval.py + @@ -593,9 +593,12 @@ def badhash(x): + return x + + def evalimage(net:Yolact, path:str, save_path:str=None): + - frame = torch.from_numpy(cv2.imread(path)).cuda().float() + + frame = torch.from_numpy(cv2.imread(path)).float() + + if torch.cuda.is_available(): + + frame = frame.cuda() + batch = FastBaseTransform()(frame.unsqueeze(0)) + preds = net(batch) + + torch.onnx.export(net, batch, "yolact.onnx", opset_version=11) + + img_numpy = prep_display(preds, frame, None, None, undo_transform=False) + + diff --git a/utils/augmentations.py b/utils/augmentations.py + index cc7a73a..2420603 100644 + --- a/utils/augmentations.py + +++ b/utils/augmentations.py + @@ -623,8 +623,11 @@ class FastBaseTransform(torch.nn.Module): + def __init__(self): + super().__init__() + + - self.mean = torch.Tensor(MEANS).float().cuda()[None, :, None, None] + - self.std = torch.Tensor( STD ).float().cuda()[None, :, None, None] + + self.mean = torch.Tensor(MEANS).float()[None, :, None, None] + + self.std = torch.Tensor( STD ).float()[None, :, None, None] + + if torch.cuda.is_available(): + + self.mean.cuda() + + self.std.cuda() + self.transform = cfg.backbone.transform + + def forward(self, img): + diff --git a/yolact.py b/yolact.py + index d83703b..f8c787c 100644 + --- a/yolact.py + +++ b/yolact.py + @@ -17,19 +17,22 @@ import torch.backends.cudnn as cudnn + from utils import timer + from utils.functions import MovingAverage, make_net + + -# This is required for Pytorch 1.0.1 on Windows to initialize Cuda on some driver versions. 
+ -# See the bug report here: https://github.com/pytorch/pytorch/issues/17108 + -torch.cuda.current_device() + - + -# As of March 10, 2019, Pytorch DataParallel still doesn't support JIT Script Modules + -use_jit = torch.cuda.device_count() <= 1 + -if not use_jit: + - print('Multiple GPUs detected! Turning off JIT.') + +use_jit = False + + ScriptModuleWrapper = torch.jit.ScriptModule if use_jit else nn.Module + script_method_wrapper = torch.jit.script_method if use_jit else lambda fn, _rcn=None: fn + + + +def decode(loc, priors): + + variances = [0.1, 0.2] + + boxes = torch.cat((priors[:, :2] + loc[:, :, :2] * variances[0] * priors[:, 2:], priors[:, 2:] * torch.exp(loc[:, :, 2:] * variances[1])), 2) + + + + boxes_result1 = boxes[:, :, :2] - boxes[:, :, 2:] / 2 + + boxes_result2 = boxes[:, :, 2:] + boxes_result1 + + boxes_result = torch.cat((boxes_result1, boxes_result2), 2) + + + + return boxes_result + + + + class Concat(nn.Module): + def __init__(self, nets, extra_params): + @@ -476,7 +479,10 @@ class Yolact(nn.Module): + + def load_weights(self, path): + """ Loads weights from a compressed save file. """ + - state_dict = torch.load(path) + + if torch.cuda.is_available(): + + state_dict = torch.load(path) + + else: + + state_dict = torch.load(path, map_location=torch.device('cpu')) + + # For backward compatibility, remove these (the new variable is called layers) + for key in list(state_dict.keys()): + @@ -673,8 +679,11 @@ class Yolact(nn.Module): + else: + pred_outs['conf'] = F.softmax(pred_outs['conf'], -1) + + - return self.detect(pred_outs, self) + + pred_outs['boxes'] = decode(pred_outs['loc'], pred_outs['priors']) # decode output boxes + + + pred_outs.pop('priors') # remove unused in postprocessing layers + + pred_outs.pop('loc') # remove unused in postprocessing layers + + return pred_outs + + + + -- + + +3. Save and close the file. + +Converting a YOLACT Model to the OpenVINO IR format +################################################### + +**Step 1**. 
Clone the GitHub repository and check out the commit: + +1. Clone the YOLACT repository: + + .. code-block:: sh + + git clone https://github.com/dbolya/yolact + + +2. Check out the necessary commit: + + .. code-block:: sh + + git checkout 57b8f2d95e62e2e649b382f516ab41f949b57239 + + +3. Set up the environment as described in ``README.md``. + +**Step 2**. Download a pre-trained model from the list attached in the ``Evaluation`` section of the ``README.md`` document, for example ``yolact_base_54_800000.pth``. + +**Step 3**. Export the model to ONNX format. + +1. Apply the ``YOLACT_onnx_export.patch`` patch to the repository. Refer to the :ref:`Create a Patch File <patch-file-yolact>` instructions if you do not have it: + + .. code-block:: sh + + git apply /path/to/patch/YOLACT_onnx_export.patch + + +2. Evaluate the YOLACT model to export it to ONNX format: + + .. code-block:: sh + + python3 eval.py \ + --trained_model=/path/to/yolact_base_54_800000.pth \ + --score_threshold=0.3 \ + --top_k=10 \ + --image=/path/to/image.jpg \ + --cuda=False + + +3. The script may fail, but you should still get the ``yolact.onnx`` file. + +**Step 4**. Convert the model to the IR: + +.. code-block:: sh + + mo --input_model /path/to/yolact.onnx + + +**Step 5**. Embed input preprocessing into the IR: + +To gain performance by offloading the application of mean/scale values and the RGB->BGR conversion to OpenVINO, use the following model conversion API parameters: + +* If the backbone of the model is Resnet50-FPN or Resnet101-FPN, use the following MO command line: + + .. code-block:: sh + + mo \ + --input_model /path/to/yolact.onnx \ + --reverse_input_channels \ + --mean_values "[123.68, 116.78, 103.94]" \ + --scale_values "[58.40, 57.12, 57.38]" + + +* If the backbone of the model is Darknet53-FPN, use the following MO command line: + + .. 
code-block:: sh + + mo \ + --input_model /path/to/yolact.onnx \ + --reverse_input_channels \ + --scale 255 + + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-attention-ocr.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-attention-ocr.rst new file mode 100644 index 00000000000000..dd419456ccbcd3 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-attention-ocr.rst @@ -0,0 +1,60 @@ +Converting a TensorFlow Attention OCR Model +=========================================== + + +.. meta:: + :description: Learn how to convert the Attention OCR + model from the TensorFlow Attention OCR repository to the + OpenVINO Intermediate Representation. + + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Python tutorials <../../../../../../learn-openvino/interactive-tutorials-python>`. + +This tutorial explains how to convert the Attention OCR (AOCR) model from the `TensorFlow Attention OCR repository `__ to the Intermediate Representation (IR). + +Extracting a Model from ``aocr`` Library +######################################## + +To get an AOCR model, download ``aocr`` Python library: + +.. 
code-block:: sh + + pip install git+https://github.com/emedvedev/attention-ocr.git@master#egg=aocr + +This library contains a pretrained model and allows training and running AOCR, using the command line. After installation of ``aocr``, extract the model: + +.. code-block:: sh + + aocr export --format=frozengraph model/path/ + +Once extracted, the model can be found in the ``model/path/`` folder. + +Converting the TensorFlow AOCR Model to IR +########################################## + +The original AOCR model includes a preprocessing part, which performs: + +* Decoding input data to binary format where input data is an image represented as a string. +* Resizing binary image to working resolution. + +The resized image is sent to the convolutional neural network (CNN). Because model conversion API does not support image decoding, the preprocessing part of the model should be cut off, using the ``input`` command-line parameter. + +.. code-block:: sh + + mo \ + --input_model=model/path/frozen_graph.pb \ + --input="map/TensorArrayStack/TensorArrayGatherV3:0[1,32,86,1]" \ + --output "transpose_1,transpose_2" \ + --output_dir path/to/ir/ + + +Where: + +* ``map/TensorArrayStack/TensorArrayGatherV3:0[1,32,86,1]`` - name of the node producing the tensor after preprocessing. +* ``transpose_1`` - name of the node producing the tensor with predicted characters. +* ``transpose_2`` - name of the node producing the tensor with predicted character probabilities. 
+ diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-bert.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-bert.rst new file mode 100644 index 00000000000000..197b6e13c4e27a --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-bert.rst @@ -0,0 +1,170 @@ +Converting a TensorFlow BERT Model +================================== + + +.. meta:: + :description: Learn how to convert a BERT model + from TensorFlow to the OpenVINO Intermediate Representation. + + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Python tutorials <../../../../../../learn-openvino/interactive-tutorials-python>`. + +Pretrained models for BERT (Bidirectional Encoder Representations from Transformers) are +`publicly available `__. + +.. _supported_models: + +Supported Models +################ + +The following models from the pretrained `BERT model list `__ are currently supported: + +* ``BERT-Base, Cased`` +* ``BERT-Base, Uncased`` +* ``BERT-Base, Multilingual Cased`` +* ``BERT-Base, Multilingual Uncased`` +* ``BERT-Base, Chinese`` +* ``BERT-Large, Cased`` +* ``BERT-Large, Uncased`` + +Downloading the Pretrained BERT Model +##################################### + +Download and unzip an archive with the `BERT-Base, Multilingual Uncased Model `__. 
+ +After the archive is unzipped, the directory ``uncased_L-12_H-768_A-12`` is created and contains the following files: + +* ``bert_config.json`` +* ``bert_model.ckpt.data-00000-of-00001`` +* ``bert_model.ckpt.index`` +* ``bert_model.ckpt.meta`` +* ``vocab.txt`` + +Pretrained model meta-graph files are ``bert_model.ckpt.*``. + +Converting a TensorFlow BERT Model to IR +######################################### + +To generate the BERT Intermediate Representation (IR) of the model, run model conversion with the following parameters: + +.. code-block:: sh + + mo \ + --input_meta_graph uncased_L-12_H-768_A-12/bert_model.ckpt.meta \ + --output bert/pooler/dense/Tanh \ + --input Placeholder{i32},Placeholder_1{i32},Placeholder_2{i32} + + +Pretrained models are not suitable for batch reshaping out-of-the-box because of multiple hardcoded shapes in the model. + +Converting a Reshapable TensorFlow BERT Model to OpenVINO IR +============================================================= + +Follow these steps to make a pretrained TensorFlow BERT model reshapable over the batch dimension: + +1. Download a pretrained BERT model you want to use from the `Supported Models list <#supported_models>`__. + +2. Clone the google-research/bert git repository: + + .. code-block:: sh + + git clone https://github.com/google-research/bert.git + +3. Go to the root directory of the cloned repository: + + .. code-block:: sh + + cd bert + +4. (Optional) Check out the commit that the conversion was tested on: + + .. code-block:: sh + + git checkout eedf5716c + +5. Download the script to load GLUE data: + + * For UNIX-like systems, run the following command: + + .. code-block:: sh + + wget https://gist.githubusercontent.com/W4ngatang/60c2bdb54d156a41194446737ce03e2e/raw/17b8dd0d724281ed7c3b2aeeda662b92809aadd5/download_glue_data.py + + * For Windows systems: + + Download the `Python script `__ to the current working directory. + +6. Download GLUE data by running: + + .. 
code-block:: sh + + python3 download_glue_data.py --tasks MRPC + +7. Open the file ``modeling.py`` in the text editor and delete lines 923-924. They should look like this: + + .. code-block:: py + :force: + + if not non_static_indexes: + return shape + +8. Open the file ``run_classifier.py`` and insert the following code after the line 645: + + .. code-block:: py + :force: + + import os, sys + import tensorflow as tf + from tensorflow.python.framework import graph_io + with tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph()) as sess: + (assignment_map, initialized_variable_names) = \ + modeling.get_assignment_map_from_checkpoint(tf.compat.v1.trainable_variables(), init_checkpoint) + tf.compat.v1.train.init_from_checkpoint(init_checkpoint, assignment_map) + sess.run(tf.compat.v1.global_variables_initializer()) + frozen = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ["bert/pooler/dense/Tanh"]) + graph_io.write_graph(frozen, './', 'inference_graph.pb', as_text=False) + print('BERT frozen model path {}'.format(os.path.join(os.path.dirname(__file__), 'inference_graph.pb'))) + sys.exit(0) + + Lines before the inserted code should look like this: + + .. code-block:: py + :force: + + (total_loss, per_example_loss, logits, probabilities) = create_model( + bert_config, is_training, input_ids, input_mask, segment_ids, label_ids, + num_labels, use_one_hot_embeddings) + + +9. Set environment variables ``BERT_BASE_DIR``, ``BERT_REPO_DIR`` and run the script ``run_classifier.py`` to create ``inference_graph.pb`` file in the root of the cloned BERT repository. + + .. 
code-block:: sh + + export BERT_BASE_DIR=/path/to/bert/uncased_L-12_H-768_A-12 + export BERT_REPO_DIR=/current/working/directory + + python3 run_classifier.py \ + --task_name=MRPC \ + --do_eval=true \ + --data_dir=$BERT_REPO_DIR/glue_data/MRPC \ + --vocab_file=$BERT_BASE_DIR/vocab.txt \ + --bert_config_file=$BERT_BASE_DIR/bert_config.json \ + --init_checkpoint=$BERT_BASE_DIR/bert_model.ckpt \ + --output_dir=./ + + Run model conversion with the following command line parameters to generate reshape-able BERT Intermediate Representation (IR): + + .. code-block:: sh + + mo \ + --input_model inference_graph.pb \ + --input "IteratorGetNext:0{i32}[1,128],IteratorGetNext:1{i32}[1,128],IteratorGetNext:4{i32}[1,128]" + +For other applicable parameters, refer to the :doc:`Convert Model from TensorFlow <../[legacy]-convert-tensorflow>` guide. + +For more information about reshape abilities, refer to the :doc:`Using Shape Inference <../../../../../../openvino-workflow/running-inference/changing-input-shape>` guide. + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-crnn.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-crnn.rst new file mode 100644 index 00000000000000..a94d72b4508f3c --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-crnn.rst @@ -0,0 +1,86 @@ +Converting a TensorFlow CRNN Model +================================== + + +.. meta:: + :description: Learn how to convert a CRNN model + from TensorFlow to the OpenVINO Intermediate Representation. + + +.. 
danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Python tutorials <../../../../../../learn-openvino/interactive-tutorials-python>`. + +This tutorial explains how to convert a CRNN model to OpenVINO™ Intermediate Representation (IR). + +There are several public versions of TensorFlow CRNN model implementation available on GitHub. This tutorial explains how to convert the model from +the `CRNN Tensorflow `__ repository to IR, and is validated with Python 3.7, TensorFlow 1.15.0, and protobuf 3.19.0. +If you have another implementation of CRNN model, it can be converted to OpenVINO IR in a similar way. You need to get inference graph and run model conversion of it. + +**To convert the model to IR:** + +**Step 1.** Clone this GitHub repository and check out the commit: + +1. Clone the repository: + + .. code-block:: sh + + git clone https://github.com/MaybeShewill-CV/CRNN_Tensorflow.git + +2. Go to the ``CRNN_Tensorflow`` directory of the cloned repository: + + .. code-block:: sh + + cd path/to/CRNN_Tensorflow + +3. Check out the necessary commit: + + .. code-block:: sh + + git checkout 64f1f1867bffaacfeacc7a80eebf5834a5726122 + + +**Step 2.** Train the model using the framework or the pretrained checkpoint provided in this repository. + + +**Step 3.** Create an inference graph: + +1. Add the ``CRNN_Tensorflow`` folder to ``PYTHONPATH``. + + * For Linux: + + .. code-block:: sh + + export PYTHONPATH="${PYTHONPATH}:/path/to/CRNN_Tensorflow/" + + + * For Windows, add ``/path/to/CRNN_Tensorflow/`` to the ``PYTHONPATH`` environment variable in settings. + +2. Edit the ``tools/demo_shadownet.py`` script. 
After ``saver.restore(sess=sess, save_path=weights_path)`` line, add the following code: + + .. code-block:: py + :force: + + from tensorflow.python.framework import graph_io + frozen = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['shadow/LSTMLayers/transpose_time_major']) + graph_io.write_graph(frozen, '.', 'frozen_graph.pb', as_text=False) + +3. Run the demo with the following command: + + .. code-block:: sh + + python tools/demo_shadownet.py --image_path data/test_images/test_01.jpg --weights_path model/shadownet/shadownet_2017-10-17-11-47-46.ckpt-199999 + + + If you want to use your checkpoint, replace the path in the ``--weights_path`` parameter with a path to your checkpoint. + +4. In the ``CRNN_Tensorflow`` directory, you will find the inference CRNN graph ``frozen_graph.pb``. You can use this graph with OpenVINO to convert the model to IR and then run inference. + +**Step 4.** Convert the model to IR: + +.. code-block:: sh + + mo --input_model path/to/your/CRNN_Tensorflow/frozen_graph.pb + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-deep-speech.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-deep-speech.rst new file mode 100644 index 00000000000000..e572b26324faf3 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-deep-speech.rst @@ -0,0 +1,108 @@ +Converting a TensorFlow DeepSpeech Model +======================================== + + +.. meta:: + :description: Learn how to convert a DeepSpeech model + from TensorFlow to the OpenVINO Intermediate Representation. + +.. 
danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Python tutorials <../../../../../../learn-openvino/interactive-tutorials-python>`. + +`DeepSpeech project `__ provides an engine to train speech-to-text models. + +Downloading the Pretrained DeepSpeech Model +########################################### + +Create a directory where model and metagraph with pretrained weights will be stored: + +.. code-block:: sh + + mkdir deepspeech + cd deepspeech + +`Pre-trained English speech-to-text model `__ is publicly available. +To download the model, follow the instruction below: + +* For UNIX-like systems, run the following command: + + .. code-block:: sh + + wget -O - https://github.com/mozilla/DeepSpeech/archive/v0.8.2.tar.gz | tar xvfz - + wget -O - https://github.com/mozilla/DeepSpeech/releases/download/v0.8.2/deepspeech-0.8.2-checkpoint.tar.gz | tar xvfz - + +* For Windows systems: + + 1. Download `the archive with the model `__. + 2. Download the `TensorFlow MetaGraph with pre-trained weights `__. + 3. Unpack it with a file archiver application. + +Freezing the Model into a "\*.pb File" +###################################### + +After unpacking the archives above, you have to freeze the model. This requires +TensorFlow version 1, which is not available under Python 3.8, so you need Python 3.7 or lower. +Before freezing, deploy a virtual environment and install the required packages: + +.. code-block:: sh + + virtualenv --python=python3.7 venv-deep-speech + source venv-deep-speech/bin/activate + cd DeepSpeech-0.8.2 + pip3 install -e . + +Freeze the model with the following command: + +.. 
code-block:: sh + + python3 DeepSpeech.py --checkpoint_dir ../deepspeech-0.8.2-checkpoint --export_dir ../ + +After that, you will get the pretrained frozen model file ``output_graph.pb`` in the directory ``deepspeech`` created at +the beginning. The model contains the preprocessing and main parts. The first preprocessing part performs conversion of input +spectrogram into a form useful for speech recognition (mel). This part of the model is not convertible into +the IR because it contains unsupported operations ``AudioSpectrogram`` and ``Mfcc``. + +The main and most computationally expensive part of the model converts the preprocessed audio into text. +There are two specificities with the supported part of the model. + +The first is that the model contains an input with sequence length. So the model can be converted with +a fixed input length shape, thus the model is not reshapable. +Refer to the :doc:`Using Shape Inference <../../../../../../openvino-workflow/running-inference/changing-input-shape>` guide. + +The second is that the frozen model still has two variables: ``previous_state_c`` and ``previous_state_h``, figure +with the frozen \*.pb model is below. It means that the model keeps training these variables at each inference. + +.. image:: ../../../../../../assets/images/DeepSpeech-0.8.2.png + +At the first inference, the variables are initialized with zero tensors. After execution, the results of the ``BlockLSTM`` +are assigned to cell state and hidden state, which are these two variables. + +Converting the Main Part of DeepSpeech Model into OpenVINO IR +############################################################# + +Model conversion API assumes that the output model is for inference only. That is why you should cut ``previous_state_c`` and ``previous_state_h`` variables off and resolve keeping cell and hidden states on the application level. 
+ +There are certain limitations for the model conversion: + +* Time length (``time_len``) and sequence length (``seq_len``) are equal. +* Original model cannot be reshaped, so you should keep original shapes. + +To generate the IR, run model conversion with the following parameters: + +.. code-block:: sh + + mo \ + --input_model output_graph.pb \ + --input "input_lengths->[16],input_node[1,16,19,26],previous_state_h[1,2048],previous_state_c[1,2048]" \ + --output "cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/GatherNd_1,cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/GatherNd,logits" + + +Where: + +* ``input_lengths->[16]`` replaces the input node with name "input_lengths" with a constant tensor of shape [1] with a single integer value of 16. This means that the model now can consume input sequences of length 16 only. +* ``input_node[1,16,19,26],previous_state_h[1,2048],previous_state_c[1,2048]`` replaces the variables with placeholders of the given shapes. +* ``output ".../GatherNd_1,.../GatherNd,logits"`` specifies the output node names. + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-efficient-det.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-efficient-det.rst new file mode 100644 index 00000000000000..c894765a5dc604 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-efficient-det.rst @@ -0,0 +1,90 @@ +Converting TensorFlow EfficientDet Models +========================================= + + +.. meta:: + :description: Learn how to convert an EfficientDet model + from TensorFlow to the OpenVINO Intermediate Representation. 
+ + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Python tutorials <../../../../../../learn-openvino/interactive-tutorials-python>`. + +This tutorial explains how to convert EfficientDet public object detection models to the Intermediate Representation (IR). + +.. _efficientdet-to-ir: + +Converting EfficientDet Model to the IR +####################################### + +There are several public versions of EfficientDet model implementation available on GitHub. This tutorial explains how to +convert models from the `repository `__ (commit 96e1fee) to the OpenVINO format. + +Download and extract the model checkpoint `efficientdet-d4.tar.gz `__ +referenced in the **"Pretrained EfficientDet Checkpoints"** section of the model repository: + +.. code-block:: sh + + wget https://storage.googleapis.com/cloud-tpu-checkpoints/efficientdet/coco2/efficientdet-d4.tar.gz + tar zxvf efficientdet-d4.tar.gz + +Converting an EfficientDet TensorFlow Model to the IR ++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +To generate the IR of the EfficientDet TensorFlow model, run: + +.. code-block:: sh + + mo \ + --input_meta_graph efficientdet-d4/model.meta \ + --input_shape [1,$IMAGE_SIZE,$IMAGE_SIZE,3] \ + --reverse_input_channels + + +Where ``$IMAGE_SIZE`` is the size that the input image of the original TensorFlow model will be resized to. Different +EfficientDet models were trained with different input image sizes. To determine the right one, refer to the ``efficientdet_model_param_dict`` +dictionary in the `hparams_config.py `__ file. +The attribute ``image_size`` specifies the shape to be defined for the model conversion. + +.. 
note:: + + The color channel order (RGB or BGR) of the input data should match the channel order of the model training dataset. If they are different, perform the ``RGB<->BGR`` conversion specifying the command-line parameter: ``--reverse_input_channels``. Otherwise, inference results may be incorrect. For more information about the parameter, refer to the **When to Reverse Input Channels** section of the :doc:`Converting a Model to Intermediate Representation (IR) <../../[legacy]-setting-input-shapes>` guide. + +OpenVINO toolkit provides samples that can be used to infer EfficientDet model. +For more information, refer to the `Open Model Zoo Demos `__. + +.. important:: + + Due to the deprecation of Open Model Zoo, models in the OpenVINO IR format are now + published on `Hugging Face `__. + + +Interpreting Results of the TensorFlow Model and the IR +####################################################### + +The TensorFlow model produces as output a list of 7-element tuples: ``[image_id, y_min, x_min, y_max, x_max, confidence, class_id]``, where: + +* ``image_id`` -- image batch index. +* ``y_min`` -- absolute ``y`` coordinate of the lower left corner of the detected object. +* ``x_min`` -- absolute ``x`` coordinate of the lower left corner of the detected object. +* ``y_max`` -- absolute ``y`` coordinate of the upper right corner of the detected object. +* ``x_max`` -- absolute ``x`` coordinate of the upper right corner of the detected object. +* ``confidence`` -- the confidence of the detected object. +* ``class_id`` -- the id of the detected object class counted from 1. + +The output of the IR is a list of 7-element tuples: ``[image_id, class_id, confidence, x_min, y_min, x_max, y_max]``, where: + +* ``image_id`` -- image batch index. +* ``class_id`` -- the id of the detected object class counted from 0. +* ``confidence`` -- the confidence of the detected object. +* ``x_min`` -- normalized ``x`` coordinate of the lower left corner of the detected object. 
+* ``y_min`` -- normalized ``y`` coordinate of the lower left corner of the detected object. +* ``x_max`` -- normalized ``x`` coordinate of the upper right corner of the detected object. +* ``y_max`` -- normalized ``y`` coordinate of the upper right corner of the detected object. + +The first element with ``image_id = -1`` means end of data. + + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-face-net.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-face-net.rst new file mode 100644 index 00000000000000..a528718349f717 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-face-net.rst @@ -0,0 +1,42 @@ +Converting TensorFlow FaceNet Models +==================================== + + +.. meta:: + :description: Learn how to convert a FaceNet model + from TensorFlow to the OpenVINO Intermediate Representation. + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Supported Model Formats <../../../../../../openvino-workflow/model-preparation>` article. + +`Public pre-trained FaceNet models `__ contain both the training +and inference parts of the graph. Switching between these two states is managed with a placeholder value. +Intermediate Representation (IR) models are intended for inference, which means that the training part is redundant. 
+ +There are two inputs in this network: boolean ``phase_train`` which manages state of the graph (train/infer) and +``batch_size`` which is a part of batch joining pattern. + +.. image:: ../../../../../../assets/images/FaceNet.svg + +Converting a TensorFlow FaceNet Model to the IR +############################################### + +To generate a FaceNet OpenVINO model, feed a TensorFlow FaceNet model to model conversion API with the following parameters: + +.. code-block:: sh + + mo \ + --input_model path_to_model/model_name.pb \ + --freeze_placeholder_with_value "phase_train->False" + + +The batch joining pattern transforms to a placeholder with the model default shape if ``--input_shape`` or ``--batch``/``-b`` are not provided. Otherwise, the placeholder shape has custom parameters. + +* ``freeze_placeholder_with_value "phase_train->False"`` switches the graph to inference mode +* ``batch``/``-b`` is applicable to override original network batch +* ``input_shape`` is applicable with or without ``input`` +* other options are applicable + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-gnmt.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-gnmt.rst new file mode 100644 index 00000000000000..b8d2c592ed931d --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-gnmt.rst @@ -0,0 +1,315 @@ +Converting a TensorFlow GNMT Model +================================== + + +.. meta:: + :description: Learn how to convert a GNMT model + from TensorFlow to the OpenVINO Intermediate Representation. + +.. 
danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Python tutorials <../../../../../../learn-openvino/interactive-tutorials-python>`. + +This tutorial explains how to convert Google Neural Machine Translation (GNMT) model to the Intermediate Representation (IR). + +There are several public versions of TensorFlow GNMT model implementation available on GitHub. This tutorial explains how to convert the GNMT model from the `TensorFlow Neural Machine Translation (NMT) repository `__ to the IR. + +Creating a Patch File +##################### + +Before converting the model, you need to create a patch file for the repository. The patch modifies the framework code by adding a special command-line argument to the framework options that enables inference graph dumping: + +1. Go to a writable directory and create a ``GNMT_inference.patch`` file. +2. Copy the following diff code to the file: + + .. code-block:: py + + diff --git a/nmt/inference.py b/nmt/inference.py + index 2cbef07..e185490 100644 + --- a/nmt/inference.py + +++ b/nmt/inference.py + @@ -17,9 +17,11 @@ + from __future__ import print_function + + import codecs + +import os + import time + + import tensorflow as tf + +from tensorflow.python.framework import graph_io + + from . import attention_model + from . 
import gnmt_model + @@ -105,6 +107,29 @@ def start_sess_and_load_model(infer_model, ckpt_path): + return sess, loaded_infer_model + + + +def inference_dump_graph(ckpt_path, path_to_dump, hparams, scope=None): + + model_creator = get_model_creator(hparams) + + infer_model = model_helper.create_infer_model(model_creator, hparams, scope) + + sess = tf.Session( + + graph=infer_model.graph, config=utils.get_config_proto()) + + with infer_model.graph.as_default(): + + loaded_infer_model = model_helper.load_model( + + infer_model.model, ckpt_path, sess, "infer") + + utils.print_out("Dumping inference graph to {}".format(path_to_dump)) + + loaded_infer_model.saver.save( + + sess, + + os.path.join(path_to_dump + 'inference_GNMT_graph') + + ) + + utils.print_out("Dumping done!") + + + + output_node_name = 'index_to_string_Lookup' + + utils.print_out("Freezing GNMT graph with output node {}...".format(output_node_name)) + + frozen = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, + + [output_node_name]) + + graph_io.write_graph(frozen, '.', os.path.join(path_to_dump, 'frozen_GNMT_inference_graph.pb'), as_text=False) + + utils.print_out("Freezing done. 
Freezed model frozen_GNMT_inference_graph.pb saved to {}".format(path_to_dump)) + + + + + def inference(ckpt_path, + inference_input_file, + inference_output_file, + diff --git a/nmt/nmt.py b/nmt/nmt.py + index f5823d8..a733748 100644 + --- a/nmt/nmt.py + +++ b/nmt/nmt.py + @@ -310,6 +310,13 @@ def add_arguments(parser): + parser.add_argument("--num_intra_threads", type=int, default=0, + help="number of intra_op_parallelism_threads") + + + # Special argument for inference model dumping without inference + + parser.add_argument("--dump_inference_model", type="bool", nargs="?", + + const=True, default=False, + + help="Argument for dump inference graph for specified trained ckpt") + + + + parser.add_argument("--path_to_dump", type=str, default="", + + help="Path to dump inference graph.") + + def create_hparams(flags): + """Create training hparams.""" + @@ -396,6 +403,9 @@ def create_hparams(flags): + language_model=flags.language_model, + num_intra_threads=flags.num_intra_threads, + num_inter_threads=flags.num_inter_threads, + + + + dump_inference_model=flags.dump_inference_model, + + path_to_dump=flags.path_to_dump, + ) + + + @@ -613,7 +623,7 @@ def create_or_load_hparams( + return hparams + + + -def run_main(flags, default_hparams, train_fn, inference_fn, target_session=""): + +def run_main(flags, default_hparams, train_fn, inference_fn, inference_dump, target_session=""): + """Run main.""" + # Job + jobid = flags.jobid + @@ -653,8 +663,26 @@ def run_main(flags, default_hparams, train_fn, inference_fn, target_session=""): + out_dir, default_hparams, flags.hparams_path, + save_hparams=(jobid == 0)) + + - ## Train / Decode + - if flags.inference_input_file: + + # Dumping inference model + + if flags.dump_inference_model: + + # Inference indices + + hparams.inference_indices = None + + if flags.inference_list: + + (hparams.inference_indices) = ( + + [int(token) for token in flags.inference_list.split(",")]) + + + + # Ckpt + + ckpt = flags.ckpt + + if not ckpt: + + 
ckpt = tf.train.latest_checkpoint(out_dir) + + + + # Path to dump graph + + assert flags.path_to_dump != "", "Please, specify path_to_dump model." + + path_to_dump = flags.path_to_dump + + if not tf.gfile.Exists(path_to_dump): tf.gfile.MakeDirs(path_to_dump) + + + + inference_dump(ckpt, path_to_dump, hparams) + + elif flags.inference_input_file: + # Inference output directory + trans_file = flags.inference_output_file + assert trans_file + @@ -693,7 +721,8 @@ def main(unused_argv): + default_hparams = create_hparams(FLAGS) + train_fn = train.train + inference_fn = inference.inference + - run_main(FLAGS, default_hparams, train_fn, inference_fn) + + inference_dump = inference.inference_dump_graph + + run_main(FLAGS, default_hparams, train_fn, inference_fn, inference_dump) + + + if __name__ == "__main__": + + +3. Save and close the file. + +Converting a GNMT Model to the IR +################################# + +.. note:: Use TensorFlow version 1.13 or lower. + +**Step 1**. Clone the GitHub repository and check out the commit: + +1. Clone the NMT repository: + + .. code-block:: sh + + git clone https://github.com/tensorflow/nmt.git + +2. Check out the necessary commit: + + .. code-block:: sh + + git checkout b278487980832417ad8ac701c672b5c3dc7fa553 + + +**Step 2**. Get a trained model. You have two options: + +* Train the model with the GNMT ``wmt16_gnmt_4_layer.json`` or ``wmt16_gnmt_8_layer.json`` configuration file using the NMT framework. +* *Do not use the pre-trained checkpoints provided in the NMT repository, as they are outdated and can be incompatible with the current repository version.* + +This tutorial assumes the use of the trained GNMT model from ``wmt16_gnmt_4_layer.json`` config, German to English translation. + +**Step 3**. Create an inference graph: + +The OpenVINO assumes that a model is used for inference only. Hence, before converting the model into the IR, you need to transform the training graph into the inference graph. 
+For the GNMT model, the training graph and the inference graph have different decoders: the training graph uses a greedy search decoding algorithm, while the inference graph uses a beam search decoding algorithm. + +1. Apply the ``GNMT_inference.patch`` patch to the repository. Refer to the `Create a Patch File <#Creating-a-Patch-File>`__ instructions if you do not have it: + + .. code-block:: sh + + git apply /path/to/patch/GNMT_inference.patch + + +2. Run the NMT framework to dump the inference model: + + .. code-block:: sh + + python -m nmt.nmt \ + --src=de \ + --tgt=en \ + --ckpt=/path/to/ckpt/translate.ckpt \ + --hparams_path=/path/to/repository/nmt/nmt/standard_hparams/wmt16_gnmt_4_layer.json \ + --vocab_prefix=/path/to/vocab/vocab.bpe.32000 \ + --out_dir="" \ + --dump_inference_model \ + --infer_mode beam_search \ + --path_to_dump /path/to/dump/model/ + + +If you use different checkpoints, use the corresponding values for the ``src``, ``tgt``, ``ckpt``, ``hparams_path``, and ``vocab_prefix`` parameters. +Inference checkpoint ``inference_GNMT_graph`` and frozen inference graph ``frozen_GNMT_inference_graph.pb`` will appear in the ``/path/to/dump/model/`` folder. + +To generate ``vocab.bpe.32000``, execute the ``nmt/scripts/wmt16_en_de.sh`` script. If you face an issue of a size mismatch between the checkpoint graph's embedding layer and vocabulary (both src and target), make sure you add the following code to the ``extend_hparams`` function in the ``nmt.py`` file after line 508 (after initialization of the ``src_vocab_size`` and ``tgt_vocab_size`` variables): + +.. code-block:: py + :force: + + src_vocab_size -= 1 + tgt_vocab_size -= 1 + + +**Step 4**. Convert the model to the IR: + +.. 
code-block:: sh + + mo \ + --input_model /path/to/dump/model/frozen_GNMT_inference_graph.pb \ + --input "IteratorGetNext:1{i32}[1],IteratorGetNext:0{i32}[1,50],dynamic_seq2seq/hash_table_Lookup_1:0[1]->[2],dynamic_seq2seq/hash_table_Lookup:0[1]->[1]" \ + --output dynamic_seq2seq/decoder/decoder/GatherTree \ + --output_dir /path/to/output/IR/ + + +Input and output cutting with the ``--input`` and ``--output`` options is required since OpenVINO™ does not support ``IteratorGetNext`` and ``LookupTableFindV2`` operations. + +Input cutting: + +* ``IteratorGetNext`` operation iterates over a dataset. It is cut by output ports: port 0 contains data tensor with shape ``[batch_size, max_sequence_length]``, port 1 contains ``sequence_length`` for every batch with shape ``[batch_size]``. + +* ``LookupTableFindV2`` operations (``dynamic_seq2seq/hash_table_Lookup_1`` and ``dynamic_seq2seq/hash_table_Lookup`` nodes in the graph) are cut with constant values. + +Output cutting: + +* ``LookupTableFindV2`` operation is cut from the output and the ``dynamic_seq2seq/decoder/decoder/GatherTree`` node is treated as a new exit point. + +For more information about model cutting, refer to the :doc:`Cutting Off Parts of a Model <../../[legacy]-cutting-parts-of-a-model>` guide. + +Using a GNMT Model +################## + +.. note:: + + This step assumes you have converted a model to the Intermediate Representation. + +Inputs of the model: + +* ``IteratorGetNext/placeholder_out_port_0`` input with shape ``[batch_size, max_sequence_length]`` contains ``batch_size`` decoded input sentences. Every sentence is decoded the same way as indices of sentence elements in vocabulary and padded with index of ``eos`` (end of sentence symbol). If the length of the sentence is less than ``max_sequence_length``, remaining elements are filled with index of ``eos`` token. 
+ +* ``IteratorGetNext/placeholder_out_port_1`` input with shape ``[batch_size]`` contains sequence lengths for every sentence from the first input. For example, if ``max_sequence_length = 50``, ``batch_size = 1`` and the sentence has only 30 elements, then the input tensor for ``IteratorGetNext/placeholder_out_port_1`` should be ``[30]``. + + +Outputs of the model: + +* ``dynamic_seq2seq/decoder/decoder/GatherTree`` tensor with shape ``[max_sequence_length * 2, batch, beam_size]``, + that contains ``beam_size`` best translations for every sentence from input (also decoded as indices of words in + vocabulary). + +.. note:: + The shape of this tensor in TensorFlow can be different: instead of ``max_sequence_length * 2``, it can be any value less than that, because OpenVINO does not support dynamic shapes of outputs, while TensorFlow can stop decoding iterations when ``eos`` symbol is generated. + +Running GNMT IR +--------------- + +1. With benchmark app: + + .. code-block:: sh + + benchmark_app -m -d CPU + + +2. With OpenVINO Runtime Python API: + + .. note:: + + Before running the example, insert a path to your GNMT ``.xml`` and ``.bin`` files into ``MODEL_PATH`` and ``WEIGHTS_PATH``, and fill ``input_data_tensor`` and ``seq_lengths`` tensors according to your input data. + + .. 
code-block:: py + :force: + + from openvino.inference_engine import IENetwork, IECore + + MODEL_PATH = '/path/to/IR/frozen_GNMT_inference_graph.xml' + WEIGHTS_PATH = '/path/to/IR/frozen_GNMT_inference_graph.bin' + + # Creating network + net = IENetwork( + model=MODEL_PATH, + weights=WEIGHTS_PATH) + + # Creating input data + input_data = {'IteratorGetNext/placeholder_out_port_0': input_data_tensor, + 'IteratorGetNext/placeholder_out_port_1': seq_lengths} + + # Creating plugin and loading extensions + ie = IECore() + ie.add_extension(extension_path="libcpu_extension.so", device_name="CPU") + + # Loading network + exec_net = ie.load_network(network=net, device_name="CPU") + + # Run inference + result_ie = exec_net.infer(input_data) + + +For more information about Python API, refer to the :doc:`OpenVINO Runtime Python API <../../../../../../api/ie_python_api/api>` guide. + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-language-1b.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-language-1b.rst new file mode 100644 index 00000000000000..1b51809f9d1b6b --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-language-1b.rst @@ -0,0 +1,131 @@ +Converting a TensorFlow Language Model on One Billion Word Benchmark +==================================================================== + + +.. meta:: + :description: Learn how to convert a TensorFlow Language + Model on One Billion Word Benchmark to the OpenVINO Intermediate + Representation. + +.. 
danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Python tutorials <../../../../../../learn-openvino/interactive-tutorials-python>`. + +Downloading a Pre-trained Language Model on One Billion Word Benchmark +###################################################################### + +TensorFlow provides a pretrained `Language Model on One Billion Word Benchmark `__. + +To download the model for IR conversion, follow the instructions: + +1. Create new directory to store the model: + + .. code-block:: sh + + mkdir lm_1b + +2. Go to the ``lm_1b`` directory: + + .. code-block:: sh + + cd lm_1b + +3. Download the model GraphDef file: + + .. code-block:: sh + + wget http://download.tensorflow.org/models/LM_LSTM_CNN/graph-2016-09-10.pbtxt + +4. Create new directory to store 12 checkpoint shared files: + + .. code-block:: sh + + mkdir ckpt + +5. Go to the ``ckpt`` directory: + + .. code-block:: sh + + cd ckpt + +6. Download 12 checkpoint shared files: + + .. 
code-block:: sh + + wget http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-base + wget http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-char-embedding + wget http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-lstm + wget http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-softmax0 + wget http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-softmax1 + wget http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-softmax2 + wget http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-softmax3 + wget http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-softmax4 + wget http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-softmax5 + wget http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-softmax6 + wget http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-softmax7 + wget http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-softmax8 + + +Once you have downloaded the pretrained model files, you will have the ``lm_1b`` directory with the following hierarchy: + +.. code-block:: sh + + lm_1b/ + graph-2016-09-10.pbtxt + ckpt/ + ckpt-base + ckpt-char-embedding + ckpt-lstm + ckpt-softmax0 + ckpt-softmax1 + ckpt-softmax2 + ckpt-softmax3 + ckpt-softmax4 + ckpt-softmax5 + ckpt-softmax6 + ckpt-softmax7 + ckpt-softmax8 + + + +.. image:: ../../../../../../assets/images/lm_1b.svg + +The frozen model still has two variables: ``Variable`` and ``Variable_1``. +It means that the model keeps training those variables at each inference. + +At the first inference of this graph, the variables are initialized by initial values. +After executing the ``lstm`` nodes, results of execution are assigned to these two variables. 
+ +With each inference of the ``lm_1b`` graph, the initial ``lstm`` state data is taken from the variables written by the previous inference, +and the states of the current ``lstm`` inference are reassigned to the same variables. + +It helps the model to remember the context of the words that it takes as input. + +Converting a TensorFlow Language Model on One Billion Word Benchmark to IR +########################################################################## + +Model Optimizer assumes that the output model is for inference only. +Therefore, you should cut those variables off and resolve keeping cell and hidden states on the application level. + +There is a certain limitation for the model conversion: the original model cannot be reshaped, so you should keep original shapes. + +To generate the ``lm_1b`` Intermediate Representation (IR), provide TensorFlow ``lm_1b`` model to the +Model Optimizer with parameters: + +.. code-block:: sh + + mo \ + --input_model lm_1b/graph-2016-09-10.pbtxt \ + --input_checkpoint lm_1b/ckpt \ + --input_model_is_text \ + --input_shape [50],[50],[1,9216],[1,9216] \ + --output softmax_out,lstm/lstm_0/concat_2,lstm/lstm_1/concat_2 \ + --input char_embedding/EmbeddingLookupUnique/Unique:0,char_embedding/EmbeddingLookupUnique/Unique:1,Variable/read,Variable_1/read + +Where: + +* ``--input char_embedding/EmbeddingLookupUnique/Unique:0,char_embedding/EmbeddingLookupUnique/Unique:1,Variable/read,Variable_1/read`` and ``--input_shape [50],[50],[1,9216],[1,9216]`` replace the variables with a placeholder. +* ``--output softmax_out,lstm/lstm_0/concat_2,lstm/lstm_1/concat_2`` specifies output node name and names of LSTM cell states. 
+ diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-ncf.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-ncf.rst new file mode 100644 index 00000000000000..a8592e75d65b31 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-ncf.rst @@ -0,0 +1,68 @@ +Converting a TensorFlow Neural Collaborative Filtering Model +============================================================ + + +.. meta:: + :description: Learn how to convert a Neural Collaborative + Filtering Model from TensorFlow to the OpenVINO Intermediate + Representation. + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Python tutorials <../../../../../../learn-openvino/interactive-tutorials-python>`. + +This tutorial explains how to convert Neural Collaborative Filtering (NCF) model to the OpenVINO Intermediate Representation. + +`Public TensorFlow NCF model `__ does not contain pre-trained weights. To convert this model to the IR: + +1. Use `the instructions `__ from this repository to train the model. + +2. Freeze the inference graph you get in the previous step in ``model_dir``, following the instructions from the **Freezing Custom Models in Python** section of the :doc:`Converting a TensorFlow Model <../[legacy]-convert-tensorflow>` guide. 
+ + Run the following commands: + + .. code-block:: py + :force: + + import tensorflow as tf + from tensorflow.python.framework import graph_io + + sess = tf.compat.v1.Session() + saver = tf.compat.v1.train.import_meta_graph("/path/to/model/model.meta") + saver.restore(sess, tf.train.latest_checkpoint('/path/to/model/')) + + frozen = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, \ + ["rating/BiasAdd"]) + graph_io.write_graph(frozen, './', 'inference_graph.pb', as_text=False) + + where ``rating/BiasAdd`` is an output node. + +3. Convert the model to the OpenVINO format. If you look at your frozen model, you can see that it has one input that is split into four ``ResourceGather`` layers. (Click image to zoom in.) + + .. image:: ../../../../../../assets/images/NCF_start.svg + + However, as the model conversion API does not support such data feeding, you should skip it. Cut + the edges incoming in ``ResourceGather`` port 1: + + .. code-block:: sh + + mo --input_model inference_graph.pb \ + --input 1:embedding/embedding_lookup,1:embedding_1/embedding_lookup, \ + 1:embedding_2/embedding_lookup,1:embedding_3/embedding_lookup \ + --input_shape [256],[256],[256],[256] \ + --output_dir + + In the ``input_shape`` parameter, 256 specifies the ``batch_size`` for your model. + +Alternatively, you can do steps 2 and 3 in one command line: + +.. 
code-block:: sh + + mo --input_meta_graph /path/to/model/model.meta \ + --input 1:embedding/embedding_lookup,1:embedding_1/embedding_lookup, \ + 1:embedding_2/embedding_lookup,1:embedding_3/embedding_lookup \ + --input_shape [256],[256],[256],[256] --output rating/BiasAdd \ + --output_dir + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-object-detection.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-object-detection.rst new file mode 100644 index 00000000000000..ad321a4abb3cda --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-object-detection.rst @@ -0,0 +1,184 @@ +Converting TensorFlow Object Detection API Models +================================================= + + +.. meta:: + :description: Learn how to convert Object Detection + API Models from TensorFlow to the OpenVINO Intermediate + Representation. + + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Python tutorials <../../../../../../learn-openvino/interactive-tutorials-python>`. + +* Starting with the 2022.1 release, model conversion API can convert the TensorFlow Object Detection API Faster and Mask RCNNs topologies differently. By default, model conversion adds operation "Proposal" to the generated IR. 
This operation needs an additional input to the model with name "image_info" which should be fed with several values describing the preprocessing applied to the input image (refer to the :doc:`Proposal <../../../../../openvino-ir-format/operation-sets/operation-specs/detection/proposal-4>` operation specification for more information). However, this input is redundant for the models trained and inferred with equal size images. Model conversion API can generate IR for such models and insert operation :doc:`DetectionOutput <../../../../../openvino-ir-format/operation-sets/operation-specs/detection/detectionoutput-1>` instead of ``Proposal``. The `DetectionOutput` operation does not require additional model input "image_info". Moreover, for some models the produced inference results are closer to the original TensorFlow model. In order to trigger new behavior, the attribute "operation_to_add" in the corresponding JSON transformation configuration file should be set to value "DetectionOutput" instead of default one "Proposal". +* Starting with the 2021.1 release, model conversion API converts the TensorFlow Object Detection API SSDs, Faster and Mask RCNNs topologies keeping shape-calculating sub-graphs by default, so topologies can be re-shaped in the OpenVINO Runtime using dedicated reshape API. Refer to the :doc:`Using Shape Inference <../../../../../../openvino-workflow/running-inference/changing-input-shape>` guide for more information on how to use this feature. It is possible to change the both spatial dimensions of the input image and batch size. +* To generate IRs for TF 1 SSD topologies, model conversion API creates a number of ``PriorBoxClustered`` operations instead of a constant node with prior boxes calculated for the particular input image size. This change allows you to reshape the topology in the OpenVINO Runtime using dedicated API. 
The reshaping is supported for all SSD topologies except FPNs, which contain hardcoded shapes for some operations preventing from changing topology input shape. + +Converting a Model +################## + +You can download TensorFlow Object Detection API models from the `TensorFlow 1 Detection Model Zoo `__ or `TensorFlow 2 Detection Model Zoo `__. + +.. note:: + + Before converting, make sure you have configured model conversion API. For configuration steps, refer to the :doc:`Convert a Model <../../../legacy-conversion-api>`. + +To convert a TensorFlow Object Detection API model, run the ``mo`` command with the following required parameters: + +* ``input_model `` - File with a pretrained model (binary or text .pb file after freezing) OR ``saved_model_dir `` for the TensorFlow 2 models +* ``transformations_config `` - A subgraph replacement configuration file with transformations description. For the models downloaded from the TensorFlow Object Detection API zoo, you can find the configuration files in the ``/openvino/tools/mo/front/tf`` directory. 
Use: + + * ``ssd_v2_support.json`` - for frozen SSD topologies from the models zoo version up to 1.13.X inclusively + * ``ssd_support_api_v.1.14.json`` - for SSD topologies trained using the TensorFlow Object Detection API version 1.14 up to 1.14.X inclusively + * ``ssd_support_api_v.1.15.json`` - for SSD topologies trained using the TensorFlow Object Detection API version 1.15 up to 2.0 + * ``ssd_support_api_v.2.0.json`` - for SSD topologies trained using the TensorFlow Object Detection API version 2.0 up to 2.3.X inclusively + * ``ssd_support_api_v.2.4.json`` - for SSD topologies trained using the TensorFlow Object Detection API version 2.4 or higher + * ``efficient_det_support_api_v.2.0.json`` - for EfficientDet topologies trained using the TensorFlow Object Detection API version 2.0 up to 2.3.X inclusively + * ``efficient_det_support_api_v.2.4.json`` - for EfficientDet topologies trained using the TensorFlow Object Detection API version 2.4 or higher + * ``faster_rcnn_support.json`` - for Faster R-CNN topologies from the TF 1.X models zoo trained with TensorFlow version up to 1.6.X inclusively + * ``faster_rcnn_support_api_v1.7.json`` - for Faster R-CNN topologies trained using the TensorFlow Object Detection API version 1.7.0 up to 1.9.X inclusively + * ``faster_rcnn_support_api_v1.10.json`` - for Faster R-CNN topologies trained using the TensorFlow Object Detection API version 1.10.0 up to 1.12.X inclusively + * ``faster_rcnn_support_api_v1.13.json`` - for Faster R-CNN topologies trained using the TensorFlow Object Detection API version 1.13.X + * ``faster_rcnn_support_api_v1.14.json`` - for Faster R-CNN topologies trained using the TensorFlow Object Detection API version 1.14.0 up to 1.14.X inclusively + * ``faster_rcnn_support_api_v1.15.json`` - for Faster R-CNN topologies trained using the TensorFlow Object Detection API version 1.15.0 up to 2.0 + * ``faster_rcnn_support_api_v2.0.json`` - for Faster R-CNN topologies trained using the TensorFlow Object 
Detection API version 2.0 up to 2.3.X inclusively + * ``faster_rcnn_support_api_v2.4.json`` - for Faster R-CNN topologies trained using the TensorFlow Object Detection API version 2.4 or higher + * ``mask_rcnn_support.json`` - for Mask R-CNN topologies from the TF 1.X models zoo trained with TensorFlow version 1.9.0 or lower. + * ``mask_rcnn_support_api_v1.7.json`` - for Mask R-CNN topologies trained using the TensorFlow Object Detection API version 1.7.0 up to 1.9.X inclusively + * ``mask_rcnn_support_api_v1.11.json`` - for Mask R-CNN topologies trained using the TensorFlow Object Detection API version 1.11.0 up to 1.12.X inclusively + * ``mask_rcnn_support_api_v1.13.json`` - for Mask R-CNN topologies trained using the TensorFlow Object Detection API version 1.13.0 up to 1.13.X inclusively + * ``mask_rcnn_support_api_v1.14.json`` - for Mask R-CNN topologies trained using the TensorFlow Object Detection API version 1.14.0 up to 1.14.X inclusively + * ``mask_rcnn_support_api_v1.15.json`` - for Mask R-CNN topologies trained using the TensorFlow Object Detection API version 1.15.0 up to 2.0 + * ``mask_rcnn_support_api_v2.0.json`` - for Mask R-CNN topologies trained using the TensorFlow Object Detection API version 2.0 up to 2.3.X inclusively + * ``mask_rcnn_support_api_v2.4.json`` - for Mask R-CNN topologies trained using the TensorFlow Object Detection API version 2.4 or higher + * ``rfcn_support.json`` - for RFCN topology from the models zoo trained with TensorFlow version up to 1.9.X inclusively + * ``rfcn_support_api_v1.10.json`` - for RFCN topology from the models zoo frozen with TensorFlow version 1.10.0 up to 1.12.X inclusively + * ``rfcn_support_api_v1.13.json`` - for RFCN topology from the models zoo frozen with TensorFlow version 1.13.X + * ``rfcn_support_api_v1.14.json`` - for RFCN topology from the models zoo frozen with TensorFlow version 1.14.0 or higher + +* ``tensorflow_object_detection_api_pipeline_config `` - A special configuration file that 
describes the topology hyper-parameters and structure of the TensorFlow Object Detection API model. For the models downloaded from the TensorFlow Object Detection API zoo, the configuration file is named ``pipeline.config``. If you plan to train a model yourself, you can find templates for these files in the `models repository `__. +* ``input_shape`` (optional) - A custom input image shape. For more information how the ``input_shape`` parameter is handled for the TensorFlow Object Detection API models, refer to the `Custom Input Shape <#Custom-Input-Shape>`__ guide. + +.. note:: + + The color channel order (RGB or BGR) of an input data should match the channel order of the model training dataset. If they are different, perform the ``RGB<->BGR`` conversion specifying the command-line parameter: ``reverse_input_channels``. Otherwise, inference results may be incorrect. If you convert a TensorFlow Object Detection API model to use with the OpenVINO sample applications, you must specify the ``reverse_input_channels`` parameter. For more information about the parameter, refer to the **When to Reverse Input Channels** section of the :doc:`Converting a Model to Intermediate Representation (IR) <../../[legacy]-setting-input-shapes>` guide. + +Additionally to the mandatory parameters listed above you can use optional conversion parameters if needed. A full list of parameters is available in the :doc:`Converting a TensorFlow Model <../[legacy]-convert-tensorflow>` guide. + +For example, if you downloaded the pre-trained `SSD InceptionV2 topology `__ and extracted archive to the directory ``/tmp/ssd_inception_v2_coco_2018_01_28``, the sample command line to convert the model looks as follows: + +.. 
code-block:: sh + + mo --input_model=/tmp/ssd_inception_v2_coco_2018_01_28/frozen_inference_graph.pb --transformations_config front/tf/ssd_v2_support.json --tensorflow_object_detection_api_pipeline_config /tmp/ssd_inception_v2_coco_2018_01_28/pipeline.config --reverse_input_channels + + +OpenVINO™ Toolkit Samples and Open Model Zoo Demos +################################################## + +OpenVINO comes with a number of samples to demonstrate the use of OpenVINO Runtime API. Additionally, +Open Model Zoo provides a set of demo applications to show the implementation of close to real-life applications, +based on deep learning in various tasks, including Image Classification, Visual Object Detection, Text Recognition, +Speech Recognition, Natural Language Processing and others. Refer to the links below for more details. + +* :doc:`OpenVINO Samples <../../../../../../learn-openvino/openvino-samples>` +* :doc:`Open Model Zoo Demos <../../../../model-zoo>` + +.. important:: + + Due to the deprecation of Open Model Zoo, models in the OpenVINO IR format are now + published on `Hugging Face `__. + +Feeding Input Images to the Samples +################################### + +There are several important notes about feeding input images to the samples: + +1. OpenVINO samples stretch the input image to the size of the input operation without preserving aspect ratio. This behavior is usually correct for most topologies (including SSDs), but incorrect for other models like Faster R-CNN, Mask R-CNN and R-FCN. These models usually use a resizer that keeps the aspect ratio. The type of preprocessing is defined in the pipeline configuration file in the section ``image_resizer``. If keeping aspect ratio is used, then it is necessary to resize the image before passing it to the sample and optionally pad the resized image with 0s (if the attribute "pad_to_max_dimension" in the pipeline.config is equal to "true"). + +2.
TensorFlow implementation of image resize may be different from the one implemented in the sample. Even reading input image from compressed format (like ``.jpg``) could give different results in the sample and TensorFlow. If it is necessary to compare accuracy between the TensorFlow and the OpenVINO, it is recommended to pass pre-resized input image in a non-compressed format (like ``.bmp``). + +3. If you want to infer the model with the OpenVINO samples, convert the model specifying the ``reverse_input_channels`` command line parameter. The samples load images in BGR channels order, while TensorFlow models were trained with images in RGB order. When the ``reverse_input_channels`` command line parameter is specified, model conversion API performs first convolution or other channel dependent operation weights modification so the output will be like the image is passed with RGB channels order. + +4. Read carefully the messages printed by model conversion API. They contain important instructions on how to prepare input data before running the inference and how to interpret the output. + +Custom Input Shape +################## + +Model conversion handles the command line parameter ``input_shape`` for TensorFlow Object Detection API models in a special way depending on the image resizer type defined in the ``pipeline.config`` file. TensorFlow Object Detection API generates different ``Preprocessor`` sub-graph based on the image resizer type. Model conversion API supports two types of image resizer: + +* ``fixed_shape_resizer`` --- *Stretches* input image to the specific height and width. The ``pipeline.config`` snippet below shows a ``fixed_shape_resizer`` sample definition: + + .. code-block:: sh + + image_resizer { + fixed_shape_resizer { + height: 300 + width: 300 + } + } + +* ``keep_aspect_ratio_resizer`` --- Resizes the input image *keeping aspect ratio* to satisfy the minimum and maximum size constraints. 
The ``pipeline.config`` snippet below shows a ``keep_aspect_ratio_resizer`` sample definition: + + .. code-block:: sh + + image_resizer { + keep_aspect_ratio_resizer { + min_dimension: 600 + max_dimension: 1024 + } + } + +If an additional parameter "pad_to_max_dimension" is equal to "true", then the resized image will be padded with 0s to the square image of size "max_dimension". + +Fixed Shape Resizer Replacement ++++++++++++++++++++++++++++++++ + +* If the ``input_shape`` command line parameter is not specified, model conversion generates an input operation with the height and width as defined in the ``pipeline.config``. + +* If the ``input_shape [1, H, W, 3]`` command line parameter is specified, model conversion sets the input operation height to ``H`` and width to ``W`` and converts the model. However, the conversion may fail because of the following reasons: + + * The model is not reshape-able, meaning that it's not possible to change the size of the model input image. For example, SSD FPN models have ``Reshape`` operations with hard-coded output shapes, but the input size to these ``Reshape`` instances depends on the input image size. In this case, model conversion API shows an error during the shape inference phase. Run model conversion with ``log_level DEBUG`` to see the inferred operation output shapes and identify the mismatch. + * Custom input shape is too small. For example, if you specify ``input_shape [1,100,100,3]`` to convert an SSD Inception V2 model, one of the convolution or pooling nodes decreases input tensor spatial dimensions to non-positive values. In this case, model conversion API shows an error message like this: '[ ERROR ] Shape [ 1 -1 -1 256] is not fully defined for output X of "node_name".'
+ + +Keeping Aspect Ratio Resizer Replacement +++++++++++++++++++++++++++++++++++++++++ + +* If the ``input_shape`` command line parameter is not specified, model conversion API generates an input operation with both height and width equal to the value of parameter ``min_dimension`` in the ``keep_aspect_ratio_resizer``. + +* If the ``input_shape [1, H, W, 3]`` command line parameter is specified, model conversion API scales the specified input image height ``H`` and width ``W`` to satisfy the ``min_dimension`` and ``max_dimension`` constraints defined in the ``keep_aspect_ratio_resizer``. The following function calculates the input operation height and width: + + .. code-block:: py + :force: + + def calculate_shape_keeping_aspect_ratio(H: int, W: int, min_dimension: int, max_dimension: int): + ratio_min = min_dimension / min(H, W) + ratio_max = max_dimension / max(H, W) + ratio = min(ratio_min, ratio_max) + return int(round(H * ratio)), int(round(W * ratio)) + +The ``input_shape`` command line parameter should be specified only if the "pad_to_max_dimension" does not exist or is set to "false" in the ``keep_aspect_ratio_resizer``. + +Models with ``keep_aspect_ratio_resizer`` were trained to recognize objects in their real aspect ratio, in contrast with most of the classification topologies trained to recognize objects stretched vertically and horizontally as well. By default, topologies are converted with ``keep_aspect_ratio_resizer`` to consume a square input image. If a non-square image is provided as input, it is stretched without keeping aspect ratio, which results in decreased object detection quality. + +.. note:: + + It is highly recommended to specify the ``input_shape`` command line parameter for the models with ``keep_aspect_ratio_resizer``, if the input image dimensions are known in advance.
+ +Model Conversion Process in Detail +################################## + +This section is intended for users who want to understand how model conversion API performs Object Detection API models conversion in details. The information in this section is also useful for users having complex models that are not converted with model conversion API out of the box. It is highly recommended to read the **Graph Transformation Extensions** section in the :doc:`[Legacy] Model Optimizer Extensibility <../../../legacy-model-optimizer-extensibility>` documentation first to understand sub-graph replacement concepts which are used here. + +It is also important to open the model in the `TensorBoard `__ to see the topology structure. Model conversion API can create an event file that can be then fed to the TensorBoard tool. Run model conversion, providing two command line parameters: + +* ``input_model `` --- Path to the frozen model. +* ``tensorboard_logdir`` --- Path to the directory where TensorBoard looks for the event files. + +Implementation of the transformations for Object Detection API models is located in the `file `__. Refer to the code in this file to understand the details of the conversion process. 
+ + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-retina-net.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-retina-net.rst new file mode 100644 index 00000000000000..db2c6424367f58 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-retina-net.rst @@ -0,0 +1,31 @@ +Converting a TensorFlow RetinaNet Model +======================================= + + +.. meta:: + :description: Learn how to convert a RetinaNet model + from TensorFlow to the OpenVINO Intermediate Representation. + + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Python tutorials <../../../../../../learn-openvino/interactive-tutorials-python>`. + +This tutorial explains how to convert a RetinaNet model to the Intermediate Representation (IR). + +`Public RetinaNet model `__ does not contain pretrained TensorFlow weights. +To convert this model to the TensorFlow format, follow the `Reproduce Keras to TensorFlow Conversion tutorial `__. + +After converting the model to TensorFlow format, run the following command: + +..
code-block:: sh + + mo --input "input_1[1,1333,1333,3]" --input_model retinanet_resnet50_coco_best_v2.1.0.pb --transformations_config front/tf/retinanet.json + + +Where ``transformations_config`` command-line parameter specifies the configuration json file containing model conversion hints for model conversion API. +The json file contains some parameters that need to be changed if you train the model yourself. It also contains information on how to match endpoints +to replace the subgraph nodes. After the model is converted to the OpenVINO IR format, the output nodes will be replaced with DetectionOutput layer. + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-slim-library.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-slim-library.rst new file mode 100644 index 00000000000000..847d44fce813b1 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-slim-library.rst @@ -0,0 +1,117 @@ +Converting TensorFlow Slim Image Classification Model Library Models +==================================================================== + + +.. meta:: + :description: Learn how to convert a Slim Image + Classification model from TensorFlow to the OpenVINO + Intermediate Representation. + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. 
The guide on the new and recommended method can be found in the :doc:`Python tutorials <../../../../../../learn-openvino/interactive-tutorials-python>`. + +`TensorFlow-Slim Image Classification Model Library `__ is a library to define, train and evaluate classification models in TensorFlow. The library contains Python scripts defining the classification topologies together with checkpoint files for several pre-trained classification topologies. To convert a TensorFlow-Slim library model, complete the following steps: + +1. Download the TensorFlow-Slim models `git repository `__. +2. Download the pre-trained model `checkpoint `__. +3. Export the inference graph. +4. Convert the model using model conversion API. + +The `Example of an Inception V1 Model Conversion <#example_of_an_inception_v1_model_conversion>`__ below illustrates the process of converting an Inception V1 Model. + +Example of an Inception V1 Model Conversion +########################################### + +This example demonstrates how to convert the model on Linux OSes, but it could be easily adapted for the Windows OSes. + +**Step 1**. Create a new directory to clone the TensorFlow-Slim git repository to: + +.. code-block:: sh + + mkdir tf_models + +.. code-block:: sh + + git clone https://github.com/tensorflow/models.git tf_models + + +**Step 2**. Download and unpack the `Inception V1 model checkpoint file `__: + +.. code-block:: sh + + wget http://download.tensorflow.org/models/inception_v1_2016_08_28.tar.gz + +.. code-block:: sh + + tar xzvf inception_v1_2016_08_28.tar.gz + +**Step 3**. Export the inference graph --- the protobuf file (``.pb``) containing the architecture of the topology. This file *does not* contain the neural network weights and cannot be used for inference. + +..
code-block:: sh + + python3 tf_models/research/slim/export_inference_graph.py \ + --model_name inception_v1 \ + --output_file inception_v1_inference_graph.pb + + +Model conversion API comes with the summarize graph utility, which identifies graph input and output nodes. Run the utility to determine input/output nodes of the Inception V1 model: + +.. code-block:: sh + + python3 /openvino/tools/mo/utils/summarize_graph.py --input_model ./inception_v1_inference_graph.pb + +The output looks as follows: + +.. code-block:: sh + + 1 input(s) detected: + Name: input, type: float32, shape: (-1,224,224,3) + 1 output(s) detected: + InceptionV1/Logits/Predictions/Reshape_1 + +The tool finds one input node with name ``input``, type ``float32``, fixed image size ``(224,224,3)`` and undefined batch size ``-1``. The output node name is ``InceptionV1/Logits/Predictions/Reshape_1``. + +**Step 4**. Convert the model with the model conversion API: + +.. code-block:: sh + + mo --input_model ./inception_v1_inference_graph.pb --input_checkpoint ./inception_v1.ckpt -b 1 --mean_values [127.5,127.5,127.5] --scale 127.5 + + +The ``-b`` command line parameter is required because model conversion API cannot convert a model with undefined input size. + +For the information on why ``--mean_values`` and ``--scale`` command-line parameters are used, refer to the `Mean and Scale Values for TensorFlow-Slim Models <#Mean-and-Scale-Values-for-TensorFlow-Slim-Models>`__. + +Mean and Scale Values for TensorFlow-Slim Models +################################################# + +The TensorFlow-Slim Models were trained with normalized input data. There are several different normalization algorithms used in the Slim library. OpenVINO classification sample does not perform image pre-processing except resizing to the input layer size. It is necessary to pass mean and scale values to model conversion API so they are embedded into the generated IR in order to get correct classification results.
+ +The file `preprocessing_factory.py `__ contains a dictionary variable ``preprocessing_fn_map`` defining mapping between the model type and pre-processing function to be used. The function code should be analyzed to figure out the mean/scale values. + +The `inception_preprocessing.py `__ file defines the pre-processing function for the Inception models. The ``preprocess_for_eval`` function contains the following code: + +.. code-block:: py + :force: + + ... + import tensorflow as tf + if image.dtype != tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + ... + image = tf.subtract(image, 0.5) + image = tf.multiply(image, 2.0) + return image + + +Firstly, the ``image`` is converted to data type `tf.float32` and the values in the tensor are scaled to the ``[0, 1]`` range using the `tf.image.convert_image_dtype `__ function. Then the ``0.5`` is subtracted from the image values and values multiplied by ``2.0``. The final image range of values is ``[-1, 1]``. + +OpenVINO classification sample reads an input image as a three-dimensional array of integer values from the range ``[0, 255]``. In order to scale them to ``[-1, 1]`` range, the mean value ``127.5`` for each image channel should be specified as well as a scale factor ``127.5``. + +Similarly, the mean/scale values can be determined for other Slim models. + +The exact mean/scale values are defined in the table with list of supported TensorFlow-Slim models at the :doc:`Converting a TensorFlow Model <../[legacy]-convert-tensorflow>` guide. 
+ diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-wide-and-deep-family.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-wide-and-deep-family.rst new file mode 100644 index 00000000000000..d2f83fa12d8e67 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-wide-and-deep-family.rst @@ -0,0 +1,166 @@ +Converting TensorFlow Wide and Deep Family Models +================================================= + + +.. meta:: + :description: Learn how to convert Wide and Deep Family + models from TensorFlow to the OpenVINO Intermediate Representation. + + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Python tutorials <../../../../../../learn-openvino/interactive-tutorials-python>`. + +The Wide and Deep models is a combination of wide and deep parts for memorization and generalization of object features respectively. +These models can contain different types of object features such as numerical, categorical, sparse and sequential features. These feature types are specified +through Tensorflow tf.feature_column API. Table below presents what feature types are supported by the OpenVINO toolkit. + +.. 
list-table:: + :header-rows: 1 + + * - numeric + - (weighted) categorical + - categorical with hash + - bucketized + - sequential + - crossed + * - yes + - yes + - no + - yes + - yes + - no + + +.. note:: The categorical with hash and crossed features are currently unsupported since OpenVINO does not cover tensors of the `string` type and operations with them. + +Preparing an Example of Wide and Deep Model +########################################### + +**Step 1**. Clone the GitHub repository with TensorFlow models and move to the directory with an example of Wide and Deep model: + +.. code-block:: sh + + git clone https://github.com/tensorflow/models.git --branch r2.2.0; + cd official/r1/wide_deep + + +The Wide and Deep model is no longer in the master branch of the repository but is still available in the r2.2.0 branch. + + +**Step 2**. Train the model + +As the OpenVINO™ toolkit does not support the categorical with hash and crossed features, such feature types must be switched off in the model +by changing the ``build_model_columns()`` function in `census_dataset.py` as follows: + +.. 
code-block:: py + :force: + + def build_model_columns(): + """Builds a set of wide and deep feature columns.""" + # Continuous variable columns + age = tf.feature_column.numeric_column('age') + education_num = tf.feature_column.numeric_column('education_num') + capital_gain = tf.feature_column.numeric_column('capital_gain') + capital_loss = tf.feature_column.numeric_column('capital_loss') + hours_per_week = tf.feature_column.numeric_column('hours_per_week') + education = tf.feature_column.categorical_column_with_vocabulary_list( + 'education', [ + 'Bachelors', 'HS-grad', '11th', 'Masters', '9th', 'Some-college', + 'Assoc-acdm', 'Assoc-voc', '7th-8th', 'Doctorate', 'Prof-school', + '5th-6th', '10th', '1st-4th', 'Preschool', '12th']) + marital_status = tf.feature_column.categorical_column_with_vocabulary_list( + 'marital_status', [ + 'Married-civ-spouse', 'Divorced', 'Married-spouse-absent', + 'Never-married', 'Separated', 'Married-AF-spouse', 'Widowed']) + relationship = tf.feature_column.categorical_column_with_vocabulary_list( + 'relationship', [ + 'Husband', 'Not-in-family', 'Wife', 'Own-child', 'Unmarried', + 'Other-relative']) + workclass = tf.feature_column.categorical_column_with_vocabulary_list( + 'workclass', [ + 'Self-emp-not-inc', 'Private', 'State-gov', 'Federal-gov', + 'Local-gov', '?', 'Self-emp-inc', 'Without-pay', 'Never-worked']) + # To show an example of hashing: + #occupation = tf.feature_column.categorical_column_with_hash_bucket( + # 'occupation', hash_bucket_size=_HASH_BUCKET_SIZE) + # Transformations. + age_buckets = tf.feature_column.bucketized_column( + age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65]) + # Wide columns and deep columns. 
+ base_columns = [ + education, marital_status, relationship, workclass, + age_buckets, + ] + crossed_columns = [] + wide_columns = base_columns + crossed_columns + deep_columns = [ + age, + education_num, + capital_gain, + capital_loss, + hours_per_week, + tf.feature_column.indicator_column(workclass), + tf.feature_column.indicator_column(education), + tf.feature_column.indicator_column(marital_status), + tf.feature_column.indicator_column(relationship), + # To show an example of embedding + ] + return wide_columns, deep_columns + +After that, start training with the following command: + +.. code-block:: sh + + python census_main.py + + +Converting the Wide and Deep Model to IR +######################################## + +Use the following command line to convert the saved model file with the checkpoint: + +.. code-block:: sh + + mo + --input_checkpoint checkpoint --input_meta_graph model.ckpt.meta + --input "IteratorGetNext:0[2], + IteratorGetNext:1[2], + IteratorGetNext:2[2], + IteratorGetNext:4[2], + IteratorGetNext:7[2], + linear/linear_model/linear_model/linear_model/education/to_sparse_input/indices:0[10,2]{i64}, + linear/linear_model/linear_model/linear_model/education/hash_table_Lookup/LookupTableFindV2:0[10]{i64}, + linear/linear_model/linear_model/linear_model/education/to_sparse_input/dense_shape:0[2]{i64}->[2,50], + linear/linear_model/linear_model/linear_model/marital_status/to_sparse_input/indices:0[10,2]{i64}, + linear/linear_model/linear_model/linear_model/marital_status/hash_table_Lookup/LookupTableFindV2:0[10]{i64}, + linear/linear_model/linear_model/linear_model/marital_status/to_sparse_input/dense_shape:0[2]{i64}->[2,50], + linear/linear_model/linear_model/linear_model/relationship/to_sparse_input/indices:0[10,2]{i64}, + linear/linear_model/linear_model/linear_model/relationship/hash_table_Lookup/LookupTableFindV2:0[10]{i64}, + linear/linear_model/linear_model/linear_model/relationship/to_sparse_input/dense_shape:0[2]{i64}->[2,50], + 
linear/linear_model/linear_model/linear_model/workclass/to_sparse_input/indices:0[10,2]{i64}, + linear/linear_model/linear_model/linear_model/workclass/hash_table_Lookup/LookupTableFindV2:0[10]{i64}, + linear/linear_model/linear_model/linear_model/workclass/to_sparse_input/dense_shape:0[2]{i64}->[2,50], + dnn/input_from_feature_columns/input_layer/education_indicator/to_sparse_input/indices:0[10,2]{i64}, + dnn/input_from_feature_columns/input_layer/education_indicator/hash_table_Lookup/LookupTableFindV2:0[10]{i64}, + dnn/input_from_feature_columns/input_layer/education_indicator/to_sparse_input/dense_shape:0[2]{i64}->[2,50], + dnn/input_from_feature_columns/input_layer/marital_status_indicator/to_sparse_input/indices:0[10,2]{i64}, + dnn/input_from_feature_columns/input_layer/marital_status_indicator/hash_table_Lookup/LookupTableFindV2:0[10]{i64}, + dnn/input_from_feature_columns/input_layer/marital_status_indicator/to_sparse_input/dense_shape:0[2]{i64}->[2,50], + dnn/input_from_feature_columns/input_layer/relationship_indicator/to_sparse_input/indices:0[10,2]{i64}, + dnn/input_from_feature_columns/input_layer/relationship_indicator/hash_table_Lookup/LookupTableFindV2:0[10]{i64}, + dnn/input_from_feature_columns/input_layer/relationship_indicator/to_sparse_input/dense_shape:0[2]{i64}->[2,50], + dnn/input_from_feature_columns/input_layer/workclass_indicator/to_sparse_input/indices:0[10,2]{i64}, + dnn/input_from_feature_columns/input_layer/workclass_indicator/hash_table_Lookup/LookupTableFindV2:0[10]{i64}, + dnn/input_from_feature_columns/input_layer/workclass_indicator/to_sparse_input/dense_shape:0[2]{i64}->[2,50]" + --output head/predictions/probabilities + + +The model contains operations unsupported by the OpenVINO™ toolkit such as ``IteratorGetNext`` and ``LookupTableFindV2``, so the Model Optimizer must prune these nodes. +The pruning is specified through `--input` option. The prunings for ``IteratorGetNext:*`` nodes correspond to numeric features. 
+The pruning for each categorical feature consists of three prunings for the following nodes: ``*/to_sparse_input/indices:0``, ``*/hash_table_Lookup/LookupTableFindV2:0``, and ``*/to_sparse_input/dense_shape:0``. + +The above command line generates an OpenVINO model for a batch of two objects, with the total number of actual categorical feature values equal to 10 and maximum size of a sparse categorical feature for one object equal to 50. + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-xlnet.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-xlnet.rst new file mode 100644 index 00000000000000..853614de85feed --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-xlnet.rst @@ -0,0 +1,208 @@ +Converting a TensorFlow XLNet Model +=================================== + + +.. meta:: + :description: Learn how to convert an XLNet model from + TensorFlow to the OpenVINO Intermediate Representation. + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Python tutorials <../../../../../../learn-openvino/interactive-tutorials-python>`. + +Pretrained models for XLNet (Generalized Autoregressive Pretraining for Language Understanding) are +`publicly available `__. 
+ +Supported Models +################ + +The following models from the pretrained `XLNet model list `__ are currently supported: + +* `XLNet-Large, Cased `__ +* `XLNet-Base, Cased `__ + +Downloading the Pretrained Base XLNet Model +########################################### + +Download and unzip an archive with the `XLNet-Base, Cased `__. + +After the archive is unzipped, the directory ``cased_L-12_H-768_A-12`` is created and contains the following files: + +* TensorFlow checkpoint (``xlnet_model.ckpt``), containing the pretrained weights (which is actually 3 files) +* sentence piece model (``spiece.model``) used for (de)tokenization +* config file (``xlnet_config.json``), which specifies the hyperparameters of the model + +To get pb-file from the archive contents, you need to do the following. + +1. Run commands + + .. code-block:: sh + + cd ~ + mkdir XLNet-Base + cd XLNet-Base + git clone https://github.com/zihangdai/xlnet + wget https://storage.googleapis.com/xlnet/released_models/cased_L-12_H-768_A-12.zip + unzip cased_L-12_H-768_A-12.zip + mkdir try_save + + +2. Save and run the following Python script in `~/XLNet-Base/xlnet`: + + .. note:: The original model repository has been tested with TensorFlow 1.13.1 under Python2. + + .. 
code-block:: py + :force: + + from collections import namedtuple + + import tensorflow as tf + from tensorflow.python.framework import graph_io + + import model_utils + import xlnet + + LENGTHS = 50 + BATCH = 1 + OUTPUT_DIR = '~/XLNet-Base/try_save/' + INIT_CKPT_PATH = '~/XLNet-Base/xlnet_cased_L-12_H-768_A-12/xlnet_model.ckpt' + XLNET_CONFIG_PATH = '~/XLNet-Base/xlnet_cased_L-12_H-768_A-12/xlnet_config.json' + + FLags = namedtuple('FLags', 'use_tpu init_checkpoint') + FLAGS = FLags(use_tpu=False, init_checkpoint=INIT_CKPT_PATH) + + xlnet_config = xlnet.XLNetConfig(json_path=XLNET_CONFIG_PATH) + run_config = xlnet.RunConfig(is_training=False, use_tpu=False, use_bfloat16=False, dropout=0.1, dropatt=0.1,) + + + sentence_features_input_idx = tf.compat.v1.placeholder(tf.int32, shape=[LENGTHS, BATCH], name='input_ids') + sentence_features_segment_ids = tf.compat.v1.placeholder(tf.int32, shape=[LENGTHS, BATCH], name='seg_ids') + sentence_features_input_mask = tf.compat.v1.placeholder(tf.float32, shape=[LENGTHS, BATCH], name='input_mask') + + with tf.compat.v1.Session() as sess: + xlnet_model = xlnet.XLNetModel(xlnet_config=xlnet_config, run_config=run_config, + input_ids=sentence_features_input_idx, + seg_ids=sentence_features_segment_ids, + input_mask=sentence_features_input_mask) + + sess.run(tf.compat.v1.global_variables_initializer()) + model_utils.init_from_checkpoint(FLAGS, True) + + # Save the variables to disk. 
+ saver = tf.compat.v1.train.Saver() + + # Saving checkpoint + save_path = saver.save(sess, OUTPUT_DIR + "model.ckpt") + + # Freezing model + outputs = ['model/transformer/dropout_2/Identity'] + graph_def_freezed = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph.as_graph_def(), outputs) + + # Saving non-frozen and frozen model to pb + graph_io.write_graph(sess.graph.as_graph_def(), OUTPUT_DIR, 'model.pb', as_text=False) + graph_io.write_graph(graph_def_freezed,OUTPUT_DIR, 'model_frozen.pb', + as_text=False) + + # Write to tensorboard + with tf.compat.v1.summary.FileWriter(logdir=OUTPUT_DIR, graph_def=graph_def_freezed) as writer: + writer.flush() + +Downloading the Pretrained Large XLNet Model +############################################ + +Download and unzip an archive with the `XLNet-Large, Cased `__. + +After unzipping the archive, the directory ``cased_L-24_H-1024_A-16`` is created and contains the following files: + +* TensorFlow checkpoint (``xlnet_model.ckpt``) containing the pretrained weights (which is actually 3 files) +* sentence piece model (``spiece.model``) used for (de)tokenization +* config file (``xlnet_config.json``) which specifies the hyperparameters of the model + +To get ``pb-file`` from the archive contents, follow the instructions below: + +1. Run commands + + .. code-block:: sh + + cd ~ + mkdir XLNet-Large + cd XLNet-Large + git clone https://github.com/zihangdai/xlnet + wget https://storage.googleapis.com/xlnet/released_models/cased_L-24_H-1024_A-16.zip + unzip cased_L-24_H-1024_A-16.zip + mkdir try_save + + +2. Save and run the following Python script in ``~/XLNet-Large/xlnet``: + + .. 
code-block:: py + :force: + + from collections import namedtuple + + import tensorflow as tf + from tensorflow.python.framework import graph_io + + import model_utils + import xlnet + + LENGTHS = 50 + BATCH = 1 + OUTPUT_DIR = '~/XLNet-Large/try_save' + INIT_CKPT_PATH = '~/XLNet-Large/cased_L-24_H-1024_A-16/xlnet_model.ckpt' + XLNET_CONFIG_PATH = '~/XLNet-Large/cased_L-24_H-1024_A-16/xlnet_config.json' + + FLags = namedtuple('FLags', 'use_tpu init_checkpoint') + FLAGS = FLags(use_tpu=False, init_checkpoint=INIT_CKPT_PATH) + + xlnet_config = xlnet.XLNetConfig(json_path=XLNET_CONFIG_PATH) + run_config = xlnet.RunConfig(is_training=False, use_tpu=False, use_bfloat16=False, dropout=0.1, dropatt=0.1,) + + + sentence_features_input_idx = tf.compat.v1.placeholder(tf.int32, shape=[LENGTHS, BATCH], name='input_ids') + sentence_features_segment_ids = tf.compat.v1.placeholder(tf.int32, shape=[LENGTHS, BATCH], name='seg_ids') + sentence_features_input_mask = tf.compat.v1.placeholder(tf.float32, shape=[LENGTHS, BATCH], name='input_mask') + + with tf.compat.v1.Session() as sess: + xlnet_model = xlnet.XLNetModel(xlnet_config=xlnet_config, run_config=run_config, + input_ids=sentence_features_input_idx, + seg_ids=sentence_features_segment_ids, + input_mask=sentence_features_input_mask) + + sess.run(tf.compat.v1.global_variables_initializer()) + model_utils.init_from_checkpoint(FLAGS, True) + + # Save the variables to disk. 
+ saver = tf.compat.v1.train.Saver() + + # Saving checkpoint + save_path = saver.save(sess, OUTPUT_DIR + "model.ckpt") + + # Freezing model + outputs = ['model/transformer/dropout_2/Identity'] + graph_def_freezed = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph.as_graph_def(), outputs) + + # Saving non-frozen and frozen model to pb + graph_io.write_graph(sess.graph.as_graph_def(), OUTPUT_DIR, 'model.pb', as_text=False) + graph_io.write_graph(graph_def_freezed,OUTPUT_DIR, 'model_frozen.pb', + as_text=False) + + # Write to tensorboard + with tf.compat.v1.summary.FileWriter(logdir=OUTPUT_DIR, graph_def=graph_def_freezed) as writer: + writer.flush() + + +The script should save into ``~/XLNet-Large/xlnet``. + +Converting a frozen TensorFlow XLNet Model to IR +################################################# + +To generate the XLNet Intermediate Representation (IR) of the model, run model conversion with the following parameters: + +.. code-block:: sh + + mo --input_model path-to-model/model_frozen.pb \ + --input "input_mask[50,1],input_ids[50,1],seg_ids[50,1]" + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-yolo.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-yolo.rst new file mode 100644 index 00000000000000..e7e8072b1bda05 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-yolo.rst @@ -0,0 +1,322 @@ +Converting TensorFlow YOLO Models +================================= + + +.. meta:: + :description: Learn how to convert YOLO models from + TensorFlow to the OpenVINO Intermediate Representation. + +.. 
danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Python tutorials <../../../../../../learn-openvino/interactive-tutorials-python>`. + +This document explains how to convert real-time object detection YOLOv1, YOLOv2, YOLOv3 and YOLOv4 public models to the Intermediate Representation (IR). All YOLO models are originally implemented in the DarkNet framework and consist of two files: + +* The ``.cfg`` file with model configurations +* The ``.weights`` file with model weights + +Depending on a YOLO model version, the ``convert_model()`` method converts it differently: + +- YOLOv4 must be first converted from Keras to TensorFlow 2. +- YOLOv3 has several implementations. This tutorial uses a TensorFlow implementation of YOLOv3 model, which can be directly converted to an IR. +- YOLOv1 and YOLOv2 models must be first converted to TensorFlow using DarkFlow. + +Converting a YOLOv4 Model to IR +############################### + +This section explains how to convert the YOLOv4 Keras model from the `repository `__ to an IR. To convert the YOLOv4 model, follow the instructions below: + +1. Download YOLOv4 weights and associated with it cfg file: + + - for YOLOv4 ( `weights `__ / `config file `__ ) + - for YOLOv4-tiny ( `weights `__ / `config file `__ ) + +2. Clone the repository with the YOLOv4 model: + + .. code-block:: sh + + git clone https://github.com/david8862/keras-YOLOv3-model-set + + +3. Convert the model to the TensorFlow 2 format: + + - for YOLOv4: + + .. code-block:: sh + + python keras-YOLOv3-model-set/tools/model_converter/convert.py /yolov4.cfg /yolov4.weights + + + - for YOLOv4-tiny: + + .. 
code-block:: sh + + python keras-YOLOv3-model-set/tools/model_converter/convert.py /yolov4-tiny.cfg /yolov4-tiny.weights + + +4. Run model conversion from the TensorFlow 2 to an IR format: + + .. note:: + + Before you run the conversion, make sure you have installed all the model conversion API dependencies for TensorFlow 2. + + If you get errors, you may need to add the additional step to divide the input by 255: + + .. code-block:: sh + + --scale_values=image_input[255] + + + .. code-block:: sh + + mo --saved_model_dir yolov4 --output_dir models/IRs --input_shape [1,608,608,3] --model_name yolov4 + + +Converting YOLOv3 Model to the OpenVINO format +############################################## + +There are several public versions of TensorFlow YOLOv3 model implementation available on GitHub. This section explains how to convert YOLOv3 model from +the `repository `__ (commit ed60b90) to an IR , but the process is similar for other versions of TensorFlow YOLOv3 model. + +Overview of YOLOv3 Model Architecture ++++++++++++++++++++++++++++++++++++++ + +Originally, YOLOv3 model includes feature extractor called ``Darknet-53`` with three branches at the end that make detections at three different scales. These branches must end with the YOLO ``Region`` layer. + +``Region`` layer was first introduced in the DarkNet framework. Other frameworks, including TensorFlow, do not have the ``Region`` implemented as a single layer, so every author of public YOLOv3 model creates it using simple layers. This badly affects performance. For this reason, the main idea of YOLOv3 model conversion to IR is to cut off these custom ``Region`` -like parts of the model and complete the model with the ``Region`` layers where required. + +Dumping a YOLOv3 TensorFlow Model ++++++++++++++++++++++++++++++++++ + +To dump TensorFlow model out of `GitHub repository `__ (commit ed60b90), follow the instructions below: + +1. Clone the repository: + + .. 
code-block:: sh + + git clone https://github.com/mystic123/tensorflow-yolo-v3.git + cd tensorflow-yolo-v3 + + +2. (Optional) Checkout to the commit that the conversion was tested on: + + .. code-block:: sh + + git checkout ed60b90 + + +3. Download `coco.names `__ file from the DarkNet website **OR** use labels that fit your task. +4. Download the `yolov3.weights `__ (for the YOLOv3 model) or `yolov3-tiny.weights `__ (for the YOLOv3-tiny model) file **OR** use your pre-trained weights with the same structure. +5. Install PIL, which is used by the conversion script in the repo: + + .. code-block:: sh + + pip install pillow + + +6. Run a converter: + + .. note:: This converter works with TensorFlow 1.x and numpy 1.19 or lower. + + + - For YOLO-v3: + + .. code-block:: sh + + python3 convert_weights_pb.py --class_names coco.names --data_format NHWC --weights_file yolov3.weights + + + - For YOLOv3-tiny: + + .. code-block:: sh + + python3 convert_weights_pb.py --class_names coco.names --data_format NHWC --weights_file yolov3-tiny.weights --tiny + + + At this step, you may receive a warning like ``WARNING:tensorflow:Entity <...> could not be transformed and will be executed as-is.``. To work around this issue, switch to gast 0.2.2 with the following command: + + .. code-block:: sh + + pip3 install --user gast==0.2.2 + + +If you have YOLOv3 weights trained for an input image with the size different from 416 (320, 608 or your own), provide the ``--size`` key with the size of your image specified while running the converter. For example, run the following command for an image with size 608: + +.. 
code-block:: sh + + python3 convert_weights_pb.py --class_names coco.names --data_format NHWC --weights_file yolov3_608.weights --size 608 + + +Converting a YOLOv3 TensorFlow Model to the OpenVINO format ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +To solve the problems explained in the `YOLOv3 architecture overview <#overview-of-yolov3-model-architecture>`__ section, use the ``yolo_v3.json`` or ``yolo_v3_tiny.json`` (depending on a model) configuration file with custom operations located in the ``/tools/model_optimizer/extensions/front/tf`` repository. + +It consists of several attributes: + +.. code-block:: sh + + [ + { + "id": "TFYOLOV3", + "match_kind": "general", + "custom_attributes": { + "classes": 80, + "anchors": [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326], + "coords": 4, + "num": 9, + "masks":[[6, 7, 8], [3, 4, 5], [0, 1, 2]], + "entry_points": ["detector/yolo-v3/Reshape", "detector/yolo-v3/Reshape_4", "detector/yolo-v3/Reshape_8"] + } + } + ] + + +where: + +- ``id`` and ``match_kind`` are parameters that you cannot change. +- ``custom_attributes`` is a parameter that stores all the YOLOv3 specific attributes: + + - ``classes``, ``coords``, ``num``, and ``masks`` are attributes that you should copy from the configuration file that was used for model training. If you used DarkNet officially shared weights, you can use ``yolov3.cfg`` or ``yolov3-tiny.cfg`` configuration file from `GitHub repository `__. Replace the default values in ``custom_attributes`` with the parameters that follow the ``[yolo]`` titles in the configuration file. + - ``anchors`` is an optional parameter that is not used while inference of the model, but it used in a demo to parse ``Region`` layer output + - ``entry_points`` is a node name list to cut off the model and append the ``Region`` layer with custom attributes specified above. + + +To generate an IR of the YOLOv3 TensorFlow model, run: + +.. 
code-block:: sh + + mo \ + --input_model /path/to/yolo_v3.pb \ + --transformations_config front/tf/yolo_v3.json \ + --batch 1 \ + --output_dir + + +To generate an IR of the YOLOv3-tiny TensorFlow model, run: + +.. code-block:: sh + + mo \ + --input_model /path/to/yolo_v3_tiny.pb \ + --transformations_config front/tf/yolo_v3_tiny.json \ + --batch 1 \ + --output_dir + + +where: + +* ``batch`` defines shape of model input. In the example, ``batch`` is equal to 1, but you can also specify other integers larger than 1. +* ``transformations_config`` adds missing ``Region`` layers to the model. In the IR, the ``Region`` layer has name ``RegionYolo``. + +.. note:: + + The color channel order (RGB or BGR) of an input data should match the channel order of the model training dataset. If they are different, perform the ``RGB<->BGR`` conversion specifying the command-line parameter: ``reverse_input_channels``. Otherwise, inference results may be incorrect. For more information about the parameter, refer to the **When to Reverse Input Channels** section of the :doc:`Converting a Model to Intermediate Representation (IR) <../../[legacy]-setting-input-shapes>` guide. + + +OpenVINO toolkit provides a demo that uses YOLOv3 model. Refer to the `Object Detection C++ Demo `__ for more information. + +Converting YOLOv1 and YOLOv2 Models to the IR +############################################# + +Before converting, choose a YOLOv1 or YOLOv2 model version that best suits your task. Download model configuration file and corresponding weight file: + +* From `DarkFlow repository `__ : configuration files are stored in the ``cfg`` directory, links to weight files are given in the ``README.md`` file. The files from this repository are adapted for conversion to TensorFlow using DarkFlow. +* From DarkNet website and repository: configuration files are stored in the ``cfg`` directory of the `repository `__, links to weight files are given on the `YOLOv1 `__ and `YOLOv2 `__ websites. 
+ +To convert DarkNet YOLOv1 and YOLOv2 models to the OpenVINO format, follow these steps: + +1. `Install DarkFlow <#installing-darkflow>`__ +2. `Convert DarkNet YOLOv1 or YOLOv2 model to TensorFlow <#converting-a-darknet-yolov1-or-yolov2-model-to-tensorflow>`__ using DarkFlow +3. `Convert TensorFlow YOLOv1 or YOLOv2 model to IR <#converting-a-tensorflow-yolov1-or-yolov2-model-to-the-ir>`__ + + +Installing DarkFlow ++++++++++++++++++++++ + +You need DarkFlow to convert YOLOv1 and YOLOv2 models to TensorFlow. To install DarkFlow: + +1. Install DarkFlow `required dependencies `__. +2. Clone DarkFlow git repository: + + .. code-block:: sh + + git clone https://github.com/thtrieu/darkflow.git + + +3. Go to the root directory of the cloned repository: + + .. code-block:: sh + + cd darkflow + + +4. Install DarkFlow, using the instructions from the ``README.md`` file in the `DarkFlow repository `__. + + +Converting a DarkNet YOLOv1 or YOLOv2 Model to TensorFlow ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +To convert YOLOv1 or YOLOv2 model to TensorFlow, go to the root directory of the cloned DarkFlow repository, place the previously downloaded \*.cfg and \*.weights files in the current directory and run the following command: + +- For YOLOv1: + + .. code-block:: sh + + python3 flow --model yolov1.cfg --load yolov1.weights --savepb + + +- For YOLOv2 with VOC dataset ``--labels`` argument should be specified and additional changes in the original exporting script are required. In the `file `__ change line 121 from ``self.offset = 16`` to ``self.offset = 20``. Then run: + + .. code-block:: sh + + python3 flow --model yolov2-voc.cfg --load yolov2-voc.weights --labels voc-labels.txt --savepb + + +VOC labels can be found on the following `link `__ + +General conversion command is: + +.. code-block:: sh + + python3 flow --model /.cfg --load /.weights --labels --savepb + + +For YOLOv1, the ``--labels`` argument can be skipped. 
If the model was successfully converted, you can find the ``.meta`` and ``.pb`` files +in ``built_graph`` subdirectory of the cloned DarkFlow repository. + +File ``.pb`` is a TensorFlow representation of the YOLO model. + +Converting a TensorFlow YOLOv1 or YOLOv2 Model to the IR +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +Converted TensorFlow YOLO model is missing ``Region`` layer and its parameters. Original YOLO ``Region`` layer parameters are stored in the configuration ``/.cfg`` file under the ``[region]`` title. + +To recreate the original model structure, use the corresponding yolo ``.json`` configuration file with custom operations and ``Region`` layer parameters when converting the model to the IR. This file is located in the ``/tools/model_optimizer/extensions/front/tf`` directory. + +If chosen model has specific values of these parameters, create another configuration file with custom operations and use it for conversion. + +To generate the IR of the YOLOv1 or YOLOv2 model, provide the TensorFlow YOLOv1 or YOLOv2 model to the model conversion API with the following parameters: + +.. code-block:: sh + + mo \ + --input_model /.pb \ + --batch 1 \ + --scale 255 \ + --transformations_config front/tf/.json + + +where: + +* ``batch`` defines shape of model input. In the example, ``batch`` is equal to 1, but you can also specify other integers larger than 1. +* ``scale`` specifies scale factor that input values will be divided by. The model was trained with input values in the range ``[0,1]``. OpenVINO toolkit samples read input images as values in ``[0,255]`` range, so the scale 255 must be applied. +* ``transformations_config`` adds missing ``Region`` layers to the model. In the IR, the ``Region`` layer has name ``RegionYolo``. For other applicable parameters, refer to the :doc:`Convert Model from TensorFlow <../[legacy]-convert-tensorflow>` guide. + +.. 
note:: + + The color channel order (RGB or BGR) of an input data should match the channel order of the model training dataset. If they are different, perform the ``RGB<->BGR`` conversion specifying the command-line parameter: ``reverse_input_channels``. Otherwise, inference results may be incorrect. For more information about the parameter, refer to the **When to Reverse Input Channels** section of the :doc:`Converting a Model to Intermediate Representation (IR) <../../[legacy]-setting-input-shapes>` guide. + + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-convert-onnx.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-convert-onnx.rst new file mode 100644 index 00000000000000..a864a037d488b7 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-convert-onnx.rst @@ -0,0 +1,70 @@ +[LEGACY] Converting an ONNX Model +============================================= + +.. meta:: + :description: Learn how to convert a model from the + ONNX format to the OpenVINO Intermediate Representation. + + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Converting an ONNX Model <../../../../../openvino-workflow/model-preparation/convert-model-onnx>` article. + + +.. note:: ONNX models are supported via FrontEnd API. You may skip conversion to IR and read models directly by OpenVINO runtime API. 
Refer to the :doc:`inference example <../../../../../openvino-workflow/running-inference/integrate-openvino-with-your-application>` for more details. Using ``convert_model`` is still necessary in more complex cases, such as new custom inputs/outputs in model pruning, adding pre-processing, or using Python conversion extensions. + +Converting an ONNX Model +######################## + +The model conversion process assumes you have an ONNX model that was directly downloaded from a public repository or converted from any framework that supports exporting to the ONNX format. + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + To convert an ONNX model, run ``convert_model()`` method with the path to the ``.onnx`` file: + + .. code-block:: py + :force: + + import openvino + from openvino.tools.mo import convert_model + + core = openvino.Core() + ov_model = convert_model(".onnx") + compiled_model = core.compile_model(ov_model, "AUTO") + + .. important:: + + The ``convert_model()`` method returns ``ov.Model`` that you can optimize, compile, or save to a file for subsequent use. + + .. tab-item:: CLI + :sync: cli + + You can use ``mo`` command-line tool to convert a model to IR. The obtained IR can then be read by ``read_model()`` and inferred. + + .. code-block:: sh + + mo --input_model .onnx + + +There are no ONNX-specific parameters, so only framework-agnostic parameters are available to convert your model. For details, see the *General Conversion Parameters* section in the :doc:`Converting a Model to Intermediate Representation (IR) <../[legacy]-setting-input-shapes>` guide. + +Supported ONNX Layers +##################### + +For the list of supported standard layers, refer to the :doc:`Supported Operations <../../../../../about-openvino/compatibility-and-support/supported-operations>` page. 
+ +Additional Resources +#################### + +See the :doc:`Model Conversion Tutorials <[legacy]-conversion-tutorials>` page for a set of tutorials providing step-by-step instructions for converting specific ONNX models. Here are some examples: + +* :doc:`Convert ONNX Faster R-CNN Model <[legacy]-conversion-tutorials/convert-onnx-faster-r-cnn>` +* :doc:`Convert ONNX GPT-2 Model <[legacy]-conversion-tutorials/convert-onnx-gpt-2>` +* :doc:`Convert ONNX Mask R-CNN Model <[legacy]-conversion-tutorials/convert-onnx-mask-r-cnn>` + + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-convert-paddle.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-convert-paddle.rst new file mode 100644 index 00000000000000..041a14f93547b6 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-convert-paddle.rst @@ -0,0 +1,139 @@ +[LEGACY] Converting a PaddlePaddle Model +====================================================== + + +.. meta:: + :description: Learn how to convert a model from the + PaddlePaddle format to the OpenVINO Intermediate Representation. + + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Converting a PaddlePaddle Model <../../../../../openvino-workflow/model-preparation/convert-model-paddle>` article. 
+ + +This page provides general instructions on how to convert a model from a PaddlePaddle format to the OpenVINO IR format using Model Optimizer. The instructions are different depending on PaddlePaddle model format. + +.. note:: PaddlePaddle models are supported via FrontEnd API. You may skip conversion to IR and read models directly by OpenVINO runtime API. Refer to the :doc:`inference example <../../../../../openvino-workflow/running-inference/integrate-openvino-with-your-application>` for more details. Using ``convert_model`` is still necessary in more complex cases, such as new custom inputs/outputs in model pruning, adding pre-processing, or using Python conversion extensions. + +Converting PaddlePaddle Model Inference Format +############################################## + +PaddlePaddle inference model includes ``.pdmodel`` (storing model structure) and ``.pdiparams`` (storing model weight). For how to export PaddlePaddle inference model, please refer to the `Exporting PaddlePaddle Inference Model `__ Chinese guide. + + +To convert a PaddlePaddle model, use the ``mo`` script and specify the path to the input ``.pdmodel`` model file: + +.. code-block:: sh + + mo --input_model .pdmodel + +**For example**, this command converts a yolo v3 PaddlePaddle network to OpenVINO IR network: + +.. 
code-block:: sh + + mo --input_model=yolov3.pdmodel --input=image,im_shape,scale_factor --input_shape=[1,3,608,608],[1,2],[1,2] --reverse_input_channels --output=save_infer_model/scale_0.tmp_1,save_infer_model/scale_1.tmp_1 + +Converting PaddlePaddle Model From Memory Using Python API +########################################################## + +Model conversion API supports passing the following PaddlePaddle models directly from memory: + +* ``paddle.hapi.model.Model`` +* ``paddle.fluid.dygraph.layers.Layer`` +* ``paddle.fluid.executor.Executor`` + +When you convert certain PaddlePaddle models, you may need to set the ``example_input`` or ``example_output`` parameters first. Below you will find examples that show how to convert aforementioned model formats using the parameters. + +* ``paddle.hapi.model.Model`` + + .. code-block:: py + :force: + + import paddle + from openvino.tools.mo import convert_model + + # create a paddle.hapi.model.Model format model + resnet50 = paddle.vision.models.resnet50() + x = paddle.static.InputSpec([1,3,224,224], 'float32', 'x') + y = paddle.static.InputSpec([1,1000], 'float32', 'y') + + model = paddle.Model(resnet50, x, y) + + # convert to OpenVINO IR format + ov_model = convert_model(model) + + # optional: serialize OpenVINO IR to *.xml & *.bin + from openvino.runtime import serialize + serialize(ov_model, "ov_model.xml", "ov_model.bin") + +* ``paddle.fluid.dygraph.layers.Layer`` + + ``example_input`` is required while ``example_output`` is optional, and accept the following formats: + + ``list`` with tensor(``paddle.Tensor``) or InputSpec(``paddle.static.input.InputSpec``) + + .. 
code-block:: py + :force: + + import paddle + from openvino.tools.mo import convert_model + + # create a paddle.fluid.dygraph.layers.Layer format model + model = paddle.vision.models.resnet50() + x = paddle.rand([1,3,224,224]) + + # convert to OpenVINO IR format + ov_model = convert_model(model, example_input=[x]) + +* ``paddle.fluid.executor.Executor`` + + ``example_input`` and ``example_output`` are required, and accept the following formats: + + ``list`` or ``tuple`` with variable(``paddle.static.data``) + + .. code-block:: py + :force: + + import paddle + from openvino.tools.mo import convert_model + + paddle.enable_static() + + # create a paddle.fluid.executor.Executor format model + x = paddle.static.data(name="x", shape=[1,3,224]) + y = paddle.static.data(name="y", shape=[1,3,224]) + relu = paddle.nn.ReLU() + sigmoid = paddle.nn.Sigmoid() + y = sigmoid(relu(x)) + + exe = paddle.static.Executor(paddle.CPUPlace()) + exe.run(paddle.static.default_startup_program()) + + # convert to OpenVINO IR format + ov_model = convert_model(exe, example_input=[x], example_output=[y]) + + +.. important:: + + The ``convert_model()`` method returns ``ov.Model`` that you can optimize, compile, or save to a file for subsequent use. + + +Supported PaddlePaddle Layers +############################# + +For the list of supported standard layers, refer to the :doc:`Supported Operations <../../../../../about-openvino/compatibility-and-support/supported-operations>` page. + +Frequently Asked Questions (FAQ) +################################ + +The model conversion API displays explanatory messages for typographical errors, incorrectly used options, or other issues. They describe the potential cause of the problem and give a link to the :doc:`Model Optimizer FAQ <../[legacy]-model-optimizer-faq>`, which provides instructions on how to resolve most issues. 
The FAQ also includes links to relevant sections in :doc:`Convert a Model <../../legacy-conversion-api>` to help you understand what went wrong. + +Additional Resources +#################### + +See the :doc:`Model Conversion Tutorials <[legacy]-conversion-tutorials>` page for a set of tutorials providing step-by-step instructions for converting specific PaddlePaddle models. + + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-convert-pytorch.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-convert-pytorch.rst new file mode 100644 index 00000000000000..2ab66a49cd3546 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-convert-pytorch.rst @@ -0,0 +1,111 @@ +[LEGACY] Converting a PyTorch Model +============================================ + + +.. meta:: + :description: Learn how to convert a model from the + PyTorch format to the OpenVINO Intermediate Representation. + + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Converting a PyTorch Model <../../../../../openvino-workflow/model-preparation/convert-model-pytorch>` article. + +This page provides instructions on how to convert a model from the PyTorch format to the OpenVINO IR format. + +The conversion is a required step to run inference using OpenVINO API. 
+It is not required if you choose to work with OpenVINO under the PyTorch framework, +using its :doc:`torch.compile feature <../../../../../openvino-workflow/torch-compile>`. + +Converting a PyTorch model with PyTorch Frontend +############################################################### + +To convert a PyTorch model to the OpenVINO IR format, use the OVC API (superseding the previously used tool, MO). To do so, use the ``convert_model()`` method, like so: + + +.. code-block:: py + :force: + + import torchvision + import torch + from openvino.tools.mo import convert_model + + model = torchvision.models.resnet50(weights='DEFAULT') + ov_model = convert_model(model) + +The following PyTorch model formats are supported: + +* ``torch.nn.Module`` +* ``torch.jit.ScriptModule`` +* ``torch.jit.ScriptFunction`` + +Converting certain PyTorch models may require model tracing, which needs the ``example_input`` +parameter to be set, for example: + +.. code-block:: py + :force: + + import torchvision + import torch + from openvino.tools.mo import convert_model + + model = torchvision.models.resnet50(weights='DEFAULT') + ov_model = convert_model(model, example_input=torch.randn(1, 3, 100, 100)) + +``example_input`` accepts the following formats: + +* ``openvino.runtime.Tensor`` +* ``torch.Tensor`` +* ``np.ndarray`` +* ``list`` or ``tuple`` with tensors (``openvino.runtime.Tensor`` / ``torch.Tensor`` / ``np.ndarray``) +* ``dictionary`` where the key is the input name and the value is the tensor (``openvino.runtime.Tensor`` / ``torch.Tensor`` / ``np.ndarray``) + +Sometimes ``convert_model`` will produce inputs of the model with dynamic rank or dynamic type. +Such a model may not be supported by the hardware chosen for inference. To avoid this issue, +use the ``input`` argument of ``convert_model``. For more information, refer to :doc:`Convert Models Represented as Python Objects <../[legacy]-convert-models-as-python-objects>`. + +.. 
important:: + + The ``convert_model()`` method returns ``ov.Model`` that you can optimize, compile, or save to a file for subsequent use. + +Exporting a PyTorch Model to ONNX Format +######################################## + +It is also possible to export a PyTorch model to ONNX and then convert it to OpenVINO IR. To convert and deploy a PyTorch model this way, follow these steps: + +1. `Export a PyTorch model to ONNX <#exporting-a-pytorch-model-to-onnx-format>`__. +2. :doc:`Convert an ONNX model <[legacy]-convert-onnx>` to produce an optimized :doc:`Intermediate Representation <../../../../openvino-ir-format/operation-sets>` of the model based on the trained network topology, weights, and biases values. + +PyTorch models are defined in Python. To export them, use the ``torch.onnx.export()`` method. The code to +evaluate or test the model is usually provided with its code and can be used for its initialization and export. +The export to ONNX is crucial for this process, but it is covered by the PyTorch framework; therefore, it will not be covered here in detail. +For more information, refer to the `Exporting PyTorch models to ONNX format <https://pytorch.org/docs/stable/onnx.html>`__ guide. + +To export a PyTorch model, you need to obtain the model as an instance of ``torch.nn.Module`` class and call the ``export`` function. + +.. code-block:: py + :force: + + import torch + + # Instantiate your model. This is just a regular PyTorch model that will be exported in the following steps. + model = SomeModel() + # Evaluate the model to switch some operations from training mode to inference. + model.eval() + # Create dummy input for the model. It will be used to run the model inside export function. 
+ dummy_input = torch.randn(1, 3, 224, 224) + # Call the export function + torch.onnx.export(model, (dummy_input, ), 'model.onnx') + + +Additional Resources +#################### + +See the :doc:`Model Conversion Tutorials <[legacy]-conversion-tutorials>` page for a set of tutorials providing step-by-step instructions for converting specific PyTorch models. Here are some examples: + +* :doc:`Convert PyTorch BERT-NER Model <[legacy]-conversion-tutorials/convert-pytorch-bert-ner>` +* :doc:`Convert PyTorch RCAN Model <[legacy]-conversion-tutorials/convert-pytorch-rcan>` +* :doc:`Convert PyTorch YOLACT Model <[legacy]-conversion-tutorials/convert-pytorch-yolact>` + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-convert-tensorflow-lite.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-convert-tensorflow-lite.rst new file mode 100644 index 00000000000000..6d9256cdf09994 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-convert-tensorflow-lite.rst @@ -0,0 +1,37 @@ +[LEGACY] Converting a TensorFlow Lite Model +===================================================== + + +.. meta:: + :description: Learn how to convert a model from a + TensorFlow Lite format to the OpenVINO Intermediate Representation. + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. 
The guide on the new and recommended method can be found in the :doc:`Converting a TensorFlow Lite Model <../../../../../openvino-workflow/model-preparation/convert-model-tensorflow-lite>` article. + +To convert a TensorFlow Lite model, use the ``mo`` script and specify the path to the input ``.tflite`` model file: + +.. code-block:: sh + + mo --input_model .tflite + +TensorFlow Lite models are supported via FrontEnd API. You may skip conversion to IR and read models directly by OpenVINO runtime API. Refer to the :doc:`inference example <../../../../../openvino-workflow/running-inference/integrate-openvino-with-your-application>` for more details. Using ``convert_model`` is still necessary in more complex cases, such as new custom inputs/outputs in model pruning, adding pre-processing, or using Python conversion extensions. + +.. important:: + + The ``convert_model()`` method returns ``ov.Model`` that you can optimize, compile, or save to a file for subsequent use. + +Supported TensorFlow Lite Layers +################################### + +For the list of supported standard layers, refer to the :doc:`Supported Operations <../../../../../about-openvino/compatibility-and-support/supported-operations>` page. + +Supported TensorFlow Lite Models +################################### + +More than eighty percent of public TensorFlow Lite models are supported from open sources `TensorFlow Hub `__ and `MediaPipe `__. +Unsupported models usually have custom TensorFlow Lite operations. 
+ diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-convert-tensorflow.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-convert-tensorflow.rst new file mode 100644 index 00000000000000..2bcb6fde9b833b --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-convert-tensorflow.rst @@ -0,0 +1,359 @@ +[LEGACY] Converting a TensorFlow Model +============================================ + +.. meta:: + :description: Learn how to convert a model from a + TensorFlow format to the OpenVINO Intermediate Representation. + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Converting a TensorFlow Model <../../../../../openvino-workflow/model-preparation/convert-model-tensorflow>` article. + + +.. note:: TensorFlow models are supported via FrontEnd API. You may skip conversion to IR and read models directly by OpenVINO runtime API. Refer to the :doc:`inference example <../../../../../openvino-workflow/running-inference/integrate-openvino-with-your-application>` for more details. Using ``convert_model`` is still necessary in more complex cases, such as new custom inputs/outputs in model pruning, adding pre-processing, or using Python conversion extensions. + +The conversion instructions are different depending on whether your model was created with TensorFlow v1.X or TensorFlow v2.X. 
+ +Converting TensorFlow 1 Models +############################### + +Converting Frozen Model Format ++++++++++++++++++++++++++++++++ + +To convert a TensorFlow model, use the ``*mo*`` script to simply convert a model with a path to the input model *.pb* file: + +.. code-block:: sh + + mo --input_model .pb + + +Converting Non-Frozen Model Formats ++++++++++++++++++++++++++++++++++++ + +There are three ways to store non-frozen TensorFlow models and convert them by model conversion API: + +1. **Checkpoint**. In this case, a model consists of two files: ``inference_graph.pb`` (or ``inference_graph.pbtxt``) and ``checkpoint_file.ckpt``. +If you do not have an inference graph file, refer to the `Freezing Custom Models in Python <#freezing-custom-models-in-python>`__ section. +To convert the model with the inference graph in ``.pb`` format, run the `mo` script with a path to the checkpoint file: + +.. code-block:: sh + + mo --input_model .pb --input_checkpoint + +To convert the model with the inference graph in ``.pbtxt`` format, run the ``mo`` script with a path to the checkpoint file: + +.. code-block:: sh + + mo --input_model .pbtxt --input_checkpoint --input_model_is_text + + +2. **MetaGraph**. In this case, a model consists of three or four files stored in the same directory: ``model_name.meta``, ``model_name.index``, +``model_name.data-00000-of-00001`` (the numbers may vary), and ``checkpoint`` (optional). +To convert such TensorFlow model, run the `mo` script with a path to the MetaGraph ``.meta`` file: + +.. code-block:: sh + + mo --input_meta_graph .meta + + +3. **SavedModel format**. In this case, a model consists of a special directory with a ``.pb`` file +and several subfolders: ``variables``, ``assets``, and ``assets.extra``. For more information about the SavedModel directory, refer to the `README `__ file in the TensorFlow repository. +To convert such TensorFlow model, run the ``mo`` script with a path to the SavedModel directory: + +.. 
code-block:: sh + + mo --saved_model_dir <SAVED_MODEL_DIRECTORY> + + +You can convert TensorFlow 1.x SavedModel format in the environment that has a 1.x or 2.x version of TensorFlow. However, TensorFlow 2.x SavedModel format strictly requires the 2.x version of TensorFlow. +If a model contains operations currently unsupported by OpenVINO, prune these operations by explicit specification of input nodes using the ``--input`` option. +To determine custom input nodes, display a graph of the model in TensorBoard. To generate TensorBoard logs of the graph, use the ``--tensorboard_logdir`` option. +TensorFlow 2.x SavedModel format has a specific graph due to eager execution. In case of pruning, find custom input nodes in the ``StatefulPartitionedCall/*`` subgraph of TensorFlow 2.x SavedModel format. + +Freezing Custom Models in Python +++++++++++++++++++++++++++++++++ + +When a network is defined in Python code, you have to create an inference graph file. Graphs are usually built in a form +that allows model training. That means all trainable parameters are represented as variables in the graph. +To be able to use such a graph with model conversion API, it should be frozen and dumped to a file with the following code: + +.. code-block:: py + :force: + + import tensorflow as tf + from tensorflow.python.framework import graph_io + frozen = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ["name_of_the_output_node"]) + graph_io.write_graph(frozen, './', 'inference_graph.pb', as_text=False) + +Where: + +* ``sess`` is the instance of the TensorFlow Session object where the network topology is defined. +* ``["name_of_the_output_node"]`` is the list of output node names in the graph; ``frozen`` graph will include only those nodes from the original ``sess.graph_def`` that are directly or indirectly used to compute the given output nodes. The ``'name_of_the_output_node'`` is an example of a possible output node name. 
You should derive the names based on your own graph. 
+* ``./`` is the directory where the inference graph file should be generated. +* ``inference_graph.pb`` is the name of the generated inference graph file. +* ``as_text`` specifies whether the generated file should be in human-readable text format or binary. + +Converting TensorFlow 2 Models +############################### + +To convert TensorFlow 2 models, ensure that ``openvino-dev[tensorflow2]`` is installed via ``pip``. +TensorFlow 2.X officially supports two model formats: SavedModel and Keras H5 (or HDF5). +Below are the instructions on how to convert each of them. + +SavedModel Format ++++++++++++++++++ + +A model in the SavedModel format consists of a directory with a ``saved_model.pb`` file and two subfolders: ``variables`` and ``assets``. +To convert such a model, run the ``mo`` script with a path to the SavedModel directory: + +.. code-block:: sh + + mo --saved_model_dir <SAVED_MODEL_DIRECTORY> + +TensorFlow 2 SavedModel format strictly requires the 2.x version of TensorFlow installed in the +environment for conversion to the Intermediate Representation (IR). + +If a model contains operations currently unsupported by OpenVINO™, +prune these operations by explicit specification of input nodes using the ``--input`` or ``--output`` +options. To determine custom input nodes, visualize a model graph in the TensorBoard. + +TensorFlow 2 SavedModel format has a specific graph structure due to eager execution. In case of +pruning, find custom input nodes in the ``StatefulPartitionedCall/*`` subgraph. + +Since the 2023.0 release, direct pruning of models in SavedModel format is not supported. +It is essential to freeze the model before pruning. Use the following code snippet for model freezing: + +.. 
code-block:: py + :force: + + import tensorflow as tf + from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2 + saved_model_dir = "./saved_model" + imported = tf.saved_model.load(saved_model_dir) + # retrieve the concrete function and freeze + concrete_func = imported.signatures[tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY] + frozen_func = convert_variables_to_constants_v2(concrete_func, + lower_control_flow=False, + aggressive_inlining=True) + # retrieve GraphDef and save it into .pb format + graph_def = frozen_func.graph.as_graph_def(add_shapes=True) + tf.io.write_graph(graph_def, '.', 'model.pb', as_text=False) + +Keras H5 +++++++++ + +If you have a model in HDF5 format, load the model using TensorFlow 2 and serialize it to +SavedModel format. Here is an example of how to do it: + +.. code-block:: py + :force: + + import tensorflow as tf + model = tf.keras.models.load_model('model.h5') + tf.saved_model.save(model,'model') + + +The Keras H5 model with a custom layer has specifics to be converted into SavedModel format. +For example, the model with a custom layer ``CustomLayer`` from ``custom_layer.py`` is converted as follows: + +.. code-block:: py + :force: + + import tensorflow as tf + from custom_layer import CustomLayer + model = tf.keras.models.load_model('model.h5', custom_objects={'CustomLayer': CustomLayer}) + tf.saved_model.save(model,'model') + + +Then follow the above instructions for the SavedModel format. + +.. note:: + + Do not use other hacks to resave TensorFlow 2 models into TensorFlow 1 formats. + +Command-Line Interface (CLI) Examples Using TensorFlow-Specific Parameters +########################################################################## + +* Launching model conversion for Inception V1 frozen model when model file is a plain text protobuf: + + .. 
code-block:: sh + + mo --input_model inception_v1.pbtxt --input_model_is_text -b 1 + + +* Launching model conversion for Inception V1 frozen model and dump information about the graph to TensorBoard log dir ``/tmp/log_dir`` + + .. code-block:: sh + + mo --input_model inception_v1.pb -b 1 --tensorboard_logdir /tmp/log_dir + + +* Launching model conversion for BERT model in the SavedModel format, with three inputs. Specify explicitly the input shapes where the batch size and the sequence length equal 2 and 30 respectively. + + .. code-block:: sh + + mo --saved_model_dir BERT --input mask,word_ids,type_ids --input_shape [2,30],[2,30],[2,30] + +Conversion of TensorFlow models from memory using Python API +############################################################ + +Model conversion API supports passing TensorFlow/TensorFlow2 models directly from memory. + +* ``tf.keras.Model`` + + .. code-block:: py + :force: + + import tensorflow as tf + from openvino.tools.mo import convert_model + + model = tf.keras.applications.ResNet50(weights="imagenet") + ov_model = convert_model(model) + + +* ``tf.keras.layers.Layer``. Requires setting the "input_shape". + + .. code-block:: py + :force: + + import tensorflow_hub as hub + from openvino.tools.mo import convert_model + + model = hub.KerasLayer("https://tfhub.dev/google/imagenet/mobilenet_v1_100_224/classification/5") + ov_model = convert_model(model, input_shape=[-1, 224, 224, 3]) + +* ``tf.Module``. Requires setting the "input_shape". + + .. code-block:: py + :force: + + import tensorflow as tf + from openvino.tools.mo import convert_model + + class MyModule(tf.Module): + def __init__(self, name=None): + super().__init__(name=name) + self.variable1 = tf.Variable(5.0, name="var1") + self.variable2 = tf.Variable(1.0, name="var2") + def __call__(self, x): + return self.variable1 * x + self.variable2 + + model = MyModule(name="simple_module") + ov_model = convert_model(model, input_shape=[-1]) + +* ``tf.compat.v1.Graph`` + + .. 
code-block:: py + :force: + + import tensorflow as tf + from openvino.tools.mo import convert_model + + with tf.compat.v1.Session() as sess: + inp1 = tf.compat.v1.placeholder(tf.float32, [100], 'Input1') + inp2 = tf.compat.v1.placeholder(tf.float32, [100], 'Input2') + output = tf.nn.relu(inp1 + inp2, name='Relu') + tf.compat.v1.global_variables_initializer() + model = sess.graph + + ov_model = convert_model(model) + +* ``tf.compat.v1.GraphDef`` + + .. code-block:: py + :force: + + import tensorflow as tf + from openvino.tools.mo import convert_model + + with tf.compat.v1.Session() as sess: + inp1 = tf.compat.v1.placeholder(tf.float32, [100], 'Input1') + inp2 = tf.compat.v1.placeholder(tf.float32, [100], 'Input2') + output = tf.nn.relu(inp1 + inp2, name='Relu') + tf.compat.v1.global_variables_initializer() + model = sess.graph_def + + ov_model = convert_model(model) + +* ``tf.function`` + + .. code-block:: py + :force: + + import tensorflow as tf + from openvino.tools.mo import convert_model + + @tf.function( + input_signature=[tf.TensorSpec(shape=[1, 2, 3], dtype=tf.float32), + tf.TensorSpec(shape=[1, 2, 3], dtype=tf.float32)]) + def func(x, y): + return tf.nn.sigmoid(tf.nn.relu(x + y)) + + ov_model = convert_model(func) + +* ``tf.compat.v1.session`` + + .. code-block:: py + :force: + + import tensorflow as tf + from openvino.tools.mo import convert_model + + with tf.compat.v1.Session() as sess: + inp1 = tf.compat.v1.placeholder(tf.float32, [100], 'Input1') + inp2 = tf.compat.v1.placeholder(tf.float32, [100], 'Input2') + output = tf.nn.relu(inp1 + inp2, name='Relu') + tf.compat.v1.global_variables_initializer() + + ov_model = convert_model(sess) + +* ``tf.train.checkpoint`` + + .. code-block:: py + :force: + + import tensorflow as tf + from openvino.tools.mo import convert_model + + model = tf.keras.Model(...) + checkpoint = tf.train.Checkpoint(model) + save_path = checkpoint.save(save_directory) + # ... 
+ checkpoint.restore(save_path) + ov_model = convert_model(checkpoint) + +.. important:: + + The ``convert_model()`` method returns ``ov.Model`` that you can optimize, compile, or save to a file for subsequent use. + +Supported TensorFlow and TensorFlow 2 Keras Layers +################################################## + +For the list of supported standard layers, refer to the :doc:`Supported Operations <../../../../../about-openvino/compatibility-and-support/supported-operations>` page. + +Frequently Asked Questions (FAQ) +################################ + +The model conversion API provides explanatory messages if it is unable to run to completion due to typographical errors, incorrectly used options, or other issues. The message describes the potential cause of the problem and gives a link to the :doc:`Model Optimizer FAQ <../[legacy]-model-optimizer-faq>`. The FAQ provides instructions on how to resolve most issues. The FAQ also includes links to relevant sections in :doc:`Convert a Model <../../legacy-conversion-api>` to help you understand what went wrong. + +Summary +####### + +In this document, you learned: + +* Basic information about how the model conversion API works with TensorFlow models. +* Which TensorFlow models are supported. +* How to freeze a TensorFlow model. +* How to convert a trained TensorFlow model using model conversion API with both framework-agnostic and TensorFlow-specific command-line parameters. + +Additional Resources +#################### + +See the :doc:`Model Conversion Tutorials <[legacy]-conversion-tutorials>` page for a set of tutorials providing step-by-step instructions for converting specific TensorFlow models. 
Here are some examples: + +* :doc:`Convert TensorFlow EfficientDet Models <[legacy]-conversion-tutorials/convert-tensorflow-efficient-det>` +* :doc:`Convert TensorFlow FaceNet Models <[legacy]-conversion-tutorials/convert-tensorflow-face-net>` +* :doc:`Convert TensorFlow Object Detection API Models <[legacy]-conversion-tutorials/convert-tensorflow-object-detection>` + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-troubleshooting-reshape-errors.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-troubleshooting-reshape-errors.rst new file mode 100644 index 00000000000000..4d5c282a947d1b --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-troubleshooting-reshape-errors.rst @@ -0,0 +1,54 @@ +[LEGACY] Troubleshooting Reshape Errors +======================================= + + +.. meta:: + :description: In OpenVINO™, you can use several methods to address the issues + of non-reshape-able models and shape collision, which prevent + normal shape propagation. + + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + +How To Avoid Shape Collision +############################ + +Operation semantics may impose restrictions on input shapes of the operation. +Shape collision during shape propagation may be a sign that new shape does not satisfy the restrictions. +Changing the model input shape may result in intermediate operations shape collision. 
For example, in the following: + +* The :doc:`Reshape <../../../openvino-ir-format/operation-sets/operation-specs/shape/reshape-1>` operation with a hard-coded output shape value, +* The :doc:`MatMul <../../../openvino-ir-format/operation-sets/operation-specs/matrix/matmul-1>` operation with the ``Const`` second input and this input cannot be resized by spatial dimensions due to operation semantics. + +Model structure and logic should not change significantly after model reshaping. + +* The Global Pooling operation is commonly used to reduce output feature map of classification models output. Having the input of the shape *[N, C, H, W]*, Global Pooling returns the output of the shape *[N, C, 1, 1]*. Model architects usually express Global Pooling with the help of the ``Pooling`` operation with the fixed kernel size *[H, W]*. During spatial reshape, having the input of the shape *[N, C, H1, W1]*, ``Pooling`` with the fixed kernel size *[H, W]* returns the output of the shape *[N, C, H2, W2]*, where *H2* and *W2* are commonly not equal to *1*. It breaks the classification model structure. For example, the public `Inception family models from TensorFlow `__ have this issue. + +* Changing the model input shape may significantly affect its accuracy. For example, Object Detection models from TensorFlow have resizing restrictions by design. To keep the model valid after the reshape, choose a new input shape that satisfies conditions listed in the ``pipeline.config`` file. + +.. _how-to-fix-non-reshape-able-model: + +How To Fix Non-Reshape-able Model +################################# + +To fix some operators which prevent normal shape propagation: + +* see if the issue can be fixed via changing the values of some operators' input. For example, the most common problem of non-reshape-able models is a ``Reshape`` operator with a hard-coded output shape. You can cut-off the hard-coded second input of ``Reshape`` and fill it in with relaxed values. 
For the example in the diagram below, the model conversion API command line should read: + + .. code-block:: sh + + mo --input_model path/to/model --input data[8,3,224,224],1:reshaped[2]->[0,-1] + + + With ``1:reshaped[2]``, it is required to cut the second input (counting from zero, so ``1:`` means the second input) of the operation named ``reshaped`` and replace it with a ``Parameter`` with shape ``[2]``. + With ``->[0,-1]``, this new ``Parameter`` is replaced by a ``Constant`` operator which has the ``[0, -1]`` value. + Since the ``Reshape`` operator has ``0`` and ``-1`` as specific values, it allows propagating shapes freely without losing the intended meaning of ``Reshape``. For more information, see :doc:`the specification <../../../openvino-ir-format/operation-sets/operation-specs/shape/reshape-1>`. + + .. image:: ../../../../assets/images/batch_relaxation.png + +* transform the model conversion on the back phase. For more information, see the :doc:`How to Convert a Model <../legacy-model-optimizer-extensibility>`, +* transform OpenVINO Model during the runtime. For more information, see :doc:`OpenVINO Runtime Transformations <../../../openvino-extensibility/transformation-api>`, +* modify the original model with the help of the original framework. + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility.rst new file mode 100644 index 00000000000000..3d2365f45ffe3b --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility.rst @@ -0,0 +1,326 @@ +Legacy Model Optimizer Extensibility +==================================== + + + +.. 
toctree:: + :maxdepth: 1 + :hidden: + + legacy-model-optimizer-extensibility/[legacy]-graph-traversal-and-modification + legacy-model-optimizer-extensibility/[legacy]-model-optimizer-extensions + legacy-model-optimizer-extensibility/[legacy]-extending-model-optimizer-with-caffe-python-layers + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated TensorFlow conversion method. The guide on the new and recommended method, using a new frontend, can be found in the :doc:`Frontend Extensions <../../openvino-extensibility/frontend-extensions>` article. + +This article describes Model Optimizer internals. Altering them may result in application instability, and in case of future changes to the API, lack of backward compatibility. + +.. note:: + If you want to add support for ONNX, TensorFlow Lite, PaddlePaddle or TensorFlow operations, or you are not familiar with other extension alternatives in OpenVINO, read :doc:`this guide <../../openvino-extensibility>` instead. + +.. _model-optimizer-extensibility: + +Model Optimizer extensibility mechanism enables support of new operations and custom transformations to generate the optimized intermediate representation (IR) as described :doc:`here <../../openvino-ir-format/operation-sets>`. +This mechanism is a core part of Model Optimizer, which itself serves as a huge set of examples showing how to add custom logic to support your model. + +There are several cases when the customization is needed: + +* A model contains operation(s) not known to Model Optimizer, but these operation(s) could be expressed as a combination of supported operations. In this case, a custom transformation should be implemented to replace unsupported operation(s) with supported ones. 
+* A model contains a sub-graph of operations that can be replaced with a smaller number of operations to get better performance. This case corresponds to so-called *fusing transformations* (e.g., replacing a sub-graph performing the calculation :math:`x/(1.0+e^{-(beta*x)})` with a single operation of type :doc:`Swish <../../openvino-ir-format/operation-sets/operation-specs/activation/swish-4>`). +* A model contains a custom framework operation (the operation that is not a part of an official operation set of the framework) that was developed using the framework extensibility mechanism. In this case, Model Optimizer should know how to handle the operation and generate a corresponding section in an IR for it. + +It is necessary to figure out how Model Optimizer represents a model in memory and converts it to an IR before +going into details of the Model Optimizer extensibility mechanism. + +.. note:: + All paths in this article are provided relative to the Model Optimizer installation directory if not stated otherwise. + +.. _mo_model_representation_in_memory: + +============================== +Model Representation in Memory +============================== + +The model can be represented as a directed graph, where nodes are operations and edges correspond to data passing from a +producer operation (node) to a consumer operation (node). + +Model Optimizer uses Python class ``mo.graph.graph.Graph`` instance to represent the computation graph in memory during +the model conversion. This class is inherited from the ``networkx.MultiDiGraph`` class of the standard ``networkx`` Python +library. It provides many convenient methods to traverse and modify the graph. Refer to the ``mo/graph/graph.py`` file for examples. + +Model Optimizer keeps all necessary information about the operation in node attributes. 
Model Optimizer uses the ``mo.graph.graph.Node`` class defined in the ``mo/graph/graph.py`` file, which is a wrapper on top of a ``networkx`` node attributes +dictionary, and provides many convenient methods to work with the node. For example, the node ``my_node`` attribute with a +name ``my_attr`` can be retrieved from the node with the following code ``my_node.my_attr``, which is equivalent to obtaining +attribute with name ``my_attr`` in the ``graph.node[my_node]`` dictionary. For the class implementation details, refer to the ``mo/graph/graph.py`` file. + +An operation may have several inputs and outputs. For example, operation :doc:`Split <../../openvino-ir-format/operation-sets/operation-specs/movement/split-1>` has +two inputs: data to split and axis to split along, and variable number of outputs depending on a value of attribute +``num_splits``. Each input data to the operation is passed to a specific operation **input port**. An operation produces +the output data from an **output port**. Input and output ports are numbered from 0 independently. Model Optimizer uses +classes ``mo.graph.port.Port`` and ``mo.graph.connection.Connection``, which are useful abstraction to perform graph +modifications like nodes connecting/re-connecting and graph traversing. These classes are widely used in the Model +Optimizer code so it is easy to find a lot of usage examples. + +There is no dedicated class corresponding to an edge, so low-level graph manipulation is needed to get access to +edge attributes if needed. Meanwhile, most manipulations with nodes connections should be done with help of the +``mo.graph.connection.Connection`` and ``mo.graph.port.Port`` classes. Thus, low-level graph manipulation is error prone and +is strongly not recommended. + +Further details and examples related to a model representation in memory are provided in the sections below, in a context +for a better explanation. 
For more information on how to use ports and connections, refer to the :doc:`Graph Traversal and Modification Using Ports and Connections ` article. + +.. _mo_model_conversion_pipeline: + +========================= +Model Conversion Pipeline +========================= + +A model conversion pipeline can be represented with the following diagram: + +.. image:: ../../../assets/images/MO_conversion_pipeline.svg + +Each conversion step is reviewed in details below. + +Model Loading +############# + +Model Optimizer gets a trained model file as an input. The model loader component of Model Optimizer reads a model file +using Python bindings provided with the framework and builds an in-memory representation of a computation graph. There +is a separate loader for each supported framework. These loaders are implemented in the +``extensions/load//loader.py`` files of Model Optimizer. + +.. note:: + Model Optimizer uses a special parser for Caffe models built on top of the ``caffe.proto`` file. In the case of a model loading failure, Model Optimizer throws an error and requests preparation of the parser that can read the model. For more information on how to prepare the custom Caffe parser, refer to the :ref:`question #1 ` in the :doc:`Model Optimizer FAQ `. + +The result of a model loading step is a ``Graph`` object, which can be depicted like in the following example: + +.. image:: ../../../assets/images/MO_graph_after_loader.svg + +Model Optimizer loader saves an operation instance framework description (usually it is a Protobuf message) into a node +attribute usually with a name ``pb`` for each operation of an input model. It is important that this is a +**framework-specific** description of an operation. This means that an operation (e.g. 
+:doc:`Convolution <../../openvino-ir-format/operation-sets/operation-specs/convolution/convolution-1>`) may be represented differently in, for example, Caffe and +TensorFlow frameworks but performs the same calculations from a mathematical point of view. + +In the image above, the **Operation 2** has one input and two outputs. The tensor produced from the output **port 0** is +consumed with the **Operation 5** (the input **port 0**) and **Operation 3** (the input **port 1**). The tensor produced from the +output **port 1** is consumed with the **Operation 4** (the input **port 0**). + +Each edge has two attributes: ``in`` and ``out``. They contain the input port number of the consumer node and the output port +number of the producer node. These attributes describe the fact that nodes are operations consuming some input tensors +and producing some output tensors. From the perspective of Model Optimizer, nodes themselves are **black boxes** because +they do not contain required information about the operation they perform. + +Operations Attributes Extracting +################################ + +The next step is to parse framework-dependent operation representation saved in a node attribute and update the node +attributes with the operation specific attributes. There are two options to do this. + +1. The extractor extension approach (recommended way to extract attributes for an operation). Explained in detail in the :doc:`Operation Extractor ` article. +2. The legacy approach with a built-in extractor. The ``mo/front/<framework>/extractor.py`` file (for example, the one for Caffe) defines a dictionary with extractors for specific operation types. A key in the dictionary is a type of an operation to trigger the extracting function for and the value is the function. The function has one parameter – a node to extract attributes from. This is a legacy and non-extensible approach so it should be avoided. This mechanism will be removed in future versions of Model Optimizer. 
+ +The extractors execution order is the following: + +* ``CustomLayersMapping.xml`` (for Caffe models only). +* Model Optimizer extension. +* Built-in Model Optimizer extractor. + +The result of operations attributes extracting step can be depicted like in the following example: + +.. image:: ../../../assets/images/MO_graph_after_extractors.svg + +The only difference in the graph from the previous step is that nodes contain dictionary with extracted attributes and +operation-specific attributes needed for Model Optimizer. However, from this step, Model Optimizer does not +need the original representation of the operation/model and just uses Model Optimizer representation (there are some +peculiar cases in which Model Optimizer still uses the ``pb`` attribute, covered in this +article partially). A detailed list of common node attributes and their values is provided in the +:doc:`Model Optimizer Operation ` article. + +Front Phase +########### + +For legacy reasons, you must specify shapes for all not fully-defined inputs of the model. In contrast, other +machine learning frameworks, like TensorFlow, let you create a model with undefined or partially defined input shapes. +As an example, undefined dimension is marked with an integer value ``-1`` in a TensorFlow model or has some string name +in an ONNX model. + +During the front phase, Model Optimizer knows shape of the model inputs and constants only and does not know shapes +(and even ranks) of the intermediate tensors. But information about shapes may not be needed to implement particular +transformation. For example, the transformation ``extensions/front/TopKNormalize.py`` removes an attribute ``k`` from a +``TopK`` node and adds an input constant with the value ``k``. The transformation is needed to convert a ``TopK`` operation. 
+It comes from frameworks, where a number of output elements is defined as an attribute of the operation to the +OpenVINO :doc:`TopK <../../openvino-ir-format/operation-sets/operation-specs/sort/top-k-3>` operation semantic, which requires this value to be a separate input. + +It is important to mention that sometimes it seems like transformation cannot be implemented during the front phase +because the actual values of inputs or shapes are needed. In fact, manipulations of shapes or values can be implemented +using operations that are added to the graph. Consider the +``extensions/front/onnx/flattenONNX_to_reshape.py`` transformation, which replaces an ONNX +`Flatten `__ operation with a sub-graph of operations performing +the following (when ``axis`` is not equal to 0 and 1): + +1. Calculate a shape of the ``Flatten`` input tensor, using the :doc:`ShapeOf <../../openvino-ir-format/operation-sets/operation-specs/shape/shape-of-3>` operation. +2. Get the first ``axis`` elements from the output of ``Shape`` operation and calculate their product, using the :doc:`ReduceProd <../../openvino-ir-format/operation-sets/operation-specs/reduction/reduce-prod-1>` operation. +3. Concatenate output of the ``ReduceProd`` and constant with the value of ``-1`` (for an explanation of this value refer to the :doc:`Reshape <../../openvino-ir-format/operation-sets/operation-specs/shape/reshape-1>` specification page). +4. Use the concatenated value as the second input to the ``Reshape`` operation. + +It is highly recommended to write shape-agnostic transformations to avoid model reshape-ability issues. For more information related to the reshaping of a model, refer to the :doc:`Using Shape Inference <../../../openvino-workflow/running-inference/changing-input-shape>` guide. + +More information on how to develop front phase transformations and dedicated API description is provided in the +:ref:`Front Phase Transformations `. + +.. 
_mo_partial_inference: + +Partial Inference +################# + +Model Optimizer performs a partial inference of a model during model conversion. This procedure includes output shapes +calculation of all operations in a model and constant folding (value calculation for constant sub-graphs). The constant +folding is needed for the shape inference because in some cases evaluation of constant sub-graph is needed to calculate +output shapes. For example, the output shape for the :doc:`Reshape <../../openvino-ir-format/operation-sets/operation-specs/shape/reshape-1>` operation may be +defined as a mathematical expression using the :doc:`ShapeOf <../../openvino-ir-format/operation-sets/operation-specs/shape/shape-of-3>` operation output. + +.. note:: + Model Optimizer does not fold sub-graphs starting from the :doc:`ShapeOf <../../openvino-ir-format/operation-sets/operation-specs/shape/shape-of-3>` operation by default because this leads to a model non-reshape-ability (the command-line parameter ``--static_shape`` can override this behavior). For more information related to reshaping of a model, refer to the :doc:`Using Shape Inference <../../../openvino-workflow/running-inference/changing-input-shape>` guide. + +Model Optimizer calculates output shapes for all operations in a model to write them to Intermediate Representation files. + +.. note:: + This is a legacy requirement. Starting with IR version 10, OpenVINO Runtime needs to know shapes of the :doc:`Const <../../openvino-ir-format/operation-sets/operation-specs/infrastructure/constant-1>` and the :doc:`Parameter <../../openvino-ir-format/operation-sets/operation-specs/infrastructure/parameter-1>` operations only. 
The OpenVINO Runtime calculates output shapes for all operations in a model, using shapes of :doc:`Parameter <../../openvino-ir-format/operation-sets/operation-specs/infrastructure/parameter-1>` and :doc:`Const <../../openvino-ir-format/operation-sets/operation-specs/infrastructure/constant-1>` operations defined with respective operation attributes. + +Model Optimizer inserts **data** nodes to the computation graph before starting the partial inference phase. The data node +corresponds to the specific tensor produced with the operation. Each data node contains two attributes: ``shape``, +containing the shape of the tensor, and ``value``, which may contain the actual value of the tensor. The value for a ``value`` +attribute is equal to ``None`` if this tensor value cannot be calculated. This happens in two cases: when a tensor value +depends on a values passed to the :doc:`Parameter <../../openvino-ir-format/operation-sets/operation-specs/infrastructure/parameter-1>` operation of a model or +Model Optimizer does not have value propagation implementation for the operation. + +Before running partial inference, the graph can be depicted like in the following example: + +.. image:: ../../../assets/images/MO_graph_before_partial_inference.svg + +The difference in a graph structure with a graph during the front phase is not only in the data nodes, but also in the +edge attributes. Note that an ``out`` attribute is specified for edges **from operation** nodes only, while an ``in`` +attribute is specified for edges **from data** nodes only. This corresponds to the fact that a tensor (data node) is +produced from a specific output port of an operation and is consumed with a specific input port of an operation. Also, +a unique data node is created for each output port of an operation. The node may be used as an input node for several +operation nodes. 
For example, the data node **data2_0** is consumed with the input **port 1** of the **Operation 3** and +input **port 0** of the **Operation 5**. + +Now, consider how Model Optimizer performs shape and value propagation. Model Optimizer performs a topological sort +of the graph nodes. An error message is thrown if a graph contains a cycle. Then, shape inference functions are called for +each node in the graph, according to the topological order. Each node of the graph must have an attribute called ``infer`` +with a shape inference function, which is a function with one parameter – an instance of the ``Node`` class. The ``infer`` +attribute is usually set in the operation extractor or when a node is added in some transformation using the Model +Optimizer operation class inherited from the ``mo.ops.Op`` class. For more information on how to specify a shape inference function, +refer to the :doc:`Model Optimizer Operation ` and :doc:`Operation Extractor ` articles. + +A shape inference function should calculate an operation (node) output shape(s) based on input shape(s) and operation +(node) attribute(s) and update ``shape`` and optionally ``value`` attributes of the corresponding data node(s). A simplified +example of the shape infer function for the :doc:`Reshape <../../openvino-ir-format/operation-sets/operation-specs/shape/reshape-1>` operation (the full version is +available in the ``mo/ops/reshape.py`` file): + +.. code-block:: py + :force: + + @staticmethod + def infer(node: Node): + name = node.soft_get('name', node.id) + + input_shape = node.in_port(0).data.get_shape() # get the input tensor shape + new_shape = node.in_port(1).data.get_value() # get the value defining the output tensor shape. This tensor may + # have special values like 0 and -1 + + output_shape = ... 
# calculate output shape without special values like 0 and -1 + + if node.in_port(0).data.get_value() is not None: # if the input value is defined then calculate output value; + # shape will be updated automatically with the value shape + node.out_port(0).data.set_value(node.in_port(0).data.get_value().reshape(output_shape)) + else: # in the opposite case calculate the output shape only + node.out_port(0).data.set_shape(output_shape) + +Methods ``in_port()`` and ``out_port()`` of the ``Node`` class are used to get and set data node attributes. For more information on +how to use them, refer to the :doc:`Graph Traversal and Modification Using Ports and Connections <legacy-model-optimizer-extensibility/[legacy]-graph-traversal-and-modification>` article. + +.. note:: + A shape inference function should perform output shape calculation in the original model layout. For example, OpenVINO™ supports Convolution operations in NCHW layout only but TensorFlow supports NHWC layout as well. Model Optimizer shape inference function calculates output shapes for NHWC Convolutions in NHWC layout and only during the layout change phase the shape is converted to NCHW. + +.. note:: + There is a legacy approach to read data node attributes, like ``input_shape = op_node.in_node(0).shape`` and modify data nodes attributes, like ``op_node.out_node(0).shape = some_value``. This approach is still used in the Model Optimizer code but is not recommended. Instead, use the approach described in the :ref:`Ports `. + +Middle Phase +############ + +The middle phase starts after partial inference. At this phase, a graph contains data nodes and output shapes of all +operations in the graph have been calculated. Any transformation implemented at this stage must update the ``shape`` +attribute for all newly added operations. 
It is highly recommended to use the API described in the +:doc:`Graph Traversal and Modification Using Ports and Connections <legacy-model-optimizer-extensibility/[legacy]-graph-traversal-and-modification>` because modification of a graph using this API causes automatic re-inference of affected nodes as well as necessary data nodes creation. + +More information on how to develop middle transformations and dedicated API description is provided in the +:ref:`Middle Phase Transformations `. + +NHWC to NCHW Layout Change +########################## + +There are several middle transformations responsible for changing model layout from NHWC to NCHW. These transformations are triggered by default for TensorFlow models as TensorFlow supports Convolution operations in the NHWC layout. + +This layout change is disabled automatically if the model does not have operations that OpenVINO™ needs to execute in the NCHW layout, for example, Convolutions in NHWC layout. + +For more details on how it works, refer to the source code of the transformations mentioned in the below summary of the process: + +1. Model Optimizer changes output shapes of most operations producing 4D and 5D (four dimensional and five dimensional) tensors as if they were in NHWC layout to NCHW layout: ``nchw_shape = np.array(nhwc_shape)[[0, 3, 1, 2]]`` for 4D and ``nchw_shape = np.array(nhwc_shape)[[0, 4, 1, 2, 3]]`` for 5D. This permutation does not happen for some operations with specific conditions identified during a model conversion. +2. Model Optimizer inserts :doc:`Gather <../../openvino-ir-format/operation-sets/operation-specs/movement/gather-1>` operations to the sub-graph related to shapes calculation in order to perform shape calculation in a correct layout. +3. Model Optimizer inserts :doc:`Transpose <../../openvino-ir-format/operation-sets/operation-specs/movement/transpose-1>` operations for some operations with specific conditions, identified during a model conversion, to produce correct inference results. 
+ +The main transformations responsible for a layout change are: + +* ``extensions/middle/ApplyPermutations.py`` +* ``extensions/middle/InsertLayoutPropagationTransposes.py`` +* ``extensions/middle/MarkSubgraphsWithCorrectLayout.py`` +* ``extensions/middle/ApplyNHWCtoNCHWpermutation.py`` +* ``extensions/middle/LayoutChangeForConstantShapePaths.py`` + +Back Phase +########## + +The back phase starts after the layout change to NCHW. This phase contains mostly the following transformations: + +1. Transformations that should work with a graph in the NCHW layout and thus cannot be implemented in the middle phase. +2. Transformations that replace nodes corresponding to internal Model Optimizer operations with nodes corresponding to the :doc:`opset <../../openvino-ir-format/operation-sets/available-opsets>` operations. +3. Transformations that normalize operations inputs according to the specification. +4. Final optimization transformations. + +A graph structure during the back phase is the same as during the middle phase. There is no difference in writing middle +and back transformations. + +More information on how to develop back transformations and dedicated API description is provided in the +:ref:`Back Phase Transformations `. + +Intermediate Representation Emitting +#################################### + +The last phase of a model conversion is the Intermediate Representation emitting. Model Optimizer performs the following +steps: + +1. Iterates over all operation nodes in the graph and checks that all nodes have the ``type`` attribute set. This attribute defines the operation type and is used in the OpenVINO to instantiate proper operation from the :doc:`opset <../../openvino-ir-format/operation-sets/available-opsets>` specified in the ``version`` attribute of the node. If a node does not have attribute ``type`` or its value is equal to ``None``, Model Optimizer exits with an error. +2. Performs type inference of graph operations similar to the shape inference. 
Inferred data types are saved to port attributes in the IR. +3. Performs topological sort of the graph and changes ``id`` attribute of all operation nodes to be sequential integer values starting from 0. +4. Saves all Constant values to the ``.bin`` file. Constants with the same value are shared among different operations. +5. Generates an ``.xml`` file defining a graph structure. The information about operation inputs and outputs is prepared uniformly for all operations regardless of their type. A list of attributes to be saved to the ``.xml`` file is defined with the ``backend_attrs()`` or ``supported_attrs()`` of the ``Op`` class used for a graph node instantiation. For more information on how the operation attributes are saved to XML, refer to the function ``prepare_emit_ir()`` in the ``mo/pipeline/common.py`` file and :doc:`Model Optimizer Operation ` article. + +==================== +Additional Resources +==================== + +* :doc:`Deep Learning Network Intermediate Representation and Operation Sets in OpenVINO™ <../../openvino-ir-format/operation-sets>` +* :doc:`Converting a Model to Intermediate Representation (IR) ` +* :doc:`OpenVINO Model Representation <../../../openvino-workflow/running-inference/integrate-openvino-with-your-application/model-representation>` +* :doc:`OpenVINO™ Extensibility Mechanism <../../openvino-extensibility>` +* :doc:`Graph Traversal and Modification Using Ports and Connections <legacy-model-optimizer-extensibility/[legacy]-graph-traversal-and-modification>` +* :doc:`Model Optimizer Extensions <legacy-model-optimizer-extensibility/[legacy]-model-optimizer-extensions>` +* :doc:`Extending Model Optimizer with Caffe Python Layers <legacy-model-optimizer-extensibility/[legacy]-extending-model-optimizer-with-caffe-python-layers>` + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility/[legacy]-extending-model-optimizer-with-caffe-python-layers.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility/[legacy]-extending-model-optimizer-with-caffe-python-layers.rst new file mode 100644 index 00000000000000..4277f68139845b --- /dev/null 
+++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility/[legacy]-extending-model-optimizer-with-caffe-python-layers.rst @@ -0,0 +1,110 @@ +[LEGACY] Extending Model Optimizer with Caffe Python Layers +============================================================ + +.. meta:: + :description: Learn how to extract operator attributes in Model Optimizer to + support a custom Caffe operation written only in Python. + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated TensorFlow conversion method. The guide on the new and recommended method, using a new frontend, can be found in the :doc:`Frontend Extensions <../../../openvino-extensibility/frontend-extensions>` article. + +This article provides instructions on how to support a custom Caffe operation written only in Python. For example, the +`Faster-R-CNN model `__ implemented in +Caffe contains a custom proposal layer written in Python. The layer is described in the +`Faster-R-CNN prototxt `__ in the following way: + +.. code-block:: sh + + layer { + name: 'proposal' + type: 'Python' + bottom: 'rpn_cls_prob_reshape' + bottom: 'rpn_bbox_pred' + bottom: 'im_info' + top: 'rois' + python_param { + module: 'rpn.proposal_layer' + layer: 'ProposalLayer' + param_str: "'feat_stride': 16" + } + } + + +This article describes only a procedure on how to extract operator attributes in Model Optimizer. The rest of the +operation enabling pipeline and information on how to support other Caffe operations (written in C++) is described in +the :doc:`Customize Model Optimizer <../legacy-model-optimizer-extensibility>` guide. 
+ +======================================== +Writing Extractor for Caffe Python Layer +======================================== + +Custom Caffe Python layers have an attribute ``type`` (defining the type of the operation) equal to ``Python`` and two +mandatory attributes ``module`` and ``layer`` in the ``python_param`` dictionary. The ``module`` defines the Python module name +with the layer implementation, while ``layer`` value is an operation type defined by a user. In order to extract +attributes for such an operation it is necessary to implement extractor class inherited from the +``CaffePythonFrontExtractorOp`` class instead of ``FrontExtractorOp`` class, used for standard framework layers. The ``op`` +class attribute value should be set to the ``module + "." + layer`` value so the extractor is triggered for this kind of +operation. + +Below is a simplified example of the extractor for the custom operation Proposal from the mentioned Faster-R-CNN model. +The full code with additional checks can be found `here `__. + +The sample code uses operation ``ProposalOp`` which corresponds to ``Proposal`` operation described in the :doc:`Available Operations Sets <../../../openvino-ir-format/operation-sets/available-opsets>` +page. For a detailed explanation of the extractor, refer to the source code below. + +.. code-block:: py + :force: + + from openvino.tools.mo.ops.proposal import ProposalOp + from openvino.tools.mo.front.extractor import CaffePythonFrontExtractorOp + + + class ProposalPythonFrontExtractor(CaffePythonFrontExtractorOp): + op = 'rpn.proposal_layer.ProposalLayer' # module + "." 
+ layer + enabled = True # extractor is enabled + + @staticmethod + def extract_proposal_params(node, defaults): + param = node.pb.python_param # get the protobuf message representation of the layer attributes + # parse attributes from the layer protobuf message to a Python dictionary + attrs = CaffePythonFrontExtractorOp.parse_param_str(param.param_str) + update_attrs = defaults + + # the operation expects ratio and scale values to be called "ratio" and "scale" while Caffe uses different names + if 'ratios' in attrs: + attrs['ratio'] = attrs['ratios'] + del attrs['ratios'] + if 'scales' in attrs: + attrs['scale'] = attrs['scales'] + del attrs['scales'] + + update_attrs.update(attrs) + ProposalOp.update_node_stat(node, update_attrs) # update the node attributes + + @classmethod + def extract(cls, node): + # define default values for the Proposal layer attributes + defaults = { + 'feat_stride': 16, + 'base_size': 16, + 'min_size': 16, + 'ratio': [0.5, 1, 2], + 'scale': [8, 16, 32], + 'pre_nms_topn': 6000, + 'post_nms_topn': 300, + 'nms_thresh': 0.7 + } + cls.extract_proposal_params(node, defaults) + return cls.enabled + +==================== +Additional Resources +==================== + +* :doc:`Model Optimizer Extensibility <../legacy-model-optimizer-extensibility>` +* :doc:`Graph Traversal and Modification Using Ports and Connections <[legacy]-graph-traversal-and-modification>` +* :doc:`Model Optimizer Extensions <[legacy]-model-optimizer-extensions>` + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility/[legacy]-graph-traversal-and-modification.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility/[legacy]-graph-traversal-and-modification.rst new file mode 100644 index 00000000000000..55b55a77335f2b --- /dev/null +++ 
b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility/[legacy]-graph-traversal-and-modification.rst @@ -0,0 +1,186 @@ +[LEGACY] Graph Traversal and Modification +=========================================== + +.. meta:: + :description: Learn about deprecated APIs and the Port and Connection classes + in Model Optimizer used for graph traversal and transformation. + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated TensorFlow conversion method. The guide on the new and recommended method, using a new frontend, can be found in the :doc:`Frontend Extensions <../../../openvino-extensibility/frontend-extensions>` article. + +There are three APIs for a graph traversal and transformation used in the Model Optimizer: + +1. The API provided with the ``networkx`` Python library for the ``networkx.MultiDiGraph`` class, which is the base class for +the ``mo.graph.graph.Graph`` object. For example, the following methods belong to this API level: + +* ``graph.add_edges_from([list])``, +* ``graph.add_node(x, attrs)``, +* ``graph.out_edges(node_id)`` +* other methods where ``graph`` is an instance of the ``networkx.MultiDiGraph`` class. + +**This is the lowest-level API. Avoid using it in the Model Optimizer transformations**. For more details, refer to the :ref:`Model Representation in Memory ` section. + +2. The API built around the ``mo.graph.graph.Node`` class. The ``Node`` class is the primary class to work with graph nodes +and their attributes.
Examples of such methods and functions are: + +* ``node.in_node(y)``, +* ``node.out_node(x)``, +* ``node.get_outputs()``, +* ``node.insert_node_after(n1, y)``, +* ``create_edge(n1, n2)`` + +**There are some "Node" class methods not recommended for use and some functions defined in the mo.graph.graph have been deprecated**. For more details, refer to the ``mo/graph/graph.py`` file. + +3. The high-level API called Model Optimizer Graph API, which uses ``mo.graph.graph.Graph``, ``mo.graph.port.Port`` and +``mo.graph.connection.Connection`` classes. For example, the following methods belong to this API level: + +* ``node.in_port(x)``, +* ``node.out_port(y)``, +* ``port.get_connection()``, +* ``connection.get_source()``, +* ``connection.set_destination(dest_port)`` + +**This is the recommended API for the Model Optimizer transformations and operations implementation**. + +The main benefit of using the Model Optimizer Graph API is that it hides some internal implementation details (the fact that +the graph contains data nodes), provides API to perform safe and predictable graph manipulations, and adds operation +semantic to the graph. This is achieved with introduction of concepts of ports and connections. + +.. note:: + This article is dedicated to the Model Optimizer Graph API only and does not cover other two non-recommended APIs. + +.. _mo_intro_ports: + +===== +Ports +===== + +An operation semantic describes how many inputs and outputs the operation has. 
For example, +:doc:`Parameter <../../../openvino-ir-format/operation-sets/operation-specs/infrastructure/parameter-1>` and :doc:`Const <../../../openvino-ir-format/operation-sets/operation-specs/infrastructure/constant-1>` operations have no +inputs and have one output, :doc:`ReLU <../../../openvino-ir-format/operation-sets/operation-specs/activation/relu-1>` operation has one input and one output, +:doc:`Split <../../../openvino-ir-format/operation-sets/operation-specs/movement/split-1>` operation has 2 inputs and a variable number of outputs depending on the value of the +attribute ``num_splits``. + +Each operation node in the graph (an instance of the ``Node`` class) has 0 or more input and output ports (instances of +the ``mo.graph.port.Port`` class). The ``Port`` object has several attributes: + +* ``node`` - the instance of the ``Node`` object the port belongs to. +* ``idx`` - the port number. Input and output ports are numbered independently, starting from ``0``. Thus, + :doc:`ReLU <../../../openvino-ir-format/operation-sets/operation-specs/activation/relu-1>` operation has one input port (with index ``0``) and one output port (with index ``0``). +* ``type`` - the type of the port. Could be equal to either ``"in"`` or ``"out"``. +* ``data`` - the object that should be used to get attributes of the corresponding data node. This object has methods ``get_shape()`` / ``set_shape()`` and ``get_value()`` / ``set_value()`` to get/set shape/value of the corresponding data node. For example, ``in_port.data.get_shape()`` returns an input shape of a tensor connected to input port ``in_port`` (``in_port.type == 'in'``), ``out_port.data.get_value()`` returns a value of a tensor produced from output port ``out_port`` (``out_port.type == 'out'``). + +.. note:: + Functions ``get_shape()`` and ``get_value()`` return ``None`` until the partial inference phase. For more information about model conversion phases, refer to the :ref:`Model Conversion Pipeline `. 
For information about partial inference phase, see the :ref:`Partial Inference `. + +There are several methods of the ``Node`` class to get the instance of a corresponding port: + +* ``in_port(x)`` and ``out_port(x)`` to get the input/output port with number ``x``. +* ``in_ports()`` and ``out_ports()`` to get a dictionary, where key is a port number and the value is the corresponding input/output port. + +Attributes ``in_ports_count`` and ``out_ports_count`` of the ``Op`` class instance define default number of input and output +ports to be created for the ``Node``. However, additional input/output ports can be added using methods +``add_input_port()`` and ``add_output_port()``. Ports can also be removed using the ``delete_input_port()`` and +``delete_output_port()`` methods. + +The ``Port`` class is just an abstraction that works with edges incoming/outgoing to/from a specific ``Node`` instance. For +example, output port with ``idx = 1`` corresponds to the outgoing edge of a node with an attribute ``out = 1``, the input +port with ``idx = 2`` corresponds to the incoming edge of a node with an attribute ``in = 2``. + +Consider the example of a graph part with 4 operation nodes "Op1", "Op2", "Op3", and "Op4" and a number of data nodes +depicted with light green boxes. + +.. image:: ../../../../assets/images/MO_ports_example_1.svg + :scale: 80 % + :align: center + +Operation nodes have input ports (yellow squares) and output ports (light purple squares). Input port may not be +connected. For example, the input **port 2** of node **Op1** does not have incoming edge, while output port always has an +associated data node (after the partial inference when the data nodes are added to the graph), which may have no +consumers. + +Ports can be used to traverse a graph. The method ``get_source()`` of an input port returns an output port producing the +tensor consumed by the input port.
It is important that the method works the same during front, middle and back phases of a +model conversion even though the graph structure changes (there are no data nodes in the graph during the front phase). + +Let's assume that there are 4 instances of ``Node`` object ``op1, op2, op3``, and ``op4`` corresponding to nodes **Op1**, **Op2**, +**Op3**, and **Op4**, respectively. The result of ``op2.in_port(0).get_source()`` and ``op4.in_port(1).get_source()`` is the +same object ``op1.out_port(1)`` of type ``Port``. + +The method ``get_destination()`` of an output port returns the input port of the node consuming this tensor. If there are +multiple consumers of this tensor, the error is raised. The method ``get_destinations()`` of an output port returns a +list of input ports consuming the tensor. + +The method ``disconnect()`` removes a node incoming edge corresponding to the specific input port. The method removes +several edges if it is applied during the front phase for a node output port connected with multiple nodes. + +The method ``port.connect(another_port)`` connects output port ``port`` and input port ``another_port``. The method handles +situations when the graph contains data nodes (middle and back phases) and does not just create an edge between two nodes +but also automatically creates a data node or reuses an existing data node. If the method is used during the front phase and +data nodes do not exist, the method creates edge and properly sets ``in`` and ``out`` edge attributes. + +For example, applying the following two methods to the graph above will result in the graph depicted below: + +.. code-block:: py + :force: + + op4.in_port(1).disconnect() + op3.out_port(0).connect(op4.in_port(1)) + +.. image:: ../../../../assets/images/MO_ports_example_2.svg + :scale: 80 % + :align: center + +..
note:: + For a full list of available methods, refer to the ``Node`` class implementation in the ``mo/graph/graph.py`` and ``Port`` class implementation in the ``mo/graph/port.py`` files. + +=========== +Connections +=========== + +Connection is a concept introduced to easily and reliably perform graph modifications. Connection corresponds to a +link between a source output port with one or more destination input ports or a link between a destination input port +and source output port producing data. So each port is connected with one or more ports with help of a connection. +Model Optimizer uses the ``mo.graph.connection.Connection`` class to represent a connection. + +There is only one ``get_connection()`` method of the ``Port`` class to get the instance of the corresponding ``Connection`` +object. If the port is not connected, the returned value is ``None``. + +For example, the ``op3.out_port(0).get_connection()`` method returns a ``Connection`` object encapsulating edges from node +**Op3** to data node **data_3_0** and two edges from data node **data_3_0** to two ports of the node **Op4**. + +The ``Connection`` class provides methods to get source and destination(s) ports the connection corresponds to: + +* ``connection.get_source()`` - returns an output ``Port`` object producing the tensor. +* ``connection.get_destinations()`` - returns a list of input ``Port`` consuming the data. +* ``connection.get_destination()`` - returns a single input ``Port`` consuming the data. If there are multiple consumers, the exception is raised. + +The ``Connection`` class provides methods to modify a graph by changing a source or destination(s) of a connection. For +example, the function call ``op3.out_port(0).get_connection().set_source(op1.out_port(0))`` changes source port of edges +consuming data from port ``op3.out_port(0)`` to ``op1.out_port(0)``. The transformed graph from the sample above is depicted +below: + +.. 
image:: ../../../../assets/images/MO_connection_example_1.svg + :scale: 80 % + :align: center + +Another example is the ``connection.set_destination(dest_port)`` method. It disconnects ``dest_port`` and all input ports to which +the connection is currently connected and connects the connection source port to ``dest_port``. + +Note that connection works seamlessly during front, middle, and back phases and hides the fact that the graph structure is +different. + +.. note:: + For a full list of available methods, refer to the ``Connection`` class implementation in the ``mo/graph/connection.py`` file. + +==================== +Additional Resources +==================== + +* :doc:`Model Optimizer Extensibility <../legacy-model-optimizer-extensibility>` +* :doc:`Model Optimizer Extensions <[legacy]-model-optimizer-extensions>` +* :doc:`Extending Model Optimizer with Caffe Python Layers <[legacy]-extending-model-optimizer-with-caffe-python-layers>` + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility/[legacy]-model-optimizer-extensions.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility/[legacy]-model-optimizer-extensions.rst new file mode 100644 index 00000000000000..db252965cb84e9 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility/[legacy]-model-optimizer-extensions.rst @@ -0,0 +1,60 @@ +[LEGACY] Model Optimizer Extensions +===================================== + +.. meta:: + :description: Learn about deprecated extensions, which enable injecting logic + to the model conversion pipeline without changing the Model + Optimizer core code. + +.. 
toctree:: + :maxdepth: 1 + :hidden: + + [legacy]-model-optimizer-extensions/[legacy]-model-optimizer-operation + [legacy]-model-optimizer-extensions/[legacy]-optimizer-extractor + [legacy]-model-optimizer-extensions/[legacy]-graph-transformation-extensions + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated TensorFlow conversion method. The guide on the new and recommended method, using a new frontend, can be found in the :doc:`Frontend Extensions <../../../openvino-extensibility/frontend-extensions>` article. + +Model Optimizer extensions enable you to inject some logic to the model conversion pipeline without changing the Model +Optimizer core code. There are three types of the Model Optimizer extensions: + +1. :doc:`Model Optimizer operation <[legacy]-model-optimizer-extensions/[legacy]-model-optimizer-operation>`. +2. A :doc:`framework operation extractor <[legacy]-model-optimizer-extensions/[legacy]-optimizer-extractor>`. +3. A :doc:`model transformation <[legacy]-model-optimizer-extensions/[legacy]-graph-transformation-extensions>`, which can be executed during front, middle or back phase of the model conversion. + +An extension is just a plain text file with a Python code. The file should contain a class (or classes) inherited from +one of extension base classes. Extension files should be saved to a directory with the following structure: + +.. code-block:: sh + + .// + ops/ - custom operations + front/ - framework independent front transformations + / - front transformations for models only and extractors for operations + / - front transformations for models only and extractors for operations + ... 
+ middle/ - middle transformations + back/ - back transformations + +Model Optimizer uses the same layout internally to keep built-in extensions. The only exception is that the +``mo/ops/`` directory is also used as a source of the Model Optimizer operations due to historical reasons. + +.. note:: + The name of a root directory with extensions should not be equal to "extensions" because it will result in a name conflict with the built-in Model Optimizer extensions. + +.. note:: + Model Optimizer itself is built by using these extensions, so there is a huge number of examples of their usage in the Model Optimizer code. + +==================== +Additional Resources +==================== + +* :doc:`Model Optimizer Extensibility <../legacy-model-optimizer-extensibility>` +* :doc:`Graph Traversal and Modification Using Ports and Connections <[legacy]-graph-traversal-and-modification>` +* :doc:`Extending Model Optimizer with Caffe Python Layers <[legacy]-extending-model-optimizer-with-caffe-python-layers>` + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility/[legacy]-model-optimizer-extensions/[legacy]-graph-transformation-extensions.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility/[legacy]-model-optimizer-extensions/[legacy]-graph-transformation-extensions.rst new file mode 100644 index 00000000000000..95f722ee063443 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility/[legacy]-model-optimizer-extensions/[legacy]-graph-transformation-extensions.rst @@ -0,0 +1,605 @@ +[LEGACY] Graph Transformation Extensions +========================================== + +.. meta:: + :description: Learn about various base classes for front, middle and back phase + transformations applied during model conversion with Model Optimizer. + +.. 
danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated TensorFlow conversion method. The guide on the new and recommended method, using a new frontend, can be found in the :doc:`Frontend Extensions <../../../../openvino-extensibility/frontend-extensions>` article. + +Model Optimizer provides various base classes to implement :ref:`Front Phase Transformations `, +:ref:`Middle Phase Transformations `, and :ref:`Back Phase Transformations `. +All classes have the following common class attributes and methods: + +1. The ``enabled`` attribute specifies whether the transformation is enabled or not. The value can be changed during runtime to enable or disable execution of the transformation during a model conversion. Default value is ``True``. +2. The ``id`` attribute specifies a unique transformation string identifier. This transformation identifier can be used to enable (disable) the transformation by setting environment variable ``MO_ENABLED_TRANSFORMS`` (``MO_DISABLED_TRANSFORMS``) with a comma separated list of ``ids``. The environment variables override the value of the ``enabled`` attribute of the transformation. Instead of using ``id`` attribute value you can add fully defined class name to ``MO_ENABLED_TRANSFORMS`` (``MO_DISABLED_TRANSFORMS``) variable, ``extensions.back.NormalizeToNormalizeL2.NormalizeToNormalizeL2`` for example. It is an optional attribute. +3. The ``run_not_recursively`` attribute specifies whether the transformation should be executed in the sub-graphs, for example, body of the :doc:`TensorIterator <../../../../openvino-ir-format/operation-sets/operation-specs/infrastructure/tensor-iterator-1>` and the :doc:`Loop <../../../../openvino-ir-format/operation-sets/operation-specs/infrastructure/loop-5>`.
Default value is ``True``. +4. The ``force_clean_up`` attribute specifies whether the graph clean up should be executed after the transformation. The graph cleanup removes nodes of the graph not reachable from the model inputs. Default value is ``False``. +5. The ``force_shape_inference`` attribute specifies whether the nodes marked with ``need_shape_inference`` attribute equal to ``True`` should be re-inferred after the transformation. Model Optimizer sets this attribute automatically for nodes, input(s) of which were changed during the transformation, or you can set this attribute manually in the transformation for the specific nodes. Default value is ``False``. +6. Attribute ``graph_condition`` specifies a list of functions with one parameter -- ``Graph`` object. The transformation is executed if and only if all functions return ``True``. If the attribute is not set, no check is performed. +7. Method ``run_before()`` returns a list of transformation classes which this transformation should be executed before. +8. Method ``run_after()`` returns a list of transformation classes which this transformation should be executed after. + +.. note:: + Some of the transformation types have specific class attributes and methods, which are explained in the corresponding sections of this document. + +Model Optimizer builds a graph of dependencies between registered transformations and executes them in the topological +order. To execute the transformation during a proper model conversion phase, Model Optimizer defines several +anchor transformations that do nothing. All transformations are ordered with respect to these anchor transformations. +The diagram below shows anchor transformations, some of built-in transformations and dependencies between them: + +.. 
image:: ../../../../../assets/images/MO_transformations_graph.svg + +User-defined transformations are executed after the corresponding ``Start`` and before the corresponding ``Finish`` anchor +transformations by default (if ``run_before()`` and ``run_after()`` methods have not been overridden). + +.. note:: + The ``PreMiddleStart`` and ``PostMiddleStart`` anchors were introduced due to historical reasons to refactor the Model Optimizer pipeline, which initially had a hardcoded order of transformations. + +.. _mo_front_phase_transformations: + +=========================== +Front Phase Transformations +=========================== + +There are several types of a front phase transformation: + +1. :ref:`Pattern-Defined Front Phase Transformations ` triggered for each sub-graph of the original graph isomorphic to the specified pattern. +2. :ref:`Specific Operation Front Phase Transformations ` triggered for the node with a specific ``op`` attribute value. +3. :ref:`Generic Front Phase Transformations `. +4. Manually enabled transformation, defined with a JSON configuration file (for TensorFlow, ONNX, and PaddlePaddle models), specified using the ``--transformations_config`` command-line parameter: + + 1. :ref:`Node Name Pattern Front Phase Transformations `. + 2. :ref:`Front Phase Transformations Using Start and End Points `. + 3. :ref:`Generic Front Phase Transformations Enabled with Transformations Configuration File `. + +.. _pattern_defined_front_phase_transformations: + +Pattern-Defined Front Phase Transformations +########################################### + +This type of transformation is implemented using ``mo.front.common.replacement.FrontReplacementSubgraph`` and +``mo.front.common.replacement.FrontReplacementPattern`` as base classes and works as follows: + +1. Define a sub-graph to be matched, using a list of nodes with attributes and edges connecting them (edges may also have attributes). +2. 
Model Optimizer searches for all sub-graphs of the original graph, isomorphic to the specified sub-graph (pattern). +3. Model Optimizer executes the defined function performing graph transformation for each instance of a matched sub-graph. You can override different functions in the base transformation class so the Model Optimizer works differently: + + 1. Override the ``replace_sub_graph(self, graph, match)`` method. In this case, Model Optimizer only executes the overridden function, passing the ``graph`` object and a dictionary describing the matched sub-graph. You are required to write the transformation and connect the newly created nodes to the rest of the graph. + 2. Override the ``generate_sub_graph(self, graph, match)`` method. This case is not recommended for use because it is the most complicated approach. It can be effectively replaced with one of two previous approaches. + +The sub-graph pattern is defined in the ``pattern()`` function. This function should return a dictionary with two keys: +``nodes`` and ``edges``: + +* The value for the ``nodes`` key is a list of tuples with two elements. + + * The first element is an alias name for a node that will be used to define edges between nodes and in the transformation function. + * The second element is a dictionary with attributes. The key is a name of an attribute that should exist in the node. The value for the attribute can be some specific value to match or a function that gets a single parameter - the attribute value from the node. The function should return the result of attribute comparison with a dedicated value. + +* The value for the ``edges`` key is a list of tuples with two or three elements. + + * The first element is the alias name of the node producing a tensor. + * The second element is the alias name of the node consuming the tensor. + * The third element (optional) is the dictionary with expected edge attributes.
This dictionary usually contains attributes like ``in`` and ``out``, defining input and output ports. + +Consider the example of a front transformation implemented in the ``extensions/front/Mish_fusion.py`` file performing +fusing of the sub-graph defining the :doc:`Mish <../../../../openvino-ir-format/operation-sets/operation-specs/activation/mish-4>` activation function into a single +operation: + +.. code-block:: py + :force: + + from openvino.tools.mo.front.Softplus_fusion import SoftplusFusion + from openvino.tools.mo.ops.activation_ops import Mish + from openvino.tools.mo.front.common.replacement import FrontReplacementSubgraph + from openvino.tools.mo.front.subgraph_matcher import SubgraphMatch + from openvino.tools.mo.graph.graph import Graph, rename_nodes + + + class MishFusion(FrontReplacementSubgraph): + """ + The transformation looks for the pattern with Softplus defining the Mish function: Mish(x) = x * tanh(SoftPlus(x)). + """ + enabled = True # Transformation is enabled. + + def run_after(self): # Run this transformation after "SoftplusFusion" transformation. + return [SoftplusFusion] + + def pattern(self): # Define pattern according to formulae x * tanh(SoftPlus(x)). + return dict( + nodes=[ + ('mul', dict(op='Mul')), + ('tanh', dict(op='Tanh')), + ('softplus', dict(op='SoftPlus')), + ], + edges=[ + ('softplus', 'tanh'), + ('tanh', 'mul'), + ]) + + def replace_sub_graph(self, graph: Graph, match: [dict, SubgraphMatch]): # Entry point for the transformation. + mul = match['mul'] # Get the Node corresponding to matched "mul" node. + mul_name = mul.soft_get('name', mul.id) + softplus = match['softplus'] # Get the Node corresponding to the matched "softplus" node. + + # Determine the input port of Mul which gets the 'input' node output. + input_port_idx = int(mul.in_port(0).get_connection().get_source().node.soft_get('op') == 'Tanh') + + # Check that the same tensor is provided as input to Mul and SoftPlus. 
+ if mul.in_port(input_port_idx).get_source() != softplus.in_port(0).get_source(): + return + + mish = Mish(graph, {}).create_node() # Create Mish operation. + mish.in_port(0).connect(mul.in_port(input_port_idx).get_source()) # Connect input to the Mish. + mul.out_port(0).get_connection().set_source(mish.out_port(0)) # Reconnect outgoing edge from "mul" to Mish. + + # Rename the created Mish operation to have the name of the "mul" node, which produced the value equal to the + # Mish output. + rename_nodes([(mul, mul_name + '/TBR'), (mish, mul_name)]) + +.. _specific_operation_front_phase_transformations: + +Specific Operation Front Phase Transformations +############################################## + +This type of transformation is implemented using ``mo.front.common.replacement.FrontReplacementOp`` as base class and +works as follows: + +1. Define an operation type to trigger the transformation. +2. Model Optimizer searches for all nodes in the graph with the attribute ``op`` equal to the specified value. +3. Model Optimizer executes the defined function performing graph transformation for each instance of a matched node. You can override different functions in the base transformation class and Model Optimizer works differently: + + 1. The ``replace_sub_graph(self, graph, match)`` override method. In this case, Model Optimizer only executes the overridden function. Pass the ``graph`` object and a dictionary with a single key ``op`` with the matched node as value. You are required to write the transformation and connect the newly created nodes to the rest of the graph. + 2. The ``replace_op(self, graph, node)`` override method. In this case, Model Optimizer executes the overridden function. Pass the ``graph`` object and the matched node as ``node`` parameter. If the function returns an ``id`` of some node, then the ``Node`` with this ``id`` is connected to the consumers of the matched node. 
After applying the transformation, the matched node is removed from the graph. + +The ``FrontReplacementOp`` class provides a simpler mechanism to match a single operation with specific value of the ``op`` +(write the ``op`` attribute in the class instead of defining a ``pattern()`` function) attribute and perform the +transformation. + +Consider an example transformation from the ``extensions/front/Pack.py`` file, which replaces ``Pack`` operation from +the TensorFlow: + +.. code-block:: py + :force: + + from openvino.tools.mo.front.common.partial_infer.utils import int64_array + from openvino.tools.mo.front.common.replacement import FrontReplacementOp + from openvino.tools.mo.front.tf.graph_utils import create_op_with_const_inputs + from openvino.tools.mo.graph.graph import Node, Graph, rename_nodes + from openvino.tools.mo.ops.concat import Concat + from openvino.tools.mo.ops.unsqueeze import Unsqueeze + + + class Pack(FrontReplacementOp): + op = "Pack" # Trigger transformation for all nodes in the graph with the op = "Pack" attribute + enabled = True # Transformation is enabled. + + def replace_op(self, graph: Graph, node: Node): # Entry point for the transformation. + # Create a Concat operation with a number of inputs equal to a number of inputs to Pack. + out_node = Concat(graph, {'axis': node.axis, 'in_ports_count': len(node.in_ports())}).create_node() + pack_name = node.soft_get('name', node.id) + + for ind in node.in_ports(): + # Add dimension of size 1 to all inputs of the Pack operation and add them as Concat inputs. + unsqueeze_node = create_op_with_const_inputs(graph, Unsqueeze, {1: int64_array([node.axis])}, + {'name': node.soft_get('name', node.id) + '/Unsqueeze'}) + node.in_port(ind).get_connection().set_destination(unsqueeze_node.in_port(0)) + unsqueeze_node.out_port(0).connect(out_node.in_port(ind)) + + # Rename the created Concat operation to have the name of the "pack" node, which produced the value equal to the + # Concat output. 
+ rename_nodes([(node, pack_name + '/TBR'), (out_node, pack_name)]) + return [out_node.id] # Reconnect the Pack operation consumers to get input from Concat instead. + + +.. _generic_front_phase_transformations: + +Generic Front Phase Transformations +################################### + +Model Optimizer provides a mechanism to implement generic front phase transformation. This type of transformation is +implemented using ``mo.front.common.replacement.FrontReplacementSubgraph`` or +``mo.front.common.replacement.FrontReplacementPattern`` as base classes. Make sure the transformation is enabled before trying to execute it. +Then, Model Optimizer executes the ``find_and_replace_pattern(self, graph)`` method and +provides a ``Graph`` object as an input. + +Consider the example of a generic front transformation from the ``extensions/front/SqueezeNormalize.py`` file performing +normalization of the :doc:`Squeeze <../../../../openvino-ir-format/operation-sets/operation-specs/shape/squeeze-1>` operation. Older version of the operation had a list of +axes to squeeze as an attribute, but now it is a separate input. For backward compatibility, the Model Optimizer +operation supports both semantics. Before IR generation, however, the operation should be normalized according to the +specification. + +.. code-block:: py + :force: + + import logging as log + + from openvino.tools.mo.front.common.partial_infer.utils import int64_array + from openvino.tools.mo.front.common.replacement import FrontReplacementPattern + from openvino.tools.mo.graph.graph import Graph + from openvino.tools.mo.ops.const import Const + from openvino.tools.mo.utils.error import Error + + + class SqueezeNormalize(FrontReplacementPattern): + """ + Normalizes inputs of the Squeeze layers. The layers should have two inputs: the input with data and input with the + dimensions to squeeze. If the second input is omitted then all dimensions of size 1 should be removed. 
+ """ + enabled = True # The transformation is enabled. + + def find_and_replace_pattern(self, graph: Graph): # The function is called unconditionally. + for squeeze_node in graph.get_op_nodes(op='Squeeze'): # Iterate over all nodes with op='Squeeze'. + # If the operation has only 1 input node and a valid 'squeeze_dims' Node attribute, then convert the attribute to + # the operation input. + if len(squeeze_node.in_nodes()) == 1 and squeeze_node.has_valid('squeeze_dims'): + dims_node = Const(graph, {'name': squeeze_node.id + '/Dims', + 'value': int64_array(squeeze_node.squeeze_dims)}).create_node() + squeeze_node.in_port(1).connect(dims_node.out_port(0)) + del squeeze_node['squeeze_dims'] + # If two inputs already exist, that means the operation is already normalized. + elif len(squeeze_node.in_nodes()) == 2: + log.debug('The Squeeze node "{}" is already normalized'.format(squeeze_node.name)) + # In all other cases, raise an error. + else: + raise Error('The Squeeze layer "{}" should either have 2 inputs or one input and an "squeeze_dims" ' + 'attribute'.format(squeeze_node.soft_get('name'))) + +For the details on implementation and how these front phase transformations work, refer to the ``mo/front/common/replacement.py`` +file. + +.. _node_name_pattern_front_phase_transformations: + +Node Name Pattern Front Phase Transformations +############################################# + +TensorFlow uses a mechanism of scope to group related operation nodes. It is a good practice to put nodes performing +a particular task into the same scope. This approach divides a graph into logical blocks that are easier to review in the +TensorBoard. The scope, in fact, just defines a common name prefix for the nodes belonging to it. + +For example, Inception topologies contain several types of so-called **Inception blocks**. Some of them are equal to each +other, but located in different places of the network.
For example, Inception V4 from the +`TensorFlow-Slim image classification model library `__ has +``Mixed_5b``, ``Mixed_5c`` and ``Mixed_5d`` inception blocks with exactly the same nodes, with the same set of attributes. + +Consider a situation when these Inception blocks are implemented extremely efficiently using a single Inference +Engine operation called ``InceptionBlock`` and these blocks in the model need to be replaced with instances of this operation. +Model Optimizer provides mechanism to trigger the transformation for a sub-graph of operations defined by the node name +regular expressions (scope). In this particular case, some of the patterns are: ``.*InceptionV4/Mixed_5b``, +``.*InceptionV4/Mixed_5c`` and ``.*InceptionV4/Mixed_5d``. Each pattern starts with ``.*``, because the ``InceptionV4`` prefix +is added to all nodes names during a model freeze. + +This type of transformation is implemented using ``mo.front.tf.replacement.FrontReplacementFromConfigFileSubGraph`` as a +base class and works as follows: + +1. Prepare a JSON configuration file template defining node names patterns. +2. Run Model Optimizer with the ``--tensorflow_custom_operations_config_update`` command-line parameter, and Model Optimizer adds information about input and output nodes of the specified sub-graphs. +3. Model Optimizer executes the defined transformation **only** when you specify the path to the configuration file updated in step 2 using the ``--transformations_config`` command-line parameter. + +Consider the following possible configuration file template for the Inception Block transformation: + +.. code-block:: json + + [ + { + "custom_attributes": { + "attr1_key": "attr1_value", + "attr2_key": 123456 + }, + "id": "InceptionBlockTransformation", + "instances": [ + ".*InceptionV4/Mixed_5b", + ".*InceptionV4/Mixed_5c", + ".*InceptionV4/Mixed_5d" + ], + "match_kind": "scope" + } + ] + +The configuration file contains a list of dictionaries. 
Each dictionary defines one transformation. Each transformation +is defined with several parameters: + +* ``id`` - **(Mandatory)** — is a unique identifier of the transformation. It is used in the Python code that implements the transformation to link the class and the transformation description from the configuration file. +* ``match_kind`` - **(Mandatory)** — is a string that specifies the matching algorithm. For the node name pattern case, the value should be equal to ``scope``. Another possible values are described in the dedicated sections below. +* ``instances`` - **(Mandatory)** — specifies instances of the sub-graph to be matched. It contains a list of node names prefixes patterns for the match kind of the ``scope`` type. +* ``custom_attributes`` - **(Optional)** — is a dictionary with attributes that can be used in the transformation code. + +After running Model Optimizer with additional ``--tensorflow_custom_operations_config_update`` parameter pointing to +the template configuration file, the content of the file should be updated with two new sections ``inputs`` and ``outputs``. +The file content after the update is as follows: + +.. code-block:: json + + [ + { + "id": "InceptionBlockTransformation", + "custom_attributes": { + "attr1_key": "attr1_value", + "attr2_key": 123456 + }, + "instances": [ + ".*InceptionV4/Mixed_5b", + ".*InceptionV4/Mixed_5c", + ".*InceptionV4/Mixed_5d" + ], + "match_kind": "scope", + "inputs": [ + [ + { + "node": "Branch_2/Conv2d_0a_1x1/Conv2D$", + "port": 0 + }, + { + "node": "Branch_3/AvgPool_0a_3x3/AvgPool$", + "port": 0 + }, + { + "node": "Branch_1/Conv2d_0a_1x1/Conv2D$", + "port": 0 + }, + { + "node": "Branch_0/Conv2d_0a_1x1/Conv2D$", + "port": 0 + } + ] + ], + "outputs": [ + { + "node": "concat$", + "port": 0 + } + ] + } + ] + +The value for ``inputs`` key is a list of lists describing input tensors of the sub-graph. Each element of the top-level +list corresponds to one unique input tensor of the sub-graph. 
Each internal list describes a list of nodes consuming +this tensor and port numbers, where the tensor is consumed. Model Optimizer generates regular expressions for the input +nodes names to uniquely identify them in each instance of the sub-graph, defined by the ``instances``. Denote these nodes +as input nodes of the sub-graph. + +In the InceptionV4 topology, the ``InceptionV4/Mixed_5b`` block has four input tensors from outside of the sub-graph, +but all of them are produced by the ``InceptionV4/Mixed_5a/concat`` node. Therefore, the top-level list of the ``inputs`` +contains one list corresponding to this tensor. Four input nodes of the sub-graph consume the tensor produced by +``InceptionV4/Mixed_5a/concat`` node. In this case, all four input nodes consume input tensor into "port 0". + +The order of items in the internal list describing nodes does not matter, but the order of elements in the top-level +list is important. This order defines how Model Optimizer attaches input tensors to a new generated +node if the sub-graph is replaced with a single node. The ``i``-th input node of the sub-graph is obtained using +``match.single_input_node(i)`` call in the sub-graph transformation code. More information about API is given below. If it is +necessary to change the order of input tensors, the configuration file can be edited in the text editor. + +The value for the ``outputs`` key is a list describing nodes of the sub-graph producing tensor, that goes outside of the +sub-graph or does not have child nodes. Denote these nodes as output nodes of the sub-graph. The order of elements in +the list is important. The ``i``-th element of the list describes the ``i``-th output tensor of the sub-graph, which could be +obtained using ``match.output_node(i)`` call. The order of elements can be manually changed in the configuration file. +Model Optimizer uses this order to connect output edges if the sub-graph is replaced with a single node. 
+ +For more examples of this type of transformation, refer to the :doc:`Converting TensorFlow Object Detection API Models <../../legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-object-detection>` guide. + +.. _start_end_points_front_phase_transformations: + +Front Phase Transformations Using Start and End Points +###################################################### + +This type of transformation is implemented using ``mo.front.tf.replacement.FrontReplacementFromConfigFileSubGraph`` as a +base class and works as follows: + +1. Prepare a JSON configuration file that defines the sub-graph to match, using two lists of node names: "start" and "end" nodes. +2. Model Optimizer executes the defined transformation **only** when you specify the path to the configuration file using the ``--transformations_config`` command-line parameter . Model Optimizer performs the following steps to match the sub-graph: + + 1. Starts a graph traversal from every start node following the direction of the graph edges. The search stops in an end node or in the case of a node without consumers. All visited nodes are added to the matched sub-graph. + 2. Starts another graph traversal from each non-start node of the sub-graph, i.e. every node except nodes from the "start" list. In this step, the edges are traversed in the opposite edge direction. All newly visited nodes are added to the matched sub-graph. This step is needed to add nodes required for calculation values of internal nodes of the matched sub-graph. + 3. Checks that all "end" nodes were reached from "start" nodes. If not, it exits with an error. + 4. Checks that there are no :doc:`Parameter <../../../../openvino-ir-format/operation-sets/operation-specs/infrastructure/parameter-1>` operations among added nodes. If they exist, the sub-graph depends on the inputs of the model. Such configuration is considered incorrect so Model Optimizer exits with an error. 
+ +This algorithm finds all nodes "between" start and end nodes and nodes needed for calculation of non-input nodes of the +matched sub-graph. + +The example of a JSON configuration file for a transformation with start and end points is +``extensions/front/tf/ssd_support_api_v1.15.json``: + +.. code-block:: json + + [ + { + "custom_attributes": { + "code_type": "caffe.PriorBoxParameter.CENTER_SIZE", + "pad_mode": "caffe.ResizeParameter.CONSTANT", + "resize_mode": "caffe.ResizeParameter.WARP", + "clip_before_nms": false, + "clip_after_nms": true + }, + "id": "ObjectDetectionAPISSDPostprocessorReplacement", + "include_inputs_to_sub_graph": true, + "include_outputs_to_sub_graph": true, + "instances": { + "end_points": [ + "detection_boxes", + "detection_scores", + "num_detections" + ], + "start_points": [ + "Postprocessor/Shape", + "Postprocessor/scale_logits", + "Postprocessor/Tile", + "Postprocessor/Reshape_1", + "Postprocessor/Cast_1" + ] + }, + "match_kind": "points" + } + ] + +The format of the file is similar to the one provided as an example in the +:ref:`Node Name Pattern Front Phase Transformations <node_name_pattern_front_phase_transformations>` section. The difference is in +the value of the ``match_kind`` parameter, which should be equal to ``points``, and the format of the ``instances`` parameter, +which should be a dictionary with two keys ``start_points`` and ``end_points``, defining start and end node names +respectively. + +.. note:: + The ``include_inputs_to_sub_graph`` and ``include_outputs_to_sub_graph`` parameters are redundant and should always be equal to ``true``. + +.. note:: + This sub-graph match algorithm has a limitation that each start node must have only one input. Therefore, it is not possible to specify, for example, the :doc:`Convolution <../../../../openvino-ir-format/operation-sets/operation-specs/convolution/convolution-1>` node as input because it has two inputs: data tensor and tensor with weights.
+ +For other examples of transformations with points, refer to the +:doc:`Converting TensorFlow Object Detection API Models <../../legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-object-detection>` guide. + +.. _generic_transformations_config_front_phase_transformations: + +Generic Front Phase Transformations Enabled with Transformations Configuration File +################################################################################### + +This type of transformation works similarly to the :ref:`Generic Front Phase Transformations <generic_front_phase_transformations>` +but requires a JSON configuration file to enable it similarly to +:ref:`Node Name Pattern Front Phase Transformations <node_name_pattern_front_phase_transformations>` and +:ref:`Front Phase Transformations Using Start and End Points <start_end_points_front_phase_transformations>`. + +The base class for this type of transformation is +``mo.front.common.replacement.FrontReplacementFromConfigFileGeneral``. Model Optimizer executes the +``transform_graph(self, graph, replacement_descriptions)`` method and provides the ``Graph`` object and a dictionary with values +parsed from the ``custom_attributes`` attribute of the provided JSON configuration file. + +The example of the configuration file for this type of transformation is ``extensions/front/tf/yolo_v1_tiny.json``: + +.. code-block:: json + + [ + { + "id": "TFYOLO", + "match_kind": "general", + "custom_attributes": { + "classes": 20, + "coords": 4, + "num": 2, + "do_softmax": 0 + } + } + ] + +and the corresponding transformation file is ``./extensions/front/YOLO.py``: + +..
code-block:: py + :force: + + from openvino.tools.mo.front.no_op_eraser import NoOpEraser + from openvino.tools.mo.front.standalone_const_eraser import StandaloneConstEraser + from openvino.tools.mo.ops.regionyolo import RegionYoloOp + from openvino.tools.mo.front.tf.replacement import FrontReplacementFromConfigFileGeneral + from openvino.tools.mo.graph.graph import Node, Graph + from openvino.tools.mo.ops.result import Result + from openvino.tools.mo.utils.error import Error + + + class YoloRegionAddon(FrontReplacementFromConfigFileGeneral): + """ + Replaces all Result nodes in graph with YoloRegion->Result nodes chain. + YoloRegion node attributes are taken from configuration file + """ + replacement_id = 'TFYOLO' # The identifier matching the "id" attribute in the JSON file. + + def run_after(self): + return [NoOpEraser, StandaloneConstEraser] + + def transform_graph(self, graph: Graph, replacement_descriptions): + op_outputs = [n for n, d in graph.nodes(data=True) if 'op' in d and d['op'] == 'Result'] + for op_output in op_outputs: + last_node = Node(graph, op_output).in_node(0) + op_params = dict(name=last_node.id + '/YoloRegion', axis=1, end_axis=-1) + op_params.update(replacement_descriptions) + region_layer = RegionYoloOp(graph, op_params) + region_layer_node = region_layer.create_node([last_node]) + # In here, 'axis' from 'dim_attrs' can be removed to avoid permutation from axis = 1 to axis = 2. + region_layer_node.dim_attrs.remove('axis') + Result(graph).create_node([region_layer_node]) + graph.remove_node(op_output) + +The configuration file has only 3 parameters: ``id`` identifier of the transformation , ``match_kind`` (which should be equal +to ``general``) and the ``custom_attributes`` dictionary with custom attributes accessible in the transformation. + +.. _mo_middle_phase_transformations: + +============================ +Middle Phase Transformations +============================ + +There are two types of middle phase transformations: + +1. 
:ref:`Pattern-Defined Middle Phase Transformations <pattern_defined_middle_phase_transformations>` triggered for each sub-graph of the original graph, isomorphic to the specified pattern. +2. :ref:`Generic Middle Phase Transformations <generic_middle_phase_transformations>`. + +.. _pattern_defined_middle_phase_transformations: + +Pattern-Defined Middle Phase Transformations +############################################ + +This type of transformation is implemented using ``mo.middle.replacement.MiddleReplacementPattern`` as a base class and +works similarly to the :ref:`Pattern-Defined Middle Phase Transformations <pattern_defined_middle_phase_transformations>`. +There are two differences: + +1. The transformation entry function name is ``replace_pattern(self, graph, match)``. +2. The pattern defining the graph should contain data nodes because the structure of the graph is different between front and middle phases. For more information about the graph structure changes, refer to the :ref:`Partial Inference `. + +For the example of a pattern-defined middle transformation, refer to the ``extensions/middle/L2NormToNorm.py`` file. + +.. _generic_middle_phase_transformations: + +Generic Middle Phase Transformations +#################################### + +Model Optimizer provides a mechanism to implement generic middle phase transformations. This type of transformation is +implemented using ``mo.middle.replacement.MiddleReplacementPattern`` as a base class and works similarly to the +:ref:`Generic Front Phase Transformations <generic_front_phase_transformations>`. The only difference is that the +transformation entry function name is ``find_and_replace_pattern(self, graph: Graph)``. + +For the example of this transformation, refer to the ``extensions/middle/CheckForCycle.py`` file. + +.. _mo_back_phase_transformations: + +========================== +Back Phase Transformations +========================== + +There are two types of back phase transformations: + +1. :ref:`Pattern-Defined Back Phase Transformations <pattern_defined_back_phase_transformations>` triggered for each sub-graph of the original graph, isomorphic to the specified pattern. +2.
:ref:`Generic Back Phase Transformations <generic_back_phase_transformations>`. + +.. note:: + The graph layout during the back phase is always NCHW. However, during the front and middle phases it could be NHWC if the original model was using it. For more details, refer to :ref:`Model Conversion Pipeline `. + +.. _pattern_defined_back_phase_transformations: + +Pattern-Defined Back Phase Transformations +########################################## + +This type of transformation is implemented using ``mo.back.replacement.BackReplacementPattern`` as a base class and +works the same way as :ref:`Pattern-Defined Middle Phase Transformations <pattern_defined_middle_phase_transformations>`. + +For the example of a pattern-defined back transformation, refer to the ``extensions/back/ShufflenetReLUReorder.py`` file. + +.. _generic_back_phase_transformations: + +Generic Back Phase Transformations +################################## + +Model Optimizer provides a mechanism to implement generic back phase transformations. This type of transformation is +implemented using ``mo.back.replacement.BackReplacementPattern`` as a base class and works the same way as +:ref:`Generic Middle Phase Transformations <generic_middle_phase_transformations>`. + +For the example of this transformation, refer to the ``extensions/back/GatherNormalizer.py`` file.
+ +==================== +Additional Resources +==================== + +* :doc:`Model Optimizer Extensibility <../../legacy-model-optimizer-extensibility>` +* :doc:`Graph Traversal and Modification Using Ports and Connections <../../legacy-model-optimizer-extensibility/[legacy]-graph-traversal-and-modification>` +* :doc:`Model Optimizer Extensions <../[legacy]-model-optimizer-extensions>` +* :doc:`Extending Model Optimizer with Caffe Python Layers <../[legacy]-extending-model-optimizer-with-caffe-python-layers>` + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility/[legacy]-model-optimizer-extensions/[legacy]-model-optimizer-operation.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility/[legacy]-model-optimizer-extensions/[legacy]-model-optimizer-operation.rst new file mode 100644 index 00000000000000..61c43f72dfade9 --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility/[legacy]-model-optimizer-extensions/[legacy]-model-optimizer-operation.rst @@ -0,0 +1,110 @@ +[LEGACY] Model Optimizer Operation +=================================== + +.. meta:: + :description: Learn about the Op class, that contains operation attributes, + which are set to a node of the graph created during model + conversion with Model Optimizer. + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated TensorFlow conversion method. The guide on the new and recommended method, using a new frontend, can be found in the :doc:`Frontend Extensions <../../../../openvino-extensibility/frontend-extensions>` article. 
+ +Model Optimizer defines a ``mo.ops.Op`` class (referred to as ``Op`` in the rest of this document for brevity), which is a base class +for an operation used in the Model Optimizer. The instance of the ``Op`` class serves several purposes: + +1. Stores the operation attributes. +2. Stores the operation shape/value and type inference functions. +3. Defines operation attributes to be saved to the corresponding IR section. +4. Contains convenient methods to create a graph node from an ``Op`` object instance and connect it with the existing graph. +5. Used in the extractors to store parsed attributes and operation specific attributes in the dedicated graph node. + +It is important to mention that there is no connection between the instance of the ``Op`` class and the ``Node`` object +created from it. The ``Op`` class is just a container for attributes describing the operation. Model Optimizer uses the ``Op`` +class during a model conversion to create a node of the graph with attributes copied from the ``Op`` class instance. Graph +manipulations are performed with graph ``Nodes`` and their attributes and do not involve ``Ops``. + +There are a number of common attributes used in the operations. Below is the list of these attributes with their descriptions. + +* ``id`` — **(Mandatory)** — unique identifier of a node in a graph. Generated automatically, equal to the number of nodes in the graph plus 1 if not specified. +* ``name`` — **(Mandatory)** — name of the operation. Generated automatically, equal to the ``id`` if not specified. +* ``type`` — **(Mandatory)** — type of the operation according to the :doc:`opset specification <../../../../openvino-ir-format/operation-sets/available-opsets>`. For the internal Model Optimizer operations, this attribute should be set to ``None``. The model conversion fails if an operation with ``type`` equal to ``None`` comes to the IR emitting phase. +* ``version`` — **(Mandatory)** — the operation set (opset) name the operation belongs to.
If not specified, Model Optimizer sets it equal to ``experimental``. For more information about operation sets, refer to :doc:`OpenVINO Model Representation <../../../../../openvino-workflow/running-inference/integrate-openvino-with-your-application/model-representation>` section. +* ``op`` — Model Optimizer type of the operation. In many cases, the value of ``type`` is equal to the value of ``op``. However, when Model Optimizer cannot instantiate the opset operation during model loading, it creates an instance of an internal operation. Thus, the attribute ``op`` is used as a type of this internal operation. Later in the pipeline, the node created from an internal operation will be replaced during front, middle or back phase with node(s) created from the opset. +* ``infer`` — the attribute defines a function calculating output tensor(s) shape and optional value(s). The attribute may be set to ``None`` for the internal Model Optimizer operations used during the front phase only. For more information about the shape inference function, refer to the :ref:`Partial Inference `. +* ``type_infer`` — the attribute defines a function calculating output tensor(s) data type. If the attribute is not defined, the default function is used. The function checks if the ``data_type`` node attribute is set and then propagates this type to the output tensor from the **port 0**. Otherwise, it propagates the data type of the tensor coming into the input **port 0** to the output tensor from the **port 0**. +* ``in_ports_count`` — default number of input ports to be created for the operation. Additional ports can be created or redundant ports can be removed using dedicated ``Node`` class API methods. +* ``out_ports_count`` — default number of output ports to be created for the operation. Additional ports can be created or redundant ports can be removed using dedicated ``Node`` class API methods. 
+ +Below is an example of the Model Optimizer class for the :doc:`SoftMax <../../../../openvino-ir-format/operation-sets/operation-specs/activation/softmax-1>` operation from +the ``mo/ops/softmax.py`` file with the comments in code. + +.. code-block:: py + + class Softmax(Op): + # The class attribute defines a name of the operation so the operation class can be obtained using the + # "Op.get_op_class_by_name()" static method + op = 'SoftMax' + + # The operation works as an extractor by default. This is a legacy behavior, currently not recommended for use, + # thus "enabled" class attribute is set to False. The recommended approach is to use dedicated extractor extension. + enabled = False + + def __init__(self, graph: Graph, attrs: dict): + super().__init__(graph, { # The constructor of the base class Op is called with additional default attributes. + 'type': __class__.op, # The operation is from the opset so the type is set to 'SoftMax'. + 'op': __class__.op, # Internal Model Optimizer operation has the same type. + 'version': 'opset1', # The operation corresponds to opset1. + 'infer': Softmax.infer, # Shape inference function is defined below. + 'axis': 1, # Default value for the "axis" attribute of the operation SoftMax. + 'in_ports_count': 1, # The operation has one input. + 'out_ports_count': 1, # The operation produces one output. + }, attrs) + + # The method returns operation specific attributes list. This method is important when implementing + # extractor inherited from CaffePythonFrontExtractorOp class to extract attribute for Caffe Python operation. + # However, it is currently used interchangeably with the "backend_attrs()" method. If the "backend_attrs()" is not used, + # then the "supported_attrs()" is used instead. In this particular case, the operation has just one attribute "axis". 
+ def supported_attrs(self): + return ['axis'] + + @staticmethod + def infer(node: Node): + "some code calculating output shape and values" + +There is a dedicated method called ``backend_attrs()`` defining a list of attributes to be saved to the IR. Consider an +example from the ``mo/ops/pooling.py`` file: + +.. code-block:: py + + def backend_attrs(self): + return [ + ('strides', lambda node: ','.join(map(str, node['stride'][node.spatial_dims]))), + ('kernel', lambda node: ','.join(map(str, node['window'][node.spatial_dims]))), + + ('pads_begin', lambda node: ','.join(map(str, get_backend_pad(node.pad, node.spatial_dims, 0)))), + ('pads_end', lambda node: ','.join(map(str, get_backend_pad(node.pad, node.spatial_dims, 1)))), + + ('pool-method', 'pool_method'), + ('exclude-pad', 'exclude_pad'), + + 'rounding_type', + 'auto_pad', + ] + +The ``backend_attrs()`` function returns a list of records. A record can be of one of the following formats: +1. A string defining the attribute to be saved to the IR. If the value of the attribute is ``None``, the attribute is not saved. Examples of this case are ``rounding_type`` and ``auto_pad``. +2. A tuple, where the first element is a string defining the name of the attribute as it will appear in the IR and the second element is a function to produce the value for this attribute. The function gets an instance of the ``Node`` as the only parameter and returns a string with the value to be saved to the IR. Examples of this case are ``strides``, ``kernel``, ``pads_begin`` and ``pads_end``. +3. A tuple, where the first element is a string defining the name of the attribute as it will appear in the IR and the second element is the name of the ``Node`` attribute to get the value from. Examples of this case are ``pool-method`` and ``exclude-pad``. 
+ +==================== +Additional Resources +==================== + +* :doc:`Model Optimizer Extensibility <../../legacy-model-optimizer-extensibility>` +* :doc:`Graph Traversal and Modification Using Ports and Connections <../../legacy-model-optimizer-extensibility/[legacy]-graph-traversal-and-modification>` +* :doc:`Model Optimizer Extensions <../[legacy]-model-optimizer-extensions>` +* :doc:`Extending Model Optimizer with Caffe Python Layers <../[legacy]-extending-model-optimizer-with-caffe-python-layers>` + diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility/[legacy]-model-optimizer-extensions/[legacy]-optimizer-extractor.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility/[legacy]-model-optimizer-extensions/[legacy]-optimizer-extractor.rst new file mode 100644 index 00000000000000..5de7ae93f86a7c --- /dev/null +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility/[legacy]-model-optimizer-extensions/[legacy]-optimizer-extractor.rst @@ -0,0 +1,113 @@ +[LEGACY] Operation Extractor +============================= + +.. meta:: + :description: Learn about a deprecated generic extension in Model Optimizer, + which provides the operation extractor usable for all model + frameworks. + + +.. danger:: + + The code described here has been **deprecated!** Do not use it to avoid working with a legacy solution. It will be kept for some time to ensure backwards compatibility, but **you should not use** it in contemporary applications. + + This guide describes a deprecated TensorFlow conversion method. The guide on the new and recommended method, using a new frontend, can be found in the :doc:`Frontend Extensions <../../../../openvino-extensibility/frontend-extensions>` article. 
+ +Model Optimizer runs specific extractor for each operation in the model during the model loading. + +There are several types of Model Optimizer extractor extensions: + +1. The generic one, which is described in this article. +2. The special extractor for Caffe models with Python layers. This kind of extractor is described in the :doc:`Extending Model Optimizer with Caffe Python Layers <../[legacy]-extending-model-optimizer-with-caffe-python-layers>` guide. + +Generic extension provides a generic mechanism for the operation extractor applicable for all frameworks. Model Optimizer provides the ``mo.front.extractor.FrontExtractorOp`` class as a base class to implement the extractor. It has the ``extract`` class method, which gets the only parameter ``Node``, which corresponds to the graph node to extract data from. The operation description in the original framework format is stored in the attribute ``pb`` of the node. The extractor goal is to parse this attribute and save necessary attributes to the corresponding node of the graph. Consider the extractor for the ``Const`` TensorFlow operation (refer to the ``extensions/front/tf/const_ext.py`` file): + +.. code-block:: py + :force: + + from openvino.tools.mo.front.extractor import FrontExtractorOp + from openvino.tools.mo.front.tf.extractors.utils import tf_dtype_extractor, tf_tensor_shape, tf_tensor_content + from openvino.tools.mo.ops.const import Const + + + class ConstExtractor(FrontExtractorOp): + # The "op" class attribute defines a type of the operation in the framework (in this case it is a TensorFlow), + # for which the extractor should be triggered. + op = 'Const' + enabled = True # The flag that indicates that this extractor is enabled. + + @classmethod + def extract(cls, node): # The entry point of the extractor. + # The `node.pb` attribute stores the TensorFlow representation of the operation, which is a Protobuf message of the + # specific format. 
In particular, the message contains the attribute called "value" containing the description of + # the constant. The string "pb.attr["value"].tensor" is just a Python binding for Protobuf message parsing. + pb_tensor = node.pb.attr["value"].tensor + # Get the shape of the tensor from the protobuf message, using the helper function "tf_tensor_shape". + shape = tf_tensor_shape(pb_tensor.tensor_shape) + # Create a dictionary with necessary attributes. + attrs = { + 'shape': shape, + # Get the tensor value, using "tf_tensor_content" helper function. + 'value': tf_tensor_content(pb_tensor.dtype, shape, pb_tensor), + # Get the tensor data type, using "tf_dtype_extractor" helper function. + 'data_type': tf_dtype_extractor(pb_tensor.dtype), + } + # Update the node attributes, using default attributes from the "Const" operation and attributes saved to the + # "attrs" dictionary. + Const.update_node_stat(node, attrs) + return cls.enabled + +Consider another example with an extractor of the ``Constant`` ONNX operation (refer to the ``extensions/front/onnx/const_ext.py`` file): + +.. code-block:: py + :force: + + from onnx import numpy_helper + from onnx.numpy_helper import to_array + + from openvino.tools.mo.front.extractor import FrontExtractorOp + from openvino.tools.mo.front.onnx.extractors.utils import onnx_attr + from openvino.tools.mo.ops.const import Const + + + class ConstantExtractor(FrontExtractorOp): + op = 'Constant' + enabled = True + + @classmethod + def extract(cls, node): + # Use "onnx_attr" helper method, which parses the Protobuf representation of the operation saved in the "node". + # Gets the value of the attribute with name "value" as "TensorProto" type (specified with a keyword "t"). + pb_value = onnx_attr(node, 'value', 't') + # Use "numpy_helper.to_array()" ONNX helper method to convert "TensorProto" object to a numpy array. 
+ value = numpy_helper.to_array(pb_value) + + attrs = { + 'data_type': value.dtype, + 'value': value, + } + # Update the node attributes, using default attributes from the "Const" operation and attributes saved to the + # "attrs" dictionary. + Const.update_node_stat(node, attrs) + return cls.enabled + +The extractors for operations from different frameworks work similarly. The only difference is in the helper methods used to parse operation attributes encoded with a framework-specific representation. + +A common practice is to use ``update_node_stat()`` method of the dedicated ``Op`` class to update the node attributes. This method does the following: + +1. Sets values for common attributes like ``op``, ``type``, ``infer``, ``in_ports_count``, ``out_ports_count``, ``version`` to values specific to the dedicated operation (``Const`` operation in this case). +2. Uses ``supported_attrs()`` and ``backend_attrs()`` methods, defined in the ``Op`` class to update specific node attribute ``IE``. The IR emitter uses the value stored in the ``IE`` attribute to pre-process attribute values and save them to IR. +3. Optionally sets additional attributes provided to the ``update_node_stat()`` function as a second parameter. Usually these attributes are parsed from the particular instance of the operation. + +.. note:: + Model Optimizer uses numpy arrays to store values and numpy arrays of ``np.int64`` type to store shapes in the graph. 
+ +==================== +Additional Resources +==================== + +* :doc:`Model Optimizer Extensibility <../../legacy-model-optimizer-extensibility>` +* :doc:`Graph Traversal and Modification Using Ports and Connections <../../legacy-model-optimizer-extensibility/[legacy]-graph-traversal-and-modification>` +* :doc:`Model Optimizer Extensions <../[legacy]-model-optimizer-extensions>` +* :doc:`Extending Model Optimizer with Caffe Python Layers <../[legacy]-extending-model-optimizer-with-caffe-python-layers>` + diff --git a/docs/articles_en/documentation/openvino-ecosystem.rst b/docs/articles_en/documentation/openvino-ecosystem.rst index 1975fe0a48a181..6735192e95f674 100644 --- a/docs/articles_en/documentation/openvino-ecosystem.rst +++ b/docs/articles_en/documentation/openvino-ecosystem.rst @@ -12,7 +12,6 @@ OpenVINO™ Ecosystem Overview :hidden: openvino-ecosystem/openvino-training-extensions - openvino-ecosystem/openvino-test-drive openvino-ecosystem/datumaro openvino-ecosystem/openvino-security-add-on @@ -103,19 +102,20 @@ development process, empowering teams to produce custom AI models at scale. |hr| -| **Intel® Test Drive** -| :bdg-link-dark:`Github ` - -OpenVINO™ Test Drive is cross-platform graphic user interface application that enables running -generative AI and vision models directly on your computer or edge device using OpenVINO™ Runtime. -|hr| - - | **Tokenizers** | :bdg-link-dark:`Github ` :bdg-link-success:`User Guide ` OpenVINO Tokenizers add text processing operations to OpenVINO. +|hr| + + +| **OpenVINO's Open Model Zoo** +| :bdg-link-dark:`Github ` + :bdg-link-success:`User Guide ` + +Open Model Zoo includes optimized deep learning models and a set of demos to +expedite development of high-performance deep learning inference applications. 
OpenVINO-based AI projects ########################## diff --git a/docs/articles_en/documentation/openvino-ecosystem/openvino-security-add-on.rst b/docs/articles_en/documentation/openvino-ecosystem/openvino-security-add-on.rst index 043f05a90e2342..3959ebefb09a4a 100644 --- a/docs/articles_en/documentation/openvino-ecosystem/openvino-security-add-on.rst +++ b/docs/articles_en/documentation/openvino-ecosystem/openvino-security-add-on.rst @@ -735,7 +735,7 @@ How to Use the OpenVINO™ Security Add-on This section requires interactions between the Model Developer/Independent Software vendor and the User. All roles must complete all applicable :ref:`set up steps ` and :ref:`installation steps ` before beginning this section. -This document uses a face-detection model as an example. +This document uses the `face-detection-retail-0004 `__ model as an example. The following figure describes the interactions between the Model Developer, Independent Software Vendor, and User. @@ -793,8 +793,15 @@ Step 2: Create a key store and add a certificate to it Step 3: Create the model ------------------------ -Download a `model `__ in OpenVINO IR format to -the ``OVSA_DEV_ARTEFACTS/model`` directory. +This example uses ``curl`` to download the ``face-detection-retail-0004`` model from the OpenVINO Model Zoo. If you are behind a firewall, check and set your proxy settings. + +Download a model from the Model Zoo: + +.. 
code-block:: sh + + curl --create-dirs https://download.01.org/opencv/2021/openvinotoolkit/2021.1/open_model_zoo/models_bin/1/face-detection-retail-0004/FP32/face-detection-retail-0004.xml https://download.01.org/opencv/2021/openvinotoolkit/2021.1/open_model_zoo/models_bin/1/face-detection-retail-0004/FP32/face-detection-retail-0004.bin -o model/face-detection-retail-0004.xml -o model/face-detection-retail-0004.bin + +The model is downloaded to the ``OVSA_DEV_ARTEFACTS/model`` directory. Step 4: Define access control for the model and create a master license for it ------------------------------------------------------------------------------- @@ -804,9 +811,9 @@ Define and enable the model access control and master license: .. code-block:: sh uuid=$(uuidgen) - /opt/ovsa/bin/ovsatool controlAccess -i model/.xml model/.bin -n "name of the model" -d "detailed name of the model" -p .dat -m .masterlic -k isv_keystore -g $uuid + /opt/ovsa/bin/ovsatool controlAccess -i model/face-detection-retail-0004.xml model/face-detection-retail-0004.bin -n "face detection" -d "face detection retail" -v 0004 -p face_detection_model.dat -m face_detection_model.masterlic -k isv_keystore -g $uuid -The Intermediate Representation files for the model are encrypted as ``.dat`` and a master license is generated as ``.masterlic`` +The Intermediate Representation files for the ``face-detection-retail-0004`` model are encrypted as ``face_detection_model.dat`` and a master license is generated as ``face_detection_model.masterlic`` Step 5: Create a Runtime Reference TCB -------------------------------------- @@ -817,7 +824,7 @@ Generate the reference TCB for the runtime .. 
code-block:: sh - /opt/ovsa/bin/ovsaruntime gen-tcb-signature -n "Face Detect @ Runtime VM" -v "1.0" -f model_inference_runtime_vm.tcb -k isv_keystore + /opt/ovsa/bin/ovsaruntime gen-tcb-signature -n "Face Detect @ Runtime VM" -v "1.0" -f face_detect_runtime_vm.tcb -k isv_keystore Step 6: Publish the access controlled Model and Runtime Reference TCB @@ -849,7 +856,7 @@ Step 7: Receive a User Request .. code-block:: sh cd $OVSA_DEV_ARTEFACTS - /opt/ovsa/bin/ovsatool sale -m .masterlic -k isv_keystore -l 30daylicense.config -t detect_runtime_vm.tcb -p custkeystore.csr.crt -c .lic + /opt/ovsa/bin/ovsatool sale -m face_detection_model.masterlic -k isv_keystore -l 30daylicense.config -t face_detect_runtime_vm.tcb -p custkeystore.csr.crt -c face_detection_model.lic 4. Update the license server database with the license. @@ -857,13 +864,13 @@ Step 7: Receive a User Request .. code-block:: sh cd /opt/ovsa/DB - python3 ovsa_store_customer_lic_cert_db.py ovsa.db $OVSA_DEV_ARTEFACTS/.lic $OVSA_DEV_ARTEFACTS/custkeystore.csr.crt + python3 ovsa_store_customer_lic_cert_db.py ovsa.db $OVSA_DEV_ARTEFACTS/face_detection_model.lic $OVSA_DEV_ARTEFACTS/custkeystore.csr.crt 5. Provide these files to the User: - * ``.dat`` - * ``.lic`` + * ``face_detection_model.dat`` + * ``face_detection_model.lic`` Model User Instructions +++++++++++++++++++++++ @@ -923,14 +930,14 @@ Step 4: Receive and load the access controlled model into the OpenVINO™ Model 1. Receive the model as files named: - * .dat - * .lic + * face_detection_model.dat + * face_detection_model.lic .. code-block:: sh cd $OVSA_RUNTIME_ARTEFACTS - scp username@://OVSA/artefacts/.dat . - scp username@://OVSA/artefacts/.lic . + scp username@://OVSA/artefacts/face_detection_model.dat . + scp username@://OVSA/artefacts/face_detection_model.lic . 2. Prepare the environment: @@ -947,8 +954,8 @@ Step 4: Receive and load the access controlled model into the OpenVINO™ Model .. 
code-block:: sh cd $OVSA_RUNTIME_ARTEFACTS/../ovms - cp $OVSA_RUNTIME_ARTEFACTS/.dat model/fd/1/. - cp $OVSA_RUNTIME_ARTEFACTS/.lic model/fd/1/. + cp $OVSA_RUNTIME_ARTEFACTS/face_detection_model.dat model/fd/1/. + cp $OVSA_RUNTIME_ARTEFACTS/face_detection_model.lic model/fd/1/. cp $OVSA_RUNTIME_ARTEFACTS/custkeystore model/fd/1/. 4. Rename and edit ``sample.json`` to include the names of the access controlled model artefacts you received from the Model Developer. The file looks like this: @@ -969,7 +976,7 @@ Step 4: Receive and load the access controlled model into the OpenVINO™ Model "config":{ "name":"controlled-access-model", "base_path":"/sampleloader/model/fd", - "custom_loader_options": {"loader_name": "ovsa", "keystore": "custkeystore", "controlled_access_file": ""} + "custom_loader_options": {"loader_name": "ovsa", "keystore": "custkeystore", "controlled_access_file": "face_detection_model"} } } ] @@ -1003,7 +1010,7 @@ Step 6: Prepare to run Inference pip3 install futures==3.1.1 pip3 install tensorflow-serving-api==1.14.0 -3. Copy the ``detection.py`` from the example_client in ``/opt/ovsa/example_client`` +3. Copy the ``face_detection.py`` from the example_client in ``/opt/ovsa/example_client`` .. code-block:: sh @@ -1020,11 +1027,11 @@ Step 6: Prepare to run Inference Step 7: Run Inference --------------------- -Run the ``detection.py`` script: +Run the ``face_detection.py`` script: .. 
code-block:: sh - python3 detection.py --grpc_port 3335 --batch_size 1 --width 300 --height 300 --input_images_dir images --output_dir results --tls --server_cert /var/OVSA/Modelserver/server.pem --client_cert /var/OVSA/Modelserver/client.pem --client_key /var/OVSA/Modelserver/client.key --model_name controlled-access-model + python3 face_detection.py --grpc_port 3335 --batch_size 1 --width 300 --height 300 --input_images_dir images --output_dir results --tls --server_cert /var/OVSA/Modelserver/server.pem --client_cert /var/OVSA/Modelserver/client.pem --client_key /var/OVSA/Modelserver/client.key --model_name controlled-access-model Summary diff --git a/docs/articles_en/documentation/openvino-ecosystem/openvino-test-drive.rst b/docs/articles_en/documentation/openvino-ecosystem/openvino-test-drive.rst deleted file mode 100644 index 527a01bf38a6cf..00000000000000 --- a/docs/articles_en/documentation/openvino-ecosystem/openvino-test-drive.rst +++ /dev/null @@ -1,109 +0,0 @@ -=============================================================================================== -OpenVINO™ Test Drive -=============================================================================================== - - -.. meta:: - :description: See how to test your models with OpenVINO, using a simple graphic interface of - Test Drive. - - - -OpenVINO™ Test Drive is a cross-platform graphic user interface application for running and -testing AI models, both generative and vision based. -It can run directly on your computer or on edge devices using -`OpenVINO™ Runtime `__. - -OpenVINO™ Test Drive is developed under the `openvino_testdrive repository `__. 
- -Use OpenVINO™ Test Drive to: - -* **Chat with LLMs** and evaluate model performance on your computer or edge device; -* **Experiment with different text prompts** to generate images, using Stable - Diffusion and Stable DiffusionXL models (coming soon); -* **Transcribe speech from video**, using Whisper models, including generation - of timestamps (coming soon); -* **Run inference of models** trained by Intel® Geti™ and **visualize the results**. - - - -Installation (Windows) -############################################################################################### - -1. Download the latest archive from the - `release repository `__. - To verify the integrity of the downloaded package, use the SHA-256 file attached. - -2. Extract the zip file and run the *MSIX* installation package. Click the `Install` button to - proceed. - -3. Launch OpenVINO™ Test Drive, clicking the application name in the Windows app list. - - -Quick start -############################################################################################### - -When starting the application, you can import an LLM model from Hugging Face Hub -or upload an Intel® Geti™ model from a local drive. - -Inference of models from Hugging Face -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -1. Find a model on `Hugging Face `__ and import it. - -2. Chat with LLMs via the `Playground` tab. - -3. Use the `Performance metrics` tab to get model performance metrics on your - computer or an edge device. - - - -Inference of models trained with Intel® Geti™ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -1. Download the deployment code for a model in the OpenVINO IR format trained - by Intel® Geti™ (refer to the `Intel® Geti™ documentation `__ - for more details). - -2. Import the deployment code into OpenVINO™ Test Drive, using the *Import model* and then - *Local disk* buttons. - -3. 
Use the *Live inference* tab to run and visualize results of inference of individual images. - -4. For batch inference, use the *Batch inference* tab and provide paths to the folder - with input images, as well as one for batch inference results. You can do so by filling out - the *Source folder* and *Destination folder* fields. Click *Start* to start batch inference. - - -Build the Application -############################################################################################### - -1. Make sure you `Install flutter SDK `__ - and all its platform-specific dependencies. -2. Build the bindings and place them in the **./bindings** folder. - - OpenVINO™ Test Drive uses bindings to `OpenVINO™ GenAI `__ - and `OpenVINO™ Model API `__, - which are located in the **./openvino_bindings** folder. Refer to the - `GitHub page `__ - for more details. - -3. Start the application, using the following command: - - .. code-block:: console - - flutter run - -Additional Resources -############################################################################################### - -- `OpenVINO™ `__ - a software toolkit - for optimizing and deploying deep learning models. -- `GenAI Repository `__ and - `OpenVINO Tokenizers `__ - - resources and tools for developing and optimizing Generative AI applications. -- `Intel® Geti™ `__ - software for building computer - vision models. -- `OpenVINO™ Model API `__ - - a set of wrapper classes for particular tasks and model architectures. - It simplifies routine procedures, preprocessing and postprocessing of data. diff --git a/docs/articles_en/documentation/openvino-extensibility.rst b/docs/articles_en/documentation/openvino-extensibility.rst index 6b2d0878bb687c..216135009b1806 100644 --- a/docs/articles_en/documentation/openvino-extensibility.rst +++ b/docs/articles_en/documentation/openvino-extensibility.rst @@ -32,7 +32,7 @@ Custom operations, which are not included in the list, are not recognized by Ope 1. 
A new or rarely used regular framework operation is not supported in OpenVINO yet. 2. A new user operation that was created for some specific model topology by the author of the model using framework extension capabilities. -Importing models with such operations requires additional steps. This guide illustrates the workflow for running inference on models featuring custom operations. This allows plugging in your own implementation for them. OpenVINO Extensibility API enables adding support for those custom operations and using one implementation for model conversion API and OpenVINO Runtime. +Importing models with such operations requires additional steps. This guide illustrates the workflow for running inference on models featuring custom operations. This allows plugging in your own implementation for them. OpenVINO Extensibility API enables adding support for those custom operations and using one implementation for Model Optimizer and OpenVINO Runtime. Defining a new custom operation basically consists of two parts: @@ -45,7 +45,7 @@ The first part is required for inference. The second part is required for succes Definition of Operation Semantics ################################# -If the custom operation can be mathematically represented as a combination of existing OpenVINO operations and such decomposition gives desired performance, then low-level operation implementation is not required. Refer to the latest OpenVINO operation set, when deciding feasibility of such decomposition. You can use any valid combination of existing operations. The next section of this document describes the way to map a custom operation. +If the custom operation can be mathematically represented as a combination of existing OpenVINO operations and such decomposition gives desired performance, then low-level operation implementation is not required. Refer to the latest OpenVINO operation set, when deciding feasibility of such decomposition. 
You can use any valid combination of existing operations. The next section of this document describes the way to map a custom operation. If such decomposition is not possible or appears too bulky with a large number of constituent operations that do not perform well, then a new class for the custom operation should be implemented, as described in the :doc:`Custom Operation Guide `. @@ -56,9 +56,21 @@ Mapping from Framework Operation Mapping of custom operation is implemented differently, depending on model format used for import. If a model is represented in the ONNX (including models exported from PyTorch in ONNX), TensorFlow Lite, PaddlePaddle or -TensorFlow formats, then you should use one of the classes from :doc:`Frontend Extension API `, -the application of which is described below. +TensorFlow formats, then one of the classes from :doc:`Frontend Extension API ` +should be used. It consists of several classes available in C++ which can be used with the ``--extensions`` option in Model Optimizer +or when a model is imported directly to OpenVINO runtime using the ``read_model`` method. +Python API is also available for runtime model import. +If you are implementing extensions for new ONNX, PaddlePaddle, TensorFlow Lite or TensorFlow frontends and plan to use the ``--extensions`` +option in Model Optimizer for model conversion, then the extensions should be: + +1. Implemented in C++ only. + +2. Compiled as a separate shared library (see details on how to do this further in this guide). + +Model Optimizer does not support new frontend extensions written in Python API. + +The remaining part of this guide describes the application of Frontend Extension API for new frontends. Registering Extensions ###################### @@ -92,7 +104,7 @@ Extensions can be loaded from a code with the ``ov::Core::add_extension`` metho :fragment: [add_extension] -The ``Identity`` is a custom operation class defined in :doc:`Custom Operation Guide `. 
This is sufficient to enable reading OpenVINO IR which uses the ``Identity`` extension operation. In order to load original model directly to the runtime, add a mapping extension: +The ``Identity`` is a custom operation class defined in :doc:`Custom Operation Guide `. This is sufficient to enable reading OpenVINO IR which uses the ``Identity`` extension operation emitted by Model Optimizer. In order to load original model directly to the runtime, add a mapping extension: .. tab-set:: @@ -121,11 +133,11 @@ Create a Library with Extensions An extension library should be created in the following cases: -* Conversion of a model with custom operations in model conversion API +* Conversion of a model with custom operations in Model Optimizer. * Loading a model with custom operations in a Python application. This applies to both framework model and OpenVINO IR. * Loading models with custom operations in tools that support loading extensions from a library, for example the ``benchmark_app``. -To create an extension library, perform the following: +To create an extension library, for example, to load the extensions into Model Optimizer, perform the following: 1. Create an entry point for extension library. OpenVINO provides the ``OPENVINO_CREATE_EXTENSIONS()`` macro, which allows to define an entry point to a library with OpenVINO Extensions. This macro should have a vector of all OpenVINO Extensions as an argument. 
diff --git a/docs/articles_en/documentation/openvino-extensibility/custom-gpu-operations.rst b/docs/articles_en/documentation/openvino-extensibility/custom-gpu-operations.rst index 9717c6c8ac4e33..92914223ac123c 100644 --- a/docs/articles_en/documentation/openvino-extensibility/custom-gpu-operations.rst +++ b/docs/articles_en/documentation/openvino-extensibility/custom-gpu-operations.rst @@ -40,8 +40,8 @@ There are two options for using the custom operation configuration file: :fragment: [part0] -All OpenVINO samples, except the trivial ``hello_classification``, -feature a dedicated command-line option ``-c`` to load custom kernels. +All OpenVINO samples, except the trivial ``hello_classification``, and most Open +Model Zoo demos feature a dedicated command-line option ``-c`` to load custom kernels. For example, to load custom operations for the classification sample, run the command below: .. code-block:: cpp @@ -49,6 +49,11 @@ For example, to load custom operations for the classification sample, run the co $ ./classification_sample -m /bvlc_alexnet_fp16.xml -i ./validation_set/daily/227x227/apron.bmp -d GPU -c /custom_layer_example.xml +.. important:: + + Due to the deprecation of Open Model Zoo, models in the OpenVINO IR format are now + published on `Hugging Face `__. + .. _config-file-format: @@ -388,7 +393,3 @@ execution ends. For more information, refer to the `printf Function `__. -Additional Resources -#################### - -* Models in the OpenVINO IR format published on `Hugging Face `__. 
diff --git a/docs/articles_en/documentation/openvino-extensibility/frontend-extensions.rst b/docs/articles_en/documentation/openvino-extensibility/frontend-extensions.rst index 08b7c6f6b98018..115f149657821c 100644 --- a/docs/articles_en/documentation/openvino-extensibility/frontend-extensions.rst +++ b/docs/articles_en/documentation/openvino-extensibility/frontend-extensions.rst @@ -14,6 +14,9 @@ Refer to :doc:`Introduction to OpenVINO Extension <../openvino-extensibility>` t understand the entire flow. This API is applicable to new frontends only, which exist for ONNX, TensorFlow Lite, PaddlePaddle, and TensorFlow. +If a different model format is used, follow legacy +:doc:`Model Optimizer Extensions <../legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility>` +guide. .. note:: diff --git a/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations.rst b/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations.rst index 4b64b2177af361..9451fabd6219d8 100644 --- a/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations.rst +++ b/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations.rst @@ -312,11 +312,17 @@ This step is optional. It modifies the transformation function to a device-speci Result model overview ##################### -Let's explore the resnet-50-tf model, quantized to ``fp16``, which is a TensorFlow -implementation of `ResNet-50 `__ -- an image classification model pre-trained on the ImageNet dataset. Originally -redistributed in the "Saved model" format, converted to a frozen graph using the -"tf.graph_util" module. +Let's explore quantized `TensorFlow implementation of ResNet-50 `__ model. 
Use `Model Downloader `__ tool to download the ``fp16`` model from `OpenVINO™ Toolkit - Open Model Zoo repository `__: + +.. code-block:: sh + + omz_downloader --name resnet-50-tf --precisions FP16-INT8 + +After that you should quantize model by the `Model Quantizer `__ tool. + +.. code-block:: sh + + omz_quantizer --model_dir public/resnet-50-tf --dataset_dir --precisions=FP16-INT8 Inference @@ -340,7 +346,7 @@ Result model depends on different factors: Information about layer precision is stored in the performance counters that are -available from the OpenVINO Runtime API. For example, the part of performance counters table for the resnet-50-tf model inferred on CPU Plugin looks as follows: +available from the OpenVINO Runtime API. For example, the part of performance counters table for quantized `TensorFlow implementation of ResNet-50 `__ model inference on CPU Plugin looks as follows: .. list-table:: :header-rows: 1 diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/generation/random-uniform-8.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/generation/random-uniform-8.rst index 26aad1eb161ace..4013f2151a1b6f 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/generation/random-uniform-8.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/generation/random-uniform-8.rst @@ -15,20 +15,12 @@ RandomUniform **Detailed description**: *RandomUniform* operation generates random numbers from a uniform distribution in the range ``[minval, maxval)``. -The generation algorithm is based on an underlying random integer generator that uses either Philox or Mersnne-Twister algorithm. -Both algorithms are counter-based pseudo-random generators, which produce uint32 values. A single algorithm invocation returns -four result random values, depending on the given initial values. 
For Philox, these values are *key* and *counter*, for Mersenne-Twister it is a single *state* value. *Key* and *counter* are initialized -with *global_seed* and *op_seed* attributes respectively, while the *state* is only initialized using *global_seed*. +The generation algorithm is based on underlying random integer generator that uses Philox algorithm. Philox algorithm +is a counter-based pseudo-random generator, which produces uint32 values. Single invocation of Philox algorithm returns +four result random values, depending on the given *key* and *counter* values. *Key* and *counter* are initialized +with *global_seed* and *op_seed* attributes respectively. -Algorithm selection allows to align the output of OpenVINO's Random Uniform op with the ones available in Tensorflow and PyTorch. -The *alignment* attribute selects which framework the output should be aligned to. Tensorflow uses the Philox algorithm and PyTorch uses the Mersenne-Twister algorithm. -For Tensorflow, this function is equivalent to the function tf.raw_ops.RandomUniform(shape, dtype, global_seed, op_seed) when dtype represents a real number, and tf.raw_ops.RandomUniformInt(shape, min\_val, max\_val, dtype, global\_seed, op\_seed) for integer types. Internally, both of these functions are executed by tf.random.uniform(shape, min\_val, max\_val, dtype, global\_seed, op\_seed), where for floating-point dtype the output goes through additional conversion to reside within a given range. -For PyTorch, this function is equivalent to the function torch.Tensor(shape, dtype).uniform\_(min\_val, max\_val) when dtype represents a real number, and torch.Tensor(shape, dtype).random\_(min\_val, max\_val) for integer types. Internally, both of these functions are executed by torch.rand(shape, dtype) with default generator and layout. The seed of these functions is provided by calling torch.manual\_seed(global\_seed). op\_seed value is ignored. 
-By default, the output is aligned with Tensorflow (Philox algorithm). This behavior is backwards-compatibile. - -If both seed values are equal to zero, RandomUniform generates a non-deterministic sequence. - -**Philox Algorithm Explaination**: +If both seed values equal to zero, RandomUniform generates non-deterministic sequence. .. math:: @@ -176,7 +168,7 @@ For integer values: where *x* is uint32 random value. -Example 1. *RandomUniform* output with ``global_seed`` = 150, ``op_seed`` = 10, ``output_type`` = f32, ``alignment`` = TENSORFLOW: +Example 1. *RandomUniform* output with ``global_seed`` = 150, ``op_seed`` = 10, ``output_type`` = f32: .. code-block:: xml :force: @@ -187,7 +179,7 @@ Example 1. *RandomUniform* output with ``global_seed`` = 150, ``op_seed`` = 10, [0.5197197 0.22727466 0.991374 ]] -Example 2. *RandomUniform* output with ``global_seed`` = 80, ``op_seed`` = 100, ``output_type`` = double, ``alignment`` = TENSORFLOW: +Example 2. *RandomUniform* output with ``global_seed`` = 80, ``op_seed`` = 100, ``output_type`` = double: .. code-block:: xml :force: @@ -202,7 +194,7 @@ Example 2. *RandomUniform* output with ``global_seed`` = 80, ``op_seed`` = 100, [2.67008206 2.36423758]] -Example 3. *RandomUniform* output with ``global_seed`` = 80, ``op_seed`` = 100, ``output_type`` = i32, ``alignment`` = TENSORFLOW: +Example 3. *RandomUniform* output with ``global_seed`` = 80, ``op_seed`` = 100, ``output_type`` = i32: .. code-block:: xml :force: @@ -216,148 +208,6 @@ Example 3. *RandomUniform* output with ``global_seed`` = 80, ``op_seed`` = 100, output = [[65 70 56] [59 82 92]] -------------------------------------------------------- - -Mersenne-Twister Algorithm Explanation: - -Link to the original paper Mersenne Twister: Mersenne twister: a 623-dimensionally equidistributed uniform pseudo-random number generator __. 
- -The Mersenne-Twister algorithm generates random numbers by initializing a state array with a seed and then iterating through a series of transformations. -Suppose we have n which determines the n-th element of the random sequence. - -The initial state array is generated recursively using the following formula: - -.. math:: - - state[0] = global_seed & 0xffffffff; - state[i] = 1812433253 * state[i-1] ^ (state[i-1] >> 30) + i - -where the value of i cannot exceed 623. - -The output is generated by tempering the state array: - -.. math:: - - y = state[i]\ - y = y \oplus (y >> u)\ - y = y \oplus ((y << s) & b)\ - y = y \oplus ((y << t) & c)\ - y = y \oplus (y >> l) - -where u, s, t, l, b, and c are constants. - -Whenever all state values are 'used', a new state array is generated recursively as follows: - -.. math:: - - current_state = state[i] - next_state = state[i+1] if i+1 <= 623 else state[0] - next_m_state = state[i+m] if i+m <= 623 else state[i+m-623] - - twisted_state = (((current_state & 0x80000000) | (next_state & 0x7fffffff)) >> 1) ^ (next_state & 1 ? 0x9908b0df : 0) - state[i] = next_m_state ^ twisted_state - -where m is a constant. - -For parity with PyTorch, the value of the constants is set as follows: - -.. math:: - - u = 11 - s = 7 - b = 0x9d2c5680 - t = 15 - c = 0xefc60000 - l = 18 - m = 397 - -These values follow the recommendations from the linked paper for MT19937. - -To convert a given unsigned int value (denoted as x below) to a specific type, a simple conversion is performed. -For float32: - -.. 
math:: - - mantissa_digits = 24 (mantissa / significand bits count of float + 1, equal to std::numeric_limits::digits == FLT_MANT_DIG == 24) - mask = uint32(uint64(1) << mantissa_digits - 1) - divisor = float(1) / (uint64(1) << mantissa_digits) - output = float((x & mask) * divisor) - -For float16: - - mantissa_digits = 11 (mantissa / significand bits count of float16 + 1, equal to 11) - mask = uint32(uint64(1) << mantissa_digits - 1) - divisor = float(1) / (uint64(1) << mantissa_digits) - output = float16((x & mask) * divisor) - -For bfloat16: - - mantissa_digits = 8 (mantissa / significand bits count of bfloat16 + 1, equal to 8) - mask = uint32(uint64(1) << mantissa_digits - 1) - divisor = float(1) / (uint64(1) << mantissa_digits) - output = bfloat16((x & mask) * divisor) - -For float64 (double precision requires the use of two uint32 values, denoted as x and y below): - - value = uint64(x) << 32 + y - - mantissa_digits = 53 (mantissa / significand bits count of double + 1, equal to std::numeric_limits::digits == DBL_MANT_DIG == 53) - mask = uint64(1) << mantissa_digits - 1 - divisor = double(1) / (uint64(1) << mantissa_digits) - output = double((x & mask) * divisor) - -All of the floating - point types above after the conversion fall between the values of 0 and 1. To convert them to reside between a range , a simple operation is performed: - -.. math:: - - output = x * (max - min) + min - -For integer types, no special conversion operation is done except for int64 when either min or max exceeds the maximum possible value of uint32. A simple operation to standardize the values is performed. -The special behavior (optimization) for int64 matches the expected output for PyTorch, normally a concatenation of 2 uint32s always occurs. -In other words: - -.. 
math:: - - if output is of int32 dtype: - output = int32(x) - else if output is of int64 dtype and (min <= max(uint32) and max <= max(uint32)): - output = int64(x) - else: - output = int64(x << 32 + y) (uses 2 uint32s instead of one) - - output = output % (max - min) + min - -Example 1. RandomUniform output with initial_seed = 150, output_type = f32, alignment = PYTORCH: -.. code-block:: xml - :force: - - input_shape = [ 3, 3 ] \\ - output = [[0.6789123 0.31274895 0.91842768] \\ - [0.9312087 0.13456984 0.49623574] \\ - [0.5082716 0.23938411 0.97856429]] - - -Example 2. RandomUniform output with initial_seed = 80, output_type = double, alignment = PYTORCH: - -.. code-block:: xml - :force: - - input_shape = [ 2, 2 ] \\ - minval = 2 \\ - maxval = 10 \\ - output = [[8.34928537 6.12348725] \\ - [3.76852914 2.89564172]] - -Example 3. RandomUniform output with initial_seed = 80, output_type = i32, alignment = PYTORCH: - -.. code-block:: xml - :force: - - input_shape = [ 2, 3 ] \\ - minval = 50 \\ - maxval = 100 \\ - output = [[89 73 68] \\ - [95 78 61]] **Attributes**: @@ -384,14 +234,6 @@ Example 3. RandomUniform output with initial_seed = 80, output_type = i32, align * **Default value**: 0 * **Required**: *Yes* -* ``alignment`` - - * **Description**: the framework to align the output to. - * **Range of values**: TENSORFLOW, PYTORCH - * **Type**: `string` - * **Default value**: TENSORFLOW - * **Required**: *No* - **Inputs**: * **1**: ``shape`` - 1D tensor of type *T_SHAPE* describing output shape. **Required.** @@ -403,7 +245,7 @@ Example 3. RandomUniform output with initial_seed = 80, output_type = i32, align **Outputs**: -* **1**: A tensor with type specified by the attribute *output_type* and shape defined by ``shape`` input tensor, with values aligned to the framework selected by the ``alignment`` attribute. +* **1**: A tensor with type specified by the attribute *output_type* and shape defined by ``shape`` input tensor. 
**Types** diff --git a/docs/articles_en/documentation/openvino-security.rst b/docs/articles_en/documentation/openvino-security.rst index 03a99ba49e89e2..99cf13161bf243 100644 --- a/docs/articles_en/documentation/openvino-security.rst +++ b/docs/articles_en/documentation/openvino-security.rst @@ -55,8 +55,7 @@ Hardware-based protection such as Intel Software Guard Extensions (Intel SGX) ca decryption operation secrets and bind them to a device. For more information, see the `Intel Software Guard Extensions `__. -Use the `ov::Core::read_model <../api/c_cpp_api/group__ov__dev__exec__model.html#classov_1_1_core_1ae0576a95f841c3a6f5e46e4802716981>`__ -to set model representations and weights respectively. +Use the ``ov::Core::read_model`` to set model representations and weights respectively. Currently there is no way to read external weights from memory for ONNX models. The ``ov::Core::read_model(const std::string& model, const Tensor& weights)`` method @@ -66,24 +65,10 @@ should be called with ``weights`` passed as an empty ``ov::Tensor``. :language: cpp :fragment: part1 - -Encrypted models that have already been compiled, in the form of blob files, -can be loaded using the -`ov::Core::import_model <../api/c_cpp_api/group__ov__runtime__cpp__api.html#_CPPv4N2ov4Core12import_modelERNSt7istreamERKNSt6stringERK6AnyMap>`__ -method, as shown in the code sample below: - -.. code-block:: cpp - - ov::Core core; - // Import a model from a blob. - std::ifstream compiled_blob(blob, std::ios_base::in | std::ios_base::binary); - auto compiled_model = core.import_model(compiled_blob, "CPU"); - - Additional Resources #################### - Intel® Distribution of OpenVINO™ toolkit `home page `__. -- :doc:`Convert a Model <../openvino-workflow/model-preparation/convert-model-to-ir>`. +- :doc:`Convert a Model `. - :doc:`OpenVINO™ Runtime User Guide <../openvino-workflow/running-inference>`. 
- For more information on Sample Applications, see the :doc:`OpenVINO Samples Overview <../learn-openvino/openvino-samples>` diff --git a/docs/articles_en/get-started.rst b/docs/articles_en/get-started.rst index 9b46cc416605f3..28a39d3c0a4e84 100644 --- a/docs/articles_en/get-started.rst +++ b/docs/articles_en/get-started.rst @@ -62,14 +62,14 @@ OpenVINO provides a wide array of examples and documentation showing how to work OpenVINO Basics +++++++++++++++ -Learn the basics of working with models and inference in OpenVINO. Begin with “Hello World” Interactive Tutorials that show how to prepare models, run inference, and retrieve results using the OpenVINO API. Then, explore OpenVINO Code Samples that can be adapted for your own application. +Learn the basics of working with models and inference in OpenVINO. Begin with “Hello World” Interactive Tutorials that show how to prepare models, run inference, and retrieve results using the OpenVINO API. Then, explore other examples from the Open Model Zoo and OpenVINO Code Samples that can be adapted for your own application. .. _interactive-learn-openvino/interactive-tutorials-python: Interactive Tutorials - Jupyter Notebooks ----------------------------------------- -Start with :doc:`interactive Python ` that show the basics of model inference, the OpenVINO API, how to convert models to OpenVINO format, and more. +Start with :doc:`interactive Python ` that show the basics of model inferencing, the OpenVINO API, how to convert models to OpenVINO format, and more. 
* `Hello Image Classification `__ - Load an image classification model in OpenVINO and use it to apply a label to an image * `OpenVINO Runtime API Tutorial `__ - Learn the basic Python API for working with models in OpenVINO diff --git a/docs/articles_en/get-started/configurations/configurations-intel-gpu.rst b/docs/articles_en/get-started/configurations/configurations-intel-gpu.rst index 4d1eb37007f59d..e10a67fddadb53 100644 --- a/docs/articles_en/get-started/configurations/configurations-intel-gpu.rst +++ b/docs/articles_en/get-started/configurations/configurations-intel-gpu.rst @@ -44,7 +44,6 @@ Below are the instructions on how to install the OpenCL packages on supported Li .. code-block:: sh apt-get install -y ocl-icd-libopencl1 intel-opencl-icd intel-level-zero-gpu level-zero - sudo usermod -a -G render $LOGNAME .. tab-item:: Ubuntu 20.04 LTS :sync: ubuntu-20 @@ -58,7 +57,6 @@ Below are the instructions on how to install the OpenCL packages on supported Li echo 'deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/graphics/ubuntu focal-legacy main' | tee /etc/apt/sources.list.d/intel.gpu.focal.list && \ apt-get update apt-get update && apt-get install -y --no-install-recommends intel-opencl-icd intel-level-zero-gpu level-zero - sudo usermod -a -G render $LOGNAME Alternatively, download older `deb` version from `here `__. Note that older driver version might not include some of the bug fixes and might be not supported on some latest platforms. Check the supported hardware for the versions you are installing. 
@@ -137,6 +135,6 @@ Additional Resources * `Docker CI framework for Intel® Distribution of OpenVINO™ toolkit `__ * `Get Started with DockerHub CI for Intel® Distribution of OpenVINO™ toolkit `__ * `Dockerfiles with Intel® Distribution of OpenVINO™ toolkit `__ -* `GPU Driver issue troubleshoot ` + diff --git a/docs/articles_en/get-started/configurations/genai-dependencies.rst b/docs/articles_en/get-started/configurations/genai-dependencies.rst index 4486890c3a40b8..59d29ef3108da0 100644 --- a/docs/articles_en/get-started/configurations/genai-dependencies.rst +++ b/docs/articles_en/get-started/configurations/genai-dependencies.rst @@ -4,12 +4,12 @@ OpenVINO™ GenAI Dependencies OpenVINO™ GenAI depends on both `OpenVINO `__ and `OpenVINO Tokenizers `__. During OpenVINO™ GenAI installation from PyPi, the same versions of OpenVINO and OpenVINO Tokenizers -are used (e.g. ``openvino==2024.6.0`` and ``openvino-tokenizers==2024.6.0.0`` are installed for -``openvino-genai==2024.6.0``). +are used (e.g. ``openvino==2024.5.0`` and ``openvino-tokenizers==2024.5.0.0`` are installed for +``openvino-genai==2024.5.0``). -Trying to update any of the dependency packages might result in a version incompatibility +Trying to update any of the dependency packages might result in a version incompatibility due to different Application Binary Interfaces (ABIs), which will result in errors while running -OpenVINO GenAI. Having package version in the ``...`` format, enables +OpenVINO GenAI. Having package version in the ``...`` format, allows changing the ```` portion of the full version to ensure ABI compatibility. Changing ````, ```` or ```` part of the version may break ABI.
diff --git a/docs/articles_en/get-started/install-openvino.rst b/docs/articles_en/get-started/install-openvino.rst index 7603adf37b7e89..be00804faa01d2 100644 --- a/docs/articles_en/get-started/install-openvino.rst +++ b/docs/articles_en/get-started/install-openvino.rst @@ -1,4 +1,4 @@ -Install OpenVINO™ 2024.6 +Install OpenVINO™ 2024.5 ========================== @@ -21,12 +21,12 @@ Install OpenVINO™ 2024.6 - + -OpenVINO 2024.6, described here, is not a Long-Term-Support version! +OpenVINO 2024.5, described here, is not a Long-Term-Support version! All currently supported versions are: -* 2024.6 (development) +* 2024.5 (development) * 2023.3 (LTS) @@ -38,7 +38,20 @@ All currently supported versions are: :doc:`Install OpenVINO GenAI Flavor <../learn-openvino/llm_inference_guide/genai-guide>` and :doc:`Run LLMs with OpenVINO GenAI Flavor <../learn-openvino/llm_inference_guide/genai-guide>`. +.. dropdown:: Deprecation of OpenVINO™ Development Tools Package + + The OpenVINO™ Development Tools package has been deprecated and removed from the default + installation options. For new projects, the OpenVINO runtime package now includes + all necessary components. + + The OpenVINO Development Tools is still available for older versions of OpenVINO, + as well as the current one, from the GitHub repository and PyPI. :doc:`Learn more <../documentation/legacy-features/install-dev-tools>`. + .. dropdown:: Building OpenVINO from Source OpenVINO Toolkit source files are available on GitHub as open source. If you want to build your own version of OpenVINO for your platform, follow the `OpenVINO Build Instructions `__. 
+ + + + diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-archive-linux.rst b/docs/articles_en/get-started/install-openvino/install-openvino-archive-linux.rst index 7224d63d0380b9..20965f2f22d095 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-archive-linux.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-archive-linux.rst @@ -58,7 +58,7 @@ Step 1: Download and Install the OpenVINO Core Components cd /Downloads -4. Download the `OpenVINO Runtime archive file for your system `_, extract the files, rename the extracted folder and move it to the desired path: +4. Download the `OpenVINO Runtime archive file for your system `_, extract the files, rename the extracted folder and move it to the desired path: .. tab-set:: @@ -73,9 +73,9 @@ Step 1: Download and Install the OpenVINO Core Components .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.6/linux/l_openvino_toolkit_ubuntu24_2024.6.0.17404.4c0f47d2335_x86_64.tgz --output openvino_2024.6.0.tgz - tar -xf openvino_2024.6.0.tgz - sudo mv l_openvino_toolkit_ubuntu24_2024.6.0.17404.4c0f47d2335_x86_64 /opt/intel/openvino_2024.6.0 + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.5/linux/l_openvino_toolkit_ubuntu24_2024.5.0.17288.7975fa5da0c_x86_64.tgz --output openvino_2024.5.0.tgz + tar -xf openvino_2024.5.0.tgz + sudo mv l_openvino_toolkit_ubuntu24_2024.5.0.17288.7975fa5da0c_x86_64 /opt/intel/openvino_2024.5.0 .. tab-item:: Ubuntu 22.04 :sync: ubuntu-22 @@ -83,9 +83,9 @@ Step 1: Download and Install the OpenVINO Core Components .. 
code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.6/linux/l_openvino_toolkit_ubuntu22_2024.6.0.17404.4c0f47d2335_x86_64.tgz --output openvino_2024.6.0.tgz - tar -xf openvino_2024.6.0.tgz - sudo mv l_openvino_toolkit_ubuntu22_2024.6.0.17404.4c0f47d2335_x86_64 /opt/intel/openvino_2024.6.0 + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.5/linux/l_openvino_toolkit_ubuntu22_2024.5.0.17288.7975fa5da0c_x86_64.tgz --output openvino_2024.5.0.tgz + tar -xf openvino_2024.5.0.tgz + sudo mv l_openvino_toolkit_ubuntu22_2024.5.0.17288.7975fa5da0c_x86_64 /opt/intel/openvino_2024.5.0 .. tab-item:: Ubuntu 20.04 :sync: ubuntu-20 @@ -93,9 +93,9 @@ Step 1: Download and Install the OpenVINO Core Components .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.6/linux/l_openvino_toolkit_ubuntu20_2024.6.0.17404.4c0f47d2335_x86_64.tgz --output openvino_2024.6.0.tgz - tar -xf openvino_2024.6.0.tgz - sudo mv l_openvino_toolkit_ubuntu20_2024.6.0.17404.4c0f47d2335_x86_64 /opt/intel/openvino_2024.6.0 + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.5/linux/l_openvino_toolkit_ubuntu20_2024.5.0.17288.7975fa5da0c_x86_64.tgz --output openvino_2024.5.0.tgz + tar -xf openvino_2024.5.0.tgz + sudo mv l_openvino_toolkit_ubuntu20_2024.5.0.17288.7975fa5da0c_x86_64 /opt/intel/openvino_2024.5.0 .. tab-item:: RHEL 8 :sync: rhel-8 @@ -103,18 +103,18 @@ Step 1: Download and Install the OpenVINO Core Components .. 
code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.6/linux/l_openvino_toolkit_rhel8_2024.6.0.17404.4c0f47d2335_x86_64.tgz --output openvino_2024.6.0.tgz - tar -xf openvino_2024.6.0.tgz - sudo mv l_openvino_toolkit_rhel8_2024.6.0.17404.4c0f47d2335_x86_64 /opt/intel/openvino_2024.6.0 + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.5/linux/l_openvino_toolkit_rhel8_2024.5.0.17288.7975fa5da0c_x86_64.tgz --output openvino_2024.5.0.tgz + tar -xf openvino_2024.5.0.tgz + sudo mv l_openvino_toolkit_rhel8_2024.5.0.17288.7975fa5da0c_x86_64 /opt/intel/openvino_2024.5.0 .. tab-item:: CentOS 7 :sync: centos-7 .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.6/linux/l_openvino_toolkit_centos7_2024.6.0.17404.4c0f47d2335_x86_64.tgz --output openvino_2024.6.0.tgz - tar -xf openvino_2024.6.0.tgz - sudo mv l_openvino_toolkit_centos7_2024.6.0.17404.4c0f47d2335_x86_64 /opt/intel/openvino_2024.6.0 + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.5/linux/l_openvino_toolkit_centos7_2024.5.0.17288.7975fa5da0c_x86_64.tgz --output openvino_2024.5.0.tgz + tar -xf openvino_2024.5.0.tgz + sudo mv l_openvino_toolkit_centos7_2024.5.0.17288.7975fa5da0c_x86_64 /opt/intel/openvino_2024.5.0 .. tab-item:: ARM 64-bit @@ -122,25 +122,25 @@ Step 1: Download and Install the OpenVINO Core Components .. 
code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.6/linux/l_openvino_toolkit_ubuntu20_2024.6.0.17404.4c0f47d2335_arm64.tgz -O openvino_2024.6.0.tgz - tar -xf openvino_2024.6.0.tgz - sudo mv l_openvino_toolkit_ubuntu20_2024.6.0.17404.4c0f47d2335_arm64 /opt/intel/openvino_2024.6.0 + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.5/linux/l_openvino_toolkit_ubuntu20_2024.5.0.17288.7975fa5da0c_arm64.tgz -O openvino_2024.5.0.tgz + tar -xf openvino_2024.5.0.tgz + sudo mv l_openvino_toolkit_ubuntu20_2024.5.0.17288.7975fa5da0c_arm64 /opt/intel/openvino_2024.5.0 .. tab-item:: ARM 32-bit :sync: arm-32 .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.6/linux/l_openvino_toolkit_debian10_2024.6.0.17404.4c0f47d2335_armhf.tgz -O openvino_2024.6.0.tgz - tar -xf openvino_2024.6.0.tgz - sudo mv l_openvino_toolkit_debian10_2024.6.0.17404.4c0f47d2335_armhf /opt/intel/openvino_2024.6.0 + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.5/linux/l_openvino_toolkit_debian10_2024.5.0.17288.7975fa5da0c_armhf.tgz -O openvino_2024.5.0.tgz + tar -xf openvino_2024.5.0.tgz + sudo mv l_openvino_toolkit_debian10_2024.5.0.17288.7975fa5da0c_armhf /opt/intel/openvino_2024.5.0 5. Install required system dependencies on Linux. To do this, OpenVINO provides a script in the extracted installation directory. Run the following command: .. code-block:: sh - cd /opt/intel/openvino_2024.6.0 + cd /opt/intel/openvino_2024.5.0 sudo -E ./install_dependencies/install_openvino_dependencies.sh 6. (Optional) Install *numpy* Python Library: @@ -149,11 +149,11 @@ Step 1: Download and Install the OpenVINO Core Components This step is required only when you decide to use Python API. 
- You can use the ``requirements.txt`` file from the ``/opt/intel/openvino_2024.6.0/python`` folder: + You can use the ``requirements.txt`` file from the ``/opt/intel/openvino_2024.5.0/python`` folder: .. code-block:: sh - cd /opt/intel/openvino_2024.6.0 + cd /opt/intel/openvino_2024.5.0 python3 -m pip install -r ./python/requirements.txt 7. For simplicity, it is useful to create a symbolic link as below: @@ -162,7 +162,7 @@ Step 1: Download and Install the OpenVINO Core Components cd /opt/intel - sudo ln -s openvino_2024.6.0 openvino_2024 + sudo ln -s openvino_2024.5.0 openvino_2024 .. note:: If you have already installed a previous release of OpenVINO 2024, a symbolic link to the ``openvino_2024`` folder may already exist. @@ -277,4 +277,4 @@ Additional Resources * Converting models for use with OpenVINO™: :doc:`Convert a Model <../../../openvino-workflow/model-preparation>` * Writing your own OpenVINO™ applications: :doc:`OpenVINO™ Runtime User Guide <../../../openvino-workflow/running-inference>` * Sample applications: :doc:`OpenVINO™ Toolkit Samples Overview <../../../learn-openvino/openvino-samples>` -* Pre-trained deep learning models on `Hugging Face `__. +* Pre-trained deep learning models: :doc:`Overview of OpenVINO™ Toolkit Pre-Trained Models <../../../documentation/legacy-features/model-zoo>` diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-archive-macos.rst b/docs/articles_en/get-started/install-openvino/install-openvino-archive-macos.rst index 0cf2f5f31548dc..e4bff378106122 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-archive-macos.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-archive-macos.rst @@ -47,7 +47,7 @@ Step 1: Install OpenVINO Core Components cd /Downloads -4. Download the `OpenVINO Runtime archive file for macOS `__, extract the files, rename the extracted folder and move it to the desired path: +4. 
Download the `OpenVINO Runtime archive file for macOS `__, extract the files, rename the extracted folder and move it to the desired path: .. tab-set:: @@ -57,9 +57,9 @@ Step 1: Install OpenVINO Core Components .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.6/macos/m_openvino_toolkit_macos_12_6_2024.6.0.17404.4c0f47d2335_x86_64.tgz --output openvino_2024.6.0.tgz - tar -xf openvino_2024.6.0.tgz - sudo mv m_openvino_toolkit_macos_12_6_2024.6.0.17404.4c0f47d2335_x86_64 /opt/intel/openvino_2024.6.0 + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.5/macos/m_openvino_toolkit_macos_12_6_2024.5.0.17288.7975fa5da0c_x86_64.tgz --output openvino_2024.5.0.tgz + tar -xf openvino_2024.5.0.tgz + sudo mv m_openvino_toolkit_macos_12_6_2024.5.0.17288.7975fa5da0c_x86_64 /opt/intel/openvino_2024.5.0 .. tab-item:: ARM, 64-bit :sync: arm-64 @@ -67,9 +67,9 @@ Step 1: Install OpenVINO Core Components .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.6/macos/m_openvino_toolkit_macos_12_6_2024.6.0.17404.4c0f47d2335_arm64.tgz --output openvino_2024.6.0.tgz - tar -xf openvino_2024.6.0.tgz - sudo mv m_openvino_toolkit_macos_12_6_2024.6.0.17404.4c0f47d2335_arm64 /opt/intel/openvino_2024.6.0 + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.5/macos/m_openvino_toolkit_macos_12_6_2024.5.0.17288.7975fa5da0c_arm64.tgz --output openvino_2024.5.0.tgz + tar -xf openvino_2024.5.0.tgz + sudo mv m_openvino_toolkit_macos_12_6_2024.5.0.17288.7975fa5da0c_arm64 /opt/intel/openvino_2024.5.0 5. (Optional) Install *numpy* Python Library: @@ -78,11 +78,11 @@ Step 1: Install OpenVINO Core Components This step is required only when you decide to use Python API. 
- You can use the ``requirements.txt`` file from the ``/opt/intel/openvino_2024.6.0/python`` folder: + You can use the ``requirements.txt`` file from the ``/opt/intel/openvino_2024.5.0/python`` folder: .. code-block:: sh - cd /opt/intel/openvino_2024.6.0 + cd /opt/intel/openvino_2024.5.0 python3 -m pip install -r ./python/requirements.txt 6. For simplicity, it is useful to create a symbolic link as below: @@ -90,7 +90,7 @@ Step 1: Install OpenVINO Core Components .. code-block:: sh - sudo ln -s /opt/intel/openvino_2024.6.0 /opt/intel/openvino_2024 + sudo ln -s /opt/intel/openvino_2024.5.0 /opt/intel/openvino_2024 .. note:: @@ -190,4 +190,4 @@ Additional Resources * :doc:`Convert models for use with OpenVINO™ <../../../openvino-workflow/model-preparation/convert-model-to-ir>` * :doc:`Write your own OpenVINO™ applications <../../../openvino-workflow/running-inference/integrate-openvino-with-your-application>` * Sample applications: :doc:`OpenVINO™ Toolkit Samples Overview <../../../learn-openvino/openvino-samples>` -* Pre-trained deep learning models on `Hugging Face `__ +* Pre-trained deep learning models: :doc:`Overview of OpenVINO™ Toolkit Pre-Trained Models <../../../documentation/legacy-features/model-zoo>` diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-archive-windows.rst b/docs/articles_en/get-started/install-openvino/install-openvino-archive-windows.rst index 52d1d6d4be0814..9db280ec81472e 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-archive-windows.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-archive-windows.rst @@ -41,18 +41,18 @@ Step 1: Download and Install OpenVINO Core Components ``C:\Program Files (x86)\Intel`` is the recommended folder. You may also use a different path if desired or if you don't have administrator privileges on your computer. -2. Download the `OpenVINO Runtime archive file for Windows `__ to your local ``Downloads`` folder. +2. 
Download the `OpenVINO Runtime archive file for Windows `__ to your local ``Downloads`` folder. If you prefer using command-lines, run the following commands in the command prompt window you opened: .. code-block:: sh cd /Downloads - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.6/windows/w_openvino_toolkit_windows_2024.6.0.17404.4c0f47d2335_x86_64.zip --output openvino_2024.6.0.zip + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.5/windows/w_openvino_toolkit_windows_2024.5.0.17288.7975fa5da0c_x86_64.zip --output openvino_2024.5.0.zip .. note:: - A ``.sha256`` file is provided together with the archive file to validate your download process. To do that, download the ``.sha256`` file from the same repository and run ``CertUtil -hashfile openvino_2024.6.0.zip SHA256``. Compare the returned value in the output with what's in the ``.sha256`` file: if the values are the same, you have downloaded the correct file successfully; if not, create a Support ticket `here `__. + A ``.sha256`` file is provided together with the archive file to validate your download process. To do that, download the ``.sha256`` file from the same repository and run ``CertUtil -hashfile openvino_2024.5.0.zip SHA256``. Compare the returned value in the output with what's in the ``.sha256`` file: if the values are the same, you have downloaded the correct file successfully; if not, create a Support ticket `here `__. 3. Use your favorite tool to extract the archive file, rename the extracted folder, and move it to the ``C:\Program Files (x86)\Intel`` directory. @@ -61,9 +61,9 @@ Step 1: Download and Install OpenVINO Core Components .. 
code-block:: sh - tar -xf openvino_2024.6.0.zip - ren w_openvino_toolkit_windows_2024.6.0.17404.4c0f47d2335_x86_64 openvino_2024.6.0 - move openvino_2024.6.0 "C:\Program Files (x86)\Intel" + tar -xf openvino_2024.5.0.zip + ren w_openvino_toolkit_windows_2024.5.0.17288.7975fa5da0c_x86_64 openvino_2024.5.0 + move openvino_2024.5.0 "C:\Program Files (x86)\Intel" 4. (Optional) Install *numpy* Python Library: @@ -72,11 +72,11 @@ Step 1: Download and Install OpenVINO Core Components This step is required only when you decide to use Python API. - You can use the ``requirements.txt`` file from the ``C:\Program Files (x86)\Intel\openvino_2024.6.0\python`` folder: + You can use the ``requirements.txt`` file from the ``C:\Program Files (x86)\Intel\openvino_2024.5.0\python`` folder: .. code-block:: sh - cd "C:\Program Files (x86)\Intel\openvino_2024.6.0" + cd "C:\Program Files (x86)\Intel\openvino_2024.5.0" python -m pip install -r .\python\requirements.txt @@ -85,7 +85,7 @@ Step 1: Download and Install OpenVINO Core Components .. code-block:: sh cd C:\Program Files (x86)\Intel - mklink /D openvino_2024 openvino_2024.6.0 + mklink /D openvino_2024 openvino_2024.5.0 .. note:: @@ -213,4 +213,4 @@ Additional Resources * :doc:`Convert models for use with OpenVINO™ <../../../openvino-workflow/model-preparation/convert-model-to-ir>` * :doc:`Write your own OpenVINO™ applications <../../../openvino-workflow/running-inference/integrate-openvino-with-your-application>` * Sample applications: :doc:`OpenVINO™ Toolkit Samples Overview <../../../learn-openvino/openvino-samples>` -* Pre-trained deep learning models on `Hugging Face `__. 
+* Pre-trained deep learning models: :doc:`Overview of OpenVINO™ Toolkit Pre-Trained Models <../../../documentation/legacy-features/model-zoo>` diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-brew.rst b/docs/articles_en/get-started/install-openvino/install-openvino-brew.rst index 612a873e4ff5ed..b1710f3bb358e8 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-brew.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-brew.rst @@ -59,7 +59,14 @@ Now that you've installed OpenVINO Runtime, you can try the following things: * Learn more about :doc:`OpenVINO Workflow <../../../openvino-workflow>`. * To prepare your models for working with OpenVINO, see :doc:`Model Preparation <../../../openvino-workflow/model-preparation>`. -* See pre-trained deep learning models on `Hugging Face `__. +* See pre-trained deep learning models in our + :doc:`Open Model Zoo <../../../documentation/legacy-features/model-zoo>`. + + .. important:: + + Due to the deprecation of Open Model Zoo, models in the OpenVINO IR format are now + published on `Hugging Face `__. + * Learn more about :doc:`Inference with OpenVINO Runtime <../../../openvino-workflow/running-inference>`. * See sample applications in :doc:`OpenVINO toolkit Samples Overview <../../../learn-openvino/openvino-samples>`. * Check out the OpenVINO `product home page `__. 
diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-conda.rst b/docs/articles_en/get-started/install-openvino/install-openvino-conda.rst index df3c8c7e0dc53b..d1392d3f46a513 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-conda.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-conda.rst @@ -108,6 +108,7 @@ components by using: - ``libopenvino-pytorch-frontend`` - ``libopenvino-tensorflow-frontend`` - ``libopenvino-tensorflow-lite-frontend`` +- ``libopenvino-dev`` - ``libopenvino-python`` - ``libopenvino-arm-cpu-plugin`` diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-genai.rst b/docs/articles_en/get-started/install-openvino/install-openvino-genai.rst index bbfaa7817017ef..a10b0d0c7bbce4 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-genai.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-genai.rst @@ -47,24 +47,24 @@ Linux .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.6/linux/openvino_genai_ubuntu24_2024.6.0.0_x86_64.tar.gz --output openvino_genai_2024.6.0.0.tgz - tar -xf openvino_genai_2024.6.0.0.tgz + curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.5/linux/openvino_genai_ubuntu24_2024.5.0.0_x86_64.tar.gz --output openvino_genai_2024.5.0.0.tgz + tar -xf openvino_genai_2024.5.0.0.tgz .. tab-item:: Ubuntu 22.04 :sync: ubuntu-22 .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.6/linux/openvino_genai_ubuntu22_2024.6.0.0_x86_64.tar.gz --output openvino_genai_2024.6.0.0.tgz - tar -xf openvino_genai_2024.6.0.0.tgz + curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.5/linux/openvino_genai_ubuntu22_2024.5.0.0_x86_64.tar.gz --output openvino_genai_2024.5.0.0.tgz + tar -xf openvino_genai_2024.5.0.0.tgz .. 
tab-item:: Ubuntu 20.04 :sync: ubuntu-20 .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.6/linux/openvino_genai_ubuntu20_2024.6.0.0_x86_64.tar.gz --output openvino_genai_2024.6.0.0.tgz - tar -xf openvino_genai_2024.6.0.0.tgz + curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.5/linux/openvino_genai_ubuntu20_2024.5.0.0_x86_64.tar.gz --output openvino_genai_2024.5.0.0.tgz + tar -xf openvino_genai_2024.5.0.0.tgz .. tab-item:: ARM 64-bit @@ -72,8 +72,8 @@ Linux .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.6/linux/openvino_genai_ubuntu20_2024.6.0.0_arm64.tar.gz -O openvino_genai_2024.6.0.0.tgz - tar -xf openvino_genai_2024.6.0.0.tgz + curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.5/linux/openvino_genai_ubuntu20_2024.5.0.0_arm64.tar.gz -O openvino_genai_2024.5.0.0.tgz + tar -xf openvino_genai_2024.5.0.0.tgz Windows @@ -82,7 +82,7 @@ Windows .. code-block:: sh cd /Downloads - curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.6/windows/openvino_genai_windows_2024.6.0.0_x86_64.zip --output openvino_genai_2024.6.0.0.zip + curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.5/windows/openvino_genai_windows_2024.5.0.0_x86_64.zip --output openvino_genai_2024.5.0.0.zip macOS ++++++++++++++++++++++++++ @@ -94,16 +94,16 @@ macOS .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.6/macos/openvino_genai_macos_12_6_2024.6.0.0_x86_64.tar.gz --output openvino_genai_2024.6.0.0.tgz - tar -xf openvino_genai_2024.6.0.0.tgz + curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.5/macos/openvino_genai_macos_12_6_2024.5.0.0_x86_64.tar.gz --output openvino_genai_2024.5.0.0.tgz + tar -xf openvino_genai_2024.5.0.0.tgz .. 
tab-item:: ARM, 64-bit :sync: arm-64 .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.6/macos/openvino_genai_macos_12_6_2024.6.0.0_arm64.tar.gz --output openvino_genai_2024.6.0.0.tgz - tar -xf openvino_genai_2024.6.0.0.tgz + curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.5/macos/openvino_genai_macos_12_6_2024.5.0.0_arm64.tar.gz --output openvino_genai_2024.5.0.0.tgz + tar -xf openvino_genai_2024.5.0.0.tgz Here are the full guides: diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-vcpkg.rst b/docs/articles_en/get-started/install-openvino/install-openvino-vcpkg.rst index 6d739b350f5b38..af9fe85528ca5d 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-vcpkg.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-vcpkg.rst @@ -81,7 +81,13 @@ Now that you've installed OpenVINO Runtime, you can try the following things: * Learn more about :doc:`OpenVINO Workflow <../../../openvino-workflow>`. * To prepare your models for working with OpenVINO, see :doc:`Model Preparation <../../../openvino-workflow/model-preparation>`. -* See pre-trained deep learning models on `Hugging Face `__. +* See pre-trained deep learning models in our :doc:`Open Model Zoo <../../../documentation/legacy-features/model-zoo>`. + + .. important:: + + Due to the deprecation of Open Model Zoo, models in the OpenVINO IR format are now + published on `Hugging Face `__. + * Learn more about :doc:`Inference with OpenVINO Runtime <../../../openvino-workflow/running-inference>`. * See sample applications in :doc:`OpenVINO toolkit Samples Overview <../../../learn-openvino/openvino-samples>`. * Check out the OpenVINO `product home page `__ . 
diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-yum.rst b/docs/articles_en/get-started/install-openvino/install-openvino-yum.rst index fc413f194a1e63..970bb47a095d5b 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-yum.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-yum.rst @@ -190,7 +190,13 @@ You can also try the following things: * Learn more about :doc:`OpenVINO Workflow <../../../openvino-workflow>`. * To prepare your models for working with OpenVINO, see :doc:`Model Preparation <../../../openvino-workflow/model-preparation>`. -* See pre-trained deep learning models on `Hugging Face `__. +* See pre-trained deep learning models in our :doc:`Open Model Zoo <../../../documentation/legacy-features/model-zoo>`. + + .. important:: + + Due to the deprecation of Open Model Zoo, models in the OpenVINO IR format are now + published on `Hugging Face `__. + * Learn more about :doc:`Inference with OpenVINO Runtime <../../../openvino-workflow/running-inference>`. * See sample applications in :doc:`OpenVINO toolkit Samples Overview <../../../learn-openvino/openvino-samples>`. * Take a glance at the OpenVINO `product home page `__ . diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-zypper.rst b/docs/articles_en/get-started/install-openvino/install-openvino-zypper.rst index bc589dfdb48a8b..127b26cac0590f 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-zypper.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-zypper.rst @@ -142,7 +142,13 @@ You can also try the following things: * Learn more about :doc:`OpenVINO Workflow <../../../openvino-workflow>`. * To prepare your models for working with OpenVINO, see :doc:`Model Preparation <../../../openvino-workflow/model-preparation>`. -* See pre-trained deep learning models on `Hugging Face `__. 
+* See pre-trained deep learning models in our :doc:`Open Model Zoo <../../../documentation/legacy-features/model-zoo>`. + + .. important:: + + Due to the deprecation of Open Model Zoo, models in the OpenVINO IR format are now + published on `Hugging Face `__. + * Learn more about :doc:`Inference with OpenVINO Runtime <../../../openvino-workflow/running-inference>`. * See sample applications in :doc:`OpenVINO toolkit Samples Overview <../../../learn-openvino/openvino-samples>`. * Take a glance at the OpenVINO `product home page `__ . diff --git a/docs/articles_en/learn-openvino.rst b/docs/articles_en/learn-openvino.rst index 98797c9c67c126..4fca64051003a7 100644 --- a/docs/articles_en/learn-openvino.rst +++ b/docs/articles_en/learn-openvino.rst @@ -14,7 +14,7 @@ Learn OpenVINO Interactive Tutorials (Python) Sample Applications (Python & C++) - Generative AI workflow + Large Language Model Inference Guide @@ -29,5 +29,5 @@ as well as an experienced user. | :doc:`OpenVINO Samples ` | The OpenVINO samples (Python and C++) are simple console applications that show how to use specific OpenVINO API features. They can assist you in executing tasks such as loading a model, running inference, querying particular device capabilities, etc. -| :doc:`Generative AI workflow ` +| :doc:`Large Language Models in OpenVINO ` | Detailed information on how OpenVINO accelerates Generative AI use cases and what models it supports. This tutorial provides instructions for running Generative AI models using Hugging Face Optimum Intel and Native OpenVINO APIs. 
diff --git a/docs/articles_en/learn-openvino/interactive-tutorials-python/notebooks-installation.rst b/docs/articles_en/learn-openvino/interactive-tutorials-python/notebooks-installation.rst index ba7859a0c9f5d1..eb02caa06852fd 100644 --- a/docs/articles_en/learn-openvino/interactive-tutorials-python/notebooks-installation.rst +++ b/docs/articles_en/learn-openvino/interactive-tutorials-python/notebooks-installation.rst @@ -312,6 +312,8 @@ Installing notebooks 1. **Create a Virtual Environment** + If you already have installed *openvino-dev*, you may skip this step and proceed with the next one. + .. code-block:: sh python -m venv openvino_env @@ -362,6 +364,8 @@ Installing notebooks 1. **Create a Virtual Environment** + If you already have installed *openvino-dev*, you may skip this step and proceed with the next one. + .. code-block:: sh python3 -m venv openvino_env @@ -411,6 +415,8 @@ Installing notebooks 1. **Create a Virtual Environment** + If you already have installed *openvino-dev*, you may skip this step and proceed with the next one. + .. code-block:: sh python3 -m venv openvino_env diff --git a/docs/articles_en/learn-openvino/llm_inference_guide.rst b/docs/articles_en/learn-openvino/llm_inference_guide.rst index 372c3b6d652bfc..36c001c015f744 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide.rst @@ -1,94 +1,140 @@ -Generative AI workflow +Large Language Model Inference Guide ======================================== .. meta:: - :description: learn how to use OpenVINO to run generative AI models. + :description: Explore learning materials, including interactive + Python tutorials and sample console applications that explain + how to use OpenVINO features. .. 
toctree:: :maxdepth: 1 :hidden: - Generative Model Preparation - Inference with OpenVINO GenAI - Inference with Optimum Intel + Run LLMs with Optimum Intel + Run LLMs on OpenVINO GenAI Flavor + Run LLMs on Base OpenVINO OpenVINO Tokenizers +Large Language Models (LLMs) like GPT are transformative deep learning networks capable of a +broad range of natural language tasks, from text generation to language translation. OpenVINO +optimizes the deployment of these models, enhancing their performance and integration into +various applications. This guide shows how to use LLMs with OpenVINO, from model loading and +conversion to advanced use cases. + +The advantages of using OpenVINO for LLM deployment: + +* **OpenVINO offers optimized LLM inference**: + provides a full C/C++ API, leading to faster operation than Python-based runtimes; includes a + Python API for rapid development, with the option for further optimization in C++. +* **Compatible with diverse hardware**: + supports CPUs, GPUs, and neural accelerators across ARM and x86/x64 architectures, integrated + Intel® Processor Graphics, discrete Intel® Arc™ A-Series Graphics, and discrete Intel® Data + Center GPU Flex Series; features automated optimization to maximize performance on target + hardware. +* **Requires fewer dependencies**: + than frameworks like Hugging Face and PyTorch, resulting in a smaller binary size and reduced + memory footprint, making deployments easier and updates more manageable. +* **Provides compression and precision management techniques**: + such as 8-bit and 4-bit weight compression, including embedding layers, and storage format + reduction. This includes fp16 precision for non-compressed models and int8/int4 for compressed + models, like GPTQ models from `Hugging Face `__. +* **Supports a wide range of deep learning models and architectures**: + including text, image, and audio generative models like Llama 2, MPT, OPT, Stable Diffusion, + Stable Diffusion XL. 
This enables the development of multimodal applications, allowing for + write-once, deploy-anywhere capabilities. +* **Enhances inference capabilities**: + fused inference primitives such as Scaled Dot Product Attention, Rotary Positional Embedding, + Group Query Attention, and Mixture of Experts. It also offers advanced features like in-place + KV-cache, dynamic quantization, KV-cache quantization and encapsulation, dynamic beam size + configuration, and speculative sampling. +* **Provides stateful model optimization**: + models from the Hugging Face Transformers are converted into a stateful form, optimizing + inference performance and memory usage in long-running text generation tasks by managing past + KV-cache tensors more efficiently internally. This feature is automatically activated for many + supported models, while unsupported ones remain stateless. Learn more about the + :doc:`Stateful models and State API <../openvino-workflow/running-inference/stateful-models>`. + +OpenVINO offers three main paths for Generative AI use cases: + +* **Hugging Face**: use OpenVINO as a backend for Hugging Face frameworks (transformers, + diffusers) through the `Optimum Intel `__ + extension. +* **OpenVINO GenAI Flavor**: use OpenVINO GenAI APIs (Python and C++). +* **Base OpenVINO**: use OpenVINO native APIs (Python and C++) with + `custom pipeline code `__. + +In both cases, the OpenVINO runtime is used for inference, and OpenVINO tools are used for +optimization. The main differences are in footprint size, ease of use, and customizability. + +The Hugging Face API is easy to learn, provides a simple interface and hides the complexity of +model initialization and text generation for a better developer experience. However, it has more +dependencies, less customization, and cannot be ported to C/C++. 
+ +The OpenVINO GenAI Flavor reduces the complexity of LLMs implementation by +automatically managing essential tasks like the text generation loop, tokenization, +and scheduling. The Native OpenVINO API provides a more hands-on experience, +requiring manual setup of these functions. Both methods are designed to minimize dependencies +and the overall application footprint and enable the use of generative models in C++ applications. + +It is recommended to start with Hugging Face frameworks to experiment with different models and +scenarios. Then the model can be used with OpenVINO APIs if it needs to be optimized +further. Optimum Intel provides interfaces that enable model optimization (weight compression) +using `Neural Network Compression Framework (NNCF) `__, +and export models to the OpenVINO model format for use in native API applications. + +Proceed to run LLMs with: - -Generative AI is a specific area of Deep Learning models used for producing new and “original” -data, based on input in the form of image, sound, or natural language text. Due to their -complexity and size, generative AI pipelines are more difficult to deploy and run efficiently. -OpenVINO™ simplifies the process and ensures high-performance integrations, with the following -options: - -.. tab-set:: - - .. tab-item:: OpenVINO™ GenAI - - | - Suggested for production deployment for the supported use cases. - | - Smaller footprint and fewer dependencies. - | - More optimization and customization options. - | - Available in both Python and C++. - | - A limited set of supported use cases. - - :doc:`Install the OpenVINO GenAI package <../get-started/install-openvino/install-openvino-genai>` - and run generative models out of the box. With custom - API and tokenizers, among other components, it manages the essential tasks such as the - text generation loop, tokenization, and scheduling, offering ease of use and high - performance. - - `Check out the OpenVINO GenAI Quick-start Guide [PDF] `__ - - .. 
tab-item:: Hugging Face integration - - | - Suggested for prototyping and, if the use case is not covered by OpenVINO GenAI, production. - | - Bigger footprint and more dependencies. - | - Limited customization due to Hugging Face dependency. - | - Not usable for C++ applications. - | - A very wide range of supported models. - - Using Optimum Intel is a great way to experiment with different models and scenarios, - thanks to a simple interface for the popular API and infrastructure offered by Hugging Face. - It also enables weight compression with - `Neural Network Compression Framework (NNCF) `__, - as well as conversion on the fly. For integration with the final product it may offer - lower performance, though. - - - -The advantages of using OpenVINO for generative model deployment: - -| **Fewer dependencies and smaller footprint** -| Less bloated than frameworks such as Hugging Face and PyTorch, with a smaller binary size and reduced - memory footprint, makes deployments easier and updates more manageable. - -| **Compression and precision management** -| Techniques such as 8-bit and 4-bit weight compression, including embedding layers, and storage - format reduction. This includes fp16 precision for non-compressed models and int8/int4 for - compressed models, like GPTQ models from `Hugging Face `__. - -| **Enhanced inference capabilities** -| Advanced features like in-place KV-cache, dynamic quantization, KV-cache quantization and - encapsulation, dynamic beam size configuration, and speculative sampling, and more are - available. - -| **Stateful model optimization** -| Models from the Hugging Face Transformers are converted into a stateful form, optimizing - inference performance and memory usage in long-running text generation tasks by managing past - KV-cache tensors more efficiently internally. This feature is automatically activated for - many supported models, while unsupported ones remain stateless. 
Learn more about the - :doc:`Stateful models and State API <../openvino-workflow/running-inference/stateful-models>`. - -| **Optimized LLM inference** -| Includes a Python API for rapid development and C++ for further optimization, offering - better performance than Python-based runtimes. - - -Proceed to guides on: - -* :doc:`OpenVINO GenAI Flavor <./llm_inference_guide/genai-guide>` * :doc:`Hugging Face and Optimum Intel <./llm_inference_guide/llm-inference-hf>` -* `Generative AI with Base OpenVINO `__ - - +* :doc:`OpenVINO GenAI Flavor <./llm_inference_guide/genai-guide>` +* :doc:`Native OpenVINO API <./llm_inference_guide/llm-inference-native-ov>` + +The table below summarizes the differences between Hugging Face and the native OpenVINO API +approaches. + +.. dropdown:: Differences between Hugging Face and the native OpenVINO API + + .. list-table:: + :widths: 20 25 55 + :header-rows: 1 + + * - + - Hugging Face through OpenVINO + - OpenVINO Native API + * - Model support + - Supports transformer-based models such as LLMs + - Supports all model architectures from most frameworks + * - APIs + - Python (Hugging Face API) + - Python, C++ (OpenVINO API) + * - Model Format + - Source Framework / OpenVINO + - Source Framework / OpenVINO + * - Inference code + - Hugging Face based + - Custom inference pipelines + * - Additional dependencies + - Many Hugging Face dependencies + - Lightweight (e.g. numpy, etc.) 
+ * - Application footprint + - Large + - Small + * - Pre/post-processing and glue code + - Provided through high-level Hugging Face APIs + - Must be custom implemented (see OpenVINO samples and notebooks) + * - Performance + - Good, but less efficient compared to native APIs + - Inherent speed advantage with C++, but requires hands-on optimization + * - Flexibility + - Constrained to Hugging Face API + - High flexibility with Python and C++; allows custom coding + * - Learning Curve and Effort + - Lower learning curve; quick to integrate + - Higher learning curve; requires more effort in integration + * - Ideal Use Case + - Ideal for quick prototyping and Python-centric projects + - Best suited for high-performance, resource-optimized production environments + * - Model Serving + - Paid service, based on CPU/GPU usage with Hugging Face + - Free code solution, run script for own server; costs may incur for cloud services + like AWS but generally cheaper than Hugging Face rates diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide-npu.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide-npu.rst index 60253779b0f3dc..5a641300a68edb 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide-npu.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide-npu.rst @@ -1,4 +1,4 @@ -Inference with OpenVINO GenAI +Run LLMs with OpenVINO GenAI Flavor on NPU ========================================== .. 
meta:: @@ -20,22 +20,21 @@ Install required dependencies: pip install nncf==2.12 onnx==1.16.1 optimum-intel==1.19.0 pip install --pre openvino openvino-tokenizers openvino-genai --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly -Note that for systems based on Intel® Core™ Ultra Processors Series 2, more than 16GB of RAM -may be required to run prompts over 1024 tokens on models exceeding 7B parameters, +NOTE that for systems based on Intel® Core Ultra Processors Series 2 and 16 GB of RAM, +prompts longer than 1024 characters will not work with a model of 7B or more parameters, such as Llama-2-7B, Mistral-0.2-7B, and Qwen-2-7B. Export an LLM model via Hugging Face Optimum-Intel ################################################## -Since **symmetrically-quantized 4-bit (INT4) models are preffered for inference on NPU**, make -sure to export the model with the proper conversion and optimization settings. +Since **symmetrically-quantized 4-bit (INT4) models are preferred for inference on NPU**, make sure to export +the model with the proper conversion and optimization settings. | You may export LLMs via Optimum-Intel, using one of two compression methods: | **group quantization** - for both smaller and larger models, | **channel-wise quantization** - remarkably effective but for models exceeding 1 billion parameters. -You select one of the methods by setting the ``--group-size`` parameter to either ``128`` or -``-1``, respectively. See the following examples: +You select one of the methods by setting the ``--group-size`` parameter to either ``128`` or ``-1``, respectively. See the following examples: ..
tab-set:: @@ -90,7 +89,6 @@ which do not require specifying quantization parameters: | Below is a list of such models: * meta-llama/Meta-Llama-3-8B-Instruct -* meta-llama/Llama-3.1-8B * microsoft/Phi-3-mini-4k-instruct * Qwen/Qwen2-7B * mistralai/Mistral-7B-Instruct-v0.2 @@ -134,7 +132,6 @@ you need to add ``do_sample=False`` **to the** ``generate()`` **method:** int main(int argc, char* argv[]) { std::string model_path = "TinyLlama"; - ov::genai::LLMPipeline pipe(models_path, "NPU"); ov::genai::GenerationConfig config; config.do_sample=false; config.max_new_tokens=100; diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst index 43f9435bf79b1b..ebd4667d544616 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst @@ -1,4 +1,4 @@ -Inference with OpenVINO GenAI +Run LLM Inference on OpenVINO with the GenAI Flavor =============================================================================================== .. meta:: @@ -9,328 +9,39 @@ Inference with OpenVINO GenAI :hidden: NPU inference of LLMs + genai-guide/genai-use-cases -OpenVINO™ GenAI is a library of pipelines and methods, extending the OpenVINO runtime to work -with generative AI models more efficiently. This article provides reference code and guidance -on its usage. Note that the base OpenVINO version will not work with these instructions, -make sure to :doc:`install OpenVINO with GenAI <../../get-started/install-openvino/install-openvino-genai>`. +This guide will show you how to integrate the OpenVINO GenAI flavor into your application, covering +loading a model and passing the input context to receive generated text. Note that the vanilla flavor of OpenVINO +will not work with these instructions, make sure to +:doc:`install OpenVINO GenAI <../../get-started/install-openvino/install-openvino-genai>`. -.. 
image:: ../../assets/images/genai_main_diagram.svg - :align: center - :alt: OpenVINO GenAI workflow diagram +.. note:: + The examples use the CPU as the target device, however, the GPU is also supported. + Note that for the LLM pipeline, the GPU is used only for inference, while token selection, tokenization, and + detokenization remain on the CPU, for efficiency. Tokenizers are represented as a separate model and also run + on the CPU. -| Here is sample code for several Generative AI use case scenarios. Note that these are very basic - examples and may need adjustments for your specific needs, like changing the inference device. -| For a more extensive instruction and additional options, see the - `step-by-step chat-bot guide <#chat-bot-use-case-step-by-step>`__ below. +1. Export an LLM model via Hugging Face Optimum-Intel. A chat-tuned TinyLlama model is used in this example: -.. dropdown:: Text-to-Image Generation + .. code-block:: python - OpenVINO GenAI introduces ``openvino_genai.Text2ImagePipeline`` for inference of text-to-image - models such as: as Stable Diffusion 1.5, 2.1, XL, LCM, Flex, and more. - See the following usage example for reference. + optimum-cli export openvino --model "TinyLlama/TinyLlama-1.1B-Chat-v1.0" --weight-format fp16 --trust-remote-code "TinyLlama-1.1B-Chat-v1.0" - .. tab-set:: - - .. tab-item:: Python - :sync: python - - .. tab-set:: - - .. tab-item:: text2image.py - :name: text2image - - .. 
code-block:: python - - import argparse - - import openvino_genai - from PIL import Image - - - def main(): - parser = argparse.ArgumentParser() - parser.add_argument('model_dir') - parser.add_argument('prompt') - args = parser.parse_args() - - device = 'CPU' # GPU can be used as well - pipe = openvino_genai.Text2ImagePipeline(args.model_dir, device) - - image_tensor = pipe.generate( - args.prompt, - width=512, - height=512, - num_inference_steps=20, - num_images_per_prompt=1) - - image = Image.fromarray(image_tensor.data[0]) - image.save("image.bmp") - - .. tab-item:: lora_text2image.py - :name: loratext2imagepy + *Optional*. Optimize the model: - .. code-block:: python + The model is an optimized OpenVINO IR with FP16 precision. For enhanced LLM performance, + it is recommended to use lower precision for model weights, such as INT4, and to compress weights + using NNCF during model export directly: - import openvino as ov - import openvino_genai - - def image_write(path: str, image_tensor: ov.Tensor): - from PIL import Image - image = Image.fromarray(image_tensor.data[0]) - image.save(path) - - - def main(): - parser = argparse.ArgumentParser() - parser.add_argument('models_path') - parser.add_argument('prompt') - args, adapters = parser.parse_known_args() - - prompt = args.prompt - - device = "CPU" # GPU, NPU can be used as well - adapter_config = openvino_genai.AdapterConfig() - - # Multiple LoRA adapters applied simultaneously are supported, parse them all and corresponding alphas from cmd parameters: - for i in range(int(len(adapters) / 2)): - adapter = openvino_genai.Adapter(adapters[2 * i]) - alpha = float(adapters[2 * i + 1]) - adapter_config.add(adapter, alpha) - - # LoRA adapters passed to the constructor will be activated by default in next generates - pipe = openvino_genai.Text2ImagePipeline(args.models_path, device, adapters=adapter_config) - - print("Generating image with LoRA adapters applied, resulting image will be in lora.bmp") - image = 
pipe.generate(prompt, - width=512, - height=896, - num_inference_steps=20, - rng_seed=42) - - image_write("lora.bmp", image) - print("Generating image without LoRA adapters applied, resulting image will be in baseline.bmp") - image = pipe.generate(prompt, - # passing adapters in generate overrides adapters set in the constructor; openvino_genai.AdapterConfig() means no adapters - adapters=openvino_genai.AdapterConfig(), - width=512, - height=896, - num_inference_steps=20, - rng_seed=42) - image_write("baseline.bmp", image) - - - For more information, refer to the - `Python sample `__ - - .. tab-item:: C++ - :sync: cpp + .. code-block:: python - .. tab-set:: + optimum-cli export openvino --model "TinyLlama/TinyLlama-1.1B-Chat-v1.0" --weight-format int4 --trust-remote-code "TinyLlama-1.1B-Chat-v1.0" - .. tab-item:: text2image.cpp - :name: text2imagecpp - - .. code-block:: cpp - #include "openvino/genai/image_generation/text2image_pipeline.hpp" - - #include "imwrite.hpp" - - int32_t main(int32_t argc, char* argv[]) try { - OPENVINO_ASSERT(argc == 3, "Usage: ", argv[0], " ''"); - - const std::string models_path = argv[1], prompt = argv[2]; - const std::string device = "CPU"; // GPU can be used as well - - ov::genai::Text2ImagePipeline pipe(models_path, device); - ov::Tensor image = pipe.generate(prompt, - ov::genai::width(512), - ov::genai::height(512), - ov::genai::num_inference_steps(20), - ov::genai::num_images_per_prompt(1)); - - // writes `num_images_per_prompt` images by pattern name - imwrite("image_%d.bmp", image, true); - - return EXIT_SUCCESS; - } catch (const std::exception& error) { - try { - std::cerr << error.what() << '\n'; - } catch (const std::ios_base::failure&) {} - return EXIT_FAILURE; - } catch (...) { - try { - std::cerr << "Non-exception object thrown\n"; - } catch (const std::ios_base::failure&) {} - return EXIT_FAILURE; - } - - .. tab-item:: lora_text2image.cpp - :name: loratext2imagecpp - - .. 
code-block:: cpp - - #include "openvino/genai/image_generation/text2image_pipeline.hpp" - - #include "imwrite.hpp" - - int32_t main(int32_t argc, char* argv[]) try { - OPENVINO_ASSERT(argc >= 3 && (argc - 3) % 2 == 0, "Usage: ", argv[0], " '' [ ...]]"); - - const std::string models_path = argv[1], prompt = argv[2]; - const std::string device = "CPU"; // GPU, NPU can be used as well - - ov::genai::AdapterConfig adapter_config; - // Multiple LoRA adapters applied simultaneously are supported, parse them all and corresponding alphas from cmd parameters: - for(size_t i = 0; i < (argc - 3)/2; ++i) { - ov::genai::Adapter adapter(argv[3 + 2*i]); - float alpha = std::atof(argv[3 + 2*i + 1]); - adapter_config.add(adapter, alpha); - } - - // LoRA adapters passed to the constructor will be activated by default in next generates - ov::genai::Text2ImagePipeline pipe(models_path, device, ov::genai::adapters(adapter_config)); - - std::cout << "Generating image with LoRA adapters applied, resulting image will be in lora.bmp\n"; - ov::Tensor image = pipe.generate(prompt, - ov::genai::width(512), - ov::genai::height(896), - ov::genai::num_inference_steps(20), - ov::genai::rng_seed(42)); - imwrite("lora.bmp", image, true); - - std::cout << "Generating image without LoRA adapters applied, resulting image will be in baseline.bmp\n"; - image = pipe.generate(prompt, - ov::genai::adapters(), // passing adapters in generate overrides adapters set in the constructor; adapters() means no adapters - ov::genai::width(512), - ov::genai::height(896), - ov::genai::num_inference_steps(20), - ov::genai::rng_seed(42)); - imwrite("baseline.bmp", image, true); - - return EXIT_SUCCESS; - } catch (const std::exception& error) { - try { - std::cerr << error.what() << '\n'; - } catch (const std::ios_base::failure&) {} - return EXIT_FAILURE; - } catch (...) 
{ - try { - std::cerr << "Non-exception object thrown\n"; - } catch (const std::ios_base::failure&) {} - return EXIT_FAILURE; - } - - For more information, refer to the - `C++ sample `__ - - -.. dropdown:: Speech Recognition - - The application performs inference on speech recognition Whisper Models. The samples include - the ``WhisperPipeline`` class and use audio files in WAV format at a sampling rate of 16 kHz - as input. - - .. tab-set:: - - .. tab-item:: Python - :sync: cpp - - .. code-block:: python - - import openvino_genai - import librosa - - - def read_wav(filepath): - raw_speech, samplerate = librosa.load(filepath, sr=16000) - return raw_speech.tolist() - - - def infer(model_dir: str, wav_file_path: str): - device = "CPU" # GPU or NPU can be used as well. - pipe = openvino_genai.WhisperPipeline(model_dir, device) - - # The pipeline expects normalized audio with a sampling rate of 16kHz. - raw_speech = read_wav(wav_file_path) - result = pipe.generate( - raw_speech, - max_new_tokens=100, - language="<|en|>", - task="transcribe", - return_timestamps=True, - ) - - print(result) - - for chunk in result.chunks: - print(f"timestamps: [{chunk.start_ts}, {chunk.end_ts}] text: {chunk.text}") - - - For more information, refer to the - `Python sample `__. - - .. tab-item:: C++ - :sync: cpp - - .. code-block:: cpp - - #include "audio_utils.hpp" - #include "openvino/genai/whisper_pipeline.hpp" - - int main(int argc, char* argv[]) try { - if (3 > argc) { - throw std::runtime_error(std::string{"Usage: "} + argv[0] + " \"\""); - } - - std::filesystem::path models_path = argv[1]; - std::string wav_file_path = argv[2]; - std::string device = "CPU"; // GPU or NPU can be used as well. 
- - ov::genai::WhisperPipeline pipeline(models_path, device); - - ov::genai::WhisperGenerationConfig config(models_path / "generation_config.json"); - config.max_new_tokens = 100; - config.language = "<|en|>"; - config.task = "transcribe"; - config.return_timestamps = true; - - // The pipeline expects normalized audio with a sampling rate of 16kHz. - ov::genai::RawSpeechInput raw_speech = utils::audio::read_wav(wav_file_path); - auto result = pipeline.generate(raw_speech, config); - - std::cout << result << "\n"; - - for (auto& chunk : *result.chunks) { - std::cout << "timestamps: [" << chunk.start_ts << ", " << chunk.end_ts << "] text: " << chunk.text << "\n"; - } - - } catch (const std::exception& error) { - try { - std::cerr << error.what() << '\n'; - } catch (const std::ios_base::failure&) { - } - return EXIT_FAILURE; - } catch (...) { - try { - std::cerr << "Non-exception object thrown\n"; - } catch (const std::ios_base::failure&) { - } - return EXIT_FAILURE; - } - - For more information, refer to the - `C++ sample `__. - - -.. dropdown:: Using GenAI in Chat Scenario - - For chat scenarios where inputs and outputs represent a conversation, maintaining KVCache - across inputs may prove beneficial. The ``start_chat`` and ``finish_chat`` chat-specific - methods are used to mark a conversation session, as shown in the samples below: +2. Perform generation using the new GenAI API: .. tab-set:: @@ -339,35 +50,9 @@ make sure to :doc:`install OpenVINO with GenAI <../../get-started/install-openvi .. code-block:: python - import openvino_genai - - - def streamer(subword): - print(subword, end='', flush=True) - return False - - - def infer(model_dir: str): - device = 'CPU' # GPU can be used as well. 
- pipe = openvino_genai.LLMPipeline(model_dir, device) - - config = openvino_genai.GenerationConfig() - config.max_new_tokens = 100 - - pipe.start_chat() - while True: - try: - prompt = input('question:\n') - except EOFError: - break - pipe.generate(prompt, config, streamer) - print('\n----------') - pipe.finish_chat() - - - - For more information, refer to the - `Python sample `__. + import openvino_genai as ov_genai + pipe = ov_genai.LLMPipeline(model_path, "CPU") + print(pipe.generate("The Sun is yellow because", max_new_tokens=100)) .. tab-item:: C++ :sync: cpp @@ -375,251 +60,27 @@ make sure to :doc:`install OpenVINO with GenAI <../../get-started/install-openvi .. code-block:: cpp #include "openvino/genai/llm_pipeline.hpp" + #include - int main(int argc, char* argv[]) try { - if (2 != argc) { - throw std::runtime_error(std::string{"Usage: "} + argv[0] + " "); - } - std::string prompt; - std::string models_path = argv[1]; - - std::string device = "CPU"; // GPU, NPU can be used as well - ov::genai::LLMPipeline pipe(models_path, device); - - ov::genai::GenerationConfig config; - config.max_new_tokens = 100; - std::function streamer = [](std::string word) { - std::cout << word << std::flush; - return false; - }; - - pipe.start_chat(); - std::cout << "question:\n"; - while (std::getline(std::cin, prompt)) { - pipe.generate(prompt, config, streamer); - std::cout << "\n----------\n" - "question:\n"; - } - pipe.finish_chat(); - } catch (const std::exception& error) { - try { - std::cerr << error.what() << '\n'; - } catch (const std::ios_base::failure&) {} - return EXIT_FAILURE; - } catch (...) 
{ - try { - std::cerr << "Non-exception object thrown\n"; - } catch (const std::ios_base::failure&) {} - return EXIT_FAILURE; + int main(int argc, char* argv[]) { + std::string model_path = argv[1]; + ov::genai::LLMPipeline pipe(model_path, "CPU"); + std::cout << pipe.generate("The Sun is yellow because", ov::genai::max_new_tokens(100)); } +The `LLMPipeline` is the main object used for decoding. You can construct it directly from the +folder with the converted model. It will automatically load the main model, tokenizer, detokenizer, +and the default generation configuration. - For more information, refer to the - `C++ sample `__ - - -.. dropdown:: Using GenAI with Vision Language Models - - OpenVINO GenAI introduces the ``openvino_genai.VLMPipeline`` pipeline for - inference of multimodal text-generation Vision Language Models (VLMs). - With a text prompt and an image as input, VLMPipeline can generate text using - models such as LLava or MiniCPM-V. See the chat scenario presented - in the samples below: - - .. tab-set:: - - .. tab-item:: Python - :sync: py - - .. code-block:: python - - import numpy as np - import openvino_genai - from PIL import Image - from openvino import Tensor - from pathlib import Path - - - def streamer(subword: str) -> bool: - print(subword, end='', flush=True) - - - def read_image(path: str) -> Tensor: - pic = Image.open(path).convert("RGB") - image_data = np.array(pic.getdata()).reshape(1, pic.size[1], pic.size[0], 3).astype(np.uint8) - return Tensor(image_data) - - - def read_images(path: str) -> list[Tensor]: - entry = Path(path) - if entry.is_dir(): - return [read_image(str(file)) for file in sorted(entry.iterdir())] - return [read_image(path)] - - - def infer(model_dir: str, image_dir: str): - rgbs = read_images(image_dir) - device = 'CPU' # GPU can be used as well. 
- enable_compile_cache = dict() - if "GPU" == device: - enable_compile_cache["CACHE_DIR"] = "vlm_cache" - pipe = openvino_genai.VLMPipeline(model_dir, device, **enable_compile_cache) - - config = openvino_genai.GenerationConfig() - config.max_new_tokens = 100 - - pipe.start_chat() - prompt = input('question:\n') - pipe.generate(prompt, images=rgbs, generation_config=config, streamer=streamer) - - while True: - try: - prompt = input("\n----------\n" - "question:\n") - except EOFError: - break - pipe.generate(prompt, generation_config=config, streamer=streamer) - pipe.finish_chat() - - - For more information, refer to the - `Python sample `__. - - .. tab-item:: C++ - :sync: cpp - - .. code-block:: cpp - - #include "load_image.hpp" - #include - #include - - bool print_subword(std::string&& subword) { - return !(std::cout << subword << std::flush); - } - - int main(int argc, char* argv[]) try { - if (3 != argc) { - throw std::runtime_error(std::string{"Usage "} + argv[0] + " "); - } - - std::vector rgbs = utils::load_images(argv[2]); - - std::string device = "CPU"; // GPU can be used as well. 
- ov::AnyMap enable_compile_cache; - if ("GPU" == device) { - enable_compile_cache.insert({ov::cache_dir("vlm_cache")}); - } - ov::genai::VLMPipeline pipe(argv[1], device, enable_compile_cache); - - ov::genai::GenerationConfig generation_config; - generation_config.max_new_tokens = 100; - - std::string prompt; - - pipe.start_chat(); - std::cout << "question:\n"; - - std::getline(std::cin, prompt); - pipe.generate(prompt, - ov::genai::images(rgbs), - ov::genai::generation_config(generation_config), - ov::genai::streamer(print_subword)); - std::cout << "\n----------\n" - "question:\n"; - while (std::getline(std::cin, prompt)) { - pipe.generate(prompt, - ov::genai::generation_config(generation_config), - ov::genai::streamer(print_subword)); - std::cout << "\n----------\n" - "question:\n"; - } - pipe.finish_chat(); - } catch (const std::exception& error) { - try { - std::cerr << error.what() << '\n'; - } catch (const std::ios_base::failure&) {} - return EXIT_FAILURE; - } catch (...) { - try { - std::cerr << "Non-exception object thrown\n"; - } catch (const std::ios_base::failure&) {} - return EXIT_FAILURE; - } - - - For more information, refer to the - `C++ sample `__ - - -| - - -Chat-bot use case - step by step -############################################################################################### - -This example will show you how to create a chat-bot functionality, using the ``ov_genai.LLMPipeline`` -and a chat-tuned TinyLlama model. Apart from the basic implementation, it provides additional -optimization methods. - -Although CPU is used as inference device in the samples below, you may choose GPU instead. -Note that tasks such as token selection, tokenization, and detokenization are always handled -by CPU only. Tokenizers, represented as a separate model, are also run on CPU. 
- -Running the model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -You start with exporting an LLM model via Hugging Face Optimum-Intel. Note that the precision -of ``int4`` is used, instead of the original ``fp16``, for better performance. The weight -compression is done by NNCF at the model export stage. The exported model contains all the -information necessary for execution, including the tokenizer/detokenizer and the generation -config, ensuring that its results match those generated by Hugging Face. - -The `LLMPipeline` is the main object to setup the model for text generation. You can provide the -converted model to this object, specify the device for inference, and provide additional -parameters. - - -.. tab-set:: - - .. tab-item:: Python - :sync: py - - .. code-block:: console - - optimum-cli export openvino --model "TinyLlama/TinyLlama-1.1B-Chat-v1.0" --weight-format int4 --trust-remote-code "TinyLlama-1.1B-Chat-v1.0" - - .. code-block:: python - - import openvino_genai as ov_genai - pipe = ov_genai.LLMPipeline(model_path, "CPU") - print(pipe.generate("The Sun is yellow because", max_new_tokens=100)) - - .. tab-item:: C++ - :sync: cpp - - .. code-block:: console - - optimum-cli export openvino --model "TinyLlama/TinyLlama-1.1B-Chat-v1.0" --weight-format int4 --trust-remote-code "TinyLlama-1.1B-Chat-v1.0" - - .. code-block:: cpp - - #include "openvino/genai/llm_pipeline.hpp" - #include - - int main(int argc, char* argv[]) { - std::string model_path = argv[1]; - ov::genai::LLMPipeline pipe(model_path, "CPU"); - std::cout << pipe.generate("The Sun is yellow because", ov::genai::max_new_tokens(100)); - } - - +Once the model is exported from Hugging Face Optimum-Intel, it already contains all the information +necessary for execution, including the tokenizer/detokenizer and the generation config, ensuring that +its results match those generated by Hugging Face. 
Streaming the Output -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +########################### -For more interactive UIs during generation, you can stream output tokens. In this example, a -lambda function outputs words to the console immediately upon generation: +For more interactive UIs during generation, streaming of model output tokens is supported. See the example +below, where a lambda function outputs words to the console immediately upon generation: .. tab-set:: @@ -716,10 +177,12 @@ You can also create your custom streamer for more sophisticated processing: Optimizing Generation with Grouped Beam Search -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +####################################################### + +Leverage grouped beam search decoding and configure generation_config for better text generation +quality and efficient batch processing in GenAI applications. -For better text generation quality and more efficient batch processing, specify -``generation_config`` to leverage grouped beam search decoding. +Specify generation_config to use grouped beam search: .. tab-set:: @@ -756,123 +219,10 @@ For better text generation quality and more efficient batch processing, specify } -Efficient Text Generation via Speculative Decoding -++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -Speculative decoding (or assisted-generation) enables faster token generation -when an additional smaller draft model is used alongside the main model. This reduces the -number of infer requests to the main model, increasing performance. - -The draft model predicts the next K tokens one by one in an autoregressive manner. The main -model validates these predictions and corrects them if necessary - in case of -a discrepancy, the main model prediction is used. 
Then, the draft model acquires this token and -runs prediction of the next K tokens, thus repeating the cycle. - - -.. tab-set:: - - .. tab-item:: Python - :sync: py - - .. code-block:: python - - import openvino_genai - import queue - import threading - - def streamer(subword): - print(subword, end='', flush=True) - return False - - def infer(model_dir: str, draft_model_dir: str, prompt: str): - main_device = 'CPU' # GPU can be used as well. - draft_device = 'CPU' - - scheduler_config = openvino_genai.SchedulerConfig() - scheduler_config.cache_size = 2 - - draft_model = openvino_genai.draft_model(draft_model_dir, draft_device) - - pipe = openvino_genai.LLMPipeline(model_dir, main_device, scheduler_config=scheduler_config, draft_model=draft_model) - - config = openvino_genai.GenerationConfig() - config.max_new_tokens = 100 - config.num_assistant_tokens = 5 - - pipe.generate("The Sun is yellow because", config, streamer) - - - For more information, refer to the - `Python sample `__. - - - .. tab-item:: C++ - :sync: cpp - - .. 
code-block:: cpp - - #include - - #include "openvino/genai/llm_pipeline.hpp" - - int main(int argc, char* argv[]) try { - if (4 != argc) { - throw std::runtime_error(std::string{"Usage: "} + argv[0] + " ''"); - } - - ov::genai::GenerationConfig config; - config.max_new_tokens = 100; - config.num_assistant_tokens = 5; - - std::string main_model_path = argv[1]; - std::string draft_model_path = argv[2]; - std::string prompt = argv[3]; - - std::string main_device = "CPU", draft_device = "CPU"; - - ov::genai::SchedulerConfig scheduler_config; - scheduler_config.cache_size = 5; - - ov::genai::LLMPipeline pipe( - main_model_path, - main_device, - ov::genai::draft_model(draft_model_path, draft_device), - ov::genai::scheduler_config(scheduler_config)); - - auto streamer = [](std::string subword) { - std::cout << subword << std::flush; - return false; - }; - - pipe.generate("The Sun is yellow because", config, streamer); - } catch (const std::exception& error) { - try { - std::cerr << error.what() << '\n'; - } catch (const std::ios_base::failure&) {} - return EXIT_FAILURE; - } catch (...) { - try { - std::cerr << "Non-exception object thrown\n"; - } catch (const std::ios_base::failure&) {} - return EXIT_FAILURE; - } - - - For more information, refer to the - `C++ sample `__ - - - - - - - - Comparing with Hugging Face Results ####################################### -You can compare the results of the above example with those generated by Hugging Face models by -running the following code: +Compare and analyze results with those generated by Hugging Face models. .. 
tab-set:: @@ -900,34 +250,30 @@ running the following code: assert hf_output == ov_output - - - - - GenAI API ####################################### -The use case described here regards the following OpenVINO GenAI API classes: +OpenVINO GenAI Flavor includes the following API: + +* generation_config - defines a configuration class for text generation, enabling customization of the generation process such as the maximum length of the generated text, whether to ignore end-of-sentence tokens, and the specifics of the decoding strategy (greedy, beam search, or multinomial sampling). + +* llm_pipeline - provides classes and utilities for text generation, including a pipeline for processing inputs, generating text, and managing outputs with configurable options. -* generation_config - defines a configuration class for text generation, - enabling customization of the generation process such as the maximum length of - the generated text, whether to ignore end-of-sentence tokens, and the specifics - of the decoding strategy (greedy, beam search, or multinomial sampling). -* llm_pipeline - provides classes and utilities for processing inputs, - text generation, and managing outputs with configurable options. * streamer_base - an abstract base class for creating streamers. + * tokenizer - the tokenizer class for text encoding and decoding. -Learn more from the `GenAI API reference `__. +* visibility - controls the visibility of the GenAI library. + +Learn more in the `GenAI API reference `__. 
Additional Resources #################### * `OpenVINO GenAI Repo `__ * `OpenVINO GenAI Samples `__ -* A Jupyter notebook demonstrating - `Visual-language assistant with MiniCPM-V2 and OpenVINO `__ * `OpenVINO Tokenizers `__ * `Neural Network Compression Framework `__ + + diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst new file mode 100644 index 00000000000000..6033bd8ed96106 --- /dev/null +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst @@ -0,0 +1,426 @@ +GenAI Use Cases +===================== + +This article provides several use case scenarios for Generative AI model +inference. The applications presented in the code samples below +only require minimal configuration, like setting an inference device. Feel free +to explore and modify the source code as you need. + + +Using GenAI for Text-to-Image Generation +######################################## + +Examples below demonstrate inference on text-to-image models, like Stable Diffusion +1.5, 2.1, and LCM, with a text prompt as input. The :ref:`main.cpp ` +sample shows basic usage of the ``Text2ImagePipeline`` pipeline. +:ref:`lora.cpp ` shows how to apply LoRA adapters to the pipeline. + + +.. tab-set:: + + .. tab-item:: Python + :sync: python + + .. tab-set:: + + .. tab-item:: main.py + :name: mainpy + + .. 
code-block:: python + + import openvino_genai + from PIL import Image + import numpy as np + + class Generator(openvino_genai.Generator): + def __init__(self, seed, mu=0.0, sigma=1.0): + openvino_genai.Generator.__init__(self) + np.random.seed(seed) + self.mu = mu + self.sigma = sigma + + def next(self): + return np.random.normal(self.mu, self.sigma) + + + def infer(model_dir: str, prompt: str): + device = 'CPU' # GPU can be used as well + random_generator = Generator(42) + pipe = openvino_genai.Text2ImagePipeline(model_dir, device) + image_tensor = pipe.generate( + prompt, + width=512, + height=512, + num_inference_steps=20, + num_images_per_prompt=1, + random_generator=random_generator + ) + + image = Image.fromarray(image_tensor.data[0]) + image.save("image.bmp") + + .. tab-item:: LoRA.py + :name: lorapy + + .. code-block:: python + + import openvino as ov + import openvino_genai + import numpy as np + import sys + + + class Generator(openvino_genai.Generator): + def __init__(self, seed, mu=0.0, sigma=1.0): + openvino_genai.Generator.__init__(self) + np.random.seed(seed) + self.mu = mu + self.sigma = sigma + + def next(self): + return np.random.normal(self.mu, self.sigma) + + + def image_write(path: str, image_tensor: ov.Tensor): + from PIL import Image + image = Image.fromarray(image_tensor.data[0]) + image.save(path) + + + def infer(models_path: str, prompt: str): + prompt = "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting" + + device = "CPU" # GPU, NPU can be used as well + adapter_config = openvino_genai.AdapterConfig() + + for i in range(int(len(adapters) / 2)): + adapter = openvino_genai.Adapter(adapters[2 * i]) + alpha = float(adapters[2 * i + 1]) + adapter_config.add(adapter, alpha) + + pipe = openvino_genai.Text2ImagePipeline(models_path, device, adapters=adapter_config) + print("Generating image with LoRA adapters applied, resulting image will be in lora.bmp") + image = pipe.generate(prompt, + 
random_generator=Generator(42), + width=512, + height=896, + num_inference_steps=20) + + image_write("lora.bmp", image) + print("Generating image without LoRA adapters applied, resulting image will be in baseline.bmp") + image = pipe.generate(prompt, + adapters=openvino_genai.AdapterConfig(), + random_generator=Generator(42), + width=512, + height=896, + num_inference_steps=20 + ) + image_write("baseline.bmp", image) + + For more information, refer to the + `Python sample `__ + + .. tab-item:: C++ + :sync: cpp + + .. tab-set:: + + .. tab-item:: main.cpp + :name: maincpp + + .. code-block:: cpp + + #include "openvino/genai/text2image/pipeline.hpp" + + #include "imwrite.hpp" + + int32_t main(int32_t argc, char* argv[]) try { + OPENVINO_ASSERT(argc == 3, "Usage: ", argv[0], " ''"); + + const std::string models_path = argv[1], prompt = argv[2]; + const std::string device = "CPU"; // GPU, NPU can be used as well + + ov::genai::Text2ImagePipeline pipe(models_path, device); + ov::Tensor image = pipe.generate(prompt, + ov::genai::width(512), + ov::genai::height(512), + ov::genai::num_inference_steps(20), + ov::genai::num_images_per_prompt(1)); + + imwrite("image_%d.bmp", image, true); + + return EXIT_SUCCESS; + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } + + .. tab-item:: LoRA.cpp + :name: loracpp + + .. 
code-block:: cpp + + #include "openvino/genai/text2image/pipeline.hpp" + + #include "imwrite.hpp" + + int32_t main(int32_t argc, char* argv[]) try { + OPENVINO_ASSERT(argc >= 3 && (argc - 3) % 2 == 0, "Usage: ", argv[0], " '' [ ...]]"); + + const std::string models_path = argv[1], prompt = argv[2]; + const std::string device = "CPU"; // GPU, NPU can be used as well + + ov::genai::AdapterConfig adapter_config; + for(size_t i = 0; i < (argc - 3)/2; ++i) { + ov::genai::Adapter adapter(argv[3 + 2*i]); + float alpha = std::atof(argv[3 + 2*i + 1]); + adapter_config.add(adapter, alpha); + } + + ov::genai::Text2ImagePipeline pipe(models_path, device, ov::genai::adapters(adapter_config)); + + std::cout << "Generating image with LoRA adapters applied, resulting image will be in lora.bmp\n"; + ov::Tensor image = pipe.generate(prompt, + ov::genai::random_generator(std::make_shared(42)), + ov::genai::width(512), + ov::genai::height(896), + ov::genai::num_inference_steps(20)); + imwrite("lora.bmp", image, true); + + std::cout << "Generating image without LoRA adapters applied, resulting image will be in baseline.bmp\n"; + image = pipe.generate(prompt, + ov::genai::adapters(), + ov::genai::random_generator(std::make_shared(42)), + ov::genai::width(512), + ov::genai::height(896), + ov::genai::num_inference_steps(20)); + imwrite("baseline.bmp", image, true); + + return EXIT_SUCCESS; + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } + + + For more information, refer to the + `C++ sample `__ + + + + + +Using GenAI in Speech Recognition +################################# + + +The application, shown in code samples below, performs inference on speech +recognition Whisper Models. 
The samples include the ``WhisperPipeline`` class +and use audio files in WAV format at a sampling rate of 16 kHz as input. + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: python + + import openvino_genai + import librosa + + + def read_wav(filepath): + raw_speech, samplerate = librosa.load(filepath, sr=16000) + return raw_speech.tolist() + + + def infer(model_dir: str, wav_file_path: str): + device = "CPU" # GPU or NPU can be used as well. + pipe = openvino_genai.WhisperPipeline(model_dir, device) + + # The pipeline expects normalized audio with a sampling rate of 16kHz. + raw_speech = read_wav(wav_file_path) + result = pipe.generate( + raw_speech, + max_new_tokens=100, + language="<|en|>", + task="transcribe", + return_timestamps=True, + ) + + print(result) + + for chunk in result.chunks: + print(f"timestamps: [{chunk.start_ts}, {chunk.end_ts}] text: {chunk.text}") + + + For more information, refer to the + `Python sample `__. + + .. tab-item:: C++ + :sync: cpp + + .. code-block:: cpp + + #include "audio_utils.hpp" + #include "openvino/genai/whisper_pipeline.hpp" + + int main(int argc, char* argv[]) try { + if (3 > argc) { + throw std::runtime_error(std::string{"Usage: "} + argv[0] + " \"\""); + } + + std::filesystem::path models_path = argv[1]; + std::string wav_file_path = argv[2]; + std::string device = "CPU"; // GPU or NPU can be used as well. + + ov::genai::WhisperPipeline pipeline(models_path, device); + + ov::genai::WhisperGenerationConfig config(models_path / "generation_config.json"); + config.max_new_tokens = 100; + config.language = "<|en|>"; + config.task = "transcribe"; + config.return_timestamps = true; + + // The pipeline expects normalized audio with a sampling rate of 16kHz.
+ ov::genai::RawSpeechInput raw_speech = utils::audio::read_wav(wav_file_path); + auto result = pipeline.generate(raw_speech, config); + + std::cout << result << "\n"; + + for (auto& chunk : *result.chunks) { + std::cout << "timestamps: [" << chunk.start_ts << ", " << chunk.end_ts << "] text: " << chunk.text << "\n"; + } + + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) { + } + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) { + } + return EXIT_FAILURE; + } + + + For more information, refer to the + `C++ sample `__. + + +Using GenAI in Chat Scenario +############################ + +For chat scenarios where inputs and outputs represent a conversation, maintaining KVCache across inputs +may prove beneficial. The ``start_chat`` and ``finish_chat`` chat-specific methods are used to +mark a conversation session, as shown in the samples below: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: python + + import openvino_genai + + + def streamer(subword): + print(subword, end='', flush=True) + return False + + + def infer(model_dir: str): + device = 'CPU' # GPU can be used as well. + pipe = openvino_genai.LLMPipeline(model_dir, device) + + config = openvino_genai.GenerationConfig() + config.max_new_tokens = 100 + + pipe.start_chat() + while True: + try: + prompt = input('question:\n') + except EOFError: + break + pipe.generate(prompt, config, streamer) + print('\n----------') + pipe.finish_chat() + + + + For more information, refer to the + `Python sample `__. + + .. tab-item:: C++ + :sync: cpp + + .. 
code-block:: cpp + + #include "openvino/genai/llm_pipeline.hpp" + + int main(int argc, char* argv[]) try { + if (2 != argc) { + throw std::runtime_error(std::string{"Usage: "} + argv[0] + " "); + } + std::string prompt; + std::string models_path = argv[1]; + + std::string device = "CPU"; // GPU, NPU can be used as well + ov::genai::LLMPipeline pipe(models_path, device); + + ov::genai::GenerationConfig config; + config.max_new_tokens = 100; + std::function streamer = [](std::string word) { + std::cout << word << std::flush; + return false; + }; + + pipe.start_chat(); + std::cout << "question:\n"; + while (std::getline(std::cin, prompt)) { + pipe.generate(prompt, config, streamer); + std::cout << "\n----------\n" + "question:\n"; + } + pipe.finish_chat(); + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } + + + For more information, refer to the + `C++ sample `__ + +Additional Resources +##################### + +* :doc:`Install OpenVINO GenAI <../../../get-started/install-openvino/install-openvino-genai>` +* `OpenVINO GenAI Repo `__ +* `OpenVINO GenAI Samples `__ +* `OpenVINO Tokenizers `__ diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-model-preparation.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-model-preparation.rst deleted file mode 100644 index e6d15675ea45b8..00000000000000 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-model-preparation.rst +++ /dev/null @@ -1,159 +0,0 @@ -Generative Model Preparation -=============================================================================== - -.. meta:: - :description: Learn how to use Hugging Face Hub and Optimum Intel APIs to - prepare generative models for inference. 
- - - -Since generative AI models tend to be big and resource-heavy, it is advisable to -optimize them for efficient inference. This article will show how to prepare -LLM models for inference with OpenVINO by: - -* `Downloading Models from Hugging Face <#download-generative-models-from-hugging-face-hub>`__ -* `Downloading Models from Model Scope <#download-generative-models-from-model-scope>`__ -* `Converting and Optimizing Generative Models <#convert-and-optimize-generative-models>`__ - - - -Download Generative Models From Hugging Face Hub -############################################################################### - -Pre-converted and pre-optimized models are available in the `OpenVINO Toolkit `__ -organization, under the `model section `__, or under -different model collections: - -* `LLM: `__ -* `Speech-to-Text `__ -* `Speculative Decoding Draft Models `__ - -You can also use the **huggingface_hub** package to download models: - -.. code-block:: console - - pip install huggingface_hub - huggingface-cli download "OpenVINO/phi-2-fp16-ov" --local-dir model_path - - -The models can be used in OpenVINO immediately after download. No dependencies -are required except **huggingface_hub**. - - -Download Generative Models From Model Scope -############################################################################### - -To download models from `Model Scope `__, -use the **modelscope** package: - -.. code-block:: console - - pip install modelscope - modelscope download --model "Qwen/Qwen2-7b" --local_dir model_path - -Models downloaded via Model Scope are available in Pytorch format only and they must -be :doc:`converted to OpenVINO IR <../../openvino-workflow/model-preparation/convert-model-to-ir>` -before inference. - -Convert and Optimize Generative Models -############################################################################### - -OpenVINO works best with models in the OpenVINO IR format, both in full precision and quantized. 
-If your selected model has not been pre-optimized, you can easily do it yourself, using a single -**optimum-cli** command. For that, make sure optimum-intel is installed on your system: - -.. code-block:: console - - pip install optimum-intel[openvino] - - -While optimizing models, you can decide to keep the original precision or select one that is lower. - -.. tab-set:: - - .. tab-item:: Keeping full model precision - :sync: full-precision - - .. code-block:: console - - optimum-cli export openvino --model --weight-format fp16 - - Examples: - - .. tab-set:: - - .. tab-item:: LLM (text generation) - :sync: llm-text-gen - - .. code-block:: console - - optimum-cli export openvino --model meta-llama/Llama-2-7b-chat-hf --weight-format fp16 ov_llama_2 - - .. tab-item:: Diffusion models (text2image) - :sync: diff-text-img - - .. code-block:: console - - optimum-cli export openvino --model stabilityai/stable-diffusion-xl-base-1.0 --weight-format fp16 ov_SDXL - - .. tab-item:: VLM (Image processing): - :sync: vlm-img-proc - - .. code-block:: console - - optimum-cli export openvino --model openbmb/MiniCPM-V-2_6 --trust-remote-code –weight-format fp16 ov_MiniCPM-V-2_6 - - .. tab-item:: Whisper models (speech2text): - :sync: whisp-speech-txt - - .. code-block:: console - - optimum-cli export openvino --trust-remote-code --model openai/whisper-base ov_whisper - - .. tab-item:: Exporting to selected precision - :sync: low-precision - - .. code-block:: console - - optimum-cli export openvino --model --weight-format int4 - - Examples: - - .. tab-set:: - - .. tab-item:: LLM (text generation) - :sync: llm-text-gen - - .. code-block:: console - - optimum-cli export openvino --model meta-llama/Llama-2-7b-chat-hf --weight-format int4 ov_llama_2 - - .. tab-item:: Diffusion models (text2image) - :sync: diff-text-img - - .. code-block:: console - - optimum-cli export openvino --model stabilityai/stable-diffusion-xl-base-1.0 --weight-format int4 ov_SDXL - - .. 
tab-item:: VLM (Image processing) - :sync: vlm-img-proc - - .. code-block:: console - - optimum-cli export openvino -m model_path --task text-generation-with-past --weight-format int4 ov_MiniCPM-V-2_6 - - -.. note:: - - Any other ``model_id``, for example ``openbmb/MiniCPM-V-2_6``, or the path - to a local model file can be used. - - Also, you can specify different data type like ``int8``. - - -Additional Resources -############################################################################### - -* `Full set of optimum-cli parameters `__ -* :doc:`Model conversion in OpenVINO <../../openvino-workflow/model-preparation/convert-model-to-ir>` -* :doc:`Model optimization in OpenVINO <../../openvino-workflow/model-optimization>` diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-hf.rst b/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-hf.rst index 4fec1acd23e6a7..a26b670b5314d0 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-hf.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-hf.rst @@ -1,4 +1,4 @@ -Inference with Optimum Intel +Run LLMs with Hugging Face and Optimum Intel =============================================================================================== .. meta:: @@ -276,10 +276,9 @@ includes **Dynamic quantization** of activations of 4/8-bit quantized MatMuls an ov_config={"KV_CACHE_PRECISION": "u8", "DYNAMIC_QUANTIZATION_GROUP_SIZE": "32", "PERFORMANCE_HINT": "LATENCY"} ) - .. note:: - Currently, for KV-cache quantization, GPU ignores the DYNAMIC_QUANTIZATION_GROUP_SIZE property, using ``group_size = head_size``. Additionally, it does not support the ``get_state()`` and ``set_state()`` APIs when KV-cache quantization is enabled. +.. note:: - For GPU, KV-cache quantization is enabled by default on platforms without XMX support, and can be disabled by setting KV_CACHE_PRECISION to ``undefined``. 
+ Currently, both Dynamic quantization and KV-cache quantization are available for the CPU device. Working with Models Tuned with LoRA diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-native-ov.rst b/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-native-ov.rst new file mode 100644 index 00000000000000..2476a0423e30e1 --- /dev/null +++ b/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-native-ov.rst @@ -0,0 +1,192 @@ +Run LLM Inference on Native OpenVINO (not recommended) +=============================================================================================== + +To run Generative AI models using native OpenVINO APIs, you need to follow the regular +**Convert -> Optimize -> Deploy** path with a few simplifications. + +To convert a model from `Hugging Face `__, you can use +the Optimum-Intel export feature, which allows you to export a model in the OpenVINO format without +invoking the conversion API and tools directly. In this case, the conversion process is +somewhat simpler. You can still use a regular conversion path if the model comes from +outside of the Hugging Face ecosystem, i.e., in the source framework format (PyTorch, etc.). + +Model optimization can be performed within Hugging Face or directly using NNCF as described in +:doc:`Weight Compression <../../openvino-workflow/model-optimization-guide/weight-compression>`. + +.. note:: + + It is recommended to use models in 4-bit precision, as maintaining the model in its + original precision may result in significantly decreased performance. + +Inference code that uses the native API cannot benefit from Hugging Face pipelines. +You need to write your custom code or take it from the available examples. Below are +some examples of popular Generative AI scenarios: + +* In the case of LLMs for text generation, you need to handle tokenization, the inference and + token selection loop, and de-tokenization. If token selection involves beam search, + it also needs to be written.
+* For image generation models, you need to make a pipeline that includes several model + inferences: inference for source (e.g., text) encoder models, inference loop for + diffusion process and inference for the decoding part. Scheduler code is also required. + +To write such pipelines, you can follow the examples provided as part of OpenVINO: + +* `OpenVINO Latent Consistency Model C++ image generation pipeline `__ +* `OpenVINO Stable Diffusion (with LoRA) C++ image generation pipeline `__ + +To perform inference, models must be first converted to OpenVINO IR format using +Hugging Face Optimum-Intel API. + +An inference pipeline for a text generation LLM is set up in the following stages: + +1. Read and compile the model in OpenVINO IR. +2. Pre-process text prompt with a tokenizer and set the result as model inputs. +3. Run token generation loop. +4. De-tokenize outputs. + +Prerequisites +######################## + +Linux operating system (as of the current version). + +**Installation** + +1. Create a virtual environment + + .. code-block:: console + + python -m venv openvino_llm + + ``openvino_llm`` is an example name; you can choose any name for your environment. + +2. Activate the virtual environment + + .. code-block:: console + + source openvino_llm/bin/activate + +3. Install OpenVINO tokenizers and dependencies + + .. code-block:: console + + pip install optimum[openvino] + + +Convert Hugging Face tokenizer and model to OpenVINO IR format +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +**Convert Tokenizer** + +`OpenVINO Tokenizers `__ +come equipped with a CLI tool that facilitates the conversion of tokenizers +from either the Hugging Face Hub or those saved locally to the OpenVINO IR format: + +.. 
code-block:: sh + + convert_tokenizer microsoft/Llama2-7b-WhoIsHarryPotter --with-detokenizer -o openvino_tokenizer + +In this example, the ``microsoft/Llama2-7b-WhoIsHarryPotter`` tokenizer is converted from the Hugging +Face Hub. You can substitute it with a tokenizer of your choice. You can also rename +the output directory (``openvino_tokenizer``). + +**Convert Model** + +The optimum-cli command can be used for converting a Hugging Face model to the OpenVINO IR model format. +Learn more in Loading an LLM with OpenVINO. + +.. code-block:: sh + + optimum-cli export openvino --convert-tokenizer --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 openvino_model + +Full OpenVINO Text Generation Pipeline +###################################################################### + +1. Import and Compile Models ++++++++++++++++++++++++++++++++++++++++ + +Use the model and tokenizer converted from the previous step: + +.. code-block:: python + + import numpy as np + from openvino import compile_model + + # Compile the tokenizer, model, and detokenizer using OpenVINO. These files are XML representations of the models optimized for OpenVINO + compiled_tokenizer = compile_model("openvino_tokenizer.xml") + compiled_model = compile_model("openvino_model.xml") + compiled_detokenizer = compile_model("openvino_detokenizer.xml") + +2. Tokenize and Transform Input ++++++++++++++++++++++++++++++++++++++++ + +Tokenization is a mandatory step in the process of generating text using LLMs. Tokenization +converts the input text into a sequence of tokens, which are essentially the format that the +model can understand and process. The input text string must be tokenized and set up in the +structure expected by the model before running inference. + +.. code-block:: python + + text_input = ["Quick brown fox was"] + ov_input = compiled_tokenizer(text_input) + +3.
Generate Tokens ++++++++++++++++++++++++++++++++++++++++ + +The core of text generation lies in the inference and token selection loop. In each iteration +of this loop, the model runs inference on the input sequence, generates and selects a new token, +and appends it to the existing sequence. + +.. code-block:: python + + # Define the number of new tokens to generate + new_tokens_size = 10 + + # Determine the size of the existing prompt + prompt_size = ov_input["input_ids"].shape[-1] + + # Prepare the input dictionary for the model + # It combines existing tokens with additional space for new tokens + input_dict = { + output.any_name: np.hstack([tensor, np.zeros(shape=(1, new_tokens_size), dtype=np.int_)]) + for output, tensor in ov_input.items() + } + + # Generate new tokens iteratively + for idx in range(prompt_size, prompt_size + new_tokens_size): + # Get output from the model + output = compiled_model(input_dict)["token_ids"] + # Update the input_ids with newly generated token + input_dict["input_ids"][:, idx] = output[:, idx - 1] + # Update the attention mask to include the new token + input_dict["attention_mask"][:, idx] = 1 + +4. Decode and Display Output ++++++++++++++++++++++++++++++++++++++++ + +The final step in the process is de-tokenization, where the sequence of token IDs generated by +the model is converted back into human-readable text. +This step is essential for interpreting the model's output. + +.. code-block:: python + + # Extract token IDs for the final output + ov_token_ids = input_dict["input_ids"] + # Decode the model output back to string + ov_output = compiled_detokenizer(ov_token_ids)["string_output"] + print(f"OpenVINO output string: `{ov_output}`") + +.. code-block:: python + + # Example output: + [' Quick brown fox was walking through the forest. 
He was looking for something'] + + +Additional Resources +#################### + +* `OpenVINO GenAI Repo `__ +* `OpenVINO Tokenizers `__ +* `Neural Network Compression Framework `__ +* :doc:`Stateful Models Low-Level Details <../../openvino-workflow/running-inference/stateful-models>` +* :doc:`Working with Textual Data <../../openvino-workflow/running-inference/string-tensors>` + diff --git a/docs/articles_en/learn-openvino/openvino-samples/benchmark-tool.rst b/docs/articles_en/learn-openvino/openvino-samples/benchmark-tool.rst index 5a706061777594..390fe00605f2c6 100644 --- a/docs/articles_en/learn-openvino/openvino-samples/benchmark-tool.rst +++ b/docs/articles_en/learn-openvino/openvino-samples/benchmark-tool.rst @@ -30,7 +30,7 @@ Basic Usage The benchmarking application works with models in the OpenVINO IR (``model.xml`` and ``model.bin``) and ONNX (``model.onnx``) formats. - Make sure to :doc:`convert your models <../../openvino-workflow/model-preparation/convert-model-to-ir>` + Make sure to :doc:`convert your models <../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api>` if necessary. To run benchmarking with default options on a model, use the following command: @@ -56,7 +56,7 @@ Basic Usage The benchmarking application works with models in the OpenVINO IR, TensorFlow, TensorFlow Lite, PaddlePaddle, PyTorch and ONNX formats. If you need it, - OpenVINO also allows you to :doc:`convert your models <../../openvino-workflow/model-preparation/convert-model-to-ir>`. + OpenVINO also allows you to :doc:`convert your models <../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api>`. 
To run benchmarking with default options on a model, use the following command: @@ -349,7 +349,7 @@ following usage message: [-api {sync,async}] [-nireq NUMBER_INFER_REQUESTS] [-nstreams NUMBER_STREAMS] [-inference_only [INFERENCE_ONLY]] [-infer_precision INFER_PRECISION] [-ip {bool,f16,f32,f64,i8,i16,i32,i64,u8,u16,u32,u64}] [-op {bool,f16,f32,f64,i8,i16,i32,i64,u8,u16,u32,u64}] [-iop INPUT_OUTPUT_PRECISION] [--mean_values [R,G,B]] [--scale_values [R,G,B]] - [-nthreads NUMBER_THREADS] [-pin {YES,NO}] [-latency_percentile LATENCY_PERCENTILE] + [-nthreads NUMBER_THREADS] [-pin {YES,NO,NUMA,HYBRID_AWARE}] [-latency_percentile LATENCY_PERCENTILE] [-report_type {no_counters,average_counters,detailed_counters}] [-report_folder REPORT_FOLDER] [-pc [PERF_COUNTS]] [-pcsort {no_sort,sort,simple_sort}] [-pcseq [PCSEQ]] [-exec_graph_path EXEC_GRAPH_PATH] [-dump_config DUMP_CONFIG] [-load_config LOAD_CONFIG] @@ -462,8 +462,10 @@ following usage message: -nthreads NUMBER_THREADS, --number_threads NUMBER_THREADS Number of threads to use for inference on the CPU (including HETERO and MULTI cases). - -pin {YES,NO}, --infer_threads_pinning {YES,NO} - Optional. Enable threads->cores pinning for CPU-involved inference. + -pin {YES,NO,NUMA,HYBRID_AWARE}, --infer_threads_pinning {YES,NO,NUMA,HYBRID_AWARE} + Optional. Enable threads->cores ('YES' which is OpenVINO runtime's default for conventional CPUs), threads->(NUMA)nodes ('NUMA'), + threads->appropriate core types ('HYBRID_AWARE', which is OpenVINO runtime's default for Hybrid CPUs) or completely disable ('NO') CPU threads + pinning for CPU-involved inference. Statistics dumping options: @@ -575,7 +577,11 @@ following usage message: Device-specific performance options: -nthreads Optional. Number of threads to use for inference on the CPU (including HETERO and MULTI cases). - -pin "YES" / "NO" Optional. Explicit threads->cores pinning for CPU inference tasks (leave empty to let the OpenVINO make a choice). 
+ -pin ("YES"|"CORE") / "HYBRID_AWARE" / ("NO"|"NONE") / "NUMA" Optional. Explicit inference threads binding options (leave empty to let the OpenVINO make a choice): + enabling threads->cores pinning("YES", which is already default for any conventional CPU), + letting the runtime to decide on the threads->different core types("HYBRID_AWARE", which is default on the hybrid CPUs) + threads->(NUMA)nodes("NUMA") or + completely disable("NO") CPU inference threads pinning Statistics dumping options: -latency_percentile Optional. Defines the percentile to be reported in latency metric. The valid range is [1, 100]. The default value is 50 (median). @@ -931,4 +937,4 @@ Additional Resources - :doc:`Get Started with Samples ` - :doc:`Using OpenVINO Samples <../openvino-samples>` -- :doc:`Convert a Model <../../openvino-workflow/model-preparation/convert-model-to-ir>` +- :doc:`Convert a Model <../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api>` diff --git a/docs/articles_en/learn-openvino/openvino-samples/bert-benchmark.rst b/docs/articles_en/learn-openvino/openvino-samples/bert-benchmark.rst index 13f18fc3272b34..92f6a410219f43 100644 --- a/docs/articles_en/learn-openvino/openvino-samples/bert-benchmark.rst +++ b/docs/articles_en/learn-openvino/openvino-samples/bert-benchmark.rst @@ -7,7 +7,8 @@ Bert Benchmark Python Sample This sample demonstrates how to estimate performance of a Bert model using Asynchronous -Inference Request API. This sample does not have +Inference Request API. Unlike `demos `__ +this sample does not have configurable command line arguments. Feel free to modify sample's source code to try out different options. 
@@ -63,5 +64,5 @@ Additional Resources - :doc:`Integrate the OpenVINO™ Runtime with Your Application <../../openvino-workflow/running-inference/integrate-openvino-with-your-application>` - :doc:`Get Started with Samples ` - :doc:`Using OpenVINO Samples <../openvino-samples>` -- :doc:`Convert a Model <../../openvino-workflow/model-preparation/convert-model-to-ir>` +- :doc:`Convert a Model <../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api>` - `Bert Benchmark Python Sample on Github `__ diff --git a/docs/articles_en/learn-openvino/openvino-samples/hello-classification.rst b/docs/articles_en/learn-openvino/openvino-samples/hello-classification.rst index 7a9a7d449d628d..f8222e495c7387 100644 --- a/docs/articles_en/learn-openvino/openvino-samples/hello-classification.rst +++ b/docs/articles_en/learn-openvino/openvino-samples/hello-classification.rst @@ -93,11 +93,11 @@ To run the sample, you need to specify a model and an image: to manually rearrange the default channels order in the sample or demo application or reconvert your model using model conversion API with ``reverse_input_channels`` argument specified. For more information about - the argument, refer to the **Color Conversion** section of - :doc:`Preprocessing API <../../openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/preprocessing-api-details>`. + the argument, refer to **When to Reverse Input Channels** section of + :doc:`Embedding Preprocessing Computation <../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-setting-input-shapes>`. - Before running the sample with a trained model, make sure the model is converted to the intermediate representation (IR) format (\*.xml + \*.bin) - using the :doc:`model conversion API <../../openvino-workflow/model-preparation/convert-model-to-ir>`. 
+ using the :doc:`model conversion API <../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api>`. - The sample accepts models in ONNX format (.onnx) that do not require preprocessing. - The sample supports NCHW model layout only. @@ -257,7 +257,7 @@ Additional Resources - :doc:`Integrate the OpenVINO™ Runtime with Your Application <../../openvino-workflow/running-inference/integrate-openvino-with-your-application>` - :doc:`Get Started with Samples ` - :doc:`Using OpenVINO Samples <../openvino-samples>` -- :doc:`Convert a Model <../../openvino-workflow/model-preparation/convert-model-to-ir>` +- :doc:`Convert a Model <../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api>` - `OpenVINO Runtime C API `__ - `Hello Classification Python Sample on Github `__ - `Hello Classification C++ Sample on Github `__ diff --git a/docs/articles_en/learn-openvino/openvino-samples/hello-nv12-input-classification.rst b/docs/articles_en/learn-openvino/openvino-samples/hello-nv12-input-classification.rst index 3d1c069e2c8cb1..19219070cbfbe2 100644 --- a/docs/articles_en/learn-openvino/openvino-samples/hello-nv12-input-classification.rst +++ b/docs/articles_en/learn-openvino/openvino-samples/hello-nv12-input-classification.rst @@ -95,11 +95,11 @@ the following command, you can convert an ordinary image to an uncompressed NV12 - By default, this sample expects that model input has BGR channels order. If you trained your model to work with RGB order, you need to reconvert your model using model conversion API with ``reverse_input_channels`` argument - specified. For more information about the argument, refer to the - **Color Conversion** section of :doc:`Preprocessing API <../../openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/preprocessing-api-details>`. + specified. 
For more information about the argument, refer to **When to Reverse + Input Channels** section of :doc:`Embedding Preprocessing Computation <../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-setting-input-shapes>`. - Before running the sample with a trained model, make sure the model is converted to the intermediate representation (IR) format (\*.xml + \*.bin) - using the :doc:`model conversion API <../../openvino-workflow/model-preparation/convert-model-to-ir>`. + using the :doc:`model conversion API <../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api>`. - The sample accepts models in ONNX format (.onnx) that do not require preprocessing. Example @@ -208,7 +208,7 @@ Additional Resources - :doc:`Integrate the OpenVINO™ Runtime with Your Application <../../openvino-workflow/running-inference/integrate-openvino-with-your-application>` - :doc:`Get Started with Samples ` - :doc:`Using OpenVINO Samples <../openvino-samples>` -- :doc:`Convert a Model <../../openvino-workflow/model-preparation/convert-model-to-ir>` +- :doc:`Convert a Model <../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api>` - `API Reference `__ - `Hello NV12 Input Classification C++ Sample on Github `__ - `Hello NV12 Input Classification C Sample on Github `__ diff --git a/docs/articles_en/learn-openvino/openvino-samples/hello-reshape-ssd.rst b/docs/articles_en/learn-openvino/openvino-samples/hello-reshape-ssd.rst index 0e929bb5ed2701..23de8eb1979824 100644 --- a/docs/articles_en/learn-openvino/openvino-samples/hello-reshape-ssd.rst +++ b/docs/articles_en/learn-openvino/openvino-samples/hello-reshape-ssd.rst @@ -14,8 +14,8 @@ using the sample, refer to the following requirements: - Models with only one input and output are supported. - The sample accepts any file format supported by ``core.read_model``. 
-- The sample has been validated with the person-detection-retail-0013 - model and the NCHW layout format. +- The sample has been validated with: `person-detection-retail-0013 `__ + models and the NCHW layout format. - To build the sample, use instructions available at :ref:`Build the Sample Applications ` section in "Get Started with Samples" guide. @@ -82,12 +82,12 @@ To run the sample, you need to specify a model and an image: order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using model conversion API with ``reverse_input_channels`` - argument specified. For more information about the argument, refer to the - **Color Conversion** section of - :doc:`Preprocessing API <../../openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/preprocessing-api-details>`. + argument specified. For more information about the argument, refer to + **When to Reverse Input Channels** section of + :doc:`Embedding Preprocessing Computation <../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-setting-input-shapes>`. - Before running the sample with a trained model, make sure the model is converted to the intermediate representation (IR) format (\*.xml + \*.bin) - using :doc:`model conversion API <../../openvino-workflow/model-preparation/convert-model-to-ir>`. + using :doc:`model conversion API <../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api>`. - The sample accepts models in ONNX format (.onnx) that do not require preprocessing. 
Example @@ -204,7 +204,7 @@ Additional Resources - :doc:`Integrate the OpenVINO™ Runtime with Your Application <../../openvino-workflow/running-inference/integrate-openvino-with-your-application>` - :doc:`Get Started with Samples ` - :doc:`Using OpenVINO Samples <../openvino-samples>` -- :doc:`Convert a Model <../../openvino-workflow/model-preparation/convert-model-to-ir>` +- :doc:`Convert a Model <../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api>` - `Hello Reshape SSD Python Sample on Github `__ - `Hello Reshape SSD C++ Sample on Github `__ diff --git a/docs/articles_en/learn-openvino/openvino-samples/image-classification-async.rst b/docs/articles_en/learn-openvino/openvino-samples/image-classification-async.rst index d88b950463210d..b112452e932c72 100644 --- a/docs/articles_en/learn-openvino/openvino-samples/image-classification-async.rst +++ b/docs/articles_en/learn-openvino/openvino-samples/image-classification-async.rst @@ -129,9 +129,9 @@ To run the sample, you need to specify a model and an image: .. note:: - - By default, OpenVINO™ Toolkit Samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using model conversion API with ``reverse_input_channels`` argument specified. For more information about the argument, refer to the **Color Conversion** section of :doc:`Preprocessing API <../../openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/preprocessing-api-details>`. + - By default, OpenVINO™ Toolkit Samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using model conversion API with ``reverse_input_channels`` argument specified. 
For more information about the argument, refer to **When to Reverse Input Channels** section of :doc:`Embedding Preprocessing Computation <../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-setting-input-shapes>`. - - Before running the sample with a trained model, make sure the model is converted to the intermediate representation (IR) format (\*.xml + \*.bin) using :doc:`model conversion API <../../openvino-workflow/model-preparation/convert-model-to-ir>`. + - Before running the sample with a trained model, make sure the model is converted to the intermediate representation (IR) format (\*.xml + \*.bin) using :doc:`model conversion API <../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api>`. - The sample accepts models in ONNX format (.onnx) that do not require preprocessing. @@ -326,6 +326,6 @@ Additional Resources - :doc:`Integrate the OpenVINO™ Runtime with Your Application <../../openvino-workflow/running-inference/integrate-openvino-with-your-application>` - :doc:`Get Started with Samples ` - :doc:`Using OpenVINO™ Toolkit Samples <../openvino-samples>` -- :doc:`Convert a Model <../../openvino-workflow/model-preparation/convert-model-to-ir>` +- :doc:`Convert a Model <../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api>` - `Image Classification Async Python Sample on Github `__ - `Image Classification Async C++ Sample on Github `__ diff --git a/docs/articles_en/learn-openvino/openvino-samples/model-creation.rst b/docs/articles_en/learn-openvino/openvino-samples/model-creation.rst index ad01cee53a69b1..e0e3034c225763 100644 --- a/docs/articles_en/learn-openvino/openvino-samples/model-creation.rst +++ b/docs/articles_en/learn-openvino/openvino-samples/model-creation.rst @@ -76,7 +76,7 @@ To run the sample, you need to specify model weights and a device. - This sample supports models with FP32 weights only. 
- The ``lenet.bin`` weights file is generated by - :doc:`model conversion API <../../openvino-workflow/model-preparation/convert-model-to-ir>` + :doc:`model conversion API <../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api>` from the public LeNet model, with the ``input_shape [64,1,28,28]`` parameter specified. - The original model is available in the `Caffe repository `__ on GitHub. @@ -292,6 +292,6 @@ Additional Resources - :doc:`Integrate the OpenVINO™ Runtime with Your Application <../../openvino-workflow/running-inference/integrate-openvino-with-your-application>` - :doc:`Get Started with Samples ` - :doc:`Using OpenVINO Samples <../openvino-samples>` -- :doc:`Convert a Model <../../openvino-workflow/model-preparation/convert-model-to-ir>` +- :doc:`Convert a Model <../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api>` - `Model Creation Python Sample on Github `__ - `Model Creation C++ Sample on Github `__ diff --git a/docs/articles_en/learn-openvino/openvino-samples/sync-benchmark.rst b/docs/articles_en/learn-openvino/openvino-samples/sync-benchmark.rst index ccaa1f03a35552..245672decb7ab2 100644 --- a/docs/articles_en/learn-openvino/openvino-samples/sync-benchmark.rst +++ b/docs/articles_en/learn-openvino/openvino-samples/sync-benchmark.rst @@ -8,13 +8,15 @@ Sync Benchmark Sample This sample demonstrates how to estimate performance of a model using Synchronous Inference Request API. It makes sense to use synchronous inference only in latency -oriented scenarios. Models with static input shapes are supported. -This sample does not have other configurable command-line +oriented scenarios. Models with static input shapes are supported. Unlike +`demos `__ +this sample does not have other configurable command-line arguments. Feel free to modify sample's source code to try out different options. 
Before using the sample, refer to the following requirements: - The sample accepts any file format supported by ``core.read_model``. -- The sample has been validated with: the yolo-v3-tf and face-detection-0200 models. +- The sample has been validated with: `yolo-v3-tf `__, + `face-detection-0200 `__ models. - To build the sample, use instructions available at :ref:`Build the Sample Applications ` section in "Get Started with Samples" guide. @@ -165,6 +167,6 @@ Additional Resources - :doc:`Integrate the OpenVINO™ Runtime with Your Application <../../openvino-workflow/running-inference/integrate-openvino-with-your-application>` - :doc:`Get Started with Samples ` - :doc:`Using OpenVINO Samples <../openvino-samples>` -- :doc:`Convert a Model <../../openvino-workflow/model-preparation/convert-model-to-ir>` +- :doc:`Convert a Model <../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api>` - `Sync Benchmark Python Sample on Github `__ - `Sync Benchmark C++ Sample on Github `__ diff --git a/docs/articles_en/learn-openvino/openvino-samples/throughput-benchmark.rst b/docs/articles_en/learn-openvino/openvino-samples/throughput-benchmark.rst index 4632fab82bd0ea..e8b723afd2a480 100644 --- a/docs/articles_en/learn-openvino/openvino-samples/throughput-benchmark.rst +++ b/docs/articles_en/learn-openvino/openvino-samples/throughput-benchmark.rst @@ -7,7 +7,7 @@ Throughput Benchmark Sample This sample demonstrates how to estimate performance of a model using Asynchronous -Inference Request API in throughput mode. This sample +Inference Request API in throughput mode. Unlike `demos `__ this sample does not have other configurable command-line arguments. Feel free to modify sample's source code to try out different options. 
@@ -18,7 +18,8 @@ sets ``uint8``, while the sample uses default model precision which is usually ` Before using the sample, refer to the following requirements: - The sample accepts any file format supported by ``core.read_model``. -- The sample has been validated with: yolo-v3-tf and face-detection-0200 models. +- The sample has been validated with: `yolo-v3-tf `__, + `face-detection-0200 `__ models. - To build the sample, use instructions available at :ref:`Build the Sample Applications ` section in "Get Started with Samples" guide. @@ -170,6 +171,6 @@ Additional Resources - :doc:`Integrate the OpenVINO™ Runtime with Your Application <../../openvino-workflow/running-inference/integrate-openvino-with-your-application>` - :doc:`Get Started with Samples ` - :doc:`Using OpenVINO Samples <../openvino-samples>` -- :doc:`Convert a Model <../../openvino-workflow/model-preparation/convert-model-to-ir>` +- :doc:`Convert a Model <../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api>` - `Throughput Benchmark Python Sample on Github `__ - `Throughput Benchmark C++ Sample on Github `__ diff --git a/docs/articles_en/openvino-workflow/model-preparation.rst b/docs/articles_en/openvino-workflow/model-preparation.rst index 33a4d8a54cc7f6..c23540874e9b7a 100644 --- a/docs/articles_en/openvino-workflow/model-preparation.rst +++ b/docs/articles_en/openvino-workflow/model-preparation.rst @@ -56,6 +56,12 @@ The easiest way to obtain a model is to download it from an online database, suc .. note:: + Model conversion API prior to OpenVINO 2023.1 is considered deprecated. Existing and new + projects are recommended to transition to the new solutions, keeping in mind that they are + not fully backwards compatible with ``openvino.tools.mo.convert_model`` or the ``mo`` + CLI tool. For more details, see the + :doc:`Model Conversion API Transition Guide <../documentation/legacy-features/transition-legacy-conversion-api>`. 
+ For PyTorch and JAX/Flax models, `Python API <#convert-a-model-with-python-convert-model>`__ is the only conversion option. @@ -292,4 +298,15 @@ follow: * :doc:`Post-training optimization ` * :doc:`Model inference in OpenVINO Runtime ` +If you are still using the legacy conversion API (``mo`` or ``openvino.tools.mo.convert_model``), +refer to the following materials: + +* :doc:`Transition from legacy mo and ov.tools.mo.convert_model <../documentation/legacy-features/transition-legacy-conversion-api>` +* :doc:`Legacy Model Conversion API <../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api>` + + + + .. need to investigate python api article generation - api/ie_python_api/_autosummary/openvino.Model.html does not exist, api/ie_python_api/_autosummary/openvino.runtime.Model.html does. + + diff --git a/docs/articles_en/openvino-workflow/model-preparation/convert-model-pytorch.rst b/docs/articles_en/openvino-workflow/model-preparation/convert-model-pytorch.rst index fc2637aba9139e..6ac806daf0cda0 100644 --- a/docs/articles_en/openvino-workflow/model-preparation/convert-model-pytorch.rst +++ b/docs/articles_en/openvino-workflow/model-preparation/convert-model-pytorch.rst @@ -203,67 +203,18 @@ Here is an example of how to convert a model obtained with ``torch.export``: This is an experimental feature. Use it only if you know that you need to. PyTorch version 2.2 is recommended. Dynamic shapes are not supported yet. -Converting a PyTorch Model from Disk -#################################### - -PyTorch can save models in two formats: ``torch.jit.ScriptModule`` and ``torch.export.ExportedProgram``. -Both formats may be saved to drive as standalone files and reloaded later, independently of the -original Python code. - -ExportedProgram Format -++++++++++++++++++++++ - -You can save the ``ExportedProgram`` format using -`torch.export.save() `__. -Here is an example of how to convert it: - -.. tab-set:: - - .. 
tab-item:: Python - :sync: py - - .. code-block:: py - :force: - - import openvino as ov - ov_model = ov.convert_model('exported_program.pt2') - - .. tab-item:: CLI - :sync: cli - - .. code-block:: sh - - ovc exported_program.pt2 - -ScriptModule Format -+++++++++++++++++++ - -`torch.jit.save() `__ serializes -the ``ScriptModule`` object on a drive. To convert the serialized ``ScriptModule`` format, run -the ``convert_model`` function with ``example_input`` parameter as follows: - -.. code-block:: py - :force: - - from openvino import convert_model - import torch - - convert_model(input_model='script_module.pt', example_input=torch.rand(1, 10)) - -``example_input`` is the required parameter for the conversion because ``torch.jit.ScriptModule`` object is always saved in an untraced state on disk. - Exporting a PyTorch Model to ONNX Format ######################################## -An alternative method of converting a PyTorch models is to export it to ONNX first -(with ``torch.onnx.export``) and then convert the resulting ``.onnx`` file to the OpenVINO IR -model (with ``openvino.convert_model``). It should be considered a backup solution, if a model -cannot be converted directly, as described previously. Converting through ONNX can be more -expensive in terms of code overhead, conversion time, and allocated memory. +An alternative method of converting PyTorch models is exporting a PyTorch model to ONNX with +``torch.onnx.export`` first and then converting the resulting ``.onnx`` file to OpenVINO Model +with ``openvino.convert_model``. It can be considered as a backup solution if a model cannot be +converted directly from PyTorch to OpenVINO as described in the above chapters. Converting through +ONNX can be more expensive in terms of code, conversion time, and allocated memory. 1. Refer to the `Exporting PyTorch models to ONNX format `__ guide to learn how to export models from PyTorch to ONNX. -2. Follow the :doc:`Convert an ONNX model ` guide to produce OpenVINO IR. 
+2. Follow :doc:`Convert an ONNX model ` chapter to produce OpenVINO model. Here is an illustration of using these two steps together: diff --git a/docs/articles_en/openvino-workflow/model-preparation/convert-model-to-ir.rst b/docs/articles_en/openvino-workflow/model-preparation/convert-model-to-ir.rst index dd2fc35c56e92b..560b013301e064 100644 --- a/docs/articles_en/openvino-workflow/model-preparation/convert-model-to-ir.rst +++ b/docs/articles_en/openvino-workflow/model-preparation/convert-model-to-ir.rst @@ -296,7 +296,7 @@ used by OpenVINO, typically obtained by converting models of supported framework * The ``convert_model()`` method: - You can use ``ovc`` to convert a model to IR. The obtained IR can + You can use ``mo`` command-line tool to convert a model to IR. The obtained IR can then be read by ``read_model()`` and inferred. .. dropdown:: List of supported formats: @@ -423,7 +423,7 @@ used by OpenVINO, typically obtained by converting models of supported framework * The ``convert_model()`` method: - You can use ``ovc`` to convert a model to IR. The obtained IR + You can use ``mo`` command-line tool to convert a model to IR. The obtained IR can then be read by ``read_model()`` and inferred. .. dropdown:: List of supported formats: @@ -557,7 +557,7 @@ used by OpenVINO, typically obtained by converting models of supported framework * The ``convert_model()`` method: - You can use ``ovc`` to convert a model to IR. The obtained IR + You can use ``mo`` command-line tool to convert a model to IR. The obtained IR can then be read by ``read_model()`` and inferred. .. dropdown:: List of supported formats: @@ -708,6 +708,6 @@ multiple times: Additional Resources #################### -* Learn about the :doc:`parameters to adjust model conversion <./conversion-parameters>`. +* :doc:`Transition guide from the legacy to new conversion API <../../documentation/legacy-features/transition-legacy-conversion-api>` * `Download models from Hugging Face `__. 
diff --git a/docs/articles_en/openvino-workflow/running-inference/dynamic-shapes.rst b/docs/articles_en/openvino-workflow/running-inference/dynamic-shapes.rst index b9978f3767562e..9de4ba9df18827 100644 --- a/docs/articles_en/openvino-workflow/running-inference/dynamic-shapes.rst +++ b/docs/articles_en/openvino-workflow/running-inference/dynamic-shapes.rst @@ -139,7 +139,7 @@ To check if a model already has dynamic dimensions, first load it with the ``rea If the input model already has dynamic dimensions, that will not change during inference. If the inputs will not be used dynamically, it is recommended to set them to static values using the ``reshape`` method to save application memory and potentially improve inference speed. The OpenVINO API supports any combination of static and dynamic dimensions. -Static and dynamic dimensions can also be set when converting the model with ``convert_model()``. It has identical capabilities to the ``reshape`` method, so you can save time by converting the model with dynamic shapes beforehand rather than in the application code. To get information about setting input shapes using ``convert_model()``, refer to :doc:`Setting Input Shapes <./changing-input-shape>`. +Static and dynamic dimensions can also be set when converting the model with ``convert_model()``. It has identical capabilities to the ``reshape`` method, so you can save time by converting the model with dynamic shapes beforehand rather than in the application code. To get information about setting input shapes using ``convert_model()``, refer to :doc:`Setting Input Shapes <../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-setting-input-shapes>`. 
Dimension Bounds ---------------- diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes.rst index 31d0af303c633a..aa8e9cdabfda64 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes.rst @@ -31,6 +31,7 @@ different conditions: | :doc:`Automatic Device Selection (AUTO) ` | :doc:`Heterogeneous Execution (HETERO) ` | :doc:`Automatic Batching Execution (Auto-batching) ` +| :doc:`[DEPRECATED] Multi-Device Execution (MULTI) <../../documentation/legacy-features/multi-device>` To learn how to change the device configuration, read the :doc:`Query device properties article `. diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/auto-device-selection.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/auto-device-selection.rst index a5ab0c845dfa66..6bebf087052b75 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/auto-device-selection.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/auto-device-selection.rst @@ -513,6 +513,7 @@ Additional Resources * `Automatic Device Selection with OpenVINO™ Notebook `__ * :doc:`Debugging AUTO ` +* :doc:`(LEGACY) Running on Multiple Devices Simultaneously <../../../documentation/legacy-features/multi-device>` * :doc:`Inference Devices and Modes <../inference-devices-and-modes>` diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device.rst index 30d376e18a608a..f1a914e6b9dac3 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device.rst +++ 
b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device.rst @@ -357,6 +357,7 @@ All parameters must be set before calling ``ov::Core::compile_model()`` in order - ``ov::hint::enable_hyper_threading`` - ``ov::hint::enable_cpu_pinning`` - ``ov::num_streams`` +- ``ov::affinity`` - ``ov::inference_num_threads`` - ``ov::cache_dir`` - ``ov::intel_cpu::denormals_optimization`` @@ -372,6 +373,8 @@ Read-only properties - ``ov::device::full_name`` - ``ov::device::capabilities`` +.. note:: + ``ov::affinity`` is replaced by ``ov::hint::enable_cpu_pinning``. As such, it is deprecated in the 2024.0 release and will be removed in the 2025 release. External Dependencies ########################################################### diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device/performance-hint-and-thread-scheduling.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device/performance-hint-and-thread-scheduling.rst index 6cc211116d1199..46b541d84d4035 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device/performance-hint-and-thread-scheduling.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device/performance-hint-and-thread-scheduling.rst @@ -63,19 +63,19 @@ the model precision and the ratio of P-cores and E-cores. 
Then the default settings for low-level performance properties on Windows and Linux are as follows: -+--------------------------------------+--------------------------------------------------------------------+--------------------------------------------------------------------+ -| Property | Windows | Linux | -+======================================+====================================================================+====================================================================+ -| ``ov::num_streams`` | 1 | 1 | -+--------------------------------------+--------------------------------------------------------------------+--------------------------------------------------------------------+ -| ``ov::inference_num_threads`` | is equal to the number of P-cores or P-cores+E-cores on one socket | is equal to the number of P-cores or P-cores+E-cores on one socket | -+--------------------------------------+--------------------------------------------------------------------+--------------------------------------------------------------------+ -| ``ov::hint::scheduling_core_type`` | :ref:`Core Type Table of Latency Hint ` | :ref:`Core Type Table of Latency Hint ` | -+--------------------------------------+--------------------------------------------------------------------+--------------------------------------------------------------------+ -| ``ov::hint::enable_hyper_threading`` | No | No | -+--------------------------------------+--------------------------------------------------------------------+--------------------------------------------------------------------+ -| ``ov::hint::enable_cpu_pinning`` | No / Not Supported | Yes except using P-cores and E-cores together | -+--------------------------------------+--------------------------------------------------------------------+--------------------------------------------------------------------+ 
++--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ +| Property | Windows | Linux | ++======================================+=======================================================================+=======================================================================+ +| ``ov::num_streams`` | 1 | 1 | ++--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ +| ``ov::inference_num_threads`` | is equal to the number of P-cores or P-cores+E-cores on one numa node | is equal to the number of P-cores or P-cores+E-cores on one numa node | ++--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ +| ``ov::hint::scheduling_core_type`` | :ref:`Core Type Table of Latency Hint ` | :ref:`Core Type Table of Latency Hint ` | ++--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ +| ``ov::hint::enable_hyper_threading`` | No | No | ++--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ +| ``ov::hint::enable_cpu_pinning`` | No / Not Supported | Yes except using P-cores and E-cores together | ++--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ .. 
note:: @@ -96,7 +96,7 @@ Then the default settings for low-level performance properties on Windows and Li Starting from 5th Gen Intel Xeon Processors, new microarchitecture enabled new sub-NUMA clusters feature. A sub-NUMA cluster (SNC) can create two or more localization domains (numa nodes) within a socket by BIOS configuration. - By default OpenVINO with latency hint uses single socket for inference. Although such + By default OpenVINO with latency hint uses single NUMA node for inference. Although such behavior allows to achive best performance for most of the models, there might be corner cases which require manual tuning of ``ov::num_streams`` and ``ov::hint::enable_hyper_threading parameters``. Please find more detail about `Sub-NUMA Clustering `__ diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/gpu-device.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/gpu-device.rst index 2adf3e7f9d1e4d..b4e1c7ac15afcc 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/gpu-device.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/gpu-device.rst @@ -124,7 +124,7 @@ Selected precision of each primitive depends on the operation precision in IR, q The ``u1``/``u8``/``i8`` data types are used for quantized operations only, which means that they are not selected automatically for non-quantized operations. For more details on how to get a quantized model, refer to the :doc:`Model Optimization guide <../../model-optimization>`. -Floating-point precision of a GPU primitive is selected based on operation precision in the OpenVINO IR, except for the :doc:``, which is executed in the ``f16`` precision. +Floating-point precision of a GPU primitive is selected based on operation precision in the OpenVINO IR, except for the :doc:``, which is executed in the ``f16`` precision. .. 
note:: diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst index 2ba25507802288..7b135fa7ff0b14 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst @@ -146,8 +146,6 @@ offer a limited set of supported OpenVINO features. ov::intel_npu::turbo ov::intel_npu::tiles ov::intel_npu::max_tiles - ov::intel_npu::bypass_umd_caching - ov::intel_npu::defer_weights_load .. tab-item:: Read-only properties @@ -170,6 +168,7 @@ offer a limited set of supported OpenVINO features. ov::intel_npu::device_alloc_mem_size ov::intel_npu::device_total_mem_size ov::intel_npu::driver_version + ov::intel_npu::bypass_umd_caching .. note:: @@ -250,11 +249,11 @@ or **ov::intel_npu::max_tiles and ov::intel_npu::tiles** -the ``max_tiles`` property is read-write to enable compiling models off-device. +the ``max_tiles`` property is read-write to enable compiling models off-device. When on NPU, ``max_tiles`` will return the number of tiles the device has. Setting the number of tiles to compile for (via ``intel_npu::tiles``), when on device, -must be preceded by reading ``intel_npu::max_tiles`` first, to make sure that -``ov::intel_npu::tiles`` <= ``ov::intel_npu::max_tiles`` +must be preceded by reading ``intel_npu::max_tiles`` first, to make sure that +``ov::intel_npu::tiles`` <= ``ov::intel_npu::max_tiles`` to avoid exceptions from the compiler. .. 
note:: @@ -281,3 +280,7 @@ Additional Resources * `Working with NPUs in OpenVINO™ Notebook `__ * `Vision colorization Notebook <./../../../notebooks/vision-image-colorization-with-output.html>`__ +* `Classification Benchmark C++ Demo `__ +* `3D Human Pose Estimation Python Demo `__ +* `Object Detection C++ Demo `__ +* `Object Detection Python Demo `__ diff --git a/docs/articles_en/openvino-workflow/running-inference/integrate-openvino-with-your-application/model-representation.rst b/docs/articles_en/openvino-workflow/running-inference/integrate-openvino-with-your-application/model-representation.rst index 259f605d46c2f7..6ab924a61ef150 100644 --- a/docs/articles_en/openvino-workflow/running-inference/integrate-openvino-with-your-application/model-representation.rst +++ b/docs/articles_en/openvino-workflow/running-inference/integrate-openvino-with-your-application/model-representation.rst @@ -247,50 +247,57 @@ OpenVINO™ provides several debug capabilities: * Model can be visualized to image from the xDot format: - .. tab-set:: +.. tab-set:: - .. tab-item:: Python - :sync: py + .. tab-item:: Python + :sync: py - .. doxygensnippet:: docs/articles_en/assets/snippets/ov_model_snippets.py - :language: python - :fragment: [ov:visualize] + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_model_snippets.py + :language: python + :fragment: [ov:visualize] - .. tab-item:: C++ - :sync: cpp + .. tab-item:: C++ + :sync: cpp - .. doxygensnippet:: docs/articles_en/assets/snippets/ov_model_snippets.cpp - :language: cpp - :fragment: [ov:visualize] + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_model_snippets.cpp + :language: cpp + :fragment: [ov:visualize] + + +.. 
code-block:: sh + + `ov::pass::VisualizeTree` can be parametrized via environment variables: + + OV_VISUALIZE_TREE_OUTPUT_SHAPES=1 - visualize shapes + OV_VISUALIZE_TREE_OUTPUT_TYPES=1 - visualize types - ``ov::pass::VisualizeTree`` can be parametrized via environment variables: + OV_VISUALIZE_TREE_MIN_MAX_DENORMAL=1 - pretty denormal values - * ``OV_VISUALIZE_TREE_OUTPUT_SHAPES=1`` - visualize shapes - * ``OV_VISUALIZE_TREE_OUTPUT_TYPES=1`` - visualize types - * ``OV_VISUALIZE_TREE_MIN_MAX_DENORMAL=1`` - pretty denormal values - * ``OV_VISUALIZE_TREE_RUNTIME_INFO=1`` - print runtime information - * ``OV_VISUALIZE_TREE_IO=1`` - print I/O ports - * ``OV_VISUALIZE_TREE_MEMBERS_NAME=1`` - print member names + OV_VISUALIZE_TREE_RUNTIME_INFO=1 - print runtime information + + OV_VISUALIZE_TREE_IO=1 - print I/O ports + + OV_VISUALIZE_TREE_MEMBERS_NAME=1 - print member names * Also model can be serialized to IR: - .. tab-set:: +.. tab-set:: - .. tab-item:: Python - :sync: py + .. tab-item:: Python + :sync: py - .. doxygensnippet:: docs/articles_en/assets/snippets/ov_model_snippets.py - :language: python - :fragment: [ov:serialize] + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_model_snippets.py + :language: python + :fragment: [ov:serialize] - .. tab-item:: C++ - :sync: cpp + .. tab-item:: C++ + :sync: cpp - .. doxygensnippet:: docs/articles_en/assets/snippets/ov_model_snippets.cpp - :language: cpp - :fragment: [ov:serialize] + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_model_snippets.cpp + :language: cpp + :fragment: [ov:serialize] Additional Resources @@ -299,3 +306,5 @@ Additional Resources * :doc:`Available Operation Sets <../../../documentation/openvino-ir-format/operation-sets/available-opsets>`. * :doc:`OpenVINO™ Runtime Extensibility Developer Guide <../../../documentation/openvino-extensibility>`. * :doc:`Transformations Developer Guide <../../../documentation/openvino-extensibility/transformation-api>`. 
+ + diff --git a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/general-optimizations.rst b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/general-optimizations.rst index 5f01623d248755..b8ec2da9235fd4 100644 --- a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/general-optimizations.rst +++ b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/general-optimizations.rst @@ -18,7 +18,7 @@ Inputs Pre-Processing with OpenVINO In many cases, a network expects a pre-processed image. It is advised not to perform any unnecessary steps in the code: -* Model conversion API can efficiently incorporate the mean and normalization (scale) values into a model (for example, to the weights of the first convolution). For more details, see the :doc:`relevant model conversion API command-line parameters <../../../openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/preprocessing-api-details>`. +* Model conversion API can efficiently incorporate the mean and normalization (scale) values into a model (for example, to the weights of the first convolution). For more details, see the :doc:`relevant model conversion API command-line parameters <../../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-embedding-preprocessing-computation>`. * Let OpenVINO accelerate other means of :doc:`Image Pre-processing and Conversion ` * Data which is already in the "on-device" memory can be input directly by using the :doc:`remote tensors API of the GPU Plugin <../inference-devices-and-modes/gpu-device/remote-tensor-api-gpu-plugin>`. @@ -60,7 +60,7 @@ Below are example-codes for the regular and async-based approaches to compare: The technique can be generalized to any available parallel slack. 
For example, you can do inference and simultaneously encode the resulting or previous frames or run further inference, like emotion detection on top of the face detection results. -Refer to the :doc:`Benchmark App Sample <../../../learn-openvino/openvino-samples/benchmark-tool>` for complete examples of the Async API in action. +Refer to the `Object Detection C++ Demo `__ , `Object Detection Python Demo `__ (latency-oriented Async API showcase) and :doc:`Benchmark App Sample <../../../learn-openvino/openvino-samples/benchmark-tool>` for complete examples of the Async API in action. .. note:: diff --git a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/layout-api-overview.rst b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/layout-api-overview.rst index 1562165916e576..690b606ff3720a 100644 --- a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/layout-api-overview.rst +++ b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/layout-api-overview.rst @@ -23,6 +23,7 @@ Below is a list of cases where input/output layout is important: * :doc:`Convert to OpenVINO <../../../model-preparation/convert-model-to-ir>` * `OpenVINO Model Conversion Tutorial `__ + * :doc:`[LEGACY] Model Optimizer Embedding Preprocessing Computation <../../../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-embedding-preprocessing-computation>` guide. * Improving the readability of a model input and output. 
diff --git a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-latency/model-caching-overview.rst b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-latency/model-caching-overview.rst index b3253f775bdb02..181622ff55baf1 100644 --- a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-latency/model-caching-overview.rst +++ b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-latency/model-caching-overview.rst @@ -139,7 +139,7 @@ To check in advance if a particular device supports model caching, your applicat Set "cache_encryption_callbacks" config option to enable cache encryption +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -If model caching is enabled in the CPU Plugin, the model topology can be encrypted while it is saved to the cache and decrypted when it is loaded from the cache. Currently, this property can be set only in ``compile_model``. +If model caching is enabled, the model topology can be encrypted when saving to the cache and decrypted when loading from the cache. This property can currently be set only in ``compile_model``. .. tab-set:: @@ -157,24 +157,6 @@ If model caching is enabled in the CPU Plugin, the model topology can be encrypt :language: cpp :fragment: [ov:caching:part4] -If model caching is enabled in the GPU Plugin, the model topology can be encrypted while it is saved to the cache and decrypted when it is loaded from the cache. Full encryption only works when the ``CacheMode`` property is set to ``OPTIMIZE_SIZE``. - -.. tab-set:: - - .. tab-item:: Python - :sync: py - - .. doxygensnippet:: docs/articles_en/assets/snippets/ov_caching.py - :language: py - :fragment: [ov:caching:part5] - - .. tab-item:: C++ - :sync: cpp - - .. doxygensnippet:: docs/articles_en/assets/snippets/ov_caching.cpp - :language: cpp - :fragment: [ov:caching:part5] - .. 
important:: - Currently, this property is supported only by the CPU and GPU plugins. For other HW plugins, setting this property will not encrypt/decrypt the model topology in cache and will not affect performance. + Currently, this property is supported only by the CPU plugin. For other HW plugins, setting this property will not encrypt/decrypt the model topology in cache and will not affect performance. diff --git a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-throughput.rst b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-throughput.rst index 8aafd9ceb4faec..18c18c5f7d05b8 100644 --- a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-throughput.rst +++ b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-throughput.rst @@ -63,7 +63,18 @@ In general, most throughput-oriented inference applications should: * Use the Async API with callbacks, to avoid any dependency on the completion order of the requests and possible device starvation, as explained in the :doc:`common-optimizations section `. +Multi-Device Execution +###################### + +OpenVINO offers the automatic, scalable :doc:`multi-device inference mode <../../../documentation/legacy-features/multi-device>`, which is a simple *application-transparent* way to improve throughput. There is no need to re-architect existing applications for any explicit multi-device support: no explicit network loading to each device, no separate per-device queues, no additional logic to balance inference requests between devices, etc. For the application using it, multi-device is like any other device, as it manages all processes internally.
+Just like with other throughput-oriented scenarios, there are several major pre-requisites for optimal multi-device performance: + +* Using the :ref:`Asynchronous API ` and :doc:`callbacks <../integrate-openvino-with-your-application/inference-request>` in particular. +* Providing the multi-device (and hence the underlying devices) with enough data to crunch. As the inference requests are naturally independent data pieces, the multi-device performs load-balancing at the "requests" (outermost) level to minimize the scheduling overhead. + +Keep in mind that the resulting performance is usually a fraction of the "ideal" (plain sum) value, when the devices compete for certain resources such as the memory-bandwidth, which is shared between CPU and iGPU. + .. note:: - The :doc:`Automatic Device Selection <../inference-devices-and-modes/auto-device-selection>` allows configuration of all devices at once. + While the legacy approach of optimizing the parameters of each device separately works, the :doc:`Automatic Device Selection <../inference-devices-and-modes/auto-device-selection>` allows configuring all devices (that are part of the specific multi-device configuration) at once. diff --git a/docs/articles_en/openvino-workflow/torch-compile.rst b/docs/articles_en/openvino-workflow/torch-compile.rst index d398704a819edc..e5bc0ca901a5aa 100644 --- a/docs/articles_en/openvino-workflow/torch-compile.rst +++ b/docs/articles_en/openvino-workflow/torch-compile.rst @@ -5,8 +5,7 @@ PyTorch Deployment via "torch.compile" The ``torch.compile`` feature enables you to use OpenVINO for PyTorch-native applications. It speeds up PyTorch code by JIT-compiling it into optimized kernels. -By default, Torch code runs in eager-mode, but with the use of ``torch.compile`` it goes -through the following steps: +By default, Torch code runs in eager-mode, but with the use of ``torch.compile`` it goes through the following steps: 1.
**Graph acquisition** - the model is rewritten as blocks of subgraphs that are either: @@ -311,84 +310,10 @@ officially. However, it can be accessed by running the following instructions: if sys.version_info >= (3, 11): `raise RuntimeError("Python 3.11+ not yet supported for torch.compile") -TorchServe Integration -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -TorchServe is a performant, flexible, and easy to use tool for serving PyTorch models in production. For more information on the details of TorchServe, -you can refer to `TorchServe github repository. `__. With OpenVINO ``torch.compile`` integration into TorchServe you can serve -PyTorch models in production and accelerate them with OpenVINO on various Intel hardware. Detailed instructions on how to use OpenVINO with TorchServe are -available in `TorchServe examples. `__ and in a `use case app `__. - -Support for Automatic1111 Stable Diffusion WebUI -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -Automatic1111 Stable Diffusion WebUI is an open-source repository that hosts a browser-based interface for the Stable Diffusion -based image generation. It allows users to create realistic and creative images from text prompts. -Stable Diffusion WebUI is supported on Intel CPUs, Intel integrated GPUs, and Intel discrete GPUs by leveraging OpenVINO -``torch.compile`` capability. Detailed instructions are available in -`Stable Diffusion WebUI repository. `__ - - -Model Quantization and Weights Compression -############################################# - -Model quantization and weights compression are effective methods for accelerating model inference and reducing memory consumption, with minimal impact on model accuracy. The `torch.compile` OpenVINO backend supports two key model optimization APIs: - -1. Neural Network Compression Framework (`NNCF `__). NNCF offers advanced algorithms for post-training quantization and weights compression in the OpenVINO toolkit. - -2. 
PyTorch 2 export quantization. A general-purpose API designed for quantizing models captured by ``torch.export``. - -NNCF is the recommended approach for model quantization and weights compression. NNCF specifically optimizes models for the OpenVINO backend, providing optimal results in terms of inference speed and accuracy. - - -NNCF Model Optimization Support (Preview) -+++++++++++++++++++++++++++++++++++++++++++++ - -The Neural Network Compression Framework (`NNCF `__) implements advanced quantization and weights compression algorithms, which can be applied to ``torch.fx.GraphModule`` to speed up inference -and decrease memory consumption. - -Model quantization example: - -.. code-block:: python - - import nncf - import openvino.torch - import torch - - calibration_loader = torch.utils.data.DataLoader(...) - - def transform_fn(data_item): - images, _ = data_item - return images - - # Model quantization - quantized_model = nncf.quantize(model, calibration_dataset) - - quantized_model = torch.compile(quantized_model, backend="openvino") - -Model weights compression example: - -.. code-block:: python - - import nncf - import openvino.torch - import torch - - # Weights compression - compressed_model = nncf.compress_model(model) - - compressed_model = torch.compile(compressed_model, backend="openvino") - -NNCF unlocks the full potential of low-precision OpenVINO kernels due to the placement of quantizers designed specifically for the OpenVINO. -Advanced algorithms like ``SmoothQuant`` or ``BiasCorrection`` allow further metrics improvement while minimizing the outputs discrepancies between the original and compressed models. -For further details, please see the `documentation `__ -and a `tutorial `__. 
- Support for PyTorch 2 export quantization (Preview) +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -NNCF is the default way to compress models for the OpenVINO backend, however -PyTorch 2 export quantization is supported by OpenVINO backend in ``torch.compile`` as well. To be able +PyTorch 2 export quantization is supported by OpenVINO backend in ``torch.compile``. To be able to access this feature, follow the steps provided in `PyTorch 2 Export Post Training Quantization with X86 Backend through Inductor `__ and update the provided sample as explained below. @@ -422,6 +347,24 @@ and update the provided sample as explained below. optimized_model = torch.compile(converted_model, backend="openvino") +TorchServe Integration ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +TorchServe is a performant, flexible, and easy to use tool for serving PyTorch models in production. For more information on the details of TorchServe, +you can refer to `TorchServe github repository. `__. With OpenVINO ``torch.compile`` integration into TorchServe you can serve +PyTorch models in production and accelerate them with OpenVINO on various Intel hardware. Detailed instructions on how to use OpenVINO with TorchServe are +available in `TorchServe examples. `__ + +Support for Automatic1111 Stable Diffusion WebUI ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +Automatic1111 Stable Diffusion WebUI is an open-source repository that hosts a browser-based interface for the Stable Diffusion +based image generation. It allows users to create realistic and creative images from text prompts. +Stable Diffusion WebUI is supported on Intel CPUs, Intel integrated GPUs, and Intel discrete GPUs by leveraging OpenVINO +``torch.compile`` capability. Detailed instructions are available in +`Stable Diffusion WebUI repository. 
`__ + + Architecture ################# diff --git a/docs/dev/build_mac_arm.md b/docs/dev/build_mac_arm.md index 8b9781e46a5c96..5a1a3698568f95 100644 --- a/docs/dev/build_mac_arm.md +++ b/docs/dev/build_mac_arm.md @@ -14,14 +14,14 @@ The software was validated on: - [brew](https://brew.sh) package manager to install additional dependencies. Use [install brew](https://brew.sh) guide to achieve this. - Installation step for python and python libraries varies depending on the host architecture: - - **arm64** Python 3.9 - 3.12 for the OpenVINO Runtime Python API: + - **arm64** Python 3.9 - 3.12 for the OpenVINO Runtime Python API, Development tools (Model Optimizer, POT and others): ```sh % # let's have a look what python versions are available in brew % brew search python % # select preferred version of python based on available ones, e.g. 3.11 % brew install python@3.11 ``` - - **x86_64** Select universal2 installer from [Python releases](https://www.python.org/downloads/macos/) download page and install `python-3.X.Y-macos11.pkg` image. This allows you to have universal python libraries of OpenVINO Python API (build x86_64). + - **x86_64** Select universal2 installer from [Python releases](https://www.python.org/downloads/macos/) download page and install `python-3.X.Y-macos11.pkg` image. This allows you to have universal python libraries and to build the x86_64 OpenVINO Python API and Development tools.
- Clang compiler and other command line tools from Xcode 10.1 or higher: ```sh @@ -35,13 +35,13 @@ The software was validated on: ```sh % brew install tbb pugixml flatbuffers snappy protobuf ``` -- Additional `pip` dependencies to build OpenVINO Runtime Python API: +- Additional `pip` dependencies to build OpenVINO Runtime Python API, Development tools (Model Optimizer, POT and others): ```sh % # update pip and setuptools to newer versions % python3 -m pip install -U pip % python3 -m pip install -r /src/bindings/python/requirements.txt ``` - Additional install requirements (after OpenVINO repo clone) in order to build OpenVINO Python API as wheel packages: + Additional install requirements (after OpenVINO repo clone) in order to build OpenVINO Python API and Development tools as wheel packages: ```sh % python3 -m pip install -r /src/bindings/python/wheel/requirements-dev.txt ``` diff --git a/docs/dev/build_mac_intel_cpu.md b/docs/dev/build_mac_intel_cpu.md index 735c8a97a3b3df..f5b70d73709c20 100644 --- a/docs/dev/build_mac_intel_cpu.md +++ b/docs/dev/build_mac_intel_cpu.md @@ -12,14 +12,14 @@ The software was validated on: - [brew](https://brew.sh) package manager to install additional dependencies. Use [install brew](https://brew.sh) guide to achieve this. - Installation step for python and python libraries varies depending on the host architecture: - - **x86_64** Python 3.9 - 3.12 for the OpenVINO Runtime Python API: + - **x86_64** Python 3.9 - 3.12 for the OpenVINO Runtime Python API, Development tools (Model Optimizer, POT and others): ```sh % # let's have a look what python versions are available in brew % brew search python % # select preferred version of python based on available ones, e.g. 3.11 % brew install python@3.11 ``` - - **arm64** Select universal2 installer from [Python releases](https://www.python.org/downloads/macos/) download page and install `python-3.X.Y-macos11.pkg` image. 
This allows to have universal python libraries of OpenVINO Python API (build x86_64) . + - **arm64** Select universal2 installer from [Python releases](https://www.python.org/downloads/macos/) download page and install `python-3.X.Y-macos11.pkg` image. This allows you to have universal python libraries and to build the x86_64 OpenVINO Python API and Development tools. - [CMake](https://cmake.org/download/) 3.13 or higher and other development tools: ```sh % brew install cmake scons fdupes git-lfs ninja @@ -32,13 +32,13 @@ The software was validated on: ```sh % brew install tbb pugixml flatbuffers snappy protobuf ``` -- Additional `pip` dependencies to build OpenVINO Runtime Python API: +- Additional `pip` dependencies to build OpenVINO Runtime Python API, Development tools (Model Optimizer, POT and others): ```sh % # update pip and setuptools to newer versions % python3 -m pip install -U pip % python3 -m pip install -r /src/bindings/python/requirements.txt ``` - Additional install requirements (after OpenVINO repo clone) in order to build OpenVINO Python API: + Additional install requirements (after OpenVINO repo clone) in order to build OpenVINO Python API and Development tools as wheel packages: ```sh % python3 -m pip install -r /src/bindings/python/wheel/requirements-dev.txt ``` diff --git a/docs/dev/ci/github_actions/adding_tests.md b/docs/dev/ci/github_actions/adding_tests.md index 464abc4f79faaa..f3e3ed7b5c77c2 100644 --- a/docs/dev/ci/github_actions/adding_tests.md +++ b/docs/dev/ci/github_actions/adding_tests.md @@ -41,13 +41,13 @@ An example step from [`job_python_unit_tests.yml`](./../../../../.github/workflo steps: ... - name: OVC unit tests - if: fromJSON(inputs.affected-components).OVC.test + if: fromJSON(inputs.affected-components).MO.test run: python3 -m pytest -s ${INSTALL_TEST_DIR}/ovc/unit_tests --junitxml=${INSTALL_TEST_DIR}/TEST-OpenVinoConversion.xml ... ``` The step includes: * a `name`: `OVC unit tests`. 
-* an `if` condition: `fromJSON(inputs.affected-components).OVC.test` +* an `if` condition: `fromJSON(inputs.affected-components).MO.test` * This step is executed only if the condition is `true`. * This is a part of the Smart CI system implemented for the OpenVINO workflow. Read the [Smart CI Overview](./smart_ci.md) to learn about the system and its usage. * a `run` section with commands to be executed. diff --git a/docs/dev/ci/github_actions/smart_ci.md b/docs/dev/ci/github_actions/smart_ci.md index 8a9c2264d3db05..d9f17595f682af 100644 --- a/docs/dev/ci/github_actions/smart_ci.md +++ b/docs/dev/ci/github_actions/smart_ci.md @@ -59,7 +59,7 @@ files inside src/frontends/tensorflow: changed_component_names: {'TF_FE'} # TF_FE is an alias we chose for TensorFlow Frontend component affected_components={ "TF_FE": {"test": true, "build": true}, - "OVC": {"test": true, "build": true}, + "MO": {"test": true, "build": true}, "CPU": {"build": true}, "Python_API": {"build": true}, ... @@ -115,7 +115,7 @@ This file describes the relationships between components, for example: ```yaml PyTorch_FE: # Component name revalidate: # Defines the list of components to revalidate (build + test) if the component above was changed - - OVC # This component depends on PyTorch_FE and requires full revalidation + - MO # This component depends on PyTorch_FE and requires full revalidation build: # Defines the list of components to build if the PyTorch_FE was changed (test runs for them are skipped) - CPU # This component and the component below must be built if PyTorch_FE was changed - Python_API @@ -124,8 +124,8 @@ For the example above, the following pipeline will be executed on changes applie * Build for PyTorch_FE * Tests for PyTorch_FE -* Build for OVC -* Tests for OVC +* Build for MO +* Tests for MO * Build for CPU * Build for Python_API diff --git a/docs/dev/installing.md b/docs/dev/installing.md index c20b2ce183de3c..de4c7ba9df9af6 100644 --- a/docs/dev/installing.md +++ 
b/docs/dev/installing.md @@ -6,86 +6,199 @@ Once the project is built you can install OpenVINO™ Runtime into custom locati cmake --install --prefix ``` -## Build and Run Samples +## Installation check -1. Build samples. +
+For versions prior to 2022.1 +

- To build C++ sample applications, run the following commands: +1. Obtaining Open Model Zoo tools and models - Linux and macOS: - ```sh - cd /samples/cpp - ./build_samples.sh - ``` +To have the ability to run samples and demos, you need to clone the Open Model Zoo repository and copy the folder under `./deployment_tools` to your install directory: - Windows Command Prompt: - ```sh - cd \samples\cpp - build_samples_msvc.bat - ``` +``` +git clone https://github.com/openvinotoolkit/open_model_zoo.git +cmake -E copy_directory ./open_model_zoo/ /deployment_tools/open_model_zoo/ +``` - Windows PowerShell: - ```sh - & /build_samples.ps1 - ``` +2. Adding OpenCV to your environment -2. Download a model. +Open Model Zoo samples use OpenCV functionality to load images. To use it for demo builds you need to provide the path to your OpenCV custom build by setting `OpenCV_DIR` environment variable and add path OpenCV libraries to the `LD_LIBRARY_PATH (Linux)` or `PATH (Windows)` variable before running demos. - You can download an image classification model from - [Hugging Face](https://huggingface.co/models?pipeline_tag=image-classification&sort=trending) - to run the sample +Linux: +```sh +export LD_LIBRARY_PATH=/path/to/opencv_install/lib/:$LD_LIBRARY_PATH +export OpenCV_DIR=/path/to/opencv_install/cmake +``` -4. Convert the model. +Windows: +```sh +set PATH=\path\to\opencv_install\bin\;%PATH% +set OpenCV_DIR=\path\to\opencv_install\cmake +``` - Linux and macOS: - ```sh - ovc --compress_to_fp16=True - ``` - Windows: - ```bat - ovc --compress_to_fp16=True - ``` +3. Running demo -5. Run inference on the sample. 
+To check your installation go to the demo directory and run Classification Demo: - Set up the OpenVINO environment variables: +Linux and macOS: +```sh +cd /deployment_tools/demo +./demo_squeezenet_download_convert_run.sh +``` - Linux and macOS: - ```sh - source /setupvars.sh - ``` +Windows: +```sh +cd \deployment_tools\demo +demo_squeezenet_download_convert_run.bat +``` - Windows Command Prompt: - ```bat - \setupvars.bat - ``` +Result: +``` +Top 10 results: - Windows PowerShell: - ```bat - . /setupvars.ps1 - ``` +Image /deployment_tools/demo/car.png + +classid probability label +------- ----------- ----- +817 0.6853030 sports car, sport car +479 0.1835197 car wheel +511 0.0917197 convertible +436 0.0200694 beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon +751 0.0069604 racer, race car, racing car +656 0.0044177 minivan +717 0.0024739 pickup, pickup truck +581 0.0017788 grille, radiator grille +468 0.0013083 cab, hack, taxi, taxicab +661 0.0007443 Model T + +[ INFO ] Execution successful +``` - The following commands run the Image Classification Code Sample using the [`dog.bmp`](https://storage.openvinotoolkit.org/data/test_data/images/ 224x224/dog.bmp) file as an input image, the model in IR format, and on different hardware devices: +

+
- Linux and macOS: - ```sh - cd ~/openvino_cpp_samples_build//Release - ./classification_sample_async -i /dog.bmp -m /model.xml -d CPU - ``` - where the is the output of ``uname -m``, for example, ``intel64``, ``armhf``, or ``aarch64``. +
+ For 2022.1 and after +

- Windows: +1. Build samples - ```bat - cd %USERPROFILE%\Documents\Intel\OpenVINO\openvino_cpp_samples_build\\Release - .\classification_sample_async.exe -i \dog.bmp -m \model.xml -d CPU - ``` - where the is either ``intel64`` or ``aarch64`` depending on the platform architecture. +To build C++ sample applications, run the following commands: -When the sample application is complete, you see the label and confidence data for the top 10 categories on the display: +Linux and macOS: +```sh +cd /samples/cpp +./build_samples.sh +``` + +Windows Command Prompt: +```sh +cd \samples\cpp +build_samples_msvc.bat +``` -Below are results of using the googlenet-v1 model. +Windows PowerShell: +```sh +& /build_samples.ps1 +``` + +2. Install OpenVINO Development Tools + +> **NOTE**: To build OpenVINO Development Tools (Model Optimizer, Post-Training Optimization Tool, Model Downloader, and Open Model Zoo tools) wheel package locally you are required to use the CMake option: `-DENABLE_WHEEL=ON`. + +To install OpenVINO Development Tools to work with Caffe models (OpenVINO support for Caffe is currently being deprecated and will be removed entirely in the future), execute the following commands: + +Linux and macOS: + +```sh +#setup virtual environment +python3 -m venv openvino_env +source openvino_env/bin/activate +pip install pip --upgrade + +#install local package from install directory +pip install openvino_dev--py3-none-any.whl[caffe] --find-links=/tools +``` + +Windows: +```bat +rem setup virtual environment +python -m venv openvino_env +openvino_env\Scripts\activate.bat +pip install pip --upgrade + +rem install local package from install directory +cd \tools +pip install openvino_dev--py3-none-any.whl[caffe] --find-links=\tools +``` + +3. 
Download the Models + +Download the following model to run the Image Classification Sample: + +Linux and macOS: +```sh +omz_downloader --name googlenet-v1 --output_dir ~/models +``` + +Windows: +```bat +omz_downloader --name googlenet-v1 --output_dir %USERPROFILE%\Documents\models +``` + +4. Convert the Model with Model Optimizer + +Linux and macOS: +```sh +mkdir ~/ir +mo --input_model ~/models/public/googlenet-v1/googlenet-v1.caffemodel --compress_to_fp16 --output_dir ~/ir +``` +Windows: +```bat +mkdir %USERPROFILE%\Documents\ir +mo --input_model %USERPROFILE%\Documents\models\public\googlenet-v1\googlenet-v1.caffemodel --compress_to_fp16 --output_dir %USERPROFILE%\Documents\ir +``` + +5. Run Inference on the Sample + +Set up the OpenVINO environment variables: + +Linux and macOS: +```sh +source /setupvars.sh +``` + +Windows Command Prompt: +```bat +\setupvars.bat +``` + +Windows PowerShell: +```bat +. /setupvars.ps1 +``` + +The following commands run the Image Classification Code Sample using the [`dog.bmp`](https://storage.openvinotoolkit.org/data/test_data/images/224x224/dog.bmp) file as an input image, the model in IR format from the `ir` directory, and on different hardware devices: + +Linux and macOS: + +```sh +cd ~/openvino_cpp_samples_build//Release +./classification_sample_async -i ~/Downloads/dog.bmp -m ~/ir/googlenet-v1.xml -d CPU +``` +where the is the output of ``uname -m``, for example, ``intel64``, ``armhf``, or ``aarch64``. + +Windows: + +```bat +cd %USERPROFILE%\Documents\Intel\OpenVINO\openvino_cpp_samples_build\\Release +.\classification_sample_async.exe -i %USERPROFILE%\Downloads\dog.bmp -m %USERPROFILE%\Documents\ir\googlenet-v1.xml -d CPU +``` +where the is either ``intel64`` or ``aarch64`` depending on the platform architecture. + +When the sample application is complete, you see the label and confidence data for the top 10 categories on the display: ``` Top 10 results: @@ -107,9 +220,36 @@ classid probability ``` +

+
## Adding OpenVINO Runtime to Your Project +
+For versions prior to 2022.1 +

+ +For CMake projects, set the `InferenceEngine_DIR` variable when you run the CMake tool: + +```sh +cmake -DInferenceEngine_DIR=/path/to/openvino/build/ . +``` + +Then you can find Inference Engine by [`find_package`]: + +```cmake +find_package(InferenceEngine REQUIRED) +target_link_libraries(${PROJECT_NAME} PRIVATE ${InferenceEngine_LIBRARIES}) +``` +

+
+ + +
+For 2022.1 and after +

+ + For CMake projects, set the `OpenVINO_DIR` and when you run CMake tool: ```sh @@ -126,6 +266,8 @@ target_link_libraries(ov_app PRIVATE openvino::runtime) add_executable(ov_c_app main.c) target_link_libraries(ov_c_app PRIVATE openvino::runtime::c) ``` +

+
## See also diff --git a/docs/dev/ov_dependencies.txt b/docs/dev/ov_dependencies.txt index cb64e4d5a6534c..d9c344d2c3048d 100644 --- a/docs/dev/ov_dependencies.txt +++ b/docs/dev/ov_dependencies.txt @@ -1,6 +1,6 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -#This file provides a comprehensive list of all dependencies of OpenVINO 2024.6 +#This file provides a comprehensive list of all dependencies of OpenVINO 2024.5 #The file is part of the automation pipeline for posting OpenVINO IR models on the HuggingFace Hub, including OneBOM dependency checks. diff --git a/docs/dev/pypi_publish/pypi-openvino-dev.md b/docs/dev/pypi_publish/pypi-openvino-dev.md new file mode 100644 index 00000000000000..868a7298b10a14 --- /dev/null +++ b/docs/dev/pypi_publish/pypi-openvino-dev.md @@ -0,0 +1,190 @@ +# OpenVINO™ Development Tools + + +> **NOTE**: This version is pre-release software and has not undergone full release validation or qualification. No support is offered on pre-release software and APIs/behavior are subject to change. It should NOT be incorporated into any production software/solution and instead should be used only for early testing and integration while awaiting a final release version of this software. + +> **NOTE**: OpenVINO™ Development Tools package has been deprecated and will be discontinued with 2025.0 release. To learn more, refer to the [OpenVINO Legacy Features and Components page](https://docs.openvino.ai/2024/documentation/legacy-features.html). + +Intel® Distribution of OpenVINO™ toolkit is an open-source toolkit for optimizing and deploying AI inference. It can be used to develop applications and solutions based on deep learning tasks, such as: emulation of human vision, automatic speech recognition, natural language processing, recommendation systems, etc. It provides high-performance and rich deployment options, from edge to cloud. 
+ +OpenVINO™ Development Tools enables you to download models from Open Model Zoo, convert your own models to OpenVINO IR, as well as optimize and tune pre-trained deep learning models. See [What's in the Package](#whats-in-the-package) for more information. + +## System Requirements + +Before you start the installation, check the supported operating systems and required Python* versions. The complete list of supported hardware is available in the [System Requirements](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/system-requirements.html). + +**C++ libraries** are also required for the installation on Windows*. To install that, you can [download the Visual Studio Redistributable file (.exe)](https://aka.ms/vs/17/release/vc_redist.x64.exe). + +> **NOTE**: This package can be installed on other versions of macOS, Linux and Windows, but only the specific versions above are fully validated. + +## Install the OpenVINO™ Development Tools Package + +There are two options to install OpenVINO Development Tools: installation into an existing environment with a deep learning framework used for model training or creation; +or installation in a new environment. + +### Installation into an Existing Environment with the Source Deep Learning Framework + +To install OpenVINO Development Tools (see the [What's in the Package](#whats-in-the-package) section of this article) into an existing environment +with the source deep learning framework used for model training or creation, run the following command: +``` +pip install openvino-dev +``` + +### Installation in a New Environment + +If you do not have an environment with the source deep learning framework for the input model or you encounter any compatibility issues between OpenVINO and your version of deep learning framework, +you may install OpenVINO Development Tools with validated versions of frameworks into a new environment. + +#### Step 1. 
Set Up Python Virtual Environment + +Use a virtual environment to avoid dependency conflicts. + +To create a virtual environment, use the following commands: + +On Windows: +```sh +python -m venv openvino_env +``` + +On Linux and macOS: +```sh +python3 -m venv openvino_env +``` + +> **NOTE**: On Linux and macOS, you may need to [install pip](https://pip.pypa.io/en/stable/installation/). For example, on Ubuntu execute the following command to get pip installed: `sudo apt install python3-venv python3-pip`. + +#### Step 2. Activate Virtual Environment + +On Linux and macOS: +```sh +source openvino_env/bin/activate +``` +On Windows: +```sh +openvino_env\Scripts\activate +``` + +#### Step 3. Set Up and Update PIP to the Highest Version + +Run the command below: +```sh +python -m pip install --upgrade pip +``` + +#### Step 4. Install the Package + +Use the following command: +```sh +pip install openvino-dev[extras] +``` + where `extras` is the source deep learning framework for the input model and is one or more of the following values separated with "," : + +| Extras Value | DL Framework | +| :-------------------------------| :------------------------------------------------------------------------------- | +| caffe | [Caffe*](https://caffe.berkeleyvision.org/) | +| kaldi | [Kaldi*](https://github.com/kaldi-asr/kaldi) | +| onnx | [ONNX*](https://github.com/microsoft/onnxruntime/) | +| pytorch | [PyTorch*](https://pytorch.org/) | +| tensorflow | [TensorFlow* 1.x](https://www.tensorflow.org/versions#tensorflow_1) | +| tensorflow2 | [TensorFlow* 2.x](https://www.tensorflow.org/versions#tensorflow_2) | + +For example, to install and configure the components for working with TensorFlow 2.x and ONNX models, use the following command: + ```sh + pip install openvino-dev[tensorflow2,onnx] + ``` +> **NOTE**: Model conversion API support for TensorFlow 1.x environment has been deprecated. Use TensorFlow 2.x environment to convert both TensorFlow 1.x and 2.x models. 
+ +> **NOTE**: On macOS, you may need to enclose the package name in quotes: `pip install "openvino-dev[extras]"`. + +## How to Verify that the Package Is Installed + +- To verify that the **developer package** is properly installed, run the command below (this may take a few seconds): + ```sh + mo -h + ``` + You will see the help message for ``mo`` if installation finished successfully. + +- To verify that OpenVINO Runtime from the **runtime package** is available, run the command below: + ```sh + python -c "from openvino import Core; print(Core().available_devices)" + ``` + If installation was successful, you will see a list of available devices. + + + +## What's in the Package? + +> **NOTE**: The openvino-dev package installs [OpenVINO™ Runtime](https://pypi.org/project/openvino) as a dependency, which is the engine that runs the deep learning model and includes a set of libraries for an easy inference integration into your applications. + +**In addition, the openvino-dev package installs the following components by default:** + +| Component | Console Script | Description | +|------------------|---------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| [Legacy Model conversion API](https://docs.openvino.ai/2024/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api.html) | `mo` |**Model conversion API** imports, converts, and optimizes models that were trained in popular frameworks to a format usable by OpenVINO components.
Supported frameworks include Caffe\*, TensorFlow\*, MXNet\*, PaddlePaddle\*, and ONNX\*. | | +| [Model Downloader and other Open Model Zoo tools](https://docs.openvino.ai/2024/omz_tools_downloader.html)| `omz_downloader`
`omz_converter`
`omz_quantizer`
`omz_info_dumper`| **Model Downloader** is a tool for getting access to the collection of high-quality and extremely fast pre-trained deep learning [public](@ref omz_models_group_public) and [Intel](@ref omz_models_group_intel)-trained models. These free pre-trained models can be used to speed up the development and production deployment process without training your own models. The tool downloads model files from online sources and, if necessary, patches them to make them more usable with model conversion API. A number of additional tools are also provided to automate the process of working with downloaded models:
**Model Converter** is a tool for converting Open Model Zoo models that are stored in an original deep learning framework format into the OpenVINO Intermediate Representation (IR) using model conversion API.
**Model Quantizer** is a tool for automatic quantization of full-precision models in the IR format into low-precision versions using the Post-Training Optimization Tool.
**Model Information Dumper** is a helper utility for dumping information about the models to a stable, machine-readable format. | + +## Troubleshooting + +For general troubleshooting steps and issues, see [Troubleshooting Guide for OpenVINO Installation](https://docs.openvino.ai/2024/get-started/troubleshooting-install-config.html). The following sections also provide explanations for several error messages. + +### Errors with Installing via PIP for Users in China + +Users in China might encounter errors while downloading sources via PIP during OpenVINO™ installation. To resolve the issues, try the following solution: + +* Add the download source using the ``-i`` parameter with the Python ``pip`` command. For example: + + ``` sh + pip install openvino-dev -i https://mirrors.aliyun.com/pypi/simple/ + ``` + Use the ``--trusted-host`` parameter if the URL above is ``http`` instead of ``https``. + You can also run the following command to install openvino-dev with specific frameworks. For example: + + ``` + pip install openvino-dev[tensorflow2] -i https://mirrors.aliyun.com/pypi/simple/ + ``` + +### zsh: no matches found : openvino-dev[...] + +If you use the zsh (Z shell) interpreter, which is the default shell for macOS starting with version 10.15 (Catalina), you may encounter the following error while installing the `openvino-dev` package with extras: + +```sh +pip install openvino-dev[tensorflow2,caffe] +zsh: no matches found: openvino-dev[tensorflow2,caffe] +``` + +By default zsh interprets square brackets as an expression for pattern matching. To resolve this issue, you need to escape the command with quotes: + +```sh +pip install 'openvino-dev[tensorflow2,caffe]' +``` + +To avoid such issues you can also disable globbing for PIP commands by defining an alias in the `~/.zshrc` file: + +```sh +alias pip='noglob pip' +``` + +### ERROR:root:Could not find OpenVINO Python API. + +On Windows*, some libraries are necessary to run OpenVINO. 
To resolve this issue, install the [C++ redistributable (.exe)](https://aka.ms/vs/17/release/vc_redist.x64.exe). You can also view a full download list on the [official support page](https://docs.microsoft.com/en-us/cpp/windows/latest-supported-vc-redist). + +### ImportError: libpython3.8.so.1.0: cannot open shared object file: No such file or directory + +To resolve missing external dependency on Ubuntu* 18.04, execute the following command: +```sh +sudo apt-get install libpython3.8 +``` + +## Additional Resources + +- [Intel® Distribution of OpenVINO™ toolkit](https://software.intel.com/en-us/openvino-toolkit) +- [OpenVINO™ Documentation](https://docs.openvino.ai/) +- [OpenVINO™ Notebooks](https://github.com/openvinotoolkit/openvino_notebooks) +- [OpenVINO Installation Selector Tool](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html) + +Copyright © 2018-2024 Intel Corporation +> **LEGAL NOTICE**: Your use of this software and any required dependent software (the +“Software Package”) is subject to the terms and conditions of the [Apache 2.0 License](https://www.apache.org/licenses/LICENSE-2.0.html) for the Software Package, which may also include notices, disclaimers, or +license terms for third party or open source software included in or with the Software Package, and your use indicates your acceptance of all such terms. Please refer to the “third-party-programs.txt” or other similarly-named text file included with the Software Package for additional details. + +>Intel is committed to the respect of human rights and avoiding complicity in human rights abuses, a policy reflected in the [Intel Global Human Rights Principles](https://www.intel.com/content/www/us/en/policy/policy-human-rights.html). Accordingly, by accessing the Intel material on this platform you agree that you will not use the material in a product or application that causes or contributes to a violation of an internationally recognized human right. 
diff --git a/docs/dev/pypi_publish/pypi-openvino-rt.md b/docs/dev/pypi_publish/pypi-openvino-rt.md index 642eb12d65e8f9..854984ed2a0734 100644 --- a/docs/dev/pypi_publish/pypi-openvino-rt.md +++ b/docs/dev/pypi_publish/pypi-openvino-rt.md @@ -6,8 +6,8 @@ Intel® Distribution of OpenVINO™ toolkit is an open-source toolkit for optimizing and deploying AI inference. It can be used to develop applications and solutions based on deep learning tasks, such as: emulation of human vision, automatic speech recognition, natural language processing, -recommendation systems, image generation, etc. It provides high-performance and rich deployment -options, from edge to cloud. +recommendation systems, etc. It provides high-performance and rich deployment options, from +edge to cloud. If you have chosen a model, you can integrate it with your application through OpenVINO™ and deploy it on various devices. The OpenVINO™ Python package includes a set of libraries for easy @@ -26,7 +26,7 @@ versions. The complete list of supported hardware is available on the ## Install OpenVINO™ -### Step 1. Set up Python virtual environment +### Step 1. Set Up Python Virtual Environment Use a virtual environment to avoid dependency conflicts. To create a virtual environment, use the following commands: @@ -43,7 +43,7 @@ python3 -m venv openvino_env > **NOTE**: On Linux and macOS, you may need to [install pip](https://pip.pypa.io/en/stable/installation/). -### Step 2. Activate the virtual environment +### Step 2. Activate the Virtual Environment On Windows: ```sh @@ -55,23 +55,24 @@ On Linux and macOS: source openvino_env/bin/activate ``` -### Step 3. Set up PIP and update it to the highest version +### Step 3. Set Up and Update PIP to the Highest Version -Run the command: +Run the command below: ```sh python -m pip install --upgrade pip ``` -### Step 4. Install the package +### Step 4. Install the Package -Run the command: -```sh -pip install openvino -``` +Run the command below:
+ + ```sh + pip install openvino + ``` -### Step 5. Verify that the package is installed +### Step 5. Verify that the Package Is Installed -Run the command: +Run the command below: ```sh python -c "from openvino import Core; print(Core().available_devices)" ``` @@ -87,22 +88,22 @@ If installation was successful, you will see the list of available devices. Description - OpenVINO Runtime + OpenVINO Runtime `openvino package` OpenVINO Runtime is a set of C++ libraries with C and Python bindings providing a common API to deliver inference solutions on the platform of your choice. Use the OpenVINO Runtime API to read PyTorch, TensorFlow, TensorFlow Lite, ONNX, and PaddlePaddle models and execute them on preferred devices. OpenVINO Runtime uses a plugin architecture and includes the following plugins: - CPU, - GPU, - Auto Batch, - Auto, - Hetero, + CPU, + GPU, + Auto Batch, + Auto, + Hetero, - OpenVINO Model Converter (OVC) + OpenVINO Model Converter (OVC) `ovc` OpenVINO Model Converter converts models that were trained in popular frameworks to a format usable by OpenVINO components.
Supported frameworks include ONNX, TensorFlow, @@ -110,7 +111,7 @@ If installation was successful, you will see the list of available devices. - Benchmark Tool + Benchmark Tool `benchmark_app` Benchmark Application** allows you to estimate deep learning inference performance on supported devices for synchronous and asynchronous modes. @@ -121,8 +122,8 @@ If installation was successful, you will see the list of available devices. ## Troubleshooting -For general troubleshooting, see the -[Troubleshooting Guide for OpenVINO Installation](https://docs.openvino.ai/2025/get-started/troubleshooting-install-config.html). +For general troubleshooting steps and issues, see +[Troubleshooting Guide for OpenVINO Installation](https://docs.openvino.ai/2024/get-started/troubleshooting-install-config.html). The following sections also provide explanations to several error messages. ### Errors with Installing via PIP for Users in China @@ -144,11 +145,11 @@ the [C++ redistributable (.exe)](https://aka.ms/vs/17/release/vc_redist.x64.exe) You can also view a full download list on the [official support page](https://docs.microsoft.com/en-us/cpp/windows/latest-supported-vc-redist). 
-### ImportError: libpython3.10.so.1.0: cannot open shared object file: No such file or directory +### ImportError: libpython3.8.so.1.0: cannot open shared object file: No such file or directory To resolve missing external dependency on Ubuntu*, execute the following command: ```sh -sudo apt-get install libpython3.10 +sudo apt-get install libpython3.8 ``` ## Additional Resources @@ -158,7 +159,7 @@ sudo apt-get install libpython3.10 - [OpenVINO™ Notebooks](https://github.com/openvinotoolkit/openvino_notebooks) - [OpenVINO Installation Selector Tool](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html) -Copyright © 2018-2025 Intel Corporation +Copyright © 2018-2024 Intel Corporation > **LEGAL NOTICE**: Your use of this software and any required dependent software (the “Software Package”) is subject to the terms and conditions of the [Apache 2.0 License](https://www.apache.org/licenses/LICENSE-2.0.html) for the Software Package, diff --git a/docs/nbdoc/consts.py b/docs/nbdoc/consts.py index e5c5d4773dce4c..bfad4b042e5359 100644 --- a/docs/nbdoc/consts.py +++ b/docs/nbdoc/consts.py @@ -6,7 +6,7 @@ repo_owner = "openvinotoolkit" repo_name = "openvino_notebooks" repo_branch = "tree/main" -artifacts_link = "http://repository.toolbox.iotg.sclab.intel.com/projects/ov-notebook/0.1.0-latest/20241209220902/dist/rst_files/" +artifacts_link = "http://repository.toolbox.iotg.sclab.intel.com/projects/ov-notebook/0.1.0-latest/20241104220807/dist/rst_files/" blacklisted_extensions = ['.xml', '.bin'] notebooks_repo = "https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/" notebooks_binder = "https://mybinder.org/v2/gh/openvinotoolkit/openvino_notebooks/HEAD?filepath=" diff --git a/docs/notebooks/3D-pose-estimation-with-output.rst b/docs/notebooks/3D-pose-estimation-with-output.rst index 7959bf48a75e45..f39aa93b36851d 100644 --- a/docs/notebooks/3D-pose-estimation-with-output.rst +++ b/docs/notebooks/3D-pose-estimation-with-output.rst 
@@ -93,11 +93,6 @@ Lab instead.** .. code:: ipython3 - import platform - - if platform.system() == "Darwin": - %pip install -q "numpy<2.0.0" - %pip install pythreejs "openvino>=2024.4.0" "opencv-python" "torch" "tqdm" --extra-index-url https://download.pytorch.org/whl/cpu @@ -113,67 +108,68 @@ Lab instead.** Collecting torch Using cached https://download.pytorch.org/whl/cpu/torch-2.4.1%2Bcpu-cp38-cp38-linux_x86_64.whl (194.9 MB) Collecting tqdm - Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB) - Requirement already satisfied: ipywidgets>=7.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (8.1.5) + Using cached tqdm-4.66.6-py3-none-any.whl.metadata (57 kB) + Requirement already satisfied: ipywidgets>=7.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (8.1.5) Collecting ipydatawidgets>=1.1.1 (from pythreejs) Using cached ipydatawidgets-4.3.5-py2.py3-none-any.whl.metadata (1.4 kB) Collecting numpy (from pythreejs) Using cached numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB) - Requirement already satisfied: traitlets in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (5.14.3) + Requirement already satisfied: traitlets in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (5.14.3) Collecting openvino-telemetry>=2023.2.1 (from openvino>=2024.4.0) - Using cached openvino_telemetry-2024.5.0-py3-none-any.whl.metadata (2.3 kB) - Requirement already satisfied: packaging in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2024.4.0) (24.2) + Using cached openvino_telemetry-2024.1.0-py3-none-any.whl.metadata (2.3 kB) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2024.4.0) (24.1) Collecting filelock (from torch) Using cached filelock-3.16.1-py3-none-any.whl.metadata (2.9 kB) - Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (4.12.2) + Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (4.12.2) Collecting sympy (from torch) Using cached sympy-1.13.3-py3-none-any.whl.metadata (12 kB) Collecting networkx (from torch) Using cached https://download.pytorch.org/whl/networkx-3.2.1-py3-none-any.whl (1.6 MB) - Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1.4) + Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1.4) Collecting fsspec (from torch) Using cached fsspec-2024.10.0-py3-none-any.whl.metadata (11 kB) Collecting traittypes>=0.2.0 (from ipydatawidgets>=1.1.1->pythreejs) Using cached traittypes-0.2.1-py2.py3-none-any.whl.metadata (1.0 kB) - Requirement already satisfied: comm>=0.1.3 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (0.2.2) - Requirement already satisfied: ipython>=6.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (8.12.3) - Requirement already satisfied: widgetsnbextension~=4.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (4.0.13) - Requirement already satisfied: jupyterlab-widgets~=3.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (3.0.13) - Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.5) + Requirement already satisfied: comm>=0.1.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (0.2.2) + Requirement already satisfied: ipython>=6.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (8.12.3) + Requirement already satisfied: widgetsnbextension~=4.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (4.0.13) + Requirement already satisfied: 
jupyterlab-widgets~=3.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (3.0.13) + Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.5) INFO: pip is looking at multiple versions of networkx to determine which version is compatible with other requirements. This could take a while. Collecting networkx (from torch) Using cached networkx-3.1-py3-none-any.whl.metadata (5.3 kB) Collecting mpmath<1.4,>=1.1.0 (from sympy->torch) Using cached https://download.pytorch.org/whl/mpmath-1.3.0-py3-none-any.whl (536 kB) - Requirement already satisfied: backcall in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.0) - Requirement already satisfied: decorator in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (5.1.1) - Requirement already satisfied: jedi>=0.16 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.19.2) - Requirement already satisfied: matplotlib-inline in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.1.7) - Requirement already satisfied: pickleshare in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.5) - Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (3.0.48) - Requirement already satisfied: pygments>=2.4.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.18.0) - Requirement already satisfied: stack-data in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.6.3) - Requirement already satisfied: pexpect>4.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (4.9.0) - Requirement already satisfied: parso<0.9.0,>=0.8.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.8.4) - Requirement already satisfied: ptyprocess>=0.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pexpect>4.3->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.0) - Requirement already satisfied: wcwidth in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.13) - Requirement already satisfied: executing>=1.2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.1.0) - Requirement already satisfied: asttokens>=2.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (3.0.0) - Requirement already satisfied: pure-eval in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.3) + Requirement already satisfied: backcall in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.0) + Requirement already satisfied: decorator in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (5.1.1) + Requirement already satisfied: jedi>=0.16 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.19.1) + Requirement already satisfied: matplotlib-inline in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.1.7) + Requirement already satisfied: pickleshare in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.5) + Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (3.0.48) + Requirement already satisfied: pygments>=2.4.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.18.0) + Requirement already satisfied: stack-data in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.6.3) + Requirement already satisfied: pexpect>4.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (4.9.0) + Requirement already satisfied: parso<0.9.0,>=0.8.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.8.4) + Requirement already satisfied: ptyprocess>=0.5 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pexpect>4.3->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.0) + Requirement already satisfied: wcwidth in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.13) + Requirement already satisfied: executing>=1.2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.1.0) + Requirement already satisfied: asttokens>=2.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.4.1) + Requirement already satisfied: pure-eval in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.3) + Requirement already satisfied: six>=1.12.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from asttokens>=2.1.0->stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (1.16.0) Using cached pythreejs-2.4.2-py3-none-any.whl (3.4 MB) Using cached openvino-2024.4.0-16579-cp38-cp38-manylinux2014_x86_64.whl (42.6 MB) Using cached opencv_python-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (62.5 MB) - Using cached tqdm-4.67.1-py3-none-any.whl (78 kB) + Using cached tqdm-4.66.6-py3-none-any.whl (78 kB) Using cached 
ipydatawidgets-4.3.5-py2.py3-none-any.whl (271 kB) Using cached numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB) - Using cached openvino_telemetry-2024.5.0-py3-none-any.whl (23 kB) + Using cached openvino_telemetry-2024.1.0-py3-none-any.whl (23 kB) Using cached filelock-3.16.1-py3-none-any.whl (16 kB) Using cached fsspec-2024.10.0-py3-none-any.whl (179 kB) Using cached networkx-3.1-py3-none-any.whl (2.1 MB) Using cached sympy-1.13.3-py3-none-any.whl (6.2 MB) Using cached traittypes-0.2.1-py2.py3-none-any.whl (8.6 kB) Installing collected packages: openvino-telemetry, mpmath, traittypes, tqdm, sympy, numpy, networkx, fsspec, filelock, torch, openvino, opencv-python, ipydatawidgets, pythreejs - Successfully installed filelock-3.16.1 fsspec-2024.10.0 ipydatawidgets-4.3.5 mpmath-1.3.0 networkx-3.1 numpy-1.24.4 opencv-python-4.10.0.84 openvino-2024.4.0 openvino-telemetry-2024.5.0 pythreejs-2.4.2 sympy-1.13.3 torch-2.4.1+cpu tqdm-4.67.1 traittypes-0.2.1 + Successfully installed filelock-3.16.1 fsspec-2024.10.0 ipydatawidgets-4.3.5 mpmath-1.3.0 networkx-3.1 numpy-1.24.4 opencv-python-4.10.0.84 openvino-2024.4.0 openvino-telemetry-2024.1.0 pythreejs-2.4.2 sympy-1.13.3 torch-2.4.1+cpu tqdm-4.66.6 traittypes-0.2.1 Note: you may need to restart the kernel to use updated packages. 
@@ -197,19 +193,17 @@ Imports # Fetch `notebook_utils` module import requests - if not Path("notebook_utils.py").exists(): - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - with open("notebook_utils.py", "w") as f: - f.write(r.text) + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + with open("notebook_utils.py", "w") as f: + f.write(r.text) - if not Path("engine3js.py").exists(): - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/engine3js.py", - ) - with open("engine3js.py", "w") as f: - f.write(r.text) + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/engine3js.py", + ) + with open("engine3js.py", "w") as f: + f.write(r.text) import notebook_utils as utils import engine3js as engine @@ -233,11 +227,10 @@ Download the model # directory where model will be downloaded base_model_dir = Path("model") - if not base_model_dir.exists(): - download_file( - "https://storage.openvinotoolkit.org/repositories/open_model_zoo/public/2022.1/human-pose-estimation-3d-0001/human-pose-estimation-3d.tar.gz", - directory=base_model_dir, - ) + download_file( + "https://storage.openvinotoolkit.org/repositories/open_model_zoo/public/2022.1/human-pose-estimation-3d-0001/human-pose-estimation-3d.tar.gz", + directory=base_model_dir, + ) ckpt_file = base_model_dir / "human-pose-estimation-3d-0001.pth" @@ -249,7 +242,7 @@ Download the model .. parsed-literal:: - human-pose-estimation-3d.tar.gz: 0%| | 0.00/17.6M [00:00`__ +is a lightweight text to image model based off of the +`muse `__ architecture. Amused is +particularly useful in applications that require a lightweight and fast +model such as generating many images quickly at once. 
+ +Amused is a VQVAE token based transformer that can generate an image in +fewer forward passes than many diffusion models. In contrast with muse, +it uses the smaller text encoder CLIP-L/14 instead of t5-xxl. Due to its +small parameter count and few forward pass generation process, amused +can generate many images quickly. This benefit is seen particularly at +larger batch sizes. + + +**Table of contents:** + + +- `Prerequisites <#prerequisites>`__ +- `Load and run the original + pipeline <#load-and-run-the-original-pipeline>`__ +- `Convert the model to OpenVINO + IR <#convert-the-model-to-openvino-ir>`__ + + - `Convert the Text Encoder <#convert-the-text-encoder>`__ + - `Convert the U-ViT transformer <#convert-the-u-vit-transformer>`__ + - `Convert VQ-GAN decoder + (VQVAE) <#convert-vq-gan-decoder-vqvae>`__ + +- `Compiling models and prepare + pipeline <#compiling-models-and-prepare-pipeline>`__ +- `Quantization <#quantization>`__ + + - `Prepare calibration dataset <#prepare-calibration-dataset>`__ + - `Run model quantization <#run-model-quantization>`__ + - `Compute Inception Scores and inference + time <#compute-inception-scores-and-inference-time>`__ + +- `Interactive inference <#interactive-inference>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +Prerequisites +------------- + + + +.. code:: ipython3 + + %pip install -q transformers "diffusers>=0.25.0" "openvino>=2023.2.0" "accelerate>=0.20.3" "gradio>=4.19" "torch>=2.1" "pillow" "torchmetrics" "torch-fidelity" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "nncf>=2.9.0" datasets + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. 
+ Note: you may need to restart the kernel to use updated packages. + + +.. code:: ipython3 + + # Fetch the notebook utils script from the openvino_notebooks repo + import requests + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + + + + +.. parsed-literal:: + + 24692 + + + +Load and run the original pipeline +---------------------------------- + + + +.. code:: ipython3 + + import torch + from diffusers import AmusedPipeline + + + pipe = AmusedPipeline.from_pretrained( + "amused/amused-256", + ) + + prompt = "kind smiling ghost" + image = pipe(prompt, generator=torch.Generator("cpu").manual_seed(8)).images[0] + image.save("text2image_256.png") + + + +.. parsed-literal:: + + Loading pipeline components...: 0%| | 0/5 [00:00 1 or self.sliding_window is not None: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if past_key_values_length > 0: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:861: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ encoder_states = () if output_hidden_states else None + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:866: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if output_hidden_states: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:889: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if output_hidden_states: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:892: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if not return_dict: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:988: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ if not return_dict: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:1486: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if not return_dict: + + +Convert the U-ViT transformer +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + class TransformerWrapper(torch.nn.Module): + def __init__(self, transformer): + super().__init__() + self.transformer = transformer + + def forward( + self, + latents=None, + micro_conds=None, + pooled_text_emb=None, + encoder_hidden_states=None, + ): + return self.transformer( + latents, + micro_conds=micro_conds, + pooled_text_emb=pooled_text_emb, + encoder_hidden_states=encoder_hidden_states, + ) + + + shape = (1, 16, 16) + latents = torch.full(shape, pipe.scheduler.config.mask_token_id, dtype=torch.long) + latents = torch.cat([latents] * 2) + + + example_input = { + "latents": latents, + "micro_conds": torch.rand([2, 5], dtype=torch.float32), + "pooled_text_emb": torch.rand([2, 768], dtype=torch.float32), + "encoder_hidden_states": torch.rand([2, 77, 768], dtype=torch.float32), + } + + + pipe.transformer.eval() + w_transformer = TransformerWrapper(pipe.transformer) + convert(w_transformer, TRANSFORMER_OV_PATH, example_input) + +Convert VQ-GAN decoder (VQVAE) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Function ``get_latents`` is +needed to return real latents for the conversion. Due to the VQVAE +implementation autogenerated tensor of the required shape is not +suitable. This function repeats part of ``AmusedPipeline``. + +.. 
code:: ipython3 + + def get_latents(): + shape = (1, 16, 16) + latents = torch.full(shape, pipe.scheduler.config.mask_token_id, dtype=torch.long) + model_input = torch.cat([latents] * 2) + + model_output = pipe.transformer( + model_input, + micro_conds=torch.rand([2, 5], dtype=torch.float32), + pooled_text_emb=torch.rand([2, 768], dtype=torch.float32), + encoder_hidden_states=torch.rand([2, 77, 768], dtype=torch.float32), + ) + guidance_scale = 10.0 + uncond_logits, cond_logits = model_output.chunk(2) + model_output = uncond_logits + guidance_scale * (cond_logits - uncond_logits) + + latents = pipe.scheduler.step( + model_output=model_output, + timestep=torch.tensor(0), + sample=latents, + ).prev_sample + + return latents + + + class VQVAEWrapper(torch.nn.Module): + def __init__(self, vqvae): + super().__init__() + self.vqvae = vqvae + + def forward(self, latents=None, force_not_quantize=True, shape=None): + outputs = self.vqvae.decode( + latents, + force_not_quantize=force_not_quantize, + shape=shape.tolist(), + ) + + return outputs + + + latents = get_latents() + example_vqvae_input = { + "latents": latents, + "force_not_quantize": torch.tensor(True), + "shape": torch.tensor((1, 16, 16, 64)), + } + + convert(VQVAEWrapper(pipe.vqvae), VQVAE_OV_PATH, example_vqvae_input) + + +.. parsed-literal:: + + /tmp/ipykernel_498025/3779428577.py:34: TracerWarning: Converting a tensor to a Python list might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + shape=shape.tolist(), + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/autoencoders/vq_model.py:144: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if not force_not_quantize: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:147: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + assert hidden_states.shape[1] == self.channels + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if hidden_states.shape[0] >= 64: + + +Compiling models and prepare pipeline +------------------------------------- + + + +Select device from dropdown list for running inference using OpenVINO. + +.. code:: ipython3 + + from notebook_utils import device_widget + + device = device_widget() + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + +.. code:: ipython3 + + core = ov.Core() + + ov_text_encoder = core.compile_model(TEXT_ENCODER_OV_PATH, device.value) + ov_transformer = core.compile_model(TRANSFORMER_OV_PATH, device.value) + ov_vqvae = core.compile_model(VQVAE_OV_PATH, device.value) + +Let’s create callable wrapper classes for compiled models to allow +interaction with original ``AmusedPipeline`` class. 
Note that all of +wrapper classes return ``torch.Tensor``\ s instead of ``np.array``\ s. + +.. code:: ipython3 + + from collections import namedtuple + + + class ConvTextEncoderWrapper(torch.nn.Module): + def __init__(self, text_encoder, config): + super().__init__() + self.config = config + self.text_encoder = text_encoder + + def forward(self, input_ids=None, return_dict=None, output_hidden_states=None): + inputs = { + "input_ids": input_ids, + "return_dict": return_dict, + "output_hidden_states": output_hidden_states, + } + + outs = self.text_encoder(inputs) + + outputs = namedtuple("CLIPTextModelOutput", ("text_embeds", "last_hidden_state", "hidden_states")) + + text_embeds = torch.from_numpy(outs[0]) + last_hidden_state = torch.from_numpy(outs[1]) + hidden_states = list(torch.from_numpy(out) for out in outs.values())[2:] + + return outputs(text_embeds, last_hidden_state, hidden_states) + +.. code:: ipython3 + + class ConvTransformerWrapper(torch.nn.Module): + def __init__(self, transformer, config): + super().__init__() + self.config = config + self.transformer = transformer + + def forward(self, latents=None, micro_conds=None, pooled_text_emb=None, encoder_hidden_states=None, **kwargs): + outputs = self.transformer( + { + "latents": latents, + "micro_conds": micro_conds, + "pooled_text_emb": pooled_text_emb, + "encoder_hidden_states": encoder_hidden_states, + }, + share_inputs=False, + ) + + return torch.from_numpy(outputs[0]) + +.. 
code:: ipython3 + + class ConvVQVAEWrapper(torch.nn.Module): + def __init__(self, vqvae, dtype, config): + super().__init__() + self.vqvae = vqvae + self.dtype = dtype + self.config = config + + def decode(self, latents=None, force_not_quantize=True, shape=None): + inputs = { + "latents": latents, + "force_not_quantize": force_not_quantize, + "shape": torch.tensor(shape), + } + + outs = self.vqvae(inputs) + outs = namedtuple("VQVAE", "sample")(torch.from_numpy(outs[0])) + + return outs + +And insert wrappers instances in the pipeline: + +.. code:: ipython3 + + prompt = "kind smiling ghost" + + transformer = pipe.transformer + vqvae = pipe.vqvae + text_encoder = pipe.text_encoder + + pipe.__dict__["_internal_dict"]["_execution_device"] = pipe._execution_device # this is to avoid some problem that can occur in the pipeline + pipe.register_modules( + text_encoder=ConvTextEncoderWrapper(ov_text_encoder, text_encoder.config), + transformer=ConvTransformerWrapper(ov_transformer, transformer.config), + vqvae=ConvVQVAEWrapper(ov_vqvae, vqvae.dtype, vqvae.config), + ) + + image = pipe(prompt, generator=torch.Generator("cpu").manual_seed(8)).images[0] + image.save("text2image_256.png") + + +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) + + + +.. 
parsed-literal:: + + 0%| | 0/12 [00:00`__ enables +post-training quantization by adding quantization layers into model +graph and then using a subset of the training dataset to initialize the +parameters of these additional quantization layers. Quantized operations +are executed in ``INT8`` instead of ``FP32``/``FP16`` making model +inference faster. + +According to ``Amused`` pipeline structure, the vision transformer model +takes up significant portion of the overall pipeline execution time. Now +we will show you how to optimize the UNet part using +`NNCF `__ to reduce +computation cost and speed up the pipeline. Quantizing the rest of the +pipeline does not significantly improve inference performance but can +lead to a substantial degradation of generations quality. + +We also estimate the quality of generations produced by optimized +pipeline with `Inception +Score `__ which is often +used to measure quality of text-to-image generation systems. + +The steps are the following: + +1. Create a calibration dataset for quantization. +2. Run ``nncf.quantize()`` on the model. +3. Save the quantized model using ``openvino.save_model()`` function. +4. Compare inference time and Inception score for original and quantized + pipelines. + +Please select below whether you would like to run quantization to +improve model inference speed. + + **NOTE**: Quantization is time and memory consuming operation. + Running quantization code below may take some time. + +.. code:: ipython3 + + from notebook_utils import quantization_widget + + QUANTIZED_TRANSFORMER_OV_PATH = Path(str(TRANSFORMER_OV_PATH).replace(".xml", "_quantized.xml")) + + skip_for_device = "GPU" in device.value + to_quantize = quantization_widget(not skip_for_device) + to_quantize + + + + +.. parsed-literal:: + + Checkbox(value=True, description='Quantization') + + + +.. 
code:: ipython3 + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", + ) + open("skip_kernel_extension.py", "w").write(r.text) + + %load_ext skip_kernel_extension + +Prepare calibration dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +We use a portion of +`conceptual_captions `__ +dataset from Hugging Face as calibration data. To collect intermediate +model inputs for calibration we customize ``CompiledModel``. + +.. code:: ipython3 + + %%skip not $to_quantize.value + + import datasets + from tqdm.auto import tqdm + from typing import Any, Dict, List + import pickle + import numpy as np + + + def disable_progress_bar(pipeline, disable=True): + if not hasattr(pipeline, "_progress_bar_config"): + pipeline._progress_bar_config = {'disable': disable} + else: + pipeline._progress_bar_config['disable'] = disable + + + class CompiledModelDecorator(ov.CompiledModel): + def __init__(self, compiled_model: ov.CompiledModel, data_cache: List[Any] = None, keep_prob: float = 0.5): + super().__init__(compiled_model) + self.data_cache = data_cache if data_cache is not None else [] + self.keep_prob = keep_prob + + def __call__(self, *args, **kwargs): + if np.random.rand() <= self.keep_prob: + self.data_cache.append(*args) + return super().__call__(*args, **kwargs) + + + def collect_calibration_data(ov_transformer_model, calibration_dataset_size: int) -> List[Dict]: + calibration_dataset_filepath = Path(f"calibration_data/{calibration_dataset_size}.pkl") + if not calibration_dataset_filepath.exists(): + calibration_data = [] + pipe.transformer.transformer = CompiledModelDecorator(ov_transformer_model, calibration_data, keep_prob=1.0) + disable_progress_bar(pipe) + + dataset = datasets.load_dataset("google-research-datasets/conceptual_captions", split="train", trust_remote_code=True).shuffle(seed=42) + + # Run inference for data collection + pbar = tqdm(total=calibration_dataset_size) + for batch in 
dataset: + prompt = batch["caption"] + if len(prompt) > pipe.tokenizer.model_max_length: + continue + pipe(prompt, generator=torch.Generator('cpu').manual_seed(0)) + pbar.update(len(calibration_data) - pbar.n) + if pbar.n >= calibration_dataset_size: + break + + pipe.transformer.transformer = ov_transformer_model + disable_progress_bar(pipe, disable=False) + + calibration_dataset_filepath.parent.mkdir(exist_ok=True, parents=True) + with open(calibration_dataset_filepath, 'wb') as f: + pickle.dump(calibration_data, f) + + with open(calibration_dataset_filepath, 'rb') as f: + calibration_data = pickle.load(f) + return calibration_data + +Run model quantization +~~~~~~~~~~~~~~~~~~~~~~ + + + +Run calibration data collection and quantize the vision transformer +model. + +.. code:: ipython3 + + %%skip not $to_quantize.value + + from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters + from nncf.quantization.range_estimator import RangeEstimatorParameters, StatisticsCollectorParameters, StatisticsType, \ + AggregatorType + import nncf + + CALIBRATION_DATASET_SIZE = 12 * 25 + + if not QUANTIZED_TRANSFORMER_OV_PATH.exists(): + calibration_data = collect_calibration_data(ov_transformer, CALIBRATION_DATASET_SIZE) + quantized_model = nncf.quantize( + core.read_model(TRANSFORMER_OV_PATH), + nncf.Dataset(calibration_data), + model_type=nncf.ModelType.TRANSFORMER, + subset_size=len(calibration_data), + # We ignore convolutions to improve quality of generations without significant drop in inference speed + ignored_scope=nncf.IgnoredScope(types=["Convolution"]), + # Value of 0.85 was obtained using grid search based on Inception Score computed below + advanced_parameters=nncf.AdvancedQuantizationParameters( + smooth_quant_alphas=AdvancedSmoothQuantParameters(matmul=0.85), + # During activation statistics collection we ignore 1% of outliers which improves quantization quality + activations_range_estimator_params=RangeEstimatorParameters( + 
min=StatisticsCollectorParameters(statistics_type=StatisticsType.MIN, + aggregator_type=AggregatorType.MEAN_NO_OUTLIERS, + quantile_outlier_prob=0.01), + max=StatisticsCollectorParameters(statistics_type=StatisticsType.MAX, + aggregator_type=AggregatorType.MEAN_NO_OUTLIERS, + quantile_outlier_prob=0.01) + ) + ) + ) + ov.save_model(quantized_model, QUANTIZED_TRANSFORMER_OV_PATH) + + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, openvino + + + +.. parsed-literal:: + + 0%| | 0/300 [00:00`__ of original and +quantized pipelines on a small subset of images. Images are generated +from prompts of ``conceptual_captions`` validation set. We also measure +the time it took to generate the images for comparison reasons. + +Please note that the validation dataset size is small and serves only as +a rough estimate of generation quality. + +.. code:: ipython3 + + %%skip not $to_quantize.value + + from torchmetrics.image.inception import InceptionScore + from torchvision import transforms as transforms + from itertools import islice + import time + + VALIDATION_DATASET_SIZE = 100 + + def compute_inception_score(ov_transformer_model_path, validation_set_size, batch_size=100): + original_ov_transformer_model = pipe.transformer.transformer + pipe.transformer.transformer = core.compile_model(ov_transformer_model_path, device.value) + + disable_progress_bar(pipe) + dataset = datasets.load_dataset("google-research-datasets/conceptual_captions", "unlabeled", split="validation", trust_remote_code=True).shuffle(seed=42) + dataset = islice(dataset, validation_set_size) + + inception_score = InceptionScore(normalize=True, splits=1) + + images = [] + infer_times = [] + for batch in tqdm(dataset, total=validation_set_size, desc="Computing Inception Score"): + prompt = batch["caption"] + if len(prompt) > pipe.tokenizer.model_max_length: + continue + start_time = time.perf_counter() + image = pipe(prompt, 
generator=torch.Generator('cpu').manual_seed(0)).images[0] + infer_times.append(time.perf_counter() - start_time) + image = transforms.ToTensor()(image) + images.append(image) + + mean_perf_time = sum(infer_times) / len(infer_times) + + while len(images) > 0: + images_batch = torch.stack(images[-batch_size:]) + images = images[:-batch_size] + inception_score.update(images_batch) + kl_mean, kl_std = inception_score.compute() + + pipe.transformer.transformer = original_ov_transformer_model + disable_progress_bar(pipe, disable=False) + + return kl_mean, mean_perf_time + + + original_inception_score, original_time = compute_inception_score(TRANSFORMER_OV_PATH, VALIDATION_DATASET_SIZE) + print(f"Original pipeline Inception Score: {original_inception_score}") + quantized_inception_score, quantized_time = compute_inception_score(QUANTIZED_TRANSFORMER_OV_PATH, VALIDATION_DATASET_SIZE) + print(f"Quantized pipeline Inception Score: {quantized_inception_score}") + print(f"Quantization speed-up: {original_time / quantized_time:.2f}x") + + +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/utilities/prints.py:43: UserWarning: Metric `InceptionScore` will save all extracted features in buffer. For large datasets this may lead to large memory footprint. + warnings.warn(\*args, \*\*kwargs) # noqa: B028 + + + +.. parsed-literal:: + + Computing Inception Score: 0%| | 0/100 [00:00`__ tackles the task of generating animation sequences from a single character image. It builds upon diffusion models pre-trained on vast character image @@ -36,14 +35,9 @@ Learn more in `GitHub repo `__ and `paper `__. -.. container:: alert alert-warning - - :: +.. warning:: -

! WARNING !

-

- This tutorial requires at least 96 GB of RAM for model conversion and 40 GB for inference. Changing the values of HEIGHT, WIDTH and VIDEO_LENGTH variables will change the memory consumption but will also affect accuracy. -

+ This tutorial requires at least **96 GB** of RAM for model conversion and **40 GB** for inference. Changing the values of ``HEIGHT`` ``WIDTH`` and ``VIDEO_LENGTH`` variables will change the memory consumption but will also affect accuracy. **Table of contents:** @@ -87,10 +81,13 @@ Prerequisites import requests - %pip install -q "torch>=2.1" torchvision einops omegaconf "diffusers<=0.24" "huggingface-hub<0.26.0" transformers av accelerate "gradio>=4.19" --extra-index-url "https://download.pytorch.org/whl/cpu" - %pip install -q "openvino>=2024.0" "nncf>=2.9.0" - + REPO_PATH = Path("Moore-AnimateAnyone") + if not REPO_PATH.exists(): + !git clone -q "https://github.com/itrushkin/Moore-AnimateAnyone.git" + %pip install -q "torch>=2.1" torchvision einops omegaconf "diffusers<=0.24" transformers av accelerate "openvino>=2024.0" "nncf>=2.9.0" "gradio>=4.19" --extra-index-url "https://download.pytorch.org/whl/cpu" + import sys + sys.path.insert(0, str(REPO_PATH.resolve())) r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", ) @@ -101,16 +98,6 @@ Prerequisites ) open("notebook_utils.py", "w").write(r.text) - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py", - ) - open("cmd_helper.py", "w").write(r.text) - - - from cmd_helper import clone_repo - - clone_repo("https://github.com/itrushkin/Moore-AnimateAnyone.git") - %load_ext skip_kernel_extension Note that we clone a fork of original repo with tweaked forward methods. diff --git a/docs/notebooks/async-api-with-output.rst b/docs/notebooks/async-api-with-output.rst index 86ff1bc0aa9b0c..9f9130a4fe0db2 100644 --- a/docs/notebooks/async-api-with-output.rst +++ b/docs/notebooks/async-api-with-output.rst @@ -59,14 +59,7 @@ Imports .. code:: ipython3 %pip install -q "openvino>=2023.1.0" - %pip install -q opencv-python tqdm "matplotlib>=3.4" - - -.. 
parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - + %pip install -q opencv-python "matplotlib>=3.4" .. code:: ipython3 @@ -104,36 +97,26 @@ the person in each frame of the video. .. code:: ipython3 - from pathlib import Path - # directory where model will be downloaded base_model_dir = "model" # model name as named in Open Model Zoo model_name = "person-detection-0202" precision = "FP16" - model_path = Path("model") / f"{model_name}.xml" - - base_model_url = "https://storage.openvinotoolkit.org/repositories/open_model_zoo/2023.0/models_bin/1" - - if not Path(model_path).exists(): - utils.download_file(f"{base_model_url}/{model_name}/{precision}/{model_name}.xml", filename=model_path.name, directory=model_path.parent) - utils.download_file( - f"{base_model_url}/{model_name}/{precision}/{model_name}.bin", filename=model_path.name.replace(".xml", ".bin"), directory=model_path.parent - ) - - - -.. parsed-literal:: - - person-detection-0202.xml: 0%| | 0.00/249k [00:00=2.1.0" torchvision "transformers>=4.26.0" "gradio>=4.19" "openvino>=2023.3.0" "datasets>=2.14.6" "nncf>=2.8.1" "tqdm" - %pip install -q "matplotlib>=3.4" + if platform.system() != "Windows": + %pip install -q "matplotlib>=3.4" + else: + %pip install -q "matplotlib>=3.4,<3.7" .. code:: ipython3 @@ -564,7 +569,7 @@ As discussed before, the model consists of several blocks which can be reused for building pipelines for different tasks. In the diagram below, you can see how image captioning works: -|image02| +|image01| The visual model accepts the image preprocessed by ``BlipProcessor`` as input and produces image embeddings, which are directly passed to the @@ -582,7 +587,7 @@ performing generation of answers. The next step is implementing both pipelines using OpenVINO models. -.. |image02| image:: https://user-images.githubusercontent.com/29454499/221865836-a56da06e-196d-449c-a5dc-4136da6ab5d5.png +.. 
|image01| image:: https://user-images.githubusercontent.com/29454499/221865836-a56da06e-196d-449c-a5dc-4136da6ab5d5.png .. |image11| image:: https://user-images.githubusercontent.com/29454499/221868167-d0081add-d9f3-4591-80e7-4753c88c1d0a.png .. code:: ipython3 diff --git a/docs/notebooks/catvton-with-output.rst b/docs/notebooks/catvton-with-output.rst index 2186b6eb8dba6f..a7a9a04359f338 100644 --- a/docs/notebooks/catvton-with-output.rst +++ b/docs/notebooks/catvton-with-output.rst @@ -31,9 +31,7 @@ Teaser image from `CatVTON GitHub `__ |teaser| In this tutorial we consider how to convert and run this model using -OpenVINO. An additional part demonstrates how to run optimization with -`NNCF `__ to speed up -pipeline. +OpenVINO. **Table of contents:** @@ -43,14 +41,6 @@ pipeline. - `Convert the model to OpenVINO IR <#convert-the-model-to-openvino-ir>`__ - `Compiling models <#compiling-models>`__ -- `Optimize model using NNCF Post-Training Quantization - API <#optimize-model-using-nncf-post-training-quantization-api>`__ - - - `Run Post-Training - Quantization <#run-post-training-quantization>`__ - - `Run Weights Compression <#run-weights-compression>`__ - - `Compare model file sizes <#compare-model-file-sizes>`__ - - `Interactive demo <#interactive-demo>`__ Installation Instructions @@ -77,10 +67,18 @@ Prerequisites if platform.system() == "Darwin": %pip install -q "numpy<2.0.0" - %pip install -q "openvino>=2024.4" "nncf>=2.13.0" + %pip install -q "openvino>=2024.4" %pip install -q "torch>=2.1" "diffusers>=0.29.1" torchvision opencv_python --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q fvcore "pillow" "tqdm" "gradio>=4.36" "omegaconf==2.4.0.dev3" av pycocotools cloudpickle scipy accelerate "transformers>=4.27.3" + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. 
+ + .. code:: ipython3 import requests @@ -92,10 +90,19 @@ Prerequisites open("notebook_utils.py", "w").write(r.text) r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py", + url="https://raw.githubusercontent.com/aleksandr-mokrov/openvino_notebooks/refs/heads/catvton/utils/cmd_helper.py", ) open("cmd_helper.py", "w").write(r.text) + + + +.. parsed-literal:: + + 741 + + + .. code:: ipython3 from cmd_helper import clone_repo @@ -103,6 +110,15 @@ Prerequisites clone_repo("https://github.com/Zheng-Chong/CatVTON.git", "3b795364a4d2f3b5adb365f39cdea376d20bc53c") + + + +.. parsed-literal:: + + PosixPath('CatVTON') + + + Convert the model to OpenVINO IR ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -137,202 +153,177 @@ version). .. code:: ipython3 + from pathlib import Path + from ov_catvton_helper import download_models, convert_pipeline_models, convert_automasker_models - pipeline, mask_processor, automasker = download_models() - vae_scaling_factor = pipeline.vae.config.scaling_factor - convert_pipeline_models(pipeline) - convert_automasker_models(automasker) - -Compiling models ----------------- - - + + MODEL_DIR = Path("models") + VAE_ENCODER_PATH = MODEL_DIR / "vae_encoder.xml" + VAE_DECODER_PATH = MODEL_DIR / "vae_decoder.xml" + UNET_PATH = MODEL_DIR / "unet.xml" + DENSEPOSE_PROCESSOR_PATH = MODEL_DIR / "densepose_processor.xml" + SCHP_PROCESSOR_ATR = MODEL_DIR / "schp_processor_atr.xml" + SCHP_PROCESSOR_LIP = MODEL_DIR / "schp_processor_lip.xml" + + + pipeline, mask_processor, automasker = download_models(MODEL_DIR) + convert_pipeline_models(pipeline, VAE_ENCODER_PATH, VAE_DECODER_PATH, UNET_PATH) + convert_automasker_models(automasker, DENSEPOSE_PROCESSOR_PATH, SCHP_PROCESSOR_ATR, SCHP_PROCESSOR_LIP) -Select device from dropdown list for running inference using OpenVINO. -.. code:: ipython3 +.. parsed-literal:: - import openvino as ov + Note: switching to '3b795364a4d2f3b5adb365f39cdea376d20bc53c'. 
- from notebook_utils import device_widget + You are in 'detached HEAD' state. You can look around, make experimental + changes and commit them, and you can discard any commits you make in this + state without impacting any branches by switching back to a branch. + If you want to create a new branch to retain commits you create, you may + do so (now or later) by using -c with the switch command. Example: - core = ov.Core() + git switch -c - device = device_widget() + Or undo this operation with: - device + git switch - + + Turn off this advice by setting config variable advice.detachedHead to false + + HEAD is now at 3b79536 Update default base model path -``get_compiled_pipeline`` and ``get_compiled_automasker`` functions -defined in ``ov_catvton_helper.py`` provides convenient way for getting -the pipeline and the ``automasker`` with compiled ov-models that are -compatible with the original interface. It accepts the original pipeline -and ``automasker``, inference device and directories with converted -models as arguments. Under the hood we create callable wrapper classes -for compiled models to allow interaction with original pipelines. Note -that all of wrapper classes return ``torch.Tensor``\ s instead of -``np.array``\ s. And then insert wrappers instances in the pipeline. -.. code:: ipython3 - from ov_catvton_helper import ( - get_compiled_pipeline, - get_compiled_automasker, - VAE_ENCODER_PATH, - VAE_DECODER_PATH, - UNET_PATH, - DENSEPOSE_PROCESSOR_PATH, - SCHP_PROCESSOR_ATR, - SCHP_PROCESSOR_LIP, - ) - - pipeline = get_compiled_pipeline(pipeline, core, device, VAE_ENCODER_PATH, VAE_DECODER_PATH, UNET_PATH, vae_scaling_factor) - automasker = get_compiled_automasker(automasker, core, device, DENSEPOSE_PROCESSOR_PATH, SCHP_PROCESSOR_ATR, SCHP_PROCESSOR_LIP) +.. 
parsed-literal:: -Optimize model using NNCF Post-Training Quantization API --------------------------------------------------------- + Fetching 10 files: 0%| | 0/10 [00:00`__ provides a suite of -advanced algorithms for Neural Networks inference optimization in -OpenVINO with minimal accuracy drop. We will use 8-bit quantization in -post-training mode (without the fine-tuning pipeline) for the UNet -model, and 4-bit weight compression for the remaining models. +.. parsed-literal:: - **NOTE**: Quantization is time and memory consuming operation. - Running quantization code below may take some time. You can disable - it using widget below: + README.md: 0%| | 0.00/9.66k [00:00= 64: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unets/unet_2d_condition.py:1111: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if dim % default_overall_up_factor != 0: +Compiling models +---------------- + -Quantizing of the remaining components of the pipeline does not -significantly improve inference performance but can lead to a -substantial degradation of accuracy. The weight compression will be -applied to footprint reduction. + +Select device from dropdown list for running inference using OpenVINO. .. code:: ipython3 - %%skip not $to_quantize.value + import openvino as ov - from catvton_quantization_helper import compress_models + from notebook_utils import device_widget - compress_models(core) - is_optimized_pipe_available = True + core = ov.Core() + + device = device_widget() + + device -Compare model file sizes -~~~~~~~~~~~~~~~~~~~~~~~~ -.. code:: ipython3 +.. 
parsed-literal:: - %%skip not $to_quantize.value - from catvton_quantization_helper import compare_models_size - - compare_models_size() + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') -.. parsed-literal:: - vae_encoder compression rate: 2.011 - vae_decoder compression rate: 2.007 - unet compression rate: 1.995 - densepose_processor compression rate: 2.019 - schp_processor_atr compression rate: 1.993 - schp_processor_lip compression rate: 1.993 +``get_compiled_pipeline`` and ``get_compiled_automasker`` functions +defined in ``ov_catvton_helper.py`` provides convenient way for getting +the pipeline and the ``automasker`` with compiled ov-models that are +compatible with the original interface. It accepts the original pipeline +and ``automasker``, inference device and directories with converted +models as arguments. Under the hood we create callable wrapper classes +for compiled models to allow interaction with original pipelines. Note +that all of wrapper classes return ``torch.Tensor``\ s instead of +``np.array``\ s. And then insert wrappers instances in the pipeline. + +.. code:: ipython3 + + from ov_catvton_helper import get_compiled_pipeline, get_compiled_automasker + + pipeline = get_compiled_pipeline(pipeline, core, device, VAE_ENCODER_PATH, VAE_DECODER_PATH, UNET_PATH) + automasker = get_compiled_automasker(automasker, core, device, DENSEPOSE_PROCESSOR_PATH, SCHP_PROCESSOR_ATR, SCHP_PROCESSOR_LIP) Interactive inference --------------------- @@ -342,38 +333,28 @@ Interactive inference Please select below whether you would like to use the quantized models to launch the interactive demo. -.. code:: ipython3 - - from ov_catvton_helper import get_pipeline_selection_option - - use_quantized_models = get_pipeline_selection_option(is_optimized_pipe_available) - - use_quantized_models - .. 
code:: ipython3 from gradio_helper import make_demo - from catvton_quantization_helper import ( - VAE_ENCODER_INT4_PATH, - VAE_DECODER_INT4_PATH, - DENSEPOSE_PROCESSOR_INT4_PATH, - SCHP_PROCESSOR_ATR_INT4, - SCHP_PROCESSOR_LIP_INT4, - UNET_INT8_PATH, - ) - - pipeline, mask_processor, automasker = download_models() - if use_quantized_models.value: - pipeline = get_compiled_pipeline(pipeline, core, device, VAE_ENCODER_INT4_PATH, VAE_DECODER_INT4_PATH, UNET_INT8_PATH, vae_scaling_factor) - automasker = get_compiled_automasker(automasker, core, device, DENSEPOSE_PROCESSOR_INT4_PATH, SCHP_PROCESSOR_ATR_INT4, SCHP_PROCESSOR_LIP_INT4) - else: - pipeline = get_compiled_pipeline(pipeline, core, device, VAE_ENCODER_PATH, VAE_DECODER_PATH, UNET_PATH, vae_scaling_factor) - automasker = get_compiled_automasker(automasker, core, device, DENSEPOSE_PROCESSOR_PATH, SCHP_PROCESSOR_ATR, SCHP_PROCESSOR_LIP) output_dir = "output" demo = make_demo(pipeline, mask_processor, automasker, output_dir) try: - demo.launch(debug=True) + demo.launch(debug=False) except Exception: - demo.launch(debug=True, share=True) + demo.launch(debug=False, share=True) + + +.. parsed-literal:: + + Running on local URL: http://127.0.0.1:7860 + + To create a public link, set `share=True` in `launch()`. + + + + + + + diff --git a/docs/notebooks/clip-language-saliency-map-with-output.rst b/docs/notebooks/clip-language-saliency-map-with-output.rst index dd6f608429e1c5..3c19a581410863 100644 --- a/docs/notebooks/clip-language-saliency-map-with-output.rst +++ b/docs/notebooks/clip-language-saliency-map-with-output.rst @@ -122,7 +122,7 @@ Initial Implementation with Transformers and Pytorch # Install requirements %pip install -q "openvino>=2023.1.0" - %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu transformers "numpy<2" "torch>=2.1" "gradio>=4.19" "matplotlib>=3.4" + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu transformers "numpy<2" "torch>=2.1" "gradio>=4.19" .. 
code:: ipython3 diff --git a/docs/notebooks/clip-zero-shot-classification-with-output.rst b/docs/notebooks/clip-zero-shot-classification-with-output.rst index 3da831e6d9d0dd..fd572a83ffb834 100644 --- a/docs/notebooks/clip-zero-shot-classification-with-output.rst +++ b/docs/notebooks/clip-zero-shot-classification-with-output.rst @@ -729,7 +729,6 @@ up of the dynamic quantized models. Interactive demo ---------------- - Now, it is your turn! You can provide your own image and comma-separated list of labels for zero-shot classification. diff --git a/docs/notebooks/controlnet-stable-diffusion-with-output.rst b/docs/notebooks/controlnet-stable-diffusion-with-output.rst index 400838fbded1ea..3ab43d897ea658 100644 --- a/docs/notebooks/controlnet-stable-diffusion-with-output.rst +++ b/docs/notebooks/controlnet-stable-diffusion-with-output.rst @@ -197,34 +197,16 @@ Prerequisites .. code:: ipython3 + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "torch>=2.1" "torchvision" + %pip install -q "diffusers>=0.14.0" "matplotlib>=3.4" "transformers>=4.30.2" "controlnet-aux>=0.0.6" "gradio>=3.36" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "openvino>=2023.1.0" "datasets>=2.14.6" "nncf>=2.7.0" + import requests - from pathlib import Path - utility_files = ["notebook_utils.py", "pip_helper.py"] - - for utility in utility_files: - if not Path(utility).exists(): - r = requests.get(f"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/{utility}") - with open(utility, "w") as f: - f.write(r.text) - - - from pip_helper import pip_install - - pip_install("torch>=2.1", "torchvision", "--extra-index-url", "https://download.pytorch.org/whl/cpu") - pip_install( - "diffusers>=0.14.0", - "matplotlib>=3.4", - "transformers>=4.30.2", - "controlnet-aux>=0.0.6", - "gradio>=3.36", - "datasets>=2.14.6", - "nncf>=2.7.0", - "opencv-python", - "--extra-index-url", - "https://download.pytorch.org/whl/cpu", + r = 
requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) - pip_install("openvino>=2023.1.0") + open("notebook_utils.py", "w").write(r.text) Instantiating Generation Pipeline --------------------------------- @@ -290,18 +272,14 @@ Now, let us check its result on example image: .. code:: ipython3 + import requests from PIL import Image import matplotlib.pyplot as plt import numpy as np - from notebook_utils import download_file - - example_url = "https://user-images.githubusercontent.com/29454499/224540208-c172c92a-9714-4a7b-857a-b1e54b4d4791.jpg" - image_path = Path("example_image.jpg") - if not image_path.exists(): - download_file(example_url, filename="example_image.jpg") - img = Image.open(image_path) + example_url = "https://user-images.githubusercontent.com/29454499/224540208-c172c92a-9714-4a7b-857a-b1e54b4d4791.jpg" + img = Image.open(requests.get(example_url, stream=True).raw) pose = pose_estimator(img) @@ -1461,12 +1439,10 @@ Let’s load ``skip magic`` extension to skip quantization if # Fetch `skip_kernel_extension` module import requests - - if not Path("skip_kernel_extension.py").exists(): - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", - ) - open("skip_kernel_extension.py", "w").write(r.text) + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", + ) + open("skip_kernel_extension.py", "w").write(r.text) int8_pipe = None diff --git a/docs/notebooks/convert-to-openvino-with-output.rst b/docs/notebooks/convert-to-openvino-with-output.rst index bc5a45f244e376..2baaf0043e7f04 100644 --- a/docs/notebooks/convert-to-openvino-with-output.rst +++ b/docs/notebooks/convert-to-openvino-with-output.rst @@ -184,10 +184,10 @@ NLP model from Hugging Face and export it in ONNX format: .. 
parsed-literal:: - 2024-12-09 23:09:00.018226: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-12-09 23:09:00.080568: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-04 22:48:30.842642: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-04 22:48:30.876775: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-12-09 23:09:00.743048: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-04 22:48:31.539454: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -660,7 +660,7 @@ frameworks conversion guides. .. parsed-literal:: - 2024-12-09 23:09:17.262024: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. + 2024-11-04 22:48:47.716205: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. 
Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. Skipping registering GPU devices... diff --git a/docs/notebooks/convnext-classification-with-output.rst b/docs/notebooks/convnext-classification-with-output.rst index 1204ea2c17f106..6e1c039f7013c6 100644 --- a/docs/notebooks/convnext-classification-with-output.rst +++ b/docs/notebooks/convnext-classification-with-output.rst @@ -192,7 +192,7 @@ And print results Predicted Class: 281 Predicted Label: n02123045 tabby, tabby cat - Predicted Probability: 0.5351971983909607 + Predicted Probability: 0.4661690592765808 Convert the model to OpenVINO Intermediate representation format diff --git a/docs/notebooks/cross-lingual-books-alignment-with-output.rst b/docs/notebooks/cross-lingual-books-alignment-with-output.rst index 68f51ad137ff16..b116f0e1f5cda1 100644 --- a/docs/notebooks/cross-lingual-books-alignment-with-output.rst +++ b/docs/notebooks/cross-lingual-books-alignment-with-output.rst @@ -32,7 +32,7 @@ Prerequisites - ``requests`` - for getting books - ``pysbd`` - for splitting sentences -- ``transformers[torch]`` and ``openvino`` - for getting sentence +- ``transformers[torch]`` and ``openvino_dev`` - for getting sentence embeddings - ``seaborn`` - for alignment matrix visualization - ``ipywidgets`` - for displaying HTML and JS output in the notebook @@ -416,12 +416,12 @@ languages. It has the same architecture as the BERT model but has been trained on a different task: to produce identical embeddings for translation pairs. -|image02| +|image01| This makes LaBSE a great choice for our task and it can be reused for different language pairs still producing good results. -.. |image02| image:: https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/627d3a39-7076-479f-a7b1-392f49a0b83e +.. 
|image01| image:: https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/627d3a39-7076-479f-a7b1-392f49a0b83e .. code:: ipython3 diff --git a/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst b/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst index 853da533385284..30778bafc8e884 100644 --- a/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst +++ b/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst @@ -154,10 +154,10 @@ Imports .. parsed-literal:: - 2024-12-09 23:09:41.789833: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-12-09 23:09:41.824673: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-04 22:49:10.827255: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-04 22:49:10.861330: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-12-09 23:09:42.418712: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-04 22:49:11.454332: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -213,7 +213,7 @@ notebook `__. .. 
parsed-literal:: - unet_kits19_state_dict.pth: 0%| | 0.00/7.58M [00:00`__. .. parsed-literal:: - /tmp/ipykernel_2165966/1592321960.py:3: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /tmp/ipykernel_503635/1592321960.py:3: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. 
state_dict = torch.load(state_dict_file, map_location=torch.device("cpu")) @@ -444,7 +444,7 @@ this notebook. .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if x_e.shape[-i - 1] != x_0.shape[-i - 1]: @@ -526,18 +526,18 @@ Convert quantized model to OpenVINO IR model and save it. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:340: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:340: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! return self._level_low.item() - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:348: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:348: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! return self._level_high.item() - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if x_e.shape[-i - 1] != x_0.shape[-i - 1]: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: Tensor-likes are not close! - Mismatched elements: 250458 / 262144 (95.5%) - Greatest absolute difference: 3.8674159049987793 at index (0, 0, 351, 76) (up to 1e-05 allowed) - Greatest relative difference: 12206.866810726728 at index (0, 0, 144, 31) (up to 1e-05 allowed) + Mismatched elements: 245783 / 262144 (93.8%) + Greatest absolute difference: 3.1180567741394043 at index (0, 0, 474, 435) (up to 1e-05 allowed) + Greatest relative difference: 16087.83647354372 at index (0, 0, 37, 224) (up to 1e-05 allowed) _check_trace( @@ -663,7 +663,7 @@ be run in the notebook with ``! benchmark_app`` or [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. 
[Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 8.90 ms + [ INFO ] Read model took 8.85 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [1,1,512,512] @@ -677,7 +677,7 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.final_conv/aten::_convolution/Add) : f32 / [...] / [1,1,512,512] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 264.91 ms + [ INFO ] Compile model took 253.47 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -714,17 +714,17 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 48.49 ms + [ INFO ] First inference took 56.51 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 431 iterations - [ INFO ] Duration: 15002.34 ms + [ INFO ] Count: 406 iterations + [ INFO ] Duration: 15019.48 ms [ INFO ] Latency: - [ INFO ] Median: 34.52 ms - [ INFO ] Average: 34.59 ms - [ INFO ] Min: 34.20 ms - [ INFO ] Max: 36.19 ms - [ INFO ] Throughput: 28.73 FPS + [ INFO ] Median: 35.01 ms + [ INFO ] Average: 36.77 ms + [ INFO ] Min: 34.63 ms + [ INFO ] Max: 48.05 ms + [ INFO ] Throughput: 27.03 FPS .. code:: ipython3 @@ -750,7 +750,7 @@ be run in the notebook with ``! benchmark_app`` or [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. 
[Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 10.56 ms + [ INFO ] Read model took 10.78 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [1,1,512,512] @@ -764,7 +764,7 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.final_conv/aten::_convolution/Add) : f32 / [...] / [1,1,512,512] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 248.98 ms + [ INFO ] Compile model took 250.08 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model49 @@ -801,17 +801,17 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 29.18 ms + [ INFO ] First inference took 29.09 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 908 iterations - [ INFO ] Duration: 15011.20 ms + [ INFO ] Count: 938 iterations + [ INFO ] Duration: 15008.12 ms [ INFO ] Latency: - [ INFO ] Median: 15.48 ms - [ INFO ] Average: 16.33 ms - [ INFO ] Min: 15.15 ms - [ INFO ] Max: 28.31 ms - [ INFO ] Throughput: 60.49 FPS + [ INFO ] Median: 15.77 ms + [ INFO ] Average: 15.80 ms + [ INFO ] Min: 15.47 ms + [ INFO ] Max: 17.13 ms + [ INFO ] Throughput: 62.50 FPS Visually Compare Inference Results @@ -905,7 +905,7 @@ seed is displayed to enable reproducing specific runs of this cell. .. parsed-literal:: - Visualizing results with seed 1733782265 + Visualizing results with seed 1730757034 @@ -989,7 +989,7 @@ performs inference, and displays the results on the frames loaded in .. parsed-literal:: Loaded model to AUTO in 0.15 seconds. 
- Total time for 68 frames: 2.32 seconds, fps:29.70 + Total time for 68 frames: 2.36 seconds, fps:29.25 References diff --git a/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png b/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png index fc10c642d8d2a1..5aa37909b71cf7 100644 --- a/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png +++ b/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:52955890ed558e516a361399057b8529ffd5103a7b63ed20a2549062b4d900b5 -size 386283 +oid sha256:894600de56af211d4cc3e64ee092b5a62d1b0158c51048d17accadddea0f046e +size 382725 diff --git a/docs/notebooks/ddcolor-image-colorization-with-output.rst b/docs/notebooks/ddcolor-image-colorization-with-output.rst index 11b92fe4897f5e..409d2495e2fea6 100644 --- a/docs/notebooks/ddcolor-image-colorization-with-output.rst +++ b/docs/notebooks/ddcolor-image-colorization-with-output.rst @@ -25,9 +25,8 @@ In this tutorial we consider how to convert and run DDColor using OpenVINO. Additionally, we will demonstrate how to optimize this model using `NNCF `__. -🪄 Let’s start to explore magic of image colorization! - -**Table of contents:** +🪄 Let’s start to explore magic of image colorization! #### Table of +contents: - `Prerequisites <#prerequisites>`__ - `Load PyTorch model <#load-pytorch-model>`__ @@ -68,7 +67,7 @@ Prerequisites .. 
code:: ipython3 import platform - + %pip install -q "nncf>=2.11.0" "torch>=2.1" "torchvision" "timm" "opencv_python" "pillow" "PyYAML" "scipy" "scikit-image" "datasets" "gradio>=4.19" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -Uq "openvino>=2024.3.0" if platform.python_version_tuple()[1] in ["8", "9"]: @@ -86,42 +85,39 @@ Prerequisites .. code:: ipython3 + import sys from pathlib import Path import requests - - + + repo_dir = Path("DDColor") + + if not repo_dir.exists(): + !git clone https://github.com/piddnad/DDColor.git + + sys.path.append(str(repo_dir)) + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) open("notebook_utils.py", "w").write(r.text) - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py", - ) - open("cmd_helper.py", "w").write(r.text) - - - .. parsed-literal:: - 1491 - - - -.. code:: ipython3 - - from cmd_helper import clone_repo - - - clone_repo("https://github.com/piddnad/DDColor.git") + Cloning into 'DDColor'... + remote: Enumerating objects: 241, done. + remote: Counting objects: 100% (84/84), done. + remote: Compressing objects: 100% (49/49), done. + remote: Total 241 (delta 57), reused 37 (delta 35), pack-reused 157 (from 1) + Receiving objects: 100% (241/241), 14.10 MiB | 21.95 MiB/s, done. + Resolving deltas: 100% (83/83), done. .. parsed-literal:: - PosixPath('DDColor') + 24692 @@ -135,7 +131,7 @@ Prerequisites .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers warnings.warn(f"Importing from {__name__} is deprecated, please import via timm.layers", FutureWarning) @@ -153,14 +149,14 @@ models from DDColor family. .. code:: ipython3 import torch - + model_name = "ddcolor_paper_tiny" - + ddcolor_model = DDColorHF.from_pretrained(f"piddnad/{model_name}") - - + + colorizer = ImageColorizationPipelineHF(model=ddcolor_model, input_size=512) - + ddcolor_model.to("cpu") colorizer.device = torch.device("cpu") @@ -173,18 +169,18 @@ Run PyTorch model inference import cv2 import PIL - + IMG_PATH = "DDColor/assets/test_images/Ansel Adams _ Moore Photography.jpeg" - - + + img = cv2.imread(IMG_PATH) - + PIL.Image.fromarray(img[:, :, ::-1]) -.. image:: ddcolor-image-colorization-with-output_files/ddcolor-image-colorization-with-output_9_0.png +.. image:: ddcolor-image-colorization-with-output_files/ddcolor-image-colorization-with-output_8_0.png @@ -196,7 +192,7 @@ Run PyTorch model inference -.. image:: ddcolor-image-colorization-with-output_files/ddcolor-image-colorization-with-output_10_0.png +.. image:: ddcolor-image-colorization-with-output_files/ddcolor-image-colorization-with-output_9_0.png @@ -217,9 +213,9 @@ loading on device using ``core.complie_model``. 
import openvino as ov import torch - + OV_COLORIZER_PATH = Path("ddcolor.xml") - + if not OV_COLORIZER_PATH.exists(): ov_model = ov.convert_model(ddcolor_model, example_input=torch.ones((1, 3, 512, 512)), input=[1, 3, 512, 512]) ov.save_model(ov_model, OV_COLORIZER_PATH) @@ -234,11 +230,11 @@ Select one of supported devices for inference using dropdown list. .. code:: ipython3 from notebook_utils import device_widget - + core = ov.Core() - + device = device_widget() - + device @@ -260,36 +256,36 @@ Select one of supported devices for inference using dropdown list. import numpy as np import torch import torch.nn.functional as F - - + + def process(img, compiled_model): # Preprocess input image height, width = img.shape[:2] - + # Normalize to [0, 1] range img = (img / 255.0).astype(np.float32) orig_l = cv2.cvtColor(img, cv2.COLOR_BGR2Lab)[:, :, :1] # (h, w, 1) - + # Resize rgb image -> lab -> get grey -> rgb img = cv2.resize(img, (512, 512)) img_l = cv2.cvtColor(img, cv2.COLOR_BGR2Lab)[:, :, :1] img_gray_lab = np.concatenate((img_l, np.zeros_like(img_l), np.zeros_like(img_l)), axis=-1) img_gray_rgb = cv2.cvtColor(img_gray_lab, cv2.COLOR_LAB2RGB) - + # Transpose HWC -> CHW and add batch dimension tensor_gray_rgb = torch.from_numpy(img_gray_rgb.transpose((2, 0, 1))).float().unsqueeze(0) - + # Run model inference output_ab = compiled_model(tensor_gray_rgb)[0] - + # Postprocess result # resize ab -> concat original l -> rgb output_ab_resize = F.interpolate(torch.from_numpy(output_ab), size=(height, width))[0].float().numpy().transpose(1, 2, 0) output_lab = np.concatenate((orig_l, output_ab_resize), axis=-1) output_bgr = cv2.cvtColor(output_lab, cv2.COLOR_LAB2BGR) - + output_img = (output_bgr * 255.0).round().astype(np.uint8) - + return output_img .. code:: ipython3 @@ -300,7 +296,7 @@ Select one of supported devices for inference using dropdown list. -.. image:: ddcolor-image-colorization-with-output_files/ddcolor-image-colorization-with-output_17_0.png +.. 
image:: ddcolor-image-colorization-with-output_files/ddcolor-image-colorization-with-output_16_0.png @@ -328,7 +324,7 @@ improve model inference speed. .. code:: ipython3 from notebook_utils import quantization_widget - + to_quantize = quantization_widget() to_quantize @@ -344,15 +340,15 @@ improve model inference speed. .. code:: ipython3 import requests - + OV_INT8_COLORIZER_PATH = Path("ddcolor_int8.xml") compiled_int8_model = None - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", ) open("skip_kernel_extension.py", "w").write(r.text) - + %load_ext skip_kernel_extension Collect quantization dataset @@ -367,12 +363,12 @@ dataset from Hugging Face as calibration data. .. code:: ipython3 %%skip not $to_quantize.value - + from datasets import load_dataset - + subset_size = 300 calibration_data = [] - + if not OV_INT8_COLORIZER_PATH.exists(): dataset = load_dataset("ummagumm-a/colorization_dataset", split="train", streaming=True).shuffle(seed=42).take(subset_size) for idx, batch in enumerate(dataset): @@ -384,7 +380,7 @@ dataset from Hugging Face as calibration data. img_l = cv2.cvtColor(np.stack([img, img, img], axis=2), cv2.COLOR_BGR2Lab)[:, :, :1] img_gray_lab = np.concatenate((img_l, np.zeros_like(img_l), np.zeros_like(img_l)), axis=-1) img_gray_rgb = cv2.cvtColor(img_gray_lab, cv2.COLOR_LAB2RGB) - + image = np.expand_dims(img_gray_rgb.transpose((2, 0, 1)).astype(np.float32), axis=0) calibration_data.append(image) @@ -396,9 +392,9 @@ Perform model quantization .. code:: ipython3 %%skip not $to_quantize.value - + import nncf - + if not OV_INT8_COLORIZER_PATH.exists(): ov_model = core.read_model(OV_COLORIZER_PATH) quantized_model = nncf.quantize( @@ -416,10 +412,10 @@ Perform model quantization .. parsed-literal:: - 2024-12-09 23:13:28.920989: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. 
You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-12-09 23:13:28.960154: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-04 22:52:53.152561: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-04 22:52:53.191342: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-12-09 23:13:29.365051: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-04 22:52:53.595160: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -456,7 +452,7 @@ Run INT8 model inference .. code:: ipython3 from IPython.display import display - + if OV_INT8_COLORIZER_PATH.exists(): compiled_int8_model = core.compile_model(OV_INT8_COLORIZER_PATH, device.value) img = cv2.imread("DDColor/assets/test_images/Ansel Adams _ Moore Photography.jpeg") @@ -465,7 +461,7 @@ Run INT8 model inference -.. image:: ddcolor-image-colorization-with-output_files/ddcolor-image-colorization-with-output_26_0.png +.. image:: ddcolor-image-colorization-with-output_files/ddcolor-image-colorization-with-output_25_0.png Compare FP16 and INT8 model size @@ -476,9 +472,9 @@ Compare FP16 and INT8 model size .. 
code:: ipython3 fp16_ir_model_size = OV_COLORIZER_PATH.with_suffix(".bin").stat().st_size / 2**20 - + print(f"FP16 model size: {fp16_ir_model_size:.2f} MB") - + if OV_INT8_COLORIZER_PATH.exists(): quantized_model_size = OV_INT8_COLORIZER_PATH.with_suffix(".bin").stat().st_size / 2**20 print(f"INT8 model size: {quantized_model_size:.2f} MB") @@ -517,17 +513,17 @@ Tool =2024.2.0" "datasets>=2.14.6" "nncf>=2.11.0" "tqdm" "matplotlib>=3.4" - %pip install -q "typing-extensions>=4.9.0" eval-type-backport "gradio>=4.19" gradio_imageslider - %pip install -q torch torchvision "opencv-python" huggingface_hub --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "typing-extensions>=4.9.0" eval-type-backport "gradio>=4.19" + %pip install -q -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu - if platform.system() == "Darwin": - %pip install -q "numpy<2.0.0" if platform.python_version_tuple()[1] in ["8", "9"]: %pip install -q "gradio-imageslider<=0.0.17" "typing-extensions>=4.9.0" @@ -146,7 +131,7 @@ attention optimizations first. .. code:: ipython3 - attention_file_path = Path("./Depth-Anything-V2/depth_anything_v2/dinov2_layers/attention.py") + attention_file_path = Path("./depth_anything_v2/dinov2_layers/attention.py") orig_attention_path = attention_file_path.parent / ("orig_" + attention_file_path.name) if not orig_attention_path.exists(): @@ -167,8 +152,14 @@ Prepare input data from PIL import Image - from notebook_utils import download_file, device_widget, quantization_widget + import requests + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + + open("notebook_utils.py", "w").write(r.text) + from notebook_utils import download_file, device_widget, quantization_widget download_file( "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/3f779fc1-c1b2-4dec-915a-64dae510a2bb", @@ -186,7 +177,7 @@ Prepare input data -.. 
image:: depth-anything-v2-with-output_files/depth-anything-v2-with-output_9_1.png +.. image:: depth-anything-v2-with-output_files/depth-anything-v2-with-output_8_1.png @@ -247,7 +238,7 @@ is preprocessed image height, ``W`` is preprocessed image width. xFormers not available xFormers not available - /tmp/ipykernel_2168902/1110356474.py:8: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /tmp/ipykernel_506168/1110356474.py:8: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. 
We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. model.load_state_dict(torch.load(model_path, map_location="cpu")) @@ -279,12 +270,12 @@ is preprocessed image height, ``W`` is preprocessed image width. .. parsed-literal:: - + -.. image:: depth-anything-v2-with-output_files/depth-anything-v2-with-output_15_1.png +.. image:: depth-anything-v2-with-output_files/depth-anything-v2-with-output_14_1.png Convert Model to OpenVINO IR format @@ -313,13 +304,13 @@ loading on device using ``core.complie_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
assert H % patch_H == 0, f"Input image height {H} is not a multiple of patch height {patch_H}" - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert W % patch_W == 0, f"Input image width {W} is not a multiple of patch width: {patch_W}" - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if npatch == N and w == h: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dpt.py:147: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dpt.py:147: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! out = F.interpolate(out, (int(patch_h * 14), int(patch_w * 14)), mode="bilinear", align_corners=True) @@ -411,12 +402,12 @@ range. .. parsed-literal:: - + -.. image:: depth-anything-v2-with-output_files/depth-anything-v2-with-output_25_1.png +.. image:: depth-anything-v2-with-output_files/depth-anything-v2-with-output_24_1.png Run inference on video @@ -426,13 +417,12 @@ Run inference on video .. code:: ipython3 - VIDEO_FILE = "./Coco Walking in Berkeley.mp4" - download_file( "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/video/Coco%20Walking%20in%20Berkeley.mp4", - VIDEO_FILE, + "./Coco Walking in Berkeley.mp4", ) + VIDEO_FILE = "./Coco Walking in Berkeley.mp4" # Number of seconds of input video to process. Set `NUM_SECONDS` to 0 to process # the full video. NUM_SECONDS = 4 @@ -634,7 +624,7 @@ Run inference on video .. 
parsed-literal:: - Processed 60 frames in 13.15 seconds. Total FPS (including video processing): 4.56.Inference FPS: 10.69 + Processed 60 frames in 13.24 seconds. Total FPS (including video processing): 4.53.Inference FPS: 10.68 Video saved to 'output/Coco Walking in Berkeley_depth_anything.mp4'. @@ -661,7 +651,7 @@ Run inference on video .. parsed-literal:: Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -719,11 +709,10 @@ improve model inference speed. .. code:: ipython3 # Fetch `skip_kernel_extension` module - if not Path("skip_kernel_extension.py").exists(): - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", - ) - open("skip_kernel_extension.py", "w").write(r.text) + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", + ) + open("skip_kernel_extension.py", "w").write(r.text) OV_DEPTH_ANYTHING_INT8_PATH = Path(f"{model_id}_int8.xml") @@ -795,10 +784,10 @@ quantization code below may take some time. .. parsed-literal:: - 2024-12-09 23:21:25.394147: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
- 2024-12-09 23:21:25.427427: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-04 23:01:18.047102: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-04 23:01:18.080343: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-12-09 23:21:26.001101: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-04 23:01:18.654050: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -910,7 +899,7 @@ data. -.. image:: depth-anything-v2-with-output_files/depth-anything-v2-with-output_44_0.png +.. image:: depth-anything-v2-with-output_files/depth-anything-v2-with-output_43_0.png .. code:: ipython3 @@ -924,10 +913,10 @@ data. .. parsed-literal:: - Processed 60 frames in 12.60 seconds. Total FPS (including video processing): 4.76.Inference FPS: 13.15 + Processed 60 frames in 12.60 seconds. Total FPS (including video processing): 4.76.Inference FPS: 13.12 Video saved to 'output/Coco Walking in Berkeley_depth_anything_int8.mp4'. 
Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -1008,8 +997,8 @@ Tool =2023.3.0" "datasets>=2.14.6" "nncf" "tqdm" %pip install -q "typing-extensions>=4.9.0" eval-type-backport "gradio>=4.19" "matplotlib>=3.4" - %pip install -q torch torchvision "opencv-python" huggingface_hub --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu - if platform.system() == "Darwin": - %pip install -q "numpy<2.0.0" if platform.python_version_tuple()[1] in ["8", "9"]: %pip install -q "gradio-imageslider<=0.0.17" "typing-extensions>=4.9.0" .. parsed-literal:: + Cloning into 'Depth-Anything'... + remote: Enumerating objects: 441, done. + remote: Counting objects: 100% (161/161), done. + remote: Compressing objects: 100% (120/120), done. + remote: Total 441 (delta 115), reused 44 (delta 41), pack-reused 280 (from 1) + Receiving objects: 100% (441/441), 237.90 MiB | 24.22 MiB/s, done. + Resolving deltas: 100% (158/158), done. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. + WARNING: typer 0.12.5 does not provide the extra 'all' Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. 
@@ -131,9 +110,6 @@ attention optimizations first. .. code:: ipython3 - from pathlib import Path - - attention_file_path = Path("./torchhub/facebookresearch_dinov2_main/dinov2/layers/attention.py") orig_attention_path = attention_file_path.parent / ("orig_" + attention_file_path.name) @@ -180,16 +156,15 @@ Prepare input data from PIL import Image + import requests r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) - open("notebook_utils.py", "w").write(r.text) - + open("notebook_utils.py", "w").write(r.text) from notebook_utils import download_file, device_widget, quantization_widget - download_file( "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/3f779fc1-c1b2-4dec-915a-64dae510a2bb", "furseal.png", @@ -206,7 +181,7 @@ Prepare input data -.. image:: depth-anything-with-output_files/depth-anything-with-output_11_1.png +.. image:: depth-anything-with-output_files/depth-anything-with-output_9_1.png @@ -280,7 +255,7 @@ image size and prepare it for visualization. -.. image:: depth-anything-with-output_files/depth-anything-with-output_18_0.png +.. image:: depth-anything-with-output_files/depth-anything-with-output_16_0.png Convert Model to OpenVINO IR format @@ -309,13 +284,13 @@ loading on device using ``core.complie_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert H % patch_H == 0, f"Input image height {H} is not a multiple of patch height {patch_H}" - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert W % patch_W == 0, f"Input image width {W} is not a multiple of patch width: {patch_W}" - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/vision_transformer.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/vision_transformer.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if npatch == N and w == h: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/depth_anything/dpt.py:133: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/depth_anything/dpt.py:133: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! out = F.interpolate(out, (int(patch_h * 14), int(patch_w * 14)), mode="bilinear", align_corners=True) @@ -381,7 +356,7 @@ Run inference on image -.. image:: depth-anything-with-output_files/depth-anything-with-output_27_0.png +.. image:: depth-anything-with-output_files/depth-anything-with-output_25_0.png Run inference on video @@ -598,7 +573,7 @@ Run inference on video .. 
parsed-literal:: - Processed 60 frames in 13.11 seconds. Total FPS (including video processing): 4.58.Inference FPS: 10.66 + Processed 60 frames in 13.24 seconds. Total FPS (including video processing): 4.53.Inference FPS: 10.62 Video saved to 'output/Coco Walking in Berkeley_depth_anything.mp4'. @@ -625,7 +600,7 @@ Run inference on video .. parsed-literal:: Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -758,10 +733,10 @@ quantization code below may take some time. .. parsed-literal:: - 2024-12-09 23:30:13.046257: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-12-09 23:30:13.081039: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-04 23:10:13.897258: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-04 23:10:13.929954: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-12-09 23:30:13.679496: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-04 23:10:14.502746: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -873,7 +848,7 @@ data. -.. image:: depth-anything-with-output_files/depth-anything-with-output_46_0.png +.. image:: depth-anything-with-output_files/depth-anything-with-output_44_0.png .. code:: ipython3 @@ -887,10 +862,10 @@ data. .. parsed-literal:: - Processed 60 frames in 12.65 seconds. Total FPS (including video processing): 4.74.Inference FPS: 12.89 + Processed 60 frames in 12.75 seconds. Total FPS (including video processing): 4.70.Inference FPS: 12.76 Video saved to 'output/Coco Walking in Berkeley_depth_anything_int8.mp4'. Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -970,9 +945,9 @@ Tool =4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (4.12.2) - Requirement already satisfied: sympy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (1.13.3) - Requirement already satisfied: networkx in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1) - Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1.4) - Requirement already satisfied: fsspec in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (2024.9.0) - Requirement already satisfied: numpy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchvision) (1.23.5) - Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchvision) (10.4.0) - Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.5) - Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch) (1.3.0) + Requirement already satisfied: torch in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2.4.1+cpu) + Requirement already satisfied: torchvision in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (0.19.1+cpu) + Requirement already satisfied: opencv-python in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.10.0.84) + Requirement already satisfied: wheel in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (0.44.0) + Requirement already satisfied: filelock in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.16.1) + Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (4.12.2) + Requirement already satisfied: sympy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (1.13.3) + Requirement already satisfied: networkx in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1) + Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1.4) + Requirement already satisfied: fsspec in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (2024.9.0) + Requirement already satisfied: numpy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchvision) (1.23.5) + Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchvision) (10.4.0) + Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.5) + Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch) (1.3.0) Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cpu Collecting git+https://github.com/facebookresearch/detectron2.git - Cloning https://github.com/facebookresearch/detectron2.git to /tmp/pip-req-build-89enhchj + Cloning https://github.com/facebookresearch/detectron2.git to /tmp/pip-req-build-9ds1xx43 .. parsed-literal:: - Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/detectron2.git /tmp/pip-req-build-89enhchj + Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/detectron2.git /tmp/pip-req-build-9ds1xx43 .. 
parsed-literal:: - Resolved https://github.com/facebookresearch/detectron2.git to commit c69939aa85460e8135f40bce908a6cddaa73065f + Resolved https://github.com/facebookresearch/detectron2.git to commit 8d85329aed8506ea3672e3e208971345973ea761 Preparing metadata (setup.py): started Preparing metadata (setup.py): finished with status 'done' - Requirement already satisfied: Pillow>=7.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (10.4.0) - Requirement already satisfied: black in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (24.3.0) - Requirement already satisfied: cloudpickle in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (3.1.0) - Requirement already satisfied: fvcore<0.1.6,>=0.1.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (0.1.5.post20221221) + Requirement already satisfied: Pillow>=7.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (10.4.0) + Requirement already satisfied: black in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (24.3.0) + Requirement already satisfied: cloudpickle in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (3.1.0) + Requirement already satisfied: 
fvcore<0.1.6,>=0.1.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (0.1.5.post20221221) Collecting hydra-core>=1.1 (from detectron2==0.6) Using cached hydra_core-1.3.2-py3-none-any.whl.metadata (5.5 kB) Collecting iopath<0.1.10,>=0.1.7 (from detectron2==0.6) Using cached https://download.pytorch.org/whl/iopath-0.1.9-py3-none-any.whl (27 kB) - Requirement already satisfied: matplotlib in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (3.7.5) + Requirement already satisfied: matplotlib in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (3.7.5) Collecting omegaconf<2.4,>=2.1 (from detectron2==0.6) Using cached omegaconf-2.3.0-py3-none-any.whl.metadata (3.9 kB) - Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (24.2) - Requirement already satisfied: pycocotools>=2.0.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.0.7) - Requirement already satisfied: tabulate in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (0.9.0) - Requirement already satisfied: tensorboard in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.12.3) - Requirement already 
satisfied: termcolor>=1.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.4.0) - Requirement already satisfied: tqdm>4.29.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (4.67.1) - Requirement already satisfied: yacs>=0.1.8 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (0.1.8) - Requirement already satisfied: numpy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from fvcore<0.1.6,>=0.1.5->detectron2==0.6) (1.23.5) - Requirement already satisfied: pyyaml>=5.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from fvcore<0.1.6,>=0.1.5->detectron2==0.6) (6.0.2) - Requirement already satisfied: antlr4-python3-runtime==4.9.* in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.1->detectron2==0.6) (4.9.3) - Requirement already satisfied: importlib-resources in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.1->detectron2==0.6) (6.4.5) - Requirement already satisfied: portalocker in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from iopath<0.1.10,>=0.1.7->detectron2==0.6) (3.0.0) - Requirement already satisfied: contourpy>=1.0.1 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (1.1.1) - Requirement already satisfied: cycler>=0.10 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (0.12.1) - Requirement already satisfied: fonttools>=4.22.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (4.55.2) - Requirement already satisfied: kiwisolver>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (1.4.7) - Requirement already satisfied: pyparsing>=2.3.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (3.1.4) - Requirement already satisfied: python-dateutil>=2.7 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (2.9.0.post0) - Requirement already satisfied: click>=8.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (8.1.7) - Requirement already satisfied: mypy-extensions>=0.4.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (1.0.0) - Requirement already satisfied: pathspec>=0.9.0 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (0.12.1) - Requirement already satisfied: platformdirs>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (4.3.6) - Requirement already satisfied: tomli>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (2.2.1) - Requirement already satisfied: typing-extensions>=4.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (4.12.2) - Requirement already satisfied: absl-py>=0.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.4.0) - Requirement already satisfied: grpcio>=1.48.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.68.1) - Requirement already satisfied: google-auth<3,>=1.6.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (2.36.0) - Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.0.0) - Requirement already satisfied: markdown>=2.6.8 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.7) - Requirement already satisfied: protobuf>=3.19.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.20.3) - Requirement already satisfied: requests<3,>=2.21.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (2.32.3) - Requirement already satisfied: setuptools>=41.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (44.0.0) - Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (0.7.2) - Requirement already satisfied: werkzeug>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.0.6) - Requirement already satisfied: wheel>=0.26 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (0.45.1) - Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (5.5.0) - Requirement 
already satisfied: pyasn1-modules>=0.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (0.4.1) - Requirement already satisfied: rsa<5,>=3.1.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (4.9) - Requirement already satisfied: requests-oauthlib>=0.7.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard->detectron2==0.6) (2.0.0) - Requirement already satisfied: zipp>=3.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from importlib-resources->hydra-core>=1.1->detectron2==0.6) (3.20.2) - Requirement already satisfied: importlib-metadata>=4.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from markdown>=2.6.8->tensorboard->detectron2==0.6) (8.5.0) - Requirement already satisfied: six>=1.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from python-dateutil>=2.7->matplotlib->detectron2==0.6) (1.17.0) - Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (3.4.0) - Requirement already satisfied: idna<4,>=2.5 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (3.10) - Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (2.2.3) - Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (2024.8.30) - Requirement already satisfied: MarkupSafe>=2.1.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from werkzeug>=1.0.1->tensorboard->detectron2==0.6) (2.1.5) - Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (0.6.1) - Requirement already satisfied: oauthlib>=3.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard->detectron2==0.6) (3.2.2) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (24.1) + Requirement already satisfied: pycocotools>=2.0.2 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.0.7) + Requirement already satisfied: tabulate in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (0.9.0) + Requirement already satisfied: tensorboard in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.12.3) + Requirement already satisfied: termcolor>=1.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.4.0) + Requirement already satisfied: tqdm>4.29.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (4.66.6) + Requirement already satisfied: yacs>=0.1.8 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (0.1.8) + Requirement already satisfied: numpy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from fvcore<0.1.6,>=0.1.5->detectron2==0.6) (1.23.5) + Requirement already satisfied: pyyaml>=5.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from fvcore<0.1.6,>=0.1.5->detectron2==0.6) (6.0.2) + Requirement already satisfied: antlr4-python3-runtime==4.9.* in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.1->detectron2==0.6) (4.9.3) + Requirement already satisfied: importlib-resources in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.1->detectron2==0.6) (6.4.5) + Requirement already satisfied: portalocker in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from iopath<0.1.10,>=0.1.7->detectron2==0.6) (2.10.1) + Requirement already satisfied: contourpy>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (1.1.1) + Requirement already satisfied: cycler>=0.10 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (0.12.1) + Requirement already satisfied: fonttools>=4.22.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (4.54.1) + Requirement already satisfied: kiwisolver>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (1.4.7) + Requirement already satisfied: pyparsing>=2.3.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (3.1.4) + Requirement already satisfied: python-dateutil>=2.7 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (2.9.0.post0) + Requirement already satisfied: click>=8.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (8.1.7) + Requirement already satisfied: mypy-extensions>=0.4.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (1.0.0) + Requirement already satisfied: pathspec>=0.9.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (0.12.1) + Requirement already satisfied: platformdirs>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (4.3.6) + Requirement already satisfied: tomli>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (2.0.2) + Requirement already satisfied: typing-extensions>=4.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (4.12.2) + Requirement already satisfied: absl-py>=0.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.4.0) + Requirement already satisfied: grpcio>=1.48.2 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.67.1) + Requirement already satisfied: google-auth<3,>=1.6.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (2.35.0) + Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.0.0) + Requirement already satisfied: markdown>=2.6.8 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.7) + Requirement already satisfied: protobuf>=3.19.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.20.3) + Requirement already satisfied: requests<3,>=2.21.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (2.32.3) + Requirement already satisfied: setuptools>=41.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (44.0.0) + Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (0.7.2) + Requirement already 
satisfied: werkzeug>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.0.6) + Requirement already satisfied: wheel>=0.26 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (0.44.0) + Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (5.5.0) + Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (0.4.1) + Requirement already satisfied: rsa<5,>=3.1.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (4.9) + Requirement already satisfied: requests-oauthlib>=0.7.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard->detectron2==0.6) (2.0.0) + Requirement already satisfied: zipp>=3.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from importlib-resources->hydra-core>=1.1->detectron2==0.6) (3.20.2) + Requirement already satisfied: importlib-metadata>=4.4 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from markdown>=2.6.8->tensorboard->detectron2==0.6) (8.5.0) + Requirement already satisfied: six>=1.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from python-dateutil>=2.7->matplotlib->detectron2==0.6) (1.16.0) + Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (3.4.0) + Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (3.10) + Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (2.2.3) + Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (2024.8.30) + Requirement already satisfied: MarkupSafe>=2.1.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from werkzeug>=1.0.1->tensorboard->detectron2==0.6) (2.1.5) + Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (0.6.1) + Requirement already satisfied: oauthlib>=3.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard->detectron2==0.6) (3.2.2) Using cached hydra_core-1.3.2-py3-none-any.whl (154 kB) Using cached omegaconf-2.3.0-py3-none-any.whl (79 kB) Building wheels for collected packages: detectron2 Building wheel for detectron2 (setup.py): started Building wheel for detectron2 (setup.py): finished with status 'done' - Created wheel for detectron2: filename=detectron2-0.6-cp38-cp38-linux_x86_64.whl size=8313251 sha256=a744a8ccf54176a60e63af7e14e6a7f431f5b19935a3c1260a7d39f7a7f84bc8 - Stored in directory: /tmp/pip-ephem-wheel-cache-cb2ga2gq/wheels/19/ac/65/e48e5e4ec2702274d927c5a6efb75709b24014371d3bb778f2 + Created wheel for detectron2: filename=detectron2-0.6-cp38-cp38-linux_x86_64.whl size=8313237 sha256=7cd84a15a89de76a7ab5b648f2fb7ebff63b7e43ffc90c7f19a568d16858de8a + Stored in directory: /tmp/pip-ephem-wheel-cache-uvptv5zg/wheels/19/ac/65/e48e5e4ec2702274d927c5a6efb75709b24014371d3bb778f2 Successfully built detectron2 Installing collected packages: omegaconf, iopath, hydra-core, detectron2 Attempting uninstall: omegaconf @@ -203,10 +203,10 @@ Install required packages for running model Uninstalling iopath-0.1.10: Successfully uninstalled iopath-0.1.10 Successfully installed detectron2-0.6 hydra-core-1.3.2 iopath-0.1.9 omegaconf-2.3.0 - Requirement already satisfied: openvino>=2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2024.4.0) - Requirement 
already satisfied: numpy<2.1.0,>=1.16.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (1.23.5) - Requirement already satisfied: openvino-telemetry>=2023.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (2024.5.0) - Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (24.2) + Requirement already satisfied: openvino>=2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2024.4.0) + Requirement already satisfied: numpy<2.1.0,>=1.16.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (1.23.5) + Requirement already satisfied: openvino-telemetry>=2023.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (2024.1.0) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (24.1) Define helpers for PyTorch model initialization and conversion diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg index 9ffd8dbc558859..f5b1d98eea3213 100644 --- 
a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ec1aaa179217e234b7c93d22f9da2f1ac0281e5bf8e4271c4094c3d680793782 -size 58047 +oid sha256:0df4e94924f81aab66086702d85a461f463078f0d06f67b1fe5d46ad8480aa91 +size 58652 diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png index 0c626a2f115cc3..f676b44edd1d9a 100644 --- a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5ae2588579f79d5d3e23a9fd9870f28f3bd063b9166da901cd639f16f0f04fca -size 508747 +oid sha256:b5a857cd060d740290ccc65aec47252aad9f41c665dc2808195c3185248977e8 +size 509376 diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg index 6063ffe4fca6ec..67719cdcbd66b0 100644 --- a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d414af832026a73bf7d8a8165a202c499989ddbc4db0826e6e0ca1951b2b4605 -size 54234 +oid sha256:ddc40900fddf1a115903c4e200899306060114348bf2ca82fbb4d7d92a885b09 +size 53897 diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png 
b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png index 656018b2fa8884..af63ef41697b47 100644 --- a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8aa8664400b8c51d604d23e669d6e44f766d4eb6b9958d38f4757d5e1cbefe88 -size 457666 +oid sha256:d1276209027e5aac72e4bb6f39f4494d2a807ee4bd85054a1285b0832e4515b9 +size 460797 diff --git a/docs/notebooks/distilbert-sequence-classification-with-output.rst b/docs/notebooks/distilbert-sequence-classification-with-output.rst new file mode 100644 index 00000000000000..862079f68aeeb7 --- /dev/null +++ b/docs/notebooks/distilbert-sequence-classification-with-output.rst @@ -0,0 +1,338 @@ +Sentiment Analysis with OpenVINO™ +================================= + +**Sentiment analysis** is the use of natural language processing, text +analysis, computational linguistics, and biometrics to systematically +identify, extract, quantify, and study affective states and subjective +information. This notebook demonstrates how to convert and run a +sequence classification model using OpenVINO. + + +**Table of contents:** + + +- `Imports <#imports>`__ +- `Initializing the Model <#initializing-the-model>`__ +- `Initializing the Tokenizer <#initializing-the-tokenizer>`__ +- `Convert Model to OpenVINO Intermediate Representation + format <#convert-model-to-openvino-intermediate-representation-format>`__ + + - `Select inference device <#select-inference-device>`__ + +- `Inference <#inference>`__ + + - `For a single input sentence <#for-a-single-input-sentence>`__ + - `Read from a text file <#read-from-a-text-file>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. 
+ +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +Imports +------- + + + +.. code:: ipython3 + + %pip install "openvino>=2023.1.0" transformers "torch>=2.1" tqdm --extra-index-url https://download.pytorch.org/whl/cpu + + +.. parsed-literal:: + + Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cpu + Requirement already satisfied: openvino>=2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2024.4.0) + Requirement already satisfied: transformers in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.46.1) + Requirement already satisfied: torch>=2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2.4.1+cpu) + Requirement already satisfied: tqdm in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.66.6) + Requirement already satisfied: numpy<2.1.0,>=1.16.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (1.23.5) + Requirement already satisfied: openvino-telemetry>=2023.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (2024.1.0) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) 
(24.1) + Requirement already satisfied: filelock in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (3.16.1) + Requirement already satisfied: huggingface-hub<1.0,>=0.23.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.26.2) + Requirement already satisfied: pyyaml>=5.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (6.0.2) + Requirement already satisfied: regex!=2019.12.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (2024.9.11) + Requirement already satisfied: requests in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (2.32.3) + Requirement already satisfied: safetensors>=0.4.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.4.5) + Requirement already satisfied: tokenizers<0.21,>=0.20 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.20.2) + Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (4.12.2) + Requirement already satisfied: sympy in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (1.13.3) + Requirement already satisfied: networkx in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.1) + Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.1.4) + Requirement already satisfied: fsspec in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (2024.9.0) + Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch>=2.1) (2.1.5) + Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (3.4.0) + Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (3.10) + Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2.2.3) + Requirement already satisfied: certifi>=2017.4.17 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2024.8.30) + Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch>=2.1) (1.3.0) + Note: you may need to restart the kernel to use updated packages. + + +.. code:: ipython3 + + import warnings + from pathlib import Path + import time + from transformers import AutoModelForSequenceClassification, AutoTokenizer + import numpy as np + import openvino as ov + +.. code:: ipython3 + + # Fetch `notebook_utils` module + import requests + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + + open("notebook_utils.py", "w").write(r.text) + from notebook_utils import download_file, device_widget + +Initializing the Model +---------------------- + + + +We will use the transformer-based `DistilBERT base uncased finetuned +SST-2 `__ +model from Hugging Face. + +.. code:: ipython3 + + checkpoint = "distilbert-base-uncased-finetuned-sst-2-english" + model = AutoModelForSequenceClassification.from_pretrained(pretrained_model_name_or_path=checkpoint) + + +.. parsed-literal:: + + 2024-11-04 23:18:47.102633: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-04 23:18:47.135966: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
+ To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2024-11-04 23:18:47.793551: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + + +Initializing the Tokenizer +-------------------------- + + + +Text Preprocessing cleans the text-based input data so it can be fed +into the model. +`Tokenization `__ +splits paragraphs and sentences into smaller units that can be more +easily assigned meaning. It involves cleaning the data and assigning +tokens or IDs to the words, so they are represented in a vector space +where similar words have similar vectors. This helps the model +understand the context of a sentence. Here, we will use +`AutoTokenizer `__ +- a pre-trained tokenizer from Hugging Face: + +.. code:: ipython3 + + tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=checkpoint) + +Convert Model to OpenVINO Intermediate Representation format +------------------------------------------------------------ + + + +`Model conversion +API `__ +facilitates the transition between training and deployment environments, +performs static model analysis, and adjusts deep learning models for +optimal execution on end-point target devices. + +.. code:: ipython3 + + import torch + + ir_xml_name = checkpoint + ".xml" + MODEL_DIR = "model/" + ir_xml_path = Path(MODEL_DIR) / ir_xml_name + + MAX_SEQ_LENGTH = 128 + input_info = [ + (ov.PartialShape([1, -1]), ov.Type.i64), + (ov.PartialShape([1, -1]), ov.Type.i64), + ] + default_input = torch.ones(1, MAX_SEQ_LENGTH, dtype=torch.int64) + inputs = { + "input_ids": default_input, + "attention_mask": default_input, + } + + ov_model = ov.convert_model(model, input=input_info, example_input=inputs) + ov.save_model(ov_model, ir_xml_path) + + +.. parsed-literal:: + + WARNING:tensorflow:Please fix your imports. 
Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. + + +.. parsed-literal:: + + [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + warnings.warn( + `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. + + +OpenVINO™ Runtime uses the `Infer +Request `__ +mechanism which enables running models on different devices in +asynchronous or synchronous manners. The model graph is sent as an +argument to the OpenVINO API and an inference request is created. The +default inference mode is AUTO but it can be changed according to +requirements and hardware available. You can explore the different +inference modes and their usage `in +documentation. `__ + +.. code:: ipython3 + + core = ov.Core() + +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~ + + + +select device from dropdown list for running inference using OpenVINO + +.. code:: ipython3 + + device = device_widget() + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + +.. code:: ipython3 + + warnings.filterwarnings("ignore") + compiled_model = core.compile_model(ov_model, device.value) + infer_request = compiled_model.create_infer_request() + +.. 
code:: ipython3 + + def softmax(x): + """ + Defining a softmax function to extract + the prediction from the output of the IR format + Parameters: Logits array + Returns: Probabilities + """ + + e_x = np.exp(x - np.max(x)) + return e_x / e_x.sum() + +Inference +--------- + + + +.. code:: ipython3 + + def infer(input_text): + """ + Creating a generic inference function + to read the input and infer the result + into 2 classes: Positive or Negative. + Parameters: Text to be processed + Returns: Label: Positive or Negative. + """ + + input_text = tokenizer( + input_text, + truncation=True, + return_tensors="np", + ) + inputs = dict(input_text) + label = {0: "NEGATIVE", 1: "POSITIVE"} + result = infer_request.infer(inputs=inputs) + for i in result.values(): + probability = np.argmax(softmax(i)) + return label[probability] + +For a single input sentence +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + input_text = "I had a wonderful day" + start_time = time.perf_counter() + result = infer(input_text) + end_time = time.perf_counter() + total_time = end_time - start_time + print("Label: ", result) + print("Total Time: ", "%.2f" % total_time, " seconds") + + +.. parsed-literal:: + + Label: POSITIVE + Total Time: 0.03 seconds + + +Read from a text file +~~~~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + # Download the text from the openvino_notebooks storage + vocab_file_path = download_file( + "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/text/food_reviews.txt", + directory="data", + ) + + + +.. parsed-literal:: + + data/food_reviews.txt: 0%| | 0.00/71.0 [00:00`__) and +techniques such as `InstructGPT `__ +has been the core foundation of breakthroughs such as ChatGPT and GPT-4. +However, these powerful models remain hidden behind APIs and we know +very little about their underlying architecture. 
Instruction-following +models are capable of generating text in response to prompts and are +often used for tasks like writing assistance, chatbots, and content +generation. Many users now interact with these models regularly and even +use them for work but the majority of such models remain closed-source +and require massive amounts of computational resources to experiment +with. + +`Dolly +2.0 `__ +is the first open-source, instruction-following LLM fine-tuned by +Databricks on a transparent and freely available dataset that is also +open-sourced to use for commercial purposes. That means Dolly 2.0 is +available for commercial applications without the need to pay for API +access or share data with third parties. Dolly 2.0 exhibits similar +characteristics to ChatGPT despite being much smaller. + +In this tutorial, we consider how to run an instruction-following text +generation pipeline using Dolly 2.0 and OpenVINO. We will use a +pre-trained model from the `Hugging Face +Transformers `__ +library. To simplify the user experience, the `Hugging Face Optimum +Intel `__ library is +used to convert the models to OpenVINO™ IR format. + +The tutorial consists of the following steps: + +- Install prerequisites +- Download and convert the model from a public source using the + `OpenVINO integration with Hugging Face + Optimum `__. +- Compress model weights to INT8 with `OpenVINO + NNCF `__ +- Create an instruction-following inference pipeline +- Run instruction-following pipeline + +About Dolly 2.0 +--------------- + +Dolly 2.0 is an instruction-following large language model trained on +the Databricks machine-learning platform that is licensed for commercial +use. It is based on `Pythia `__ +and is trained on ~15k instruction/response fine-tuning records +generated by Databricks employees in various capability domains, +including brainstorming, classification, closed QA, generation, +information extraction, open QA, and summarization.
Dolly 2.0 works by +processing natural language instructions and generating responses that +follow the given instructions. It can be used for a wide range of +applications, including closed question-answering, summarization, and +generation. + +The model training process was inspired by +`InstructGPT `__. To train InstructGPT +models, the core technique is reinforcement learning from human feedback +(RLHF). This technique uses human preferences as a reward signal to +fine-tune models, which is important as the safety and alignment +problems required to be solved are complex and subjective, and aren’t +fully captured by simple automatic metrics. More details about the +InstructGPT approach can be found in OpenAI `blog +post `__. The +breakthrough discovered with InstructGPT is that language models don’t +need larger and larger training sets. By using human-evaluated +question-and-answer training, authors were able to train a better +language model using one hundred times fewer parameters than the +previous model. Databricks used a similar approach to create a prompt +and response dataset they call +`databricks-dolly-15k `__, +a corpus of more than 15,000 records generated by thousands of +Databricks employees to enable large language models to exhibit the +magical interactivity of InstructGPT.
More details about the model and +dataset can be found in `Databricks blog +post `__ +and `repo `__ + + +**Table of contents:** + + +- `Prerequisites <#prerequisites>`__ +- `Convert model using Optimum-CLI + tool <#convert-model-using-optimum-cli-tool>`__ +- `Compress model weights <#compress-model-weights>`__ + + - `Weights Compression using + Optimum-CLI <#weights-compression-using-optimum-cli>`__ + +- `Select model variant and inference + device <#select-model-variant-and-inference-device>`__ +- `Instantiate Model using Optimum + Intel <#instantiate-model-using-optimum-intel>`__ +- `Create an instruction-following inference + pipeline <#create-an-instruction-following-inference-pipeline>`__ + + - `Setup imports <#setup-imports>`__ + - `Prepare template for user + prompt <#prepare-template-for-user-prompt>`__ + - `Helpers for output parsing <#helpers-for-output-parsing>`__ + - `Main generation function <#main-generation-function>`__ + - `Helpers for application <#helpers-for-application>`__ + +- `Run instruction-following + pipeline <#run-instruction-following-pipeline>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +Prerequisites +------------- + + + +First, we should install the `Hugging Face +Optimum `__ library +accelerated by OpenVINO integration. The Hugging Face Optimum Intel API +is a high-level API that enables us to convert and quantize models from +the Hugging Face Transformers library to the OpenVINO™ IR format. For +more details, refer to the `Hugging Face Optimum Intel +documentation `__. + +.. 
code:: ipython3 + + import os + from pathlib import Path + import requests + + os.environ["GIT_CLONE_PROTECTION_ACTIVE"] = "false" + + %pip uninstall -q -y optimum optimum-intel + %pip install --pre -Uq "openvino>=2024.2.0" openvino-tokenizers[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + %pip install -q "diffusers>=0.16.1" "transformers>=4.33.0" "torch>=2.1" "nncf>=2.10.0" "onnx<1.16.2" "gradio>=4.19" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "git+https://github.com/huggingface/optimum-intel.git" + + + utility_files = ["notebook_utils.py", "cmd_helper.py"] + + for utility in utility_files: + local_path = Path(utility) + if not local_path.exists(): + r = requests.get( + url=f"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/{local_path.name}", + ) + with local_path.open("w") as f: + f.write(r.text) + +Convert model using Optimum-CLI tool +------------------------------------ + + + +`Optimum Intel `__ is +the interface between the +`Transformers `__ and +`Diffusers `__ libraries +and OpenVINO to accelerate end-to-end pipelines on Intel architectures. +It provides an easy-to-use CLI interface for exporting models to `OpenVINO +Intermediate Representation +(IR) `__ +format. + +The command below demonstrates the basic command for model export with +``optimum-cli`` + +.. code:: bash + + optimum-cli export openvino --model --task + +where ``--model`` argument is model id from HuggingFace Hub or local +directory with model (saved using ``.save_pretrained`` method), +``--task`` is one of `supported +task `__ +that exported model should solve. For LLMs it will be +``text-generation-with-past``. If model initialization requires to use +remote code, ``--trust-remote-code`` flag additionally should be passed.
+ +Compress model weights +---------------------- + + + +The `Weights +Compression `__ +algorithm is aimed at compressing the weights of the models and can be +used to optimize the model footprint and performance of large models +where the size of weights is relatively larger than the size of +activations, for example, Large Language Models (LLM). Compared to INT8 +compression, INT4 compression improves performance even more, but +introduces a minor drop in prediction quality. + +Weights Compression using Optimum-CLI +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +You can also apply fp16, 8-bit or 4-bit weight compression on the +Linear, Convolutional and Embedding layers when exporting your model +with the CLI by setting ``--weight-format`` to respectively fp16, int8 +or int4. This type of optimization reduces the memory footprint +and inference latency. By default the quantization scheme for int8/int4 +will be +`asymmetric `__, +to make it +`symmetric `__ +you can add ``--sym``. + +For INT4 quantization you can also specify the following arguments: + +- The ``--group-size`` parameter will define the group size to use for + quantization; a value of -1 results in per-column quantization. +- The ``--ratio`` parameter controls the ratio between 4-bit and 8-bit + quantization. If set to 0.9, it means that 90% of the layers will be + quantized to int4 while 10% will be quantized to int8. + +Smaller group_size and ratio values usually improve accuracy at the +sacrifice of the model size and inference latency. + + **Note**: There may be no speedup for INT4/INT8 compressed models on + dGPU. + +..
code:: ipython3 + + from IPython.display import display + import ipywidgets as widgets + + prepare_int4_model = widgets.Checkbox( + value=True, + description="Prepare INT4 model", + disabled=False, + ) + prepare_int8_model = widgets.Checkbox( + value=False, + description="Prepare INT8 model", + disabled=False, + ) + prepare_fp16_model = widgets.Checkbox( + value=False, + description="Prepare FP16 model", + disabled=False, + ) + + display(prepare_int4_model) + display(prepare_int8_model) + display(prepare_fp16_model) + + + +.. parsed-literal:: + + Checkbox(value=True, description='Prepare INT4 model') + + + +.. parsed-literal:: + + Checkbox(value=False, description='Prepare INT8 model') + + + +.. parsed-literal:: + + Checkbox(value=False, description='Prepare FP16 model') + + +.. code:: ipython3 + + from pathlib import Path + from cmd_helper import optimum_cli + + model_id = "databricks/dolly-v2-3b" + model_path = Path("dolly-v2-3b") + + fp16_model_dir = model_path / "FP16" + int8_model_dir = model_path / "INT8_compressed_weights" + int4_model_dir = model_path / "INT4_compressed_weights" + + + def convert_to_fp16(): + if (fp16_model_dir / "openvino_model.xml").exists(): + return + optimum_cli(model_id, fp16_model_dir, additional_args={"weight-format": "fp16"}) + + + def convert_to_int8(): + if (int8_model_dir / "openvino_model.xml").exists(): + return + optimum_cli(model_id, int8_model_dir, additional_args={"weight-format": "int8"}) + + + def convert_to_int4(): + if (int4_model_dir / "openvino_model.xml").exists(): + return + optimum_cli(model_id, int4_model_dir, additional_args={"weight-format": "int4"}) + + + if prepare_fp16_model.value: + convert_to_fp16() + if prepare_int8_model.value: + convert_to_int8() + if prepare_int4_model.value: + convert_to_int4() + + + +**Export command:** + + + +``optimum-cli export openvino --model databricks/dolly-v2-3b --task text-generation-with-past --weight-format int4 --ratio 1.0 --group-size 128 
dolly-v2-3b/INT4_compressed_weights`` + + +.. parsed-literal:: + + 2024-07-24 11:40:56.083018: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-07-24 11:40:56.084962: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used. + 2024-07-24 11:40:56.121994: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used. + 2024-07-24 11:40:56.122347: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2024-07-24 11:40:56.845683: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + torch.utils._pytree._register_pytree_node( + /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: '/home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/torchvision/image.so: undefined symbol: _ZN3c1017RegisterOperatorsD1Ev'If you don't plan on using image functionality from `torchvision.io`, you can ignore this warning. Otherwise, there might be something wrong with your environment. Did you have `libjpeg` or `libpng` installed before building `torchvision` from source? + warn( + WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. 
xFormers was built for: + PyTorch 2.0.1+cu118 with CUDA 1108 (you have 2.3.1+cpu) + Python 3.8.18 (you have 3.8.10) + Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers) + Memory-efficient attention, SwiGLU, sparse and more won't be available. + Set XFORMERS_MORE_DETAILS=1 for more details + /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + torch.utils._pytree._register_pytree_node( + /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/bitsandbytes/cextension.py:34: UserWarning: The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable. + warn("The installed version of bitsandbytes was compiled without GPU support. " + /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/bitsandbytes/libbitsandbytes_cpu.so: undefined symbol: cadam32bit_grad_fp32 + /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + torch.utils._pytree._register_pytree_node( + Framework not specified. Using pt to export the model. + Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. + Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. + Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. + Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. 
+ Using framework PyTorch: 2.3.1+cpu + Overriding 1 configuration item(s) + - use_cache -> True + /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/transformers/models/gpt_neox/modeling_gpt_neox.py:934: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + assert batch_size > 0, "batch_size has to be defined and > 0" + /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:114: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if (input_shape[-1] > 1 or self.sliding_window is not None) and self.is_causal: + /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/optimum/exporters/onnx/model_patcher.py:304: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if past_key_values_length > 0: + /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/transformers/models/gpt_neox/modeling_gpt_neox.py:617: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ if seq_len > self.max_seq_len_cached: + INFO:nncf:Statistics of the bitwidth distribution: + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 9% (2 / 130) │ 0% (0 / 128) │ + ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ 4 │ 91% (128 / 130) │ 100% (128 / 128) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + Applying Weight Compression ━━━━━━━━━━━━━━━━━━━ 100% 130/130 • 0:01:38 • 0:00:00;0;104;181m0:00:01181m0:00:04 + + +.. code:: ipython3 + + fp16_weights = fp16_model_dir / "openvino_model.bin" + int8_weights = int8_model_dir / "openvino_model.bin" + int4_weights = int4_model_dir / "openvino_model.bin" + + if fp16_weights.exists(): + print(f"Size of FP16 model is {fp16_weights.stat().st_size / 1024 / 1024:.2f} MB") + for precision, compressed_weights in zip([8, 4], [int8_weights, int4_weights]): + if compressed_weights.exists(): + print(f"Size of model with INT{precision} compressed weights is {compressed_weights.stat().st_size / 1024 / 1024:.2f} MB") + if compressed_weights.exists() and fp16_weights.exists(): + print(f"Compression rate for INT{precision} model: {fp16_weights.stat().st_size / compressed_weights.stat().st_size:.3f}") + + +.. parsed-literal:: + + Size of model with INT4 compressed weights is 1497.06 MB + + +Select model variant and inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +select device from dropdown list for running inference using OpenVINO + +.. 
code:: ipython3 + + available_models = [] + if int4_model_dir.exists(): + available_models.append("INT4") + if int8_model_dir.exists(): + available_models.append("INT8") + if fp16_model_dir.exists(): + available_models.append("FP16") + + model_to_run = widgets.Dropdown( + options=available_models, + value=available_models[0], + description="Model to run:", + disabled=False, + ) + + model_to_run + + + + +.. parsed-literal:: + + Dropdown(description='Model to run:', options=('INT4',), value='INT4') + + + +.. code:: ipython3 + + from notebook_utils import device_widget + import openvino as ov + + core = ov.Core() + + device = device_widget("CPU", exclude=["NPU"]) + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU') + + + +Instantiate Model using Optimum Intel +------------------------------------- + + + +Optimum Intel can be used to load optimized models from the `Hugging +Face Hub `__ and +create pipelines to run an inference with OpenVINO Runtime using Hugging +Face APIs. The Optimum Inference models are API compatible with Hugging +Face Transformers models. This means we just need to replace +``AutoModelForXxx`` class with the corresponding ``OVModelForXxx`` +class. + +Below is an example of the Dolly model + +.. code:: diff + + -from transformers import AutoModelForCausalLM + +from optimum.intel.openvino import OVModelForCausalLM + from transformers import AutoTokenizer, pipeline + + model_id = "databricks/dolly-v2-3b" + -model = AutoModelForCausalLM.from_pretrained(model_id) + +model = OVModelForCausalLM.from_pretrained(model_id, export=True) + +Model class initialization starts with calling ``from_pretrained`` +method. When downloading and converting Transformers model, the +parameter ``export=True`` should be added (as we already converted model +before, we do not need to provide this parameter). We can save the +converted model for the next usage with the ``save_pretrained`` method. 
+Tokenizer class and pipelines API are compatible with Optimum models. + +You can find more details about OpenVINO LLM inference using HuggingFace +Optimum API in `LLM inference +guide `__. + +.. code:: ipython3 + + from pathlib import Path + + from transformers import AutoTokenizer + from optimum.intel.openvino import OVModelForCausalLM + + import openvino.properties as props + import openvino.properties.hint as hints + import openvino.properties.streams as streams + + + if model_to_run.value == "INT4": + model_dir = int4_model_dir + elif model_to_run.value == "INT8": + model_dir = int8_model_dir + else: + model_dir = fp16_model_dir + print(f"Loading model from {model_dir}") + + tokenizer = AutoTokenizer.from_pretrained(model_dir) + + current_device = device.value + + ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): "1", props.cache_dir(): ""} + + ov_model = OVModelForCausalLM.from_pretrained(model_dir, device=current_device, ov_config=ov_config) + + +.. parsed-literal:: + + 2024-07-24 11:43:17.404362: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-07-24 11:43:17.406313: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used. + 2024-07-24 11:43:17.443348: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used. + 2024-07-24 11:43:17.444995: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+ 2024-07-24 11:43:18.193758: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + torch.utils._pytree._register_pytree_node( + /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: '/home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/torchvision/image.so: undefined symbol: _ZN3c1017RegisterOperatorsD1Ev'If you don't plan on using image functionality from `torchvision.io`, you can ignore this warning. Otherwise, there might be something wrong with your environment. Did you have `libjpeg` or `libpng` installed before building `torchvision` from source? + warn( + WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for: + PyTorch 2.0.1+cu118 with CUDA 1108 (you have 2.3.1+cpu) + Python 3.8.18 (you have 3.8.10) + Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers) + Memory-efficient attention, SwiGLU, sparse and more won't be available. + Set XFORMERS_MORE_DETAILS=1 for more details + /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + torch.utils._pytree._register_pytree_node( + /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/bitsandbytes/cextension.py:34: UserWarning: The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable. + warn("The installed version of bitsandbytes was compiled without GPU support. " + + +.. 
parsed-literal:: + + /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/bitsandbytes/libbitsandbytes_cpu.so: undefined symbol: cadam32bit_grad_fp32 + Loading model from dolly-v2-3b/INT4_compressed_weights + + +.. parsed-literal:: + + Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. + Compiling the model to CPU ... + + +Create an instruction-following inference pipeline +-------------------------------------------------- + + + +The ``run_generation`` function accepts user-provided text input, +tokenizes it, and runs the generation process. Text generation is an +iterative process, where each next token depends on the previously generated ones, +until a maximum number of tokens or a stop-generation condition is +reached. To obtain intermediate generation results without waiting until +generation is finished, we will use +`TextIteratorStreamer `__, +provided as part of HuggingFace `Streaming +API `__. + +The diagram below illustrates how the instruction-following pipeline +works. + +.. figure:: https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/e881f4a4-fcc8-427a-afe1-7dd80aebd66e + :alt: generation pipeline + + generation pipeline + +As can be seen, on the first iteration, the user-provided instructions are +converted to token IDs using a tokenizer, then the prepared input is provided +to the model. The model generates probabilities for all tokens in logits +format. The way the next token will be selected over predicted +probabilities is driven by the selected decoding methodology. You can +find more information about the most popular decoding methods in this +`blog `__. + +There are several parameters that can control text generation quality: + +- | ``Temperature`` is a parameter used to control the level of + creativity in AI-generated text.
By adjusting the ``temperature``, + you can influence the AI model’s probability distribution, making + the text more focused or diverse. + | Consider the following example: The AI model has to complete the + sentence “The cat is \____.” with the following token + probabilities: + + | playing: 0.5 + | sleeping: 0.25 + | eating: 0.15 + | driving: 0.05 + | flying: 0.05 + + - **Low temperature** (e.g., 0.2): The AI model becomes more focused + and deterministic, choosing tokens with the highest probability, + such as “playing.” + - **Medium temperature** (e.g., 1.0): The AI model maintains a + balance between creativity and focus, selecting tokens based on + their probabilities without significant bias, such as “playing,” + “sleeping,” or “eating.” + - **High temperature** (e.g., 2.0): The AI model becomes more + adventurous, increasing the chances of selecting less likely + tokens, such as “driving” and “flying.” + +- ``Top-p``, also known as nucleus sampling, is a parameter used to + control the range of tokens considered by the AI model based on their + cumulative probability. By adjusting the ``top-p`` value, you can + influence the AI model’s token selection, making it more focused or + diverse. Using the same example with the cat, consider the following + top_p settings: + + - **Low top_p** (e.g., 0.5): The AI model considers only tokens with + the highest cumulative probability, such as “playing.” + - **Medium top_p** (e.g., 0.8): The AI model considers tokens with a + higher cumulative probability, such as “playing,” “sleeping,” and + “eating.” + - **High top_p** (e.g., 1.0): The AI model considers all tokens, + including those with lower probabilities, such as “driving” and + “flying.” + +- ``Top-k`` is another popular sampling strategy. 
In comparison with + Top-P, which chooses from the smallest possible set of words whose + cumulative probability exceeds the probability P, in Top-K sampling K + most likely next words are filtered and the probability mass is + redistributed among only those K next words. In our example with cat, + if k=3, then only “playing”, “sleeping” and “eating” will be taken + into account as possible next word. + +To optimize the generation process and use memory more efficiently, the +``use_cache=True`` option is enabled. Since the output side is +auto-regressive, an output token hidden state remains the same once +computed for every further generation step. Therefore, recomputing it +every time you want to generate a new token seems wasteful. With the +cache, the model saves the hidden state once it has been computed. The +model only computes the one for the most recently generated output token +at each time step, re-using the saved ones for hidden tokens. This +reduces the generation complexity from O(n^3) to O(n^2) for a +transformer model. More details about how it works can be found in this +`article `__. +With this option, the model gets the previous step’s hidden states +(cached attention keys and values) as input and additionally provides +hidden states for the current step as output. It means for all next +iterations, it is enough to provide only a new token obtained from the +previous step and cached key values to get the next token prediction. + +The generation cycle repeats until the end of the sequence token is +reached or it also can be interrupted when maximum tokens will be +generated. As already mentioned before, we can enable printing current +generated tokens without waiting until when the whole generation is +finished using Streaming API, it adds a new token to the output queue +and then prints them when they are ready. + +Setup imports +~~~~~~~~~~~~~ + + + +.. 
code:: ipython3 + + from threading import Thread + from time import perf_counter + from typing import List + import gradio as gr + from transformers import AutoTokenizer, TextIteratorStreamer + import numpy as np + +Prepare template for user prompt +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +For effective generation, the model expects input in a specific +format. The code below prepares a template for passing the user instruction +into the model along with additional context. + +.. code:: ipython3 + + INSTRUCTION_KEY = "### Instruction:" + RESPONSE_KEY = "### Response:" + END_KEY = "### End" + INTRO_BLURB = "Below is an instruction that describes a task. Write a response that appropriately completes the request." + + # This is the prompt that is used for generating responses using an already trained model. It ends with the response + # key, where the job of the model is to provide the completion that follows it (i.e. the response itself). + PROMPT_FOR_GENERATION_FORMAT = """{intro} + + {instruction_key} + {instruction} + + {response_key} + """.format( + intro=INTRO_BLURB, + instruction_key=INSTRUCTION_KEY, + instruction="{instruction}", + response_key=RESPONSE_KEY, + ) + +Helpers for output parsing +~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +The model was retrained to finish generation using the special token ``### End``; +the code below finds its id to use it as the generation stop criterion. + +.. code:: ipython3 + + def get_special_token_id(tokenizer: AutoTokenizer, key: str) -> int: + """ + Gets the token ID for a given string that has been added to the tokenizer as a special token. + + When training, we configure the tokenizer so that the sequences like "### Instruction:" and "### End" are + treated specially and converted to a single, new token. This retrieves the token ID each of these keys map to.
+ + Args: + tokenizer (PreTrainedTokenizer): the tokenizer + key (str): the key to convert to a single token + + Raises: + RuntimeError: if more than one ID was generated + + Returns: + int: the token ID for the given key + """ + token_ids = tokenizer.encode(key) + if len(token_ids) > 1: + raise ValueError(f"Expected only a single token for '{key}' but found {token_ids}") + return token_ids[0] + + + tokenizer_response_key = next( + (token for token in tokenizer.additional_special_tokens if token.startswith(RESPONSE_KEY)), + None, + ) + + end_key_token_id = None + if tokenizer_response_key: + try: + end_key_token_id = get_special_token_id(tokenizer, END_KEY) + # Ensure generation stops once it generates "### End" + except ValueError: + pass + +Main generation function +~~~~~~~~~~~~~~~~~~~~~~~~ + + + +As it was discussed above, ``run_generation`` function is the entry +point for starting generation. It gets provided input instruction as +parameter and returns model response. + +.. code:: ipython3 + + def run_generation( + user_text: str, + top_p: float, + temperature: float, + top_k: int, + max_new_tokens: int, + perf_text: str, + ): + """ + Text generation function + + Parameters: + user_text (str): User-provided instruction for a generation. + top_p (float): Nucleus sampling. If set to < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for a generation. + temperature (float): The value used to module the logits distribution. + top_k (int): The number of highest probability vocabulary tokens to keep for top-k-filtering. + max_new_tokens (int): Maximum length of generated sequence. + perf_text (str): Content of text field for printing performance results. 
+ Returns: + model_output (str) - model-generated text + perf_text (str) - updated perf text filed content + """ + + # Prepare input prompt according to model expected template + prompt_text = PROMPT_FOR_GENERATION_FORMAT.format(instruction=user_text) + + # Tokenize the user text. + model_inputs = tokenizer(prompt_text, return_tensors="pt") + + # Start generation on a separate thread, so that we don't block the UI. The text is pulled from the streamer + # in the main thread. Adds timeout to the streamer to handle exceptions in the generation thread. + streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) + generate_kwargs = dict( + model_inputs, + streamer=streamer, + max_new_tokens=max_new_tokens, + do_sample=True, + top_p=top_p, + temperature=float(temperature), + top_k=top_k, + eos_token_id=end_key_token_id, + ) + t = Thread(target=ov_model.generate, kwargs=generate_kwargs) + t.start() + + # Pull the generated text from the streamer, and update the model output. + model_output = "" + per_token_time = [] + num_tokens = 0 + start = perf_counter() + for new_text in streamer: + current_time = perf_counter() - start + model_output += new_text + perf_text, num_tokens = estimate_latency(current_time, perf_text, new_text, per_token_time, num_tokens) + yield model_output, perf_text + start = perf_counter() + return model_output, perf_text + +Helpers for application +~~~~~~~~~~~~~~~~~~~~~~~ + + + +To build the interactive user interface, we will use the Gradio library. The +code below provides useful functions used for communication with UI +elements. + +.. code:: ipython3 + + def estimate_latency( + current_time: float, + current_perf_text: str, + new_gen_text: str, + per_token_time: List[float], + num_tokens: int, + ): + """ + Helper function for performance estimation + + Parameters: + current_time (float): This step time in seconds. + current_perf_text (str): Current content of performance UI field. + new_gen_text (str): New generated text.
+ per_token_time (List[float]): history of performance from previous steps. + num_tokens (int): Total number of generated tokens. + + Returns: + update for performance text field + update for a total number of tokens + """ + num_current_toks = len(tokenizer.encode(new_gen_text)) + num_tokens += num_current_toks + per_token_time.append(num_current_toks / current_time) + if len(per_token_time) > 10 and len(per_token_time) % 4 == 0: + current_bucket = per_token_time[:-10] + return ( + f"Average generation speed: {np.mean(current_bucket):.2f} tokens/s. Total generated tokens: {num_tokens}", + num_tokens, + ) + return current_perf_text, num_tokens + + + def select_device(device_str: str, current_text: str = "", progress: gr.Progress = gr.Progress()): + """ + Helper function for uploading model on the device. + + Parameters: + device_str (str): Device name. + current_text (str): Current content of user instruction field (used only for backup purposes, temporally replacing it on the progress bar during model loading). + progress (gr.Progress): gradio progress tracker + Returns: + current_text + """ + if device_str != ov_model._device: + ov_model.request = None + ov_model._device = device_str + + for i in progress.tqdm(range(1), desc=f"Model loading on {device_str}"): + ov_model.compile() + return current_text + +Run instruction-following pipeline +---------------------------------- + + + +Now, we are ready to explore model capabilities. This demo provides a +simple interface that allows communication with a model using text +instruction. Type your instruction into the ``User instruction`` field +or select one from predefined examples and click on the ``Submit`` +button to start generation. Additionally, you can modify advanced +generation parameters: + +- ``Device`` - allows switching inference device. Please note, every + time when new device is selected, model will be recompiled and this + takes some time. +- ``Max New Tokens`` - maximum size of generated text. 
+- ``Top-p (nucleus sampling)`` - if set to < 1, only the smallest set + of most probable tokens with probabilities that add up to top_p or + higher are kept for a generation. +- ``Top-k`` - the number of highest probability vocabulary tokens to + keep for top-k-filtering. +- ``Temperature`` - the value used to modulate the logits distribution. + +.. code:: ipython3 + + import requests + + if not Path("gradio_helper.py").exists(): + r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/dolly-2-instruction-following/gradio_helper.py") + open("gradio_helper.py", "w").write(r.text) + + from gradio_helper import make_demo + + demo = make_demo(run_fn=run_generation, select_device_fn=select_device) + + try: + demo.queue().launch(debug=False, height=800) + except Exception: + demo.queue().launch(debug=False, share=True, height=800) + # If you are launching remotely, specify server_name and server_port + # EXAMPLE: `demo.launch(server_name='your server name', server_port='server port in int')` + # To learn more please refer to the Gradio docs: https://gradio.app/docs/ diff --git a/docs/notebooks/dynamicrafter-animating-images-with-output.rst b/docs/notebooks/dynamicrafter-animating-images-with-output.rst index fac46c9f3e6cad..992c346194e31c 100644 --- a/docs/notebooks/dynamicrafter-animating-images-with-output.rst +++ b/docs/notebooks/dynamicrafter-animating-images-with-output.rst @@ -160,15 +160,21 @@ Prerequisites ..
code:: ipython3 + import sys from pathlib import Path import requests - if not Path("cmd_helper.py").exists(): - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py", - ) - open("cmd_helper.py", "w").write(r.text) + dynamicrafter_path = Path("dynamicrafter") + + if not dynamicrafter_path.exists(): + dynamicrafter_path.mkdir(parents=True, exist_ok=True) + !git clone https://github.com/Doubiiu/DynamiCrafter.git dynamicrafter + %cd dynamicrafter + !git checkout 26e665cd6c174234238d2ded661e2e56f875d360 -q # to avoid breaking changes + %cd .. + + sys.path.append(str(dynamicrafter_path)) r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", @@ -176,27 +182,24 @@ Prerequisites open("notebook_utils.py", "w").write(r.text) - - .. parsed-literal:: - 24624 - - - -.. code:: ipython3 - - from cmd_helper import clone_repo - - - clone_repo("https://github.com/Doubiiu/DynamiCrafter.git", "26e665cd6c174234238d2ded661e2e56f875d360") + Cloning into 'dynamicrafter'... + remote: Enumerating objects: 335, done. + remote: Counting objects: 100% (153/153), done. + remote: Compressing objects: 100% (99/99), done. + remote: Total 335 (delta 97), reused 54 (delta 54), pack-reused 182 (from 1) + Receiving objects: 100% (335/335), 72.41 MiB | 20.85 MiB/s, done. + Resolving deltas: 100% (123/123), done. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images .. parsed-literal:: - PosixPath('DynamiCrafter') + 24692 @@ -218,7 +221,7 @@ We will use model for 256x256 resolution as example. 
Also, models for from huggingface_hub import hf_hub_download from omegaconf import OmegaConf - from utils.utils import instantiate_from_config + from dynamicrafter.utils.utils import instantiate_from_config def load_model_checkpoint(model, ckpt): @@ -263,7 +266,7 @@ We will use model for 256x256 resolution as example. Also, models for hf_hub_download(repo_id=REPO_ID, filename="model.ckpt", local_dir="./checkpoints/dynamicrafter_256_v1/", local_dir_use_symlinks=False) ckpt_path = "checkpoints/dynamicrafter_256_v1/model.ckpt" - config_file = "DynamiCrafter/configs/inference_256_v1.0.yaml" + config_file = "dynamicrafter/configs/inference_256_v1.0.yaml" config = OmegaConf.load(config_file) model_config = config.pop("model", OmegaConf.create()) model_config["params"]["unet_config"]["params"]["use_checkpoint"] = False @@ -279,25 +282,7 @@ We will use model for 256x256 resolution as example. Also, models for .. parsed-literal:: - Note: switching to '26e665cd6c174234238d2ded661e2e56f875d360'. - - You are in 'detached HEAD' state. You can look around, make experimental - changes and commit them, and you can discard any commits you make in this - state without impacting any branches by switching back to a branch. - - If you want to create a new branch to retain commits you create, you may - do so (now or later) by using -c with the switch command. Example: - - git switch -c - - Or undo this operation with: - - git switch - - - Turn off this advice by setting config variable advice.detachedHead to false - - HEAD is now at 26e665c add dataset - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/huggingface_hub/file_download.py:1204: UserWarning: `local_dir_use_symlinks` parameter is deprecated and will be ignored. The process to download files to a local folder has been updated and do not rely on symlinks anymore. You only need to pass a destination folder as`local_dir`. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/huggingface_hub/file_download.py:834: UserWarning: `local_dir_use_symlinks` parameter is deprecated and will be ignored. The process to download files to a local folder has been updated and do not rely on symlinks anymore. You only need to pass a destination folder as`local_dir`. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder. warnings.warn( @@ -315,12 +300,12 @@ We will use model for 256x256 resolution as example. Also, models for .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers warnings.warn(f"Importing from {__name__} is deprecated, please import via timm.layers", FutureWarning) - 2024-12-09 23:43:19.000762: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-12-09 23:43:19.034903: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-04 23:23:38.980054: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. 
You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-04 23:23:39.013901: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-12-09 23:43:19.630734: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-04 23:23:39.616188: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -378,7 +363,7 @@ Convert CLIP text encoder .. code:: ipython3 - from lvdm.modules.encoders.condition import FrozenOpenCLIPEmbedder + from dynamicrafter.lvdm.modules.encoders.condition import FrozenOpenCLIPEmbedder MODEL_DIR = Path("models") @@ -439,43 +424,43 @@ resolutions. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/utils/image.py:226: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/utils/image.py:226: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if input.numel() == 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:573: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:573: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if size == input_size: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:579: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:579: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
antialias = antialias and (max(factors) > 1) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:581: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:581: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if antialias: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:584: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:584: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
sigmas = (max((factors[0] - 1.0) / 2.0, 0.001), max((factors[1] - 1.0) / 2.0, 0.001)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:589: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:589: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! ks = int(max(2.0 * 2 * sigmas[0], 3)), int(max(2.0 * 2 * sigmas[1], 3)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:589: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:589: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
ks = int(max(2.0 * 2 * sigmas[0], 3)), int(max(2.0 * 2 * sigmas[1], 3)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/gaussian.py:55: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/gaussian.py:55: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. sigma = tensor([sigma], device=input.device, dtype=input.dtype) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/gaussian.py:55: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/gaussian.py:55: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! sigma = tensor([sigma], device=input.device, dtype=input.dtype) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/core/check.py:78: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/core/check.py:78: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if x_shape_to_check[i] != dim: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/kernels.py:92: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/kernels.py:92: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. 
In any other case, this might cause the trace to be incorrect. mean = tensor([[mean]], device=sigma.device, dtype=sigma.dtype) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:101: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:101: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if len(mean.shape) == 0 or mean.shape[0] == 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if len(std.shape) == 0 or std.shape[0] == 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:107: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:107: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if mean.shape and mean.shape[0] != 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:108: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:108: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if mean.shape[0] != data.shape[1] and mean.shape[:2] != data.shape[:2]: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:112: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:112: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if std.shape and std.shape[0] != 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:113: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:113: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if std.shape[0] != data.shape[1] and std.shape[:2] != data.shape[:2]: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:116: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:116: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. mean = torch.as_tensor(mean, device=data.device, dtype=data.dtype) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:117: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:117: TracerWarning: torch.as_tensor results are registered as constants in the trace. 
You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. std = torch.as_tensor(std, device=data.device, dtype=data.dtype) @@ -504,7 +489,7 @@ Convert AE encoder .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/DynamiCrafter/lvdm/modules/networks/ae_modules.py:67: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/ae_modules.py:67: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! w_ = w_ * (int(c)**(-0.5)) @@ -548,15 +533,15 @@ Convert Diffusion U-Net model .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/DynamiCrafter/lvdm/modules/networks/openaimodel3d.py:556: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:556: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if l_context == 77 + t*16: ## !!! HARD CODE here - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/DynamiCrafter/lvdm/modules/networks/openaimodel3d.py:205: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:205: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if batch_size: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/DynamiCrafter/lvdm/modules/networks/openaimodel3d.py:232: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:232: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if self.use_temporal_conv and batch_size: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/DynamiCrafter/lvdm/modules/networks/openaimodel3d.py:76: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:76: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert x.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/DynamiCrafter/lvdm/modules/networks/openaimodel3d.py:99: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:99: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert x.shape[1] == self.channels @@ -931,7 +916,7 @@ Run OpenVINO pipeline inference .. code:: ipython3 - image_path = "DynamiCrafter/prompts/256/art.png" + image_path = "dynamicrafter/prompts/256/art.png" prompt = "man fishing in a boat at sunset" seed = 234 image = Image.open(image_path) @@ -943,14 +928,14 @@ Run OpenVINO pipeline inference .. parsed-literal:: Seed set to 234 - /tmp/ipykernel_2173449/2451984876.py:25: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.) + /tmp/ipykernel_511478/2451984876.py:25: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.) img_tensor = torch.from_numpy(image).permute(2, 0, 1).float().to(model.device) .. 
parsed-literal:: - start: man fishing in a boat at sunset 2024-12-09 23:46:36 - Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 194.37 seconds + start: man fishing in a boat at sunset 2024-11-04 23:26:56 + Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 206.55 seconds .. code:: ipython3 @@ -1149,7 +1134,7 @@ To collect intermediate model inputs for calibration we should customize .. parsed-literal:: Error occurred: 403 Client Error: Forbidden for url: http://1.bp.blogspot.com/-c2pSbigvVm8/T9JqOXKIrsI/AAAAAAAACWs/ASXRA3Mbd0A/s1600/upsidedownnile.jpg - Error occurred: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer')) + Error occurred: 400 Client Error: Bad Request for url: https://media.gettyimages.com/photos/singer-benjamin-booker-appears-onstage-during-the-rachael-ray-sxsw-picture-id655166184?s=612x612 Error occurred: 400 Client Error: Bad Request for url: http://i2.wp.com/www.monsoonbreeze123.com/wp-content/uploads/2016/04/edited-5.jpg?resize=781%2C512 Error occurred: 403 Client Error: Forbidden for url: http://i.dailymail.co.uk/i/pix/2017/07/26/16/42B41FE900000578-4732576-It_seems_that_Emma_and_her_cat_have_an_extremely_close_bond_one_-a-50_1501083105178.jpg Error occurred: HTTPSConnectionPool(host='thewondrous.com', port=443): Max retries exceeded with url: /wp-content/uploads/2013/04/Egg-on-the-Head-of-Jack-Dog-600x799.jpg (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self signed certificate in certificate chain (_ssl.c:1131)'))) @@ -1362,7 +1347,7 @@ Let’s run the optimized pipeline %%skip not $to_quantize.value - image_path = "DynamiCrafter/prompts/256/art.png" + image_path = "dynamicrafter/prompts/256/art.png" prompt = "man fishing in a boat at sunset" seed = 234 image = Image.open(image_path) @@ -1379,8 +1364,8 @@ Let’s run the optimized pipeline .. 
parsed-literal:: - start: man fishing in a boat at sunset 2024-12-10 01:17:34 - Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 98.80 seconds + start: man fishing in a boat at sunset 2024-11-05 00:58:08 + Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 97.78 seconds .. code:: ipython3 @@ -1485,9 +1470,9 @@ models, we use median inference time on calibration subset. .. parsed-literal:: - FP32 latency: 193.524 - INT8 latency: 97.073 - Performance speed up: 1.994 + FP32 latency: 201.526 + INT8 latency: 96.036 + Performance speed up: 2.098 Interactive inference diff --git a/docs/notebooks/efficient-sam-with-output.rst b/docs/notebooks/efficient-sam-with-output.rst index ce83a3675d1d8c..b50b82341f4af8 100644 --- a/docs/notebooks/efficient-sam-with-output.rst +++ b/docs/notebooks/efficient-sam-with-output.rst @@ -82,68 +82,49 @@ Prerequisites .. code:: ipython3 - import platform - - %pip install -q "openvino>=2024.5.0" "nncf>=2.14.0" - %pip install -q "torch>=2.2.0" "torchaudio>=2.2.0" "torchvision>=0.17.0" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q opencv-python "gradio>=4.13" "matplotlib>=3.4" tqdm - - if platform.system() == "Darwin": - %pip install -q "numpy<2.0.0" + %pip install -q "openvino>=2023.3.0" "nncf>=2.7.0" opencv-python "gradio>=4.13" "matplotlib>=3.4" torch torchvision tqdm --extra-index-url https://download.pytorch.org/whl/cpu .. 
parsed-literal:: - ERROR: Could not find a version that satisfies the requirement openvino>=2024.5.0 (from versions: 2021.3.0, 2021.4.0, 2021.4.1, 2021.4.2, 2022.1.0, 2022.2.0, 2022.3.0, 2022.3.1, 2022.3.2, 2023.0.0.dev20230119, 2023.0.0.dev20230217, 2023.0.0.dev20230407, 2023.0.0.dev20230427, 2023.0.0, 2023.0.1, 2023.0.2, 2023.1.0.dev20230623, 2023.1.0.dev20230728, 2023.1.0.dev20230811, 2023.1.0, 2023.2.0.dev20230922, 2023.2.0, 2023.3.0, 2024.0.0, 2024.1.0, 2024.2.0, 2024.3.0, 2024.4.0, 2024.4.1.dev20240926) - ERROR: No matching distribution found for openvino>=2024.5.0 - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. .. code:: ipython3 - import requests from pathlib import Path + repo_dir = Path("EfficientSAM") - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py", - ) - open("cmd_helper.py", "w").write(r.text) - - + if not repo_dir.exists(): + !git clone https://github.com/yformer/EfficientSAM.git + %cd $repo_dir .. parsed-literal:: - 1491 - + Cloning into 'EfficientSAM'... + remote: Enumerating objects: 424, done. + remote: Counting objects: 100% (85/85), done. + remote: Compressing objects: 100% (33/33), done. + remote: Total 424 (delta 76), reused 52 (delta 52), pack-reused 339 (from 1) + Receiving objects: 100% (424/424), 262.14 MiB | 23.37 MiB/s, done. + Resolving deltas: 100% (246/246), done. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM .. 
code:: ipython3 - from cmd_helper import clone_repo - - - repo_dir = clone_repo("https://github.com/yformer/EfficientSAM.git") - - %cd $repo_dir - + import requests r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) + open("notebook_utils.py", "w").write(r.text) from notebook_utils import download_file, device_widget, quantization_widget # noqa: F401 - -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM - - Load PyTorch model ------------------ @@ -214,13 +195,6 @@ build PyTorch model pt_model.eval(); - -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:303: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. - state_dict = torch.load(f, map_location="cpu") - - Run PyTorch model inference --------------------------- @@ -403,23 +377,23 @@ disk using ``openvino.save_model``. .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:220: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:220: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if ( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:241: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:241: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
assert ( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:163: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:163: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! size = int(math.sqrt(xy_num)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
assert size * size == xy_num - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:166: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:166: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if size != h or size != w: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:251: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:251: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
assert x.shape[2] == num_patches - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if num_pts > self.decoder_max_num_input_points: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:92: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:92: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
elif num_pts < self.decoder_max_num_input_points: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:126: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:126: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if output_w > 0 and output_h > 0: @@ -666,10 +640,10 @@ architecture type, we should specify ``transformer`` in ``model_type``. .. parsed-literal:: - 2024-12-10 01:35:21.740526: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-12-10 01:35:21.772231: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 01:15:40.935673: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
+ 2024-11-05 01:15:40.968460: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-12-10 01:35:22.412391: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 01:15:41.606156: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -836,7 +810,7 @@ models, we use ``bencmark_app``. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 29.92 ms + [ INFO ] Read model took 30.24 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] batched_images (node: batched_images) : f32 / [...] / [?,?,?,?] @@ -856,7 +830,7 @@ models, we use ``bencmark_app``. [ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_3) : f32 / [...] / [?,?,?,?,?] [ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_2) : f32 / [...] / [?,?,?] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 1396.28 ms + [ INFO ] Compile model took 1388.43 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -897,17 +871,17 @@ models, we use ``bencmark_app``. [ INFO ] Fill input 'batched_point_labels' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in full mode (inputs filling are included in measurement loop). 
- [ INFO ] First inference took 850.98 ms + [ INFO ] First inference took 798.46 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] [ INFO ] Count: 49 iterations - [ INFO ] Duration: 16117.80 ms + [ INFO ] Duration: 16827.30 ms [ INFO ] Latency: - [ INFO ] Median: 1890.12 ms - [ INFO ] Average: 1899.68 ms - [ INFO ] Min: 1013.52 ms - [ INFO ] Max: 2315.56 ms - [ INFO ] Throughput: 3.04 FPS + [ INFO ] Median: 2025.54 ms + [ INFO ] Average: 1991.09 ms + [ INFO ] Min: 816.09 ms + [ INFO ] Max: 2176.67 ms + [ INFO ] Throughput: 2.91 FPS .. code:: ipython3 @@ -933,7 +907,7 @@ models, we use ``bencmark_app``. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 43.16 ms + [ INFO ] Read model took 43.95 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] batched_images (node: batched_images) : f32 / [...] / [?,?,?,?] @@ -953,7 +927,7 @@ models, we use ``bencmark_app``. [ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_3) : f32 / [...] / [?,?,?,?,?] [ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_2) : f32 / [...] / [?,?,?] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 1639.65 ms + [ INFO ] Compile model took 1607.96 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -994,17 +968,17 @@ models, we use ``bencmark_app``. [ INFO ] Fill input 'batched_point_labels' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in full mode (inputs filling are included in measurement loop). 
- [ INFO ] First inference took 586.73 ms + [ INFO ] First inference took 596.94 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] [ INFO ] Count: 55 iterations - [ INFO ] Duration: 15880.08 ms + [ INFO ] Duration: 15959.69 ms [ INFO ] Latency: - [ INFO ] Median: 1710.19 ms - [ INFO ] Average: 1694.56 ms - [ INFO ] Min: 569.82 ms - [ INFO ] Max: 1827.81 ms - [ INFO ] Throughput: 3.46 FPS + [ INFO ] Median: 1701.74 ms + [ INFO ] Average: 1692.86 ms + [ INFO ] Min: 653.76 ms + [ INFO ] Max: 1817.85 ms + [ INFO ] Throughput: 3.45 FPS Interactive segmentation demo @@ -1334,7 +1308,7 @@ Interactive segmentation demo .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam Running on local URL: http://127.0.0.1:7860 To create a public link, set `share=True` in `launch()`. 
diff --git a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png index ee488196e09a35..9f65fa9db4554a 100644 --- a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png +++ b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c724c8a2e1ea229d28fc4828d1e0f8e3709b56e66b4568cd5c300123a6b6990b -size 1259642 +oid sha256:9368b1fbd458d1e022a768f24e689af0fd6e5dacc98a920f45d3fc0f63062567 +size 1259373 diff --git a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png index 25a70458403cd0..7c0716600906a1 100644 --- a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png +++ b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8086bb37d6a8400d681ce701a0ccd8aca10ef94cbb1d2fd387ae08f06e26342a -size 1262788 +oid sha256:22f0e5bfd74e7426218d2bd007f9219433556530ddb10f33b9706398eb7cd370 +size 1263404 diff --git a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png index cb5a9e6e89c825..0a717e2c9aa38d 100644 --- a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png +++ b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a18bb4842ab402d752631d693ed64876b58b8cd3cff35bbb3342ba67b35f2c30 -size 1260902 +oid sha256:d1863ccc9483f6cbd60768b311d104ee68692c3a7181e06da4bc751b52cf0ca1 +size 1262535 diff --git a/docs/notebooks/encodec-audio-compression-with-output.rst 
b/docs/notebooks/encodec-audio-compression-with-output.rst index 4d10def61a4a57..7f0e153ffa4a55 100644 --- a/docs/notebooks/encodec-audio-compression-with-output.rst +++ b/docs/notebooks/encodec-audio-compression-with-output.rst @@ -67,11 +67,13 @@ Install required dependencies: .. code:: ipython3 - %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "openvino>=2023.3.0" "torch>=2.1" "torchaudio>=2.1" "encodec>=0.1.1" "gradio>=4.19" "librosa>=0.8.1" "matplotlib>=3.4" tqdm + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "openvino>=2023.3.0" "torch>=2.1" "torchaudio>=2.1" "encodec>=0.1.1" "gradio>=4.19" "librosa>=0.8.1" "matplotlib<=3.7" tqdm .. parsed-literal:: + ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. + torchvision 0.17.2+cpu requires torch==2.2.2, but you have torch 2.4.1+cpu which is incompatible. Note: you may need to restart the kernel to use updated packages. @@ -140,7 +142,7 @@ bandwidth. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. WeightNorm.apply(module, name, dim) @@ -300,7 +302,7 @@ similar as possible to the original. .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. WeightNorm.apply(module, name, dim) @@ -400,13 +402,13 @@ with ``ov.save_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:60: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:60: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! ideal_length = (math.ceil(n_frames) - 1) * stride + (kernel_size - padding_total) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert padding_left >= 0 and padding_right >= 0, (padding_left, padding_right) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:87: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:87: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! max_pad = max(padding_left, padding_right) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:89: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:89: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if length <= max_pad: @@ -426,11 +428,11 @@ with ``ov.save_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:358: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:358: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. 
quantized_out = torch.tensor(0.0, device=q_indices.device) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:359: TracerWarning: Iterating over a tensor might cause the trace to be incorrect. Passing a tensor of different shape won't change the number of iterations executed (and might lead to errors or silently give incorrect results). + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:359: TracerWarning: Iterating over a tensor might cause the trace to be incorrect. Passing a tensor of different shape won't change the number of iterations executed (and might lead to errors or silently give incorrect results). for i, indices in enumerate(q_indices): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
assert (padding_left + padding_right) <= x.shape[-1] diff --git a/docs/notebooks/encodec-audio-compression-with-output_files/encodec-audio-compression-with-output_19_1.png b/docs/notebooks/encodec-audio-compression-with-output_files/encodec-audio-compression-with-output_19_1.png index 9f01201bccd659..0aeedba5d00a83 100644 --- a/docs/notebooks/encodec-audio-compression-with-output_files/encodec-audio-compression-with-output_19_1.png +++ b/docs/notebooks/encodec-audio-compression-with-output_files/encodec-audio-compression-with-output_19_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a031358d39936f6ccdb1e4e8c9eb8ddda651384ecf7d95fbe6c2dc1f7e65be95 +oid sha256:160e17b680bd3d5e8ae8d05736f6c8794af22597097cc8481d0986915fe9d696 size 44175 diff --git a/docs/notebooks/encodec-audio-compression-with-output_files/encodec-audio-compression-with-output_38_1.png b/docs/notebooks/encodec-audio-compression-with-output_files/encodec-audio-compression-with-output_38_1.png index d157f39a8fc143..dfab67e44f9be0 100644 --- a/docs/notebooks/encodec-audio-compression-with-output_files/encodec-audio-compression-with-output_38_1.png +++ b/docs/notebooks/encodec-audio-compression-with-output_files/encodec-audio-compression-with-output_38_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f2800c74996f567b92758358b136cc2acab70b48ea628ac392e59cecc1c416a3 +oid sha256:aea9089d7a4630b53481b1277bbf8e7f52f1c992ed61d1e998250980f59df5ab size 44186 diff --git a/docs/notebooks/encodec-audio-compression-with-output_files/encodec-audio-compression-with-output_6_2.png b/docs/notebooks/encodec-audio-compression-with-output_files/encodec-audio-compression-with-output_6_2.png index 93baa1aa5eeea6..a8af4e5b6153b9 100644 --- a/docs/notebooks/encodec-audio-compression-with-output_files/encodec-audio-compression-with-output_6_2.png +++ b/docs/notebooks/encodec-audio-compression-with-output_files/encodec-audio-compression-with-output_6_2.png @@ -1,3 +1,3 @@ 
version https://git-lfs.github.com/spec/v1 -oid sha256:491264f7b803244b0230b7a7bebee6b81da547541ccf928fbae1c9c0af719451 +oid sha256:2df9c2103837505ffcf5543e55a8d1589385ddb5e73b917d5efe9a6ebfd0368c size 44933 diff --git a/docs/notebooks/explainable-ai-1-basic-with-output.rst b/docs/notebooks/explainable-ai-1-basic-with-output.rst index 4dd115d7983c64..1df31312fd752f 100644 --- a/docs/notebooks/explainable-ai-1-basic-with-output.rst +++ b/docs/notebooks/explainable-ai-1-basic-with-output.rst @@ -66,6 +66,8 @@ Guide =2024.2.0" opencv-python tqdm @@ -74,7 +76,10 @@ Guide =3.4" + if platform.system() != "Windows": + %pip install -q "matplotlib>=3.4" + else: + %pip install -q "matplotlib>=3.4,<3.7" Imports ------- diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output.rst b/docs/notebooks/explainable-ai-2-deep-dive-with-output.rst index c0722b01a9c9b4..4e2ad0970661d2 100644 --- a/docs/notebooks/explainable-ai-2-deep-dive-with-output.rst +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output.rst @@ -116,7 +116,10 @@ Install requirements %pip install -q -U "numpy==1.*" %pip install -q scipy - %pip install -q "matplotlib>=3.4" + if platform.system() != "Windows": + %pip install -q "matplotlib>=3.4" + else: + %pip install -q "matplotlib>=3.4,<3.7" Imports ~~~~~~~ diff --git a/docs/notebooks/explainable-ai-3-map-interpretation-with-output.rst b/docs/notebooks/explainable-ai-3-map-interpretation-with-output.rst index b26064fcf12e27..537ae36f6a331c 100644 --- a/docs/notebooks/explainable-ai-3-map-interpretation-with-output.rst +++ b/docs/notebooks/explainable-ai-3-map-interpretation-with-output.rst @@ -115,7 +115,10 @@ Install requirements %pip install -q -U "numpy==1.*" %pip install -q scipy - %pip install -q "matplotlib>=3.4" + if platform.system() != "Windows": + %pip install -q "matplotlib>=3.4" + else: + %pip install -q "matplotlib>=3.4,<3.7" Imports ~~~~~~~ diff --git a/docs/notebooks/fast-segment-anything-with-output.rst 
b/docs/notebooks/fast-segment-anything-with-output.rst index 0071e2dca60e74..e0f20e0f79974b 100644 --- a/docs/notebooks/fast-segment-anything-with-output.rst +++ b/docs/notebooks/fast-segment-anything-with-output.rst @@ -85,6 +85,8 @@ Install requirements .. parsed-literal:: + ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. + torchaudio 2.4.1+cpu requires torch==2.4.1, but you have torch 2.2.2+cpu which is incompatible. Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. @@ -156,9 +158,7 @@ model and generate a segmentation map. .. parsed-literal:: - 100%|██████████| 138M/138M [00:03<00:00, 46.3MB/s] - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/ultralytics/nn/tasks.py:732: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. 
- ckpt = torch.load(file, map_location="cpu") + 100%|██████████| 138M/138M [00:02<00:00, 67.7MB/s] @@ -170,8 +170,8 @@ model and generate a segmentation map. .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 768x1024 37 objects, 638.3ms - Speed: 3.4ms preprocess, 638.3ms inference, 500.4ms postprocess per image at shape (1, 3, 768, 1024) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 768x1024 37 objects, 728.3ms + Speed: 3.1ms preprocess, 728.3ms inference, 768.2ms postprocess per image at shape (1, 3, 768, 1024) The model returns segmentation maps for all the objects on the image. @@ -209,15 +209,15 @@ tracing. The FastSAM model itself is based on YOLOv8 model. .. parsed-literal:: - Ultralytics YOLOv8.2.24 🚀 Python-3.8.10 torch-2.4.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Ultralytics YOLOv8.2.24 🚀 Python-3.8.10 torch-2.2.2+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) PyTorch: starting from 'FastSAM-x.pt' with input shape (1, 3, 1024, 1024) BCHW and output shape(s) ((1, 37, 21504), (1, 32, 256, 256)) (138.3 MB) OpenVINO: starting export with openvino 2024.4.0-16579-c3152d32c9c-releases/2024/4... 
OpenVINO: export success ✅ 6.2s, saved as 'FastSAM-x_openvino_model/' (276.1 MB) - Export complete (9.2s) - Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything + Export complete (9.1s) + Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything Predict: yolo predict task=segment model=FastSAM-x_openvino_model imgsz=1024 Validate: yolo val task=segment model=FastSAM-x_openvino_model imgsz=1024 data=ultralytics/datasets/sa.yaml Visualize: https://netron.app @@ -321,8 +321,8 @@ pipeline. .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 1024x1024 42 objects, 498.5ms - Speed: 6.1ms preprocess, 498.5ms inference, 31.6ms postprocess per image at shape (1, 3, 1024, 1024) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 1024x1024 42 objects, 504.9ms + Speed: 5.8ms preprocess, 504.9ms inference, 31.6ms postprocess per image at shape (1, 3, 1024, 1024) One can observe the converted model outputs in the next cell, they is @@ -521,11 +521,6 @@ repo <-with-output.html>`__. preset=nncf.QuantizationPreset.MIXED) -.. parsed-literal:: - - :7: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console) - - .. parsed-literal:: INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino @@ -620,8 +615,8 @@ calibration dataset to measure the performance. .. parsed-literal:: - Segmented in 68 seconds. - Resulting in 1.88 fps + Segmented in 69 seconds. 
+ Resulting in 1.86 fps .. code:: ipython3 @@ -648,9 +643,9 @@ calibration dataset to measure the performance. .. parsed-literal:: - Segmented in 21 seconds - Resulting in 6.1 fps - That is 3.24 times faster! + Segmented in 22 seconds + Resulting in 5.82 fps + That is 3.14 times faster! Try out the converted pipeline diff --git a/docs/notebooks/film-slowmo-with-output.rst b/docs/notebooks/film-slowmo-with-output.rst index 33d915ff72c326..0f5c9c7ba8c0d6 100644 --- a/docs/notebooks/film-slowmo-with-output.rst +++ b/docs/notebooks/film-slowmo-with-output.rst @@ -79,6 +79,7 @@ Prerequisites .. code:: ipython3 + import platform import os %pip install -q "tensorflow-macos>=2.5; sys_platform == 'darwin' and platform_machine == 'arm64' and python_version > '3.8'" # macOS M1 and M2 @@ -91,7 +92,10 @@ Prerequisites %pip install -q tensorflow_hub tf_keras numpy "opencv-python" tqdm "gradio>=4.19" Pillow "openvino>=2023.2.0" - %pip install -q "matplotlib>=3.4" + if platform.system() != "Windows": + %pip install -q "matplotlib>=3.4" + else: + %pip install -q "matplotlib>=3.4,<3.7" .. code:: ipython3 diff --git a/docs/notebooks/florence2-with-output.rst b/docs/notebooks/florence2-with-output.rst index 7ec9ce6e6557ca..e929a95fb182c1 100644 --- a/docs/notebooks/florence2-with-output.rst +++ b/docs/notebooks/florence2-with-output.rst @@ -100,10 +100,10 @@ available model. By default, we will use .. parsed-literal:: - 2024-12-10 01:48:13.363088: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-12-10 01:48:13.396921: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
+ 2024-11-05 01:28:54.034484: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 01:28:54.069316: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-12-10 01:48:14.055295: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 01:28:54.728430: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -199,43 +199,43 @@ pipeline. .. parsed-literal:: - .gitattributes: 0%| | 0.00/1.56k [00:00 1 or self.sliding_window is not None: /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/chkpt/modeling_florence2.py:1205: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! is_causal = True if self.is_causal and attention_mask is None and tgt_len > 1 else False @@ -382,7 +382,7 @@ Run model inference -``OvFlorence2Model`` class defined in ``ov_florence2_helper.py`` +``OvFlorence2Model`` class defined in ``ov_florence2_helper.py`` provides convenient way for running model. It accepts directory with converted model and inference device as arguments. For running model we will use ``generate`` method. 
diff --git a/docs/notebooks/florence2-with-output_files/florence2-with-output_18_0.png b/docs/notebooks/florence2-with-output_files/florence2-with-output_18_0.png index 0ffc56ebd94d65..37d11a47fd30c9 100644 --- a/docs/notebooks/florence2-with-output_files/florence2-with-output_18_0.png +++ b/docs/notebooks/florence2-with-output_files/florence2-with-output_18_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:552934f1e05cf6d598ce249bb662530388c1f3335dc2a6af6c304825c8aa023a -size 259656 +oid sha256:d85b3df68708172ed849a9e182bdec6a94f0174643833bd8cc7184ac0d090fae +size 259636 diff --git a/docs/notebooks/freevc-voice-conversion-with-output.rst b/docs/notebooks/freevc-voice-conversion-with-output.rst index 69a935f4c4f78d..fe2ac780f5cca6 100644 --- a/docs/notebooks/freevc-voice-conversion-with-output.rst +++ b/docs/notebooks/freevc-voice-conversion-with-output.rst @@ -82,43 +82,44 @@ Install extra requirements Note: you may need to restart the kernel to use updated packages. +Check if FreeVC is installed and append its path to ``sys.path`` + .. code:: ipython3 - # Fetch `notebook_utils` module - import requests + from pathlib import Path + import sys - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - open("notebook_utils.py", "w").write(r.text) + free_vc_repo = "FreeVC" + if not Path(free_vc_repo).exists(): + !git clone https://github.com/OlaWod/FreeVC.git - - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py", - ) - open("cmd_helper.py", "w").write(r.text) - - + sys.path.append(free_vc_repo) .. parsed-literal:: - 1491 - + Cloning into 'FreeVC'... + remote: Enumerating objects: 131, done. + remote: Counting objects: 100% (74/74), done. + remote: Compressing objects: 100% (47/47), done. 
+ remote: Total 131 (delta 43), reused 27 (delta 27), pack-reused 57 (from 1) + Receiving objects: 100% (131/131), 15.28 MiB | 17.50 MiB/s, done. + Resolving deltas: 100% (43/43), done. .. code:: ipython3 - from pathlib import Path - + # Fetch `notebook_utils` module + import requests import gdown - from cmd_helper import clone_repo - from notebook_utils import download_file, device_widget + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) - clone_repo("https://github.com/OlaWod/FreeVC.git") - + open("notebook_utils.py", "w").write(r.text) + from notebook_utils import download_file, device_widget wavlm_large_dir_path = Path("FreeVC/wavlm") wavlm_large_path = wavlm_large_dir_path / "WavLM-Large.pt" @@ -133,8 +134,8 @@ Install extra requirements Downloading... From: https://drive.google.com/uc?id=12-cB34qCTvByWT-QtOcZaqwwO21FLSqU&confirm=t&uuid=a703c43c-ccce-436c-8799-c11b88e9e7e4 - To: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/WavLM-Large.pt - 100%|██████████| 1.26G/1.26G [01:03<00:00, 19.9MB/s] + To: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/WavLM-Large.pt + 100%|██████████| 1.26G/1.26G [00:32<00:00, 38.5MB/s] .. code:: ipython3 @@ -153,7 +154,7 @@ Install extra requirements .. parsed-literal:: - freevc.pth: 0%| | 0.00/451M [00:00 - + Your browser does not support the audio element. 
diff --git a/docs/notebooks/glm-edge-v-with-output.rst b/docs/notebooks/glm-edge-v-with-output.rst deleted file mode 100644 index 2449d414d82594..00000000000000 --- a/docs/notebooks/glm-edge-v-with-output.rst +++ /dev/null @@ -1,516 +0,0 @@ -Visual-language assistant with GLM-Edge-V and OpenVINO ------------------------------------------------------- - -The -`GLM-Edge `__ -series is `Zhipu `__\ ’s attempt to meet -real-world deployment scenarios for edge devices. It consists of two -sizes of large language dialogue models and multimodal understanding -models (GLM-Edge-1.5B-Chat, GLM-Edge-4B-Chat, GLM-Edge-V-2B, -GLM-Edge-V-5B). Among them, the 1.5B / 2B models are mainly targeted at -platforms like mobile phones and car machines, while the 4B / 5B models -are aimed at platforms like PCs. Based on the technological advancements -of the GLM-4 series, some targeted adjustments have been made to the -model structure and size, balancing model performance, real-world -inference efficiency, and deployment convenience. Through deep -collaboration with partner enterprises and relentless efforts in -inference optimization, the GLM-Edge series models can run at extremely -high speeds on some edge platforms. - -In this tutorial we consider how to launch multimodal model GLM-Edge-V -using OpenVINO for creation multimodal chatbot. Additionally, we -optimize model to low precision using -`NNCF `__ - -**Table of contents:** - -- `Prerequisites <#prerequisites>`__ -- `Select Model <#select-model>`__ -- `Convert and Optimize model <#convert-and-optimize-model>`__ - - - `Compress model weights to - 4-bit <#compress-model-weights-to-4-bit>`__ - -- `Select inference device <#select-inference-device>`__ -- `Run OpenVINO model <#run-openvino-model>`__ -- `Interactive demo <#interactive-demo>`__ - -Installation Instructions -~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is a self-contained example that relies solely on its own code. - -We recommend running the notebook in a virtual environment. 
You only -need a Jupyter server to start. For details, please refer to -`Installation -Guide `__. - -Prerequisites -------------- - - - -install required packages and setup helper functions. - -.. code:: ipython3 - - %pip install -q "torch>=2.1" "torchvision" "protobuf>=3.20" "gradio>=4.26" "Pillow" "accelerate" "tqdm" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "openvino>=2024.5.0" "nncf>=2.14.0" - - -.. parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - ERROR: Could not find a version that satisfies the requirement openvino>=2024.5.0 (from versions: 2021.3.0, 2021.4.0, 2021.4.1, 2021.4.2, 2022.1.0, 2022.2.0, 2022.3.0, 2022.3.1, 2022.3.2, 2023.0.0.dev20230119, 2023.0.0.dev20230217, 2023.0.0.dev20230407, 2023.0.0.dev20230427, 2023.0.0, 2023.0.1, 2023.0.2, 2023.1.0.dev20230623, 2023.1.0.dev20230728, 2023.1.0.dev20230811, 2023.1.0, 2023.2.0.dev20230922, 2023.2.0, 2023.3.0, 2024.0.0, 2024.1.0, 2024.2.0, 2024.3.0, 2024.4.0, 2024.4.1.dev20240926) - ERROR: No matching distribution found for openvino>=2024.5.0 - Note: you may need to restart the kernel to use updated packages. - - -.. code:: ipython3 - - %pip install -q "git+https://github.com/huggingface/transformers" - - -.. parsed-literal:: - - error: subprocess-exited-with-error - - × Preparing metadata (pyproject.toml) did not run successfully. - │ exit code: 1 - ╰─> [6 lines of output] - - Cargo, the Rust package manager, is not installed or is not on PATH. - This package requires Rust and Cargo to compile extensions. Install it through - the system's package manager or via https://rustup.rs/ - - Checking for Rust toolchain.... - [end of output] - - note: This error originates from a subprocess, and is likely not a problem with pip. - error: metadata-generation-failed - - × Encountered error while generating package metadata. - ╰─> See above for output. - - note: This is an issue with the package mentioned above, not pip. 
- hint: See above for details. - Note: you may need to restart the kernel to use updated packages. - - -.. code:: ipython3 - - import requests - from pathlib import Path - - if not Path("glmv_helper.py").exists(): - r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/glm-edge-v/glmv_helper.py") - open("glmv_helper.py", "w").write(r.text) - - - if not Path("gradio_helper.py").exists(): - r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/glm-edge-v/gradio_helper.py") - open("gradio_helper.py", "w").write(r.text) - - if not Path("notebook_utils.py").exists(): - r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py") - open("notebook_utils.py", "w").write(r.text) - -Select Model ------------- - - - -The tutorial supports the following models from GLM-Edge-V model family: - -- `glm-edge-v-2b `__ -- `glm-edge-v-5b `__ - -You can select one from the provided options below. - -.. code:: ipython3 - - import ipywidgets as widgets - - # Select model - model_ids = [ - "THUDM/glm-edge-v-2b", - "THUDM/glm-edge-v-5b", - ] - - model_dropdown = widgets.Dropdown( - options=model_ids, - value=model_ids[0], - description="Model:", - disabled=False, - ) - - model_dropdown - - - - -.. parsed-literal:: - - Dropdown(description='Model:', options=('THUDM/glm-edge-v-2b', 'THUDM/glm-edge-v-5b'), value='THUDM/glm-edge-v… - - - -Convert and Optimize model --------------------------- - - - -GLM-Edge-V is PyTorch model. OpenVINO supports PyTorch models via -conversion to OpenVINO Intermediate Representation (IR). `OpenVINO model -conversion -API `__ -should be used for these purposes. ``ov.convert_model`` function accepts -original PyTorch model instance and example input for tracing and -returns ``ov.Model`` representing this model in OpenVINO framework. 
-Converted model can be used for saving on disk using ``ov.save_model`` -function or directly loading on device using ``core.complie_model``. - -The script ``glmv_helper.py`` contains helper function for model -conversion, please check its content if you interested in conversion -details. - -.. raw:: html - -
- -Click here for more detailed explanation of conversion steps GLM-Edge-V -is autoregressive transformer generative model, it means that each next -model step depends from model output from previous step. The generation -approach is based on the assumption that the probability distribution of -a word sequence can be decomposed into the product of conditional next -word distributions. In other words, model predicts the next token in the -loop guided by previously generated tokens until the stop-condition will -be not reached (generated sequence of maximum length or end of string -token obtained). The way the next token will be selected over predicted -probabilities is driven by the selected decoding methodology. You can -find more information about the most popular decoding methods in this -blog. The entry point for the generation process for models from the -Hugging Face Transformers library is the ``generate`` method. You can -find more information about its parameters and configuration in the -documentation. To preserve flexibility in the selection decoding -methodology, we will convert only model inference for one step. - -GLM-Edge-V model consists of 3 parts: - -- **Vision Model** for encoding input images into embedding space. -- **Embedding Model** for conversion input text tokens into embedding - space -- **Language Model** for generation answer based on input embeddings - provided by Image Encoder and Input Embedding models. - -.. raw:: html - -
- -Compress model weights to 4-bit -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -For reducing memory -consumption, weights compression optimization can be applied using -`NNCF `__. - -.. raw:: html - -
- -Click here for more details about weight compression Weight compression -aims to reduce the memory footprint of a model. It can also lead to -significant performance improvement for large memory-bound models, such -as Large Language Models (LLMs). LLMs and other models, which require -extensive memory to store the weights during inference, can benefit from -weight compression in the following ways: - -- enabling the inference of exceptionally large models that cannot be - accommodated in the memory of the device; - -- improving the inference performance of the models by reducing the - latency of the memory access when computing the operations with - weights, for example, Linear layers. - -`Neural Network Compression Framework -(NNCF) `__ provides 4-bit / -8-bit mixed weight quantization as a compression method primarily -designed to optimize LLMs. The main difference between weights -compression and full model quantization (post-training quantization) is -that activations remain floating-point in the case of weights -compression which leads to a better accuracy. Weight compression for -LLMs provides a solid inference performance improvement which is on par -with the performance of the full model quantization. In addition, weight -compression is data-free and does not require a calibration dataset, -making it easy to use. - -``nncf.compress_weights`` function can be used for performing weights -compression. The function accepts an OpenVINO model and other -compression parameters. Compared to INT8 compression, INT4 compression -improves performance even more, but introduces a minor drop in -prediction quality. - -More details about weights compression, can be found in `OpenVINO -documentation `__. - -.. raw:: html - -
- -.. code:: ipython3 - - from pathlib import Path - import nncf - from glmv_helper import convert_glmv_model - - - model_id = model_dropdown.value - out_dir = Path("model") / Path(model_id).name / "INT4" - compression_configuration = { - "mode": nncf.CompressWeightsMode.INT4_SYM, - "group_size": 64, - "ratio": 0.6, - } - convert_glmv_model(model_id, out_dir, compression_configuration) - - -.. parsed-literal:: - - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino - - -.. parsed-literal:: - - 2024-12-10 01:51:54.756921: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-12-10 01:51:54.790860: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-12-10 01:51:55.339388: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - -.. parsed-literal:: - - ⌛ glm-edge-v-2b conversion started. Be patient, it may takes some time. - ⌛ Load Original model - ✅ Original model successfully loaded - ⌛ Convert Input embedding model - WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. - - -.. parsed-literal:: - - [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. 
- /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead - warnings.warn( - `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. - - -.. parsed-literal:: - - ✅ Input embedding model successfully converted - ⌛ Convert Image embedding model - - -.. parsed-literal:: - - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/THUDM/glm-edge-v-2b/30c2bc691c9d46433abfd450e04441458d503f34/siglip.py:48: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - grid_size = int(s**0.5) - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/THUDM/glm-edge-v-2b/30c2bc691c9d46433abfd450e04441458d503f34/siglip.py:53: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. - image_emb = torch.cat([self.boi.repeat(len(image_emb), 1, 1), image_emb, self.eoi.repeat(len(image_emb), 1, 1)], dim=1) - - -.. parsed-literal:: - - ✅ Image embedding model successfully converted - ⌛ Convert Language model - - -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:458: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. 
Passing a tensor of different shape might lead to errors or silently give incorrect results. - or len(self.key_cache[layer_idx]) == 0 # the layer has no cache - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/THUDM/glm-edge-v-2b/30c2bc691c9d46433abfd450e04441458d503f34/modeling_glm.py:995: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if sequence_length != 1: - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/THUDM/glm-edge-v-2b/30c2bc691c9d46433abfd450e04441458d503f34/modeling_glm.py:153: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - rotary_dim = int(q.shape[-1] * partial_rotary_factor) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:443: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. - elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/THUDM/glm-edge-v-2b/30c2bc691c9d46433abfd450e04441458d503f34/modeling_glm.py:249: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! - if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:168: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) - if a.grad is not None: - - -.. parsed-literal:: - - ✅ Language model successfully converted - ⌛ Weights compression with int4_sym mode started - - - -.. parsed-literal:: - - Output() - - - - - - - - - -.. parsed-literal:: - - INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 45% (115 / 169) │ 40% (114 / 168) │ - ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ 4 │ 55% (54 / 169) │ 60% (54 / 168) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ - - - -.. parsed-literal:: - - Output() - - - - - - - - - -.. parsed-literal:: - - ✅ Weights compression finished - ✅ glm-edge-v-2b model conversion finished. You can find results in model/glm-edge-v-2b/INT4 - - -Select inference device ------------------------ - - - -.. 
code:: ipython3 - - from notebook_utils import device_widget - - device = device_widget(default="AUTO", exclude=["NPU"]) - - device - - - - -.. parsed-literal:: - - Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') - - - -Run OpenVINO model ------------------- - - - -``OvGLMv`` class provides convenient way for running model. It accepts -directory with converted model and inference device as arguments. For -running model we will use ``generate`` method. - -.. code:: ipython3 - - from glmv_helper import OvGLMv - - model = OvGLMv(out_dir, device.value) - -.. code:: ipython3 - - import requests - from PIL import Image - - url = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11" - image = Image.open(requests.get(url, stream=True).raw) - - query = "Please describe this picture" - - print(f"Question:\n {query}") - image - - -.. parsed-literal:: - - Question: - Please describe this picture - - - - -.. image:: glm-edge-v-with-output_files/glm-edge-v-with-output_13_1.png - - - -.. code:: ipython3 - - from transformers import TextStreamer, AutoImageProcessor, AutoTokenizer - import torch - - messages = [{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": query}]}] - - processor = AutoImageProcessor.from_pretrained(out_dir, trust_remote_code=True) - tokenizer = AutoTokenizer.from_pretrained(out_dir, trust_remote_code=True) - inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_dict=True, tokenize=True, return_tensors="pt").to("cpu") - generate_kwargs = { - **inputs, - "pixel_values": torch.tensor(processor(image).pixel_values).to("cpu"), - "max_new_tokens": 100, - "do_sample": True, - "top_k": 20, - "streamer": TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True), - } - - print("Answer:") - output = model.generate(**generate_kwargs) - - -.. 
parsed-literal:: - - Answer: - The image depicts a cat resting inside a cardboard box placed on a soft carpeted floor. The cat is lying with its head towards the bottom of the box, and its front paws are stretched out with the right one slightly forward, while its back and hind legs are positioned in the box. The box appears to be in partial disassembly, with the flaps folded down and one side raised slightly off the ground. The cat's fur is well-groomed and - - -Interactive demo ----------------- - - - -.. code:: ipython3 - - from gradio_helper import make_demo - - demo = make_demo(model, processor, tokenizer) - - try: - demo.launch(debug=False, height=600) - except Exception: - demo.launch(debug=False, share=True, height=600) - # if you are launching remotely, specify server_name and server_port - # demo.launch(server_name='your server name', server_port='server port in int') - # Read more in the docs: https://gradio.app/docs/ - - -.. parsed-literal:: - - Running on local URL: http://127.0.0.1:7860 - - To create a public link, set `share=True` in `launch()`. - - - - - - - diff --git a/docs/notebooks/gpu-device-with-output.rst b/docs/notebooks/gpu-device-with-output.rst index 5953608eae62e5..732cc297aa9531 100644 --- a/docs/notebooks/gpu-device-with-output.rst +++ b/docs/notebooks/gpu-device-with-output.rst @@ -330,7 +330,7 @@ categories of object. For details, see the ov_model_path = base_model_dir / model_name / f"{model_name}.xml" if not (ov_model_path).exists(): - hf_hub.snapshot_download("katuni4ka/ssdlite_mobilenet_v2_fp16", local_dir=base_model_dir / model_name) + hf_hub.snapshot_download("katuni4ka/ssdlite_mobilenet_v2_fp16", local_dir=base_model_dir) model = core.read_model(ov_model_path) @@ -541,7 +541,7 @@ with a latency focus: .. code:: ipython3 - !benchmark_app -m {ov_model_path} -d GPU -hint latency + !benchmark_app -m {model_path} -d GPU -hint latency .. parsed-literal:: @@ -622,7 +622,7 @@ CPU vs GPU with Latency Hint .. 
code:: ipython3 - !benchmark_app -m {ov_model_path} -d CPU -hint latency + !benchmark_app -m {model_path} -d CPU -hint latency .. parsed-literal:: @@ -1071,7 +1071,7 @@ Compile the Model .. code:: ipython3 # Read model and compile it on GPU in THROUGHPUT mode - model = core.read_model(model=ov_model_path) + model = core.read_model(model=model_path) device_name = "GPU" compiled_model = core.compile_model(model=model, device_name=device_name, config={hints.performance_mode(): hints.PerformanceMode.THROUGHPUT}) diff --git a/docs/notebooks/grounded-segment-anything-with-output.rst b/docs/notebooks/grounded-segment-anything-with-output.rst index 6449fb1a6a9507..232629422b14e0 100644 --- a/docs/notebooks/grounded-segment-anything-with-output.rst +++ b/docs/notebooks/grounded-segment-anything-with-output.rst @@ -64,7 +64,7 @@ Clone repositories and install requirements .. parsed-literal:: - WARNING: supervision 0.25.0 does not provide the extra 'desktop' + WARNING: supervision 0.24.0 does not provide the extra 'desktop' Note: you may need to restart the kernel to use updated packages. @@ -96,51 +96,46 @@ segmentation you can select vanilla ``SAM``. use_efficient_sam = sam_type_widget.value == "EfficientSAM" -.. code:: ipython3 - - import requests - - - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - open("notebook_utils.py", "w").write(r.text) - - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py", - ) - open("cmd_helper.py", "w").write(r.text) - - - - -.. parsed-literal:: - - 1491 - - - .. 
code:: ipython3 from pathlib import Path import sys import os - from cmd_helper import clone_repo - - repo_dir = Path("Grounded-Segment-Anything") ground_dino_dir = Path("GroundingDINO") efficient_sam_dir = Path("EfficientSAM") - # we use grounding dino from a fork which contains modifications that allow conversion to OpenVINO IR - clone_repo("https://github.com/wenyi5608/GroundingDINO.git") + # we use grounding dino from a fork which contains modifications that allow conversion to OpenVINO IR format + if not ground_dino_dir.exists(): + !git clone https://github.com/wenyi5608/GroundingDINO/ + if use_efficient_sam and not efficient_sam_dir.exists(): + !git clone https://github.com/yformer/EfficientSAM + if not use_efficient_sam and not repo_dir.exists(): + !git clone https://github.com/IDEA-Research/Grounded-Segment-Anything - if use_efficient_sam: - clone_repo("https://github.com/yformer/EfficientSAM.git") - if not use_efficient_sam: - clone_repo("https://github.com/IDEA-Research/Grounded-Segment-Anything.git", add_to_sys_path=False) - sys.path.append(repo_dir / "segment_anything") + # append to sys.path so that modules from the repo could be imported + sys.path.append(str(ground_dino_dir)) + sys.path.append(str("EfficientSAM" if use_efficient_sam else repo_dir / "segment_anything")) + + +.. parsed-literal:: + + Cloning into 'GroundingDINO'... + remote: Enumerating objects: 379, done. + remote: Counting objects: 100% (190/190), done. + remote: Compressing objects: 100% (79/79), done. + remote: Total 379 (delta 136), reused 111 (delta 111), pack-reused 189 (from 1) + Receiving objects: 100% (379/379), 14.03 MiB | 20.95 MiB/s, done. + Resolving deltas: 100% (194/194), done. + Cloning into 'EfficientSAM'... + remote: Enumerating objects: 424, done. + remote: Counting objects: 100% (85/85), done. + remote: Compressing objects: 100% (33/33), done. 
+ remote: Total 424 (delta 76), reused 52 (delta 52), pack-reused 339 (from 1) + Receiving objects: 100% (424/424), 262.14 MiB | 24.44 MiB/s, done. + Resolving deltas: 100% (246/246), done. + .. code:: ipython3 @@ -184,8 +179,14 @@ Download checkpoints and load PyTorch models .. code:: ipython3 - from notebook_utils import download_file, device_widget + import requests + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + + open("notebook_utils.py", "w").write(r.text) + from notebook_utils import download_file, device_widget download_file( "https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth", @@ -201,7 +202,7 @@ Download checkpoints and load PyTorch models .. parsed-literal:: - groundingdino_swint_ogc.pth: 0%| | 0.00/662M [00:00 + @@ -215,7 +215,7 @@ Do Inference .. parsed-literal:: - + diff --git a/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_11_1.png b/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_11_1.png deleted file mode 100644 index 3677caabff4380..00000000000000 --- a/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_11_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:76113c575caa9c8a8aca45d3ec6ebd7a4b513dadffd8e9e63861a7a041d7e5de -size 249032 diff --git a/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_11_2.png b/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_11_2.png index 5023362b06be2d..12a0ec3dda0bf1 100644 --- a/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_11_2.png +++ b/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_11_2.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:96f0eb3a9535d57b8784be4b717dc9f280e4bf107e5b61d7cf51b36e142e4c7a +oid sha256:fb21264c96554435f8c9331a342b9c3a20d8129dc0725f6ff226d789779645be size 249032 diff --git a/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_13_1.png b/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_13_1.png index fe6d042ef77d30..ec01c58bdf8be1 100644 --- a/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_13_1.png +++ b/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_13_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:caef59a6c15a5a1d512f4dd22395b12fbd754bba264ea5f0deae323ff8edee39 +oid sha256:492235a08c36c9afbebabcb01c8325ac99dccff84174e7074ca321aba2ac7aac size 20550 diff --git a/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_17_0.png b/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_17_0.png index 310b0d3545d48c..f8d59545b65f8c 100644 --- a/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_17_0.png +++ b/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_17_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6a3137d9359a44fb19e1900e6b808f9e7e7ded0ba209abe8c4bd90fcf37b1c6a +oid sha256:62376a2e159eca912bff4ce975d169f8ed71f9d9b75c4fd09937e7552120b14d size 260045 diff --git a/docs/notebooks/hello-world-with-output.rst b/docs/notebooks/hello-world-with-output.rst index 94d6dca5798876..5bd1216db29701 100644 --- a/docs/notebooks/hello-world-with-output.rst +++ b/docs/notebooks/hello-world-with-output.rst @@ -98,13 +98,13 @@ Download the Model and data samples .. parsed-literal:: - v3-small_224_1.0_float.xml: 0%| | 0.00/294k [00:00=4.33.0" "torch>=2.1.0" %pip install -q ipywidgets - %pip install -q "openvino>=2023.1.0" "Pillow" + %pip install -q "openvino>=2023.1.0" .. 
parsed-literal:: @@ -132,10 +132,10 @@ tutorials `__. from optimum.intel.openvino import OVModelForSequenceClassification + +.. parsed-literal:: + + huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... + To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + + Initialize and Convert the Model Automatically using OVModel class ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -427,19 +436,17 @@ Full list of supported arguments available via ``--help`` .. parsed-literal:: - 2024-12-10 01:57:20.152345: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 01:37:12.161579: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT usage: optimum-cli export openvino [-h] -m MODEL [--task TASK] [--framework {pt,tf}] [--trust-remote-code] - [--weight-format {fp32,fp16,int8,int4,mxfp4,nf4}] + [--weight-format {fp32,fp16,int8,int4,mxfp4}] [--library {transformers,diffusers,timm,sentence_transformers,open_clip}] [--cache_dir CACHE_DIR] [--pad-token-id PAD_TOKEN_ID] [--ratio RATIO] [--sym] [--group-size GROUP_SIZE] - [--backup-precision {none,int8_sym,int8_asym}] [--dataset DATASET] [--all-layers] [--awq] [--scale-estimation] [--gptq] - [--lora-correction] [--sensitivity-metric SENSITIVITY_METRIC] [--num-samples NUM_SAMPLES] [--disable-stateful] @@ -460,20 +467,20 @@ Full list of supported arguments available via ``--help`` --task TASK The task to export the model for. If not specified, the task will be auto-inferred based on the model. 
Available tasks depend on the model, but are among: - ['zero-shot-object-detection', 'multiple-choice', - 'audio-xvector', 'masked-im', 'text2text-generation', - 'inpainting', 'image-segmentation', 'semantic- - segmentation', 'question-answering', 'token- - classification', 'audio-frame-classification', - 'feature-extraction', 'text-to-audio', 'image-to- - image', 'fill-mask', 'automatic-speech-recognition', - 'image-classification', 'text-classification', 'zero- - shot-image-classification', 'object-detection', - 'image-to-text', 'audio-classification', 'sentence- - similarity', 'depth-estimation', 'text-to-image', - 'mask-generation', 'text-generation']. For decoder - models, use `xxx-with-past` to export the model using - past key values in the decoder. + ['mask-generation', 'image-classification', 'fill- + mask', 'audio-xvector', 'audio-frame-classification', + 'sentence-similarity', 'multiple-choice', 'automatic- + speech-recognition', 'text-to-image', 'token- + classification', 'image-to-text', 'image- + segmentation', 'question-answering', 'depth- + estimation', 'semantic-segmentation', 'feature- + extraction', 'text-generation', 'zero-shot-object- + detection', 'text-to-audio', 'zero-shot-image- + classification', 'object-detection', 'text2text- + generation', 'audio-classification', 'image-to-image', + 'masked-im', 'inpainting', 'text-classification']. For + decoder models, use `xxx-with-past` to export the + model using past key values in the decoder. --framework {pt,tf} The framework to use for the export. If not provided, will attempt to use the local checkpoint's original framework or what is available in the environment. @@ -482,7 +489,7 @@ Full list of supported arguments available via ``--help`` for repositories you trust and in which you have read the code, as it will execute on your local machine arbitrary code present in the model repository. 
- --weight-format {fp32,fp16,int8,int4,mxfp4,nf4} + --weight-format {fp32,fp16,int8,int4,mxfp4} The weight format of the exported model. --library {transformers,diffusers,timm,sentence_transformers,open_clip} The library used to load the model before export. If @@ -507,27 +514,12 @@ Full list of supported arguments available via ``--help`` --group-size GROUP_SIZE The group size to use for quantization. Recommended value is 128 and -1 uses per-column quantization. - --backup-precision {none,int8_sym,int8_asym} - Defines a backup precision for mixed-precision weight - compression. Only valid for int4 weight format. If not - provided, backup precision is int8_asym. 'none' stands - for original floating-point precision of the model - weights, in this case weights are retained in their - original precision without any quantization. - 'int8_sym' stands for 8-bit integer symmetric - quantization without zero point. 'int8_asym' stands - for 8-bit integer asymmetric quantization with zero - points per each quantization group. --dataset DATASET The dataset used for data-aware compression or - quantization with NNCF. For language models you can - use the one from the list - ['auto','wikitext2','c4','c4-new']. With 'auto' the - dataset will be collected from model's generations. - For diffusion models it should be on of - ['conceptual_captions','laion/220k-GPT4Vision- - captions-from-LIVIS','laion/filtered-wit']. For visual - language models the dataset must be set to - 'contextual'. + quantization with NNCF. You can use the one from the + list ['wikitext2','c4','c4-new'] for language models + or ['conceptual_captions','laion/220k-GPT4Vision- + captions-from-LIVIS','laion/filtered-wit'] for + diffusion models. --all-layers Whether embeddings and last MatMul layers should be compressed to INT4. If not provided an weight compression is applied, they are compressed to INT8. 
@@ -535,7 +527,7 @@ Full list of supported arguments available via ``--help`` generation quality of INT4-compressed LLMs, but requires additional time for tuning weights on a calibration dataset. To run AWQ, please also provide a - dataset argument. Note: it is possible that there will + dataset argument. Note: it's possible that there will be no matching patterns in the model to apply AWQ, in such case it will be skipped. --scale-estimation Indicates whether to apply a scale estimation @@ -549,15 +541,9 @@ Full list of supported arguments available via ``--help`` to minimize the difference between activations of a compressed and original layer. Please note, that applying GPTQ takes additional memory and time. - --lora-correction Indicates whether to apply LoRA Correction algorithm. - When enabled, this algorithm introduces low-rank - adaptation layers in the model that can recover - accuracy after weight compression at some cost of - inference latency. Please note, that applying LoRA - Correction algorithm takes additional memory and time. --sensitivity-metric SENSITIVITY_METRIC The sensitivity metric for assigning quantization - precision to layers. It can be one of the following: + precision to layers. Can be one of the following: ['weight_quantization_error', 'hessian_input_activation', 'mean_activation_variance', 'max_activation_variance', @@ -575,7 +561,7 @@ Full list of supported arguments available via ``--help`` performance. Use it when you intentionally want to use a stateless model, for example, to be compatible with existing OpenVINO native inference code that expects - KV-cache inputs and outputs in the model. + kv-cache inputs and outputs in the model. --disable-convert-tokenizer Do not add converted tokenizer and detokenizer OpenVINO models. @@ -599,7 +585,7 @@ compression: .. 
parsed-literal:: - 2024-12-10 01:57:25.755800: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 01:37:17.680673: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight'] - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). @@ -650,8 +636,9 @@ OpenVINO `__ 3. `Stable Diffusion v2.1 using Optimum-Intel OpenVINO `__ 4. `Image generation with Stable Diffusion -XL `__ 5. `Create LLM-powered Chatbot using -OpenVINO `__ 6. `Document Visual Question Answering -Using Pix2Struct and OpenVINO `__ 7. `Automatic -speech recognition using Distil-Whisper and -OpenVINO `__ +XL `__ 5. `Instruction following using +Databricks Dolly 2.0 `__ 6. `Create +LLM-powered Chatbot using OpenVINO `__ 7. `Document +Visual Question Answering Using Pix2Struct and +OpenVINO `__ 8. `Automatic speech recognition +using Distil-Whisper and OpenVINO `__ diff --git a/docs/notebooks/hunyuan-dit-image-generation-with-output.rst b/docs/notebooks/hunyuan-dit-image-generation-with-output.rst index 61c412fe6f5e62..b3fea075421bb3 100644 --- a/docs/notebooks/hunyuan-dit-image-generation-with-output.rst +++ b/docs/notebooks/hunyuan-dit-image-generation-with-output.rst @@ -36,7 +36,6 @@ using OpenVINO. 
Additionally, we will use `NNCF `__ for optimizing model in low precision. - **Table of contents:** - `Prerequisites <#prerequisites>`__ @@ -82,21 +81,17 @@ Prerequisites .. code:: ipython3 from pathlib import Path - import requests + import sys + repo_dir = Path("HunyuanDiT") - if not Path("cmd_helper.py").exists(): - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py", - ) - open("cmd_helper.py", "w").write(r.text) - -.. code:: ipython3 + if not repo_dir.exists(): + !git clone https://github.com/tencent/HunyuanDiT + %cd HunyuanDiT + !git checkout ebfb7936490287616c38519f87084a34a1d75362 + %cd .. - from cmd_helper import clone_repo - - - repo_dir = clone_repo("https://github.com/tencent/HunyuanDiT", "ebfb7936490287616c38519f87084a34a1d75362") + sys.path.append(str(repo_dir)) Download PyTorch model ---------------------- @@ -758,6 +753,8 @@ Please select inference device using dropdown widget: .. code:: ipython3 + import requests + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) @@ -884,7 +881,7 @@ Please select inference device using dropdown widget: -.. image:: hunyuan-dit-image-generation-with-output_files/hunyuan-dit-image-generation-with-output_31_0.png +.. 
image:: hunyuan-dit-image-generation-with-output_files/hunyuan-dit-image-generation-with-output_30_0.png diff --git a/docs/notebooks/hunyuan-dit-image-generation-with-output_files/hunyuan-dit-image-generation-with-output_31_0.jpg b/docs/notebooks/hunyuan-dit-image-generation-with-output_files/hunyuan-dit-image-generation-with-output_30_0.jpg similarity index 100% rename from docs/notebooks/hunyuan-dit-image-generation-with-output_files/hunyuan-dit-image-generation-with-output_31_0.jpg rename to docs/notebooks/hunyuan-dit-image-generation-with-output_files/hunyuan-dit-image-generation-with-output_30_0.jpg diff --git a/docs/notebooks/hunyuan-dit-image-generation-with-output_files/hunyuan-dit-image-generation-with-output_31_0.png b/docs/notebooks/hunyuan-dit-image-generation-with-output_files/hunyuan-dit-image-generation-with-output_30_0.png similarity index 100% rename from docs/notebooks/hunyuan-dit-image-generation-with-output_files/hunyuan-dit-image-generation-with-output_31_0.png rename to docs/notebooks/hunyuan-dit-image-generation-with-output_files/hunyuan-dit-image-generation-with-output_30_0.png diff --git a/docs/notebooks/image-bind-with-output.rst b/docs/notebooks/image-bind-with-output.rst new file mode 100644 index 00000000000000..1e8ecd63c1de0e --- /dev/null +++ b/docs/notebooks/image-bind-with-output.rst @@ -0,0 +1,1027 @@ +Binding multimodal data using ImageBind and OpenVINO +==================================================== + +Exploring the surrounding world, people get information using multiple +senses, for example, seeing a busy street and hearing the sounds of car +engines. ImageBind introduces an approach that brings machines one step +closer to humans’ ability to learn simultaneously, holistically, and +directly from many different forms of information. +`ImageBind `__ is the +first AI model capable of binding data from six modalities at once, +without the need for explicit supervision (the process of organizing and +labeling raw data). 
By recognizing the relationships between these +modalities — images and video, audio, text, depth, thermal, and inertial +measurement units (IMUs) — this breakthrough helps advance AI by +enabling machines to better analyze many different forms of information, +together. + +.. figure:: https://user-images.githubusercontent.com/8495451/236859695-ffa13364-3e39-4d99-a8da-fbfab17f9a6b.gif + :alt: ImageBind + + ImageBind + +In this tutorial, we consider how to convert and run ImageBind model +using OpenVINO. + +The tutorial consists of following steps: + +1. Download the pre-trained model. +2. Prepare input data examples. +3. Convert the model to OpenVINO Intermediate Representation format + (IR). +4. Run model inference and analyze results. + +About ImageBind +--------------- + +ImageBind, released in May 2023 by Meta Research, is an embedding model +that combines data from six modalities: images and video, text, audio, +thermal imaging, depth, and IMUs, which contain sensors including +accelerometers and orientation monitors. Using ImageBind, you can +provide data in one modality – for example, audio – and find related +documents in different modalities, such as video or images. + +ImageBind was trained with pairs of data. Each pair mapped image data – +including videos – to another modality, and the combined data was used +to train an embedding model. ImageBind found that features for different +modalities could be learned using the image data used in their training. +A notable conclusion from ImageBind is that pairing images with another +modality, then combining the results in the same embedding space is +sufficient to create a multi-modal embedding model. More details about +the model can be found in the model +`repository `__, +`paper `__, and Meta AI `blog +post `__. 
+ +Like all embedding models, there are many potential use cases for +ImageBind, among them information retrieval, zero-shot classification, +and using the representations created by ImageBind as input for downstream +tasks (e.g. image generation). Some of the potential use cases +are represented in the image below: + +.. figure:: https://user-images.githubusercontent.com/29454499/256303836-c8e7b311-0b7b-407c-8610-fd8a803e4197.png + :alt: usecases + + usecases + +In this tutorial, we consider how to use ImageBind for multimodal +zero-shot classification. + + +**Table of contents:** + + +- `Prerequisites <#prerequisites>`__ +- `Instantiate PyTorch model <#instantiate-pytorch-model>`__ +- `Prepare input data <#prepare-input-data>`__ +- `Convert Model to OpenVINO Intermediate Representation (IR) + format <#convert-model-to-openvino-intermediate-representation-ir-format>`__ + + - `Select inference device <#select-inference-device>`__ + +- `Zero-shot classification using ImageBind and + OpenVINO <#zero-shot-classification-using-imagebind-and-openvino>`__ + + - `Text-Image classification <#text-image-classification>`__ + - `Text-Audio classification <#text-audio-classification>`__ + - `Image-Audio classification <#image-audio-classification>`__ + +- `Post-Training Quantization of ImageBind model with + NNCF <#post-training-quantization-of-imagebind-model-with-nncf>`__ + + - `Prepare datasets <#prepare-datasets>`__ + - `Apply quantization <#apply-quantization>`__ + + - `Quantize ImageBind model for vision + modality.
<#quantize-imagebind-model-for-vision-modality->`__ + - `Quantize ImageBind model for text + modality <#quantize-imagebind-model-for-text-modality>`__ + - `Quantize ImageBind model for audio + modality <#quantize-imagebind-model-for-audio-modality>`__ + + - `Compare results for the OpenVINO FP16 model and the quantized + model <#compare-results-for-the-openvino-fp16-model-and-the-quantized-model>`__ + + - `Select inference device <#select-inference-device>`__ + + - `Compare File Size <#compare-file-size>`__ + - `Compare inference time of the FP16 IR and quantized + models <#compare-inference-time-of-the-fp16-ir-and-quantized-models>`__ + + - `Vision model <#vision-model>`__ + - `Text model <#text-model>`__ + - `Audio model <#audio-model>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +Prerequisites +------------- + + + +.. code:: ipython3 + + import platform + + %pip install -q "torch>=2.0.1" "torchvision>=0.15.2,<0.17.0" "torchaudio>=2.0.2" "matplotlib>=3.4" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q datasets regex librosa soundfile pytorchvideo ftfy "timm>=0.6.7" einops fvcore "openvino>=2024.0.0" "nncf>=2.9.0" numpy scipy --extra-index-url https://download.pytorch.org/whl/cpu + + + if platform.system() != "Windows": + %pip install -q "matplotlib>=3.4" + else: + %pip install -q "matplotlib>=3.4,<3.7" + +.. code:: ipython3 + + from pathlib import Path + + repo_dir = Path("ImageBind") + + if not repo_dir.exists(): + !git clone https://github.com/facebookresearch/ImageBind.git + + %cd {repo_dir} + +Instantiate PyTorch model +------------------------- + + + +To start work with the model, we should instantiate the PyTorch model +class. 
``imagebind_model.imagebind_huge(pretrained=True)`` downloads +model weights and creates a PyTorch model object for ImageBind. +Currently, there is only one ImageBind model available for downloading, +``imagebind_huge``, more details about it can be found in `model +card `__. + + Please note, depending on internet connection speed, the model + downloading process can take some time. It also requires at least 5 + GB of free space on disk for saving model checkpoint. + +.. code:: ipython3 + + import imagebind.data as data + import torch + from imagebind.models import imagebind_model + from imagebind.models.imagebind_model import ModalityType + + # Instantiate model + model = imagebind_model.imagebind_huge(pretrained=True) + model.eval(); + +Prepare input data +------------------ + + + +ImageBind works with data across 6 different modalities. Each of them +requires its own preprocessing steps. The ``data`` module is responsible for +data reading and preprocessing for each modality. + +- ``data.load_and_transform_text`` accepts a list of text labels and + tokenizes them. +- ``data.load_and_transform_vision_data`` accepts paths to input + images, reads them, resizes them preserving the aspect ratio with smaller side + size 224, performs center crop, and normalizes data into [0, 1] + floating point range. +- ``data.load_and_transform_audio_data`` reads audio files from + provided paths, splits them into samples, and computes + `mel `__ + spectrogram. + +..
code:: ipython3 + + # Prepare inputs + + text_list = ["A car", "A bird", "A dog"] + image_paths = [ + ".assets/dog_image.jpg", + ".assets/car_image.jpg", + ".assets/bird_image.jpg", + ] + audio_paths = [ + ".assets/dog_audio.wav", + ".assets/bird_audio.wav", + ".assets/car_audio.wav", + ] + + inputs = { + ModalityType.TEXT: data.load_and_transform_text(text_list, "cpu"), + ModalityType.VISION: data.load_and_transform_vision_data(image_paths, "cpu"), + ModalityType.AUDIO: data.load_and_transform_audio_data(audio_paths, "cpu"), + } + +Convert Model to OpenVINO Intermediate Representation (IR) format +----------------------------------------------------------------- + + + +OpenVINO supports PyTorch through Model Conversion API. You will use +`model conversion Python +API `__ +to convert model to IR format. The ``ov.convert_model`` function returns +OpenVINO Model class instance ready to load on a device or save on a +disk for next loading using ``ov.save_model``. + +ImageBind accepts data that represents different modalities +simultaneously in any combinations, however, their processing is +independent of each other. For avoiding losing flexibility passing data, +we will export each modality encoder as an independent model. The code +below defines wrappers for the model to get only single-modality +embeddings. + +.. code:: ipython3 + + class ModelExporter(torch.nn.Module): + def __init__(self, model, modality): + super().__init__() + self.model = model + self.modality = modality + + def forward(self, data): + return self.model({self.modality: data}) + +.. code:: ipython3 + + import openvino as ov + + core = ov.Core() + +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~ + + + +select device from dropdown list for running inference using OpenVINO + +.. 
code:: ipython3 + + import requests + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + + from notebook_utils import device_widget + + device = device_widget() + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=3, options=('CPU', 'GPU.0', 'GPU.1', 'AUTO'), value='AUTO') + + + +.. code:: ipython3 + + import openvino as ov + + core = ov.Core() + + ov_modality_models = {} + + modalities = [ModalityType.TEXT, ModalityType.VISION, ModalityType.AUDIO] + for modality in modalities: + export_dir = Path(f"image-bind-{modality}") + file_name = f"image-bind-{modality}" + export_dir.mkdir(exist_ok=True) + ir_path = export_dir / f"{file_name}.xml" + if not ir_path.exists(): + exportable_model = ModelExporter(model, modality) + model_input = inputs[modality] + ov_model = ov.convert_model(exportable_model, example_input=model_input) + ov.save_model(ov_model, ir_path) + else: + ov_model = core.read_model(ir_path) + ov_modality_models[modality] = core.compile_model(ov_model, device.value) + +Zero-shot classification using ImageBind and OpenVINO +----------------------------------------------------- + + + +In zero-shot classification, a piece of data is embedded and fed to the +model to retrieve a label that corresponds with the contents of the +data. In the case of ImageBind, you can classify audio, images, and +information in the other supported modalities. We already discussed how +to perform zero-shot image classification using the CLIP model (please +check this +`notebook `__ +for details), capabilities of ImageBind for this task wider, because it +allows using any combinations of supported modalities for +classification. + +To perform zero-shot classification using ImageBind we should perform +the following steps: + +1. 
Preprocess data batch for requested modalities (one modality in our + case treated as a data source, other - as a label). +2. Calculate embeddings for each modality. +3. Find dot-product between embeddings vectors to get probabilities + matrix. +4. Obtain the label with the highest probability for mapping the source + into label space. + +We already preprocessed the data in the previous step; now we should run model +inference to get the embeddings. + +.. code:: ipython3 + + embeddings = {} + for modality in modalities: + embeddings[modality] = ov_modality_models[modality](inputs[modality])[ov_modality_models[modality].output(0)] + +The probability matrix shows the correspondence between source +embeddings and label embeddings, it is a 2D matrix, where x-dimension +represents label-modality data and y-dimension - source-modality data. +It can be calculated as a dot-product between embeddings vectors and +normalized into the [0,1] range using softmax. The higher the score at +the intersection of the x and y labels, the higher the confidence that +they represent the same object. + +.. code:: ipython3 + + import matplotlib.pyplot as plt + import numpy as np + from scipy.special import softmax + + + def visualize_prob_matrix(matrix, x_label, y_label): + fig, ax = plt.subplots() + ax.matshow(matrix, cmap="winter") + + for (i, j), z in np.ndenumerate(matrix): + ax.text(j, i, "{:0.3f}".format(z), ha="center", va="center") + ax.set_xticks(range(len(x_label)), x_label) + ax.set_yticks(range(len(y_label)), y_label) + + + image_list = [img.split("/")[-1] for img in image_paths] + audio_list = [audio.split("/")[-1] for audio in audio_paths] + +Text-Image classification +~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + text_vision_scores = softmax(embeddings[ModalityType.VISION] @ embeddings[ModalityType.TEXT].T, axis=-1) + + visualize_prob_matrix(text_vision_scores, text_list, image_list) + + + +..
image:: image-bind-with-output_files/image-bind-with-output_20_0.png + + +Text-Audio classification +~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + text_audio_scores = softmax(embeddings[ModalityType.AUDIO] @ embeddings[ModalityType.TEXT].T, axis=-1) + + visualize_prob_matrix(text_audio_scores, text_list, audio_list) + + + +.. image:: image-bind-with-output_files/image-bind-with-output_22_0.png + + +Image-Audio classification +~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + audio_vision_scores = softmax(embeddings[ModalityType.VISION] @ embeddings[ModalityType.AUDIO].T, axis=-1) + + visualize_prob_matrix(audio_vision_scores, image_list, audio_list) + + + +.. image:: image-bind-with-output_files/image-bind-with-output_24_0.png + + +Putting all together, we can match text, image, and sound for our data. + +.. code:: ipython3 + + import IPython.display as ipd + from PIL import Image + + text_image_ids = np.argmax(text_vision_scores, axis=0) + text_audio_ids = np.argmax(text_audio_scores, axis=0) + print( + f"Predicted label: {text_list[0]} \nprobability for image - {text_vision_scores[text_image_ids[0], 0]:.3f}\nprobability for audio - {text_audio_scores[0, text_audio_ids[0]]:.3f}" + ) + display(Image.open(image_paths[text_image_ids[0]])) + ipd.Audio(audio_paths[text_audio_ids[0]]) + + +.. parsed-literal:: + + Predicted label: A car + probability for image - 1.000 + probability for audio - 1.000 + + + +.. image:: image-bind-with-output_files/image-bind-with-output_26_1.png + + + + +.. raw:: html + + + + + + + +.. code:: ipython3 + + print( + f"Predicted label: {text_list[1]} \nprobability for image - {text_vision_scores[text_image_ids[1], 1]:.3f}\nprobability for audio - {text_audio_scores[1, text_audio_ids[1]]:.3f}" + ) + display(Image.open(image_paths[text_image_ids[1]])) + ipd.Audio(audio_paths[text_audio_ids[1]]) + + +.. parsed-literal:: + + Predicted label: A bird + probability for image - 0.986 + probability for audio - 1.000 + + + +.. 
image:: image-bind-with-output_files/image-bind-with-output_27_1.png + + + + +.. raw:: html + + + + + + + +.. code:: ipython3 + + print( + f"Predicted label: {text_list[2]} \nprobability for image - {text_vision_scores[text_image_ids[2], 2]:.3f}\nprobability for audio - {text_audio_scores[2, text_audio_ids[2]]:.3f}" + ) + display(Image.open(image_paths[text_image_ids[2]])) + ipd.Audio(audio_paths[text_audio_ids[2]]) + + +.. parsed-literal:: + + Predicted label: A dog + probability for image - 0.984 + probability for audio - 1.000 + + + +.. image:: image-bind-with-output_files/image-bind-with-output_28_1.png + + + + +.. raw:: html + + + + + + + +Post-Training Quantization of ImageBind model with NNCF +------------------------------------------------------- + + + +The goal of this part of tutorial is to demonstrate how to speed up the +model by applying 8-bit post-training quantization from +`NNCF `__ (Neural Network +Compression Framework) and infer quantized model via OpenVINO™ Toolkit. + +The optimization process contains the following steps: 1. Prepare +quantization dataset 2. Quantize OpenVINO model with NNCF. 3. Compare +probability matrices between converted and quantized models on input +data examples. 4. Compare model size of converted and quantized models. +5. Compare performance of converted and quantized models. + +.. code:: ipython3 + + modalities = [ModalityType.TEXT, ModalityType.VISION, ModalityType.AUDIO] + fp_model_paths = {modality: Path(f"image-bind-{modality}") / f"image-bind-{modality}.xml" for modality in modalities} + int8_model_paths = {modality: Path(f"image-bind-{modality}") / f"image-bind-{modality}_int8.xml" for modality in modalities} + +Prepare datasets +~~~~~~~~~~~~~~~~ + + + +The `Conceptual +Captions `__ dataset +consisting of ~3.3M images annotated with captions. Dataset is used to +quantize image and text models. + +.. 
code:: ipython3 + + import imagebind.data as data + import os + import requests + import tempfile + + from requests.packages.urllib3.exceptions import InsecureRequestWarning + + requests.packages.urllib3.disable_warnings(InsecureRequestWarning) + + + def check_text_data(data): + """ + Check if the given data is text-based. + """ + if isinstance(data, str): + return True + if isinstance(data, list): + return all(isinstance(x, str) for x in data) + return False + + + def collate_fn(examples, image_column="image_url", text_column="caption"): + """ + Collates examples into a batch for processing. + Preprocesses each example by loading and transforming image and text data. + Checks if the text data in the example is valid by calling the `check_text_data` function. + Downloads the image specified by the URL in the image_column of the example dictionary. + Constructs and returns a dictionary representing the collated batch with the following keys: + - "pixel_values": The pixel values of the preprocessed example. + - "input_ids": The transformed text data of the preprocessed example. + """ + assert len(examples) == 1 + example = examples[0] + if not check_text_data(example[text_column]): + raise ValueError("Text data is not valid") + + url = example[image_column] + with tempfile.TemporaryDirectory() as tempdir: + f_name = os.path.join(tempdir, "image.jpg") + try: + response = requests.get(url, verify=False, timeout=20) + with open(f_name, "wb") as file: + file.write(response.content) + pixel_values = data.load_and_transform_vision_data([f_name], "cpu") + except Exception: + print(f"Can't load image from url: {url}") + return None + + text = data.load_and_transform_text([example[text_column]], "cpu") + + return {"pixel_values": pixel_values, "input_ids": text} + +.. 
code:: ipython3 + + from datasets import load_dataset + import itertools + import torch + from tqdm.notebook import tqdm + + + def collect_vision_text_data(dataloader, init_steps): + """ + This function collects vision and text data from a dataloader for a specified number of initialization steps. + It iterates over the dataloader, fetching batches and storing the relevant vision and text data. + Returns a tuple containing the collected vision_data and text_data lists. + """ + text_data = [] + vision_data = [] + print(f"Fetching {init_steps} for the initialization...") + counter = 0 + for batch in tqdm(dataloader): + if counter == init_steps: + break + with torch.no_grad(): + if batch: + counter += 1 + text_data.append(batch["input_ids"].to("cpu")) + vision_data.append(batch["pixel_values"].to("cpu")) + return vision_data, text_data + + + def prepare_vision_text_dataset(opt_init_steps=50): + """ + Prepares a vision-text dataset for quantization by collecting vision and text data. + """ + dataset = load_dataset("google-research-datasets/conceptual_captions", streaming=False, trust_remote_code=True) + train_dataset = dataset["train"].shuffle(seed=0) + dataloader = torch.utils.data.DataLoader(train_dataset, collate_fn=collate_fn, batch_size=1) + vision_data, text_data = collect_vision_text_data(dataloader, opt_init_steps) + return vision_data, text_data + +The `ESC-50 `__ dataset is +used to quantize the audio modality of the ImageBind model. Dataset is a +labeled collection of 2000 environmental audio recordings suitable for +benchmarking methods of environmental sound classification. The dataset +consists of 5-second-long recordings organized into 50 semantic classes. + +.. code:: ipython3 + + import numpy as np + import torchaudio + + + def collect_audio_data(dataloader, init_steps=300): + """ + This function collects audio data from a dataloader for a specified number of initialization steps. 
+ It iterates over the dataloader, fetching batches and storing them in a list. + """ + audio_data = [] + for _, batch in tqdm(zip(range(init_steps), itertools.islice(dataloader, 0, init_steps))): + with torch.no_grad(): + audio_data.append(batch) + return audio_data + + + def prepare_audio_dataset(): + """ + Prepares an "ashraq/esc50" audio dataset for quantization by collecting audio data. + Collects audio data from the dataloader by calling the `collect_audio_data` function. + Returns a list containing the collected calibration audio data batches. + """ + audio_dataset = load_dataset("ashraq/esc50", streaming=True, trust_remote_code=True) + train_dataset = audio_dataset["train"].shuffle(seed=42, buffer_size=1000) + + def collate_fn(examples): + assert len(examples) == 1 + with tempfile.TemporaryDirectory() as tempdir: + f_name = os.path.join(tempdir, "audio.wav") + audio_data = examples[0]["audio"]["array"] + sample_rate = examples[0]["audio"]["sampling_rate"] + audio_data = torch.from_numpy(audio_data).to(torch.float32).unsqueeze(0) + torchaudio.save(f_name, audio_data, sample_rate) + return data.load_and_transform_audio_data([f_name], "cpu") + + dataloader = torch.utils.data.DataLoader(train_dataset, collate_fn=collate_fn, batch_size=1) + calibration_data = collect_audio_data(dataloader) + return calibration_data + +.. code:: ipython3 + + vision_data, text_data = [], [] + + if not int8_model_paths[ModalityType.TEXT].exists() or not int8_model_paths[ModalityType.VISION].exists(): + vision_data, text_data = prepare_vision_text_dataset() + +Apply quantization +~~~~~~~~~~~~~~~~~~ + + + +.. 
code:: ipython3 + + import logging + import nncf + import openvino as ov + + nncf.set_log_level(logging.ERROR) + + core = ov.Core() + + + def quantize_openvino_model(modality, calibration_data): + model_path = fp_model_paths[modality] + model = core.read_model(model_path) + quantized_model = nncf.quantize( + model=model, + calibration_dataset=calibration_data, + model_type=nncf.ModelType.TRANSFORMER, + ) + ov.save_model(quantized_model, int8_model_paths[modality]) + return quantized_model + + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + + +Quantize ImageBind model for vision modality. +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + + + **NOTE**: Quantization is time and memory consuming operation. + Running quantization code below may take a long time. + +.. code:: ipython3 + + if not int8_model_paths[ModalityType.VISION].exists(): + if len(vision_data) == 0: + raise RuntimeError("Calibration dataset is empty. Please check internet connection and try to download images manually from the URLs above.") + + vision_dataset = nncf.Dataset(vision_data) + vision_quantized_model = quantize_openvino_model(modality=ModalityType.VISION, calibration_data=vision_dataset) + +Quantize ImageBind model for text modality +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + + +.. code:: ipython3 + + if not int8_model_paths[ModalityType.TEXT].exists(): + text_dataset = nncf.Dataset(text_data) + text_quantized_model = quantize_openvino_model(modality=ModalityType.TEXT, calibration_data=text_dataset) + +Quantize ImageBind model for audio modality +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + + +.. 
code:: ipython3 + + if not int8_model_paths[ModalityType.AUDIO].exists(): + audio_calibration_data = prepare_audio_dataset() + audio_dataset = nncf.Dataset(audio_calibration_data) + audio_quantized_model = quantize_openvino_model(modality=ModalityType.AUDIO, calibration_data=audio_dataset) + +NNCF also supports quantization-aware training, and other algorithms +than quantization. See the `NNCF +documentation `__ +in the NNCF repository for more information. + +Compare results for the OpenVINO FP16 model and the quantized model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +Compare the probability matrices for ``FP16`` and ``INT8`` models. + +.. code:: ipython3 + + # Prepare inputs + + text_list = ["A car", "A bird", "A dog"] + image_paths = [ + ".assets/dog_image.jpg", + ".assets/car_image.jpg", + ".assets/bird_image.jpg", + ] + audio_paths = [ + ".assets/dog_audio.wav", + ".assets/bird_audio.wav", + ".assets/car_audio.wav", + ] + + inputs = { + ModalityType.TEXT: data.load_and_transform_text(text_list, "cpu"), + ModalityType.VISION: data.load_and_transform_vision_data(image_paths, "cpu"), + ModalityType.AUDIO: data.load_and_transform_audio_data(audio_paths, "cpu"), + } + +Select inference device +^^^^^^^^^^^^^^^^^^^^^^^ + + + +select device from dropdown list for running inference using OpenVINO + +.. code:: ipython3 + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=3, options=('CPU', 'GPU.0', 'GPU.1', 'AUTO'), value='AUTO') + + + +.. code:: ipython3 + + embeddings = {} + for modality in modalities: + ov_model = core.compile_model(fp_model_paths[modality], device.value) + embeddings[modality] = ov_model(inputs[modality])[0] + + quantized_embeddings = {} + for modality in modalities: + model = core.compile_model(int8_model_paths[modality], device.value) + quantized_embeddings[modality] = model(inputs[modality])[0] + +.. 
code:: ipython3 + + def visualize_prob_matrices(fp_matrix, int_matrix, x_label, y_label): + fig, ax = plt.subplots(1, 2) + for i, matrix in enumerate([fp_matrix, int_matrix]): + ax[i].matshow(matrix, cmap="winter") + + for (k, j), z in np.ndenumerate(matrix): + ax[i].title.set_text("FP16 probs" if i == 0 else "INT8 probs") + ax[i].text(j, k, "{:0.3f}".format(z), ha="center", va="center") + ax[i].set_xticks(range(len(x_label)), x_label) + ax[i].set_yticks(range(len(y_label)), y_label) + fig.tight_layout() + + + image_list = [img.split("/")[-1] for img in image_paths] + audio_list = [audio.split("/")[-1] for audio in audio_paths] + +.. code:: ipython3 + + fp_text_vision_scores = softmax(embeddings[ModalityType.VISION] @ embeddings[ModalityType.TEXT].T, axis=-1) + int_text_vision_scores = softmax( + quantized_embeddings[ModalityType.VISION] @ quantized_embeddings[ModalityType.TEXT].T, + axis=-1, + ) + + visualize_prob_matrices(fp_text_vision_scores, int_text_vision_scores, text_list, image_list) + + + +.. image:: image-bind-with-output_files/image-bind-with-output_52_0.png + + +.. code:: ipython3 + + fp_text_audio_scores = softmax(embeddings[ModalityType.AUDIO] @ embeddings[ModalityType.TEXT].T, axis=-1) + int_text_audio_scores = softmax( + quantized_embeddings[ModalityType.AUDIO] @ quantized_embeddings[ModalityType.TEXT].T, + axis=-1, + ) + + visualize_prob_matrices(fp_text_audio_scores, int_text_audio_scores, text_list, image_list) + + + +.. image:: image-bind-with-output_files/image-bind-with-output_53_0.png + + +.. code:: ipython3 + + fp_audio_vision_scores = softmax(embeddings[ModalityType.VISION] @ embeddings[ModalityType.AUDIO].T, axis=-1) + int_audio_vision_scores = softmax( + quantized_embeddings[ModalityType.VISION] @ quantized_embeddings[ModalityType.AUDIO].T, + axis=-1, + ) + + visualize_prob_matrices(fp_audio_vision_scores, int_audio_vision_scores, text_list, image_list) + + + +.. 
image:: image-bind-with-output_files/image-bind-with-output_54_0.png + + +Compare File Size +~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + def calculate_compression_rate(modality): + fp16_ir_model_size = Path(fp_model_paths[modality]).with_suffix(".bin").stat().st_size / 1024 + quantized_model_size = Path(int8_model_paths[modality]).with_suffix(".bin").stat().st_size / 1024 + print(f"Modality: {modality}") + print(f" * FP16 IR model size: {fp16_ir_model_size:.2f} KB") + print(f" * INT8 model size: {quantized_model_size:.2f} KB") + print(f" * Model compression rate: {fp16_ir_model_size / quantized_model_size:.3f}") + + + for modality in modalities: + calculate_compression_rate(modality) + + +.. parsed-literal:: + + Modality: text + * FP16 IR model size: 691481.69 KB + * INT8 model size: 347006.66 KB + * Model compression rate: 1.993 + Modality: vision + * FP16 IR model size: 1235995.15 KB + * INT8 model size: 620132.79 KB + * Model compression rate: 1.993 + Modality: audio + * FP16 IR model size: 168429.15 KB + * INT8 model size: 84818.40 KB + * Model compression rate: 1.986 + + +Compare inference time of the FP16 IR and quantized models +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +To measure the inference performance of the ``FP16`` and ``INT8`` +models, we use median inference time on calibration dataset. So we can +approximately estimate the speed up of the dynamic quantized models. + + **NOTE**: For the most accurate performance estimation, it is + recommended to run ``benchmark_app`` in a terminal/command prompt + after closing other applications with static shapes. + +.. 
code:: ipython3 + + import time + + + def calculate_inference_time(model_path, calibration_data): + model = core.compile_model(model_path) + output_layer = model.output(0) + inference_time = [] + for batch in calibration_data: + start = time.perf_counter() + _ = model(batch)[output_layer] + end = time.perf_counter() + delta = end - start + inference_time.append(delta) + return np.median(inference_time) + +Vision model +^^^^^^^^^^^^ + + + +.. code:: ipython3 + + fp16_latency = calculate_inference_time(fp_model_paths[ModalityType.VISION], vision_data) + int8_latency = calculate_inference_time(int8_model_paths[ModalityType.VISION], vision_data) + print(f"Performance speed up: {fp16_latency / int8_latency:.3f}") + + +.. parsed-literal:: + + Performance speed up: 2.375 + + +Text model +^^^^^^^^^^ + + + +.. code:: ipython3 + + fp16_latency = calculate_inference_time(fp_model_paths[ModalityType.TEXT], text_data) + int8_latency = calculate_inference_time(int8_model_paths[ModalityType.TEXT], text_data) + print(f"Performance speed up: {fp16_latency / int8_latency:.3f}") + + +.. parsed-literal:: + + Performance speed up: 1.492 + + +Audio model +^^^^^^^^^^^ + + + +.. code:: ipython3 + + fp16_latency = calculate_inference_time(fp_model_paths[ModalityType.AUDIO], audio_calibration_data) + int8_latency = calculate_inference_time(int8_model_paths[ModalityType.AUDIO], audio_calibration_data) + print(f"Performance speed up: {fp16_latency / int8_latency:.3f}") + + +.. 
parsed-literal:: + + Performance speed up: 5.770 + diff --git a/docs/notebooks/image-bind-with-output_files/image-bind-with-output_20_0.png b/docs/notebooks/image-bind-with-output_files/image-bind-with-output_20_0.png new file mode 100644 index 00000000000000..b61da5d71d0e90 --- /dev/null +++ b/docs/notebooks/image-bind-with-output_files/image-bind-with-output_20_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:407f4039d44322edd717fb1eba4c0e029205b2c691614606f1a5b33ed31aa047 +size 15474 diff --git a/docs/notebooks/image-bind-with-output_files/image-bind-with-output_22_0.png b/docs/notebooks/image-bind-with-output_files/image-bind-with-output_22_0.png new file mode 100644 index 00000000000000..bf96c415a07c15 --- /dev/null +++ b/docs/notebooks/image-bind-with-output_files/image-bind-with-output_22_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:098a56bdaf58b412fe6935d327bcd810942f01789ecd5c2efe834888eba3b819 +size 13795 diff --git a/docs/notebooks/image-bind-with-output_files/image-bind-with-output_24_0.png b/docs/notebooks/image-bind-with-output_files/image-bind-with-output_24_0.png new file mode 100644 index 00000000000000..54a9a68752100b --- /dev/null +++ b/docs/notebooks/image-bind-with-output_files/image-bind-with-output_24_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe454683f2419970a93baaea6a5beb973dd832627217464d87c14bf2a61e8032 +size 18633 diff --git a/docs/notebooks/image-bind-with-output_files/image-bind-with-output_26_1.png b/docs/notebooks/image-bind-with-output_files/image-bind-with-output_26_1.png new file mode 100644 index 00000000000000..6be4611dbc7a18 --- /dev/null +++ b/docs/notebooks/image-bind-with-output_files/image-bind-with-output_26_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d09352f8474421fa78d601cc5afbe88df3d0403c157f91605d424b66a2f1809a +size 303014 diff --git 
a/docs/notebooks/image-bind-with-output_files/image-bind-with-output_27_1.png b/docs/notebooks/image-bind-with-output_files/image-bind-with-output_27_1.png new file mode 100644 index 00000000000000..174dcfdcbe8079 --- /dev/null +++ b/docs/notebooks/image-bind-with-output_files/image-bind-with-output_27_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:609e506939d69a89fb59d36622d72005d5b162afccf70c1e2463cd51d544d4dd +size 777583 diff --git a/docs/notebooks/image-bind-with-output_files/image-bind-with-output_28_1.png b/docs/notebooks/image-bind-with-output_files/image-bind-with-output_28_1.png new file mode 100644 index 00000000000000..a4b0b02a4d7c0b --- /dev/null +++ b/docs/notebooks/image-bind-with-output_files/image-bind-with-output_28_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7509d532217e990ed721424c57aecbadfb634d397bd1c069852f873fee8741a9 +size 572170 diff --git a/docs/notebooks/image-bind-with-output_files/image-bind-with-output_52_0.png b/docs/notebooks/image-bind-with-output_files/image-bind-with-output_52_0.png new file mode 100644 index 00000000000000..9274858833d2aa --- /dev/null +++ b/docs/notebooks/image-bind-with-output_files/image-bind-with-output_52_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41c01dd2ebbddd60573c560ddcb00f7671b63bf1e49ca68497be1d39fd5cb86c +size 19998 diff --git a/docs/notebooks/image-bind-with-output_files/image-bind-with-output_53_0.png b/docs/notebooks/image-bind-with-output_files/image-bind-with-output_53_0.png new file mode 100644 index 00000000000000..76f09aa4eb803b --- /dev/null +++ b/docs/notebooks/image-bind-with-output_files/image-bind-with-output_53_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acb5ca8757899c94fa8fd68a647975ea031ffa3f4955214b9a39d097b179ad27 +size 17315 diff --git a/docs/notebooks/image-bind-with-output_files/image-bind-with-output_54_0.png 
b/docs/notebooks/image-bind-with-output_files/image-bind-with-output_54_0.png new file mode 100644 index 00000000000000..f2f53fccfbdd8c --- /dev/null +++ b/docs/notebooks/image-bind-with-output_files/image-bind-with-output_54_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e30a62c61037f25fa771225ab71ab9ecb407a0589103a79de2c5e0374583adf1 +size 22314 diff --git a/docs/notebooks/image-classification-quantization-with-output.rst b/docs/notebooks/image-classification-quantization-with-output.rst index 177ffd97209a57..7bf7172f720588 100644 --- a/docs/notebooks/image-classification-quantization-with-output.rst +++ b/docs/notebooks/image-classification-quantization-with-output.rst @@ -70,6 +70,7 @@ Guide `__ to speed up pipeline. - **Table of contents:** - `Prerequisites <#prerequisites>`__ @@ -83,6 +82,7 @@ pipeline. pipelines <#compare-inference-time-of-the-fp16-and-int8-pipelines>`__ - `Interactive demo <#interactive-demo>`__ + Installation Instructions ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -102,25 +102,15 @@ Prerequisites .. code:: ipython3 - import requests - - - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - open("notebook_utils.py", "w").write(r.text) - - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py", - ) - open("cmd_helper.py", "w").write(r.text) - -.. code:: ipython3 + from pathlib import Path + import sys - from cmd_helper import clone_repo + repo_dir = Path("InstantID") + if not repo_dir.exists(): + !git clone https://github.com/InstantID/InstantID.git - clone_repo("https://github.com/InstantID/InstantID.git") + sys.path.insert(0, str(repo_dir)) .. code:: ipython3 @@ -146,9 +136,6 @@ recognition results. .. 
code:: ipython3 - from pathlib import Path - - MODELS_DIR = Path("models") face_detector_path = MODELS_DIR / "antelopev2" / "scrfd_10g_bnkps.onnx" face_embeddings_path = MODELS_DIR / "antelopev2" / "glintr100.onnx" @@ -518,9 +505,14 @@ Select Inference Device for Face Recognition .. code:: ipython3 import openvino as ov + import requests - from notebook_utils import device_widget + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + from notebook_utils import device_widget device = device_widget() @@ -604,7 +596,7 @@ generated image -.. image:: instant-id-with-output_files/instant-id-with-output_16_0.png +.. image:: instant-id-with-output_files/instant-id-with-output_15_0.png @@ -615,7 +607,7 @@ generated image -.. image:: instant-id-with-output_files/instant-id-with-output_17_0.png +.. image:: instant-id-with-output_files/instant-id-with-output_16_0.png @@ -1683,50 +1675,46 @@ Select inference device for InstantID -Create pipeline -~~~~~~~~~~~~~~~ - +.. code:: ipython3 + text_encoder = core.compile_model(ov_text_encoder_path, device.value) + text_encoder_2 = core.compile_model(ov_text_encoder_2_path, device.value) + vae_decoder = core.compile_model(ov_vae_decoder_path, device.value) + unet = core.compile_model(ov_unet_path, device.value) + controlnet = core.compile_model(ov_controlnet_path, device.value) + image_proj_model = core.compile_model(ov_image_proj_encoder_path, device.value) .. 
code:: ipython3 from transformers import AutoTokenizer + tokenizer = AutoTokenizer.from_pretrained(MODELS_DIR / "tokenizer") + tokenizer_2 = AutoTokenizer.from_pretrained(MODELS_DIR / "tokenizer_2") + scheduler = LCMScheduler.from_pretrained(MODELS_DIR / "scheduler") - def create_ov_pipe( - text_encoder_path, - text_encoder_2_path, - image_proj_encoder_path, - controlnet_path, - unet_path, - vae_decoder_path, - tokenizer_path, - tokenizer_2_path, - scheduler_path, - ): - return OVStableDiffusionXLInstantIDPipeline( - core.compile_model(text_encoder_path, device.value), - core.compile_model(text_encoder_2_path, device.value), - core.compile_model(image_proj_encoder_path, device.value), - core.compile_model(controlnet_path, device.value), - core.compile_model(unet_path, device.value), - core.compile_model(vae_decoder_path, device.value), - AutoTokenizer.from_pretrained(tokenizer_path), - AutoTokenizer.from_pretrained(tokenizer_2_path), - LCMScheduler.from_pretrained(scheduler_path), - ) +.. parsed-literal:: - ov_pipe = create_ov_pipe( - ov_text_encoder_path, - ov_text_encoder_2_path, - ov_image_proj_encoder_path, - ov_controlnet_path, - ov_unet_path, - ov_vae_decoder_path, - MODELS_DIR / "tokenizer", - MODELS_DIR / "tokenizer_2", - MODELS_DIR / "scheduler", + The config attributes {'interpolation_type': 'linear', 'skip_prk_steps': True, 'use_karras_sigmas': False} were passed to LCMScheduler, but are not expected and will be ignored. Please verify your scheduler_config.json configuration file. + + +Create pipeline +~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + ov_pipe = OVStableDiffusionXLInstantIDPipeline( + text_encoder, + text_encoder_2, + image_proj_model, + controlnet, + unet, + vae_decoder, + tokenizer, + tokenizer_2, + scheduler, ) Run inference @@ -1763,7 +1751,7 @@ Run inference -.. image:: instant-id-with-output_files/instant-id-with-output_40_0.png +.. 
image:: instant-id-with-output_files/instant-id-with-output_41_0.png @@ -1796,8 +1784,8 @@ improve model inference speed. from notebook_utils import quantization_widget - skip_for_device = "GPU" in device.value or (device.value == "AUTO" and any("GPU" in device_name for device_name in core.available_devices)) - to_quantize = quantization_widget(not skip_for_device) + skip_for_device = "GPU" in device.value + to_quantize = quantization_widget(skip_for_device) to_quantize @@ -2000,14 +1988,6 @@ Quantization of the first ``Convolution`` layer impacts the generation results. We recommend using ``IgnoredScope`` to keep accuracy sensitive layers in FP16 precision. -.. code:: ipython3 - - %%skip not $to_quantize.value - - # Delete loaded full precision pipeline before quantization to lower peak memory footprint. - ov_pipe = None - gc.collect() - .. code:: ipython3 %%skip not $to_quantize.value @@ -3962,16 +3942,22 @@ pipelines. %%skip not $to_quantize.value - int8_pipe = create_ov_pipe( - ov_int8_text_encoder_path, - ov_int8_text_encoder_2_path, - ov_image_proj_encoder_path, - ov_int8_controlnet_path, - ov_int8_unet_path, - ov_int8_vae_decoder_path, - MODELS_DIR / "tokenizer", - MODELS_DIR / "tokenizer_2", - MODELS_DIR / "scheduler" + optimized_controlnet = core.compile_model(ov_int8_controlnet_path, device.value) + optimized_unet = core.compile_model(ov_int8_unet_path, device.value) + optimized_text_encoder = core.compile_model(ov_int8_text_encoder_path, device.value) + optimized_text_encoder_2 = core.compile_model(ov_int8_text_encoder_2_path, device.value) + optimized_vae_decoder = core.compile_model(ov_int8_vae_decoder_path, device.value) + + int8_pipe = OVStableDiffusionXLInstantIDPipeline( + optimized_text_encoder, + optimized_text_encoder_2, + image_proj_model, + optimized_controlnet, + optimized_unet, + optimized_vae_decoder, + tokenizer, + tokenizer_2, + scheduler, ) .. code:: ipython3 @@ -3997,7 +3983,7 @@ pipelines. .. 
code:: ipython3 - %%skip not $to_quantize.value + # %%skip not $to_quantize.value import matplotlib.pyplot as plt @@ -4078,33 +4064,13 @@ Compare inference time of the FP16 and INT8 pipelines To measure the inference performance of the ``FP16`` and ``INT8`` pipelines, we use mean inference time on 5 samples. -Please select below whether you would like to run inference time -comparison. - **NOTE**: For the most accurate performance estimation, it is recommended to run ``benchmark_app`` in a terminal/command prompt after closing other applications. -.. - - **NOTE**: This is a memory consuming step because two pipelines need - to be loaded at the same time. - -.. code:: ipython3 - - import ipywidgets as widgets - - run_inference_time_comparison = widgets.Checkbox( - value=False, - description="Inference time comparison", - disabled=not to_quantize.value, - ) - - run_inference_time_comparison - .. code:: ipython3 - %%skip (not $to_quantize.value or not $run_inference_time_comparison.value) + %%skip not $to_quantize.value import time @@ -4133,24 +4099,7 @@ comparison. .. code:: ipython3 - %%skip (not $to_quantize.value or not $run_inference_time_comparison.value) - - # Load full precision pipeline - ov_pipe = create_ov_pipe( - ov_text_encoder_path, - ov_text_encoder_2_path, - ov_image_proj_encoder_path, - ov_controlnet_path, - ov_unet_path, - ov_vae_decoder_path, - MODELS_DIR / "tokenizer", - MODELS_DIR / "tokenizer_2", - MODELS_DIR / "scheduler", - ) - -.. code:: ipython3 - - %%skip (not $to_quantize.value or not $run_inference_time_comparison.value) + %%skip not $to_quantize.value fp_latency = calculate_inference_time(ov_pipe, face_info) print(f"FP16 pipeline: {fp_latency:.3f} seconds") @@ -4197,25 +4146,6 @@ to launch the interactive demo. -.. 
code:: ipython3 - - if use_quantized_models.value: - if ov_pipe is not None: - del ov_pipe - gc.collect() - elif ov_pipe is None: - ov_pipe = create_ov_pipe( - ov_text_encoder_path, - ov_text_encoder_2_path, - ov_image_proj_encoder_path, - ov_controlnet_path, - ov_unet_path, - ov_vae_decoder_path, - MODELS_DIR / "tokenizer", - MODELS_DIR / "tokenizer_2", - MODELS_DIR / "scheduler", - ) - .. code:: ipython3 import gradio as gr diff --git a/docs/notebooks/instant-id-with-output_files/instant-id-with-output_15_0.jpg b/docs/notebooks/instant-id-with-output_files/instant-id-with-output_15_0.jpg new file mode 100644 index 00000000000000..9204c96e2f27b5 --- /dev/null +++ b/docs/notebooks/instant-id-with-output_files/instant-id-with-output_15_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8cb87ed65dd68ed3a55169231dc4b535b642c700b6f407a8fd36551ccdb1b44 +size 57403 diff --git a/docs/notebooks/instant-id-with-output_files/instant-id-with-output_15_0.png b/docs/notebooks/instant-id-with-output_files/instant-id-with-output_15_0.png new file mode 100644 index 00000000000000..75c81edf77497e --- /dev/null +++ b/docs/notebooks/instant-id-with-output_files/instant-id-with-output_15_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3aaae251bfd76143e413cba9580629e315af4a963723a4d5f2baed7d616befe +size 418498 diff --git a/docs/notebooks/instant-id-with-output_files/instant-id-with-output_16_0.jpg b/docs/notebooks/instant-id-with-output_files/instant-id-with-output_16_0.jpg index 9204c96e2f27b5..3922ec0b103b57 100644 --- a/docs/notebooks/instant-id-with-output_files/instant-id-with-output_16_0.jpg +++ b/docs/notebooks/instant-id-with-output_files/instant-id-with-output_16_0.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b8cb87ed65dd68ed3a55169231dc4b535b642c700b6f407a8fd36551ccdb1b44 -size 57403 +oid sha256:f84d31ac2853b19f8dc12f280bffcd132b9d5905f51aefcea3b5e9c79a5e2508 +size 13727 diff --git 
a/docs/notebooks/instant-id-with-output_files/instant-id-with-output_16_0.png b/docs/notebooks/instant-id-with-output_files/instant-id-with-output_16_0.png index 75c81edf77497e..3bdbdcbd0acd60 100644 --- a/docs/notebooks/instant-id-with-output_files/instant-id-with-output_16_0.png +++ b/docs/notebooks/instant-id-with-output_files/instant-id-with-output_16_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b3aaae251bfd76143e413cba9580629e315af4a963723a4d5f2baed7d616befe -size 418498 +oid sha256:9cc497376f7ac254d2906f1a3be71adc8948e0c566fd50cab1b8df2709db4318 +size 10852 diff --git a/docs/notebooks/instant-id-with-output_files/instant-id-with-output_17_0.jpg b/docs/notebooks/instant-id-with-output_files/instant-id-with-output_17_0.jpg deleted file mode 100644 index 3922ec0b103b57..00000000000000 --- a/docs/notebooks/instant-id-with-output_files/instant-id-with-output_17_0.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f84d31ac2853b19f8dc12f280bffcd132b9d5905f51aefcea3b5e9c79a5e2508 -size 13727 diff --git a/docs/notebooks/instant-id-with-output_files/instant-id-with-output_17_0.png b/docs/notebooks/instant-id-with-output_files/instant-id-with-output_17_0.png deleted file mode 100644 index 3bdbdcbd0acd60..00000000000000 --- a/docs/notebooks/instant-id-with-output_files/instant-id-with-output_17_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9cc497376f7ac254d2906f1a3be71adc8948e0c566fd50cab1b8df2709db4318 -size 10852 diff --git a/docs/notebooks/instant-id-with-output_files/instant-id-with-output_40_0.jpg b/docs/notebooks/instant-id-with-output_files/instant-id-with-output_41_0.jpg similarity index 100% rename from docs/notebooks/instant-id-with-output_files/instant-id-with-output_40_0.jpg rename to docs/notebooks/instant-id-with-output_files/instant-id-with-output_41_0.jpg diff --git 
a/docs/notebooks/instant-id-with-output_files/instant-id-with-output_40_0.png b/docs/notebooks/instant-id-with-output_files/instant-id-with-output_41_0.png similarity index 100% rename from docs/notebooks/instant-id-with-output_files/instant-id-with-output_40_0.png rename to docs/notebooks/instant-id-with-output_files/instant-id-with-output_41_0.png diff --git a/docs/notebooks/internvl2-with-output.rst b/docs/notebooks/internvl2-with-output.rst index c9073cd6aa48ae..ed67209a0303eb 100644 --- a/docs/notebooks/internvl2-with-output.rst +++ b/docs/notebooks/internvl2-with-output.rst @@ -57,21 +57,29 @@ Prerequisites .. code:: ipython3 - import platform - - %pip install -q "transformers>4.36" "torch>=2.1" "torchvision" "einops" "timm" "Pillow" "gradio>=4.36" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "nncf>=2.14.0" "datasets" - %pip install -q "git+https://github.com/huggingface/optimum-intel.git" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q -U "openvino>=2024.5" "openvino-tokenizers>=2024.5" "openvino-genai>=2024.5" - - if platform.system() == "Darwin": - %pip install -q "numpy<2.0.0" + %pip install -q "transformers>4.36,<4.45" "torch>=2.1" "torchvision" "einops" "timm" "Pillow" "gradio>=4.36" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "openvino>=2024.3.0" "nncf>=2.12.0" + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + .. 
code:: ipython3 from pathlib import Path import requests + if not Path("conversation.py").exists(): + r = requests.get("https://huggingface.co/OpenGVLab/InternVL2-1B/raw/main/conversation.py") + open("conversation.py", "w", encoding="utf-8").write(r.text) + + if not Path("internvl2_helper.py").exists(): + r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/internvl2/internvl2_helper.py") + open("internvl2_helper.py", "w", encoding="utf-8").write(r.text) + if not Path("gradio_helper.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/internvl2/gradio_helper.py") open("gradio_helper.py", "w", encoding="utf-8").write(r.text) @@ -79,10 +87,6 @@ Prerequisites if not Path("notebook_utils.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py") open("notebook_utils.py", "w", encoding="utf-8").write(r.text) - - if not Path("cmd_helper.py").exists(): - r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py") - open("cmd_helper.py", "w", encoding="utf-8").write(r.text) Select model ------------ @@ -96,25 +100,18 @@ using widget bellow: .. code:: ipython3 - model_ids = ["OpenGVLab/InternVL2-1B", "OpenGVLab/InternVL2-2B", "OpenGVLab/InternVL2-4B", "OpenGVLab/InternVL2-8B"] - - - def model_selector(default=model_ids[0]): - import ipywidgets as widgets - - model_checkpoint = widgets.Dropdown( - options=model_ids, - default=default, - description="Model:", - ) - return model_checkpoint - + from internvl2_helper import model_selector model_id = model_selector() model_id +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + + .. parsed-literal:: @@ -133,84 +130,94 @@ using widget bellow: .. 
parsed-literal:: Selected OpenGVLab/InternVL2-1B - + Convert and Optimize model -------------------------- -Our model conversion and optimization consist of following steps: 1. -Download original PyTorch model. 2. Convert model to OpenVINO format. 3. -Compress model weights using NNCF. - -Let’s consider each step more deeply. - -Convert model to OpenVINO IR format using Optimum CLI -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - +InternVL2 is a PyTorch model. OpenVINO supports PyTorch models via +conversion to OpenVINO Intermediate Representation (IR). `OpenVINO model +conversion +API `__ +should be used for these purposes. ``ov.convert_model`` function accepts +original PyTorch model instance and example input for tracing and +returns ``ov.Model`` representing this model in OpenVINO framework. +Converted model can be used for saving on disk using ``ov.save_model`` +function or directly loading on device using ``core.compile_model``. +``internvl2_helper.py`` script contains helper function for model +conversion, please check its content if you are interested in conversion +details. -OpenVINO supports PyTorch models via conversion to OpenVINO Intermediate -Representation format. For convenience, we will use OpenVINO integration -with HuggingFace Optimum. `Optimum -Intel `__ is the -interface between the Transformers and Diffusers libraries and the -different tools and libraries provided by Intel to accelerate end-to-end -pipelines on Intel architectures. - -Among other use cases, Optimum Intel provides a simple interface to -optimize your Transformers and Diffusers models, convert them to the -OpenVINO Intermediate Representation (IR) format and run inference using -OpenVINO Runtime. ``optimum-cli`` provides command line interface for -model conversion and optimization. +.. raw:: html -General command format: +
-.. code:: bash +Click here for a more detailed explanation of the conversion steps. InternVL2 +is an autoregressive transformer generative model, which means that each next +model step depends on the model output from the previous step. The generation +approach is based on the assumption that the probability distribution of +a word sequence can be decomposed into the product of conditional next +word distributions. In other words, the model predicts the next token in a +loop guided by previously generated tokens until the stop condition is +reached (generated sequence of maximum length or end of string +token obtained). The way the next token will be selected over predicted +probabilities is driven by the selected decoding methodology. You can +find more information about the most popular decoding methods in this +blog. The entry point for the generation process for models from the +Hugging Face Transformers library is the ``generate`` method. You can +find more information about its parameters and configuration in the +documentation. To preserve flexibility in the selection of the decoding +methodology, we will convert only model inference for one step. + +The inference flow differs between the first step and the following steps. On the +first step, the model accepts the preprocessed input instruction and image, which are +transformed to the unified embedding space using ``input_embedding`` and +``image_encoder`` models, after that ``language_model``, the LLM-based part +of the model, runs on input embeddings to predict the probability of the next +generated tokens. On the next step, ``language_model`` accepts only the next +token id selected based on the sampling strategy and processed by the +``input_embedding`` model and cached attention key and values. Since the +output side is auto-regressive, an output token hidden state remains the +same once computed for every further generation step. Therefore, +recomputing it every time you want to generate a new token seems +wasteful. 
With the cache, the model saves the hidden state once it has +been computed. The model only computes the one for the most recently +generated output token at each time step, re-using the saved ones for +hidden tokens. This reduces the generation complexity from +:math:`O(n^3)` to :math:`O(n^2)` for a transformer model. More details +about how it works can be found in this +`article `__. +To sum up, the model consists of 3 parts: + +- **Image encoder** for encoding input images into embedding space. +- **Input Embedding** for converting input text tokens into embedding + space. +- **Language Model** for generating the answer based on input embeddings + provided by Image Encoder and Input Embedding models. - optimum-cli export openvino --model --task +.. raw:: html -where task is task to export the model for, if not specified, the task -will be auto-inferred based on the model. You can find a mapping between -tasks and model classes in Optimum TaskManager -`documentation `__. -Additionally, you can specify weights compression using -``--weight-format`` argument with one of following options: ``fp32``, -``fp16``, ``int8`` and ``int4``. Fro int8 and int4 -`nncf `__ will be used for -weight compression. More details about model export provided in `Optimum -Intel -documentation `__. +
Compress model weights to 4-bit ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For reducing memory consumption, weights compression optimization can be applied using -`NNCF `__ via ``optimum-cli`` -command. In this tutorial we will demonstrates how to apply accurate -int4 weight quantization using AWQ method. +`NNCF `__. .. raw:: html
-.. raw:: html - - - -Click here for more details about weight compression - -.. raw:: html - - - -Weight compression aims to reduce the memory footprint of a model. It -can also lead to significant performance improvement for large -memory-bound models, such as Large Language Models (LLMs). LLMs and -other models, which require extensive memory to store the weights during -inference, can benefit from weight compression in the following ways: +Click here for more details about weight compression Weight compression +aims to reduce the memory footprint of a model. It can also lead to +significant performance improvement for large memory-bound models, such +as Large Language Models (LLMs). LLMs and other models, which require +extensive memory to store the weights during inference, can benefit from +weight compression in the following ways: - enabling the inference of exceptionally large models that cannot be accommodated in the memory of the device; @@ -231,13 +238,11 @@ with the performance of the full model quantization. In addition, weight compression is data-free and does not require a calibration dataset, making it easy to use. -Usually 4-bit compression allows to get maximal speedup and minimal -memory footprint comparing with 8-bit compression, but in the same time -it may significantly drop model accuracy. `Activation-aware Weight -Quantization `__ (AWQ) is an algorithm -that tunes model weights for more accurate INT4 compression. It slightly -improves generation quality of compressed models, but requires -additional time for tuning weights on a calibration dataset. +``nncf.compress_weights`` function can be used for performing weights +compression. The function accepts an OpenVINO model and other +compression parameters. Compared to INT8 compression, INT4 compression +improves performance even more, but introduces a minor drop in +prediction quality. More details about weights compression, can be found in `OpenVINO documentation `__. 
@@ -248,99 +253,151 @@ documentation self.max_seq_len_cached: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:324: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len): + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:339: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim): + + +.. 
parsed-literal:: + + ✅ Language model successfully converted + ⌛ Weights compression with int4_asym mode started + INFO:nncf:Statistics of the bitwidth distribution: ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (1 / 1) │ 100% (1 / 1) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━���━━━━━━━━━━━━━━━┙ - Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:00:00 • 0:00:00 - [?25h + │ 8 │ 28% (1 / 169) │ 0% (0 / 168) │ + ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ 4 │ 72% (168 / 169) │ 100% (168 / 168) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + + .. parsed-literal:: - Attempt to save config using standard API has failed with 'architectures'. There may be an issue with model config, please check its correctness before usage. - + Output() + + + + + + + + + +.. parsed-literal:: + + ✅ Weights compression finished + ✅ OpenGVLab/InternVL2-1B model conversion finished. You can find results in InternVL2-1B + Select inference device ----------------------- @@ -369,76 +426,49 @@ Prepare model inference pipeline -`OpenVINO™ GenAI `__ -is a library of the most popular Generative AI model pipelines, -optimized execution methods, and samples that run on top of highly -performant `OpenVINO -Runtime `__. +As discussed, the model comprises Image Encoder and LLM (with separated +text embedding part) that generates answer. 
In ``internvl2_helper.py`` +we defined LLM inference class ``OvModelForCausalLMWithEmb`` that will +represent generation cycle, It is based on `HuggingFace Transformers +GenerationMixin `__ +and looks similar to `Optimum +Intel `__ +``OVModelForCausalLM`` that is used for LLM inference with only +difference that it can accept input embedding. In own turn, general +multimodal model class ``OVInternVLChatModel`` handles chatbot +functionality including image processing and answer generation using +LLM. -This library is friendly to PC and laptop execution, and optimized for -resource consumption. It requires no external dependencies to run -generative models as it already includes all the core functionality -(e.g. tokenization via openvino-tokenizers). OpenVINO™ GenAI is a flavor -of OpenVINO™, aiming to simplify running inference of generative AI -models. It hides the complexity of the generation process and minimizes -the amount of code required. +.. code:: ipython3 -Inference Visual language models can be implemented using OpenVINO GenAI -``VLMPipeline`` class. Similarly to LLMPipeline, that we discussed in -this -`notebook `__. -It supports chat mode with preserving conversational history inside -pipeline, that allows us effectively implements chatbot that supports -conversation about provided images content. For pipeline initialization -we should provide path to model directory and inference device. + from internvl2_helper import OVInternVLChatModel + from transformers import AutoTokenizer + + # Uncomment below lines to see the model inference class code + + # OVInternVLChatModel?? .. 
code:: ipython3 - import openvino_genai as ov_genai - - ov_model = ov_genai.VLMPipeline(model_dir, device=device.value) + tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True) + ov_model = OVInternVLChatModel(model_dir, device.value) Run model inference ------------------- -For preparing input data, ``VLMPipeline`` use tokenizer and image -processor inside, we just need to convert image to input OpenVINO tensor -and provide question as string. Additionally, we can provides options -for controlling generation process (e.g. number of maximum generated -tokens or using multinomial sampling for decoding instead of greedy -search approach) using ``GenerationConfig``. - -Generation process for long response may be time consuming, for -accessing partial result as soon as it is generated without waiting when -whole process finished, Streaming API can be used. Token streaming is -the mode in which the generative system returns the tokens one by one as -the model generates them. This enables showing progressive generations -to the user rather than waiting for the whole generation. Streaming is -an essential aspect of the end-user experience as it reduces latency, -one of the most critical aspects of a smooth experience. +Our interface is fully compatible with Transformers interface for +InternVL2, you can try any of represented here `usage +examples `__. +Let’s check model capabilities in answering questions about image: .. 
code:: ipython3 - import requests - from PIL import Image - from io import BytesIO - import numpy as np - import openvino as ov - - config = ov_genai.GenerationConfig() - config.max_new_tokens = 100 - - - def load_image(image_file): - if isinstance(image_file, str) and (image_file.startswith("http") or image_file.startswith("https")): - response = requests.get(image_file) - image = Image.open(BytesIO(response.content)).convert("RGB") - else: - image = Image.open(image_file).convert("RGB") - image_data = np.array(image.getdata()).reshape(1, image.size[1], image.size[0], 3).astype(np.byte) - return image, ov.Tensor(image_data) + import PIL + from internvl2_helper import load_image + from transformers import TextIteratorStreamer + from threading import Thread EXAMPLE_IMAGE = Path("examples_image1.jpg") @@ -449,41 +479,59 @@ one of the most critical aspects of a smooth experience. with EXAMPLE_IMAGE.open("wb") as handler: handler.write(img_data) + pixel_values = load_image(EXAMPLE_IMAGE, max_num=12) - def streamer(subword: str) -> bool: - """ - - Args: - subword: sub-word of the generated text. + streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) - Returns: Return flag corresponds whether generation should be stopped. + generation_config = dict(max_new_tokens=100, do_sample=True, streamer=streamer) + question = "\nPlease describe the image shortly." 
- """ - print(subword, end="", flush=True) - - - question = "Please describe the image shortly" - - - image, image_tensor = load_image(EXAMPLE_IMAGE) - display(image) + display(PIL.Image.open(EXAMPLE_IMAGE)) print(f"User: {question}\n") print("Assistant:") - output = ov_model.generate(question, image=image_tensor, generation_config=config, streamer=streamer) + + thread = Thread( + target=ov_model.chat, + kwargs=dict( + tokenizer=tokenizer, + pixel_values=pixel_values, + question=question, + history=None, + return_history=False, + generation_config=generation_config, + ), + ) + thread.start() + + generated_text = "" + # Loop through the streamer to get the new text as it is generated + for new_text in streamer: + if new_text == ov_model.conv_template.sep: + break + generated_text += new_text + print(new_text, end="", flush=True) # Print each new chunk of generated text on the same line -.. image:: internvl2-with-output_files/internvl2-with-output_14_0.png +.. image:: internvl2-with-output_files/internvl2-with-output_16_0.png .. parsed-literal:: - User: Please describe the image shortly + User: + Please describe the image shortly. Assistant: - . - - The image shows a red panda, a type of mammal known for its distinctive red fur and white markings. The animal is resting on a wooden structure, possibly a platform or a platform-like object, with its head turned slightly towards the camera. The background is a natural setting, with trees and foliage visible, suggesting that the red panda is in a forested or wooded area. The red panda's eyes are large and expressive, and its ears are perked up, indicating that it is alert + + +.. parsed-literal:: + + Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation. + + +.. parsed-literal:: + + The image shows a red panda lying on its side, partially wrapped in a wooden structure, possibly a container or log. 
The red panda appears to be looking at the camera with large, expressive eyes, displaying an endearing and lively appearance. The background consists of a portion of the red panda's habitat environment, which appears to be a tree and some greenery. Interactive demo ---------------- @@ -494,11 +542,25 @@ Interactive demo from gradio_helper import make_demo - demo = make_demo(ov_model) + demo = make_demo(ov_model, tokenizer) try: - demo.launch(debug=True, height=600) + demo.launch(debug=False, height=600) except Exception: - demo.launch(debug=True, share=True, height=600) + demo.launch(debug=False, share=True, height=600) # if you are launching remotely, specify server_name and server_port # demo.launch(server_name='your server name', server_port='server port in int') # Read more in the docs: https://gradio.app/docs/ + + +.. parsed-literal:: + + Running on local URL: http://127.0.0.1:7860 + + To create a public link, set `share=True` in `launch()`. + + + + + + + diff --git a/docs/notebooks/internvl2-with-output_files/internvl2-with-output_14_0.jpg b/docs/notebooks/internvl2-with-output_files/internvl2-with-output_16_0.jpg similarity index 100% rename from docs/notebooks/internvl2-with-output_files/internvl2-with-output_14_0.jpg rename to docs/notebooks/internvl2-with-output_files/internvl2-with-output_16_0.jpg diff --git a/docs/notebooks/internvl2-with-output_files/internvl2-with-output_14_0.png b/docs/notebooks/internvl2-with-output_files/internvl2-with-output_16_0.png similarity index 100% rename from docs/notebooks/internvl2-with-output_files/internvl2-with-output_14_0.png rename to docs/notebooks/internvl2-with-output_files/internvl2-with-output_16_0.png diff --git a/docs/notebooks/janus-multimodal-generation-with-output.rst b/docs/notebooks/janus-multimodal-generation-with-output.rst deleted file mode 100644 index a8a5cc599699c6..00000000000000 --- a/docs/notebooks/janus-multimodal-generation-with-output.rst +++ /dev/null @@ -1,472 +0,0 @@ -Multimodal 
understanding and generation with Janus and OpenVINO -=============================================================== - -Janus is a novel autoregressive framework that unifies multimodal -understanding and generation. It addresses the limitations of previous -approaches by decoupling visual encoding into separate pathways, while -still utilizing a single, unified transformer architecture for -processing. The decoupling not only alleviates the conflict between the -visual encoder’s roles in understanding and generation, but also -enhances the framework’s flexibility. Janus surpasses previous unified -model and matches or exceeds the performance of task-specific models. -The simplicity, high flexibility, and effectiveness of Janus make it a -strong candidate for next-generation unified multimodal models. - -More details can be found in the -`paper `__, original -`repository `__ and `model -card `__ - -In this tutorial we consider how to run and optimize Janus using -OpenVINO. - -**Table of contents:** - -- `Prerequisites <#prerequisites>`__ -- `Convert and Optimize model <#convert-and-optimize-model>`__ - - - `Compress model weights to - 4-bit <#compress-model-weights-to-4-bit>`__ - -- `Create Inference Pipeline <#create-inference-pipeline>`__ - - - `Select Inference Device <#select-inference-device>`__ - - `Run visual language chat <#run-visual-language-chat>`__ - - `Run Image generation <#run-image-generation>`__ - -- `Interactive demo <#interactive-demo>`__ - -Installation Instructions -~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is a self-contained example that relies solely on its own code. - -We recommend running the notebook in a virtual environment. You only -need a Jupyter server to start. For details, please refer to -`Installation -Guide `__. - -Prerequisites -------------- - - - -.. 
code:: ipython3 - - from pathlib import Path - import requests - - utility_files = ["notebook_utils.py"] - local_helpers = ["ov_janus_helper.py", "gradio_helper.py"] - - base_utils_url = "https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/" - base_local_files_url = "https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/janus-multimodal-generation/" - - - for util_path in utility_files: - if not Path(util_path).exists(): - r = requests.get(base_utils_url + util_path) - with open(util_path, "w") as f: - f.write(r.text) - - for util_path in local_helpers: - if not Path(util_path).exists(): - r = requests.get(base_local_files_url + util_path) - with open(util_path, "w") as f: - f.write(r.text) - -.. code:: ipython3 - - import platform - - %pip install -q "gradio>=4.19" "torch>=2.2" "torchvision" "safetensors" "transformers>=4.38" "nncf>=2.14" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "git+https://github.com/deepseek-ai/Janus" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -U --pre "openvino>2024.5" --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - - if platform.system() == "Darwin": - %pip install -q "numpy<2.0.0" - -Convert and Optimize model --------------------------- - - - -Janus is PyTorch model. OpenVINO supports PyTorch models via conversion -to OpenVINO Intermediate Representation (IR). `OpenVINO model conversion -API `__ -should be used for these purposes. ``ov.convert_model`` function accepts -original PyTorch model instance and example input for tracing and -returns ``ov.Model`` representing this model in OpenVINO framework. -Converted model can be used for saving on disk using ``ov.save_model`` -function or directly loading on device using ``core.complie_model``. - -The script ``ov_janus_helper.py`` contains helper function for model -conversion, please check its content if you interested in conversion -details. 
- -.. raw:: html - -
- -.. raw:: html - - - -Click here for more detailed explanation of conversion steps - -.. raw:: html - - - -Janus is autoregressive transformer generative model, it means that each -next model step depends from model output from previous step. The -generation approach is based on the assumption that the probability -distribution of a token sequence can be decomposed into the product of -conditional next token distributions. In other words, model predicts the -next token in the loop guided by previously generated tokens until the -stop-condition will be not reached (generated sequence of maximum length -or end of generation token obtained). The way the next token will be -selected over predicted probabilities is driven by the selected decoding -methodology. You can find more information about the most popular -decoding methods in this blog. The entry point for the generation -process for models from the Hugging Face Transformers library is the -``generate`` method. You can find more information about its parameters -and configuration in the documentation. To preserve flexibility in the -selection decoding methodology, we will convert only model inference for -one step. - -For both tasks, image understanding and image generation, Janus utilizes -the same basic transformer architecture in ``language_model`` and change -only components responsible for preparing input embeddings (joined image -embeddings prepared using ``vision_embeddings_model`` and text -embeddings prepared using ``text_embeddings_model`` for image -understanding and ``text_embeddings_model`` on the first step as initial -prompt embeddings and ``gen_embeddings_model`` for the next) and -conversion final hidden state to tokens probabilities (``lm_head`` for -text tokens, ``gen_head`` for image tokens). Additionally, for image -generation model uses ``gen_decoder`` to convert generated image tokens -to images. 
- -To sum up above, model consists of 7 parts: \* **Image Embeddings** for -encoding input images into embedding space in image understanding task. -\* **Text Embedding** for conversion input text tokens into embedding -space \* **Gen Embeddings** for encoding image generation tokens to -embeddings space in image generation task \* **Language Model** for -generation hidden state guided by input embeddings \* **LM Head** for -conversion Language Model hidden state to text generation token -probabilities \* **Gen Head** for conversion Language Model hidden state -to image generation token probabilities \* **Gen Decoder** for decoding -generated image from latent token space to image tensor space. - -For preserving original model flexibility of switching between tasks, we -also should preserve original model partitioning and convert each model -part separately. - -.. raw:: html - -
- -Compress model weights to 4-bit -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -For reducing memory -consumption, weights compression optimization can be applied using -`NNCF `__. - -.. raw:: html - -
- -.. raw:: html - - - -Click here for more details about weight compression - -.. raw:: html - - - -Weight compression aims to reduce the memory footprint of a model. It -can also lead to significant performance improvement for large -memory-bound models, such as Large Language Models (LLMs). LLMs and -other models, which require extensive memory to store the weights during -inference, can benefit from weight compression in the following ways: - -- enabling the inference of exceptionally large models that cannot be - accommodated in the memory of the device; - -- improving the inference performance of the models by reducing the - latency of the memory access when computing the operations with - weights, for example, Linear layers. - -`Neural Network Compression Framework -(NNCF) `__ provides 4-bit / -8-bit mixed weight quantization as a compression method primarily -designed to optimize LLMs. The main difference between weights -compression and full model quantization (post-training quantization) is -that activations remain floating-point in the case of weights -compression which leads to a better accuracy. Weight compression for -LLMs provides a solid inference performance improvement which is on par -with the performance of the full model quantization. In addition, weight -compression is data-free and does not require a calibration dataset, -making it easy to use. - -``nncf.compress_weights`` function can be used for performing weights -compression. The function accepts an OpenVINO model and other -compression parameters. Compared to INT8 compression, INT4 compression -improves performance even more, but introduces a minor drop in -prediction quality. - -More details about weights compression, can be found in `OpenVINO -documentation `__. - -.. raw:: html - -
- -.. code:: ipython3 - - import nncf - from ov_janus_helper import convert_janus_model - - model_id = "deepseek-ai/Janus-1.3B" - model_path = Path(model_id.split("/")[-1] + "-ov") - - compression_configuration = { - "mode": nncf.CompressWeightsMode.INT4_ASYM, - "group_size": 64, - "ratio": 1.0, - } - - # uncomment the line to see model conversion code - # ??convert_janus_model - - -.. parsed-literal:: - - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino - - -.. parsed-literal:: - - 2024-11-26 20:09:59.629857: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-26 20:09:59.643309: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered - WARNING: All log messages before absl::InitializeLog() is called are written to STDERR - E0000 00:00:1732637399.658322 1754417 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered - E0000 00:00:1732637399.662894 1754417 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered - 2024-11-26 20:09:59.679869: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - - -.. parsed-literal:: - - Python version is above 3.10, patching the collections module. - - -.. 
parsed-literal:: - - /home/ea/work/py311/lib/python3.11/site-packages/transformers/models/auto/image_processing_auto.py:520: FutureWarning: The image_processor_class argument is deprecated and will be removed in v4.42. Please use `slow_image_processor_class`, or `fast_image_processor_class` instead - warnings.warn( - - -.. code:: ipython3 - - convert_janus_model(model_id, model_path, compression_configuration) - - -.. parsed-literal:: - - ✅ Janus-1.3B model already converted. You can find results in Janus-1.3B-ov - - -Create Inference Pipeline -------------------------- - - - -``OVJanusModel`` defined in ``ov_janus_helper.py`` provides unified -interface for running model inference for both text and image -generation. It accepts model directory and target device for inference. - -Select Inference Device -~~~~~~~~~~~~~~~~~~~~~~~ - - - -.. code:: ipython3 - - from notebook_utils import device_widget - - device = device_widget("CPU", ["NPU"]) - - device - - - - -.. parsed-literal:: - - Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU') - - - -.. code:: ipython3 - - from ov_janus_helper import OVJanusModel - from janus.models import VLChatProcessor - - # uncomment the line to see model inference code - - # ??OVJanusModel - -``VLChatPRocessor`` class used for pre- and postprocessing steps in -original Janus model. Our model is also compatible with the same -processor code and we can reuse it. - -.. code:: ipython3 - - ov_model = OVJanusModel(model_path, device.value) - - processor = VLChatProcessor.from_pretrained(model_path) - - -.. parsed-literal:: - - Some kwargs in processor config are unused and will not have any effect: image_end_tag, sft_format, image_tag, num_image_tokens, add_special_token, mask_prompt, ignore_id, image_start_tag. - - -Run visual language chat -~~~~~~~~~~~~~~~~~~~~~~~~ - - - -.. 
code:: ipython3 - - from PIL import Image - from io import BytesIO - from janus.utils.io import load_pil_images - - - input_prompt = "Describe image in details" - image_path = Path("cat_in_box.png") - - if not image_path.exists(): - response = requests.get("https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11") - image = Image.open(BytesIO(response.content)).convert("RGB") - image.save(image_path) - - conversation = [ - { - "role": "User", - "content": f"{input_prompt}\n", - "images": [str(image_path)], - }, - {"role": "Assistant", "content": ""}, - ] - pil_images = load_pil_images(conversation) - -.. code:: ipython3 - - from transformers import TextStreamer - - prepare_inputs = processor(conversations=conversation, images=pil_images, force_batchify=True) - # run image encoder to get the image embeddings - inputs_embeds = ov_model.prepare_inputs_embeds(**prepare_inputs) - - streamer = TextStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True) - - print(f"Question:\n{input_prompt}") - display(pil_images[0]) - print("Answer:") - - answer_token_ids = ov_model.language_model.generate( - inputs_embeds=inputs_embeds, - attention_mask=prepare_inputs.attention_mask, - pad_token_id=processor.tokenizer.eos_token_id, - bos_token_id=processor.tokenizer.bos_token_id, - eos_token_id=processor.tokenizer.eos_token_id, - max_new_tokens=128, - do_sample=False, - streamer=streamer, - ) - - -.. parsed-literal:: - - Question: - Describe image in details - - - -.. image:: janus-multimodal-generation-with-output_files/janus-multimodal-generation-with-output_14_1.png - - -.. parsed-literal:: - - Answer: - The image depicts a gray and white tabby cat lying comfortably inside a cardboard box. The cat is lying on its back with its legs and paws spread out in a relaxed manner. The cat's eyes are closed, and it appears to be enjoying a nap. 
The box is placed on a light-colored carpet, and in the background, there is a portion of a white couch visible. The lighting in the room is soft and natural, suggesting that the photo was taken during the daytime. The overall scene conveys a sense of tranquility and contentment. - - -Run Image generation -~~~~~~~~~~~~~~~~~~~~ - - - -.. code:: ipython3 - - from ov_janus_helper import generate_image - - # Uncomment the line to see image generation code - # ??generate_image - -.. code:: ipython3 - - from transformers import set_seed - - set_seed(12345) - - images = generate_image( - ov_model, - processor, - "A close-up professional photo of Yorkshire Terrier on beach, extrimely detailed, hyper realistic, full hd", - output_dir=None, - parallel_size=1, - ) - - - -.. parsed-literal:: - - 0%| | 0/576 [00:00`__ is a Python library for -accelerator-oriented array computation and program transformation, -designed for high-performance numerical computing and large-scale -machine learning. JAX provides a familiar NumPy-style API for ease of -adoption by researchers and engineers. - -In this tutorial we will show how to convert JAX -`ViT `__ -and -`Mixer `__ -models in OpenVINO format. - -.. raw:: html - -
- -.. raw:: html - - - -Click here for more detailed information about the models - -.. raw:: html - - - -Vision Transformer -~~~~~~~~~~~~~~~~~~ - -Overview of the model: authors split an image into fixed-size patches, -linearly embed each of them, add position embeddings, and feed the -resulting sequence of vectors to a standard Transformer encoder. In -order to perform classification, authors use the standard approach of -adding an extra learnable “classification token” to the sequence. - -MLP-Mixer -~~~~~~~~~ - -MLP-Mixer (Mixer for short) consists of per-patch linear embeddings, -Mixer layers, and a classifier head. Mixer layers contain one -token-mixing MLP and one channel-mixing MLP, each consisting of two -fully-connected layers and a GELU nonlinearity. Other components -include: skip-connections, dropout, and linear classifier head. - -.. raw:: html - -
- - -**Table of contents:** - - -- `Prerequisites <#prerequisites>`__ -- `Load and run the original model and a - sample <#load-and-run-the-original-model-and-a-sample>`__ -- `Convert the model to OpenVINO - IR <#convert-the-model-to-openvino-ir>`__ -- `Compiling the model <#compiling-the-model>`__ -- `Run OpenVINO model inference <#run-openvino-model-inference>`__ - -Installation Instructions -~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is a self-contained example that relies solely on its own code. - -We recommend running the notebook in a virtual environment. You only -need a Jupyter server to start. For details, please refer to -`Installation -Guide `__. - -Prerequisites -------------- - - - -.. code:: ipython3 - - import requests - - - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - open("notebook_utils.py", "w").write(r.text) - - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py", - ) - open("cmd_helper.py", "w").write(r.text) - -.. code:: ipython3 - - from cmd_helper import clone_repo - - - clone_repo("https://github.com/google-research/vision_transformer.git") - -.. code:: ipython3 - - %pip install -q "openvino>=2024.5.0" - %pip install -q Pillow "jax>=0.4.2" "absl-py>=0.12.0" "flax>=0.6.4" "pandas>=1.1.0" "tensorflow-cpu>=2.4.0" tf_keras tqdm "einops>=0.3.0" "ml-collections>=0.1.0" - -.. code:: ipython3 - - import PIL - import jax - import numpy as np - - from vit_jax import checkpoint - from vit_jax import models_vit - from vit_jax import models_mixer - from vit_jax.configs import models as models_config - - import openvino as ov - -.. code:: ipython3 - - import ipywidgets as widgets - - available_models = ["ViT-B_32", "Mixer-B_16"] - - - model_to_use = widgets.Select( - options=available_models, - value=available_models[0], - description="Select model:", - disabled=False, - ) - - model_to_use - - - - -.. 
parsed-literal:: - - Select(description='Select model:', options=('ViT-B_32', 'Mixer-B_16'), value='ViT-B_32') - - - -Load and run the original model and a sample --------------------------------------------- - - - -Download a pre-trained model. - -.. code:: ipython3 - - from notebook_utils import download_file - - - model_name = model_to_use.value - model_config = models_config.MODEL_CONFIGS[model_name] - - - if model_name.startswith("Mixer"): - # Download model trained on imagenet2012 - model_name_path = download_file(f"https://storage.googleapis.com/mixer_models/imagenet1k/{model_name}.npz", filename=f"{model_name}_imagenet2012.npz") - model = models_mixer.MlpMixer(num_classes=1000, **model_config) - else: - # Download model pre-trained on imagenet21k and fine-tuned on imagenet2012. - model_name_path = download_file( - f"https://storage.googleapis.com/vit_models/imagenet21k+imagenet2012/{model_name}.npz", filename=f"{model_name}_imagenet2012.npz" - ) - model = models_vit.VisionTransformer(num_classes=1000, **model_config) - - - -.. parsed-literal:: - - ViT-B_32_imagenet2012.npz: 0%| | 0.00/337M [00:00`__ -should be used for these purposes. ``ov.convert_model`` function accepts -original JAX model instance and example input for tracing and returns -``ov.Model`` representing this model in OpenVINO framework. Converted -model can be used for saving on disk using ``ov.save_model`` function or -directly loading on device using ``core.complie_model``. - -Before conversion we need to create the -`Jaxprs `__ -(JAX’s internal intermediate representation (IR) of programs) object by -tracing a Python function using the -`jax.make_jaxpr `__ -function. [``jax.make_jaxpr``] take a function as argument, that should -perform the forward pass. In our case it is calling of ``model.apply`` -method. But ``model.apply`` requires not only input data, but also -``params`` and keyword argument ``train=False`` in our case. 
To handle -it create a wrapper function ``model_apply`` that calls -``model.apply(params, x, train=False)``. - -.. code:: ipython3 - - from pathlib import Path - - - model_path = Path(f"models/{model_name}.xml") - - - def model_apply(x): - return model.apply(dict(params=params), x, train=False) - - - jaxpr = jax.make_jaxpr(model_apply)((np.array(img) / 128 - 1)[None, ...]) - - converted_model = ov.convert_model(jaxpr) - ov.save_model(converted_model, model_path) - -Compiling the model -------------------- - - - -Select device from dropdown list for running inference using OpenVINO. - -.. code:: ipython3 - - from notebook_utils import device_widget - - - core = ov.Core() - - device = device_widget() - - device - - - - -.. parsed-literal:: - - Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') - - - -.. code:: ipython3 - - compiled_model = core.compile_model(model_path, device.value) - -Run OpenVINO model inference ----------------------------- - -.. code:: ipython3 - - (logits_ov,) = list(compiled_model(data).values())[0] - - preds = np.array(jax.nn.softmax(logits_ov)) - for idx in preds.argsort()[:-11:-1]: - print(f"{preds[idx]:.5f} : {imagenet_labels[idx]}", end="") - - -.. 
parsed-literal:: - - 0.95255 : alp - 0.03881 : valley, vale - 0.00192 : cliff, drop, drop-off - 0.00173 : ski - 0.00059 : lakeside, lakeshore - 0.00049 : promontory, headland, head, foreland - 0.00036 : volcano - 0.00021 : snowmobile - 0.00017 : mountain_bike, all-terrain_bike, off-roader - 0.00017 : mountain_tent - diff --git a/docs/notebooks/jax-classification-to-openvino-with-output_files/jax-classification-to-openvino-with-output_16_0.jpg b/docs/notebooks/jax-classification-to-openvino-with-output_files/jax-classification-to-openvino-with-output_16_0.jpg deleted file mode 100644 index 4e389f1fcb75af..00000000000000 --- a/docs/notebooks/jax-classification-to-openvino-with-output_files/jax-classification-to-openvino-with-output_16_0.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7b9ce29fc2d800faa2667de9fc47770370f12c829217c22142bfcd1f5e1a2752 -size 33195 diff --git a/docs/notebooks/jax-classification-to-openvino-with-output_files/jax-classification-to-openvino-with-output_16_0.png b/docs/notebooks/jax-classification-to-openvino-with-output_files/jax-classification-to-openvino-with-output_16_0.png deleted file mode 100644 index 901c02bacbed30..00000000000000 --- a/docs/notebooks/jax-classification-to-openvino-with-output_files/jax-classification-to-openvino-with-output_16_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ffe240660061089dfc38c95d77b074051cc37b794c4d096e5841cf8d575311d9 -size 237944 diff --git a/docs/notebooks/jina-clip-with-output.rst b/docs/notebooks/jina-clip-with-output.rst index 478d333d54d7e7..1cdb2e1d286245 100644 --- a/docs/notebooks/jina-clip-with-output.rst +++ b/docs/notebooks/jina-clip-with-output.rst @@ -77,7 +77,14 @@ Prerequisites .. 
code:: ipython3 %pip install -q "openvino>=2024.2.0" "datasets>=2.20" "nncf>=2.11.0" - %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "gradio>=4.19" "pillow" "einops" "timm" "transformers[torch]>=4.39" "torch>=2.1" "matplotlib>=3.4" "typing_extensions>=4.9" + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "gradio>=4.19" "pillow" "einops" "timm" "transformers[torch]>=4.39" "torch>=2.1" "matplotlib>=3.4" + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + Instantiate model ----------------- @@ -96,6 +103,17 @@ weights, using ``from_pretrained`` method. model = AutoModel.from_pretrained("jinaai/jina-clip-v1", trust_remote_code=True) + +.. parsed-literal:: + + 2024-11-05 01:41:58.578137: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 01:41:58.612620: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+ 2024-11-05 01:41:59.276782: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers + warnings.warn(f"Importing from {__name__} is deprecated, please import via timm.layers", FutureWarning) + + Prepare input data ~~~~~~~~~~~~~~~~~~ @@ -109,32 +127,28 @@ passing in the PIL.Image objects. from PIL import Image import requests - from pathlib import Path - if not Path("notebook_utils.py").exists(): - # image input data - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) + # image input data + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) - open("notebook_utils.py", "w").write(r.text) + open("notebook_utils.py", "w").write(r.text) from notebook_utils import download_file, device_widget, quantization_widget - if not Path("data/furseal.png").exists(): - download_file( - "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/3f779fc1-c1b2-4dec-915a-64dae510a2bb", - "furseal.png", - directory="data", - ) + download_file( + "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/3f779fc1-c1b2-4dec-915a-64dae510a2bb", + "furseal.png", + directory="data", + ) img_furseal = Image.open("./data/furseal.png") - if not Path("data/coco.jpg").exists(): - image_path = download_file( - "https://github.com/user-attachments/assets/1c66a05d-7442-45c2-a34c-bb08b95af7a6", - "coco.jpg", - directory="data", - ) + image_path = download_file( + "https://github.com/user-attachments/assets/1c66a05d-7442-45c2-a34c-bb08b95af7a6", + "coco.jpg", + directory="data", + ) img_coco = 
Image.open("./data/coco.jpg") @@ -278,6 +292,23 @@ loading on device using ``core.complie_model``. ov_text_model = ov.convert_model(model.text_model, example_input=text_inputs["input_ids"]) ov.save_model(ov_text_model, fp16_text_model_path) + +.. parsed-literal:: + + WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. + + +.. parsed-literal:: + + WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + warnings.warn( + /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/jinaai/jina-bert-flash-implementation/b78d1595de294f13ffe7b19d6cd63892a6e4e7a4/mha.py:333: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + softmax_scale = self.softmax_scale or 1.0 / math.sqrt(q.shape[-1]) + /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/jinaai/jina-bert-flash-implementation/b78d1595de294f13ffe7b19d6cd63892a6e4e7a4/mha.py:343: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ if seqlen > self.linear_biases.shape[-1]: + + .. code:: ipython3 fp16_vision_model_path = Path("jina-clip-vision_v1_fp16.xml") @@ -286,6 +317,13 @@ loading on device using ``core.complie_model``. ov_vision_model = ov.convert_model(model.vision_model, example_input=vision_inputs["pixel_values"]) ov.save_model(ov_vision_model, fp16_vision_model_path) + +.. parsed-literal:: + + /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/jinaai/jina-clip-implementation/96e41b892fe647a3c45bf921352f147184024aef/eva_model.py:468: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + assert H == self.img_size[0] and W == self.img_size[1], ( + + Select inference device ~~~~~~~~~~~~~~~~~~~~~~~ @@ -374,15 +412,11 @@ inference faster. The optimization process contains the following steps: .. 
code:: ipython3 - if not Path("skip_kernel_extension.py").exists(): - # Fetch `skip_kernel_extension` module - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", - ) - open("skip_kernel_extension.py", "w").write(r.text) - - int8_text_model_path = Path("jina-clip-text_v1_int8.xml") - int8_vision_model_path = Path("jina-clip-vision_v1_int8.xml") + # Fetch `skip_kernel_extension` module + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", + ) + open("skip_kernel_extension.py", "w").write(r.text) %load_ext skip_kernel_extension @@ -472,19 +506,18 @@ Dataset with text data import logging import nncf - if not int8_text_model_path.exists(): - dataset = load_dataset("google-research-datasets/conceptual_captions", trust_remote_code=True) - train_dataset = dataset["train"].shuffle(seed=42) + dataset = load_dataset("google-research-datasets/conceptual_captions", trust_remote_code=True) + train_dataset = dataset["train"].shuffle(seed=42) - dataloader_text = torch.utils.data.DataLoader(train_dataset, collate_fn=collate_fn_text, batch_size=1) - calibration_data_text = prepare_calibration_data_text(dataloader_text, 50) + dataloader_text = torch.utils.data.DataLoader(train_dataset, collate_fn=collate_fn_text, batch_size=1) + calibration_data_text = prepare_calibration_data_text(dataloader_text, 50) .. parsed-literal:: - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, openvino + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino Fetching 50 samples for the initialization... - + .. 
parsed-literal:: @@ -555,18 +588,17 @@ Dataset with image data %%skip not $to_quantize.value - if not int8_vision_model_path.exists(): - dataset = load_dataset("google-research-datasets/conceptual_captions", trust_remote_code=True) - train_dataset = dataset["train"].shuffle(seed=42) + dataset = load_dataset("google-research-datasets/conceptual_captions", trust_remote_code=True) + train_dataset = dataset["train"].shuffle(seed=42) - dataloader_vis = torch.utils.data.DataLoader(train_dataset, collate_fn=collate_fn_vision, batch_size=1) - calibration_data_vision = prepare_calibration_data_vis(dataloader_vis, 50) + dataloader_vis = torch.utils.data.DataLoader(train_dataset, collate_fn=collate_fn_vision, batch_size=1) + calibration_data_vision = prepare_calibration_data_vis(dataloader_vis, 50) .. parsed-literal:: Fetching 50 samples for the initialization... - + .. parsed-literal:: @@ -589,48 +621,108 @@ Quantization of text model +.. code:: ipython3 + + int8_text_model_path = "jina-clip-text_v1_int8.xml" + .. code:: ipython3 %%skip not $to_quantize.value - if not int8_text_model_path.exists(): - if len(calibration_data_text) == 0: - raise RuntimeError( - 'Calibration dataset is empty. Please check internet connection and try to download images manually.' - ) + if len(calibration_data_text) == 0: + raise RuntimeError( + 'Calibration dataset is empty. Please check internet connection and try to download images manually.' 
+ ) - ov_model_text = core.read_model(fp16_text_model_path) + ov_model_text = core.read_model(fp16_text_model_path) - calibration_dataset = nncf.Dataset(calibration_data_text) - quantized_model = nncf.quantize( - model=ov_model_text, - calibration_dataset=calibration_dataset - ) - ov.save_model(quantized_model, int8_text_model_path) + calibration_dataset = nncf.Dataset(calibration_data_text) + quantized_model = nncf.quantize( + model=ov_model_text, + calibration_dataset=calibration_dataset + ) + ov.save_model(quantized_model, int8_text_model_path) + + + +.. parsed-literal:: + + Output() + + + + + + + + + + +.. parsed-literal:: + + Output() + + + + + + + + Quantization of image model ^^^^^^^^^^^^^^^^^^^^^^^^^^^ +.. code:: ipython3 + + int8_vision_model_path = "jina-clip-vision_v1_int8.xml" + .. code:: ipython3 %%skip not $to_quantize.value - if not int8_vision_model_path.exists(): - if len(calibration_data_vision) == 0: - raise RuntimeError( - 'Calibration dataset is empty. Please check internet connection and try to download images manually.' - ) + if len(calibration_data_vision) == 0: + raise RuntimeError( + 'Calibration dataset is empty. Please check internet connection and try to download images manually.' + ) - ov_model_vision = core.read_model(fp16_vision_model_path) + ov_model_vision = core.read_model(fp16_vision_model_path) - calibration_dataset = nncf.Dataset(calibration_data_vision) - quantized_model = nncf.quantize( - model=ov_model_vision, - calibration_dataset=calibration_dataset - ) - ov.save_model(quantized_model, int8_vision_model_path) + calibration_dataset = nncf.Dataset(calibration_data_vision) + quantized_model = nncf.quantize( + model=ov_model_vision, + calibration_dataset=calibration_dataset + ) + ov.save_model(quantized_model, int8_vision_model_path) + + + +.. parsed-literal:: + + Output() + + + + + + + + + + +.. parsed-literal:: + + Output() + + + + + + + + .. code:: ipython3 @@ -647,7 +739,7 @@ Quantization of image model -.. 
image:: jina-clip-with-output_files/jina-clip-with-output_37_0.png +.. image:: jina-clip-with-output_files/jina-clip-with-output_39_0.png Compare File Size @@ -679,7 +771,7 @@ Compare File Size Text model: FP16 model size - 266.88 MB; INT8 model size - 136.98 MB; Model compression rate: 1.948 Vision model: FP16 model size - 163.83 MB; INT8 model size - 82.64 MB; Model compression rate: 1.983 - + Compare inference time of the FP16 IR and quantized models ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -728,9 +820,9 @@ approximately estimate the speed up of the dynamic quantized models. .. parsed-literal:: - Performance speed up for text model: 1.610 - Performance speed up for vision model: 1.489 - + Performance speed up for text model: 1.978 + Performance speed up for vision model: 1.428 + Gradio demo ----------- @@ -814,9 +906,23 @@ example, ``cat,dog,bird``) demo = make_demo(image_text_fn=image_text_sim, text_text_fn=text_text_sim, image_image_fn=image_image_sim, model_choice_visible=model_choice_visible) try: - demo.queue().launch(debug=True) + demo.queue().launch(debug=False) except Exception: - demo.queue().launch(share=True, debug=True) + demo.queue().launch(share=True, debug=False) # if you are launching remotely, specify server_name and server_port # demo.launch(server_name='your server name', server_port='server port in int') # Read more in the docs: https://gradio.app/docs/ + + +.. parsed-literal:: + + Running on local URL: http://127.0.0.1:7860 + + To create a public link, set `share=True` in `launch()`. 
+ + + + + + + diff --git a/docs/notebooks/jina-clip-with-output_files/jina-clip-with-output_11_0.png b/docs/notebooks/jina-clip-with-output_files/jina-clip-with-output_11_0.png index c5043ea82df122..83744f48df88cc 100644 --- a/docs/notebooks/jina-clip-with-output_files/jina-clip-with-output_11_0.png +++ b/docs/notebooks/jina-clip-with-output_files/jina-clip-with-output_11_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d328ce0b22f2a80ed7640ac0a2b292df687aaf303427e56d954d30de439c0c56 +oid sha256:9b913407ebaac94ee389f4ecd1b166dfbbb2b9bfd12ceaff8df783460cbd5e64 size 427929 diff --git a/docs/notebooks/jina-clip-with-output_files/jina-clip-with-output_21_0.png b/docs/notebooks/jina-clip-with-output_files/jina-clip-with-output_21_0.png index c5043ea82df122..83744f48df88cc 100644 --- a/docs/notebooks/jina-clip-with-output_files/jina-clip-with-output_21_0.png +++ b/docs/notebooks/jina-clip-with-output_files/jina-clip-with-output_21_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d328ce0b22f2a80ed7640ac0a2b292df687aaf303427e56d954d30de439c0c56 +oid sha256:9b913407ebaac94ee389f4ecd1b166dfbbb2b9bfd12ceaff8df783460cbd5e64 size 427929 diff --git a/docs/notebooks/jina-clip-with-output_files/jina-clip-with-output_37_0.png b/docs/notebooks/jina-clip-with-output_files/jina-clip-with-output_37_0.png deleted file mode 100644 index 71eaff4146ac7d..00000000000000 --- a/docs/notebooks/jina-clip-with-output_files/jina-clip-with-output_37_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a8a465646b9f698e200a3934841843024767c8d0a559d0a1267f76c5bcf9b87e -size 428007 diff --git a/docs/notebooks/jina-clip-with-output_files/jina-clip-with-output_39_0.png b/docs/notebooks/jina-clip-with-output_files/jina-clip-with-output_39_0.png new file mode 100644 index 00000000000000..dc44386559455c --- /dev/null +++ b/docs/notebooks/jina-clip-with-output_files/jina-clip-with-output_39_0.png @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:9c3ee89570303f7037c893a1a6a2381569ec34fc5d9e29526f4ae1c94ead1f96 +size 428013 diff --git a/docs/notebooks/knowledge-graphs-conve-with-output.rst b/docs/notebooks/knowledge-graphs-conve-with-output.rst index 4d01d076afd676..de9115fd9ab4a8 100644 --- a/docs/notebooks/knowledge-graphs-conve-with-output.rst +++ b/docs/notebooks/knowledge-graphs-conve-with-output.rst @@ -196,19 +196,19 @@ Settings: Including path to the serialized model files and input data files .. parsed-literal:: - kg_training_entids.txt: 0%| | 0.00/3.79k [00:00`__ .. parsed-literal:: - 2024-12-10 02:10:00.149367: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-12-10 02:10:00.174583: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 01:44:54.753766: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 01:44:54.788691: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2024-11-05 01:44:55.309895: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -373,14 +374,11 @@ Vision model accept ``pixel_values`` and returns ``image_embeds``. .. 
parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( - `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:452: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if not interpolate_pos_encoding and (height != self.image_size or width != self.image_size): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:519: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:465: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:559: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:505: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): @@ -408,7 +406,7 @@ Convert Image To Text Projection model .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:168: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). 
If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) if a.grad is not None: @@ -543,13 +541,13 @@ generated text by ``AutoProcessor``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:859: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:804: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! if max_pos > self.weights.size(0): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1113: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input_shape[-1] > 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:975: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:920: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! if attention_mask.size() != (batch_size, 1, seq_length, src_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1261: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1206: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: @@ -1391,9 +1389,9 @@ pipelines, we use mean inference time on 7 samples. .. 
parsed-literal:: - FP32 pipeline: 2.760 seconds - Optimized pipeline: 1.136 seconds - Performance speed-up: 2.430 + FP32 pipeline: 2.746 seconds + Optimized pipeline: 1.140 seconds + Performance speed-up: 2.409 Interactive inference diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg index 8cbf8c6845558b..2310cb001b0c6b 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:90eb5c813dbef6b48b4d6e6acca89940550e650f29648178615bc5b73cfbad07 -size 123201 +oid sha256:9ca596f09c0f6c0dafa4aca0fbe7974941301cfcbc6bcb3a8c4255774c347d0b +size 123320 diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png index 76747126a0b8a7..91289c35d7c60c 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2c680f410cf278d774523ad5338a2a1c4a5fe705113306c7abbec065c2108968 -size 1150690 +oid sha256:56d06f7d654939feda627f67196b813de9b38a718acba9f5daed59a43314829f +size 1150807 diff --git 
a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png index 3a29f664a441a1..d98f56141b1252 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:39a74767a21f27ea1076d4d999630d18c019b8de712c05c75fca7ef1a7979199 -size 1148499 +oid sha256:0d7f8506e5f1bd369debee273b45c601d05901af4937d8cc976f985cd4a81fed +size 1149292 diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg index 6586a554fa5fcc..b53344f52b7396 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:18799247eb9a64ea7a8828cd7587fcc1b428cc2d5e300dcf64393ce9bd0e4bc9 -size 124329 +oid sha256:edd5a47baf47ae90532b47bc5ee05e8503b7d1deda59d956a354688ed949c8b5 +size 121605 diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png index 0193662b0a661b..2edc9a038ff8c3 
100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ea65e060c07381de785e4c03e02fadd599b89d605a00be7e62987cb582d00d97 -size 1150941 +oid sha256:aa184084b598dac717e99fe9677f1fe9dd4f6b85ec123c075d4109c75b134841 +size 1150675 diff --git a/docs/notebooks/language-quantize-bert-with-output.rst b/docs/notebooks/language-quantize-bert-with-output.rst index e9c92052b26bae..21ecfe511f1b76 100644 --- a/docs/notebooks/language-quantize-bert-with-output.rst +++ b/docs/notebooks/language-quantize-bert-with-output.rst @@ -101,9 +101,10 @@ Imports .. parsed-literal:: - 2024-12-10 02:16:53.582571: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-12-10 02:16:53.608080: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 01:51:49.197259: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 01:51:49.231710: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+ 2024-11-05 01:51:49.783615: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -149,7 +150,7 @@ Perform the following: .. parsed-literal:: - MRPC.zip: 0%| | 0.00/387M [00:00`__. In this tutorial, we consider how to convert and run LCM using OpenVINO. An additional part demonstrates how to run quantization with -`NNCF `__ to speed up pipeline -and generate images using `OpenVINO -GenAI `__. +`NNCF `__ to speed up +pipeline. **Table of contents:** - `Prerequisites <#prerequisites>`__ +- `Prepare models for OpenVINO format + conversion <#prepare-models-for-openvino-format-conversion>`__ - `Convert models to OpenVINO format <#convert-models-to-openvino-format>`__ + + - `Text Encoder <#text-encoder>`__ + - `U-Net <#u-net>`__ + - `VAE <#vae>`__ + - `Prepare inference pipeline <#prepare-inference-pipeline>`__ - `Configure Inference Pipeline <#configure-inference-pipeline>`__ @@ -63,10 +69,9 @@ GenAI `__. - `Run quantization <#run-quantization>`__ - `Compare inference time of the FP16 and INT8 models <#compare-inference-time-of-the-fp16-and-int8-models>`__ - - `Compare UNet file size <#compare-unet-file-size>`__ -- `Run Text to image generation using OpenVINO - GenAI <#run-text-to-image-generation-using-openvino-genai>`__ + - `Compare UNet file size <#compare-unet-file-size>`__ + - `Interactive demo <#interactive-demo>`__ Installation Instructions @@ -87,27 +92,10 @@ Prerequisites .. 
code:: ipython3 %pip install -q "torch>=2.1" --index-url https://download.pytorch.org/whl/cpu - %pip install -q "transformers>=4.45" tqdm accelerate "diffusers>=0.30.1" pillow "gradio>=4.19" "nncf>=2.12.0" "datasets>=2.14.6" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "git+https://github.com/huggingface/optimum-intel.git" - %pip install -qU --pre "openvino>=2024.4.0" "openvino-tokenizers" "openvino-genai" --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + %pip install -q "openvino>=2024.3.0" transformers "diffusers>=0.30.1" pillow "gradio>=4.19" "nncf>=2.12.0" "datasets>=2.14.6" --extra-index-url https://download.pytorch.org/whl/cpu -.. code:: ipython3 - - from pathlib import Path - import requests - - utility_files = [Path("notebook_utils.py"), Path("skip_kernel_extension.py"), Path("cmd_helper.py")] - - base_utils_url = "https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/" - - for utility_file in utility_files: - if not utility_file.exists(): - r = requests.get(base_utils_url + utility_file.name) - with utility_file.open("w") as f: - f.write(r.text) - -Convert models to OpenVINO format ---------------------------------- +Prepare models for OpenVINO format conversion +--------------------------------------------- @@ -129,48 +117,316 @@ and distilled using LCD. The distillation approach efficiently converts a pre-trained guided diffusion model into a latent consistency model by solving an augmented PF-ODE. -For simplifying model export we will utilize Optimum Intel library. -`Optimum Intel `__ is -the interface between the -`Transformers `__ and -`Diffusers `__ libraries -and OpenVINO to accelerate end-to-end pipelines on Intel architectures. -It provides ease-to-use -`interface `__ -for exporting models to `OpenVINO Intermediate Representation -(IR) `__ -format. - -The command bellow demonstrates basic command for model export with -``optimum-cli`` - -.. 
code:: bash - - optimum-cli export openvino --model --task - -where ``--model`` argument is model id from HuggingFace Hub or local -directory with model (saved using ``.save_pretrained`` method), -``--task`` is one of `supported -task `__ -that exported model should solve. For image generation it will be -``text-to-image``. If model initialization requires to use remote code, -``--trust-remote-code`` flag additionally should be passed. You can also -apply fp16, 8-bit or 4-bit weight compression on the Linear, -Convolutional and Embedding layers when exporting your model with the -CLI by setting ``--weight-format`` to respectively fp16, int8 or int4. -This type of optimization allows to reduce the memory footprint and -inference latency. We will quantize our model later using nncf, so in -this step we will use fp16 as base model export precision. +To start working with LCM, we should instantiate the generation pipeline +first. The ``DiffusionPipeline.from_pretrained`` method downloads all +pipeline components for LCM and configures them. This model uses a custom +inference pipeline stored as part of the model repository, so we should also +specify which module should be loaded for initialization using the +``custom_pipeline`` argument, together with the revision for it. .. 
code:: ipython3 - from cmd_helper import optimum_cli + import gc + import warnings + from pathlib import Path + from diffusers import DiffusionPipeline + import numpy as np + + + warnings.filterwarnings("ignore") + + TEXT_ENCODER_OV_PATH = Path("model/text_encoder.xml") + UNET_OV_PATH = Path("model/unet.xml") + VAE_DECODER_OV_PATH = Path("model/vae_decoder.xml") - model_id = "SimianLuo/LCM_Dreamshaper_v7" - model_path = Path(model_id.split("/")[-1] + "_ov") - if not model_path.exists(): - optimum_cli(model_id, model_path, additional_args={"weight-format": "fp16"}) + def load_orginal_pytorch_pipeline_componets(skip_models=False, skip_safety_checker=False): + pipe = DiffusionPipeline.from_pretrained("SimianLuo/LCM_Dreamshaper_v7") + scheduler = pipe.scheduler + tokenizer = pipe.tokenizer + feature_extractor = pipe.feature_extractor if not skip_safety_checker else None + safety_checker = pipe.safety_checker if not skip_safety_checker else None + text_encoder, unet, vae = None, None, None + if not skip_models: + text_encoder = pipe.text_encoder + text_encoder.eval() + unet = pipe.unet + unet.eval() + vae = pipe.vae + vae.eval() + del pipe + gc.collect() + return ( + scheduler, + tokenizer, + feature_extractor, + safety_checker, + text_encoder, + unet, + vae, + ) + +.. code:: ipython3 + + skip_conversion = TEXT_ENCODER_OV_PATH.exists() and UNET_OV_PATH.exists() and VAE_DECODER_OV_PATH.exists() + + ( + scheduler, + tokenizer, + feature_extractor, + safety_checker, + text_encoder, + unet, + vae, + ) = load_orginal_pytorch_pipeline_componets(skip_conversion) + + + +.. parsed-literal:: + + Fetching 15 files: 0%| | 0/15 [00:00`__ is crucial for + synthesizing high-quality text-aligned images in Stable Diffusion, + because it controls how similar the generated image will be to the + prompt. In Latent Consistency Models, CFG serves as augmentation + parameter for PF-ODE. + +Model predicts the ``sample`` state for the next step. + +.. 
code:: ipython3 + + def convert_unet(unet: torch.nn.Module, ir_path: Path): + """ + Convert U-net model to IR format. + Function accepts unet model, prepares example inputs for conversion, + Parameters: + unet (StableDiffusionPipeline): unet from Stable Diffusion pipeline + ir_path (Path): File for storing model + Returns: + None + """ + # prepare inputs + dummy_inputs = { + "sample": torch.randn((1, 4, 64, 64)), + "timestep": torch.ones([1]).to(torch.float32), + "encoder_hidden_states": torch.randn((1, 77, 768)), + "timestep_cond": torch.randn((1, 256)), + } + unet.eval() + with torch.no_grad(): + ov_model = ov.convert_model(unet, example_input=dummy_inputs) + ov.save_model(ov_model, ir_path) + del ov_model + cleanup_torchscript_cache() + gc.collect() + print(f"Unet successfully converted to IR and saved to {ir_path}") + + + if not UNET_OV_PATH.exists(): + convert_unet(unet, UNET_OV_PATH) + else: + print(f"Unet will be loaded from {UNET_OV_PATH}") + del unet + gc.collect(); + +VAE +~~~ + + + +The VAE model has two parts, an encoder and a decoder. The encoder is +used to convert the image into a low dimensional latent representation, +which will serve as the input to the U-Net model. The decoder, +conversely, transforms the latent representation back into an image. + +During latent diffusion training, the encoder is used to get the latent +representations (latents) of the images for the forward diffusion +process, which applies more and more noise at each step. During +inference, the denoised latents generated by the reverse diffusion +process are converted back into images using the VAE decoder. When you +run inference for text-to-image, there is no initial image as a starting +point. You can skip this step and directly generate initial random +noise. + +In our inference pipeline, we will not use VAE encoder part and skip its +conversion for reducing memory consumption. The process of conversion +VAE encoder, can be found in Stable Diffusion notebook. + +.. 
code:: ipython3 + + def convert_vae_decoder(vae: torch.nn.Module, ir_path: Path): + """ + Convert VAE model for decoding to IR format. + Function accepts vae model, creates wrapper class for export only necessary for inference part, + prepares example inputs for conversion, + Parameters: + vae (torch.nn.Module): VAE model frm StableDiffusion pipeline + ir_path (Path): File for storing model + Returns: + None + """ + + class VAEDecoderWrapper(torch.nn.Module): + def __init__(self, vae): + super().__init__() + self.vae = vae + + def forward(self, latents): + return self.vae.decode(latents) + + vae_decoder = VAEDecoderWrapper(vae) + latents = torch.zeros((1, 4, 64, 64)) + + vae_decoder.eval() + with torch.no_grad(): + ov_model = ov.convert_model(vae_decoder, example_input=latents) + ov.save_model(ov_model, ir_path) + del ov_model + cleanup_torchscript_cache() + print(f"VAE decoder successfully converted to IR and saved to {ir_path}") + + + if not VAE_DECODER_OV_PATH.exists(): + convert_vae_decoder(vae, VAE_DECODER_OV_PATH) + else: + print(f"VAE decoder will be loaded from {VAE_DECODER_OV_PATH}") + + del vae + gc.collect(); Prepare inference pipeline -------------------------- @@ -205,27 +461,237 @@ number of steps required ~2-8) to step-by-step retrieve better latent image representations. When complete, the latent image representation is decoded by the decoder part of the variational auto encoder. -For starting work with LCM, we should instantiate the generation -pipeline first. ``DiffusionPipeline.from_pretrained`` method downloads -all pipeline components (if required) for LCM and configure them. -Loading LCM for OpenVINO inference using Optimum Intel looks similar, we -only should replace ``DiffusionPipeline`` with ``OVDiffusionPpeline``. -This model class accepts model id from HuggingFace Hub or local -directory for original PyTorch pipeline or already converted. 
In case, -if path to original pipeline provided, it will be automatically -converted to OpenVINO format, but as we already converted model before -using Optimum CLI, we will use models from the previous step. +.. code:: ipython3 + + from typing import Union, Optional, Any, List, Dict + from transformers import CLIPTokenizer, CLIPImageProcessor + from diffusers.pipelines.stable_diffusion.safety_checker import ( + StableDiffusionSafetyChecker, + ) + from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput + from diffusers.image_processor import VaeImageProcessor + + + class OVLatentConsistencyModelPipeline(DiffusionPipeline): + def __init__( + self, + vae_decoder: ov.Model, + text_encoder: ov.Model, + tokenizer: CLIPTokenizer, + unet: ov.Model, + scheduler: None, + safety_checker: StableDiffusionSafetyChecker, + feature_extractor: CLIPImageProcessor, + requires_safety_checker: bool = True, + ): + super().__init__() + self.vae_decoder = vae_decoder + self.text_encoder = text_encoder + self.tokenizer = tokenizer + self.register_to_config(unet=unet) + self.scheduler = scheduler + self.safety_checker = safety_checker + self.feature_extractor = feature_extractor + self.vae_scale_factor = 2**3 + self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) + + def _encode_prompt( + self, + prompt, + num_images_per_prompt, + prompt_embeds: None, + ): + r""" + Encodes the prompt into text encoder hidden states. + Args: + prompt (`str` or `List[str]`, *optional*): + prompt to be encoded + num_images_per_prompt (`int`): + number of images that should be generated per prompt + prompt_embeds (`torch.FloatTensor`, *optional*): + Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not + provided, text embeddings will be generated from `prompt` input argument. 
+ """ + + if prompt_embeds is None: + text_inputs = self.tokenizer( + prompt, + padding="max_length", + max_length=self.tokenizer.model_max_length, + truncation=True, + return_tensors="pt", + ) + text_input_ids = text_inputs.input_ids + + prompt_embeds = self.text_encoder(text_input_ids, share_inputs=True, share_outputs=True) + prompt_embeds = torch.from_numpy(prompt_embeds[0]) + + bs_embed, seq_len, _ = prompt_embeds.shape + # duplicate text embeddings for each generation per prompt + prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1) + prompt_embeds = prompt_embeds.view(bs_embed * num_images_per_prompt, seq_len, -1) + + # Don't need to get uncond prompt embedding because of LCM Guided Distillation + return prompt_embeds + + def run_safety_checker(self, image, dtype): + if self.safety_checker is None: + has_nsfw_concept = None + else: + if torch.is_tensor(image): + feature_extractor_input = self.image_processor.postprocess(image, output_type="pil") + else: + feature_extractor_input = self.image_processor.numpy_to_pil(image) + safety_checker_input = self.feature_extractor(feature_extractor_input, return_tensors="pt") + image, has_nsfw_concept = self.safety_checker(images=image, clip_input=safety_checker_input.pixel_values.to(dtype)) + return image, has_nsfw_concept + + def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, latents=None): + shape = ( + batch_size, + num_channels_latents, + height // self.vae_scale_factor, + width // self.vae_scale_factor, + ) + if latents is None: + latents = torch.randn(shape, dtype=dtype) + # scale the initial noise by the standard deviation required by the scheduler + latents = latents * self.scheduler.init_noise_sigma + return latents + + def get_w_embedding(self, w, embedding_dim=512, dtype=torch.float32): + """ + see https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298 + Args: + timesteps: torch.Tensor: generate embedding vectors at 
these timesteps + embedding_dim: int: dimension of the embeddings to generate + dtype: data type of the generated embeddings + Returns: + embedding vectors with shape `(len(timesteps), embedding_dim)` + """ + assert len(w.shape) == 1 + w = w * 1000.0 + + half_dim = embedding_dim // 2 + emb = torch.log(torch.tensor(10000.0)) / (half_dim - 1) + emb = torch.exp(torch.arange(half_dim, dtype=dtype) * -emb) + emb = w.to(dtype)[:, None] * emb[None, :] + emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1) + if embedding_dim % 2 == 1: # zero pad + emb = torch.nn.functional.pad(emb, (0, 1)) + assert emb.shape == (w.shape[0], embedding_dim) + return emb + + @torch.no_grad() + def __call__( + self, + prompt: Union[str, List[str]] = None, + height: Optional[int] = 512, + width: Optional[int] = 512, + guidance_scale: float = 7.5, + num_images_per_prompt: Optional[int] = 1, + latents: Optional[torch.FloatTensor] = None, + num_inference_steps: int = 4, + lcm_origin_steps: int = 50, + prompt_embeds: Optional[torch.FloatTensor] = None, + output_type: Optional[str] = "pil", + return_dict: bool = True, + cross_attention_kwargs: Optional[Dict[str, Any]] = None, + ): + # 1. Define call parameters + if prompt is not None and isinstance(prompt, str): + batch_size = 1 + elif prompt is not None and isinstance(prompt, list): + batch_size = len(prompt) + else: + batch_size = prompt_embeds.shape[0] + + # do_classifier_free_guidance = guidance_scale > 0.0 + # In LCM Implementation: cfg_noise = noise_cond + cfg_scale * (noise_cond - noise_uncond) , (cfg_scale > 0.0 using CFG) + + # 2. Encode input prompt + prompt_embeds = self._encode_prompt( + prompt, + num_images_per_prompt, + prompt_embeds=prompt_embeds, + ) + + # 3. Prepare timesteps + self.scheduler.set_timesteps(num_inference_steps, original_inference_steps=lcm_origin_steps) + timesteps = self.scheduler.timesteps + + # 4. 
Prepare latent variable + num_channels_latents = 4 + latents = self.prepare_latents( + batch_size * num_images_per_prompt, + num_channels_latents, + height, + width, + prompt_embeds.dtype, + latents, + ) + + bs = batch_size * num_images_per_prompt + + # 5. Get Guidance Scale Embedding + w = torch.tensor(guidance_scale).repeat(bs) + w_embedding = self.get_w_embedding(w, embedding_dim=256) + + # 6. LCM MultiStep Sampling Loop: + with self.progress_bar(total=num_inference_steps) as progress_bar: + for i, t in enumerate(timesteps): + ts = torch.full((bs,), t, dtype=torch.long) + + # model prediction (v-prediction, eps, x) + model_pred = self.unet( + [latents, ts, prompt_embeds, w_embedding], + share_inputs=True, + share_outputs=True, + )[0] + + # compute the previous noisy sample x_t -> x_t-1 + latents, denoised = self.scheduler.step(torch.from_numpy(model_pred), t, latents, return_dict=False) + progress_bar.update() + + if not output_type == "latent": + image = torch.from_numpy(self.vae_decoder(denoised / 0.18215, share_inputs=True, share_outputs=True)[0]) + image, has_nsfw_concept = self.run_safety_checker(image, prompt_embeds.dtype) + else: + image = denoised + has_nsfw_concept = None + + if has_nsfw_concept is None: + do_denormalize = [True] * image.shape[0] + else: + do_denormalize = [not has_nsfw for has_nsfw in has_nsfw_concept] + + image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize) + + if not return_dict: + return (image, has_nsfw_concept) + + return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept) Configure Inference Pipeline ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Optionally, we can setup which device will be used for running -inference. Select desired inference device from dropdown list bellow. +First, you should create instances of OpenVINO Model and compile it +using selected device. Select device from dropdown list for running +inference using OpenVINO. .. 
code:: ipython3 + core = ov.Core() + + import requests + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + from notebook_utils import device_widget device = device_widget() @@ -237,27 +703,18 @@ inference. Select desired inference device from dropdown list bellow. .. parsed-literal:: - Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU') .. code:: ipython3 - from optimum.intel.openvino import OVDiffusionPipeline + text_enc = core.compile_model(TEXT_ENCODER_OV_PATH, device.value) + unet_model = core.compile_model(UNET_OV_PATH, device.value) - ov_pipe = OVDiffusionPipeline.from_pretrained(model_path, device=device.value) - - -.. parsed-literal:: - - 2024-11-14 12:52:11.556586: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
- 2024-11-14 12:52:11.570192: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered - WARNING: All log messages before absl::InitializeLog() is called are written to STDERR - E0000 00:00:1731574331.585339 2056327 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered - E0000 00:00:1731574331.589784 2056327 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered - 2024-11-14 12:52:11.606540: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - + ov_config = {"INFERENCE_PRECISION_HINT": "f32"} if device.value != "CPU" else {} + + vae_decoder = core.compile_model(VAE_DECODER_OV_PATH, device.value, ov_config) Model tokenizer and scheduler are also important parts of the pipeline. This pipeline is also can use Safety Checker, the filter for detecting @@ -267,6 +724,18 @@ embeddings using CLIP model, so additionally feature extractor component should be added in the pipeline. We reuse tokenizer, feature extractor, scheduler and safety checker from original LCM pipeline. +.. code:: ipython3 + + ov_pipe = OVLatentConsistencyModelPipeline( + tokenizer=tokenizer, + text_encoder=text_enc, + unet=unet_model, + vae_decoder=vae_decoder, + scheduler=scheduler, + feature_extractor=feature_extractor, + safety_checker=safety_checker, + ) + Text-to-image generation ------------------------ @@ -276,13 +745,18 @@ Now, let’s see model in action .. 
code:: ipython3 - import torch - prompt = "a beautiful pink unicorn, 8k" num_inference_steps = 4 + torch.manual_seed(1234567) images = ov_pipe( - prompt=prompt, num_inference_steps=num_inference_steps, guidance_scale=8.0, height=512, width=512, generator=torch.Generator().manual_seed(1234567) + prompt=prompt, + num_inference_steps=num_inference_steps, + guidance_scale=8.0, + lcm_origin_steps=50, + output_type="pil", + height=512, + width=512, ).images @@ -299,19 +773,12 @@ Now, let’s see model in action -.. image:: latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_13_0.png +.. image:: latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_21_0.png Nice. As you can see, the picture has quite a high definition 🔥. -.. code:: ipython3 - - import gc - - del ov_pipe - gc.collect(); - Quantization ------------ @@ -347,7 +814,6 @@ improve model inference speed. skip_for_device = "GPU" in device.value to_quantize = quantization_widget(not skip_for_device) - int8_model_path = model_path.parent / (model_path.name + "_int8") to_quantize @@ -360,13 +826,22 @@ improve model inference speed. +Let’s load ``skip magic`` extension to skip quantization if +``to_quantize`` is not selected + .. code:: ipython3 + int8_pipe = None + + # Fetch `skip_kernel_extension` module + import requests + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", + ) + open("skip_kernel_extension.py", "w").write(r.text) %load_ext skip_kernel_extension -Let’s load ``skip magic`` extension to skip quantization if -``to_quantize`` is not selected - Prepare calibration dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -385,8 +860,6 @@ model inputs for calibration we should customize ``CompiledModel``. 
from tqdm.notebook import tqdm from transformers import set_seed from typing import Any, Dict, List - import openvino as ov - import numpy as np set_seed(1) @@ -401,9 +874,9 @@ model inputs for calibration we should customize ``CompiledModel``. self.data_cache.append(*args) return super().__call__(*args, **kwargs) - def collect_calibration_data(lcm_pipeline, subset_size: int) -> List[Dict]: - original_unet = lcm_pipeline.unet.request - lcm_pipeline.unet.request = CompiledModelDecorator(original_unet, prob=0.3) + def collect_calibration_data(lcm_pipeline: OVLatentConsistencyModelPipeline, subset_size: int) -> List[Dict]: + original_unet = lcm_pipeline.unet + lcm_pipeline.unet = CompiledModelDecorator(original_unet, prob=0.3) dataset = datasets.load_dataset("google-research-datasets/conceptual_captions", split="train", trust_remote_code=True).shuffle(seed=42) lcm_pipeline.set_progress_bar_config(disable=True) @@ -415,25 +888,27 @@ model inputs for calibration we should customize ``CompiledModel``. 
diff = 0 for batch in dataset: prompt = batch["caption"] - if len(prompt) > lcm_pipeline.tokenizer.model_max_length: + if len(prompt) > tokenizer.model_max_length: continue _ = lcm_pipeline( prompt, num_inference_steps=num_inference_steps, guidance_scale=8.0, + lcm_origin_steps=50, + output_type="pil", height=512, width=512, ) - collected_subset_size = len(lcm_pipeline.unet.request.data_cache) + collected_subset_size = len(lcm_pipeline.unet.data_cache) if collected_subset_size >= subset_size: pbar.update(subset_size - pbar.n) break pbar.update(collected_subset_size - diff) diff = collected_subset_size - calibration_dataset = lcm_pipeline.unet.request.data_cache + calibration_dataset = lcm_pipeline.unet.data_cache lcm_pipeline.set_progress_bar_config(disable=False) - lcm_pipeline.unet.request = original_unet + lcm_pipeline.unet = original_unet lcm_pipeline.safety_checker = safety_checker return calibration_dataset @@ -445,12 +920,10 @@ model inputs for calibration we should customize ``CompiledModel``. logging.basicConfig(level=logging.WARNING) logger = logging.getLogger(__name__) - if not int8_model_path.exists(): + UNET_INT8_OV_PATH = Path("model/unet_int8.xml") + if not UNET_INT8_OV_PATH.exists(): subset_size = 200 - ov_pipe = OVDiffusionPipeline.from_pretrained(model_path, device=device.value) unet_calibration_data = collect_calibration_data(ov_pipe, subset_size=subset_size) - del ov_pipe - gc.collect(); @@ -475,11 +948,12 @@ Create a quantized model from the pre-trained converted OpenVINO model. 
import nncf from nncf.scopes import IgnoredScope - import shutil - core = ov.Core() - if not int8_model_path.exists(): - unet = core.read_model(model_path / "unet/openvino_model.xml") + if UNET_INT8_OV_PATH.exists(): + print("Loading quantized model") + quantized_unet = core.read_model(UNET_INT8_OV_PATH) + else: + unet = core.read_model(UNET_OV_PATH) quantized_unet = nncf.quantize( model=unet, subset_size=subset_size, @@ -489,19 +963,12 @@ Create a quantized model from the pre-trained converted OpenVINO model. disable_bias_correction=True ) ) - ov.save_model(quantized_unet, int8_model_path / "unet/openvino_model.xml") - del quantized_unet - del unet - gc.collect() - for filename in model_path.rglob("*"): - if filename.is_dir(): - continue - relative_file_name = filename.relative_to(model_path) - if (int8_model_path / relative_file_name).exists(): - continue - dst_path = int8_model_path / relative_file_name - dst_path.parent.mkdir(exist_ok=True, parents=True) - shutil.copy(filename, dst_path) + ov.save_model(quantized_unet, UNET_INT8_OV_PATH) + + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino @@ -518,6 +985,14 @@ Create a quantized model from the pre-trained converted OpenVINO model. + + + + + + + + .. parsed-literal:: Output() @@ -531,6 +1006,19 @@ Create a quantized model from the pre-trained converted OpenVINO model. + + + + + + + +.. parsed-literal:: + + INFO:nncf:122 ignored nodes were found by name in the NNCFGraph + + + .. parsed-literal:: Output() @@ -543,11 +1031,29 @@ Create a quantized model from the pre-trained converted OpenVINO model. + + + + + + + + .. 
code:: ipython3 %%skip not $to_quantize.value - int8_pipe = OVDiffusionPipeline.from_pretrained(int8_model_path, device=device.value) + unet_optimized = core.compile_model(UNET_INT8_OV_PATH, device.value) + + int8_pipe = OVLatentConsistencyModelPipeline( + tokenizer=tokenizer, + text_encoder=text_enc, + unet=unet_optimized, + vae_decoder=vae_decoder, + scheduler=scheduler, + feature_extractor=feature_extractor, + safety_checker=safety_checker, + ) Let us check predictions with the quantized UNet using the same input data. @@ -560,14 +1066,16 @@ data. prompt = "a beautiful pink unicorn, 8k" num_inference_steps = 4 + torch.manual_seed(1234567) images = int8_pipe( prompt=prompt, num_inference_steps=num_inference_steps, guidance_scale=8.0, + lcm_origin_steps=50, + output_type="pil", height=512, width=512, - generator=torch.Generator().manual_seed(1234567) ).images display(images[0]) @@ -580,7 +1088,7 @@ data. -.. image:: latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_27_1.png +.. image:: latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_34_1.png Compare inference time of the FP16 and INT8 models @@ -619,6 +1127,8 @@ pipelines, we use median inference time on calibration subset. prompt, num_inference_steps=num_inference_steps, guidance_scale=8.0, + lcm_origin_steps=50, + output_type="pil", height=512, width=512, ) @@ -633,154 +1143,38 @@ pipelines, we use median inference time on calibration subset. %%skip not $to_quantize.value - int8_latency = calculate_inference_time(int8_pipe, validation_data) - del int8_pipe - gc.collect() - ov_pipe = OVDiffusionPipeline.from_pretrained(model_path, device=device.value) fp_latency = calculate_inference_time(ov_pipe, validation_data) + int8_latency = calculate_inference_time(int8_pipe, validation_data) print(f"Performance speed up: {fp_latency / int8_latency:.3f}") - - del ov_pipe - gc.collect(); .. 
parsed-literal:: - Performance speed up: 1.357 + Performance speed up: 1.319 Compare UNet file size -~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^^ .. code:: ipython3 - UNET_OV_PATH = model_path / "unet/openvino_model.xml" - UNET_INT8_OV_PATH = int8_model_path / "unet/openvino_model.xml" - - if UNET_INT8_OV_PATH.exists(): - fp16_ir_model_size = UNET_OV_PATH.with_suffix(".bin").stat().st_size / 1024 - quantized_model_size = UNET_INT8_OV_PATH.with_suffix(".bin").stat().st_size / 1024 - - print(f"FP16 model size: {fp16_ir_model_size:.2f} KB") - print(f"INT8 model size: {quantized_model_size:.2f} KB") - print(f"Model compression rate: {fp16_ir_model_size / quantized_model_size:.3f}") - - -.. parsed-literal:: - - FP16 model size: 1678912.69 KB - INT8 model size: 841591.46 KB - Model compression rate: 1.995 - - -Run Text to image generation using OpenVINO GenAI -------------------------------------------------- - - - -`OpenVINO™ GenAI `__ -is a library of the most popular Generative AI model pipelines, -optimized execution methods, and samples that run on top of highly -performant `OpenVINO -Runtime `__. - -|image0| - -This library is friendly to PC and laptop execution, and optimized for -resource consumption. It requires no external dependencies to run -generative models as it already includes all the core functionality. - -``openvino_genai.Text2ImagePipeline`` class supports inference of -`Diffusers -models `__. -For pipeline initialization, we should provide directory with converted -by Optimum Intel pipeline and specify inference device. Optionally, we -can provide configuration for LoRA Adapters using ``adapter_config``. -For starting generation process ``generate`` method should be used. -Basically, it required to provide input text prompt for image -generation. You can provide additional arguments like negative prompt, -number of steps, guidance scale, image width and height to control -generation process. - -.. 
|image0| image:: https://media.githubusercontent.com/media/openvinotoolkit/openvino.genai/refs/heads/master/src/docs/openvino_genai.svg - -.. code:: ipython3 - - device - - - - -.. parsed-literal:: - - Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') - - - -.. code:: ipython3 - - import ipywidgets as widgets + %%skip not $to_quantize.value - int8_can_be_used = int8_model_path.exists() and "GPU" not in device.value - use_quantized_model = widgets.Checkbox(value=int8_can_be_used, description="Use INT8 model", disabled=not int8_can_be_used) + fp16_ir_model_size = UNET_OV_PATH.with_suffix(".bin").stat().st_size / 1024 + quantized_model_size = UNET_INT8_OV_PATH.with_suffix(".bin").stat().st_size / 1024 - use_quantized_model - - + print(f"FP16 model size: {fp16_ir_model_size:.2f} KB") + print(f"INT8 model size: {quantized_model_size:.2f} KB") + print(f"Model compression rate: {fp16_ir_model_size / quantized_model_size:.3f}") .. parsed-literal:: - Checkbox(value=True, description='Use INT8 model') - - - -.. code:: ipython3 - - import openvino_genai as ov_genai - - used_model_path = model_path if not use_quantized_model.value else int8_model_path - - pipe = ov_genai.Text2ImagePipeline(used_model_path, device.value) - -.. 
code:: ipython3 - - from PIL import Image - import torch - import openvino as ov - - - class Generator(ov_genai.Generator): - def __init__(self, seed): - ov_genai.Generator.__init__(self) - self.generator = torch.Generator(device="cpu").manual_seed(seed) - - def next(self): - return torch.randn(1, generator=self.generator, dtype=torch.float32).item() - - def randn_tensor(self, shape: ov.Shape): - torch_tensor = torch.randn(list(shape), generator=self.generator, dtype=torch.float32) - return ov.Tensor(torch_tensor.numpy()) - - - prompt = "a beautiful pink unicorn, 8k" - num_inference_steps = 4 - - random_generator = Generator(1234567) - - image_tensor = pipe.generate(prompt, width=512, height=512, num_inference_steps=4, num_images_per_prompt=1, generator=random_generator) - - image = Image.fromarray(image_tensor.data[0]) - - image - - - - -.. image:: latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_37_0.png - + FP16 model size: 1678912.37 KB + INT8 model size: 840792.93 KB + Model compression rate: 1.997 Interactive demo @@ -792,7 +1186,7 @@ Interactive demo import random import gradio as gr - import numpy as np + from functools import partial MAX_SEED = np.iinfo(np.int32).max @@ -804,6 +1198,7 @@ Interactive demo def generate( + pipeline: OVLatentConsistencyModelPipeline, prompt: str, seed: int = 0, width: int = 512, @@ -811,15 +1206,28 @@ Interactive demo guidance_scale: float = 8.0, num_inference_steps: int = 4, randomize_seed: bool = False, + num_images: int = 1, progress=gr.Progress(track_tqdm=True), ): seed = randomize_seed_fn(seed, randomize_seed) - random_generator = Generator(seed) - result = pipe.generate( - prompt, width=width, height=height, guidance_scale=guidance_scale, num_inference_steps=num_inference_steps, generator=random_generator - ) - result = Image.fromarray(result.data[0]) + torch.manual_seed(seed) + result = pipeline( + prompt=prompt, + width=width, + height=height, + 
guidance_scale=guidance_scale, + num_inference_steps=num_inference_steps, + num_images_per_prompt=num_images, + lcm_origin_steps=50, + output_type="pil", + ).images[0] return result, seed + + + generate_original = partial(generate, ov_pipe) + generate_optimized = partial(generate, int8_pipe) + quantized_model_present = int8_pipe is not None + generate = generate_optimized if quantized_model_present else generate_original .. code:: ipython3 @@ -831,7 +1239,7 @@ Interactive demo from gradio_helper import make_demo_lcm - demo = make_demo_lcm(fn=generate) + demo = make_demo_lcm(fn=generate, quantized=quantized_model_present) try: demo.queue().launch(debug=False) @@ -840,3 +1248,8 @@ Interactive demo # if you are launching remotely, specify server_name and server_port # demo.launch(server_name='your server name', server_port='server port in int') # Read more in the docs: https://gradio.app/docs/ + +.. code:: ipython3 + + # please uncomment and run this cell for stopping gradio interface + # demo.close() diff --git a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_13_0.jpg b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_13_0.jpg deleted file mode 100644 index 1ea60cbbf8d222..00000000000000 --- a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_13_0.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:affe930458b7c4c643d79b905269590fc084ca969ee5f0545b8bba525006fa8a -size 19295 diff --git a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_13_0.png b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_13_0.png deleted file mode 100644 
index 5955c1e4362d9f..00000000000000 --- a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_13_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7ef1cbdb29f5fea43c3624c52f20799e4677fc0f52f6451bbe24bf0cf11a8463 -size 389641 diff --git a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_21_0.jpg b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_21_0.jpg new file mode 100644 index 00000000000000..c6b4e28670b6d5 --- /dev/null +++ b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_21_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:124ac28d484e3f73c150deb379374cec294b47803cd2d8914461dc8ea215afd0 +size 25960 diff --git a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_21_0.png b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_21_0.png new file mode 100644 index 00000000000000..08ecde8427d295 --- /dev/null +++ b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_21_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbd41bc286b8dfb86e049235d232d30fd7a61ea4febfb1e4ccc340367a84ebb0 +size 412225 diff --git a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_27_1.jpg b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_27_1.jpg deleted file mode 100644 index 
6408a5658cf117..00000000000000 --- a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_27_1.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e8925bd54982f37545c019dbe0594bd794045ee40e5627f0121b221b44471c62 -size 19352 diff --git a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_27_1.png b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_27_1.png deleted file mode 100644 index 7b0ec07f79f970..00000000000000 --- a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_27_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3c7034ea0158e17cbd009e742938fe42fd1e0fb0011d0d2512524d6fab00889e -size 392614 diff --git a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_34_1.jpg b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_34_1.jpg new file mode 100644 index 00000000000000..08bc3ddf0e0710 --- /dev/null +++ b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_34_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bce5a9ae0251f165e2becde51d5343c55a99c3234f327c9951f8a0279514a2e +size 22266 diff --git a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_34_1.png b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_34_1.png new file mode 100644 index 
00000000000000..75211e26b3b388 --- /dev/null +++ b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_34_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89231776665c02abb82840d447f7804d7aca7118ec11d1296e7e1f738fd11e63 +size 392583 diff --git a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_37_0.jpg b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_37_0.jpg deleted file mode 100644 index 4710b7e9307c1b..00000000000000 --- a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_37_0.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8b3bf64cb2d0dc5daa9387092f9c09eea26af451b5a6e0e7c5750d22a5fb66b1 -size 21932 diff --git a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_37_0.png b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_37_0.png deleted file mode 100644 index 7667008b2d5aa5..00000000000000 --- a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_37_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:307294292b8bf501d51fae0bc667d06907d8d5b2adf9ed139467b766eccac901 -size 401843 diff --git a/docs/notebooks/latent-consistency-models-optimum-demo-with-output.rst b/docs/notebooks/latent-consistency-models-optimum-demo-with-output.rst new file mode 100644 index 00000000000000..a0bce9d85c7196 --- /dev/null +++ b/docs/notebooks/latent-consistency-models-optimum-demo-with-output.rst @@ -0,0 +1,252 @@ 
+Latent Consistency Model using Optimum-Intel OpenVINO +===================================================== + +This notebook provides instructions how to run Latent Consistency Model +(LCM). It allows to setup standard Hugging Face diffusers pipeline and +Optimum Intel pipeline optimized for Intel hardware including CPU and +GPU. Running inference on CPU and GPU it is easy to compare performance +and time required to generate an image for provided prompt. The notebook +can be also used on other Intel hardware with minimal or no +modifications. + +.. image:: https://github.com/openvinotoolkit/openvino_notebooks/assets/10940214/1858dae4-72fd-401e-b055-66d503d82446 + +Optimum Intel is an interface from Hugging Face between both diffusers +and transformers libraries and various tools provided by Intel to +accelerate pipelines on Intel hardware. It allows to perform +quantization of the models hosted on Hugging Face. In this notebook +OpenVINO is used for AI-inference acceleration as a backend for Optimum +Intel! + +For more details please refer to Optimum Intel repository +https://github.com/huggingface/optimum-intel + +LCMs are the next generation of generative models after Latent Diffusion +Models (LDMs). They are proposed to overcome the slow iterative sampling +process of Latent Diffusion Models (LDMs), enabling fast inference with +minimal steps (from 2 to 4) on any pre-trained LDMs (e.g. Stable +Diffusion). To read more about LCM please refer to +https://latent-consistency-models.github.io/ + + +**Table of contents:** + + +- `Prerequisites <#prerequisites>`__ +- `Full precision model on the + CPU <#using-full-precision-model-in-cpu-with-latentconsistencymodelpipeline>`__ +- `Running inference using Optimum Intel + OVLatentConsistencyModelPipeline <#running-inference-using-optimum-intel-ovlatentconsistencymodelpipeline>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. 
+ +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +Prerequisites +~~~~~~~~~~~~~ + + + +Install required packages + +.. code:: ipython3 + + %pip install -q "openvino>=2023.3.0" + %pip install -q "onnx>=1.11.0,<1.16.2" + %pip install -q "optimum-intel[diffusers]@git+https://github.com/huggingface/optimum-intel.git" "ipywidgets" "torch>=2.1" "transformers>=4.33.0" --extra-index-url https://download.pytorch.org/whl/cpu + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + + +.. code:: ipython3 + + import warnings + + warnings.filterwarnings("ignore") + +Showing Info Available Devices +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +The ``available_devices`` property shows the available devices in your +system. The “FULL_DEVICE_NAME” option to ``ie.get_property()`` shows the +name of the device. Check what is the ID name for the discrete GPU, if +you have integrated GPU (iGPU) and discrete GPU (dGPU), it will show +``device_name="GPU.0"`` for iGPU and ``device_name="GPU.1"`` for dGPU. +If you just have either an iGPU or dGPU that will be assigned to +``"GPU"`` + +Note: For more details about GPU with OpenVINO visit this +`link `__. +If you have been facing any issue in Ubuntu 20.04 or Windows 11 read +this +`blog `__. + +.. code:: ipython3 + + import openvino as ov + import openvino.properties as props + + + core = ov.Core() + devices = core.available_devices + + for device in devices: + device_name = core.get_property(device, props.device.full_name) + print(f"{device}: {device_name}") + + +.. 
parsed-literal:: + + CPU: Intel(R) Core(TM) Ultra 7 155H + GNA.GNA_SW: GNA_SW + GNA.GNA_HW: GNA_HW + GPU: Intel(R) Arc(TM) Graphics (iGPU) + NPU: Intel(R) AI Boost + + +Using full precision model in CPU with ``LatentConsistencyModelPipeline`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +Standard pipeline for the Latent Consistency Model(LCM) from Diffusers +library is used here. For more information please refer to +https://huggingface.co/docs/diffusers/en/api/pipelines/latent_consistency_models + +.. code:: ipython3 + + from diffusers import LatentConsistencyModelPipeline + import gc + + pipeline = LatentConsistencyModelPipeline.from_pretrained("SimianLuo/LCM_Dreamshaper_v7") + + + +.. parsed-literal:: + + Loading pipeline components...: 0%| | 0/7 [00:00=2.3.0" "torchvision" "torchaudio" --index-url https://download.pytorch.org/whl/cpu - %pip install -q "git+https://github.com/huggingface/optimum-intel.git" --index-url https://download.pytorch.org/whl/cpu - %pip install -q "nncf>=2.14.0" "sentencepiece" "tokenizers>=0.12.1" "transformers>=4.45.0" "gradio>=4.36" - %pip install -q -U "openvino-tokenizers>=2024.5.0" "openvino>=2024.5.0" "openvino-genai>=2024.5.0"| + %pip install -q "torch>=2.1.0" "torchvision" "torchaudio" --index-url https://download.pytorch.org/whl/cpu + %pip install -q "git+https://github.com/eaidova/optimum-intel.git@ea/minicpmv" + %pip install -q "nncf>=2.13.0" "sentencepiece" "tokenizers>=0.12.1" "transformers>=4.45.0" "gradio>=4.36" + %pip install -q -U --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly openvino-tokenizers openvino openvino-genai utility_files = ["notebook_utils.py", "cmd_helper.py"] @@ -134,8 +134,8 @@ Install required dependencies r = requests.get( url=f"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/{local_path.name}", ) - with local_path.open("w") as f: - f.write(r.text) + with local_path.open("w") as f: + f.write(r.text) 
Convert and Optimize Model -------------------------- @@ -329,7 +329,7 @@ conversation about provided images content. .. code:: ipython3 - import openvino_genai as ov_genai + from openvino_genai import VLMPipeline, GenerationConfig Select inference device ~~~~~~~~~~~~~~~~~~~~~~~ @@ -397,7 +397,7 @@ and inference device. .. code:: ipython3 - ov_model = ov_genai.VLMPipeline(str(model_base_path / model_variant.value), device=device.value) + ov_model = VLMPipeline(str(model_base_path / model_variant.value), device=device.value) Run model inference ------------------- @@ -435,7 +435,7 @@ one of the most critical aspects of a smooth experience. from io import BytesIO import numpy as np - config = ov_genai.GenerationConfig() + config = GenerationConfig() config.max_new_tokens = 100 @@ -445,7 +445,7 @@ one of the most critical aspects of a smooth experience. image = Image.open(BytesIO(response.content)).convert("RGB") else: image = Image.open(image_file).convert("RGB") - image_data = np.array(image.getdata()).reshape(1, image.size[1], image.size[0], 3).astype(np.byte) + image_data = np.array(image.getdata()).reshape(1, 3, image.size[1], image.size[0]).astype(np.byte) return image, ov.Tensor(image_data) diff --git a/docs/notebooks/llava-multimodal-chatbot-optimum-with-output.rst b/docs/notebooks/llava-multimodal-chatbot-optimum-with-output.rst index b278013b8a258b..ae14876b33b633 100644 --- a/docs/notebooks/llava-multimodal-chatbot-optimum-with-output.rst +++ b/docs/notebooks/llava-multimodal-chatbot-optimum-with-output.rst @@ -121,9 +121,9 @@ Install required dependencies import requests %pip install -q "torch>=2.1.0" "torchvision" "torchaudio" --index-url https://download.pytorch.org/whl/cpu - %pip install -q "git+https://github.com/hugggingface/optimum-intel.git" --index-url https://download.pytorch.org/whl/cpu - %pip install -q "nncf>=2.14.0" "sentencepiece" "tokenizers>=0.12.1" "transformers>=4.45.0" "gradio>=4.36" --index-url https://download.pytorch.org/whl/cpu - 
%pip install -q -U "openvino-tokenizers>=2024.5.0" "openvino>=2024.5.0" "openvino-genai>=2024.5.0" + %pip install -q "git+https://github.com/eaidova/optimum-intel.git@ea/minicpmv" + %pip install -q "nncf>=2.13.0" "sentencepiece" "tokenizers>=0.12.1" "transformers>=4.45.0" "gradio>=4.36" + %pip install -q -U --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly openvino-tokenizers openvino openvino-genai utility_files = ["notebook_utils.py", "cmd_helper.py"] @@ -302,7 +302,7 @@ Prepare OpenVINO based inference pipeline OpenVINO integration with Optimum Intel provides ready-to-use API for model inference that can be used for smooth integration with -transformers-based solutions. For loading model, we will use +transformers-based solutions. For loading pixtral model, we will use ``OVModelForVisualCausalLM`` class that have compatible interface with Transformers LLaVA implementation. For loading a model, ``from_pretrained`` method should be used. It accepts path to the model diff --git a/docs/notebooks/llava-next-multimodal-chatbot-with-output.rst b/docs/notebooks/llava-next-multimodal-chatbot-with-output.rst index 6696ee663a8a30..3e26205ee0272b 100644 --- a/docs/notebooks/llava-next-multimodal-chatbot-with-output.rst +++ b/docs/notebooks/llava-next-multimodal-chatbot-with-output.rst @@ -20,7 +20,7 @@ model for creating multimodal chatbot, but the similar actions are also applicable to other models of LLaVA family compatible with HuggingFace transformers implementation. 
Additionally, we demonstrate how to apply stateful transformation on LLM part and model optimization techniques -like weights compression using +like weights compression and quantization using `NNCF `__ @@ -28,18 +28,28 @@ like weights compression using - `Prerequisites <#prerequisites>`__ -- `Convert model to OpenVINO IR format using Optimum - CLI <#convert-model-to-openvino-ir-format-using-optimum-cli>`__ +- `Download PyTorch model <#download-pytorch-model>`__ +- `Convert model to OpenVINO Intermediate + Representation <#convert-model-to-openvino-intermediate-representation>`__ + + - `Image Encoder <#image-encoder>`__ + - `Text Embedding <#text-embedding>`__ + - `Language Model <#language-model>`__ + - `Compress Language Model Weights to 4 bits <#compress-language-model-weights-to-4-bits>`__ +- `Quantize Image Encoder to 8 + bits <#quantize-image-encoder-to-8-bits>`__ + + - `Prepare datasets <#prepare-datasets>`__ + - `Perform quantization <#perform-quantization>`__ + - `Prepare model inference pipeline <#prepare-model-inference-pipeline>`__ +- `Run OpenVINO model inference <#run-openvino-model-inference>`__ - `Select device <#select-device>`__ - - `Select model variant <#select-model-variant>`__ - - `Load OpenVINO Model <#load-openvino-model>`__ -- `Run OpenVINO model inference <#run-openvino-model-inference>`__ - `Interactive demo <#interactive-demo>`__ Installation Instructions @@ -59,9 +69,7 @@ Prerequisites .. 
code:: ipython3 - %pip install -q "nncf>=2.14.0" "torch>=2.1" "transformers>=4.39.1" "accelerate" "pillow" "gradio>=4.26" "datasets>=2.14.6" "tqdm" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q -U "openvino>=2024.5.0" "openvino-tokenizers>=2024.5.0" "openvino-genai>=2024.5" - %pip install -q "git+https://github.com/hugggingface/optimum-intel.git" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "openvino>=2024.0.0" "nncf>=2.9.0" "torch>=2.1" "transformers>=4.39.1" "accelerate" "pillow" "gradio>=4.26" "datasets>=2.14.6" "tqdm" --extra-index-url https://download.pytorch.org/whl/cpu .. code:: ipython3 @@ -69,63 +77,435 @@ Prerequisites import requests - utility_files = ["notebook_utils.py", "cmd_helper.py"] + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) - for utility in utility_files: - local_path = Path(utility) - if not local_path.exists(): - r = requests.get( - url=f"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/{local_path.name}", - ) - with local_path.open("w") as f: - f.write(r.text) + MODEL_DIR = Path("model") + IMAGE_ENCODER_PATH = MODEL_DIR / "image_encoder.xml" + INPUT_EMBEDDING_PATH = MODEL_DIR / "input_embeddings.xml" + LANGUAGE_MODEL_PATH = MODEL_DIR / "language_model.xml" + + requires_pt_model_loading = not all([p.exists() for p in [IMAGE_ENCODER_PATH, INPUT_EMBEDDING_PATH, LANGUAGE_MODEL_PATH]]) + +Download PyTorch model +---------------------- + + + +.. 
code:: ipython3 + + from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration + import torch + import gc + + processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf") + image_encoder_model, input_embedding_model, language_model = None, None, None + + + class ImageEncoder(torch.nn.Module): + def __init__(self, config, vision_tower, multi_modal_projector): + super().__init__() + self.config = config + self.vision_tower = vision_tower + self.multi_modal_projector = multi_modal_projector - model_id = "llava-hf/llava-v1.6-mistral-7b-hf" - MODEL_DIR = Path(model_id.split("/")[-1].replace("-hf", "-ov")) + def forward(self, pixel_values): + batch_size, num_patches, num_channels, height, width = pixel_values.shape + reshaped_pixel_values = pixel_values.view(batch_size * num_patches, num_channels, height, width) + image_features = self.vision_tower(reshaped_pixel_values, output_hidden_states=True) + selected_image_feature = image_features.hidden_states[self.config.vision_feature_layer] + if self.config.vision_feature_select_strategy == "default": + selected_image_feature = selected_image_feature[:, 1:] + elif self.config.vision_feature_select_strategy == "full": + selected_image_feature = selected_image_feature + image_features = self.multi_modal_projector(selected_image_feature) + return image_features + + + if requires_pt_model_loading: + model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf", low_cpu_mem_usage=True) + model.config.save_pretrained(MODEL_DIR) + image_encoder_model = ImageEncoder(model.config, model.vision_tower, model.multi_modal_projector) + input_embedding_model = input_embedding_model = model.get_input_embeddings() + language_model = model.language_model + del model + gc.collect() -Convert model to OpenVINO IR format using Optimum CLI +Convert model to OpenVINO Intermediate Representation ----------------------------------------------------- OpenVINO supports PyTorch 
models via conversion to OpenVINO Intermediate -Representation (IR) format. For convenience, we will use OpenVINO -integration with HuggingFace Optimum. `Optimum -Intel `__ is the -interface between the Transformers and Diffusers libraries and the -different tools and libraries provided by Intel to accelerate end-to-end -pipelines on Intel architectures. - -Among other use cases, Optimum Intel provides a simple interface to -optimize your Transformers and Diffusers models, convert them to the -OpenVINO Intermediate Representation (IR) format and run inference using -OpenVINO Runtime. ``optimum-cli`` provides command line interface for -model conversion and optimization. - -General command format: - -.. code:: bash - - optimum-cli export openvino --model --task - -where task is task to export the model for, if not specified, the task -will be auto-inferred based on the model. You can find a mapping between -tasks and model classes in Optimum TaskManager -`documentation `__. -Additionally, you can specify weights compression using -``--weight-format`` argument with one of following options: ``fp32``, -``fp16``, ``int8`` and ``int4``. Fro int8 and int4 -`nncf `__ will be used for -weight compression. More details about model export provided in `Optimum -Intel -documentation `__. +Representation (IR). `OpenVINO model conversion +API `__ +should be used for these purposes. ``ov.convert_model`` function accepts +original PyTorch model instance and example input for tracing and +returns ``ov.Model`` representing this model in OpenVINO framework. +Converted model can be used for saving on disk using ``ov.save_model`` +function or directly loading on device using ``core.compile_model``. + +LLaVA-NeXT is autoregressive transformer generative model, it means that +each next model step depends on the model output from the previous step.
The +generation approach is based on the assumption that the probability +distribution of a word sequence can be decomposed into the product of +conditional next word distributions. In other words, model predicts the +next token in the loop guided by previously generated tokens until the +stop-condition is reached (generated sequence of maximum length +or end of string token obtained). The way the next token will be +selected over predicted probabilities is driven by the selected decoding +methodology. You can find more information about the most popular +decoding methods in this +`blog `__. The entry point +for the generation process for models from the Hugging Face Transformers +library is the ``generate`` method. You can find more information about +its parameters and configuration in the +`documentation `__. +To preserve flexibility in the selection decoding methodology, we will +convert only model inference for one step. + +The inference flow differs between the first step and the following ones. On the +first step, the model accepts the preprocessed input instruction and image, which are +transformed to the unified embedding space using ``input_embedding`` and +``image_encoder`` models, after that ``language_model``, LLM-based part +of model, runs on input embeddings to predict probability of next +generated tokens. On the next step, ``language_model`` accepts only next +token id selected based on sampling strategy and processed by +``input_embedding`` model and cached attention key and values. Since the +output side is auto-regressive, an output token hidden state remains the +same once computed for every further generation step. Therefore, +recomputing it every time you want to generate a new token seems +wasteful. With the cache, the model saves the hidden state once it has +been computed. The model only computes the one for the most recently +generated output token at each time step, re-using the saved ones for +hidden tokens.
This reduces the generation complexity from +:math:`O(n^3)` to :math:`O(n^2)` for a transformer model. More details +about how it works can be found in this +`article `__. + +To sum up above, model consists of 3 parts: + +- **Image Encoder** for encoding input images into embedding space +- **Input Embedding** for conversion input text tokens into embedding + space +- **Language Model** for generation answer based on input embeddings + provided by Image Encoder and Input Embedding models. + +Let’s convert each model part. + +Image Encoder +~~~~~~~~~~~~~ + + + +Image Encoder is represented in LLaVA by pretrained CLIP model. + +.. code:: ipython3 + + import torch + import openvino as ov + import gc + + + def cleanup_torchscript_cache(): + """ + Helper for removing cached model representation + """ + torch._C._jit_clear_class_registry() + torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() + torch.jit._state._clear_class_state() + + + if not IMAGE_ENCODER_PATH.exists(): + ov_image_encoder = ov.convert_model(image_encoder_model, example_input=torch.zeros((1, 5, 3, 336, 336))) + ov.save_model(ov_image_encoder, IMAGE_ENCODER_PATH) + del ov_image_encoder + cleanup_torchscript_cache() + + del image_encoder_model + gc.collect(); + +Text Embedding +~~~~~~~~~~~~~~ + + + +In LLMs, input embedding is a part of language model, but for LLaVA the +first step hidden state produced by this model part should be integrated +with image embeddings into common embedding space. For ability to reuse +this model part and avoid introduction of llm model instance, we will +use it separately. + +.. 
code:: ipython3 + + llm_input = None + + if not LANGUAGE_MODEL_PATH.exists(): + llm_input = input_embedding_model(torch.ones((2, 2), dtype=torch.int64)) + + if not INPUT_EMBEDDING_PATH.exists(): + ov_input_embeddings_model = ov.convert_model(input_embedding_model, example_input=torch.ones((2, 2), dtype=torch.int64)) + ov.save_model(ov_input_embeddings_model, INPUT_EMBEDDING_PATH) + del ov_input_embeddings_model + cleanup_torchscript_cache() + + del input_embedding_model + gc.collect(); + +Language Model +~~~~~~~~~~~~~~ + + + +Language Model is responsible for generation answer in LLaVA. This part +is very similar to standard LLM for text generation. Our model uses +`mistralai/Mistral-7B-Instruct-v0.2 `__ +as base LLM. To optimize the generation process and use memory more +efficiently, HuggingFace transformers API provides a mechanism for +caching model state externally using ``use_cache=True`` parameter and +``past_key_values`` argument in inputs and outputs. With the cache, the +model saves the hidden state once it has been computed. The model only +computes the one for the most recently generated output token at each +time step, re-using the saved ones for hidden tokens. This reduces the +generation complexity from :math:`O(n^3)` to :math:`O(n^2)` for a +transformer model. With this option, the model gets the previous step’s +hidden states (cached attention keys and values) as input and +additionally provides hidden states for the current step as output. It +means for all next iterations, it is enough to provide only a new token +obtained from the previous step and cached key values to get the next +token prediction. + +With increasing model size like in modern LLMs, we also can note an +increase in the number of attention blocks and size past key values +tensors respectively. 
The strategy for handling cache state as model +inputs and outputs in the inference cycle may become a bottleneck for +memory-bounded systems, especially with processing long input sequences, +for example in a chatbot scenario. OpenVINO suggests a transformation +that removes inputs and corresponding outputs with cache tensors from +the model keeping cache handling logic inside the model. Such models are +also called stateful. A stateful model is a model that implicitly +preserves data between two consecutive inference calls. The tensors +saved from one run are kept in an internal memory buffer called a +``state`` or a ``variable`` and may be passed to the next run, while +never being exposed as model output. Hiding the cache enables storing +and updating the cache values in a more device-friendly representation. +It helps to reduce memory consumption and additionally optimize model +performance. More details about stateful models and working with state +can be found in `OpenVINO +documentation `__. + +.. code:: ipython3 + + from typing import Optional, Tuple, List + from openvino.runtime import opset13 + import numpy as np + + + def model_has_state(ov_model: ov.Model): + return len(ov_model.get_sinks()) > 0 + + + def model_has_input_output_name(ov_model: ov.Model, name: str): + """ + Helper function for checking that model has specified input or output name + + Parameters: + ov_model (ov.Model): + name (str): + name of input or output + + Returns: + True if input or output with requested name exists else False + """ + return name in sum([list(t.get_names()) for t in ov_model.inputs + ov_model.outputs], []) + + + def fuse_cache_reorder( + ov_model: ov.Model, + not_kv_inputs: List[str], + key_value_input_names: List[str], + gather_dim: int, + ): + """ + Fuses reored_cache during generate cycle into ov.Model. Used with stateful models, because we can not modify model state directly. 
+ + Adds a new beam_idx parameter and Gather op per each kv-cache input in a given model. + Should be run before make_stateful. Implements optimumum's _reorder_cache + inside the model in the beginning of each iteration. + Gather works along given gather_dim dimension that may vary from model to model. + KV-cache inputs are identified based on names in key_value_input_names. + Append the new beam_idx parameter to not_kv_inputs. + + Parameters: + ov_model (`ov.Model`): + openvino model for processing + not_kv_inputs (`List[str]`): + list of input nodes in model that not related to past key values + key_value_input_names (`List[str]`): + list of names for key value input layers + gather_dim (int): + dimension for gathering cache during reorder pass + """ + + if model_has_input_output_name(ov_model, "beam_idx"): + raise ValueError("Model already has fused cache") + input_batch = ov_model.input("inputs_embeds").get_partial_shape()[0] + beam_idx = opset13.parameter(name="beam_idx", dtype=ov.Type.i32, shape=ov.PartialShape([input_batch])) + beam_idx.output(0).get_tensor().add_names({"beam_idx"}) # why list is not accepted? 
+ ov_model.add_parameters([beam_idx]) + not_kv_inputs.append(ov_model.inputs[-1]) + # Go over all cache parameters and fuse _reorder_cache with indices provided by the new parameter beam_idx + for input_name in key_value_input_names: + parameter_output_port = ov_model.input(input_name) + consumers = parameter_output_port.get_target_inputs() + gather = opset13.gather(parameter_output_port, beam_idx, opset13.constant(gather_dim)) + for consumer in consumers: + consumer.replace_source_output(gather.output(0)) + ov_model.validate_nodes_and_infer_types() + + + def build_state_initializer(ov_model: ov.Model, batch_dim: int): + """ + Build initialization ShapeOf Expression for all ReadValue ops + + Parameters: + ov_model (ov.Model): + openvino model + batch_dim (int): + index of dimension corresponding to batch size + """ + input_ids = ov_model.input("inputs_embeds") + batch = opset13.gather( + opset13.shape_of(input_ids, output_type="i64"), + opset13.constant([0]), + opset13.constant(0), + ) + for op in ov_model.get_ops(): + if op.get_type_name() == "ReadValue": + dims = [dim.min_length for dim in list(op.get_output_partial_shape(0))] + dims[batch_dim] = batch + dims = [(opset13.constant(np.array([dim], dtype=np.int64)) if isinstance(dim, int) else dim) for dim in dims] + shape = opset13.concat(dims, axis=0) + broadcast = opset13.broadcast(opset13.constant(0.0, dtype=op.get_output_element_type(0)), shape) + op.set_arguments([broadcast]) + ov_model.validate_nodes_and_infer_types() + + + def make_stateful( + ov_model: ov.Model, + not_kv_inputs: List[str], + key_value_input_names: List[str], + key_value_output_names: List[str], + batch_dim: int, + num_attention_heads: int, + num_beams_and_batch: int = None, + ): + """ + Hides kv-cache inputs and outputs inside the model as variables. 
+ + Parameters: + ov_model (ov.Model): + openvino model + not_kv_inputs (`List[str]`): + list of input nodes in model that not related to past key values + key_value_input_names (`List[str]`): + list of names for key value input layers + key_value_output_names (`List[str]`): + list of names for key value input layers + batch_dim (int): + index of batch dimension in key value layers + num_attention_heads (int): + number of attention heads for batch dimension initialization + num_beams_an_batch (int): + precalculated number of beams and batch for shapes initialization + """ + from openvino._offline_transformations import apply_make_stateful_transformation + + input_output_map = {} + + if num_beams_and_batch is not None: + # Set batch size for input_ids and attention mask to avoid dynamic dimension got propagated from the end of the model back to ReadValue + for input in not_kv_inputs: + shape = input.get_partial_shape() + if shape.rank.get_length() <= 2: # == 1 for beam_index + shape[0] = num_beams_and_batch + input.get_node().set_partial_shape(shape) + for kv_name_pair in zip(key_value_input_names, key_value_output_names): + input_output_map[kv_name_pair[0]] = kv_name_pair[1] + if num_beams_and_batch is not None: + input = ov_model.input(kv_name_pair[0]) + shape = input.get_partial_shape() + shape[batch_dim] = num_beams_and_batch * num_attention_heads + input.get_node().set_partial_shape(shape) + + if num_beams_and_batch is not None: + # Re-validation model if shapes are altered above + ov_model.validate_nodes_and_infer_types() + + apply_make_stateful_transformation(ov_model, input_output_map) + if num_beams_and_batch is None: + build_state_initializer(ov_model, batch_dim) + + + def patch_stateful(ov_model): + key_value_input_names = [key.get_any_name() for key in ov_model.inputs[2:-1]] + key_value_output_names = [key.get_any_name() for key in ov_model.outputs[1:]] + not_kv_inputs = [input for input in ov_model.inputs if not any(name in key_value_input_names for 
name in input.get_names())] + if not key_value_input_names or not key_value_output_names: + return + batch_dim = 0 + num_attention_heads = 1 + + fuse_cache_reorder(ov_model, not_kv_inputs, key_value_input_names, batch_dim) + make_stateful( + ov_model, + not_kv_inputs, + key_value_input_names, + key_value_output_names, + batch_dim, + num_attention_heads, + None, + ) .. code:: ipython3 - from cmd_helper import optimum_cli + make_stateful_model = True + core = ov.Core() - if not (MODEL_DIR / "FP16").exists(): - optimum_cli(model_id, MODEL_DIR / "FP16", additional_args={"weight-format": "fp16"}) + if not LANGUAGE_MODEL_PATH.exists(): + pkv = language_model(inputs_embeds=llm_input, attention_mask=torch.ones((2, 2), dtype=torch.int64))[1] + model_inputs = ["attention_mask", "position_ids"] + model_outputs = ["logits"] + for idx in range(len(pkv)): + model_inputs.extend([f"past_key_values.{idx}.key", f"past_key_values.{idx}.value"]) + model_outputs.extend([f"present.{idx}.key", f"present.{idx}.value"]) + model_inputs.append("inputs_embeds") + language_model.config.torchscript = True + position_ids = torch.tensor([[2, 3], [2, 3]]) + ov_model = ov.convert_model( + language_model, + example_input={ + "inputs_embeds": llm_input, + "attention_mask": torch.ones((2, 4)), + "past_key_values": pkv, + "position_ids": position_ids, + }, + ) + + for input, input_name in zip(ov_model.inputs, model_inputs): + input.get_tensor().set_names({input_name}) + + for output, output_name in zip(ov_model.outputs, model_outputs): + output.get_tensor().set_names({output_name}) + if make_stateful_model: + patch_stateful(ov_model) + ov.save_model(ov_model, LANGUAGE_MODEL_PATH) + del ov_model + cleanup_torchscript_cache() + del language_model + gc.collect() Compress Language Model Weights to 4 bits ----------------------------------------- @@ -136,11 +516,9 @@ For reducing memory consumption, weights compression optimization can be applied using `NNCF `__. 
Weight compression aims to reduce the memory footprint of a model. It can also lead to significant performance improvement for large memory-bound -models, such as Large Language Models (LLMs). - -LLMs and other models, which require extensive memory to store the -weights during inference, can benefit from weight compression in the -following ways: +models, such as Large Language Models (LLMs). LLMs and other models, +which require extensive memory to store the weights during inference, +can benefit from weight compression in the following ways: - enabling the inference of exceptionally large models that cannot be accommodated in the memory of the device; @@ -196,10 +574,7 @@ documentation `__ (Neural Network +Compression Framework) and infer quantized model via OpenVINO™ Toolkit. +`NNCF `__ enables +post-training quantization by adding quantization layers into model +graph and then using a subset of the training dataset to initialize the +parameters of these additional quantization layers. Quantized operations +are executed in ``INT8`` instead of ``FP32``/``FP16`` making model +inference faster. The optimization process contains the following steps: + +1. Prepare quantization dataset +2. Quantize the converted OpenVINO model with NNCF. +3. Save quantized model on disk for next usage. + +.. + + **Note:** quantization process may require additional time and memory + for performing. You can disable it using widget below: + +.. code:: ipython3 + + from notebook_utils import quantization_widget + + to_quantize = quantization_widget() + + to_quantize + + + + +.. parsed-literal:: + + Checkbox(value=True, description='Quantization') + + + +.. 
code:: ipython3 + + IMAGE_ENCODER_PATH_INT8 = IMAGE_ENCODER_PATH.parent / IMAGE_ENCODER_PATH.name.replace(".xml", "-int8.xml") + + + import requests + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", + ) + open("skip_kernel_extension.py", "w").write(r.text) + + %load_ext skip_kernel_extension + +Prepare datasets +~~~~~~~~~~~~~~~~ + + + +The `Conceptual +Captions `__ dataset +consisting of ~3.3M images annotated with captions is used to quantize +model. + +.. code:: ipython3 + + %%skip not $to_quantize.value + + import requests + from io import BytesIO + import numpy as np + from PIL import Image + from requests.packages.urllib3.exceptions import InsecureRequestWarning + requests.packages.urllib3.disable_warnings(InsecureRequestWarning) + + + def get_pil_from_url(url): + """ + Downloads and converts an image from a URL to a PIL Image object. + """ + response = requests.get(url, verify=False, timeout=20) + image = Image.open(BytesIO(response.content)) + return image.convert("RGB") + + def collate_fn(example, image_column="image_url"): + """ + Preprocesses an example by loading and transforming image and text data. + Checks if the text data in the example is valid by calling the `check_text_data` function. + Downloads the image specified by the URL in the image_column by calling the `get_pil_from_url` function. + If there is any error during the download process, returns None. + Returns the preprocessed inputs with transformed image and text data. + """ + assert len(example) == 1 + example = example[0] + url = example[image_column] + try: + image = get_pil_from_url(url) + h, w = image.size + if h == 1 or w == 1: + return None + except Exception: + return None + + inputs = processor.image_processor(images=[image], return_tensors="pt") + return inputs + +.. 
code:: ipython3 + + %%skip not $to_quantize.value + + import torch + from datasets import load_dataset + from tqdm.notebook import tqdm + + def prepare_calibration_data(dataloader, init_steps): + """ + This function prepares calibration data from a dataloader for a specified number of initialization steps. + It iterates over the dataloader, fetching batches and storing the relevant data. + """ + data = [] + print(f"Fetching {init_steps} samples for the initialization...") + with tqdm(total=init_steps) as pbar: + for batch in dataloader: + if len(data) == init_steps: + break + if batch: + pbar.update(1) + with torch.no_grad(): + data.append( + { + "pixel_values": batch["pixel_values"].to("cpu") + } + ) + return data + + + def prepare_dataset(opt_init_steps=50, max_train_samples=1000): + """ + Prepares a vision-text dataset for quantization. + """ + dataset = load_dataset("google-research-datasets/conceptual_captions", trust_remote_code=True) + train_dataset = dataset["train"].shuffle(seed=42) + dataloader = torch.utils.data.DataLoader(train_dataset, collate_fn=collate_fn, batch_size=1) + calibration_data = prepare_calibration_data(dataloader, opt_init_steps) + return calibration_data + +.. code:: ipython3 + + %%skip not $to_quantize.value + + vcalibration_data = [] + if not IMAGE_ENCODER_PATH_INT8.exists(): + calibration_data = prepare_dataset() + +Perform quantization +~~~~~~~~~~~~~~~~~~~~ + + + +Create a quantized model from the pre-trained model. + + **NOTE**: Quantization is time and memory consuming operation. + Running quantization code below may take some time. + +.. code:: ipython3 + + %%skip not $to_quantize.value + + + if not IMAGE_ENCODER_PATH_INT8.exists(): + if len(calibration_data) == 0: + raise RuntimeError( + 'Calibration dataset is empty. Please check internet connection and try to download images manually.' 
+ ) + + ov_model = core.read_model(IMAGE_ENCODER_PATH) + calibration_dataset = nncf.Dataset(calibration_data) + quantized_model = nncf.quantize( + model=ov_model, + calibration_dataset=calibration_dataset, + model_type=nncf.ModelType.TRANSFORMER, + subset_size=len(calibration_data), + # Smooth Quant algorithm reduces activation quantization error; optimal alpha value was obtained through grid search + advanced_parameters=nncf.AdvancedQuantizationParameters(smooth_quant_alpha=0.6) + ) + ov.save_model(quantized_model, IMAGE_ENCODER_PATH_INT8) + del ov_model + del quantized_model + gc.collect() + Prepare model inference pipeline -------------------------------- @@ -244,42 +796,392 @@ Prepare model inference pipeline |image0| -`OpenVINO™ GenAI `__ -is a library of the most popular Generative AI model pipelines, -optimized execution methods, and samples that run on top of highly -performant `OpenVINO -Runtime `__. - -This library is friendly to PC and laptop execution, and optimized for -resource consumption. It requires no external dependencies to run -generative models as it already includes all the core functionality -(e.g. tokenization via openvino-tokenizers). OpenVINO™ GenAI is a flavor -of OpenVINO™, aiming to simplify running inference of generative AI -models. It hides the complexity of the generation process and minimizes -the amount of code required. - -Inference Visual language models can be implemented using OpenVINO GenAI -``VLMPipeline`` class. Similarly to LLMPipeline, that we discussed in -this -`notebook `__. -It supports chat mode with preserving conversational history inside -pipeline, that allows us effectively implements chatbot that supports -conversation about provided images content. +``OVLlavaForCausalLM`` class provides ease-to-use interface for using +model in generation scenario. 
It is based on +``transformers.generation.GenerationMixin`` that gives us opportunity to +reuse all reach capabilities for generation implemented in HuggingFace +Transformers library. More details about this interface can be found in +`HuggingFace +documentation `__. .. |image0| image:: https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/a562e9de-5b94-4e24-ac52-532019fc92d3 -Select inference device -~~~~~~~~~~~~~~~~~~~~~~~ +.. code:: ipython3 + + import torch + from transformers.generation import GenerationConfig, GenerationMixin + from transformers.modeling_outputs import CausalLMOutputWithPast + from transformers import AutoConfig + from transformers.models.llava_next.modeling_llava_next import ( + get_anyres_image_grid_shape, + unpad_image, + ) + import openvino as ov + + + class OVLlavaForCausalLM(GenerationMixin): + def __init__( + self, + core, + image_encoder_path, + input_embedding_path, + language_model_path, + lm_device, + img_encoder_device, + ): + self.image_encoder = core.compile_model(core.read_model(image_encoder_path), img_encoder_device) + self.input_embeddings = core.compile_model(core.read_model(input_embedding_path), lm_device) + self.model = core.read_model(language_model_path) + self.input_names = {key.get_any_name(): idx for idx, key in enumerate(self.model.inputs)} + self.output_names = {idx: key for idx, key in enumerate(self.model.outputs)} + self.key_value_input_names = [key for key in list(self.input_names) if key not in ["beam_idx", "inputs_embeds", "attention_mask", "position_ids"]] + self.key_value_output_names = [key for key in list(self.output_names)[1:]] + self.stateful = len(self.key_value_input_names) == 0 + compiled_model = core.compile_model(self.model, lm_device) + self.request = compiled_model.create_infer_request() + self.config = AutoConfig.from_pretrained(Path(language_model_path).parent) + self.generation_config = GenerationConfig.from_model_config(self.config) + self.main_input_name = "input_ids" + 
self.device = torch.device("cpu") + self.num_pkv = 2 + self.next_beam_idx = None + self.image_newline = torch.zeros(self.config.text_config.hidden_size, dtype=torch.float32) + self.pad_token_id = self.config.pad_token_id if self.config.pad_token_id is not None else -1 + self.past_len = 0 + self._supports_cache_class = False + + def can_generate(self): + """Returns True to validate the check that the model using `GenerationMixin.generate()` can indeed generate.""" + return True + + def __call__( + self, + input_ids: torch.LongTensor, + pixel_values: torch.Tensor, + attention_mask: Optional[torch.LongTensor] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, + position_ids: Optional[torch.LongTensor] = None, + image_sizes=None, + **kwargs, + ) -> CausalLMOutputWithPast: + return self.forward( + input_ids, + pixel_values, + attention_mask, + past_key_values, + position_ids, + image_sizes, + **kwargs, + ) + + def forward( + self, + input_ids: torch.LongTensor, + pixel_values: torch.Tensor, + attention_mask: Optional[torch.LongTensor] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, + position_ids: Optional[torch.LongTensor] = None, + image_sizes=None, + **kwargs, + ) -> CausalLMOutputWithPast: + """General inference method""" + inputs = {} + if past_key_values is not None: + inputs = {} + if not self.stateful: + past_key_values = tuple(past_key_value for pkv_per_layer in past_key_values for past_key_value in pkv_per_layer) + # Add the past_key_values to the decoder inputs + inputs = dict(zip(self.key_value_input_names, past_key_values)) + # input_ids = np.array(input_ids)[:, -1:] + inputs_embeds = self.input_embeddings(input_ids)[0] + inputs["inputs_embeds"] = inputs_embeds + # inputs["attention_mask"] = attention_mask + if "beam_idx" in self.input_names: + inputs["beam_idx"] = self.next_beam_idx if self.next_beam_idx is not None else np.arange(batch_size, dtype=int) + + if not self.stateful: + 
first_layer_past_key_value = torch.from_numpy(past_key_values[0][0][:, :, :, 0]) + else: + first_layer_past_key_value = torch.from_numpy(self.request.query_state()[0].state.data[:, :, :, 0]) + + # Sum all dimensions of head_dim (-2) to avoid random errors such as: https://github.com/huggingface/transformers/pull/28032#issuecomment-1863691941 + batch_index, non_attended_tokens = torch.where(first_layer_past_key_value.float().sum(-2) == 0) + + # Get the target length + target_length = input_ids.shape[1] + past_length = first_layer_past_key_value.shape[-1] + + extended_attention_mask = torch.ones( + (attention_mask.shape[0], past_length), + dtype=attention_mask.dtype, + device=attention_mask.device, + ) + + # Filter out only the tokens that can be un-attended, this can happen + # if one uses Llava + Fused modules where the cache on the + # first iteration is already big enough, or if one passes custom cache + valid_indices = non_attended_tokens < extended_attention_mask.size(-1) + new_batch_index = batch_index[valid_indices] + new_non_attended_tokens = non_attended_tokens[valid_indices] + + # Zero-out the places where we don't need to attend + extended_attention_mask[new_batch_index, new_non_attended_tokens] = 0 + + attention_mask = torch.cat((extended_attention_mask, attention_mask[:, -target_length:]), dim=1) + position_ids = torch.sum(attention_mask, dim=1).unsqueeze(-1) - 1 + inputs["attention_mask"] = attention_mask + inputs["position_ids"] = position_ids + + else: + inputs = self.prepare_multimodal_input(input_ids, pixel_values, attention_mask, position_ids, image_sizes) + + # Run inference + self.request.start_async(inputs, share_inputs=True) + self.request.wait() + + logits = torch.from_numpy(self.request.get_tensor(self.output_names[0]).data) + + if not self.stateful: + # Tuple of length equal to : number of layer * number of past_key_value per decoder layer (2 corresponds to the self-attention layer) + past_key_values = 
tuple(self.request.get_tensor(key).data for key in self.key_value_output_names) + # Tuple of tuple of length `n_layers`, with each tuple of length equal to 2 (k/v of self-attention) + past_key_values = tuple(past_key_values[i : i + self.num_pkv] for i in range(0, len(past_key_values), self.num_pkv)) + else: + past_key_values = ((),) + self.past_len += inputs["inputs_embeds"].shape[1] + return CausalLMOutputWithPast(logits=logits, past_key_values=past_key_values) + + def prepare_multimodal_input(self, input_ids, pixel_values, attention_mask, position_ids, image_sizes=None): + """Preprocessing function for embedding multimodal data""" + inputs = {} + inputs_embeds = torch.from_numpy(self.input_embeddings(input_ids)[0]) + batch_size = input_ids.shape[0] + if not self.stateful: + for input_name in self.key_value_input_names: + model_inputs = self.model.input(input_name) + shape = model_inputs.get_partial_shape() + shape[0] = batch_size + if shape[2].is_dynamic: + shape[2] = 0 + else: + shape[1] = 0 + inputs[input_name] = ov.Tensor(model_inputs.get_element_type(), shape.get_shape()) + else: + self.past_len = 0 + self.request.reset_state() + # Set initial value for the next beam_idx input that will be used at the current iteration + # and will be optionally updated by _reorder_cache at the next iterations if beam_search is used + self.next_beam_idx = np.arange(batch_size, dtype=int) + + if "beam_idx" in self.input_names: + inputs["beam_idx"] = self.next_beam_idx if self.next_beam_idx is not None else np.arange(batch_size, dtype=int) + if pixel_values is None: + inputs["inputs_embeds"] = inputs_embeds + inputs["attention_mask"] = attention_mask + if position_ids is None: + position_ids = torch.cumsum(attention_mask, axis=1) - 1 + position_ids[attention_mask == 0] = 1 + inputs["position_ids"] = position_ids + res = self.image_encoder(pixel_values) + image_features = torch.from_numpy(res[0]) + split_sizes = [image.shape[0] for image in pixel_values] + image_features = 
torch.split(image_features, split_sizes, dim=0) + + # NOTE we only support multimodal_patch_merge_type == "spatial_unpad" + height = width = self.config.vision_config.image_size // self.config.vision_config.patch_size + + new_image_features = [] + for image_idx, image_feature in enumerate(image_features): + if image_feature.shape[0] > 1: + base_image_feature = image_feature[0] + image_feature = image_feature[1:] + + if height * width != base_image_feature.shape[0]: + raise ValueError("The number of patches is not consistent with the image size.") + num_patch_height, num_patch_width = get_anyres_image_grid_shape( + image_sizes[image_idx], + self.config.image_grid_pinpoints, + self.config.vision_config.image_size, + ) + image_feature = image_feature.view(num_patch_height, num_patch_width, height, width, -1) + image_feature = image_feature.permute(4, 0, 2, 1, 3).contiguous() + image_feature = image_feature.flatten(1, 2).flatten(2, 3) + image_feature = unpad_image(image_feature, image_sizes[image_idx]) + image_feature = torch.cat( + ( + image_feature, + self.image_newline[:, None, None].expand(*image_feature.shape[:-1], 1), + ), + dim=-1, + ) + image_feature = image_feature.flatten(1, 2).transpose(0, 1) + image_feature = torch.cat((base_image_feature, image_feature), dim=0) + else: + image_feature = image_feature[0] + image_feature = torch.cat((image_feature, self.image_newline[None]), dim=0) + new_image_features.append(image_feature) + image_features = torch.stack(new_image_features, dim=0) + + ( + inputs_embeds, + attention_mask, + position_ids, + ) = self._merge_input_ids_with_image_features(image_features, inputs_embeds, input_ids, attention_mask, None) + inputs["inputs_embeds"] = inputs_embeds + inputs["attention_mask"] = attention_mask + inputs["position_ids"] = position_ids + + return inputs + + def _merge_input_ids_with_image_features(self, image_features, inputs_embeds, input_ids, attention_mask, labels): + num_images, num_image_patches, embed_dim = 
image_features.shape + batch_size, sequence_length = input_ids.shape + left_padding = not torch.sum(input_ids[:, -1] == torch.tensor(self.pad_token_id)) + # 1. Create a mask to know where special image tokens are + special_image_token_mask = input_ids == self.config.image_token_index + num_special_image_tokens = torch.sum(special_image_token_mask, dim=-1) + # Compute the maximum embed dimension + max_embed_dim = (num_special_image_tokens.max() * (num_image_patches - 1)) + sequence_length + batch_indices, non_image_indices = torch.where(input_ids != self.config.image_token_index) + + # 2. Compute the positions where text should be written + # Calculate new positions for text tokens in merged image-text sequence. + # `special_image_token_mask` identifies image tokens. Each image token will be replaced by `nb_text_tokens_per_images - 1` text tokens. + # `torch.cumsum` computes how each image token shifts subsequent text token positions. + # - 1 to adjust for zero-based indexing, as `cumsum` inherently increases indices by one. + new_token_positions = torch.cumsum((special_image_token_mask * (num_image_patches - 1) + 1), -1) - 1 + nb_image_pad = max_embed_dim - 1 - new_token_positions[:, -1] + if left_padding: + new_token_positions += nb_image_pad[:, None] # offset for left padding + text_to_overwrite = new_token_positions[batch_indices, non_image_indices] + + # 3. Create the full embedding, already padded to the maximum position + final_embedding = torch.zeros( + batch_size, + max_embed_dim, + embed_dim, + dtype=inputs_embeds.dtype, + device=inputs_embeds.device, + ) + final_attention_mask = torch.zeros( + batch_size, + max_embed_dim, + dtype=attention_mask.dtype, + device=inputs_embeds.device, + ) + # In case the Vision model or the Language model has been offloaded to CPU, we need to manually + # set the corresponding tensors into their correct target device. 
+ target_device = inputs_embeds.device + batch_indices, non_image_indices, text_to_overwrite = ( + batch_indices.to(target_device), + non_image_indices.to(target_device), + text_to_overwrite.to(target_device), + ) + attention_mask = attention_mask.to(target_device) + + # 4. Fill the embeddings based on the mask. If we have ["hey" "", "how", "are"] + # we need to index copy on [0, 577, 578, 579] for the text and [1:576] for the image features + final_embedding[batch_indices, text_to_overwrite] = inputs_embeds[batch_indices, non_image_indices] + final_attention_mask[batch_indices, text_to_overwrite] = attention_mask[batch_indices, non_image_indices] + if labels is not None: + final_labels[batch_indices, text_to_overwrite] = labels[batch_indices, non_image_indices] + + # 5. Fill the embeddings corresponding to the images. Anything that is still zeros needs filling + image_to_overwrite = torch.all(final_embedding == 0, dim=-1) + image_to_overwrite &= image_to_overwrite.cumsum(-1) - 1 >= nb_image_pad[:, None].to(target_device) + if image_to_overwrite.sum() != image_features.shape[:-1].numel(): + raise ValueError( + f"The input provided to the model are wrong. The number of image tokens is {torch.sum(special_image_token_mask)} while" + f" the number of image given to the model is {num_images}. This prevents correct indexing and breaks batch generation." + ) + + final_embedding[image_to_overwrite] = image_features.contiguous().reshape(-1, embed_dim).to(target_device) + final_attention_mask |= image_to_overwrite + position_ids = (final_attention_mask.cumsum(-1) - 1).masked_fill_((final_attention_mask == 0), 1) + + # 6. Mask out the embedding at padding positions, as we later use the past_key_value value to determine the non-attended tokens. 
+ batch_indices, pad_indices = torch.where(input_ids == self.pad_token_id) + indices_to_mask = new_token_positions[batch_indices, pad_indices] + + final_embedding[batch_indices, indices_to_mask] = 0 + + return final_embedding, final_attention_mask, position_ids + + def prepare_inputs_for_generation( + self, + input_ids, + past_key_values=None, + inputs_embeds=None, + pixel_values=None, + image_sizes=None, + attention_mask=None, + **kwargs, + ): + if past_key_values is not None: + if not self.stateful: + cache_length = past_length = past_key_values[0][0].shape[2] + else: + cache_length = past_length = self.past_len + + # Keep only the unprocessed tokens: + # 1 - If the length of the attention_mask exceeds the length of input_ids, then we are in a setting where + # some of the inputs are exclusively passed as part of the cache (e.g. when passing input_embeds as + # input) + if attention_mask is not None and attention_mask.shape[1] > input_ids.shape[1]: + input_ids = input_ids[:, -(attention_mask.shape[1] - past_length) :] + # 2 - If the past_length is smaller than input_ids', then input_ids holds all input tokens. We can discard + # input_ids based on the past_length.llava + elif past_length < input_ids.shape[1]: + input_ids = input_ids[:, past_length:] + # 3 - Otherwise (past_length >= input_ids.shape[1]), let's assume input_ids only has unprocessed tokens. + elif self.config.image_token_index in input_ids: + input_ids = input_ids[:, input_ids.shape[1] - 1 :] + # If the cache has seen more tokens than it can hold, then the cache has a size limit. Let's discard the + # older attention values, as their corresponding values are not part of the input. 
+ if cache_length < past_length and attention_mask is not None: + attention_mask = attention_mask[:, -(cache_length + input_ids.shape[1]) :] + + position_ids = kwargs.get("position_ids", None) + if attention_mask is not None and position_ids is None: + # create position_ids on the fly for batch gllavaenerationsubset_siz + position_ids = attention_mask.long().cumsum(-1) - 1 + position_ids.masked_fill_(attention_mask == 0, 1) + if past_key_values: + position_ids = position_ids[:, -input_ids.shape[1] :] + + # if `inputs_embeds` are passed, we only want to use them in the 1st generation step + if inputs_embeds is not None and past_key_values is None: + model_inputs = {"inputs_embeds": inputs_embeds} + else: + model_inputs = {"input_ids": input_ids} + + model_inputs.update( + { + "position_ids": position_ids, + "past_key_values": past_key_values, + "use_cache": kwargs.get("use_cache"), + "attention_mask": attention_mask, + "pixel_values": pixel_values, + "image_sizes": image_sizes, + } + ) + return model_inputs + +Run OpenVINO model inference +---------------------------- -Select device from dropdown list for running inference using OpenVINO. +Select device for language model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + .. code:: ipython3 from notebook_utils import device_widget - device = device_widget("CPU", exclude=["NPU"]) + device = device_widget(exclude=["NPU"]) device @@ -288,19 +1190,29 @@ Select device from dropdown list for running inference using OpenVINO. .. parsed-literal:: - Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU') + Dropdown(description='Device:', options=('CPU', 'GPU.0', 'GPU.1'), value='CPU') -Select model variant -~~~~~~~~~~~~~~~~~~~~ +.. code:: ipython3 + + lm_device = device.value + +Select device for image encoder +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 - import ipywidgets as widgets - + device + +.. code:: ipython3 + + img_encoder_device = device.value + +.. 
code:: ipython3 + use_int4_lang_model = widgets.Checkbox( value=LANGUAGE_MODEL_PATH_INT4.exists(), description="INT4 language model", @@ -318,110 +1230,126 @@ Select model variant -Load OpenVINO model -~~~~~~~~~~~~~~~~~~~ +.. code:: ipython3 + use_int8_image_encoder = widgets.Checkbox( + value=IMAGE_ENCODER_PATH_INT8.exists(), + description="INT8 image encoder", + disabled=not IMAGE_ENCODER_PATH_INT8.exists(), + ) + + use_int8_image_encoder -For pipeline initialization we should provide path to model directory -and inference device. -.. code:: ipython3 - import openvino_genai as ov_genai - - model_dir = MODEL_DIR / "FP16" if not use_int4_lang_model.value else MODEL_DIR / "INT4" - - ov_model = ov_genai.VLMPipeline(model_dir, device=device.value) - -Run OpenVINO model inference ----------------------------- +.. parsed-literal:: + Checkbox(value=True, description='INT4 language model') -Now, when we have model and defined generation pipeline, we can run -model inference. -For preparing input data, ``VLMPipeline`` use tokenizer and image -processor inside, we just need to convert image to input OpenVINO tensor -and provide question as string. Additionally, we can provides options -for controlling generation process (e.g. number of maximum generated -tokens or using multinomial sampling for decoding instead of greedy -search approach) using ``GenerationConfig``. +.. code:: ipython3 -Generation process for long response may be time consuming, for -accessing partial result as soon as it is generated without waiting when -whole process finished, Streaming API can be used. Token streaming is -the mode in which the generative system returns the tokens one by one as -the model generates them. This enables showing progressive generations -to the user rather than waiting for the whole generation. Streaming is -an essential aspect of the end-user experience as it reduces latency, -one of the most critical aspects of a smooth experience. 
+ lang_model_path = LANGUAGE_MODEL_PATH_INT4 if use_int4_lang_model.value else LANGUAGE_MODEL_PATH + image_encoder_path = IMAGE_ENCODER_PATH_INT8 if use_int8_image_encoder.value else IMAGE_ENCODER_PATH + + ov_llava_model = OVLlavaForCausalLM(core, image_encoder_path, INPUT_EMBEDDING_PATH, lang_model_path, lm_device, img_encoder_device) .. code:: ipython3 - import requests from PIL import Image - from io import BytesIO - import numpy as np - - config = ov_genai.GenerationConfig() - config.max_new_tokens = 100 - - - def load_image(image_file): - if image_file.startswith("http") or image_file.startswith("https"): - response = requests.get(image_file) - image = Image.open(BytesIO(response.content)).convert("RGB") - else: - image = Image.open(image_file).convert("RGB") - image_data = np.array(image.getdata()).reshape(1, image.size[1], image.size[0], 3).astype(np.byte) - return image, ov.Tensor(image_data) - - - def streamer(subword: str) -> bool: - """ - - Args: - subword: sub-word of the generated text. - - Returns: Return flag corresponds whether generation should be stopped. - - """ - print(subword, end="", flush=True) - + import requests - image_file = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11" - image, image_tensor = load_image(image_file) - text_message = "What is unusual on this image?" + from transformers import TextStreamer - prompt = text_message + url = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11" + image = Image.open(requests.get(url, stream=True).raw) + question = "What is unusual on this image?" + prompt = f"[INST] \n{question}[/INST]" + streamer = TextStreamer(processor, skip_special_tokens=True, skip_prompt=True) - display(image) - print(f"Question:\n{text_message}") + inputs = processor(prompt, image, return_tensors="pt") + print(f"Question:\n{question}") + image + + +.. 
parsed-literal:: + + Question: + What is unusual on this image? + + + + +.. image:: llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_36_1.png + + + +.. code:: ipython3 + print("Answer:") - output = ov_model.generate(prompt, image=image_tensor, generation_config=config, streamer=streamer) + streamer = TextStreamer(processor, skip_special_tokens=True, skip_prompt=True) + output = ov_llava_model.generate(**inputs, max_new_tokens=49, streamer=streamer) +.. parsed-literal:: -.. image:: llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_17_0.png + Setting `pad_token_id` to `eos_token_id`:2 for open-end generation. .. parsed-literal:: - Question: - What is unusual on this image? Answer: - - - The unusual aspect of this image is that a cat is lying inside a cardboard box. Cats are known for their curiosity and love for small, enclosed spaces. They often find comfort and security in boxes, bags, or other confined spaces. In this case, the cat has chosen to lie down in a cardboard box, which is an unconventional and amusing sight. It is not common to see a cat lounging in a box, as they usually + The image shows a cat lying on its back inside a cardboard box. What's unusual is that the cat appears to be in a relaxed and somewhat human-like pose, with its paws up in the air and its belly exposed. + Interactive demo ---------------- +.. 
code:: ipython3 + + import gradio as gr + from transformers import TextIteratorStreamer + from threading import Thread + from PIL import Image + import torch + + + def bot_streaming(message, history): + print(message) + if message["files"]: + image = message["files"][-1]["path"] if isinstance(message["files"][-1], dict) else message["files"][-1] + else: + # if there's no image uploaded for this turn, look for images in the past turns + # kept inside tuples, take the last one + for hist in history: + if isinstance(hist[0], tuple): + image = hist[0][0] + + if image is None: + gr.Error("You need to upload an image for LLaVA to work.") + prompt = f"[INST] \n{message['text']} [/INST]" + image = Image.open(image).convert("RGB") + inputs = processor(prompt, image, return_tensors="pt") + + streamer = TextIteratorStreamer(processor, **{"skip_special_tokens": True}) + generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=100) + + thread = Thread(target=ov_llava_model.generate, kwargs=generation_kwargs) + thread.start() + + text_prompt = f"[INST] \n{message['text']} [/INST]" + + buffer = "" + for new_text in streamer: + buffer += new_text + generated_text_without_prompt = buffer[len(text_prompt) :] + yield generated_text_without_prompt + .. code:: ipython3 if not Path("gradio_helper.py").exists(): @@ -430,7 +1358,7 @@ Interactive demo from gradio_helper import make_demo - demo = make_demo(ov_model) + demo = make_demo(fn=bot_streaming) try: demo.launch(debug=False) @@ -439,3 +1367,8 @@ Interactive demo # if you are launching remotely, specify server_name and server_port # demo.launch(server_name='your server name', server_port='server port in int') # Read more in the docs: https://gradio.app/docs/ + +.. 
code:: ipython3 + + # please uncomment and run this cell for stopping gradio interface + # demo.close() diff --git a/docs/notebooks/llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_17_0.jpg b/docs/notebooks/llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_17_0.jpg deleted file mode 100644 index c6aeec77cd3cb2..00000000000000 --- a/docs/notebooks/llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_17_0.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5fc0d22d75f23474fb4f8aec8c0bf0fdf5d9377f3379e82a3887003e6da47e7e -size 60425 diff --git a/docs/notebooks/llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_17_0.png b/docs/notebooks/llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_17_0.png deleted file mode 100644 index c6673a757ab5dc..00000000000000 --- a/docs/notebooks/llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_17_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c715d8adee4bf7519690de20b57ef2edaa2f914c86a64d107f99a919dcdad218 -size 854224 diff --git a/docs/notebooks/glm-edge-v-with-output_files/glm-edge-v-with-output_13_1.jpg b/docs/notebooks/llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_36_1.jpg similarity index 100% rename from docs/notebooks/glm-edge-v-with-output_files/glm-edge-v-with-output_13_1.jpg rename to docs/notebooks/llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_36_1.jpg diff --git a/docs/notebooks/glm-edge-v-with-output_files/glm-edge-v-with-output_13_1.png b/docs/notebooks/llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_36_1.png similarity index 100% rename from 
docs/notebooks/glm-edge-v-with-output_files/glm-edge-v-with-output_13_1.png rename to docs/notebooks/llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_36_1.png diff --git a/docs/notebooks/llm-agent-rag-llamaindex-with-output.rst b/docs/notebooks/llm-agent-rag-llamaindex-with-output.rst index 8f94b7ce67973a..6aa437b9f2d37a 100644 --- a/docs/notebooks/llm-agent-rag-llamaindex-with-output.rst +++ b/docs/notebooks/llm-agent-rag-llamaindex-with-output.rst @@ -230,7 +230,7 @@ code: if repo_name == "OpenVINO": hf_hub.snapshot_download(llm_model_id.value, local_dir=llm_model_path) else: - optimum_cli(llm_model_id.value, llm_model_path, additional_args=-{"task": "text-generation-with-past", "weight-format": "int4"}) + !optimum_cli(llm_model_id.value, llm_model_path, additional_args=-{"task": "text-generation-with-past", "weight-format": "int4"}) Download Embedding model ~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/notebooks/llm-agent-react-langchain-with-output.rst b/docs/notebooks/llm-agent-react-langchain-with-output.rst index 9adb0311542426..2b1b289f90db0b 100644 --- a/docs/notebooks/llm-agent-react-langchain-with-output.rst +++ b/docs/notebooks/llm-agent-react-langchain-with-output.rst @@ -66,29 +66,6 @@ Prerequisites -.. code:: ipython3 - - import requests - from pathlib import Path - - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - open("notebook_utils.py", "w").write(r.text) - - if not Path("cmd_helper.py").exists(): - r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py") - open("cmd_helper.py", "w", encoding="utf-8").write(r.text) - - - - -.. parsed-literal:: - - 1491 - - - .. 
code:: ipython3 import os @@ -97,27 +74,16 @@ Prerequisites %pip install -Uq pip %pip uninstall -q -y optimum optimum-intel - %pip install --pre -Uq "openvino>=2024.5.0" openvino-tokenizers[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "transformers>=4.38.1" "langchain>=0.2.3" "langchain-huggingface>=0.1.2" "langchain-community>=0.2.4" "Wikipedia" \ + %pip install --pre -Uq "openvino>=2024.2.0" openvino-tokenizers[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu \ "torch>=2.1" \ "datasets" \ "accelerate" \ - "pydantic<2.10.0" \ "gradio>=4.19" + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "transformers>=4.38.1" "langchain>=0.2.3" "langchain-community>=0.2.4" "Wikipedia" %pip install -q "git+https://github.com/huggingface/optimum-intel.git" \ "git+https://github.com/openvinotoolkit/nncf.git" - -.. parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - - Create a tools -------------- @@ -212,7 +178,7 @@ previous agent tool invocations and the corresponding tool outputs. .. code:: ipython3 - PREFIX = """Respond to the human as helpfully and accurately as possible. You have access to the following tools:""" + PREFIX = """[INST]Respond to the human as helpfully and accurately as possible. 
You have access to the following tools:""" FORMAT_INSTRUCTIONS = """Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input). @@ -244,10 +210,10 @@ previous agent tool invocations and the corresponding tool outputs. "action": "Final Answer", "action_input": "Final response to human" }}}} - ```""" + ```[/INST]""" SUFFIX = """Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation:. - Thought:""" + Thought:[INST]""" HUMAN_MESSAGE_TEMPLATE = "{input}\n\n{agent_scratchpad}" @@ -259,32 +225,18 @@ Create LLM Large Language Models (LLMs) are a core component of LangChain. LangChain does not serve its own LLMs, but rather provides a standard interface for interacting with many different LLMs. In this example, we -select following models as LLM in agent pipeline. - -- **qwen2.5-3b-instruct/qwen2.5-7b-instruct/qwen2.5-14b-instruct** - - Qwen2.5 is the latest series of Qwen large language models. Comparing - with Qwen2, Qwen2.5 series brings significant improvements in coding, - mathematics and general knowledge skills. Additionally, it brings - long-context and multiple languages support including Chinese, - English, French, Spanish, Portuguese, German, Italian, Russian, - Japanese, Korean, Vietnamese, Thai, Arabic, and more. For more - details, please refer to - `model_card `__, - `blog `__, - `GitHub `__, and - `Documentation `__. -- **llama-3.1-8b-instruct** - The Llama 3.1 instruction tuned text only - models (8B, 70B, 405B) are optimized for multilingual dialogue use - cases and outperform many of the available open source and closed - chat models on common industry benchmarks. More details about model - can be found in `Meta blog - post `__, `model - website `__ and `model - card `__. +select ``Mistral-7B-Instruct-v0.3`` as LLM in agent pipeline. 
+ +- **Mistral-7B-Instruct-v0.3** - The Mistral-7B-Instruct-v0.3 Large + Language Model (LLM) is an instruct fine-tuned version of the + Mistral-7B-v0.3. You can find more details about model in the `model + card `__, + `paper `__ and `release blog + post `__. >\ **Note**: run model with demo, you will need to accept license agreement. >You must be a registered user in Hugging Face Hub. Please visit `HuggingFace model - card `__, + card `__, carefully read terms of usage and click accept button. You will need to use an access token for the code below to run. For more information on access tokens, refer to `this section of the @@ -317,39 +269,13 @@ folder. .. code:: ipython3 - import ipywidgets as widgets - - llm_model_ids = ["Qwen/Qwen2.5-7B-Instruct", "Qwen/Qwen2.5-3B-Instruct", "Qwen/qwen2.5-14b-instruct", "meta-llama/Meta-Llama-3.1-8B-Instruct"] - - llm_model_id = widgets.Dropdown( - options=llm_model_ids, - value=llm_model_ids[0], - description="Model:", - disabled=False, - ) - - llm_model_id - - - - -.. parsed-literal:: - - Dropdown(description='Model:', options=('Qwen/Qwen2.5-7B-Instruct', 'Qwen/Qwen2.5-3B-Instruct', 'Qwen/qwen2.5-… - - - -.. code:: ipython3 - - from cmd_helper import optimum_cli + from pathlib import Path - llm_model_path = llm_model_id.value.split("/")[-1] - repo_name = llm_model_id.value.split("/")[0] + model_id = "mistralai/Mistral-7B-Instruct-v0.3" + model_path = "Mistral-7B-Instruct-v0.3-ov-int4" - if not Path(llm_model_path).exists(): - optimum_cli( - llm_model_id.value, llm_model_path, additional_args={"task": "text-generation-with-past", "weight-format": "int4", "group-size": "128", "ratio": "1.0"} - ) + if not Path(model_path).exists(): + !optimum-cli export openvino --model {model_id} --task text-generation-with-past --trust-remote-code --weight-format int4 {model_path} Select inference device for LLM ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -358,11 +284,16 @@ Select inference device for LLM .. 
code:: ipython3 + import requests + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + from notebook_utils import device_widget device = device_widget("CPU", exclude=["NPU"]) - - device @@ -381,7 +312,7 @@ information `__. .. code:: ipython3 - from langchain_huggingface import HuggingFacePipeline + from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline from transformers.generation.stopping_criteria import StoppingCriteriaList, StoppingCriteria import openvino.properties as props @@ -415,7 +346,7 @@ information `__. stop_tokens = ["Observation:"] ov_llm = HuggingFacePipeline.from_model_id( - model_id=llm_model_path, + model_id=model_path, task="text-generation", backend="openvino", model_kwargs={ @@ -425,16 +356,26 @@ information `__. }, pipeline_kwargs={"max_new_tokens": 2048}, ) + ov_llm = ov_llm.bind(skip_prompt=True, stop=["Observation:"]) tokenizer = ov_llm.pipeline.tokenizer ov_llm.pipeline._forward_params["stopping_criteria"] = StoppingCriteriaList([StopSequenceCriteria(stop_tokens, tokenizer)]) -.. code:: ipython3 - from langchain_huggingface import ChatHuggingFace - - ov_chat = ChatHuggingFace(llm=ov_llm, verbose=True) - ov_chat = ov_chat.bind(skip_prompt=True, stop=["Observation:"]) +.. parsed-literal:: + + 2024-06-07 23:17:16.804739: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-06-07 23:17:16.807973: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used. 
+ 2024-06-07 23:17:16.850235: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered + 2024-06-07 23:17:16.850258: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered + 2024-06-07 23:17:16.850290: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered + 2024-06-07 23:17:16.859334: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2024-06-07 23:17:17.692415: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers + The argument `trust_remote_code` is to be used along with export=True. It will be ignored. + Compiling the model to GPU ... + You can get additional inference speed improvement with `Dynamic Quantization of activations and KV-cache quantization on @@ -468,7 +409,7 @@ outputs back to the agent, and repeats. from langchain.agents import AgentExecutor, StructuredChatAgent agent = StructuredChatAgent.from_llm_and_tools( - ov_chat, + ov_llm, tools, prefix=PREFIX, suffix=SUFFIX, @@ -497,68 +438,57 @@ prompt template. > Entering new AgentExecutor chain... - Thought: First, we need to take 3 to the fifth power. Then we will find the sum of twelve and three. After that, we multiply the first result by the second result. Finally, we'll square the whole result. 
+ Thought: I can use the exponentiate and add tools to solve the first part, and then use the multiply tool for the second part, and finally the exponentiate tool again to square the result. Action: ``` { "action": "exponentiate", - "action_input": { - "base": 3, - "exponent": 5 - } + "action_input": {"base": 3, "exponent": 5} } ``` Observation: Observation: 243 - Thought:Next, let's find the sum of twelve and three. + Thought: Now I need to add twelve and three Action: ``` { "action": "add", - "action_input": { - "first_int": 12, - "second_int": 3 - } + "action_input": {"first_int": 12, "second_int": 3} } ``` Observation: Observation: 15 - Thought:Now, we will multiply the result of \(3^5\) (which is 243) by the sum of 12 and 3 (which is 15). + Thought: Now I need to multiply the result by 243 Action: ``` { "action": "multiply", - "action_input": { - "first_int": 243, - "second_int": 15 - } + "action_input": {"first_int": 243, "second_int": 15} } ``` Observation: Observation: 3645 - Thought:Thought: Now, we need to square the result of the multiplication (3645). + Thought: Finally, I need to square the result Action: ``` { "action": "exponentiate", - "action_input": { - "base": 3645, - "exponent": 2 - } + "action_input": {"base": 3645, "exponent": 2} } ``` + Observation: Observation: 13286025 - Thought:Thought: I know what to respond + Thought: I know what to respond Action: ``` { "action": "Final Answer", - "action_input": "The final result is 13286025." + "action_input": "The final answer is 13286025" } ``` @@ -570,7 +500,7 @@ prompt template. .. parsed-literal:: {'input': 'Take 3 to the fifth power and multiply that by the sum of twelve and three, then square the whole result', - 'output': 'The final result is 13286025.'} + 'output': 'The final answer is 13286025'} @@ -636,7 +566,7 @@ words generated by agent. .. parsed-literal:: - 'Page: OpenVINO\nSummary: OpenVINO is an open-source software toolkit for optimizing and deploying deep learning models. 
It enables programmers to develop scalable and efficient AI solutions with relatively few lines of code. It supports several popular model formats and categories, such as large language models, computer vision, and generative AI.\nActively developed by Intel, it prioritizes high-performance inference on Intel hardware but also supports ARM/ARM64 processors and encourages contributors to add new devices to the portfolio.\nBased in C++, it offers the following APIs: C/C++, Python, and Node.js (an early preview).\nOpenVINO is cross-platform and free for use under Apache License 2.0.\n\nPage: Audacity (audio editor)\nSummary: Audacity is a free and open-source digital audio editor and recording application software, available for Windows, macOS, Linux, and other Unix-like operating systems. \nAs of December 6, 2022, Audacity is the most popular download at FossHub, with over 114.' + 'Page: OpenVINO\nSummary: OpenVINO is an open-source software toolkit for optimizing and deploying deep learning models. It enables programmers to develop scalable and efficient AI solutions with relatively few lines of code. It supports several popular model formats and categories, such as large language models, computer vision, and generative AI.\nActively developed by Intel, it prioritizes high-performance inference on Intel hardware but also supports ARM/ARM64 processors and encourages contributors to add new devices to the portfolio.\nBased in C++, it offers the following APIs: C/C++, Python, and Node.js (an early preview).\nOpenVINO is cross-platform and free for use under Apache License 2.0.\n\nPage: Stable Diffusion\nSummary: Stable Diffusion is a deep learning, text-to-image model released in 2022 based on diffusion techniques. It is considered to be a part of the ongoing artificial intelligence boom.\nIt is primarily used to generate detailed images conditioned on text descriptions, t' @@ -713,7 +643,7 @@ In this examples, we will create 2 customized tools for .. 
parsed-literal:: - "{'current_condition': {'temp_C': '0', 'FeelsLikeC': '-4', 'humidity': '86', 'weatherDesc': [{'value': 'Clear'}], 'observation_time': '12:16 AM'}}" + "{'current_condition': {'temp_C': '9', 'FeelsLikeC': '8', 'humidity': '93', 'weatherDesc': [{'value': 'Sunny'}], 'observation_time': '04:39 AM'}}" @@ -727,7 +657,7 @@ Create AI agent demo with Gradio UI tools = [wikipedia, painting, weather] agent = StructuredChatAgent.from_llm_and_tools( - ov_chat, + ov_llm, tools, prefix=PREFIX, suffix=SUFFIX, @@ -773,7 +703,7 @@ Create AI agent demo with Gradio UI def request_cancel(): - ov_chat.llm.pipeline.model.request.cancel() + ov_llm.pipeline.model.request.cancel() .. code:: ipython3 @@ -793,6 +723,50 @@ Create AI agent demo with Gradio UI # EXAMPLE: `demo.launch(server_name='your server name', server_port='server port in int')` # To learn more please refer to the Gradio docs: https://gradio.app/docs/ + +.. parsed-literal:: + + + + > Entering new AgentExecutor chain... + Thought: I need to use the weather tool to get the current weather in London, then use the painting tool to generate a picture of Big Ben based on the weather information. + + Action: + ``` + { + "action": "weather", + "action_input": "London" + } + ``` + + Observation: + Observation: {'current_condition': {'temp_C': '9', 'FeelsLikeC': '8', 'humidity': '93', 'weatherDesc': [{'value': 'Sunny'}], 'observation_time': '04:39 AM'}} + Thought: I have the current weather in London. Now I can use the painting tool to generate a picture of Big Ben based on the weather information. + + Action: + ``` + { + "action": "painting", + "action_input": "Big Ben, sunny day" + } + ``` + + Observation: + Observation: {image_url: "https://image.pollinations.ai/prompt/Big%20Ben%2C%20sunny%20day"} + Thought: I have the image URL of Big Ben on a sunny day. Now I can respond to the human with the image URL. 
+ + Action: + ``` + { + "action": "Final Answer", + "action_input": "Here is the image of Big Ben on a sunny day: https://image.pollinations.ai/prompt/Big%20Ben%2C%20sunny%20day" + } + ``` + Observation: + + > Finished chain. + + .. code:: ipython3 # please uncomment and run this cell for stopping gradio interface diff --git a/docs/notebooks/llm-agent-react-with-output.rst b/docs/notebooks/llm-agent-react-with-output.rst index 791355276fd2fd..653b57a491dbf2 100644 --- a/docs/notebooks/llm-agent-react-with-output.rst +++ b/docs/notebooks/llm-agent-react-with-output.rst @@ -62,22 +62,22 @@ Prerequisites import os import requests - - + + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) open("notebook_utils.py", "w").write(r.text) - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/pip_helper.py", ) open("pip_helper.py", "w").write(r.text) - + os.environ["GIT_CLONE_PROTECTION_ACTIVE"] = "false" - + from pip_helper import pip_install - + pip_install( "-q", "--extra-index-url", @@ -106,32 +106,33 @@ folder. Large Language Models (LLMs) are a core component of agent. LlamaIndex does not serve its own LLMs, but rather provides a standard interface for interacting with many different LLMs. In this example, we can select -``Qwen2.5`` as LLM in agent pipeline. \* -**qwen2.5-3b-instruct/qwen2.5-7b-instruct/qwen2.5-14b-instruct** - -Qwen2.5 is the latest series of Qwen large language models. Comparing -with Qwen2, Qwen2.5 series brings significant improvements in coding, -mathematics and general knowledge skills. Additionally, it brings -long-context and multiple languages support including Chinese, English, -French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, -Vietnamese, Thai, Arabic, and more. For more details, please refer to -`model_card `__, -`blog `__, -`GitHub `__, and -`Documentation `__. 
+``Qwen2.5`` as LLM in agent pipeline. + +* **qwen2.5-3b-instruct/qwen2.5-7b-instruct/qwen2.5-14b-instruct** - + Qwen2.5 is the latest series of Qwen large language models. Comparing + with Qwen2, Qwen2.5 series brings significant improvements in coding, + mathematics and general knowledge skills. Additionally, it brings + long-context and multiple languages support including Chinese, English, + French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, + Vietnamese, Thai, Arabic, and more. For more details, please refer to + `model_card `__, + `blog `__, + `GitHub `__, and + `Documentation `__. .. code:: ipython3 import ipywidgets as widgets - + llm_model_ids = ["Qwen/Qwen2.5-3B-Instruct", "Qwen/Qwen2.5-7B-Instruct", "Qwen/qwen2.5-14b-instruct"] - + llm_model_id = widgets.Dropdown( options=llm_model_ids, value=llm_model_ids[0], description="Model:", disabled=False, ) - + llm_model_id @@ -146,9 +147,9 @@ Vietnamese, Thai, Arabic, and more. For more details, please refer to .. code:: ipython3 from pathlib import Path - + llm_model_path = llm_model_id.value.split("/")[-1] - + if not Path(llm_model_path).exists(): !optimum-cli export openvino --model {llm_model_id.value} --task text-generation-with-past --trust-remote-code --weight-format int4 --group-size 128 --ratio 1.0 --sym {llm_model_path} @@ -160,9 +161,9 @@ Select inference device for LLM .. 
code:: ipython3 from notebook_utils import device_widget - + llm_device = device_widget("CPU", exclude=["NPU"]) - + llm_device @@ -226,15 +227,15 @@ guide `__ import openvino.properties as props import openvino.properties.hint as hints import openvino.properties.streams as streams - + import json import json5 import torch - + tokenizer = AutoTokenizer.from_pretrained(llm_model_path, trust_remote_code=True) - + ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): "1", props.cache_dir(): ""} - + llm = OVModelForCausalLM.from_pretrained( llm_model_path, device=llm_device.value, @@ -242,7 +243,7 @@ guide `__ config=AutoConfig.from_pretrained(llm_model_path, trust_remote_code=True), trust_remote_code=True, ) - + llm.generation_config.top_k = 1 llm.generation_config.max_length = 2000 @@ -260,31 +261,31 @@ received from tool calling.. class StopSequenceCriteria(StoppingCriteria): """ This class can be used to stop generation whenever a sequence of tokens is encountered. - + Args: stop_sequences (`str` or `List[str]`): The sequence (or list of sequences) on which to stop execution. tokenizer: The tokenizer used to decode the model outputs. 
""" - + def __init__(self, stop_sequences, tokenizer): if isinstance(stop_sequences, str): stop_sequences = [stop_sequences] self.stop_sequences = stop_sequences self.tokenizer = tokenizer - + def __call__(self, input_ids, scores, **kwargs) -> bool: decoded_output = self.tokenizer.decode(input_ids.tolist()[0]) return any(decoded_output.endswith(stop_sequence) for stop_sequence in self.stop_sequences) - - + + def text_completion(prompt: str, stop_words) -> str: im_end = "<|im_end|>" if im_end not in stop_words: stop_words = stop_words + [im_end] streamer = TextStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True) - + stopping_criteria = StoppingCriteriaList([StopSequenceCriteria(stop_words, tokenizer)]) input_ids = torch.tensor([tokenizer.encode(prompt)]) generate_kwargs = dict( @@ -297,7 +298,7 @@ received from tool calling.. output = tokenizer.decode(output, errors="ignore") assert output.startswith(prompt) output = output[len(prompt) :].replace("<|endoftext|>", "").replace(im_end, "") - + for stop_str in stop_words: idx = output.find(stop_str) if idx != -1: @@ -339,13 +340,13 @@ parameter should be a sequence of messages that contains the .. code:: ipython3 TOOL_DESC = """{name_for_model}: Call this tool to interact with the {name_for_human} API. What is the {name_for_human} API useful for? {description_for_model} Parameters: {parameters}""" - + PROMPT_REACT = """Answer the following questions as best you can. You have access to the following APIs: - + {tools_text} - + Use the following format: - + Question: the input question you must answer Thought: you should always think about what to do Action: the action to take, should be one of [{tools_name_text}] @@ -354,9 +355,9 @@ parameter should be a sequence of messages that contains the ... (this Thought/Action/Action Input/Observation can be repeated zero or more times) Thought: I now know the final answer Final Answer: the final answer to the original input question - + Begin! 
- + Question: {query}""" Meanwhile we have to create function for consolidate the tools @@ -381,9 +382,9 @@ information and conversation history into the prompt template. raise NotImplementedError tools_text.append(tool) tools_text = "\n\n".join(tools_text) - + tools_name_text = ", ".join([tool_info["name_for_model"] for tool_info in list_of_tool_info]) - + messages = [{"role": "system", "content": "You are a helpful assistant."}] for i, (query, response) in enumerate(chat_history): if list_of_tool_info: @@ -397,9 +398,9 @@ information and conversation history into the prompt template. messages.append({"role": "user", "content": query}) if response: messages.append({"role": "assistant", "content": response}) - + prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False, return_tensors="pt") - + return prompt Create parser @@ -493,7 +494,7 @@ execute them according to the output of LLM. return str(ret) elif tool_name == "image_gen": import urllib.parse - + tool_args = tool_args.replace("(", "").replace(")", "") prompt = json5.loads(tool_args)["prompt"] prompt = urllib.parse.quote(prompt) @@ -503,11 +504,11 @@ execute them according to the output of LLM. ) else: raise NotImplementedError - - + + def llm_with_tool(prompt: str, history, list_of_tool_info=()): chat_history = [(x["user"], x["bot"]) for x in history] + [(prompt, "")] - + planning_prompt = build_input_text(chat_history, list_of_tool_info) text = "" while True: @@ -522,7 +523,7 @@ execute them according to the output of LLM. 
else: text += output break - + new_history = [] new_history.extend(history) new_history.append({"user": prompt, "bot": text}) @@ -537,7 +538,7 @@ Run agent history = [] query = "get the weather in London, and create a picture of Big Ben based on the weather information" - + response, history = llm_with_tool(prompt=query, history=history, list_of_tool_info=tools) diff --git a/docs/notebooks/llm-chatbot-generate-api-with-output.rst b/docs/notebooks/llm-chatbot-generate-api-with-output.rst index c09b463ae985d0..dab94c37d77a4c 100644 --- a/docs/notebooks/llm-chatbot-generate-api-with-output.rst +++ b/docs/notebooks/llm-chatbot-generate-api-with-output.rst @@ -81,9 +81,9 @@ Install required dependencies .. code:: ipython3 import os - + os.environ["GIT_CLONE_PROTECTION_ACTIVE"] = "false" - + %pip install -Uq pip %pip uninstall -q -y optimum optimum-intel %pip install -q -U "openvino>=2024.3.0" openvino-tokenizers[transformers] openvino-genai @@ -103,12 +103,12 @@ Install required dependencies from pathlib import Path import requests import shutil - + # fetch model configuration - + config_shared_path = Path("../../utils/llm_config.py") config_dst_path = Path("llm_config.py") - + if not config_dst_path.exists(): if config_shared_path.exists(): try: @@ -127,7 +127,7 @@ Install required dependencies r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/llm_config.py") with open("llm_config.py", "w", encoding="utf-8") as f: f.write(r.text) - + if not Path("notebook_utils.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py") open("notebook_utils.py", "w").write(r.text) @@ -238,7 +238,7 @@ Click here to see available models options .. 
code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -270,7 +270,7 @@ Click here to see available models options .. code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -304,7 +304,7 @@ Click here to see available models options .. code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -338,7 +338,7 @@ Click here to see available models options .. code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -399,7 +399,7 @@ Click here to see available models options .. code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -432,7 +432,7 @@ Click here to see available models options .. code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -466,7 +466,7 @@ Click here to see available models options .. code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -500,7 +500,7 @@ Click here to see available models options .. 
code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -531,7 +531,7 @@ Click here to see available models options .. code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -644,9 +644,9 @@ Click here to see available models options .. code:: ipython3 from llm_config import get_llm_selection_widget - + form, lang, model_id_widget, compression_variant, use_preconverted = get_llm_selection_widget() - + form @@ -668,7 +668,7 @@ Click here to see available models options .. parsed-literal:: Selected model qwen2-0.5b-instruct with INT4 compression - + Convert model using Optimum-CLI tool ------------------------------------ @@ -676,7 +676,7 @@ Convert model using Optimum-CLI tool `Optimum Intel `__ -is the interface between the +is the interface between the `Transformers `__ and `Diffusers `__ libraries and OpenVINO to accelerate end-to-end pipelines on Intel architectures. @@ -749,12 +749,13 @@ to make it `symmetric `__ you can add ``--sym``. -For INT4 quantization you can also specify the following arguments : - -The ``--group-size`` parameter will define the group size to use for -quantization, -1 it will results in per-column quantization. - The -``--ratio`` parameter controls the ratio between 4-bit and 8-bit -quantization. If set to 0.9, it means that 90% of the layers will be -quantized to int4 while 10% will be quantized to int8. +For INT4 quantization you can also specify the following arguments : + +- The ``--group-size`` parameter will define the group size to use for + quantization, -1 it will results in per-column quantization. +- The ``--ratio`` parameter controls the ratio between 4-bit and 8-bit + quantization. 
If set to 0.9, it means that 90% of the layers will be + quantized to int4 while 10% will be quantized to int8. Smaller group_size and ratio values usually improve accuracy at the sacrifice of the model size and inference latency. You can enable AWQ to @@ -776,28 +777,28 @@ be additionally applied during model export with INT4 precision using .. code:: ipython3 from llm_config import convert_and_compress_model - + model_dir = convert_and_compress_model(model_id, model_configuration, compression_variant.value, use_preconverted.value) .. parsed-literal:: ✅ INT4 qwen2-0.5b-instruct model already converted and can be found in qwen2/INT4_compressed_weights - + Let’s compare model size for different compression types .. code:: ipython3 from llm_config import compare_model_size - + compare_model_size(model_dir) .. parsed-literal:: Size of model with INT4 compressed weights is 358.86 MB - + Select device for inference --------------------------- @@ -807,9 +808,9 @@ Select device for inference .. code:: ipython3 from notebook_utils import device_widget - + device = device_widget(default="CPU", exclude=["NPU"]) - + device @@ -851,15 +852,15 @@ of the available generation parameters more deeply later. .. code:: ipython3 - import openvino_genai as ov_genai - + from openvino_genai import LLMPipeline + print(f"Loading model from {model_dir}\n") - - - pipe = ov_genai.LLMPipeline(str(model_dir), device.value) - + + + pipe = LLMPipeline(str(model_dir), device.value) + generation_config = pipe.get_generation_config() - + input_prompt = "The Sun is yellow bacause" print(f"Input text: {input_prompt}") print(pipe.generate(input_prompt, max_new_tokens=10)) @@ -868,10 +869,10 @@ of the available generation parameters more deeply later. .. parsed-literal:: Loading model from qwen2/INT4_compressed_weights - + Input text: The Sun is yellow bacause it is made of hydrogen and oxygen atoms. 
The - + Run Chatbot ----------- @@ -1022,11 +1023,11 @@ Click here to see detailed description of advanced options if not Path("gradio_helper_genai.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/llm-chatbot/gradio_helper_genai.py") open("gradio_helper_genai.py", "w").write(r.text) - + from gradio_helper_genai import make_demo - + demo = make_demo(pipe, model_configuration, model_id, lang.value) - + try: demo.launch(debug=True) except Exception: diff --git a/docs/notebooks/llm-question-answering-with-output.rst b/docs/notebooks/llm-question-answering-with-output.rst index f9c792ba1657d6..2feb5ce81a08f5 100644 --- a/docs/notebooks/llm-question-answering-with-output.rst +++ b/docs/notebooks/llm-question-answering-with-output.rst @@ -581,9 +581,9 @@ generation is finished, we will write class-iterator based on .. code:: ipython3 - import openvino_genai as ov_genai + from openvino_genai import LLMPipeline - pipe = ov_genai.LLMPipeline(model_dir.as_posix(), device.value) + pipe = LLMPipeline(model_dir.as_posix(), device.value) print(pipe.generate("The Sun is yellow bacause", temperature=1.2, top_k=4, do_sample=True, max_new_tokens=150)) @@ -675,6 +675,7 @@ Setup imports from time import perf_counter from typing import List import numpy as np + from openvino_genai import StreamerBase from queue import Queue import re @@ -694,7 +695,7 @@ when it is needed. It will help estimate performance. 
detokinizer_dir = Path(model_dir, "openvino_detokenizer.xml") - class TextIteratorStreamer(ov_genai.StreamerBase): + class TextIteratorStreamer(StreamerBase): def __init__(self, tokenizer): super().__init__() self.tokenizer = tokenizer diff --git a/docs/notebooks/localai-with-output.rst b/docs/notebooks/localai-with-output.rst deleted file mode 100644 index fac17b8d241d82..00000000000000 --- a/docs/notebooks/localai-with-output.rst +++ /dev/null @@ -1,220 +0,0 @@ -LocalAI and OpenVINO -==================== - -`LocalAI `__ is the free, Open Source OpenAI -alternative. LocalAI act as a drop-in replacement REST API that’s -compatible with OpenAI API specifications for local inferencing. It -allows you to run LLMs, generate images, audio (and not only) locally or -on-prem with consumer grade hardware, supporting multiple model families -and architectures. Does not require GPU. It is created and maintained by -``Ettore Di Giacinto``. - -In this tutorial we show how to prepare a model config and launch an -OpenVINO LLM model with LocalAI in docker container. - - -**Table of contents:** - - -- `Prepare Docker <#prepare-docker>`__ -- `Prepare a model <#prepare-a-model>`__ -- `Run the server <#run-the-server>`__ -- `Send a client request <#send-a-client-request>`__ -- `Stop the server <#stop-the-server>`__ - -Installation Instructions -~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is a self-contained example that relies solely on its own code. - -We recommend running the notebook in a virtual environment. You only -need a Jupyter server to start. For details, please refer to -`Installation -Guide `__. - -Prepare Docker --------------- - -Install `Docker -Engine `__, including its -`post-installation `__ -steps, on your development system. To verify installation, test it, -using the following command. When it is ready, it will display a test -image and a message. - -.. code:: ipython3 - - !docker run hello-world - - -.. 
parsed-literal:: - - Unable to find image 'hello-world:latest' locally - latest: Pulling from library/hello-world - - Digest: sha256:305243c734571da2d100c8c8b3c3167a098cab6049c9a5b066b6021a60fcb966 - Status: Downloaded newer image for hello-world:latest - - Hello from Docker! - This message shows that your installation appears to be working correctly. - - To generate this message, Docker took the following steps: - 1. The Docker client contacted the Docker daemon. - 2. The Docker daemon pulled the "hello-world" image from the Docker Hub. - (amd64) - 3. The Docker daemon created a new container from that image which runs the - executable that produces the output you are currently reading. - 4. The Docker daemon streamed that output to the Docker client, which sent it - to your terminal. - - To try something more ambitious, you can run an Ubuntu container with: - $ docker run -it ubuntu bash - - Share images, automate workflows, and more with a free Docker ID: - https://hub.docker.com/ - - For more examples and ideas, visit: - https://docs.docker.com/get-started/ - - - -Prepare a model -~~~~~~~~~~~~~~~ - - - -LocalAI allows to use customized models. For more details you can read -the -`instruction `__ -where you can also find the detailed documentation. We will use one of -the OpenVINO optimized LLMs in the collection on the `collection on -🤗Hugging -Face `__. -In this example we will use -`TinyLlama-1.1B-Chat-v1.0-fp16-ov `__. -First of all we should create a model configuration file: - -.. 
code:: yaml - - name: TinyLlama-1.1B-Chat-v1.0-fp16-ov - backend: transformers - parameters: - model: OpenVINO/TinyLlama-1.1B-Chat-v1.0-fp16-ov - temperature: 0.2 - top_k: 40 - top_p: 0.95 - max_new_tokens: 32 - - type: OVModelForCausalLM - - template: - chat_message: | - <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} - {{if .Content}}{{.Content}}{{end}}<|im_end|> - chat: | - {{.Input}} - <|im_start|>assistant - - completion: | - {{.Input}} - - stopwords: - - <|im_end|> - -The fields ``backend``, ``model``, ``type`` you can find in the code -example on the model page (we added the corresponding comments): - -.. code:: python - - from transformers import AutoTokenizer # backend - from optimum.intel.openvino import OVModelForCausalLM # type - - model_id = "OpenVINO/TinyLlama-1.1B-Chat-v1.0-fp16-ov" # parameters.model - tokenizer = AutoTokenizer.from_pretrained(model_id) - model = OVModelForCausalLM.from_pretrained(model_id) - -The name you can choose by yourself. By this name you will specify what -model to use on the client side. - -You can create a GitHub gist and modify fields: -`ov.yaml `__ - -Description of the parameters used in config YAML file can be found -`here `__. - -The most important: - -- ``name`` - model name, used to identify the model in API calls. -- ``backend`` - backend to use for computation (like llama-cpp, - diffusers, whisper, transformers). -- ``parameters.model`` - relative to the models path. -- ``temperature``, ``top_k``, ``top_p``, ``max_new_tokens`` - - parameters for the model. -- ``type`` - type of configuration, often related to the type of task - or model architecture. -- ``template`` - templates for various types of model interactions. -- ``stopwords`` - Words or phrases that halts processing. - -Run the server -~~~~~~~~~~~~~~ - - - -Everything is ready for launch. 
Use -``quay.io/go-skynet/local-ai:v2.23.0-ffmpeg`` image that contains all -required dependencies. For more details read `Run with container -images `__. -If you want to see the output remove the ``-d`` flag and send a client -request from a separate notebook. - -.. code:: ipython3 - - !docker run -d --rm --name="localai" -p 8080:8080 quay.io/go-skynet/local-ai:master-sycl-f16-ffmpeg https://gist.githubusercontent.com/aleksandr-mokrov/f007c8fa6036760a856ddc60f605a0b0/raw/9d24ceeb487f9c058a943113bd0290e8ae565b3e/ov.yaml - - -.. parsed-literal:: - - 67e1a2a8123aa15794c027278aed2c258a04e06883663459bbeaca22ff014740 - docker: Error response from daemon: failed to create task for container: failed to create shim task: OCI runtime create failed: runc create failed: unable to start container process: error during container init: error running hook #1: error running hook: exit status 1, stdout: , stderr: Auto-detected mode as 'legacy' - nvidia-container-cli: requirement error: invalid expression: unknown. - - -Check whether the ``localai`` container is running normally: - -.. code:: ipython3 - - !docker ps | grep localai - -Send a client request -~~~~~~~~~~~~~~~~~~~~~ - - - -Now you can send HTTP requests using the model name -``TinyLlama-1.1B-Chat-v1.0-fp16-ov``. More details how to use `OpenAI -API `__. - -.. code:: ipython3 - - !curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{"model": "TinyLlama-1.1B-Chat-v1.0-fp16-ov", "prompt": "What is OpenVINO?"}' - - -.. parsed-literal:: - - curl: (7) Failed to connect to localhost port 8080: Connection refused - - -Stop the server -~~~~~~~~~~~~~~~ - - - -.. code:: ipython3 - - !docker stop localai - - -.. 
parsed-literal:: - - Error response from daemon: No such container: localai - diff --git a/docs/notebooks/magika-content-type-recognition-with-output.rst b/docs/notebooks/magika-content-type-recognition-with-output.rst index f15167eae183b1..3ef21583fa5807 100644 --- a/docs/notebooks/magika-content-type-recognition-with-output.rst +++ b/docs/notebooks/magika-content-type-recognition-with-output.rst @@ -41,7 +41,6 @@ post `__ @@ -78,13 +77,8 @@ Prerequisites .. parsed-literal:: ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - supervision 0.25.0 requires numpy<1.23.3,>=1.21.2; python_full_version <= "3.10.0", but you have numpy 1.24.4 which is incompatible. - tensorflow 2.12.0 requires keras<2.13,>=2.12.0, but you have keras 2.13.1 which is incompatible. + supervision 0.24.0 requires numpy<1.23.3,>=1.21.2; python_full_version <= "3.10.0", but you have numpy 1.24.4 which is incompatible. tensorflow 2.12.0 requires numpy<1.24,>=1.22, but you have numpy 1.24.4 which is incompatible. - tensorflow 2.12.0 requires tensorboard<2.13,>=2.12, but you have tensorboard 2.13.0 which is incompatible. - tensorflow 2.12.0 requires tensorflow-estimator<2.13,>=2.12.0, but you have tensorflow-estimator 2.13.0 which is incompatible. - tensorflow-cpu 2.13.1 requires numpy<=1.24.3,>=1.22, but you have numpy 1.24.4 which is incompatible. - tensorflow-cpu 2.13.1 requires typing-extensions<4.6.0,>=3.6.6, but you have typing-extensions 4.12.2 which is incompatible. Note: you may need to restart the kernel to use updated packages. diff --git a/docs/notebooks/meter-reader-with-output.rst b/docs/notebooks/meter-reader-with-output.rst index 0ac9308155d4b7..c1317625880917 100644 --- a/docs/notebooks/meter-reader-with-output.rst +++ b/docs/notebooks/meter-reader-with-output.rst @@ -135,7 +135,7 @@ DeepLabV3P pre-trained models from PaddlePaddle community. .. 
parsed-literal:: - meter_det_model.tar.gz: 0%| | 0.00/192M [00:00 + diff --git a/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_16_1.png b/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_16_1.png index 52a1b757cb6589..f5f465e5e0dad2 100644 --- a/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_16_1.png +++ b/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_16_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:08c5ae3bb47e095d707bdaa7f8008bed7eeb1f672c82ae4d63334e665ec3e4d8 +oid sha256:5277177823d4b99e277b1ecd207f67b850c5fd312974c2e691e260e016811526 size 170121 diff --git a/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_18_1.png b/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_18_1.png index 7151cac5e2d0e8..373f323c93bd56 100644 --- a/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_18_1.png +++ b/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_18_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6433ef738eeb00f8d0dc4343ab289073c76321d2e12fe46318fbe374b0f745e2 +oid sha256:889b083b05c7dd518506e68c76a9c7e78d2cbc1273606e1edbd3c2f308a49d9e size 190271 diff --git a/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_20_1.png b/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_20_1.png index 05c23937df9fe5..6c3df0677c7f11 100644 --- a/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_20_1.png +++ b/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_20_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3d67df91f05c9aeb0442a1c4aaef7527cf27e9be0938642eed807f8b5342aa7b +oid sha256:8594e7ed5ce58de7b10de8aa066fa4f9adc43308be46e2ef4dd208da4913301e size 26914 diff --git 
a/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_22_1.png b/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_22_1.png index 61e57d642da114..20a9bb7513c0bc 100644 --- a/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_22_1.png +++ b/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_22_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:50b9f932b844d99b59b51f2c6947dd048f96bf1553fe36de3975d3a3ad1715e4 +oid sha256:eafe2bfb1d91093d1208523063def5d5b4d13285153568d173c302b3d600adfa size 8966 diff --git a/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_24_1.png b/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_24_1.png index b113bcf292fe00..4647a76e34c861 100644 --- a/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_24_1.png +++ b/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_24_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ad7114f80f8925643c865222d0fe0e05d4f65ab54e0b0d354edebe3e5c1ade7c +oid sha256:91a9b23ec86373699c0dbbb252a2cb1b9351ebb08b771a79a4fec4bffbb1787d size 170338 diff --git a/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst b/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst index c130f9e0c08d67..98f1217902a587 100644 --- a/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst +++ b/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst @@ -31,10 +31,11 @@ techniques like weights compression using - `Prepare model inference pipeline <#prepare-model-inference-pipeline>`__ +- `Run OpenVINO model inference <#run-openvino-model-inference>`__ - `Select device <#select-device>`__ + - `Select language model variant <#select-language-model-variant>`__ -- `Run OpenVINO model inference <#run-openvino-model-inference>`__ - `Interactive demo <#interactive-demo>`__ Installation Instructions @@ -54,19 +55,30 @@ 
Prerequisites .. code:: ipython3 - %pip install -q "torch>=2.1" "torchvision" "timm>=0.9.2" "transformers>=4.45" "Pillow" "gradio>=4.19" "tqdm" "sentencepiece" "peft" "huggingface-hub>=0.24.0" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "nncf>=2.14.0" - %pip install -q "git+https://github.com/huggingface/optimum-intel.git" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q -U "openvino>=2024.5" "openvino-tokenizers>=2024.5" "openvino-genai>=2024.5" + %pip install -q "torch>=2.1" "torchvision" "timm>=0.9.2" "transformers>=4.40" "Pillow" "gradio>=4.19" "tqdm" "sentencepiece" "peft" "huggingface-hub>=0.24.0" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "openvino>=2024.3.0" "nncf>=2.12.0" + + +.. parsed-literal:: + + WARNING: Error parsing dependencies of torchsde: .* suffix can only be used with `==` or `!=` operators + numpy (>=1.19.*) ; python_version >= "3.7" + ~~~~~~~^ + Note: you may need to restart the kernel to use updated packages. + WARNING: Error parsing dependencies of torchsde: .* suffix can only be used with `==` or `!=` operators + numpy (>=1.19.*) ; python_version >= "3.7" + ~~~~~~~^ + Note: you may need to restart the kernel to use updated packages. + .. code:: ipython3 import requests from pathlib import Path - if not Path("cmd_helper.py").exists(): - r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py") - open("cmd_helper.py", "w").write(r.text) + if not Path("minicpm_helper.py").exists(): + r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/minicpm-v-multimodal-chatbot/minicpm_helper.py") + open("minicpm_helper.py", "w").write(r.text) if not Path("gradio_helper.py").exists(): @@ -85,36 +97,184 @@ Convert model to OpenVINO Intermediate Representation OpenVINO supports PyTorch models via conversion to OpenVINO Intermediate -Representation format. 
For convenience, we will use OpenVINO integration -with HuggingFace Optimum. `Optimum -Intel `__ is the -interface between the Transformers and Diffusers libraries and the -different tools and libraries provided by Intel to accelerate end-to-end -pipelines on Intel architectures. - -Among other use cases, Optimum Intel provides a simple interface to -optimize your Transformers and Diffusers models, convert them to the -OpenVINO Intermediate Representation (IR) format and run inference using -OpenVINO Runtime. ``optimum-cli`` provides command line interface for -model conversion and optimization. - -General command format: - -.. code:: bash - - optimum-cli export openvino --model --task - -where task is task to export the model for, if not specified, the task -will be auto-inferred based on the model. You can find a mapping between -tasks and model classes in Optimum TaskManager -`documentation `__. -Additionally, you can specify weights compression using -``--weight-format`` argument with one of following options: ``fp32``, -``fp16``, ``int8`` and ``int4``. Fro int8 and int4 -`nncf `__ will be used for -weight compression. More details about model export provided in `Optimum -Intel -documentation `__. +Representation (IR). `OpenVINO model conversion +API `__ +should be used for these purposes. ``ov.convert_model`` function accepts +original PyTorch model instance and example input for tracing and +returns ``ov.Model`` representing this model in OpenVINO framework. +Converted model can be used for saving on disk using ``ov.save_model`` +function or directly loading on device using ``core.complie_model``. + +``minicpm_helper.py`` script contains helper function for model +conversion, please check its content if you interested in conversion +details. + +.. raw:: html + +
+ +.. raw:: html + + + +Click here for more detailed explanation of conversion steps + +.. raw:: html + + + +MiniCPM-V2.6 is autoregressive transformer generative model, it means +that each next model step depends from model output from previous step. +The generation approach is based on the assumption that the probability +distribution of a word sequence can be decomposed into the product of +conditional next word distributions. In other words, model predicts the +next token in the loop guided by previously generated tokens until the +stop-condition will be not reached (generated sequence of maximum length +or end of string token obtained). The way the next token will be +selected over predicted probabilities is driven by the selected decoding +methodology. You can find more information about the most popular +decoding methods in this +`blog `__. The entry point +for the generation process for models from the Hugging Face Transformers +library is the ``generate`` method. You can find more information about +its parameters and configuration in the +`documentation `__. +To preserve flexibility in the selection decoding methodology, we will +convert only model inference for one step. + +The inference flow has difference on first step and for the next. On the +first step, model accept preprocessed input instruction and image, that +transformed to the unified embedding space using ``input_embedding`` and +``image encoder`` models, after that ``language model``, LLM-based part +of model, runs on input embeddings to predict probability of next +generated tokens. On the next step, ``language_model`` accepts only next +token id selected based on sampling strategy and processed by +``input_embedding`` model and cached attention key and values. Since the +output side is auto-regressive, an output token hidden state remains the +same once computed for every further generation step. Therefore, +recomputing it every time you want to generate a new token seems +wasteful. 
With the cache, the model saves the hidden state once it has +been computed. The model only computes the one for the most recently +generated output token at each time step, re-using the saved ones for +hidden tokens. This reduces the generation complexity from +:math:`O(n^3)` to :math:`O(n^2)` for a transformer model. More details +about how it works can be found in this +`article `__. + +With increasing model size like in modern LLMs, we also can note an +increase in the number of attention blocks and size past key values +tensors respectively. The strategy for handling cache state as model +inputs and outputs in the inference cycle may become a bottleneck for +memory-bounded systems, especially with processing long input sequences, +for example in a chatbot scenario. OpenVINO suggests a transformation +that removes inputs and corresponding outputs with cache tensors from +the model keeping cache handling logic inside the model. Such models are +also called stateful. A stateful model is a model that implicitly +preserves data between two consecutive inference calls. The tensors +saved from one run are kept in an internal memory buffer called a +``state`` or a ``variable`` and may be passed to the next run, while +never being exposed as model output. Hiding the cache enables storing +and updating the cache values in a more device-friendly representation. +It helps to reduce memory consumption and additionally optimize model +performance. More details about stateful models and working with state +can be found in `OpenVINO +documentation `__. + +In LLMs, ``input_embedding`` is a part of language model, but for +multimodal case, the first step hidden state produced by this model part +should be integrated with image embeddings into common embedding space. +For ability to reuse this model part and avoid introduction of llm model +instance, we will use it separately. + +``image_encoder`` is represented in MiniCPM-V by pretrained +`SigLIP `__ +model. 
Additionally, MiniCPM uses perceiver ``resampler`` that +compresses the image representations. To preserve model ability to +process images of different size with respect aspect ratio combined in +batch, we will use ``image_encoder`` and ``resampler`` as separated +models. + +To sum up above, model consists of 4 parts: + +- **Image Encoder** for encoding input images into embedding space. It + includes SigLIP model. +- **Resampler** for compression image representation. +- **Input Embedding** for conversion input text tokens into embedding + space. +- **Language Model** for generation answer based on input embeddings + provided by Image Encoder and Input Embedding models. + +Let’s convert each model part. + +.. raw:: html + +
+ +.. code:: ipython3 + + from minicpm_helper import convert_minicpmv26 + + # uncomment the line to see model conversion code + # ??convert_minicpmv26 + + +.. parsed-literal:: + + 2024-10-07 09:57:53.402018: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-07 09:57:53.403877: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used. + 2024-10-07 09:57:53.440490: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2024-10-07 09:57:54.270302: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + + +.. code:: ipython3 + + model_id = "openbmb/MiniCPM-V-2_6" + + model_dir = convert_minicpmv26(model_id) + + +.. parsed-literal:: + + ⌛ openbmb/MiniCPM-V-2_6 conversion started. Be patient, it may takes some time. + ⌛ Load Original model + + + +.. parsed-literal:: + + Loading checkpoint shards: 0%| | 0/4 [00:00 +.. + + **Note:** weights compression process may require additional time and + memory for performing. You can disable it using widget below: + +.. code:: ipython3 + + from minicpm_helper import compression_widget + + to_compress_weights = compression_widget() + + to_compress_weights + + + + +.. parsed-literal:: + + Checkbox(value=True, description='Weights Compression') + + + .. 
code:: ipython3 - from cmd_helper import optimum_cli import nncf - import openvino as ov - import shutil import gc + import openvino as ov + + from minicpm_helper import llm_path, copy_llm_files + + compression_configuration = {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "ratio": 1.0, "all_layers": True} - def compress_lm_weights(model_dir): - compression_configuration = {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "ratio": 1.0, "all_layers": True} - ov_model_path = model_dir / "openvino_language_model.xml" - ov_int4_model_path = model_dir / "openvino_language_model_int4.xml" - ov_model = ov.Core().read_model(ov_model_path) + + core = ov.Core() + llm_int4_path = Path("language_model_int4") / llm_path.name + if to_compress_weights.value and not (model_dir / llm_int4_path).exists(): + ov_model = core.read_model(model_dir / llm_path) ov_compressed_model = nncf.compress_weights(ov_model, **compression_configuration) - ov.save_model(ov_compressed_model, ov_int4_model_path) + ov.save_model(ov_compressed_model, model_dir / llm_int4_path) del ov_compressed_model del ov_model gc.collect() - ov_model_path.unlink() - ov_model_path.with_suffix(".bin").unlink() - shutil.move(ov_int4_model_path, ov_model_path) - shutil.move(ov_int4_model_path.with_suffix(".bin"), ov_model_path.with_suffix(".bin")) - - - model_id = "openbmb/MiniCPM-V-2_6" - model_dir = Path(model_id.split("/")[-1] + "-ov") - - if not model_dir.exists(): - optimum_cli(model_id, model_dir, additional_args={"trust-remote-code": "", "weight-format": "fp16", "task": "image-text-to-text"}) - compress_lm_weights(model_dir) + copy_llm_files(model_dir, llm_int4_path.parent) .. parsed-literal:: @@ -221,27 +394,32 @@ Prepare model inference pipeline .. 
image:: https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/2727402e-3697-442e-beca-26b149967c84 -`OpenVINO™ GenAI `__ -is a library of the most popular Generative AI model pipelines, -optimized execution methods, and samples that run on top of highly -performant `OpenVINO -Runtime `__. - -This library is friendly to PC and laptop execution, and optimized for -resource consumption. It requires no external dependencies to run -generative models as it already includes all the core functionality -(e.g. tokenization via openvino-tokenizers). OpenVINO™ GenAI is a flavor -of OpenVINO™, aiming to simplify running inference of generative AI -models. It hides the complexity of the generation process and minimizes -the amount of code required. - -Inference Visual language models can be implemented using OpenVINO GenAI -``VLMPipeline`` class. Similarly to LLMPipeline, that we discussed in -this -`notebook `__. -It supports chat mode with preserving conversational history inside -pipeline, that allows us effectively implements chatbot that supports -conversation about provided images content. +As discussed, the model comprises Image Encoder and LLM (with separated +text embedding part) that generates answer. In ``minicpm_helper.py`` we +defined LLM inference class ``OvModelForCausalLMWithEmb`` that will +represent generation cycle, It is based on `HuggingFace Transformers +GenerationMixin `__ +and looks similar to `Optimum +Intel `__ +``OVModelForCausalLM``\ that is used for LLM inference with only +difference that it can accept input embedding. In own turn, general +multimodal model class ``OvMiniCPMVModel`` handles chatbot functionality +including image processing and answer generation using LLM. + +.. 
code:: ipython3 + + from minicpm_helper import OvModelForCausalLMWithEmb, OvMiniCPMV, init_model # noqa: F401 + + # uncomment the line to see model inference class + # ??OVMiniCPMV + + # uncomment the line to see language model inference class + # ??OvModelForCausalLMWithEmb + +Run OpenVINO model inference +---------------------------- + + Select device ~~~~~~~~~~~~~ @@ -265,78 +443,46 @@ Select device +Select language model variant +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + .. code:: ipython3 - import openvino_genai as ov_genai + from minicpm_helper import lm_variant_selector + - ov_model = ov_genai.VLMPipeline(model_dir, device=device.value) + use_int4_lang_model = lm_variant_selector(model_dir / llm_int4_path) + + use_int4_lang_model -Run OpenVINO model inference ----------------------------- -For preparing input data, ``VLMPipeline`` use tokenizer and image -processor inside, we just need to convert image to input OpenVINO tensor -and provide question as string. Additionally, we can provides options -for controlling generation process (e.g. number of maximum generated -tokens or using multinomial sampling for decoding instead of greedy -search approach) using ``GenerationConfig``. +.. parsed-literal:: + + Checkbox(value=True, description='INT4 language model') + -Generation process for long response may be time consuming, for -accessing partial result as soon as it is generated without waiting when -whole process finished, Streaming API can be used. Token streaming is -the mode in which the generative system returns the tokens one by one as -the model generates them. This enables showing progressive generations -to the user rather than waiting for the whole generation. Streaming is -an essential aspect of the end-user experience as it reduces latency, -one of the most critical aspects of a smooth experience. .. 
code:: ipython3 - import requests - from PIL import Image - from io import BytesIO - import numpy as np - - image_path = "cat.png" - - - config = ov_genai.GenerationConfig() - config.max_new_tokens = 100 - - - def load_image(image_file): - if isinstance(image_file, str) and (image_file.startswith("http") or image_file.startswith("https")): - response = requests.get(image_file) - image = Image.open(BytesIO(response.content)).convert("RGB") - else: - image = Image.open(image_file).convert("RGB") - image_data = np.array(image.getdata()).reshape(1, image.size[1], image.size[0], 3).astype(np.byte) - return image, ov.Tensor(image_data) - - - def streamer(subword: str) -> bool: - """ - - Args: - subword: sub-word of the generated text. - - Returns: Return flag corresponds whether generation should be stopped. - - """ - print(subword, end="", flush=True) - + ov_model = init_model(model_dir, llm_path.parent if not use_int4_lang_model.value else llm_int4_path.parent, device.value) + + +.. parsed-literal:: + + applied slice for lm head - if not Path(image_path).exists(): - url = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11" - image = Image.open(requests.get(url, stream=True).raw) - image.save(image_path) .. code:: ipython3 - image, image_tensor = load_image(image_path) + import requests + from PIL import Image + url = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11" + image = Image.open(requests.get(url, stream=True).raw) question = "What is unusual on this image?" print(f"Question:\n{question}") @@ -351,19 +497,30 @@ one of the most critical aspects of a smooth experience. -.. image:: minicpm-v-multimodal-chatbot-with-output_files/minicpm-v-multimodal-chatbot-with-output_12_1.png +.. image:: minicpm-v-multimodal-chatbot-with-output_files/minicpm-v-multimodal-chatbot-with-output_17_1.png .. 
code:: ipython3 - ov_model.start_chat() - output = ov_model.generate(question, image=image_tensor, generation_config=config, streamer=streamer) + tokenizer = ov_model.processor.tokenizer + + msgs = [{"role": "user", "content": question}] + + + print("Answer:") + res = ov_model.chat(image=image, msgs=msgs, context=None, tokenizer=tokenizer, sampling=False, stream=True, max_new_tokens=50) + + generated_text = "" + for new_text in res: + generated_text += new_text + print(new_text, flush=True, end="") .. parsed-literal:: - The unusual aspect of this image is the cat's relaxed and vulnerable position. Typically, cats avoid exposing their bellies, which are sensitive and vulnerable areas, to potential threats. In this image, the cat is lying on its back in a cardboard box, exposing its belly and hindquarters, which is not a common sight. This behavior could indicate that the cat feels safe and comfortable in its environment, suggesting a strong bond with its owner and a sense of security in its home. + Answer: + The unusual aspect of this image is the cat's relaxed and vulnerable position. 
Typically, cats avoid exposing their bellies to potential threats or dangers because it leaves them open for attack by predators in nature; however here we see a domesticated pet comfortably lying Interactive demo ---------------- diff --git a/docs/notebooks/minicpm-v-multimodal-chatbot-with-output_files/minicpm-v-multimodal-chatbot-with-output_12_1.jpg b/docs/notebooks/minicpm-v-multimodal-chatbot-with-output_files/minicpm-v-multimodal-chatbot-with-output_12_1.jpg deleted file mode 100644 index c6aeec77cd3cb2..00000000000000 --- a/docs/notebooks/minicpm-v-multimodal-chatbot-with-output_files/minicpm-v-multimodal-chatbot-with-output_12_1.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5fc0d22d75f23474fb4f8aec8c0bf0fdf5d9377f3379e82a3887003e6da47e7e -size 60425 diff --git a/docs/notebooks/minicpm-v-multimodal-chatbot-with-output_files/minicpm-v-multimodal-chatbot-with-output_12_1.png b/docs/notebooks/minicpm-v-multimodal-chatbot-with-output_files/minicpm-v-multimodal-chatbot-with-output_12_1.png deleted file mode 100644 index c6673a757ab5dc..00000000000000 --- a/docs/notebooks/minicpm-v-multimodal-chatbot-with-output_files/minicpm-v-multimodal-chatbot-with-output_12_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c715d8adee4bf7519690de20b57ef2edaa2f914c86a64d107f99a919dcdad218 -size 854224 diff --git a/docs/notebooks/janus-multimodal-generation-with-output_files/janus-multimodal-generation-with-output_14_1.jpg b/docs/notebooks/minicpm-v-multimodal-chatbot-with-output_files/minicpm-v-multimodal-chatbot-with-output_17_1.jpg similarity index 100% rename from docs/notebooks/janus-multimodal-generation-with-output_files/janus-multimodal-generation-with-output_14_1.jpg rename to docs/notebooks/minicpm-v-multimodal-chatbot-with-output_files/minicpm-v-multimodal-chatbot-with-output_17_1.jpg diff --git 
a/docs/notebooks/janus-multimodal-generation-with-output_files/janus-multimodal-generation-with-output_14_1.png b/docs/notebooks/minicpm-v-multimodal-chatbot-with-output_files/minicpm-v-multimodal-chatbot-with-output_17_1.png similarity index 100% rename from docs/notebooks/janus-multimodal-generation-with-output_files/janus-multimodal-generation-with-output_14_1.png rename to docs/notebooks/minicpm-v-multimodal-chatbot-with-output_files/minicpm-v-multimodal-chatbot-with-output_17_1.png diff --git a/docs/notebooks/mllama-3.2-with-output.rst b/docs/notebooks/mllama-3.2-with-output.rst index 19ebd2d658174e..ba338d67dc677e 100644 --- a/docs/notebooks/mllama-3.2-with-output.rst +++ b/docs/notebooks/mllama-3.2-with-output.rst @@ -53,9 +53,9 @@ Prerequisites .. code:: ipython3 - %pip install -q "torch>=2.1" "torchvision" "Pillow" "tqdm" "datasets>=2.14.6" "gradio>=4.36" "nncf>=2.14.0" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "torch>=2.1" "torchvision" "Pillow" "tqdm" "datasets>=2.14.6" "gradio>=4.36" "nncf>=2.13.0" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q "transformers>=4.45" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -Uq "openvino>=2024.5.0" + %pip install -Uq --pre "openvino>2024.4.0" --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly .. code:: ipython3 diff --git a/docs/notebooks/mobileclip-video-search-with-output.rst b/docs/notebooks/mobileclip-video-search-with-output.rst index 6e6dac1e1cf214..a606830470aa94 100644 --- a/docs/notebooks/mobileclip-video-search-with-output.rst +++ b/docs/notebooks/mobileclip-video-search-with-output.rst @@ -62,176 +62,39 @@ Prerequisites .. 
code:: ipython3 - import requests - - - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - open("notebook_utils.py", "w").write(r.text) - - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py", - ) - open("cmd_helper.py", "w").write(r.text) - - - - -.. parsed-literal:: - - 1491 - - - -.. code:: ipython3 - - from cmd_helper import clone_repo - - - clone_repo("https://github.com/apple/ml-mobileclip.git") + from pathlib import Path + repo_dir = Path("./ml-mobileclip") + if not repo_dir.exists(): + !git clone https://github.com/apple/ml-mobileclip.git .. parsed-literal:: - PosixPath('ml-mobileclip') - + Cloning into 'ml-mobileclip'... + remote: Enumerating objects: 95, done. + remote: Counting objects: 100% (95/95), done. + remote: Compressing objects: 100% (66/66), done. + remote: Total 95 (delta 38), reused 85 (delta 28), pack-reused 0 (from 0) + Unpacking objects: 100% (95/95), 469.11 KiB | 3.13 MiB/s, done. .. code:: ipython3 %pip install -q "./ml-mobileclip" --no-deps - %pip install -q "clip-benchmark>=1.4.0" "datasets>=2.8.0" "open-clip-torch>=2.20.0" "timm>=0.9.5" "torch>=2.5.0" "torchvision>=0.20.0" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "clip-benchmark>=1.4.0" "datasets>=2.8.0" "open-clip-torch>=2.20.0" "timm>=0.9.5" "torch>=1.13.1" "torchvision>=0.14.1" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "matplotlib>=3.4" "Pillow" "altair" "pandas" "tqdm" "salesforce-lavis==1.0.2" + %pip install -q "openvino>=2024.0.0" "gradio>=4.19" "matplotlib" "Pillow" "altair" "pandas" "opencv-python" "tqdm" "matplotlib>=3.4" .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. 
- ERROR: Could not find a version that satisfies the requirement torch>=2.5.0 (from versions: 1.4.0, 1.4.0+cpu, 1.5.0, 1.5.0+cpu, 1.5.1, 1.5.1+cpu, 1.6.0, 1.6.0+cpu, 1.7.0, 1.7.0+cpu, 1.7.1, 1.7.1+cpu, 1.8.0, 1.8.0+cpu, 1.8.1, 1.8.1+cpu, 1.9.0, 1.9.0+cpu, 1.9.1, 1.9.1+cpu, 1.10.0, 1.10.0+cpu, 1.10.1, 1.10.1+cpu, 1.10.2, 1.10.2+cpu, 1.11.0, 1.11.0+cpu, 1.12.0, 1.12.0+cpu, 1.12.1, 1.12.1+cpu, 1.13.0, 1.13.0+cpu, 1.13.1, 1.13.1+cpu, 2.0.0, 2.0.0+cpu, 2.0.1, 2.0.1+cpu, 2.1.0, 2.1.0+cpu, 2.1.1, 2.1.1+cpu, 2.1.2, 2.1.2+cpu, 2.2.0, 2.2.0+cpu, 2.2.1, 2.2.1+cpu, 2.2.2, 2.2.2+cpu, 2.3.0, 2.3.0+cpu, 2.3.1, 2.3.1+cpu, 2.4.0, 2.4.0+cpu, 2.4.1, 2.4.1+cpu) - ERROR: No matching distribution found for torch>=2.5.0 - Note: you may need to restart the kernel to use updated packages. - error: subprocess-exited-with-error - - × pip subprocess to install build dependencies did not run successfully. - │ exit code: 1 - ╰─> [92 lines of output] - Ignoring numpy: markers 'python_version >= "3.9"' don't match your environment - Collecting setuptools - Using cached setuptools-75.3.0-py3-none-any.whl.metadata (6.9 kB) - Collecting cython<3.0,>=0.25 - Using cached Cython-0.29.37-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (3.1 kB) - Collecting cymem<2.1.0,>=2.0.2 - Using cached cymem-2.0.10.tar.gz (10 kB) - Installing build dependencies: started - Installing build dependencies: finished with status 'done' - Getting requirements to build wheel: started - Getting requirements to build wheel: finished with status 'done' - Preparing metadata (pyproject.toml): started - Preparing metadata (pyproject.toml): finished with status 'done' - Collecting preshed<3.1.0,>=3.0.2 - Using cached preshed-3.0.9-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.2 kB) - Collecting murmurhash<1.1.0,>=0.28.0 - Using cached murmurhash-1.0.11.tar.gz (13 kB) - Installing build dependencies: started - Installing build 
dependencies: finished with status 'done' - Getting requirements to build wheel: started - Getting requirements to build wheel: finished with status 'done' - Preparing metadata (pyproject.toml): started - Preparing metadata (pyproject.toml): finished with status 'done' - Collecting thinc<8.4.0,>=8.3.0 - Using cached thinc-8.3.2.tar.gz (193 kB) - Installing build dependencies: started - Installing build dependencies: finished with status 'error' - error: subprocess-exited-with-error - - × pip subprocess to install build dependencies did not run successfully. - │ exit code: 1 - ╰─> [50 lines of output] - Ignoring numpy: markers 'python_version >= "3.9"' don't match your environment - Collecting setuptools - Using cached setuptools-75.3.0-py3-none-any.whl.metadata (6.9 kB) - Collecting cython<3.0,>=0.25 - Using cached Cython-0.29.37-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (3.1 kB) - Collecting murmurhash<1.1.0,>=1.0.2 - Using cached murmurhash-1.0.11.tar.gz (13 kB) - Installing build dependencies: started - Installing build dependencies: finished with status 'done' - Getting requirements to build wheel: started - Getting requirements to build wheel: finished with status 'done' - Preparing metadata (pyproject.toml): started - Preparing metadata (pyproject.toml): finished with status 'done' - Collecting cymem<2.1.0,>=2.0.2 - Using cached cymem-2.0.10.tar.gz (10 kB) - Installing build dependencies: started - Installing build dependencies: finished with status 'done' - Getting requirements to build wheel: started - Getting requirements to build wheel: finished with status 'done' - Preparing metadata (pyproject.toml): started - Preparing metadata (pyproject.toml): finished with status 'done' - Collecting preshed<3.1.0,>=3.0.2 - Using cached preshed-3.0.9-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.2 kB) - Collecting blis<1.1.0,>=1.0.0 - Using cached 
blis-1.0.1.tar.gz (3.6 MB) - Installing build dependencies: started - Installing build dependencies: finished with status 'error' - error: subprocess-exited-with-error - - × pip subprocess to install build dependencies did not run successfully. - │ exit code: 1 - ╰─> [8 lines of output] - Collecting setuptools - Using cached setuptools-75.3.0-py3-none-any.whl.metadata (6.9 kB) - Collecting cython>=0.25 - Using cached Cython-3.0.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.2 kB) - ERROR: Ignored the following versions that require a different python version: 1.25.0 Requires-Python >=3.9; 1.25.1 Requires-Python >=3.9; 1.25.2 Requires-Python >=3.9; 1.26.0 Requires-Python <3.13,>=3.9; 1.26.1 Requires-Python <3.13,>=3.9; 1.26.2 Requires-Python >=3.9; 1.26.3 Requires-Python >=3.9; 1.26.4 Requires-Python >=3.9; 2.0.0 Requires-Python >=3.9; 2.0.1 Requires-Python >=3.9; 2.0.2 Requires-Python >=3.9; 2.1.0 Requires-Python >=3.10; 2.1.0rc1 Requires-Python >=3.10; 2.1.1 Requires-Python >=3.10; 2.1.2 Requires-Python >=3.10; 2.1.3 Requires-Python >=3.10; 2.2.0 Requires-Python >=3.10; 2.2.0rc1 Requires-Python >=3.10; 75.4.0 Requires-Python >=3.9; 75.5.0 Requires-Python >=3.9; 75.6.0 Requires-Python >=3.9 - ERROR: Could not find a version that satisfies the requirement numpy<3.0.0,>=2.0.0 (from versions: 1.3.0, 1.4.1, 1.5.0, 1.5.1, 1.6.0, 1.6.1, 1.6.2, 1.7.0, 1.7.1, 1.7.2, 1.8.0, 1.8.1, 1.8.2, 1.9.0, 1.9.1, 1.9.2, 1.9.3, 1.10.0.post2, 1.10.1, 1.10.2, 1.10.4, 1.11.0, 1.11.1, 1.11.2, 1.11.3, 1.12.0, 1.12.1, 1.13.0, 1.13.1, 1.13.3, 1.14.0, 1.14.1, 1.14.2, 1.14.3, 1.14.4, 1.14.5, 1.14.6, 1.15.0, 1.15.1, 1.15.2, 1.15.3, 1.15.4, 1.16.0, 1.16.1, 1.16.2, 1.16.3, 1.16.4, 1.16.5, 1.16.6, 1.17.0, 1.17.1, 1.17.2, 1.17.3, 1.17.4, 1.17.5, 1.18.0, 1.18.1, 1.18.2, 1.18.3, 1.18.4, 1.18.5, 1.19.0, 1.19.1, 1.19.2, 1.19.3, 1.19.4, 1.19.5, 1.20.0, 1.20.1, 1.20.2, 1.20.3, 1.21.0, 1.21.1, 1.21.2, 1.21.3, 1.21.4, 1.21.5, 1.21.6, 1.22.0, 1.22.1, 1.22.2, 1.22.3, 1.22.4, 1.23.0, 
1.23.1, 1.23.2, 1.23.3, 1.23.4, 1.23.5, 1.24.0, 1.24.1, 1.24.2, 1.24.3, 1.24.4) - ERROR: No matching distribution found for numpy<3.0.0,>=2.0.0 - - [end of output] - - note: This error originates from a subprocess, and is likely not a problem with pip. - error: subprocess-exited-with-error - - × pip subprocess to install build dependencies did not run successfully. - │ exit code: 1 - ╰─> See above for output. - - note: This error originates from a subprocess, and is likely not a problem with pip. - [end of output] - - note: This error originates from a subprocess, and is likely not a problem with pip. - error: subprocess-exited-with-error - - × pip subprocess to install build dependencies did not run successfully. - │ exit code: 1 - ╰─> See above for output. - - note: This error originates from a subprocess, and is likely not a problem with pip. - [end of output] - - note: This error originates from a subprocess, and is likely not a problem with pip. - error: subprocess-exited-with-error - - × pip subprocess to install build dependencies did not run successfully. - │ exit code: 1 - ╰─> See above for output. - - note: This error originates from a subprocess, and is likely not a problem with pip. + ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. + mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. Note: you may need to restart the kernel to use updated packages. - - -.. code:: ipython3 - - %pip install -q "git+https://github.com/huggingface/optimum-intel.git" "openvino>=2024.0.0" "altair" "opencv-python" "opencv-contrib-python" "gradio>=4.19" - - -.. parsed-literal:: - Note: you may need to restart the kernel to use updated packages. @@ -275,37 +138,13 @@ comparison purposes, you can select different models among: faster and 2.8x smaller. 
More details about model can be found in `research paper `__ and `GitHub repository `__. -- **BLIP-2** - BLIP2 was introduced in the paper `BLIP-2: Bootstrapping - Language-Image Pre-training with Frozen Image Encoders and Large - Language Models `__ by Li et - al. and first released in this - `repository `__. - It is a generic and efficient pre-training strategy that easily - harvests development of pretrained vision models and large language - models (LLMs) for vision-language pretraining. BLIP-2 consists of 3 - models: a CLIP-like image encoder, a Querying Transformer (Q-Former) - and a large language model. .. code:: ipython3 - from pathlib import Path - import ipywidgets as widgets - model_dir = Path("checkpoints") - - def default_image_probs(image_features, text_features): - image_probs = (100.0 * text_features @ image_features.T).softmax(dim=-1) - return image_probs - - - def blip2_image_probs(image_features, text_features): - image_probs = image_features[:, 0, :] @ text_features[:, 0, :].t() - return image_probs - - supported_models = { "MobileCLIP": { "mobileclip_s0": { @@ -313,35 +152,30 @@ comparison purposes, you can select different models among: "pretrained": model_dir / "mobileclip_s0.pt", "url": "https://docs-assets.developer.apple.com/ml-research/datasets/mobileclip/mobileclip_s0.pt", "image_size": 256, - "image_probs": default_image_probs, }, "mobileclip_s1": { "model_name": "mobileclip_s1", "pretrained": model_dir / "mobileclip_s1.pt", "url": "https://docs-assets.developer.apple.com/ml-research/datasets/mobileclip/mobileclip_s1.pt", "image_size": 256, - "image_probs": default_image_probs, }, "mobileclip_s2": { "model_name": "mobileclip_s0", "pretrained": model_dir / "mobileclip_s2.pt", "url": "https://docs-assets.developer.apple.com/ml-research/datasets/mobileclip/mobileclip_s2.pt", "image_size": 256, - "image_probs": default_image_probs, }, "mobileclip_b": { "model_name": "mobileclip_b", "pretrained": model_dir / "mobileclip_b.pt", "url": 
"https://docs-assets.developer.apple.com/ml-research/datasets/mobileclip/mobileclip_b.pt", "image_size": 224, - "image_probs": default_image_probs, }, "mobileclip_blt": { "model_name": "mobileclip_b", "pretrained": model_dir / "mobileclip_blt.pt", "url": "https://docs-assets.developer.apple.com/ml-research/datasets/mobileclip/mobileclip_blt.pt", "image_size": 224, - "image_probs": default_image_probs, }, }, "CLIP": { @@ -349,25 +183,21 @@ comparison purposes, you can select different models among: "model_name": "ViT-B-32", "pretrained": "laion2b_s34b_b79k", "image_size": 224, - "image_probs": default_image_probs, }, "clip-vit-b-16": { "model_name": "ViT-B-16", "pretrained": "openai", "image_size": 224, - "image_probs": default_image_probs, }, "clip-vit-l-14": { "model_name": "ViT-L-14", "pretrained": "datacomp_xl_s13b_b90k", "image_size": 224, - "image_probs": default_image_probs, }, "clip-vit-h-14": { "model_name": "ViT-H-14", "pretrained": "laion2b_s32b_b79k", "image_size": 224, - "image_probs": default_image_probs, }, }, "SigLIP": { @@ -375,21 +205,11 @@ comparison purposes, you can select different models among: "model_name": "ViT-B-16-SigLIP", "pretrained": "webli", "image_size": 224, - "image_probs": default_image_probs, }, "siglip-vit-l-16": { "model_name": "ViT-L-16-SigLIP-256", "pretrained": "webli", "image_size": 256, - "image_probs": default_image_probs, - }, - }, - "Blip2": { - "blip2_feature_extractor": { - "model_name": "blip2_feature_extractor", - "pretrained": "pretrain_vitL", - "image_size": 224, - "image_probs": blip2_image_probs, }, }, } @@ -403,7 +223,7 @@ comparison purposes, you can select different models among: .. parsed-literal:: - Dropdown(description='Model type:', options=('MobileCLIP', 'CLIP', 'SigLIP', 'Blip2'), value='MobileCLIP') + Dropdown(description='Model type:', options=('MobileCLIP', 'CLIP', 'SigLIP'), value='MobileCLIP') @@ -430,6 +250,14 @@ comparison purposes, you can select different models among: .. 
code:: ipython3 + import requests + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + + open("notebook_utils.py", "w").write(r.text) + from notebook_utils import download_file, device_widget model_config = available_models[model_checkpoint.value] @@ -523,29 +351,29 @@ Prepare image gallery .. parsed-literal:: - red_panda.png: 0%| | 0.00/50.6k [00:00`__. - -.. code:: ipython3 - - from pathlib import Path - - cn2en_trans_model_path = "ov_models/cn2en_trans_model" - cn2en_trans_model_id = "Helsinki-NLP/opus-mt-zh-en" - - if not Path(cn2en_trans_model_path).exists(): - !optimum-cli export openvino --model {cn2en_trans_model_id} --task text2text-generation-with-past --trust-remote-code {cn2en_trans_model_path} - - -.. parsed-literal:: - - 2024-12-10 02:25:36.467688: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-12-10 02:25:36.491610: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/marian/tokenization_marian.py:175: UserWarning: Recommended: pip install sacremoses. - warnings.warn("Recommended: pip install sacremoses.") - Moving the following attributes in the config to the generation config: {'max_length': 512, 'num_beams': 6, 'bad_words_ids': [[65000]]}. 
You are seeing this warning because you've set generation parameters in the model config, as opposed to in the generation config. - `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/marian/modeling_marian.py:207: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/marian/modeling_marian.py:214: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if attention_mask.size() != (bsz, 1, tgt_len, src_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/marian/modeling_marian.py:246: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
- if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:88: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if input_shape[-1] > 1 or self.sliding_window is not None: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if past_key_values_length > 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/marian/modeling_marian.py:166: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if ( - model.safetensors: 0%| | 0.00/312M [00:00 0.19 and openvino version <= 2024.4. Please downgrade to tokenizers version <= 0.19 to export tokenizers to OpenVINO. - model.safetensors: 100%|█████████████████████| 312M/312M [00:04<00:00, 71.1MB/s] + Image encoding took 0.0294 ms + Text encoding took 0.00498 ms -.. 
code:: ipython3 - - from transformers import AutoTokenizer - from optimum.intel import OVModelForSeq2SeqLM - tr_tokenizer = AutoTokenizer.from_pretrained(cn2en_trans_model_path) - tr_model = OVModelForSeq2SeqLM.from_pretrained(cn2en_trans_model_path) - - -.. parsed-literal:: - - 2024-12-10 02:26:01.092495: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-12-10 02:26:01.118195: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/marian/tokenization_marian.py:175: UserWarning: Recommended: pip install sacremoses. - warnings.warn("Recommended: pip install sacremoses.") +.. image:: mobileclip-video-search-with-output_files/mobileclip-video-search-with-output_25_1.png Interactive Demo @@ -934,9 +634,7 @@ Interactive Demo In this part, you can try different supported by tutorial models in searching frames in the video by text query or image. Upload video and provide text query or reference image for search and model will find the -most relevant frames according to provided query. You can also try -querying in Chinese, and translation model will be triggered -automatically for Chinese-to-English translation. Please note, different +most relevant frames according to provided query. Please note, different models can require different optimal threshold for search. .. 
code:: ipython3 @@ -976,22 +674,7 @@ models can require different optimal threshold for search. ) - def is_english(text): - for char in text: - if not char.isascii(): - return False - return True - - - def translate(text): - if tr_tokenizer: - t = tr_tokenizer(text, return_tensors="pt") - r = tr_model.generate(**t) - text = tr_tokenizer.decode(r[0][1:-1]) - return text - - - def get_preprocess_probs_tokenizer(model_name): + def get_preprocess_and_tokenizer(model_name): if "mobileclip" in model_name: resolution = supported_models["MobileCLIP"][model_name]["image_size"] resize_size = resolution @@ -1006,23 +689,13 @@ models can require different optimal threshold for search. ] preprocess = Compose(aug_list) tokenizer = mobileclip.get_tokenizer(supported_models["MobileCLIP"][model_name]["model_name"]) - image_probs = default_image_probs - elif "blip2" in model_name: - from lavis.models import load_model_and_preprocess - - model, vis_processors, txt_processors = load_model_and_preprocess(name=model_name, model_type=pretrained, is_eval=True) - model = Blip2Model(model.ln_vision, model.visual_encoder, model.query_tokens, model.Qformer, model.vision_proj, model.text_proj, model.tokenizer) - preprocess = vis_processors["eval"] - tokenizer = model.tokenizer - image_probs = blip2_image_probs else: model_configs = supported_models["SigLIP"] if "siglip" in model_name else supported_models["CLIP"] resize_size = model_configs[model_name]["image_size"] preprocess = image_transform((resize_size, resize_size), is_train=False, resize_mode="longest") tokenizer = open_clip.get_tokenizer(model_configs[model_name]["model_name"]) - image_probs = default_image_probs - return preprocess, image_probs, tokenizer + return preprocess, tokenizer def run( @@ -1043,12 +716,11 @@ models can require different optimal threshold for search. 
global tokenizer global ov_compiled_image_encoder global ov_compiled_text_encoder - global image_probs_function if current_model != model_name or device != current_device: ov_compiled_image_encoder = core.compile_model(ov_models_dir / f"{model_name}_im_encoder.xml", device) ov_compiled_text_encoder = core.compile_model(ov_models_dir / f"{model_name}_text_encoder.xml", device) - preprocess, image_probs_function, tokenizer = get_preprocess_probs_tokenizer(model_name) + preprocess, tokenizer = get_preprocess_and_tokenizer(model_name) current_model = model_name current_device = device # Load video @@ -1062,9 +734,6 @@ models can require different optimal threshold for search. query_features /= query_features.norm(dim=-1, keepdim=True) # Get text query features else: - if not is_english(text_search): - text_search = translate(text_search) - print(f"Translated input text: {text_search}") # Tokenize search phrase text = tokenizer([text_search]) # Encode text query @@ -1079,8 +748,9 @@ models can require different optimal threshold for search. image_features = torch.from_numpy(ov_compiled_image_encoder(image)[0]) image_features /= image_features.norm(dim=-1, keepdim=True) - probs = image_probs_function(image_features, query_features) - probs = probs.cpu().numpy().squeeze(1) if "blip2" in model_name else probs[0] + probs = query_features.cpu().numpy() @ image_features.cpu().numpy().T + probs = probs[0] + # Save frame similarity values df = pd.DataFrame( { @@ -1154,13 +824,13 @@ models can require different optimal threshold for search. .. parsed-literal:: - car-detection.mp4: 0%| | 0.00/2.68M [00:00`__ is a competent +multimodal vision language model (MMVLM) targeted to run on mobile +devices. 
It is an amalgamation of a myriad of architectural designs and +techniques that are mobile-oriented, which comprises a set of language +models at the scale of 1.4B and 2.7B parameters, trained from scratch, a +multimodal vision model that is pre-trained in the CLIP fashion, +cross-modality interaction via an efficient projector. + +|image0| + +The MobileVLM architecture (right) utilizes +`MobileLLaMA `__ as +its language model, intakes :math:`\mathbf{X}_v` and +:math:`\mathbf{X}_q` which are image and language instructions as +respective inputs and gives :math:`\mathbf{Y}_a` as the output language +response. LDP refers to a lightweight downsample projector (left). + +See more information on the official +`GitHub <https://github.com/Meituan-AutoML/MobileVLM>`__ project page +and `paper <https://arxiv.org/abs/2312.16886>`__. + + +**Table of contents:** + + +- `Install requirements <#install-requirements>`__ +- `Clone MobileVLM repository <#clone-mobilevlm-repository>`__ +- `Import required packages <#import-required-packages>`__ +- `Load the model <#load-the-model>`__ +- `Convert model to OpenVINO Intermediate Representation + (IR) <#convert-model-to-openvino-intermediate-representation-ir>`__ +- `Inference <#inference>`__ + + - `Load OpenVINO model <#load-openvino-model>`__ + - `Prepare input data <#prepare-input-data>`__ + - `Run generation process <#run-generation-process>`__ + +- `Interactive inference <#interactive-inference>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +.. |image0| image:: https://github.com/Meituan-AutoML/MobileVLM/raw/main/assets/mobilevlm_arch.png + +Install requirements +-------------------- + + + +..
code:: ipython3 + + %pip install -q "torch>=2.1.0" "timm>=0.9.12" --extra-index-url "https://download.pytorch.org/whl/cpu" + %pip install -q "transformers>=4.33.1,<4.35.0" accelerate "sentencepiece>=0.1.99" "openvino>=2023.2.0" "nncf>=2.7.0" ipywidgets numpy "gradio>=4.19" + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. + mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. + Note: you may need to restart the kernel to use updated packages. + + +Clone MobileVLM repository +-------------------------- + + + +.. code:: ipython3 + + from pathlib import Path + import sys + + MOBILEVLM_REPO_DIR = Path("./MobileVLM") + if not MOBILEVLM_REPO_DIR.exists(): + !git clone -q "https://github.com/Meituan-AutoML/MobileVLM.git" + sys.path.insert(0, str(MOBILEVLM_REPO_DIR)) + +Import required packages +------------------------ + + + +.. code:: ipython3 + + import warnings + import itertools + import gc + from typing import Optional, List, Tuple + + from mobilevlm.model.mobilevlm import load_pretrained_model + from mobilevlm.conversation import conv_templates, SeparatorStyle + from mobilevlm.utils import ( + disable_torch_init, + process_images, + tokenizer_image_token, + KeywordsStoppingCriteria, + ) + from mobilevlm.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN + import PIL + import torch + import transformers + import numpy as np + import gradio as gr + import openvino as ov + import nncf + import ipywidgets as widgets + + +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. 
Please use torch.utils._pytree.register_pytree_node instead. + torch.utils._pytree._register_pytree_node( + 2024-11-05 02:02:06.143728: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 02:02:06.177889: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2024-11-05 02:02:06.679118: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + torch.utils._pytree._register_pytree_node( + + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + + +.. code:: ipython3 + + MODELS_DIR = Path("./models") + MODEL_PATH = "mtgv/MobileVLM-1.7B" + + TEMPERATURE = 0.2 + TOP_P = None + NUM_BEAMS = 1 + MAX_NEW_TOKENS = 512 + + IMAGE_PATH = MOBILEVLM_REPO_DIR / "assets" / "samples" / "demo.jpg" + PROMPT_STR = "Who is the author of this book?\nAnswer the question using a single word or phrase." + +Load the model +-------------- + + + +To load the model, we use pre-defined ``load_pretrained_model`` function +in ``mobilevlm`` module. It returns the model itself, tokenizer, and +image processor to convert images to appropriate tensors. + +.. 
code:: ipython3 + + model_name = MODEL_PATH.split("/")[-1] + disable_torch_init() + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + tokenizer, model, image_processor, _ = load_pretrained_model(MODEL_PATH, device="cpu") + model = model.to(dtype=torch.float32) + + +.. parsed-literal:: + + You are resizing the embedding layer without providing a `pad_to_multiple_of` parameter. This means that the new embedding dimension will be 32000. This might induce some performance reduction as *Tensor Cores* will not be available. For more details about this, or help on choosing the correct value for resizing, refer to this guide: https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multiplication/index.html#requirements-tc + + +Convert model to OpenVINO Intermediate Representation (IR) +---------------------------------------------------------- + + + +.. code:: ipython3 + + def cleanup_torchscript_cache(): + """ + Helper for removing cached model representation + """ + torch._C._jit_clear_class_registry() + torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() + torch.jit._state._clear_class_state() + +For reducing memory consumption, weights compression optimization can be +applied using `NNCF `__. Weight +compression aims to reduce the memory footprint of a model. It can also +lead to significant performance improvement for large memory-bound +models, such as Large Language Models (LLMs). LLMs and other models, +which require extensive memory to store the weights during inference, +can benefit from weight compression in the following ways: + +- enabling the inference of exceptionally large models that cannot be + accommodated in the memory of the device; + +- improving the inference performance of the models by reducing the + latency of the memory access when computing the operations with + weights, for example, Linear layers. 
+ +`Neural Network Compression Framework +(NNCF) `__ provides 4-bit / +8-bit mixed weight quantization as a compression method primarily +designed to optimize LLMs. The main difference between weights +compression and full model quantization (post-training quantization) is +that activations remain floating-point in the case of weights +compression which leads to a better accuracy. Weight compression for +LLMs provides a solid inference performance improvement which is on par +with the performance of the full model quantization. In addition, weight +compression is data-free and does not require a calibration dataset, +making it easy to use. + +``nncf.compress_weights`` function can be used for performing weights +compression. The function accepts an OpenVINO model and other +compression parameters. Compared to INT8 compression, INT4 compression +improves performance even more, but introduces a minor drop in +prediction quality. + +More details about weights compression, can be found in `OpenVINO +documentation `__. + +Please select below whether you would like to run INT4 weight +compression instead of INT8 weight compression. + +.. code:: ipython3 + + compression_mode = widgets.Dropdown( + options=["INT4", "INT8"], + value="INT4", + description="Compression mode:", + disabled=False, + ) + + compression_mode + + + + +.. parsed-literal:: + + Dropdown(description='Compression mode:', options=('INT4', 'INT8'), value='INT4') + + + +.. code:: ipython3 + + stage1_xml_path = MODELS_DIR / f"stage1_{compression_mode.value}.xml" + stage2_xml_path = MODELS_DIR / f"stage2_{compression_mode.value}.xml" + +.. code:: ipython3 + + if compression_mode.value == "INT4": + wc_parameters = dict(mode=nncf.CompressWeightsMode.INT4_ASYM, group_size=128, ratio=0.8) + else: + wc_parameters = dict(mode=nncf.CompressWeightsMode.INT8) + +.. 
code:: ipython3 + + class ModelWrapper(torch.nn.Module): + def __init__(self, model): + super().__init__() + self.model = model + + def forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + ): + outputs = self.model.model( + input_ids=input_ids, + attention_mask=attention_mask, + past_key_values=past_key_values, + inputs_embeds=inputs_embeds, + ) + hidden_states = outputs[0] + logits = self.model.lm_head(hidden_states) + + return (logits,) + outputs[1:] + +.. code:: ipython3 + + def set_input_names(model, past_key_values): + input_names = [ + "input_ids", + "attention_mask", + *itertools.chain.from_iterable([f"past_key_values.{idx}.key", f"past_key_values.{idx}.value"] for idx, _ in enumerate(past_key_values)), + ] + assert len(input_names) == len(model.inputs) + for _input, input_name in zip(model.inputs, input_names): + _input.get_tensor().set_names({input_name}) + +.. code:: ipython3 + + def set_output_names(model, past_key_values): + output_names = [ + "logits", + *itertools.chain.from_iterable([f"present.{idx}.key", f"present.{idx}.value"] for idx, _ in enumerate(past_key_values)), + ] + assert len(output_names) == len(model.outputs) + for out, out_name in zip(ov_model.outputs, output_names): + out.get_tensor().set_names({out_name}) + +.. code:: ipython3 + + example_input = { + "inputs_embeds": torch.zeros((1, 205, 2048)), + "attention_mask": torch.ones((1, 205), dtype=torch.long), + } + + wrapped = ModelWrapper(model) + past_key_values = wrapped(**example_input)[1] + + if not stage1_xml_path.exists(): + ov_model = ov.convert_model(wrapped, example_input=example_input) + set_output_names(ov_model, past_key_values) + ov_model = nncf.compress_weights(ov_model, **wc_parameters) + ov.save_model(ov_model, stage1_xml_path) + cleanup_torchscript_cache() + del ov_model + gc.collect() + + +.. 
parsed-literal:: + + WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. + + +.. parsed-literal:: + + [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. + + +.. parsed-literal:: + + WARNING:nncf:NNCF provides best results with torch==2.4.*, while current torch version is 2.2.2+cpu. If you encounter issues, consider switching to torch==2.4.* + + +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:595: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if input_shape[-1] > 1: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:119: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if seq_len > self.max_seq_len_cached: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:348: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len): + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:355: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if attention_mask.size() != (bsz, 1, q_len, kv_seq_len): + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:365: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim): + + + +.. parsed-literal:: + + Output() + + + + + + + + + +.. parsed-literal:: + + INFO:nncf:Statistics of the bitwidth distribution: + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 24% (43 / 169) │ 20% (42 / 168) │ + ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ 4 │ 76% (126 / 169) │ 80% (126 / 168) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + + + +.. parsed-literal:: + + Output() + + + + + + + + + +.. 
code:: ipython3 + + example_input = { + "input_ids": torch.ones((1, 1), dtype=torch.long), + "past_key_values": past_key_values, + "attention_mask": torch.ones((1, past_key_values[-1][-1].shape[-2] + 1), dtype=torch.long), + } + + if not stage2_xml_path.exists(): + ov_model = ov.convert_model( + wrapped, + example_input=example_input, + ) + set_input_names(ov_model, past_key_values) + set_output_names(ov_model, past_key_values) + ov_model = nncf.compress_weights(ov_model, **wc_parameters) + ov.save_model(ov_model, stage2_xml_path) + cleanup_torchscript_cache() + del ov_model + gc.collect() + + +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) + if a.grad is not None: + + + +.. parsed-literal:: + + Output() + + + + + + + + + +.. 
parsed-literal:: + + INFO:nncf:Statistics of the bitwidth distribution: + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 28% (44 / 170) │ 20% (42 / 168) │ + ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ 4 │ 72% (126 / 170) │ 80% (126 / 168) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + + + +.. parsed-literal:: + + Output() + + + + + + + + + +.. code:: ipython3 + + prepare_inputs_labels_for_multimodal = model.prepare_inputs_labels_for_multimodal + prepare_inputs_for_generation = model.prepare_inputs_for_generation + config = model.config + config.save_pretrained(MODELS_DIR) + +.. code:: ipython3 + + del wrapped + del model + gc.collect(); + +Inference +--------- + + + +``OVMobileLlamaForCausalLM`` class provides ease-to-use interface for +using model in generation scenario. It is based on +``transformers.generation.GenerationMixin`` that gives us opportunity to +reuse all reach capabilities for generation implemented in HuggingFace +Transformers library. More details about this interface can be found in +`HuggingFace +documentation `__. + +.. 
code:: ipython3 + + core = ov.Core() + + + class OVMobileLlamaForCausalLM(transformers.GenerationMixin): + def __init__(self, stage1_path, stage2_path, device): + self.stage1 = core.compile_model(stage1_path, device) + self.stage2 = core.read_model(stage2_path) + + self.generation_config = transformers.GenerationConfig.from_model_config(config) + self.config = transformers.AutoConfig.from_pretrained(MODELS_DIR) + self.main_input_name = "input_ids" + self.device = torch.device("cpu") + self.prepare_inputs_for_generation = prepare_inputs_for_generation + self.num_pkv = 2 + self.input_names = {key.get_any_name(): idx for idx, key in enumerate(self.stage2.inputs)} + self.output_names = {key.get_any_name(): idx for idx, key in enumerate(self.stage2.outputs)} + self.key_value_input_names = [key for key in self.input_names if "key_values" in key] + self.key_value_output_names = [key for key in self.output_names if "present" in key] + stage2 = core.compile_model(self.stage2, device) + self.request = stage2.create_infer_request() + self._supports_cache_class = False + + def can_generate(self): + """Returns True to validate the check that the model using `GenerationMixin.generate()` can indeed generate.""" + return True + + def __call__( + self, + input_ids: torch.LongTensor, + images: torch.Tensor, + attention_mask: Optional[torch.LongTensor] = None, + prefix_mask: Optional[torch.LongTensor] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, + **kwargs, + ) -> transformers.modeling_outputs.CausalLMOutputWithPast: + return self.forward(input_ids, images, attention_mask, prefix_mask, past_key_values) + + def forward( + self, + input_ids: torch.LongTensor, + images: torch.Tensor, + attention_mask: Optional[torch.LongTensor] = None, + prefix_mask: Optional[torch.LongTensor] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, + **kwargs, + ) -> transformers.modeling_outputs.CausalLMOutputWithPast: + """General inference 
method""" + inputs = {} + if past_key_values is not None: + # Flatten the past_key_values + attention_mask = torch.ones( + (input_ids.shape[0], past_key_values[-1][-1].shape[-2] + 1), + dtype=input_ids.dtype, + ) + past_key_values = tuple(past_key_value for pkv_per_layer in past_key_values for past_key_value in pkv_per_layer) + # Add the past_key_values to the decoder inputs + inputs = dict(zip(self.key_value_input_names, past_key_values)) + + else: + return self.forward_with_image(input_ids, images, attention_mask) + inputs["input_ids"] = np.array(input_ids) + + if "attention_mask" in self.input_names: + inputs["attention_mask"] = np.array(attention_mask) + + # Run inference + self.request.start_async(inputs, share_inputs=True) + self.request.wait() + + logits = torch.from_numpy(self.request.get_tensor("logits").data) + + # Tuple of length equal to : number of layer * number of past_key_value per decoder layer (2 corresponds to the self-attention layer) + past_key_values = tuple(self.request.get_tensor(key).data for key in self.key_value_output_names) + # Tuple of tuple of length `n_layers`, with each tuple of length equal to 2 (k/v of self-attention) + + past_key_values = tuple(past_key_values[i : i + self.num_pkv] for i in range(0, len(past_key_values), self.num_pkv)) + + return transformers.modeling_outputs.CausalLMOutputWithPast(logits=logits, past_key_values=past_key_values) + + def forward_with_image(self, input_ids, images, attention_mask): + """First step inference method, that resolves multimodal data""" + _, attention_mask, _, input_embed, _ = prepare_inputs_labels_for_multimodal(input_ids, attention_mask, images=images, past_key_values=None, labels=None) + outs = self.stage1({"inputs_embeds": input_embed, "attention_mask": attention_mask}) + logits = outs[0] + pkv = list(outs.values())[1:] + pkv = tuple(pkv[i : i + self.num_pkv] for i in range(0, len(pkv), self.num_pkv)) + return 
transformers.modeling_outputs.CausalLMOutputWithPast(logits=torch.from_numpy(logits), past_key_values=pkv) + +Now, when we have model and defined generation pipeline, we can run +model inference. + +Select device from dropdown list for running inference using OpenVINO. + +.. code:: ipython3 + + import requests + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + + from notebook_utils import device_widget + + device = device_widget("CPU", exclude=["NPU"]) + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU') + + + +Load OpenVINO model +~~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + ov_model = OVMobileLlamaForCausalLM(stage1_xml_path, stage2_xml_path, device.value) + +Prepare input data +~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + images = [PIL.Image.open(IMAGE_PATH).convert("RGB")] + images_tensor = process_images(images, image_processor, transformers.AutoConfig.from_pretrained(MODELS_DIR)) + +.. code:: ipython3 + + conv = conv_templates["v1"].copy() + conv.append_message(conv.roles[0], DEFAULT_IMAGE_TOKEN + "\n" + PROMPT_STR) + conv.append_message(conv.roles[1], None) + prompt = conv.get_prompt() + stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2 + input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0) + stopping_criteria = KeywordsStoppingCriteria([stop_str], tokenizer, input_ids) + +.. code:: ipython3 + + print(PROMPT_STR) + images[0] + + +.. parsed-literal:: + + Who is the author of this book? + Answer the question using a single word or phrase. + + + + +.. image:: mobilevlm-language-assistant-with-output_files/mobilevlm-language-assistant-with-output_32_1.png + + + +Run generation process +~~~~~~~~~~~~~~~~~~~~~~ + + + +.. 
code:: ipython3 + + output_ids = ov_model.generate( + input_ids, + images=images_tensor, + do_sample=True if TEMPERATURE > 0 else False, + temperature=TEMPERATURE, + top_p=TOP_P, + num_beams=NUM_BEAMS, + max_new_tokens=MAX_NEW_TOKENS, + use_cache=True, + stopping_criteria=[stopping_criteria], + ) + input_token_len = input_ids.shape[1] + n_diff_input_output = (input_ids != output_ids[:, :input_token_len]).sum().item() + if n_diff_input_output > 0: + print(f"[Warning] {n_diff_input_output} output_ids are not the same as the input_ids") + outputs = tokenizer.batch_decode(output_ids[:, input_token_len:], skip_special_tokens=True)[0] + outputs = outputs.strip() + if outputs.endswith(stop_str): + outputs = outputs[: -len(stop_str)] + print(f"🚀 {model_name} with OpenVINO: {outputs.strip()}\n") + + +.. parsed-literal:: + + 🚀 MobileVLM-1.7B with OpenVINO: Susan Wise Bauer + + + +Interactive inference +--------------------- + + + +.. code:: ipython3 + + def generate(img, prompt): + images_tensor = process_images([img], image_processor, transformers.AutoConfig.from_pretrained(MODELS_DIR)) + prompt = DEFAULT_IMAGE_TOKEN + "\n" + prompt + conv = conv_templates["v1"].copy() + conv.append_message(conv.roles[0], prompt) + conv.append_message(conv.roles[1], None) + prompt = conv.get_prompt() + stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2 + input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0) + stopping_criteria = KeywordsStoppingCriteria([stop_str], tokenizer, input_ids) + + output_ids = ov_model.generate( + input_ids, + images=images_tensor, + do_sample=True if TEMPERATURE > 0 else False, + temperature=TEMPERATURE, + top_p=TOP_P, + num_beams=NUM_BEAMS, + max_new_tokens=MAX_NEW_TOKENS, + use_cache=True, + stopping_criteria=[stopping_criteria], + ) + input_token_len = input_ids.shape[1] + outputs = tokenizer.batch_decode(output_ids[:, input_token_len:], skip_special_tokens=True)[0] + outputs = 
outputs.strip() + if outputs.endswith(stop_str): + outputs = outputs[: -len(stop_str)] + + return outputs.strip() + +.. code:: ipython3 + + demo = gr.Interface( + fn=generate, + inputs=[gr.Image(label="Image", type="pil"), gr.Textbox(label="Prompt")], + outputs=gr.Textbox(), + examples=[ + [ + str(IMAGE_PATH), + PROMPT_STR, + ] + ], + allow_flagging="never", + ) + + try: + demo.launch(debug=False) + except Exception: + demo.launch(debug=False, share=True) + # if you are launching remotely, specify server_name and server_port + # demo.launch(server_name='your server name', server_port='server port in int') + # Read more in the docs: https://gradio.app/docs/ + + +.. parsed-literal:: + + Running on local URL: http://127.0.0.1:7860 + + To create a public link, set `share=True` in `launch()`. + + + + + + + + +.. code:: ipython3 + + # please uncomment and run this cell for stopping gradio interface + # demo.close() diff --git a/docs/notebooks/mobilevlm-language-assistant-with-output_files/mobilevlm-language-assistant-with-output_32_1.jpg b/docs/notebooks/mobilevlm-language-assistant-with-output_files/mobilevlm-language-assistant-with-output_32_1.jpg new file mode 100644 index 00000000000000..e42650c7277fc7 --- /dev/null +++ b/docs/notebooks/mobilevlm-language-assistant-with-output_files/mobilevlm-language-assistant-with-output_32_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56dcd6fe79cd88720a73dcbf31e50faf6d057787713d62b0a35fa49d4789a52 +size 24608 diff --git a/docs/notebooks/mobilevlm-language-assistant-with-output_files/mobilevlm-language-assistant-with-output_32_1.png b/docs/notebooks/mobilevlm-language-assistant-with-output_files/mobilevlm-language-assistant-with-output_32_1.png new file mode 100644 index 00000000000000..55c71c94f52e35 --- /dev/null +++ b/docs/notebooks/mobilevlm-language-assistant-with-output_files/mobilevlm-language-assistant-with-output_32_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:74e9cd0ac19f22348008108002eaf4c5c3a666e15c7b205041138107020b3883 +size 162588 diff --git a/docs/notebooks/modelscope-to-openvino-with-output.rst b/docs/notebooks/modelscope-to-openvino-with-output.rst deleted file mode 100644 index aa2bd4150aeff9..00000000000000 --- a/docs/notebooks/modelscope-to-openvino-with-output.rst +++ /dev/null @@ -1,560 +0,0 @@ -Convert models from ModelScope to OpenVINO -========================================== - -.. image:: https://camo.githubusercontent.com/bbda58b4f77b80d9206e3410b533ca5a2582b81070e7dd283ee12fd0d442bd2b/68747470733a2f2f6d6f64656c73636f70652e6f73732d636e2d6265696a696e672e616c6979756e63732e636f6d2f6d6f64656c73636f70652e676966 - -`ModelScope `__ is a -“Model-as-a-Service” (MaaS) platform that seeks to bring together most -advanced machine learning models from the AI community, and to -streamline the process of leveraging AI models in real applications. -Hundreds of models are made publicly available on ModelScope (700+ and -counting), covering the latest development in areas such as NLP, CV, -Audio, Multi-modality, and AI for Science, etc. Many of these models -represent the SOTA in their specific fields, and made their open-sourced -debut on ModelScope. - -This tutorial covers how to use the modelscope ecosystem within -OpenVINO. - -Installation Instructions -~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is a self-contained example that relies solely on its own code. - -We recommend running the notebook in a virtual environment. You only -need a Jupyter server to start. For details, please refer to -`Installation -Guide `__. 
- - -**Table of contents:** - - -- `Prerequisites <#prerequisites>`__ -- `Convert models from ModelScope using OpenVINO Model Conversion - API <#convert-models-from-modelscope-using-openvino-model-conversion-api>`__ - - - `Select inference device for image - classification <#select-inference-device-for-image-classification>`__ - - `Run Image classification <#run-image-classification>`__ - -- `Convert ModelScope models using Optimum - Intel <#convert-modelscope-models-using-optimum-intel>`__ - - - `Select inference device for text - classification <#select-inference-device-for-text-classification>`__ - - `Perform text classification <#perform-text-classification>`__ - -- `Convert ModelScope models for usage with OpenVINO - GenAI <#convert-modelscope-models-for-usage-with-openvino-genai>`__ - - - `Select inference device for text - generation <#select-inference-device-for-text-generation>`__ - - `Run OpenVINO GenAI pipeline <#run-openvino-genai-pipeline>`__ - -Prerequisites -------------- - - - -.. code:: ipython3 - - import platform - - %pip install -q "torch>=2.1.1" "torchvision" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q modelscope addict oss2 simplejson sortedcontainers pillow opencv-python "datasets<=3.0.0" - %pip install -q "transformers>=4.45" "git+https://github.com/huggingface/optimum-intel.git" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -qU "openvino>=2024.5.0" "openvino-tokenizers>=2024.5.0" "openvino-genai>=2024.5.0" "nncf>=2.14.0" - - if platform.system() == "Darwin": - %pip install -q "numpy<2.0.0" - -.. 
code:: ipython3 - - import requests - from pathlib import Path - - if not Path("notebook_utils.py").exists(): - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - open("notebook_utils.py", "w").write(r.text) - -Convert models from ModelScope using OpenVINO Model Conversion API ------------------------------------------------------------------- - - - -Modelscope package provides API for initializing a model and loading a -set of pre-trained weights using the model text handle. Discovering a -desired model name is straightforward with `Modelscope models web -page `__, one can choose a model -solving a particular machine learning problem and even sort the models -by popularity and novelty. - -OpenVINO supports various types of models and frameworks via conversion -to OpenVINO Intermediate Representation (IR). `OpenVINO model conversion -API `__ -should be used for these purposes. ``ov.convert_model`` function accepts -original model instance and example input for tracing and returns -``ov.Model`` representing this model in OpenVINO framework. Converted -model can be used for saving on disk using ``ov.save_model`` function or -directly loading on device using ``core.complie_model``. - -As example, we will use -`tinynas `__ -image classification model. The code bellow demonstrates how to load -this model using Modelscope pipelines interface, convert it to OpenVINO -IR and then perform image classification on specified device. - -.. 
code:: ipython3 - - from pathlib import Path - - from modelscope.pipelines import pipeline - from modelscope.utils.constant import Tasks - import openvino as ov - import torch - import gc - - - cls_model_id = "iic/cv_tinynas_classification" - cls_model_path = Path(cls_model_id.split("/")[-1]) / "openvino_model.xml" - - if not cls_model_path.exists(): - # load Modelcope pipeline with model - image_classification = pipeline(Tasks.image_classification, model=cls_model_id) - # convert model to OpenVINO - ov_model = ov.convert_model(image_classification.model, example_input=torch.zeros((1, 3, 224, 224)), input=[1, 3, 224, 224]) - # save OpenVINO model on disk for next usage - ov.save_model(ov_model, cls_model_path) - del ov_model - del image_classification - gc.collect(); - - -.. parsed-literal:: - - 2024-11-12 19:08:10.199148: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-12 19:08:10.212253: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered - WARNING: All log messages before absl::InitializeLog() is called are written to STDERR - E0000 00:00:1731424090.226654 1605757 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered - E0000 00:00:1731424090.230976 1605757 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered - 2024-11-12 19:08:10.246563: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
- To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - - -Select inference device for image classification -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -.. code:: ipython3 - - from notebook_utils import device_widget - - cv_cls_device = device_widget("CPU") - - cv_cls_device - - - - -.. parsed-literal:: - - Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU') - - - -Run Image classification -~~~~~~~~~~~~~~~~~~~~~~~~ - - - -Model inference interface remains compatible with pipeline preprocessing -and postprocessing, so you can reuse these part of pipeline, but for -providing standalone experience, we will demonstrate how to use model -without pipeline. The code bellow defines utilities for image -preprocessing and postprocessing. - -.. code:: ipython3 - - from notebook_utils import download_file - from PIL import Image - from torchvision import transforms - - # prepare input data and output lables - img_url = "https://pailitao-image-recog.oss-cn-zhangjiakou.aliyuncs.com/mufan/img_data/maas_test_data/dog.png" - img_path = Path("dog.png") - - labels_url = "https://raw.githubusercontent.com/openvinotoolkit/open_model_zoo/master/data/dataset_classes/imagenet_2012.txt" - - labels_path = Path("imagenet_2012.txt") - - if not img_path.exists(): - download_file(img_url) - - if not labels_path.exists(): - download_file(labels_url) - - image = Image.open(img_path) - imagenet_classes = labels_path.open("r").read().splitlines() - - - # prepare image preprocessing - transforms_normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) - transform_list = [ - transforms.Resize(256, interpolation=transforms.InterpolationMode.BICUBIC), - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms_normalize, - ] - transformer = transforms.Compose(transform_list) - - # compile model - core = ov.Core() - - ov_model = 
core.compile_model(cls_model_path, cv_cls_device.value) - -Now, when we make all necessary preparations, we can run model -inference. - -.. code:: ipython3 - - import numpy as np - - # preprocess input - image_tensor = transformer(image) - - # run model inference - result = ov_model(image_tensor.unsqueeze(0))[0] - - # postprocess results - label_id = np.argmax(result[0]) - score = result[0][label_id] - - label = imagenet_classes[label_id] - - # visualize results - display(image) - print(f"Predicted label: {label}, score {score}") - - - -.. image:: modelscope-to-openvino-with-output_files/modelscope-to-openvino-with-output_12_0.png - - -.. parsed-literal:: - - Predicted label: n02099601 golden retriever, score 8.060977935791016 - - -Convert ModelScope models using Optimum Intel ---------------------------------------------- - - - -For models compatible with the `HuggingFace -Transformers `__ -library, we can use `Optimum -Intel `__ integration -to convert and run model. Optimum Intel is the interface between the -Transformers and Diffusers libraries and the different tools and -libraries provided by Intel to accelerate end-to-end pipelines on Intel -architectures. - -Optimum Intel provides a simple interface for optimizing your -Transformers and Diffusers models, converting them to the OpenVINO -Intermediate Representation (IR) format, and running inference using -OpenVINO Runtime, among other use cases. For running ModelScope models -using this interface we should download model from hub first. There are -several ways how to download models from Modelscope Hub, one of them is -usage of ``modelscope.snapshot_download`` function. This function -accepts model id from hub and optionally local directory (if not -provided, model will be downloaded to cache directory). - -After that, we can load model to Optimum Intel interface replacing the -``AutoModelForXxx`` class from transformers with the corresponding -``OVModelForXxx``. 
Model conversion will be performed on the fly. For -avoiding next time conversion, we can save model on disk using -``save_pretrained`` method and in the next time pass directory with -already converted model as argument in ``from_pretrained`` method. We -also specified ``device`` parameter for compiling the model on the -specific device, if not provided, the default device will be used. The -device can be changed later in runtime using ``model.to(device)``, -please note that it may require some time for model compilation on a -newly selected device. In some cases, it can be useful to separate model -initialization and compilation, for example, if you want to reshape the -model using ``reshape`` method, you can postpone compilation, providing -the parameter ``compile=False`` into ``from_pretrained`` method, -compilation can be performed manually using ``compile`` method or will -be performed automatically during first inference run. - -As example, we will use -`nlp_bert_sentiment-analysis_english-base `__. -This model was trained for classification input text on 3 sentiment -categories: negative, positive and neutral. In transformers, -``AutoModelForSequenceClassification`` should be used for model -initialization, so for usage model with OpenVINO, it is enough just -replace ``AutoModelForSequenceClassification`` to -``OVModelForSequenceClassification``. - -.. code:: ipython3 - - from modelscope import snapshot_download - - text_model_id = "iic/nlp_bert_sentiment-analysis_english-base" - text_model_path = Path(text_model_id.split("/")[-1]) - ov_text_model_path = text_model_path / "ov" - - - if not text_model_path.exists(): - snapshot_download(text_model_id, local_dir=text_model_path) - -Select inference device for text classification -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -.. code:: ipython3 - - from notebook_utils import device_widget - - text_cls_device = device_widget("CPU", "NPU") - - text_cls_device - - - - -.. 
parsed-literal:: - - Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU') - - - -Perform text classification -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -.. code:: ipython3 - - from transformers import AutoTokenizer - from optimum.intel.openvino import OVModelForSequenceClassification - - - tokenizer = AutoTokenizer.from_pretrained(text_model_path) - - if not ov_text_model_path.exists(): - # model will be automatically exported to OpenVINO format during loading - ov_model = OVModelForSequenceClassification.from_pretrained(text_model_path, text_cls_device.value) - ov_model.save_pretrained(ov_text_model_path) - # save converted model using save_pretrained for avoid conversion in next time - tokenizer.save_pretrained(ov_text_model_path) - else: - # load converted model directly if availa ble - ov_model = OVModelForSequenceClassification.from_pretrained(ov_text_model_path, device=text_cls_device.value) - - # prepare input - input_text = "Good night." - input_data = tokenizer(input_text, return_tensors="pt") - - # run model inference - output = ov_model(**input_data) - # postprocess results - predicted_label_id = output.logits[0].argmax().item() - - predicted_label = ov_model.config.id2label[predicted_label_id] - - print(f"predicted label: {predicted_label}") - - -.. parsed-literal:: - - predicted label: Positive - - -Convert ModelScope models for usage with OpenVINO GenAI -------------------------------------------------------- - - - -OpenVINO™ GenAI is a library of the most popular Generative AI model -pipelines, optimized execution methods, and samples that run on top of -highly performant `OpenVINO -Runtime `__. - -This library is friendly to PC and laptop execution, and optimized for -resource consumption. It requires no external dependencies to run -generative models as it already includes all the core functionality -(e.g. tokenization via openvino-tokenizers). 
- -You can also load and run models from ModelScope with OpenVINO GenAI -`supported -pipelines `__. - -This inference approach is also based on model representation obtained -using Optimum Intel and also requires to download ModelScope model -first. As example we will be -`qwen2.5-1.5b-instruct `__ -model for text generation, that is part of powerful Qwen2 LLMs family. -If in previous chapter we are focused with usage python API for -downloading and converting models, in this one - we are also considering -CLI usage for the same actions. - -Downloading ModelScope models using CLI can be performed using following -command: - -.. code:: bash - - modelscope download --local_dir - -where ```` is model id from Hub and ```` is -output directory for model saving. - -``optimum-cli`` provides command line interface for exporting models -using Optimum. General OpenVINO export command format: - -.. code:: bash - - optimum-cli export openvino --model --task - -where task is task to export the model for. Available tasks depend on -the model, but are among: [‘default’, ‘fill-mask’, ‘text-generation’, -‘text2text-generation’, ‘text-classification’, ‘token-classification’, -‘multiple-choice’, ‘object-detection’, ‘question-answering’, -‘image-classification’, ‘image-segmentation’, ‘masked-im’, -‘semantic-segmentation’, ‘automatic-speech-recognition’, -‘audio-classification’, ‘audio-frame-classification’, -‘automatic-speech-recognition’, ‘audio-xvector’, ‘image-to-text’, -‘stable-diffusion’, ‘zero-shot-object-detection’]. - -You can find a mapping between tasks and model classes in Optimum -TaskManager -`documentation `__. - -Additionally, you can specify weights compression using -``--weight-format`` argument with one of following options: ``fp32``, -``fp16``, ``int8`` and ``int4``. Fro int8 and int4 nncf will be used for -weight compression. For models that required remote code execution, -``--trust-remote-code`` flag should be provided. 
- -Full list of supported arguments available via ``--help`` - -.. code:: ipython3 - - from IPython.display import Markdown, display - - model_id = "Qwen/Qwen2.5-1.5B-Instruct" - - llm_path = Path("Qwen2.5-1.5B-Instruct") - ov_llm_path = llm_path / "ov" - download_command = f"modelscope download {model_id} --local_dir {llm_path}" - display(Markdown("**Download command:**")) - display(Markdown(f"`{download_command}`")) - - if not llm_path.exists(): - !{download_command} - - - -**Download command:** - - - -``modelscope download Qwen/Qwen2.5-1.5B-Instruct --local_dir Qwen2.5-1.5B-Instruct`` - - -.. code:: ipython3 - - export_command = f"optimum-cli export openvino -m {llm_path} --task text-generation-with-past --weight-format int4 {ov_llm_path}" - display(Markdown("**Export command:**")) - display(Markdown(f"`{export_command}`")) - - if not ov_llm_path.exists(): - !{export_command} - - - -**Export command:** - - - -``optimum-cli export openvino -m Qwen2.5-1.5B-Instruct --task text-generation-with-past --weight-format int4 Qwen2.5-1.5B-Instruct/ov`` - - -Select inference device for text generation -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -.. code:: ipython3 - - from notebook_utils import device_widget - - llm_device = device_widget("CPU") - - llm_device - - - - -.. parsed-literal:: - - Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU') - - - -Run OpenVINO GenAI pipeline -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -For running text generation using OpenVINO GenAI, we should use -``LLMPipeline`` class initialized with providing converted model -directory and inference device. You can find more detailed example how -to use OpenVINO GenAI ``LLMPipeline`` for chatbot scenario in this -`tutorial `__. - -.. code:: ipython3 - - import openvino_genai as ov_genai - - - def streamer(subword): - print(subword, end="", flush=True) - # Return flag corresponds whether generation should be stopped. - # False means continue generation. 
- return False - - - llm_pipe = ov_genai.LLMPipeline(ov_llm_path, llm_device.value) - - llm_pipe.generate("The Sun is yellow because", max_new_tokens=200, streamer=streamer) - - -.. parsed-literal:: - - it has a spectrum of colors, and you are also looking at it. What color would the sun be if you could see its light without being able to see any other objects? If we imagine that someone had never seen or heard about the sun before, what would they expect to see? - - 1. **Color of the Sun**: The sun appears yellow when viewed from Earth due to the way our atmosphere scatters sunlight. This phenomenon occurs as follows: - - - **Sunlight Scattering**: When sunlight passes through the Earth's atmosphere, different wavelengths (colors) of light travel at slightly different speeds due to their varying energies. - - **Air Mass Height**: At higher altitudes where air density decreases with altitude, shorter wavelength (blue) photons have more energy and thus escape faster into space compared to longer wavelength (red) photons which remain in the atmosphere longer. - - **Sky Color**: As a result, blue light is scattered more than red light by molecules in the upper layers of the atmosphere - - - -.. parsed-literal:: - - " it has a spectrum of colors, and you are also looking at it. What color would the sun be if you could see its light without being able to see any other objects? If we imagine that someone had never seen or heard about the sun before, what would they expect to see?\n\n1. **Color of the Sun**: The sun appears yellow when viewed from Earth due to the way our atmosphere scatters sunlight. 
This phenomenon occurs as follows:\n\n - **Sunlight Scattering**: When sunlight passes through the Earth's atmosphere, different wavelengths (colors) of light travel at slightly different speeds due to their varying energies.\n - **Air Mass Height**: At higher altitudes where air density decreases with altitude, shorter wavelength (blue) photons have more energy and thus escape faster into space compared to longer wavelength (red) photons which remain in the atmosphere longer.\n - **Sky Color**: As a result, blue light is scattered more than red light by molecules in the upper layers of the atmosphere" - - - -.. code:: ipython3 - - import gc - - del llm_pipe - gc.collect(); diff --git a/docs/notebooks/modelscope-to-openvino-with-output_files/modelscope-to-openvino-with-output_12_0.jpg b/docs/notebooks/modelscope-to-openvino-with-output_files/modelscope-to-openvino-with-output_12_0.jpg deleted file mode 100644 index 97ae56df8a8721..00000000000000 --- a/docs/notebooks/modelscope-to-openvino-with-output_files/modelscope-to-openvino-with-output_12_0.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1745fd9f64ac9914621f7eee3668e86daa8121bc83d1a2c7f27963c85026f104 -size 66633 diff --git a/docs/notebooks/modelscope-to-openvino-with-output_files/modelscope-to-openvino-with-output_12_0.png b/docs/notebooks/modelscope-to-openvino-with-output_files/modelscope-to-openvino-with-output_12_0.png deleted file mode 100644 index d1c0d309736c1a..00000000000000 --- a/docs/notebooks/modelscope-to-openvino-with-output_files/modelscope-to-openvino-with-output_12_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6235ab7dd2cb4318435320004320ffc6de773044c51cadcd581a7996faca313a -size 636558 diff --git a/docs/notebooks/multilora-image-generation-with-output.rst b/docs/notebooks/multilora-image-generation-with-output.rst deleted file mode 100644 index 7b6f4bc381ff27..00000000000000 --- 
a/docs/notebooks/multilora-image-generation-with-output.rst +++ /dev/null @@ -1,468 +0,0 @@ -Multi LoRA Image Generation -=========================== - -LoRA, or `Low-Rank Adaptation `__, is -a popular and lightweight training technique used for fine-tuning Large -Language and Stable Diffusion Models without needing full model -training. Full fine-tuning of larger models (consisting of billions of -parameters) is inherently expensive and time-consuming. LoRA works by -adding a smaller number of new weights to the model for training, rather -than retraining the entire parameter space of the model. This makes -training with LoRA much faster, memory-efficient, and produces smaller -model weights (a few hundred MBs), which are easier to store and share. - -At its core, LoRA leverages the concept of low-rank matrix -factorization. Instead of updating all the parameters in a neural -network, LoRA decomposes the parameter space into two low-rank matrices. -This decomposition allows the model to capture essential information -with fewer parameters, significantly reducing the amount of data and -computation required for fine-tuning. - -|image0| - -By incorporating LoRA into Stable Diffusion models, we can enhance their -ability to understand complex relationships and patterns in data. This -approach opens up numerous possibilities: \* **Art and Design**: Artists -can fine-tune models to generate images that align with their unique -styles, creating personalized artwork effortlessly. \* **Content -Creation**: Businesses can customize image generation models to produce -branded visuals, enhancing marketing and media production. \* -**Entertainment**: Game developers and filmmakers can use fine-tuned -models to create realistic and imaginative worlds, streamlining the -creative process. - -In this tutorial we explore possibilities to use LoRA with OpenVINO -Generative API. 
- - -**Table of contents:** - - -- `Prerequisites <#prerequisites>`__ -- `Convert Diffusion Model using Optimum - Intel <#convert-diffusion-model-using-optimum-intel>`__ - - - `Applying LoRA to Original Diffusers pipeline before - conversion <#applying-lora-to-original-diffusers-pipeline-before-conversion>`__ - -- `Image Generation using OpenVINO - GenAI <#image-generation-using-openvino-genai>`__ - - - `Integration LoRA into - pipeline <#integration-lora-into-pipeline>`__ - - `Prepare LoRA Adapters <#prepare-lora-adapters>`__ - - `Create Inference Pipeline <#create-inference-pipeline>`__ - - `Selection specific adapter during - generation <#selection-specific-adapter-during-generation>`__ - - `Use multiple adapters - simultaneously <#use-multiple-adapters-simultaneously>`__ - - `Disable adapters <#disable-adapters>`__ - -- `Interactive demo <#interactive-demo>`__ - -Installation Instructions -~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is a self-contained example that relies solely on its own code. - -We recommend running the notebook in a virtual environment. You only -need a Jupyter server to start. For details, please refer to -`Installation -Guide `__. - -.. |image0| image:: https://github.com/user-attachments/assets/bf823c71-13b4-402c-a7b4-d6fc30a60d88 - -.. code:: ipython3 - - import platform - - %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu torch torchvision transformers accelerate "diffusers>0.25.0" pillow "gradio>=4.19" "peft>=0.7.0" - %pip install -q "git+https://github.com/huggingface/optimum-intel.git" - %pip install -q -U "openvino>=2024.5.0" "openvino-tokenizers>=2024.5.0" "openvino-genai>=2024.5.0" - - if platform.system() == "Darwin": - %pip install -q "numpy<2.0.0" - -.. 
code:: ipython3 - - import requests - from pathlib import Path - - notebook_utils_path = Path("notebook_utils.py") - lora_config_path = Path("lora_config.py") - - if not notebook_utils_path.exists(): - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - notebook_utils_path.open("w").write(r.text) - - if not lora_config_path.exists(): - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/multilora-image-generation/lora_config.py", - ) - lora_config_path.open("w").write(r.text) - -Convert Diffusion Model using Optimum Intel -------------------------------------------- - - - -`Optimum Intel `__ is -the interface between the -`Transformers `__ and -`Diffusers `__ libraries -and OpenVINO to accelerate end-to-end pipelines on Intel architectures. -It provides ease-to-use -`interface `__ -for exporting models to `OpenVINO Intermediate Representation -(IR) `__ -format. - -Applying LoRA to Original Diffusers pipeline before conversion -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -LoRA can be easily added to `Diffusers -pipeline `__ -before export. At the export stage, LoRA weights will be fused to -original model weights and converted model will preserve LoRA provided -behavior. This approach is suitable when you need model with adapter -capabilities by default and it does not required configuration at -inference time (e.g. changing weight coefficient for adapter). For -example, we can use this method for speedup generation process with -integration `LCM LoRA `__. -Previously, we already considered with approach in this -`tutorial `__. - -Using ``optimum-cli`` for exporting models requires to provide model id -on HuggingFace Hub or local directory with saved model. In case, if -model stored in multiple separated repositories or directories (e.g. 
you -want to replace VAE component or add LoRA), it should be merged and -saved on disk before export. For avoiding this, we will use -``export_from_model`` function that accepts initialized model. -Additionally, for using model with OpenVINO GenAI, we need to export -tokenizers to OpenVINO format using `OpenVINO -Tokenizers `__ -library. - -In this tutorial we will use `Stable Diffusion -XL `__ -model, but the same steps are also applicable to other models of Stable -Diffusion family. - -.. code:: ipython3 - - from pathlib import Path - from diffusers import DiffusionPipeline, AutoencoderKL, LCMScheduler - from optimum.exporters.openvino import export_from_model - from optimum.intel.openvino import OVConfig - from optimum.exporters.openvino.convert import export_tokenizer - import gc - - model_dir = Path("sdxl-lcm") - - if not model_dir.exists(): - model_id = "stabilityai/stable-diffusion-xl-base-1.0" - adapter_id = "latent-consistency/lcm-lora-sdxl" - vae_id = "madebyollin/sdxl-vae-fp16-fix" - vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix") - pipe = DiffusionPipeline.from_pretrained(model_id, vae=vae, variant="fp16", use_safetensors=True) - pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config) - pipe.load_lora_weights(adapter_id) - pipe.fuse_lora() - export_from_model(pipe, model_dir, task="text-to-image", stateful=False, ov_config=OVConfig(dtype="fp16")) - for tokenizer in ["tokenizer", "tokenizer_2"]: - tokenizer_model = getattr(pipe, tokenizer, None) - if tokenizer_model is not None: - export_tokenizer(tokenizer_model, model_dir / tokenizer, task="text-to-image") - del vae - del pipe - gc.collect() - - -.. parsed-literal:: - - 2024-11-08 16:49:48.963221: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
- 2024-11-08 16:49:48.977712: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered - WARNING: All log messages before absl::InitializeLog() is called are written to STDERR - E0000 00:00:1731070188.992824 718925 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered - E0000 00:00:1731070188.997386 718925 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered - 2024-11-08 16:49:49.014687: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - - -Image Generation using OpenVINO GenAI -------------------------------------- - - - -`OpenVINO™ GenAI `__ -is a library of the most popular Generative AI model pipelines, -optimized execution methods, and samples that run on top of highly -performant `OpenVINO -Runtime `__. - -This library is friendly to PC and laptop execution, and optimized for -resource consumption. It requires no external dependencies to run -generative models as it already includes all the core functionality. - -``openvino_genai.Text2ImagePipeline`` class supports inference of -`Diffusers -models `__. -For pipeline initialization, we should provide directory with converted -by Optimum Intel pipeline and specify inference device. Optionally, we -can provide configuration for LoRA Adapters using ``adapter_config``. -For starting generation process ``generate`` method should be used. -Basically, it required to provide input text prompt for image -generation. 
You can provide additional arguments like negative prompt, -number of steps, guidance scale, image width and height to control -generation process. - -Integration LoRA into pipeline -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -Similarly to Diffusers pipeline, you can store separately and load LoRA -into base pipeline before inference using OpenVINO GenAI. -``openvino_genai.AdapterConfig`` serves for adapters management in -``openvino_genai.Text2ImagePipeline``. It can be used for adding and -removing adapters or changing their weight coefficient for blending into -pipeline. You can add one or multiple adapters into config and also -specify alpha blending coefficients for their addition. OpenVINO GenAI -supports LoRA adapters saved in Safetensors format. You can use one of -publicly available pretrained adapters from -`CivitAI `__ or `HuggingFace -Hub `__ or train your own. > **Important -Note**: Before loading pretrained adapters, please make sure that they -are compatible with your base model architecture. E.g. if you use SDXL -model, you need to provide adapters trained for this model type and -loading adapter, for example, trained for FLUX is not allowed. - -Generally, process of adapters configuration consists of 2 steps: 1. -Register adapters in pipeline constructor. At this moment, it is -recommended to provide all adapters that you plan to use on this stage. -2. Choose which adapter (or a combination of adapters) to apply in each -``generate`` call. It is not obligated to use all of provided in -constructor adapters simultaneously, you can select one or combination -of several among them for each generation cycle. - -Prepare LoRA Adapters -~~~~~~~~~~~~~~~~~~~~~ - - - -.. _prepare-lora-adapters-1: - -Prepare LoRA Adapters -~~~~~~~~~~~~~~~~~~~~~ - -.. code:: ipython3 - - from lora_config import LORA - - # uncomment this line to see predefined LoRA adapters configuration used in this notebook - # LORA - -.. 
code:: ipython3 - - from huggingface_hub import hf_hub_download - - lora_dir = Path("lora") - adapter_paths = [] - - for lora in LORA: - lora_model_dir = lora_dir / lora["name"].lower().replace(" ", "_") - file_name = lora["file_name"] - if not (lora_model_dir / file_name).exists(): - hf_hub_download(repo_id=lora["model_id"], filename=file_name, local_dir=lora_model_dir) - adapter_paths.append(lora_model_dir / file_name) - -.. code:: ipython3 - - import openvino_genai as ov_genai - - - def prepare_adapter_config(scales=None): - if scales is None: - scales = [1 / len(adapter_paths)] * len(adapter_paths) - if isinstance(scales, float): - scales = [scales] * len(adapter_paths) - adapter_config = ov_genai.AdapterConfig() - for adapter, scale in zip(adapter_paths, scales): - adapter_config.add(ov_genai.Adapter(adapter), scale) - - return adapter_config - - - adapters_config = prepare_adapter_config(0.0) - adapters = adapters_config.get_adapters() - -Create Inference Pipeline -~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -diffusion process involves random for preparing initial state for -denoising. For reproducibility of generation results, we will use -``Generator`` class. - -.. code:: ipython3 - - from notebook_utils import device_widget - - device = device_widget(default="CPU", exclude=["NPU"]) - device - - - - -.. parsed-literal:: - - Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU') - - - -.. 
code:: ipython3 - - import openvino as ov - import torch - - - class Generator(ov_genai.Generator): - def __init__(self, seed): - ov_genai.Generator.__init__(self) - self.generator = torch.Generator(device="cpu").manual_seed(seed) - - def next(self): - return torch.randn(1, generator=self.generator, dtype=torch.float32).item() - - def randn_tensor(self, shape: ov.Shape): - torch_tensor = torch.randn(list(shape), generator=self.generator, dtype=torch.float32) - return ov.Tensor(torch_tensor.numpy()) - - - pipe = ov_genai.Text2ImagePipeline(model_dir, "CPU", adapters=adapters_config) - -Selection specific adapter during generation -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -As it was already mention before, it is not necessary to use all -adapters specified at initialization stage for generation in the same -time. Providing adapters argument with ``openvino_genai.AdapterConfig`` -into ``generate`` allow to select one or several from them. For example, -let’s select LoRA for generation images in X-Ray style. - -.. code:: ipython3 - - subject = "a cute cat in sunglasses" - prompt_template = LORA[0].get("prompt", "") - adapter_weight = LORA[0].get("weight", 1.0) - prompt = prompt_template.replace("", subject) - adapter_config = ov_genai.AdapterConfig() - adapter_config.add(adapters[0], adapter_weight) - image_tensor = pipe.generate(prompt, num_inference_steps=4, guidance_scale=0, adapters=adapter_config, generator=Generator(421235)) - -.. code:: ipython3 - - from PIL import Image - - image = Image.fromarray(image_tensor.data[0]) - image - - - - -.. image:: multilora-image-generation-with-output_files/multilora-image-generation-with-output_15_0.png - - - -Use multiple adapters simultaneously -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -You also can use combination of adapters that will be applied in the -same time. Let’s see what happens if traditional Japanese art will meet -modern illustration pointillistic style. - -.. 
code:: ipython3 - - prompt_template1 = LORA[1].get("prompt", "") - prompt_template2 = LORA[2].get("prompt", "") - adapter1_weight = LORA[1].get("weight", 1.0) - adapter2_weight = LORA[2].get("weight", 1.0) - - prompt = prompt_template2.replace("", prompt_template1.replace("", subject)) - adapter_config = ov_genai.AdapterConfig() - adapter_config.add(adapters[1], adapter1_weight) - adapter_config.add(adapters[2], adapter2_weight) - image_tensor = pipe.generate(prompt, num_inference_steps=4, guidance_scale=0, adapters=adapter_config, generator=Generator(421235)) - -.. code:: ipython3 - - image = Image.fromarray(image_tensor.data[0]) - image - - - - -.. image:: multilora-image-generation-with-output_files/multilora-image-generation-with-output_18_0.png - - - -Disable adapters -~~~~~~~~~~~~~~~~ - - - -You can disable adapters providing empty ``AdapterConfig`` into generate - -.. code:: ipython3 - - image_tensor = pipe.generate(subject, num_inference_steps=4, guidance_scale=0, adapters=ov_genai.AdapterConfig(), generator=Generator(421235)) - -.. code:: ipython3 - - image = Image.fromarray(image_tensor.data[0]) - image - - - - -.. image:: multilora-image-generation-with-output_files/multilora-image-generation-with-output_21_0.png - - - -Interactive demo ----------------- - - - -.. code:: ipython3 - - gradio_helper_path = Path("gradio_helper.py") - - if not gradio_helper_path.exists(): - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/multilora-image-generation/gradio_helper.py", - ) - lora_config_path.open("w").write(r.text) - -.. 
code:: ipython3 - - from gradio_helper import make_demo - - demo = make_demo(pipe, Generator, adapters, LORA) - - try: - demo.launch(debug=False) - except Exception: - demo.launch(share=True, debug=False) - # if you are launching remotely, specify server_name and server_port - # demo.launch(server_name='your server name', server_port='server port in int') - # Read more in the docs: https://gradio.app/docs/ diff --git a/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_15_0.jpg b/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_15_0.jpg deleted file mode 100644 index 1427e6afb594ac..00000000000000 --- a/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_15_0.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:378fce8c53832fa402e94c50995aa5f188d16a6a6886c08fe4f8323bcf7daabe -size 42135 diff --git a/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_15_0.png b/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_15_0.png deleted file mode 100644 index 873721f87cc2a3..00000000000000 --- a/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_15_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:27ac6d45499eb6e67ddf78f8f3493fd3e9dc3885cec2b4fda8067f9b1f7a9ebf -size 1252162 diff --git a/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_18_0.jpg b/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_18_0.jpg deleted file mode 100644 index 1b6a88d2cde069..00000000000000 --- a/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_18_0.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version 
https://git-lfs.github.com/spec/v1 -oid sha256:de879c60657ad9c471ccc971d63cc2ac25be5b477c6ebcd8b2e1a2a438b2f3c1 -size 146062 diff --git a/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_18_0.png b/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_18_0.png deleted file mode 100644 index 9b26d20ef04ab8..00000000000000 --- a/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_18_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ae8cec0bac904c1868d7786121978b2ca819ead5c8b02cf09bb07f75b927a3a1 -size 1940316 diff --git a/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_21_0.jpg b/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_21_0.jpg deleted file mode 100644 index 199be9b483e18f..00000000000000 --- a/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_21_0.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:564848925f540cf500457a4996631ba616cc6547b63d377ce22ac8c3e9431c04 -size 87425 diff --git a/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_21_0.png b/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_21_0.png deleted file mode 100644 index bbf4eaaf030a42..00000000000000 --- a/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_21_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b573ab59972699e762f8a52c0ce17a0db060230effe78e2ae3408290a9173103 -size 1417021 diff --git a/docs/notebooks/music-generation-with-output.rst b/docs/notebooks/music-generation-with-output.rst index 2d63515872694f..4adc89b9ff79e7 100644 --- 
a/docs/notebooks/music-generation-with-output.rst +++ b/docs/notebooks/music-generation-with-output.rst @@ -124,9 +124,14 @@ Imports .. parsed-literal:: - 2024-12-10 02:28:39.145741: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-12-10 02:28:39.170431: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 02:04:23.419260: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 02:04:23.453089: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2024-11-05 02:04:24.059462: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. 
+ torch.utils._pytree._register_pytree_node( + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + torch.utils._pytree._register_pytree_node( MusicGen in HF Transformers @@ -165,134 +170,12 @@ generate a text-conditioned music sample. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/encodec/modeling_encodec.py:124: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor). - self.register_buffer("padding_total", torch.tensor(kernel_size - stride, dtype=torch.int64), persistent=False) - Config of the text_encoder: is overwritten by shared text_encoder config: T5Config { - "_name_or_path": "t5-base", - "architectures": [ - "T5ForConditionalGeneration" - ], - "classifier_dropout": 0.0, - "d_ff": 3072, - "d_kv": 64, - "d_model": 768, - "decoder_start_token_id": 0, - "dense_act_fn": "relu", - "dropout_rate": 0.1, - "eos_token_id": 1, - "feed_forward_proj": "relu", - "initializer_factor": 1.0, - "is_encoder_decoder": true, - "is_gated_act": false, - "layer_norm_epsilon": 1e-06, - "model_type": "t5", - "n_positions": 512, - "num_decoder_layers": 12, - "num_heads": 12, - "num_layers": 12, - "output_past": true, - "pad_token_id": 0, - "relative_attention_max_distance": 128, - "relative_attention_num_buckets": 32, - "task_specific_params": { - "summarization": { - "early_stopping": true, - "length_penalty": 2.0, - "max_length": 200, - "min_length": 30, - "no_repeat_ngram_size": 3, - "num_beams": 4, - "prefix": "summarize: " - }, - 
"translation_en_to_de": { - "early_stopping": true, - "max_length": 300, - "num_beams": 4, - "prefix": "translate English to German: " - }, - "translation_en_to_fr": { - "early_stopping": true, - "max_length": 300, - "num_beams": 4, - "prefix": "translate English to French: " - }, - "translation_en_to_ro": { - "early_stopping": true, - "max_length": 300, - "num_beams": 4, - "prefix": "translate English to Romanian: " - } - }, - "transformers_version": "4.46.3", - "use_cache": true, - "vocab_size": 32128 - } - - Config of the audio_encoder: is overwritten by shared audio_encoder config: EncodecConfig { - "_name_or_path": "facebook/encodec_32khz", - "architectures": [ - "EncodecModel" - ], - "audio_channels": 1, - "chunk_length_s": null, - "codebook_dim": 128, - "codebook_size": 2048, - "compress": 2, - "dilation_growth_rate": 2, - "hidden_size": 128, - "kernel_size": 7, - "last_kernel_size": 7, - "model_type": "encodec", - "norm_type": "weight_norm", - "normalize": false, - "num_filters": 64, - "num_lstm_layers": 2, - "num_residual_layers": 1, - "overlap": null, - "pad_mode": "reflect", - "residual_kernel_size": 3, - "sampling_rate": 32000, - "target_bandwidths": [ - 2.2 - ], - "torch_dtype": "float32", - "transformers_version": "4.46.3", - "trim_right_ratio": 1.0, - "upsampling_ratios": [ - 8, - 5, - 4, - 4 - ], - "use_causal_conv": false, - "use_conv_shortcut": false - } - - Config of the decoder: is overwritten by shared decoder config: MusicgenDecoderConfig { - "activation_dropout": 0.0, - "activation_function": "gelu", - "attention_dropout": 0.0, - "audio_channels": 1, - "bos_token_id": 2048, - "classifier_dropout": 0.0, - "dropout": 0.1, - "ffn_dim": 4096, - "hidden_size": 1024, - "initializer_factor": 0.02, - "layerdrop": 0.0, - "max_position_embeddings": 2048, - "model_type": "musicgen_decoder", - "num_attention_heads": 16, - "num_codebooks": 4, - "num_hidden_layers": 24, - "pad_token_id": 2048, - "scale_embedding": false, - "tie_word_embeddings": false, - 
"transformers_version": "4.46.3", - "use_cache": true, - "vocab_size": 2048 - } - + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/huggingface_hub/file_download.py:797: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. + warnings.warn( + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + torch.utils._pytree._register_pytree_node( + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm. + warnings.warn("torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.") In the cell below user is free to change the desired music sample @@ -346,7 +229,7 @@ vocabulary. It helps the model understand the context of a sentence. @@ -431,9 +314,6 @@ runtime .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. 
Please use `model.hf_quantizer.is_trainable` instead - warnings.warn( - `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. 2. Convert MusicGen Language Model @@ -775,7 +655,7 @@ We can now infer the pipeline backed by OpenVINO models. diff --git a/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst b/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst index 9cefe7216f2076..337458e35bbf0c 100644 --- a/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst +++ b/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst @@ -16,6 +16,7 @@ OpenVINO. Additionally, we will optimize model using - `Prerequisites <#prerequisites>`__ - `Select Model <#select-model>`__ +- `Download PyTorch model <#download-pytorch-model>`__ - `Convert and Optimize model <#convert-and-optimize-model>`__ - `Convert model to OpenVINO IR @@ -50,23 +51,23 @@ Prerequisites .. code:: ipython3 - %pip install -q "torch>=2.1" "transformers>=4.45" "accelerate" "pillow" "gradio>=4.26" "tqdm" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "nncf>=2.14" - %pip install -q -U "openvino-tokenizers[transformers]>=2024.5.0" "openvino>=2024.5.0" - %pip install -q "git+https://github.com/huggingface/optimum-intel.git" + %pip install -q "torch>=2.1" "transformers>=4.40" "accelerate" "pillow" "gradio>=4.26" "tqdm" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "nncf>=2.13" + %pip install -q -U --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly "openvino-tokenizers[transformers]" "openvino>=2024.4.0" + %pip install -q "git+https://github.com/eaidova/optimum-intel.git@ea/minicpmv" .. parsed-literal:: + ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. 
+ mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. Note: you may need to restart the kernel to use updated packages. - ERROR: Ignored the following versions that require a different python version: 2.14.0 Requires-Python >=3.9 - ERROR: Could not find a version that satisfies the requirement nncf>=2.14 (from versions: 1.4, 1.4.1, 1.5.0, 1.6.0, 1.7.0, 1.7.1, 2.0.0, 2.0.1, 2.0.2, 2.1.0, 2.2.0, 2.3.0, 2.4.0, 2.5.0, 2.6.0, 2.7.0, 2.8.0, 2.8.1, 2.9.0, 2.10.0, 2.11.0, 2.12.0, 2.13.0) - ERROR: No matching distribution found for nncf>=2.14 Note: you may need to restart the kernel to use updated packages. - ERROR: Ignored the following versions that require a different python version: 2024.5.0.0 Requires-Python >=3.9 - ERROR: Could not find a version that satisfies the requirement openvino-tokenizers>=2024.5.0 (from versions: 2023.3.0.0, 2024.0.0.0, 2024.1.0.0, 2024.1.0.2, 2024.2.0.0, 2024.3.0.0, 2024.4.0.0, 2024.4.1.0.dev20240926) - ERROR: No matching distribution found for openvino-tokenizers>=2024.5.0 + ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. + openvino-genai 2024.4.0.0 requires openvino_tokenizers~=2024.4.0.0.dev, but you have openvino-tokenizers 2024.5.0.0.dev20241022 which is incompatible. Note: you may need to restart the kernel to use updated packages. + ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. + mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. Note: you may need to restart the kernel to use updated packages. 
@@ -76,7 +77,6 @@ Prerequisites import requests helper_file = Path("ov_nano_llava_helper.py") - cmd_helper_file = Path("cmd_helper.py") if not helper_file.exists(): r = requests.get( @@ -84,10 +84,6 @@ Prerequisites ) helper_file.open("w").write(r.text) - if not cmd_helper_file.exists(): - r = requests.get(url=f"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/{cmd_helper_file.name}") - cmd_helper_file.open("w").write(r.text) - Select Model ------------ @@ -131,12 +127,40 @@ Download PyTorch model .. code:: ipython3 - from ov_nano_llava_helper import converted_model_exists, copy_model_files + from ov_nano_llava_helper import download_original_model, converted_model_exists, copy_model_files model_id = model_dropdown.value model_dir = Path(model_id.split("/")[-1]) ov_model_dir = Path("ov_" + model_dir.name) / "FP16" + if not converted_model_exists(ov_model_dir): + download_original_model(model_id, model_dir) + + + +.. parsed-literal:: + + Fetching 14 files: 0%| | 0/14 [00:00 1 or self.sliding_window is not None) and self.is_causal: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/exporters/onnx/model_patcher.py:306: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/exporters/onnx/model_patcher.py:306: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! if past_key_values_length > 0: /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/qnguyen3/nanoLLaVA/13d60cec183a86755afed64da495fcc2c382ea80/modeling_llava_qwen2.py:939: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if seq_len > self.max_seq_len_cached: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:443: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:432: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/qnguyen3/nanoLLaVA/13d60cec183a86755afed64da495fcc2c382ea80/modeling_llava_qwen2.py:1499: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if attention_mask.size() != (bsz, 1, q_len, kv_seq_len): - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/qnguyen3/nanoLLaVA/13d60cec183a86755afed64da495fcc2c382ea80/modeling_llava_qwen2.py:169: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if attn_weights.size() != (batch_size, self.num_heads, q_len, k_v_seq_len): - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/qnguyen3/nanoLLaVA/13d60cec183a86755afed64da495fcc2c382ea80/modeling_llava_qwen2.py:187: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if attn_output.size() != (batch_size, self.num_heads, q_len, self.head_dim): - Exporting tokenizers to OpenVINO is not supported for tokenizers version > 0.19 and openvino version <= 2024.4. Please downgrade to tokenizers version <= 0.19 to export tokenizers to OpenVINO. - + OpenVINO and OpenVINO Tokenizers versions are not binary compatible. + OpenVINO version: 2024.5.0-16993 + OpenVINO Tokenizers version: 2024.5.0.0 + First 3 numbers should be the same. Update OpenVINO Tokenizers to compatible version. It is recommended to use the same day builds for pre-release version. To install both OpenVINO and OpenVINO Tokenizers release version perform: + pip install --force-reinstall openvino openvino-tokenizers + To update both OpenVINO and OpenVINO Tokenizers to the latest pre-release version perform: + pip install --pre -U openvino openvino-tokenizers --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + Tokenizer won't be converted. 
+ Traceback (most recent call last): + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/bin/optimum-cli", line 10, in + sys.exit(main()) + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/commands/optimum_cli.py", line 208, in main + service.run() + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/commands/export/openvino.py", line 349, in run + main_export( + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/exporters/openvino/__main__.py", line 416, in main_export + core = Core() + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino_tokenizers/__init__.py", line 53, in new_core_init + self.add_extension(str(_ext_path)) # Core.add_extension doesn't support Path object + RuntimeError: Exception from src/inference/src/cpp/core.cpp:158: + Cannot add extension. Cannot find entry point to the extension library. This error happened: Cannot load library '/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino_tokenizers/lib/libopenvino_tokenizers.so': /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino_tokenizers/lib/libopenvino_tokenizers.so: undefined symbol: _ZNK2ov4Node17can_constant_foldERKSt6vectorINS_6OutputIS0_EESaIS3_EE -.. 
parsed-literal:: - - [ WARNING ] Unexpectedly found already patched module model.layers.22.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. - [ WARNING ] Unexpectedly found already patched module model.layers.22.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. - [ WARNING ] Unexpectedly found already patched module model.layers.23.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. - [ WARNING ] Unexpectedly found already patched module model.layers.23.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. - [ WARNING ] Unexpectedly found already patched module model.layers.23.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. - [ WARNING ] Unexpectedly found already patched module model.layers.23.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. - [ WARNING ] Unexpectedly found already patched module model.layers.23.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. - [ WARNING ] Unexpectedly found already patched module model.layers.23.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. 
Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. - [ WARNING ] Unexpectedly found already patched module model.layers.23.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. - [ WARNING ] Unexpectedly found already patched module model.mm_projector.0 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. - [ WARNING ] Unexpectedly found already patched module model.mm_projector.2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. - [ WARNING ] Unexpectedly found already patched module lm_head while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. - [ WARNING ] Unexpectedly found already patched module while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. Compress Model weights to 4 and 8 bits using NNCF @@ -530,11 +380,12 @@ image encoder model. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/quantization/quantize_model.py:432: FutureWarning: `CompressWeightsMode.INT8` is deprecated. Please, use `CompressWeightsMode.INT8_ASYM` as value instead. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/quantization/quantize_model.py:432: FutureWarning: `CompressWeightsMode.INT8` is deprecated. 
Please, use `CompressWeightsMode.INT8_ASYM` as value instead. warning_deprecated( - 2024-12-10 02:33:42.983675: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-12-10 02:33:43.008813: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 02:09:38.791476: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 02:09:38.825207: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2024-11-05 02:09:39.427301: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -657,11 +508,10 @@ Select device import requests - if not Path("notebook_utils.py").exists(): - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - open("notebook_utils.py", "w").write(r.text) + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) from notebook_utils import device_widget @@ -708,14 +558,8 @@ can use the same tokenizer and image processor that provided with model. 
messages = [{"role": "user", "content": f"\n{prompt}"}] text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) - test_image = Path("nanollava.png") - - if not test_image.exists(): - url = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/8bf7d9f2-018a-4498-bec4-55f17c273ecc" - image = Image.open(requests.get(url, stream=True).raw) - image.save(test_image) - else: - image = Image.open(test_image) + url = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/8bf7d9f2-018a-4498-bec4-55f17c273ecc" + image = Image.open(requests.get(url, stream=True).raw) image_tensor = process_images(image, None, processor) input_ids, attention_mask = process_text_input(text, tokenizer) @@ -725,7 +569,7 @@ can use the same tokenizer and image processor that provided with model. print(f"Question:\n{prompt}") print("Answer:") - output_ids = ov_model.generate(input_ids, attention_mask=attention_mask, pixel_values=image_tensor, max_new_tokens=128, use_cache=True, streamer=streamer) + output_ids = ov_model.generate(input_ids, attention_mask=attention_mask, images=image_tensor, max_new_tokens=128, use_cache=True, streamer=streamer) @@ -742,7 +586,8 @@ can use the same tokenizer and image processor that provided with model. Question: Describe this image in detail Answer: - The image features a white, fluffy lamb with a big, bright smile, standing next to a fire. The lamb's face is detailed, with black eyes that are slightly squinty, and a mouth that's slightly open. It seems to be enjoying the heat from the fire, as it is seen looking down. The lamb's legs are also visible, and they appear to be furry. The lamb's tail is long and appears to be fluffy as well. The lamb's ears are also visible and are pink. The lamb's face is depicted in detail, with small black eyes and black nostrils. The lamb's nose is also + The image features a small, adorable white lamb standing amidst a fire. 
The lamb's fur is fluffy and white, and it is adorned with tiny black eyes that are bright and lively. The lamb's face is cute, with a small black nose and a small mouth. It seems like the lamb is looking straight at the camera, making it appear even more adorable. + The lamb's right ear is visible, and it is white and pink. The lamb's right eye is also black and pink. The lamb's face is quite detailed, with the nose and mouth visible. There are also details like the lamb's right foot, which is white Interactive demo @@ -834,7 +679,7 @@ Interactive demo generation_kwargs = dict( input_ids=input_ids, attention_mask=attention_mask, - pixel_values=image_tensor, + images=image_tensor, streamer=streamer, max_new_tokens=128, stopping_criteria=[stopping_criteria], diff --git a/docs/notebooks/notebooks_with_binder_buttons.txt b/docs/notebooks/notebooks_with_binder_buttons.txt index 58f31aaae508c8..ce9cb50da47907 100644 --- a/docs/notebooks/notebooks_with_binder_buttons.txt +++ b/docs/notebooks/notebooks_with_binder_buttons.txt @@ -7,6 +7,7 @@ convert-to-openvino cross-lingual-books-alignment depth-anything detectron2-to-openvino +distilbert-sequence-classification fast-segment-anything handwritten-ocr hello-detection diff --git a/docs/notebooks/notebooks_with_colab_buttons.txt b/docs/notebooks/notebooks_with_colab_buttons.txt index 2361fbe9a19c69..0f45238db3a4fb 100644 --- a/docs/notebooks/notebooks_with_colab_buttons.txt +++ b/docs/notebooks/notebooks_with_colab_buttons.txt @@ -1,4 +1,5 @@ 3D-segmentation-point-clouds +amused-lightweight-text-to-image async-api auto-device clip-language-saliency-map @@ -7,6 +8,7 @@ cross-lingual-books-alignment depth-anything depth-anything-v2 detectron2-to-openvino +distilbert-sequence-classification explainable-ai-1-basic explainable-ai-2-deep-dive explainable-ai-3-map-interpretation @@ -22,7 +24,6 @@ knowledge-graphs-conve language-quantize-bert magika-content-type-recognition mobileclip-video-search -modelscope-to-openvino 
music-generation named-entity-recognition nano-llava-multimodal-chatbot diff --git a/docs/notebooks/nuextract-structure-extraction-with-output.rst b/docs/notebooks/nuextract-structure-extraction-with-output.rst index 8dd88ca62bd161..fc2d250626fba4 100644 --- a/docs/notebooks/nuextract-structure-extraction-with-output.rst +++ b/docs/notebooks/nuextract-structure-extraction-with-output.rst @@ -391,9 +391,9 @@ LLMPipeline. .. code:: ipython3 - import openvino_genai as ov_genai + from openvino_genai import LLMPipeline - pipe = ov_genai.LLMPipeline(model_dir.as_posix(), device.value) + pipe = LLMPipeline(model_dir.as_posix(), device.value) def run_structure_extraction(text: str, schema: str) -> str: diff --git a/docs/notebooks/object-detection-with-output.rst b/docs/notebooks/object-detection-with-output.rst index fc055f6e7ae63e..a34f72f5d8ff1e 100644 --- a/docs/notebooks/object-detection-with-output.rst +++ b/docs/notebooks/object-detection-with-output.rst @@ -84,7 +84,7 @@ Install requirements .. parsed-literal:: - 24624 + 24692 @@ -136,12 +136,12 @@ Download and convert the Model .. parsed-literal:: - 100%|██████████| 6.25M/6.25M [00:00<00:00, 26.8MB/s] + 100%|██████████| 6.25M/6.25M [00:00<00:00, 25.9MB/s] .. 
parsed-literal:: - Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.4.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.2.2+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) YOLOv8n summary (fused): 168 layers, 3,151,904 parameters, 0 gradients, 8.7 GFLOPs PyTorch: starting from 'yolov8n.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (6.2 MB) @@ -150,7 +150,7 @@ Download and convert the Model OpenVINO: export success ✅ 1.3s, saved as 'yolov8n_openvino_model/' (6.4 MB) Export complete (1.5s) - Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/object-detection-webcam + Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/object-detection-webcam Predict: yolo predict task=detect model=yolov8n_openvino_model imgsz=640 half Validate: yolo val task=detect model=yolov8n_openvino_model imgsz=640 data=coco.yaml half Visualize: https://netron.app @@ -222,7 +222,7 @@ best performance. For that purpose, just use ``AUTO``. .. parsed-literal:: - Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.4.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.2.2+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) Loading yolov8n_openvino_model for OpenVINO inference... Using OpenVINO LATENCY mode for batch=1 inference... diff --git a/docs/notebooks/omniparser-with-output.rst b/docs/notebooks/omniparser-with-output.rst deleted file mode 100644 index e22ce49105f78d..00000000000000 --- a/docs/notebooks/omniparser-with-output.rst +++ /dev/null @@ -1,680 +0,0 @@ -Screen Parsing with OmniParser and OpenVINO -=========================================== - -Recent breakthrough in Visual Language Processing and Large Language -models made significant strides in understanding and interacting with -the world through text and images. 
However, accurately parsing and -understanding complex graphical user interfaces (GUIs) remains a -significant challenge. OmniParser is a comprehensive method for parsing -user interface screenshots into structured and easy-to-understand -elements. This enables more accurate and efficient interaction with -GUIs, empowering AI agents to perform tasks across various platforms and -applications. - -|image0| - -More details about model can be found in `Microsoft blog -post `__, -`paper `__, `original -repo `__ and `model -card `__. In this tutorial -we consider how to run OmniParser using OpenVINO. - -**Table of contents:** - -- `Prerequisites <#prerequisites>`__ -- `Prepare models <#prepare-models>`__ - - - `Convert models to OpenVINO Intermediate representation - format <#convert-models-to-openvino-intermediate-representation-format>`__ - - - `Icon Detector <#icon-detector>`__ - - `Screen captioning model <#screen-captioning-model>`__ - -- `Run OmniParser using OpenVINO <#run-omniparser-using-openvino>`__ - - - `Icon Detector <#icon-detector>`__ - - - `Select inference device for icon - detector <#select-inference-device-for-icon-detector>`__ - - - `Screen regions captioning <#screen-regions-captioning>`__ - - - `Select device for screen region - captioning <#select-device-for-screen-region-captioning>`__ - - - `Recognition text on the - screen <#recognition-text-on-the-screen>`__ - - - `Select device for OCR <#select-device-for-ocr>`__ - - - `Run model inference <#run-model-inference>`__ - -- `Interactive demo <#interactive-demo>`__ - -Installation Instructions -~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is a self-contained example that relies solely on its own code. - -We recommend running the notebook in a virtual environment. You only -need a Jupyter server to start. For details, please refer to -`Installation -Guide `__. - -.. |image0| image:: https://microsoft.github.io/OmniParser/static/images/flow_merged0.png - -Prerequisites -------------- - - - -.. 
code:: ipython3 - - import platform - - %pip install -q "torch>=2.1" easyocr torchvision accelerate "supervision==0.18.0" "transformers>=4.45" timm "einops==0.8.0" "ultralytics==8.1.24" pillow opencv-python "gradio>=4.19" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "openvino>=2024.4.0" - - if platform.system() == "Darwin": - %pip install -q "numpy<2.0" - - -.. parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - - -.. code:: ipython3 - - from pathlib import Path - import requests - - notebook_utils_path = Path("notebook_utils.py") - florence_helper_path = Path("ov_florence2_helper.py") - omniparser_helper_path = Path("ov_omniparser_helper.py") - - if not notebook_utils_path.exists(): - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - notebook_utils_path.open("w", encoding="utf-8").write(r.text) - - if not florence_helper_path.exists(): - r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/florence2/ov_florence2_helper.py") - florence_helper_path.open("w", encoding="utf-8").write(r.text) - - if not omniparser_helper_path.exists(): - r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/omniparser/ov_omniparser_helper.py") - omniparser_helper_path.open("w", encoding="utf-8").write(r.text) - -Prepare models --------------- - - - -OmniParser leverages a two-step process: 1. Interactable Region -Detection: - Identifies clickable elements like buttons and icons within -a UI. - Employs a specialized model trained on a diverse dataset of web -pages. - Accurately detects interactive elements, even in complex UIs. - -2. Semantic Captioning: - - - Assigns meaningful descriptions to detected elements. 
- - Combines optical character recognition (OCR) and a captioning - model. - - Provides context for accurate action generation. - -Convert models to OpenVINO Intermediate representation format -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -For starting work with OpenVINO -we should convert models to OpenVINO Intermediate Representation format -first. - -`OpenVINO model conversion -API `__ -should be used for these purposes. ``ov.convert_model`` function accepts -original model instance and example input for tracing and returns -``ov.Model`` representing this model in OpenVINO framework. Converted -model can be used for saving on disk using ``ov.save_model`` function or -directly loading on device using ``core.complie_model``. - -Let’s consider each pipeline part. - -Icon Detector -^^^^^^^^^^^^^ - - - -Icon detector in OmniParser is represented by YOLO based model trained -on curated by model authors interactable icon detection dataset. - -For conversion and model inference we will utilize Ultralytics provided -API. You can find more examples of this API usage in these -`tutorials `__ - -.. code:: ipython3 - - from ov_omniparser_helper import download_omniparser_icon_detector - - icon_detector_dir = download_omniparser_icon_detector() - - -.. parsed-literal:: - - 2024-12-10 02:35:42.631431: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-12-10 02:35:42.657651: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - - - -.. parsed-literal:: - - best.pt: 0%| | 0.00/11.7M [00:00`__. 
- -.. code:: ipython3 - - from ov_omniparser_helper import download_omniparser_florence_model - - florence_caption_dir = download_omniparser_florence_model() - - - -.. parsed-literal:: - - Fetching 15 files: 0%| | 0/15 [00:00`__ is a python module for -extracting text from image. It is a general OCR that can read both -natural scene text and dense text in document and supports 80+ -languages. EasyOCR utilizes AI for detection text regions and recognize -text inside of predicted regions. We will also utilize both text -detection and recognition models using OpenVINO. - -Select device for OCR -^^^^^^^^^^^^^^^^^^^^^ - - - -.. code:: ipython3 - - import ipywidgets as widgets - - device_detector = device_widget(exclude=["NPU"], description="Detector device:") - device_recognizer = device_widget(exclude=["NPU"], description="Recognizer device:") - - device_box = widgets.VBox([device_detector, device_recognizer]) - device_box - - - - -.. parsed-literal:: - - VBox(children=(Dropdown(description='Detector device:', index=1, options=('CPU', 'AUTO'), value='AUTO'), Dropd… - - - -.. code:: ipython3 - - from ov_omniparser_helper import easyocr_reader - - # Uncomment the line to see easyocr_reader helper code - # ??easyocr_reader - -.. code:: ipython3 - - reader = easyocr_reader("weights/easyocr", device_detector.value, device_recognizer.value) - - -.. parsed-literal:: - - Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU. - - - - -.. 
code:: ipython3 - - from PIL import Image - - test_image_path = Path("examples/windows_home.png") - test_image_path.parent.mkdir(exist_ok=True, parents=True) - - if not test_image_path.exists(): - Image.open(requests.get("https://github.com/microsoft/OmniParser/blob/master/imgs/windows_home.png?raw=true", stream=True).raw).save(test_image_path) - -Run model inference -~~~~~~~~~~~~~~~~~~~ - - - -``process_image`` function defined in ``ov_omniparser_helper.py`` -provides easy-to-use interface for screen parsing process. - -.. code:: ipython3 - - from ov_omniparser_helper import process_image - - # Uncomment this line to see process_image code - # ??process_image - -.. code:: ipython3 - - procesed_image, label_coordinates, icon_descriptions = process_image( - test_image_path, ov_icon_detector, {"model": ov_icon_caption_gen, "processor": processor}, reader - ) - - -.. parsed-literal:: - - - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/omniparser/examples/windows_home.png: 640x640 32 0s, 37.7ms - Speed: 2.5ms preprocess, 37.7ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640) - finish processing - - -Function returns image with drawn detected boxes, boxes coordinates and -description for each region. - -.. code:: ipython3 - - display(procesed_image.resize((1200, 1200))) - print(icon_descriptions) - - - -.. image:: omniparser-with-output_files/omniparser-with-output_32_0.png - - -.. parsed-literal:: - - Text Box ID 0: 3.46 PM - Text Box ID 1: Search - Text Box ID 2: Microsoft - Text Box ID 3: 10/25/2024 - Icon Box ID 4: Microsoft Outlook. - Icon Box ID 5: Image - Icon Box ID 6: Microsoft OneNote. - Icon Box ID 7: Microsoft Office. - Icon Box ID 8: a folder for organizing files. - Icon Box ID 9: Microsoft Office. - Icon Box ID 10: Security shield. - Icon Box ID 11: Microsoft 365. - Icon Box ID 12: Microsoft Edge browser. - Icon Box ID 13: Microsoft Edge browser. 
- Icon Box ID 14: Decrease - Icon Box ID 15: the Windows operating system. - Icon Box ID 16: mountains and a beach. - Icon Box ID 17: a search function. - - -Interactive demo ----------------- - - - -.. code:: ipython3 - - from gradio_helper import make_demo - - - def process_image_gradio(image, box_threshold, iou_threshold, imgsz): - image_result, _, parsed_text = process_image( - image, - ov_icon_detector, - {"model": ov_icon_caption_gen, "processor": processor}, - reader, - box_threshold=box_threshold, - iou_threshold=iou_threshold, - imgsz=imgsz, - ) - return image_result, parsed_text - - - demo = make_demo(process_image_gradio) - - try: - demo.launch(debug=False, height=600) - except Exception: - demo.launch(debug=False, share=True, height=600) - # if you are launching remotely, specify server_name and server_port - # demo.launch(server_name='your server name', server_port='server port in int') - # Read more in the docs: https://gradio.app/docs/ - - -.. parsed-literal:: - - Running on local URL: http://127.0.0.1:7860 - - To create a public link, set `share=True` in `launch()`. 
- - - - - - - diff --git a/docs/notebooks/omniparser-with-output_files/omniparser-with-output_32_0.jpg b/docs/notebooks/omniparser-with-output_files/omniparser-with-output_32_0.jpg deleted file mode 100644 index 513db4e6d0da5d..00000000000000 --- a/docs/notebooks/omniparser-with-output_files/omniparser-with-output_32_0.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8c83af55e4296ff1dadb270b93c31084e983048437f848323c0e9677d2c3ed22 -size 161384 diff --git a/docs/notebooks/omniparser-with-output_files/omniparser-with-output_32_0.png b/docs/notebooks/omniparser-with-output_files/omniparser-with-output_32_0.png deleted file mode 100644 index a09fc0a47cd036..00000000000000 --- a/docs/notebooks/omniparser-with-output_files/omniparser-with-output_32_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:382e19a8751851ad8a151bea1f4f7bc4be62b47c7a8a4f70da0a3dae257b0c20 -size 1411816 diff --git a/docs/notebooks/openvino-api-with-output.rst b/docs/notebooks/openvino-api-with-output.rst index 3ff77e50d857db..b2b4c8c0f04fdd 100644 --- a/docs/notebooks/openvino-api-with-output.rst +++ b/docs/notebooks/openvino-api-with-output.rst @@ -188,20 +188,20 @@ notebooks. .. parsed-literal:: - classification.xml: 0%| | 0.00/179k [00:00=2.12.0, but you have keras 2.13.1 which is incompatible. + mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. tensorflow 2.12.0 requires numpy<1.24,>=1.22, but you have numpy 1.24.4 which is incompatible. - tensorflow 2.12.0 requires tensorboard<2.13,>=2.12, but you have tensorboard 2.13.0 which is incompatible. - tensorflow 2.12.0 requires tensorflow-estimator<2.13,>=2.12.0, but you have tensorflow-estimator 2.13.0 which is incompatible. - tensorflow-cpu 2.13.1 requires numpy<=1.24.3,>=1.22, but you have numpy 1.24.4 which is incompatible. 
- tensorflow-cpu 2.13.1 requires typing-extensions<4.6.0,>=3.6.6, but you have typing-extensions 4.12.2 which is incompatible. + torchvision 0.17.2+cpu requires torch==2.2.2, but you have torch 2.4.1 which is incompatible. Note: you may need to restart the kernel to use updated packages. +.. code:: ipython3 + + # Fetch `notebook_utils` module + import requests + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + + from notebook_utils import download_file, device_widget + Download checkpoints and load PyTorch model ------------------------------------------- @@ -247,9 +243,9 @@ True .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. WeightNorm.apply(module, name, dim) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. 
This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. checkpoint_dict = torch.load(ckpt_path, map_location=torch.device(self.device)) @@ -263,9 +259,9 @@ True .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/wavmark/__init__.py:16: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. 
It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/wavmark/__init__.py:16: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. 
checkpoint = torch.load(resume_path, map_location=torch.device('cpu')) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. 
We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. checkpoint_dict = torch.load(ckpt_path, map_location=torch.device(self.device)) @@ -415,41 +411,40 @@ documentation 0 + No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:283: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert ( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:346: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:346: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
pad_length = max(length - (self.window_size + 1), 0) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:347: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:347: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! slice_start_position = max((self.window_size + 1) - length, 0) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:349: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:349: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if pad_length > 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:114: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:114: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if torch.min(inputs) < left or torch.max(inputs) > right: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:119: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:119: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if min_bin_width * num_bins > 1.0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:121: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:121: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if min_bin_height * num_bins > 1.0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:171: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:171: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
assert (discriminant >= 0).all() - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Trace had nondeterministic nodes. Did you forget call .eval() on your model? Nodes: - %3293 : Float(1, 2, 43, strides=[86, 43, 1], requires_grad=0, device=cpu) = aten::randn(%3288, %3289, %3290, %3291, %3292) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86:0 - %5559 : Float(1, 192, 150, strides=[28800, 1, 192], requires_grad=0, device=cpu) = aten::randn_like(%m_p, %5554, %5555, %5556, %5557, %5558) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86:0 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Trace had nondeterministic nodes. Did you forget call .eval() on your model? Nodes: + %3293 : Float(1, 2, 43, strides=[86, 43, 1], requires_grad=0, device=cpu) = aten::randn(%3288, %3289, %3290, %3291, %3292) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86:0 + %5559 : Float(1, 192, 153, strides=[29376, 1, 192], requires_grad=0, device=cpu) = aten::randn_like(%m_p, %5554, %5555, %5556, %5557, %5558) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86:0 This may cause errors in trace checking. 
To disable trace checking, pass check_trace=False to torch.jit.trace() _check_trace( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: - Tensor-likes are not close! - - Mismatched elements: 38094 / 39424 (96.6%) - Greatest absolute difference: 0.7026380896568298 at index (0, 0, 4174) (up to 1e-05 allowed) - Greatest relative difference: 43899.56701030928 at index (0, 0, 2529) (up to 1e-05 allowed) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: + The values for attribute 'shape' do not match: torch.Size([1, 1, 39424]) != torch.Size([1, 1, 38656]). _check_trace( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 2. of the traced function does not match the corresponding output of the Python function. Detailed error: - Tensor-likes are not close! - - Mismatched elements: 42 / 6622 (0.6%) - Greatest absolute difference: 1.0 at index (0, 0, 7, 1) (up to 1e-05 allowed) - Greatest relative difference: inf at index (0, 0, 7, 2) (up to 1e-05 allowed) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 2. of the traced function does not match the corresponding output of the Python function. 
Detailed error: + The values for attribute 'shape' do not match: torch.Size([1, 1, 154, 43]) != torch.Size([1, 1, 151, 43]). + _check_trace( + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 3. of the traced function does not match the corresponding output of the Python function. Detailed error: + The values for attribute 'shape' do not match: torch.Size([1, 1, 154]) != torch.Size([1, 1, 151]). _check_trace( + 2024-11-05 02:13:33.268258: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -482,16 +477,16 @@ documentation )`. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:836.) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/modules/module.py:1562: UserWarning: A window was not provided. A rectangular window will be applied,which is known to cause spectral leakage. Other windows such as torch.hann_window or torch.hamming_window can are recommended to reduce spectral leakage.To suppress this warning and use a rectangular window, explicitly set `window=torch.ones(n_fft, device=)`. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:836.) return forward_call(\*args, \*\*kwargs) @@ -719,7 +714,7 @@ Load speaker embeddings .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/functional.py:666: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/functional.py:666: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error. Note: you can still call torch.view_as_real on the complex output to recover the old return format. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:873.) return _VF.stft(input, n_fft, hop_length, win_length, window, # type: ignore[attr-defined] @@ -874,7 +869,7 @@ And finally, run voice tone conversion with OpenVINO optimized model @@ -892,7 +887,7 @@ And finally, run voice tone conversion with OpenVINO optimized model @@ -1081,7 +1076,7 @@ voice tone conversion online. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/gradio/components/dropdown.py:100: UserWarning: The `max_choices` parameter is ignored when `multiselect` is False. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/gradio/components/dropdown.py:100: UserWarning: The `max_choices` parameter is ignored when `multiselect` is False. warnings.warn( diff --git a/docs/notebooks/optical-character-recognition-with-output.rst b/docs/notebooks/optical-character-recognition-with-output.rst index 764bad414c61e9..7dae2290312e68 100644 --- a/docs/notebooks/optical-character-recognition-with-output.rst +++ b/docs/notebooks/optical-character-recognition-with-output.rst @@ -131,13 +131,13 @@ again. .. parsed-literal:: - horizontal-text-detection-0001.bin: 0%| | 0.00/3.70M [00:00 + @@ -375,7 +384,7 @@ may be specified is input data .. parsed-literal:: - + @@ -413,7 +422,7 @@ then such conversion will be added explicitly. .. 
parsed-literal:: - + @@ -575,7 +584,7 @@ Compare results on one image .. parsed-literal:: - imagenet_2012.txt: 0%| | 0.00/30.9k [00:00= 3.10. Please make - sure that your environment fulfill to this requirement before running - it - -`OuteTTS-0.1-350M `__ is -a novel text-to-speech synthesis model that leverages pure language -modeling without external adapters or complex architectures, built upon -the LLaMa architecture. It demonstrates that high-quality speech -synthesis is achievable through a straightforward approach using crafted -prompts and audio tokens. - -More details about model can be found in `original -repo `__. - -In this tutorial we consider how to run OuteTTS pipeline using OpenVINO. - - -**Table of contents:** - - -- `Prerequisites <#prerequisites>`__ -- `Convert model <#convert-model>`__ -- `Run model inference <#run-model-inference>`__ - - - `Text-to-Speech generation <#text-to-speech-generation>`__ - - `Text-to-Speech generation with Voice - Cloning <#text-to-speech-generation-with-voice-cloning>`__ - -- `Interactive demo <#interactive-demo>`__ - -Installation Instructions -~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is a self-contained example that relies solely on its own code. - -We recommend running the notebook in a virtual environment. You only -need a Jupyter server to start. For details, please refer to -`Installation -Guide `__. - -Prerequisites -------------- - - - -.. code:: ipython3 - - import platform - - %pip install -q "torch>=2.1" "torchaudio" "einops" "transformers>=4.46.1" "loguru" "inflect" "pesq" "torchcrepe" "natsort" "polars" uroman mecab-python3 unidic-lite --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "gradio>=4.19" "openvino>=2024.4.0" "tqdm" "pyyaml" "librosa" "soundfile" - %pip install -q "git+https://github.com/huggingface/optimum-intel.git" --extra-index-url https://download.pytorch.org/whl/cpu - - if platform.system() == "Darwin": - %pip install -q "numpy<2.0.0" - -.. 
code:: ipython3 - - import requests - from pathlib import Path - - utility_files = ["cmd_helper.py", "notebook_utils.py"] - base_utility_url = "https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/" - - for utility_file in utility_files: - if not Path(utility_file).exists(): - r = requests.get(base_utility_url + utility_file) - with Path(utility_file).open("w") as f: - f.write(r.text) - - - helper_files = ["gradio_helper.py", "ov_outetts_helper.py"] - base_helper_url = "https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/outetts-text-to-speech" - - for helper_file in helper_files: - if not Path(helper_file).exists(): - r = requests.get(base_helper_url + helper_file) - with Path(helper_file).open("w") as f: - f.write(r.text) - -.. code:: ipython3 - - from cmd_helper import clone_repo - - repo_path = clone_repo("https://github.com/edwko/OuteTTS.git") - - interface_path = repo_path / "outetts/version/v1/interface.py" - - updated_version = interface_path.exists() - - if not updated_version: - interface_pth = repo_path / "outetts/v0_1/interface.py" - orig_interface_path = interface_path.parent / "_orig_interface.py" - - if not updated_version and not orig_interface_path.exists(): - interface_path.rename(orig_interface_path) - # sounddevice requires to install manually additional libraries, as we do not plan to use it for audio playing - # move it closer to its usage for avoid errors - with orig_interface_path.open("r") as in_file: - content = in_file.read() - upd_content = content.replace("import sounddevice as sd", "") - upd_content = upd_content.replace("sd.play", "import sounddevice as sd\n sd.play") - with interface_path.open("w") as out_file: - out_file.write(upd_content) - - %pip install -q {repo_path} --extra-index-url https://download.pytorch.org/whl/cpu - -Convert model -------------- - - - -OpenVINO supports PyTorch models via conversion to OpenVINO Intermediate -Representation format. 
For convenience, we will use OpenVINO integration -with HuggingFace Optimum. `Optimum -Intel `__ is the -interface between the Transformers and Diffusers libraries and the -different tools and libraries provided by Intel to accelerate end-to-end -pipelines on Intel architectures. - -Among other use cases, Optimum Intel provides a simple interface to -optimize your Transformers and Diffusers models, convert them to the -OpenVINO Intermediate Representation (IR) format and run inference using -OpenVINO Runtime. ``optimum-cli`` provides command line interface for -model conversion and optimization. - -General command format: - -.. code:: bash - - optimum-cli export openvino --model --task - -where task is task to export the model for, if not specified, the task -will be auto-inferred based on the model. You can find a mapping between -tasks and model classes in Optimum TaskManager -`documentation `__. -Additionally, you can specify weights compression using -``--weight-format`` argument with one of following options: ``fp32``, -``fp16``, ``int8`` and ``int4``. Fro int8 and int4 -`nncf `__ will be used for -weight compression. More details about model export provided in `Optimum -Intel -documentation `__. - -As OuteTTS utilizes pure language modeling approach, model conversion -process remains the same like conversion LLaMa models family for text -generation purposes. - -.. code:: ipython3 - - from cmd_helper import optimum_cli - - model_id = "OuteAI/OuteTTS-0.1-350M" - model_dir = Path(model_id.split("/")[-1] + "-ov") - - if not model_dir.exists(): - optimum_cli(model_id, model_dir, additional_args={"task": "text-generation-with-past"}) - -Run model inference -------------------- - - - -OpenVINO integration with Optimum Intel provides ready-to-use API for -model inference that can be used for smooth integration with -transformers-based solutions. 
For loading model, we will use -``OVModelForCausalLM`` class that have compatible interface with -Transformers LLaMa implementation. For loading a model, -``from_pretrained`` method should be used. It accepts path to the model -directory or model_id from HuggingFace hub (if model is not converted to -OpenVINO format, conversion will be triggered automatically). -Additionally, we can provide an inference device, quantization config -(if model has not been quantized yet) and device-specific OpenVINO -Runtime configuration. More details about model inference with Optimum -Intel can be found in -`documentation `__. -We will use ``OVModelForCausalLM`` as replacement of original -``AutoModelForCausalLM`` in ``InterfaceHF``. - -.. code:: ipython3 - - from notebook_utils import device_widget - - device = device_widget(exclude=["NPU"]) - - device - - - - -.. parsed-literal:: - - Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') - - - -.. code:: ipython3 - - from ov_outetts_helper import InterfaceOV, OVHFModel # noqa: F401 - - # Uncomment these lines to see pipeline details - # ??InterfaceOV - # ??OVHFModel - - -.. parsed-literal:: - - 2024-11-29 11:48:51.975233: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
- 2024-11-29 11:48:51.989550: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered - WARNING: All log messages before absl::InitializeLog() is called are written to STDERR - E0000 00:00:1732866532.005718 2314480 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered - E0000 00:00:1732866532.010517 2314480 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered - 2024-11-29 11:48:52.027376: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - - -.. code:: ipython3 - - interface = InterfaceOV(model_dir, device.value) - - -.. parsed-literal:: - - making attention of type 'vanilla' with 768 in_channels - - -Text-to-Speech generation -~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -Now let’s see model in action. Providing input text to ``generate`` -method of interface, model returns tensor that represents output audio -with random speaker characteristics. - -.. code:: ipython3 - - output = interface.generate(text="Hello, I'm working!", temperature=0.1, repetition_penalty=1.1, max_length=4096) - - -.. parsed-literal:: - - The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. - Setting `pad_token_id` to `eos_token_id`:None for open-end generation. - The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. 
Please pass your input's `attention_mask` to obtain reliable results. - - -.. code:: ipython3 - - import IPython.display as ipd - - ipd.Audio(output.audio[0].numpy(), rate=output.sr) - - - - -.. raw:: html - - - - - - - -Text-to-Speech generation with Voice Cloning -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -Additionally, we can specify reference voice for generation by providing -reference audio and transcript for it. ``interface.create_speaker`` -processes reference audio and text to set of features used for audio -description. - -.. code:: ipython3 - - from notebook_utils import download_file - - ref_audio_url = "https://huggingface.co/OuteAI/OuteTTS-0.1-350M/resolve/main/samples/2.wav" - - file_path = download_file(ref_audio_url) - - -.. parsed-literal:: - - '2.wav' already exists. - - -.. code:: ipython3 - - ipd.Audio(file_path) - - - - -.. raw:: html - - - - - - - -.. code:: ipython3 - - speaker = interface.create_speaker(file_path, "Hello, I can speak pretty well, but sometimes I make some mistakes.") - - # Save the speaker to a file - interface.save_speaker(speaker, "speaker.pkl") - - # Load the speaker from a file - speaker = interface.load_speaker("speaker.pkl") - - # Generate TTS with the custom voice - output = interface.generate(text="This is a cloned voice speaking", speaker=speaker, temperature=0.1, repetition_penalty=1.1, max_length=4096) - - -.. parsed-literal:: - - The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. - Setting `pad_token_id` to `eos_token_id`:None for open-end generation. - - -.. code:: ipython3 - - ipd.Audio(output.audio[0].numpy(), rate=output.sr) - - - - -.. raw:: html - - - - - - - -Interactive demo ----------------- - - - -.. 
code:: ipython3 - - from gradio_helper import make_demo - - demo = make_demo(interface) - - try: - demo.launch(debug=True) - except Exception: - demo.launch(share=True, debug=True) diff --git a/docs/notebooks/paddle-ocr-webcam-with-output.rst b/docs/notebooks/paddle-ocr-webcam-with-output.rst index aa054a40e73a07..9f7510cd5efe96 100644 --- a/docs/notebooks/paddle-ocr-webcam-with-output.rst +++ b/docs/notebooks/paddle-ocr-webcam-with-output.rst @@ -76,12 +76,7 @@ Guide =2.12.0, but you have keras 2.13.1 which is incompatible. tensorflow 2.12.0 requires numpy<1.24,>=1.22, but you have numpy 1.24.4 which is incompatible. - tensorflow 2.12.0 requires tensorboard<2.13,>=2.12, but you have tensorboard 2.13.0 which is incompatible. - tensorflow 2.12.0 requires tensorflow-estimator<2.13,>=2.12.0, but you have tensorflow-estimator 2.13.0 which is incompatible. - tensorflow-cpu 2.13.1 requires numpy<=1.24.3,>=1.22, but you have numpy 1.24.4 which is incompatible. - tensorflow-cpu 2.13.1 requires typing-extensions<4.6.0,>=3.6.6, but you have typing-extensions 4.12.2 which is incompatible. Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. @@ -214,7 +209,7 @@ Download the Model for Text **Detection** .. parsed-literal:: - ch_PP-OCRv3_det_infer.tar: 0%| | 0.00/3.65M [00:00=2.5.1,<2.6.0" + import platform + + if platform.system() == "Windows": + %pip install -q "paddlepaddle>=2.5.1,<2.6.0" + else: + %pip install -q "paddlepaddle>=2.5.1" %pip install -q "paddleclas>=2.5.2" --no-deps - %pip install -q "prettytable" "ujson" "visualdl>=2.5.3" "faiss-cpu>=1.7.1" Pillow tqdm "matplotlib>=3.4" "opencv-python" "scikit-learn" + %pip install -q "prettytable" "ujson" "visualdl>=2.5.3" "faiss-cpu>=1.7.1" Pillow tqdm "matplotlib>=3.4" # Install openvino package %pip install -q "openvino>=2023.1.0" @@ -68,13 +73,31 @@ Imports Note: you may need to restart the kernel to use updated packages. 
Note: you may need to restart the kernel to use updated packages. ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - paddleclas 2.6.0 requires easydict, which is not installed. - paddleclas 2.6.0 requires gast==0.3.3, but you have gast 0.4.0 which is incompatible. - paddleclas 2.6.0 requires opencv-python<=4.6.0.66, but you have opencv-python 4.10.0.84 which is incompatible. + paddleclas 2.5.2 requires easydict, which is not installed. + paddleclas 2.5.2 requires gast==0.3.3, but you have gast 0.4.0 which is incompatible. + paddleclas 2.5.2 requires opencv-python==4.6.0.66, but you have opencv-python 4.10.0.84 which is incompatible. Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. +.. code:: ipython3 + + if platform.system() == "Linux": + !wget http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb + !sudo dpkg -i libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb + + +.. parsed-literal:: + + --2024-11-05 02:15:59-- http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb + Resolving proxy-dmz.intel.com (proxy-dmz.intel.com)... 10.241.208.166 + Connecting to proxy-dmz.intel.com (proxy-dmz.intel.com)|10.241.208.166|:911... connected. + Proxy request sent, awaiting response... 404 Not Found + 2024-11-05 02:16:00 ERROR 404: Not Found. + + dpkg: error: cannot access archive 'libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb': No such file or directory + + .. code:: ipython3 import time @@ -101,8 +124,8 @@ Imports .. parsed-literal:: - 2024-12-10 02:42:23 INFO: Loading faiss with AVX512 support. - 2024-12-10 02:42:23 INFO: Successfully loaded faiss with AVX512 support. + 2024-11-05 02:16:02 INFO: Loading faiss with AVX512 support. + 2024-11-05 02:16:02 INFO: Successfully loaded faiss with AVX512 support. 
Settings @@ -150,13 +173,13 @@ PaddleHub. This may take a while. .. parsed-literal:: - coco_close.png: 0%| | 0.00/133k [00:00 + -.. image:: paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_14_3.png +.. image:: paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_15_3.png To decode the labels predicted by the model to names of classes, we need @@ -380,7 +403,7 @@ Notebook `__ for more information. -.. image:: paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_22_1.png +.. image:: paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_23_1.png Timing and Comparison @@ -439,7 +462,7 @@ Note that many optimizations are possible to improve the performance. .. parsed-literal:: - PaddlePaddle model on CPU: 0.0071 seconds per image, FPS: 141.67 + PaddlePaddle model on CPU: 0.0074 seconds per image, FPS: 134.37 PaddlePaddle result: Labrador retriever, 0.75138 @@ -450,7 +473,7 @@ Note that many optimizations are possible to improve the performance. -.. image:: paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_26_1.png +.. image:: paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_27_1.png Select inference device @@ -500,7 +523,7 @@ select device from dropdown list for running inference using OpenVINO .. parsed-literal:: - OpenVINO IR model in OpenVINO Runtime (AUTO): 0.0027 seconds per image, FPS: 376.00 + OpenVINO IR model in OpenVINO Runtime (AUTO): 0.0027 seconds per image, FPS: 373.31 OpenVINO result: Labrador retriever, 0.74909 @@ -511,7 +534,7 @@ select device from dropdown list for running inference using OpenVINO -.. image:: paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_29_1.png +.. 
image:: paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_30_1.png References diff --git a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_14_3.png b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_14_3.png deleted file mode 100644 index 35e0c81123f0a1..00000000000000 --- a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_14_3.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:99b8398ef76f2959d210e2d30bb44420f8d34a885a4480bc26e2af6627ba7119 -size 120883 diff --git a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_15_3.png b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_15_3.png new file mode 100644 index 00000000000000..97c14460591759 --- /dev/null +++ b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_15_3.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba922b89ca992098fd516d86f4d0c97858a8264664f9a49d431978b790a9135f +size 120883 diff --git a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_22_1.png b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_22_1.png deleted file mode 100644 index 35c91e327be1ce..00000000000000 --- a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_22_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1381e5922057c6bc70eb4ba9a04f3164382ad01191d320c1acbc819e7261f8c1 -size 224886 diff --git 
a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_23_1.png b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_23_1.png new file mode 100644 index 00000000000000..74feaaeb12e5bc --- /dev/null +++ b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_23_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b77eb48f499b17e5306d574b90a5d123ab82440225c034a20256a0ce6378cba +size 224886 diff --git a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_26_1.png b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_26_1.png deleted file mode 100644 index 35c91e327be1ce..00000000000000 --- a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_26_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1381e5922057c6bc70eb4ba9a04f3164382ad01191d320c1acbc819e7261f8c1 -size 224886 diff --git a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_27_1.png b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_27_1.png new file mode 100644 index 00000000000000..74feaaeb12e5bc --- /dev/null +++ b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_27_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b77eb48f499b17e5306d574b90a5d123ab82440225c034a20256a0ce6378cba +size 224886 diff --git a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_29_1.png 
b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_29_1.png deleted file mode 100644 index 35c91e327be1ce..00000000000000 --- a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_29_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1381e5922057c6bc70eb4ba9a04f3164382ad01191d320c1acbc819e7261f8c1 -size 224886 diff --git a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_30_1.png b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_30_1.png new file mode 100644 index 00000000000000..74feaaeb12e5bc --- /dev/null +++ b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_30_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b77eb48f499b17e5306d574b90a5d123ab82440225c034a20256a0ce6378cba +size 224886 diff --git a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_7_1.png b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_7_1.png deleted file mode 100644 index 35c91e327be1ce..00000000000000 --- a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_7_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1381e5922057c6bc70eb4ba9a04f3164382ad01191d320c1acbc819e7261f8c1 -size 224886 diff --git a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_8_1.png b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_8_1.png new file mode 100644 index 
00000000000000..74feaaeb12e5bc --- /dev/null +++ b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_8_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b77eb48f499b17e5306d574b90a5d123ab82440225c034a20256a0ce6378cba +size 224886 diff --git a/docs/notebooks/paint-by-example-with-output.rst b/docs/notebooks/paint-by-example-with-output.rst new file mode 100644 index 00000000000000..2f1371652c5750 --- /dev/null +++ b/docs/notebooks/paint-by-example-with-output.rst @@ -0,0 +1,1359 @@ +Paint By Example: Exemplar-based Image Editing with Diffusion Models +==================================================================== + + +**Table of contents:** + + +- `Stable Diffusion in Diffusers + library <#stable-diffusion-in-diffusers-library>`__ +- `Download default images <#download-default-images>`__ +- `Convert models to OpenVINO Intermediate representation (IR) + format <#convert-models-to-openvino-intermediate-representation-ir-format>`__ +- `Prepare Inference pipeline <#prepare-inference-pipeline>`__ +- `Select inference device <#select-inference-device>`__ +- `Configure Inference Pipeline <#configure-inference-pipeline>`__ +- `Quantization <#quantization>`__ + + - `Prepare Inference pipeline <#prepare-inference-pipeline>`__ + - `Run quantization <#run-quantization>`__ + - `Run inference and compare inference + time <#run-inference-and-compare-inference-time>`__ + - `Compare UNet file size <#compare-unet-file-size>`__ + +- `Interactive inference <#interactive-inference>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. 
+ +Stable Diffusion in Diffusers library +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To work with Stable Diffusion, +we will use the Hugging Face +`Diffusers `__ library. To +experiment with in-painting we can use Diffusers which exposes the +`StableDiffusionInpaintPipeline `__ +similar to the `other Diffusers +pipelines `__. +The code below demonstrates how to create +``StableDiffusionInpaintPipeline`` using +``stable-diffusion-2-inpainting``. To create the drawing tool we will +install Gradio for handling user interaction. + +This is the overall flow of the application: + +.. figure:: https://user-images.githubusercontent.com/103226580/236954918-f364b227-293c-4f78-a9bf-9dcebcb1034a.png + :alt: Flow Diagram + + Flow Diagram + +.. code:: ipython3 + + %pip install -q "torch>=2.1" torchvision --extra-index-url "https://download.pytorch.org/whl/cpu" + %pip install -q "diffusers>=0.25.0" "peft>=0.6.2" "openvino>=2023.2.0" "transformers>=4.25.1" "matplotlib>=3.4" ipywidgets opencv-python pillow "nncf>=2.7.0" "gradio==3.44.1" tqdm + +Download the model from `HuggingFace +Paint-by-Example `__. +This might take several minutes because it is over 5GB + +.. code:: ipython3 + + from diffusers import DiffusionPipeline + from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler + + + pipeline = DiffusionPipeline.from_pretrained("Fantasy-Studio/Paint-By-Example") + + scheduler_inpaint = DDIMScheduler.from_config(pipeline.scheduler.config) + +.. code:: ipython3 + + import gc + + extractor = pipeline.feature_extractor + image_encoder = pipeline.image_encoder + image_encoder.eval() + unet_inpaint = pipeline.unet + unet_inpaint.eval() + vae_inpaint = pipeline.vae + vae_inpaint.eval() + + del pipeline + gc.collect(); + +Download default images +~~~~~~~~~~~~~~~~~~~~~~~ + + + +Download default images. + +.. 
code:: ipython3 + + # Fetch `notebook_utils` module + import requests + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + + open("notebook_utils.py", "w").write(r.text) + + from notebook_utils import download_file, device_widget, quantization_widget + + download_file( + "https://github-production-user-asset-6210df.s3.amazonaws.com/103226580/286377210-edc98e97-0e43-4796-b771-dacd074c39ea.png", + "0.png", + "data/image", + ) + + download_file( + "https://github-production-user-asset-6210df.s3.amazonaws.com/103226580/286377233-b2c2d902-d379-415a-8183-5bdd37c52429.png", + "1.png", + "data/image", + ) + + download_file( + "https://github-production-user-asset-6210df.s3.amazonaws.com/103226580/286377248-da1db61e-3521-4cdb-85c8-1386d360ce22.png", + "2.png", + "data/image", + ) + + download_file( + "https://github-production-user-asset-6210df.s3.amazonaws.com/103226580/286377279-fa496f17-e850-4351-87c5-2552dfbc4633.jpg", + "bird.jpg", + "data/reference", + ) + + download_file( + "https://github-production-user-asset-6210df.s3.amazonaws.com/103226580/286377298-06a25ff2-84d8-4d46-95cd-8c25efa690d8.jpg", + "car.jpg", + "data/reference", + ) + + download_file( + "https://github-production-user-asset-6210df.s3.amazonaws.com/103226580/286377318-8841a801-1933-4523-a433-7d2fb64c47e6.jpg", + "dog.jpg", + "data/reference", + ) + +Convert models to OpenVINO Intermediate representation (IR) format +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +Adapted from `Stable Diffusion v2 Infinite Zoom +notebook `__ + +.. code:: ipython3 + + from pathlib import Path + import torch + import numpy as np + import openvino as ov + + model_dir = Path("model") + model_dir.mkdir(exist_ok=True) + sd2_inpainting_model_dir = Path("model/paint_by_example") + sd2_inpainting_model_dir.mkdir(exist_ok=True) + +Functions to convert to OpenVINO IR format + +.. 
# %%
def cleanup_torchscript_cache():
    """
    Helper for removing the cached TorchScript model representation.

    Clears the JIT class registry, replaces the concrete-type store with a
    fresh one and clears the JIT class state.
    """
    torch._C._jit_clear_class_registry()
    torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore()
    torch.jit._state._clear_class_state()


def convert_image_encoder(image_encoder: torch.nn.Module, ir_path: Path):
    """
    Convert the image encoder to OpenVINO IR and save it to ``ir_path``.

    Does nothing when ``ir_path`` already exists.

    Parameters:
        image_encoder (torch.nn.Module): image encoder PyTorch model
        ir_path (Path): File for storing model
    Returns:
        None
    """

    class ImageEncoderWrapper(torch.nn.Module):
        """Calls the encoder with ``return_uncond_vector=True`` and returns both results."""

        def __init__(self, image_encoder):
            super().__init__()
            self.image_encoder = image_encoder

        def forward(self, image):
            image_embeddings, negative_prompt_embeds = self.image_encoder(image, return_uncond_vector=True)
            return image_embeddings, negative_prompt_embeds

    if ir_path.exists():
        return

    wrapped_encoder = ImageEncoderWrapper(image_encoder)
    # switch model to inference mode
    wrapped_encoder.eval()
    example_image = torch.randn((1, 3, 224, 224))

    # disable gradients calculation for reducing memory consumption
    with torch.no_grad():
        ov_model = ov.convert_model(wrapped_encoder, example_input=example_image, input=([1, 3, 224, 224],))
        ov.save_model(ov_model, ir_path)
        del ov_model
        cleanup_torchscript_cache()
    print("Image Encoder successfully converted to IR")


def convert_unet(
    unet: torch.nn.Module,
    ir_path: Path,
    num_channels: int = 4,
    width: int = 64,
    height: int = 64,
):
    """
    Convert the UNet to OpenVINO IR and save it to ``ir_path``.

    Does nothing when ``ir_path`` already exists. The example inputs are a
    batch of two latents, a scalar float32 timestep and a ``(2, 1, 768)``
    encoder hidden state; their shapes and dtypes are passed to the converter
    as static input descriptions.

    Parameters:
        unet (torch.nn.Module): UNet PyTorch model
        ir_path (Path): File for storing model
        num_channels (int, optional, 4): number of input channels
        width (int, optional, 64): input width
        height (int, optional, 64): input height
    Returns:
        None
    """
    dtype_mapping = {torch.float32: ov.Type.f32, torch.float64: ov.Type.f64}
    if ir_path.exists():
        return

    # prepare inputs
    encoder_hidden_state = torch.ones((2, 1, 768))
    # Layout is batch x channels x height x width: the pipeline reads axis 2 of
    # the UNet input as height and axis 3 as width.
    latents = torch.randn((2, num_channels, height, width))
    t = torch.from_numpy(np.array(1, dtype=np.float32))
    unet.eval()
    dummy_inputs = (latents, t, encoder_hidden_state)
    input_info = [(ov.PartialShape(tuple(tensor.shape)), dtype_mapping[tensor.dtype]) for tensor in dummy_inputs]

    with torch.no_grad():
        ov_model = ov.convert_model(unet, example_input=dummy_inputs, input=input_info)
        ov.save_model(ov_model, ir_path)
        del ov_model
        cleanup_torchscript_cache()
    print("U-Net successfully converted to IR")


def convert_vae_encoder(vae: torch.nn.Module, ir_path: Path, width: int = 512, height: int = 512):
    """
    Convert the encoding half of the VAE to OpenVINO IR and save it to ``ir_path``.

    Does nothing when ``ir_path`` already exists. Only ``vae.encode(...)``
    followed by sampling from ``latent_dist`` is exported.

    Parameters:
        vae (torch.nn.Module): VAE PyTorch model
        ir_path (Path): File for storing model
        width (int, optional, 512): input width
        height (int, optional, 512): input height
    Returns:
        None
    """

    class VAEEncoderWrapper(torch.nn.Module):
        """Exposes ``vae.encode(image).latent_dist.sample()`` as ``forward``."""

        def __init__(self, vae):
            super().__init__()
            self.vae = vae

        def forward(self, image):
            latents = self.vae.encode(image).latent_dist.sample()
            return latents

    if ir_path.exists():
        return

    vae_encoder = VAEEncoderWrapper(vae)
    vae_encoder.eval()
    # batch x channels x height x width, matching what the pipeline feeds in
    image = torch.zeros((1, 3, height, width))
    with torch.no_grad():
        ov_model = ov.convert_model(vae_encoder, example_input=image, input=([1, 3, height, width],))
        ov.save_model(ov_model, ir_path)
        del ov_model
        cleanup_torchscript_cache()
    print("VAE encoder successfully converted to IR")


def convert_vae_decoder(vae: torch.nn.Module, ir_path: Path, width: int = 64, height: int = 64):
    """
    Convert the decoding half of the VAE to OpenVINO IR and save it to ``ir_path``.

    Does nothing when ``ir_path`` already exists. The exported graph first
    multiplies the latents by ``1 / 0.18215`` and then calls ``vae.decode``.

    Parameters:
        vae (torch.nn.Module): VAE model
        ir_path (Path): File for storing model
        width (int, optional, 64): input width
        height (int, optional, 64): input height
    Returns:
        None
    """

    class VAEDecoderWrapper(torch.nn.Module):
        """Rescales latents by ``1 / 0.18215`` and decodes them."""

        def __init__(self, vae):
            super().__init__()
            self.vae = vae

        def forward(self, latents):
            latents = 1 / 0.18215 * latents
            return self.vae.decode(latents)

    if ir_path.exists():
        return

    vae_decoder = VAEDecoderWrapper(vae)
    # batch x channels x height x width
    latents = torch.zeros((1, 4, height, width))

    vae_decoder.eval()
    with torch.no_grad():
        ov_model = ov.convert_model(vae_decoder, example_input=latents, input=([1, 4, height, width],))
        ov.save_model(ov_model, ir_path)
        del ov_model
        cleanup_torchscript_cache()
    # The original message stopped after "converted to ".
    print("VAE decoder successfully converted to IR")


# %% [markdown]
# Do the conversion of the in-painting model:

# %%
IMAGE_ENCODER_OV_PATH_INPAINT = sd2_inpainting_model_dir / "image_encoder.xml"

if not IMAGE_ENCODER_OV_PATH_INPAINT.exists():
    convert_image_encoder(image_encoder, IMAGE_ENCODER_OV_PATH_INPAINT)
else:
    print(f"Image encoder will be loaded from {IMAGE_ENCODER_OV_PATH_INPAINT}")

del image_encoder
gc.collect();

# %% [markdown]
# Do the conversion of the Unet model

# %%
UNET_OV_PATH_INPAINT = sd2_inpainting_model_dir / "unet.xml"
if not UNET_OV_PATH_INPAINT.exists():
    convert_unet(unet_inpaint, UNET_OV_PATH_INPAINT, num_channels=9, width=64, height=64)
    del unet_inpaint
    gc.collect()
else:
    del unet_inpaint
    print(f"U-Net will be loaded from {UNET_OV_PATH_INPAINT}")
gc.collect();

# %% [markdown]
# Do the conversion of the VAE Encoder model
#
# ..
# %%
VAE_ENCODER_OV_PATH_INPAINT = sd2_inpainting_model_dir / "vae_encoder.xml"

if not VAE_ENCODER_OV_PATH_INPAINT.exists():
    convert_vae_encoder(vae_inpaint, VAE_ENCODER_OV_PATH_INPAINT, 512, 512)
else:
    print(f"VAE encoder will be loaded from {VAE_ENCODER_OV_PATH_INPAINT}")

VAE_DECODER_OV_PATH_INPAINT = sd2_inpainting_model_dir / "vae_decoder.xml"
if not VAE_DECODER_OV_PATH_INPAINT.exists():
    convert_vae_decoder(vae_inpaint, VAE_DECODER_OV_PATH_INPAINT, 64, 64)
else:
    print(f"VAE decoder will be loaded from {VAE_DECODER_OV_PATH_INPAINT}")

del vae_inpaint
gc.collect();

# %% [markdown]
# Prepare Inference pipeline
# ~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Function to prepare the mask and masked image.
#
# Adapted from `Stable Diffusion v2 Infinite Zoom
# notebook `__
#
# The main difference is that instead of encoding a text prompt it will
# now encode an image as the prompt.
#
# This is the detailed flowchart for the pipeline:
#
# .. figure:: https://github.com/openvinotoolkit/openvino_notebooks/assets/103226580/cde2d5c4-2540-4a45-ad9c-339f7a69459d
#    :alt: pipeline-flowchart
#
#    pipeline-flowchart

# %%
import inspect
from typing import Optional, Union, Dict

# `import PIL` alone does not guarantee that the `PIL.Image` submodule is loaded.
import PIL.Image
import cv2

from transformers import CLIPImageProcessor
from diffusers.pipelines.pipeline_utils import DiffusionPipeline
from openvino.runtime import Model


def prepare_mask_and_masked_image(image: PIL.Image.Image, mask: PIL.Image.Image):
    """
    Prepares a pair (image, mask) to be consumed by the Stable Diffusion pipeline.

    Both inputs are converted to ``np.ndarray`` with layout
    ``batch x channels x height x width``; ``channels`` is ``3`` for the image
    and ``1`` for the mask.

    The image is cast to ``np.float32`` and scaled with ``x / 127.5 - 1.0``.
    A PIL mask is converted to grayscale and divided by 255. The mask is then
    inverted (``1 - mask``) and binarized at ``0.5``, so the returned mask is
    ``1`` where the source image is kept and ``0`` in the region to inpaint.

    Args:
        image: The image to inpaint. A ``PIL.Image``, a ``height x width x 3``
            ``np.ndarray``, or a list of either.
        mask: The mask to apply to the image, i.e. regions to inpaint.
            A ``PIL.Image``, a ``height x width`` ``np.ndarray``, or a list of either.

    Returns:
        tuple[np.ndarray, np.ndarray]: the pair ``(mask, masked_image)`` where
        ``masked_image = image * mask``.
    """
    if isinstance(image, (PIL.Image.Image, np.ndarray)):
        image = [image]

    if isinstance(image, list) and isinstance(image[0], PIL.Image.Image):
        image = [np.array(i.convert("RGB"))[None, :] for i in image]
        image = np.concatenate(image, axis=0)
    elif isinstance(image, list) and isinstance(image[0], np.ndarray):
        image = np.concatenate([i[None, :] for i in image], axis=0)

    # NHWC -> NCHW
    image = image.transpose(0, 3, 1, 2)
    image = image.astype(np.float32) / 127.5 - 1.0

    # preprocess mask
    if isinstance(mask, (PIL.Image.Image, np.ndarray)):
        mask = [mask]

    if isinstance(mask, list) and isinstance(mask[0], PIL.Image.Image):
        mask = np.concatenate([np.array(m.convert("L"))[None, None, :] for m in mask], axis=0)
        mask = mask.astype(np.float32) / 255.0
    elif isinstance(mask, list) and isinstance(mask[0], np.ndarray):
        mask = np.concatenate([m[None, None, :] for m in mask], axis=0)

    mask = 1 - mask

    mask[mask < 0.5] = 0
    mask[mask >= 0.5] = 1

    masked_image = image * mask

    return mask, masked_image


# %% [markdown]
# Class for the pipeline which will connect all the models together: VAE
# decode –> image encode –> tokenizer –> Unet –> VAE model –> scheduler

# %%
class OVStableDiffusionInpaintingPipeline(DiffusionPipeline):
    def __init__(
        self,
        vae_decoder: Model,
        image_encoder: Model,
        image_processor: CLIPImageProcessor,
        unet: Model,
        scheduler: Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler],
        vae_encoder: Model = None,
    ):
        """
        Pipeline for image-guided inpainting (Paint-by-Example) on OpenVINO models.

        Parameters:
            vae_decoder (Model):
                Variational Auto-Encoder (VAE) Model to decode latents to images.
            image_encoder (Model):
                Model producing the reference-image embeddings, see
                https://huggingface.co/Fantasy-Studio/Paint-by-Example/blob/main/image_encoder/config.json
            image_processor (CLIPImageProcessor):
                Preprocessor applied to the reference image before ``image_encoder``.
            unet (Model): Conditional U-Net architecture to denoise the encoded image latents.
            scheduler (SchedulerMixin):
                A scheduler to be used in combination with unet to denoise the encoded image latents. Can be one of
                DDIMScheduler, LMSDiscreteScheduler, or PNDMScheduler.
            vae_encoder (Model):
                Variational Auto-Encoder (VAE) Model to encode images to latent representation.
                ``prepare_mask_latents`` calls it, so inpainting needs it even though it defaults to None.
        """
        super().__init__()
        self.scheduler = scheduler
        self.vae_decoder = vae_decoder
        self.vae_encoder = vae_encoder
        self.image_encoder = image_encoder
        self.unet = unet
        self.register_to_config(unet=unet)
        self._unet_output = unet.output(0)
        self._vae_d_output = vae_decoder.output(0)
        self._vae_e_output = vae_encoder.output(0) if vae_encoder is not None else None
        # Output size in pixels is 8x the UNet's latent input (axis 2 = height, axis 3 = width).
        self.height = self.unet.input(0).shape[2] * 8
        self.width = self.unet.input(0).shape[3] * 8
        self.image_processor = image_processor

    def prepare_mask_latents(
        self,
        mask,
        masked_image,
        height=512,
        width=512,
        do_classifier_free_guidance=True,
    ):
        """
        Prepare mask as Unet input and encode input masked image to latent space using vae encoder

        Parameters:
            mask (np.array): input mask array
            masked_image (np.array): masked input image tensor
            height (int, *optional*, 512): generated image height
            width (int, *optional*, 512): generated image width
            do_classifier_free_guidance (bool, *optional*, True): whether to use classifier free guidance or not
        Returns:
            mask (np.array): mask resized to ``(height // 8, width // 8)``
            masked_image_latents (np.array): masked image encoded into latent space using VAE
        """
        mask = torch.nn.functional.interpolate(torch.from_numpy(mask), size=(height // 8, width // 8))
        mask = mask.numpy()

        # encode the mask image into latents space so we can concatenate it to the latents
        masked_image_latents = self.vae_encoder(masked_image)[self._vae_e_output]
        masked_image_latents = 0.18215 * masked_image_latents

        if do_classifier_free_guidance:
            # duplicate along the batch axis for the two guidance branches
            mask = np.concatenate([mask] * 2)
            masked_image_latents = np.concatenate([masked_image_latents] * 2)
        return mask, masked_image_latents

    def __call__(
        self,
        image: PIL.Image.Image,
        mask_image: PIL.Image.Image,
        reference_image: PIL.Image.Image,
        num_inference_steps: Optional[int] = 50,
        guidance_scale: Optional[float] = 7.5,
        eta: Optional[float] = 0,
        output_type: Optional[str] = "pil",
        seed: Optional[int] = None,
    ):
        """
        Function invoked when calling the pipeline for generation.
        Parameters:
            image (PIL.Image.Image):
                Source image for inpainting.
            mask_image (PIL.Image.Image):
                Mask area for inpainting
            reference_image (PIL.Image.Image):
                Reference image to inpaint in mask area
            num_inference_steps (int, *optional*, defaults to 50):
                The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                expense of slower inference.
            guidance_scale (float, *optional*, defaults to 7.5):
                Guidance scale as defined in Classifier-Free Diffusion Guidance(https://arxiv.org/abs/2207.12598).
                Values greater than 1.0 enable classifier free guidance.
            eta (float, *optional*, defaults to 0.0):
                Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Passed to
                ``scheduler.step`` only when its signature accepts ``eta``.
            output_type (`str`, *optional*, defaults to "pil"):
                The output format of the generated image: "pil" for PIL.Image.Image, anything else for np.array.
            seed (int, *optional*, None):
                Seed for NumPy's global random generator; not applied when None.
        Returns:
            Dictionary with keys:
                sample - the last generated image PIL.Image.Image or np.array
        """
        if seed is not None:
            np.random.seed(seed)
        # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
        # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
        # corresponds to doing no classifier free guidance.
        do_classifier_free_guidance = guidance_scale > 1.0

        # get reference image embeddings
        image_embeddings = self._encode_image(reference_image, do_classifier_free_guidance=do_classifier_free_guidance)

        # prepare mask
        mask, masked_image = prepare_mask_and_masked_image(image, mask_image)

        # set timesteps
        extra_set_kwargs = {}
        if "offset" in inspect.signature(self.scheduler.set_timesteps).parameters:
            extra_set_kwargs["offset"] = 1

        self.scheduler.set_timesteps(num_inference_steps, **extra_set_kwargs)
        timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, 1)
        latent_timestep = timesteps[:1]

        # get the initial random noise
        latents, meta = self.prepare_latents(latent_timestep)
        # The mask is concatenated with the latents on the channel axis below, so it
        # must be resized to the model's own size rather than the 512 default.
        mask, masked_image_latents = self.prepare_mask_latents(
            mask,
            masked_image,
            height=self.height,
            width=self.width,
            do_classifier_free_guidance=do_classifier_free_guidance,
        )

        # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
        # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
        # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
        # and should be between [0, 1]
        extra_step_kwargs = {}
        if "eta" in inspect.signature(self.scheduler.step).parameters:
            extra_step_kwargs["eta"] = eta

        for t in self.progress_bar(timesteps):
            # expand the latents if we are doing classifier free guidance
            latent_model_input = np.concatenate([latents] * 2) if do_classifier_free_guidance else latents
            latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
            latent_model_input = np.concatenate([latent_model_input, masked_image_latents, mask], axis=1)
            # predict the noise residual
            noise_pred = self.unet([latent_model_input, np.array(t, dtype=np.float32), image_embeddings])[self._unet_output]
            # perform guidance
            if do_classifier_free_guidance:
                noise_pred_uncond, noise_pred_text = noise_pred[0], noise_pred[1]
                noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)

            # compute the previous noisy sample x_t -> x_t-1
            latents = self.scheduler.step(
                torch.from_numpy(noise_pred),
                t,
                torch.from_numpy(latents),
                **extra_step_kwargs,
            )["prev_sample"].numpy()
        # decode the image latents with vae (the exported decoder rescales them itself)
        image = self.vae_decoder(latents)[self._vae_d_output]

        image = self.postprocess_image(image, meta, output_type)
        return {"sample": image}

    def _encode_image(self, image: PIL.Image.Image, do_classifier_free_guidance: bool = True):
        """
        Encodes the image into image encoder hidden states.

        Parameters:
            image (PIL.Image.Image): base image to encode
            do_classifier_free_guidance (bool): accepted for symmetry with the caller;
                the body does not read it and always returns both embeddings.
        Returns:
            image_embeddings (np.ndarray): negative (output 1) and image (output 0)
                embeddings concatenated in that order
        """
        processed_image = self.image_processor(image)
        processed_image = processed_image["pixel_values"][0]
        processed_image = np.expand_dims(processed_image, axis=0)

        output = self.image_encoder(processed_image)
        image_embeddings = output[self.image_encoder.output(0)]
        negative_embeddings = output[self.image_encoder.output(1)]

        image_embeddings = np.concatenate([negative_embeddings, image_embeddings])

        return image_embeddings

    def prepare_latents(self, latent_timestep: torch.Tensor = None):
        """
        Function for getting initial latents for starting generation

        Parameters:
            latent_timestep (torch.Tensor, *optional*, None):
                Initial step predicted by the scheduler; not read by this implementation.
        Returns:
            latents (np.ndarray):
                float32 Gaussian noise of shape ``(1, 4, height // 8, width // 8)``
            meta (Dict):
                always an empty dictionary here
        """
        latents_shape = (1, 4, self.height // 8, self.width // 8)
        noise = np.random.randn(*latents_shape).astype(np.float32)
        # if we use LMSDiscreteScheduler, let's make sure latents are multiplied by sigmas
        if isinstance(self.scheduler, LMSDiscreteScheduler):
            noise = noise * self.scheduler.sigmas[0].numpy()
        return noise, {}

    def postprocess_image(self, image: np.ndarray, meta: Dict, output_type: str = "pil"):
        """
        Postprocessing for decoded image. Takes the image decoded by the VAE decoder, removes padding recorded
        in ``meta`` (if any), maps values to [0, 1] and moves channels last. Optionally converts the result
        to PIL.Image and resizes it back to the source size recorded in ``meta``.

        Parameters:
            image (np.ndarray):
                Generated image
            meta (Dict):
                Metadata obtained on latents preparing step, can be empty
            output_type (str, *optional*, pil):
                Output format for result, can be pil or numpy
        Returns:
            image (List of np.ndarray or PIL.Image.Image):
                Postprocessed images
        """
        if "padding" in meta:
            pad = meta["padding"]
            (_, end_h), (_, end_w) = pad[1:3]
            h, w = image.shape[2:]
            unpad_h = h - end_h
            unpad_w = w - end_w
            image = image[:, :, :unpad_h, :unpad_w]
        image = np.clip(image / 2 + 0.5, 0, 1)
        image = np.transpose(image, (0, 2, 3, 1))

        has_source_size = "src_height" in meta
        if output_type == "pil":
            image = self.numpy_to_pil(image)
            if has_source_size:
                orig_height, orig_width = meta["src_height"], meta["src_width"]
                image = [img.resize((orig_width, orig_height), PIL.Image.Resampling.LANCZOS) for img in image]
        elif has_source_size:
            orig_height, orig_width = meta["src_height"], meta["src_width"]
            # cv2.resize takes (width, height); the original passed the width twice.
            image = [cv2.resize(img, (orig_width, orig_height)) for img in image]
        return image

    def get_timesteps(self, num_inference_steps: int, strength: float):
        """
        Helper function for getting scheduler timesteps for generation
        In case of image-to-image generation, it updates number of steps according to strength

        Parameters:
            num_inference_steps (int):
                number of inference steps for generation
            strength (float):
                value between 0.0 and 1.0, that controls the amount of noise that is added to the input image.
                Values that approach 1.0 allow for lots of variations but will also produce images that are not semantically consistent with the input.
        Returns:
            timesteps: tail of ``scheduler.timesteps`` starting at the computed offset
            steps (int): number of steps left after skipping that offset
        """
        # get the original timestep using init_timestep
        init_timestep = min(int(num_inference_steps * strength), num_inference_steps)

        t_start = max(num_inference_steps - init_timestep, 0)
        timesteps = self.scheduler.timesteps[t_start:]

        return timesteps, num_inference_steps - t_start


# %% [markdown]
# Select inference device
# ~~~~~~~~~~~~~~~~~~~~~~~
#
# select device from dropdown list for running inference using OpenVINO

# %%
device = device_widget()

device

# .. parsed-literal::
#
#     Dropdown(description='Device:', index=4, options=('CPU', 'GPU.0', 'GPU.1', 'GPU.2', 'AUTO'), value='AUTO')

# %% [markdown]
# Configure Inference Pipeline
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Configuration steps: 1. Load models on device 2. Configure tokenizer and
# scheduler 3. Create instance of OvStableDiffusionInpaintingPipeline
# class
#
# This can take a while to run.

# %%
ov_config = {"INFERENCE_PRECISION_HINT": "f32"} if device.value != "CPU" else {}

core = ov.Core()


def get_ov_pipeline():
    """Compile the four converted models on the selected device and assemble the pipeline.

    ``ov_config`` is applied to the VAE encoder and decoder only.
    """
    image_encoder_inpaint = core.compile_model(IMAGE_ENCODER_OV_PATH_INPAINT, device.value)
    unet_model_inpaint = core.compile_model(UNET_OV_PATH_INPAINT, device.value)
    vae_decoder_inpaint = core.compile_model(VAE_DECODER_OV_PATH_INPAINT, device.value, ov_config)
    vae_encoder_inpaint = core.compile_model(VAE_ENCODER_OV_PATH_INPAINT, device.value, ov_config)

    ov_pipe_inpaint = OVStableDiffusionInpaintingPipeline(
        image_processor=extractor,
        image_encoder=image_encoder_inpaint,
        unet=unet_model_inpaint,
        vae_encoder=vae_encoder_inpaint,
        vae_decoder=vae_decoder_inpaint,
        scheduler=scheduler_inpaint,
    )

    return ov_pipe_inpaint


ov_pipe_inpaint = get_ov_pipeline()

# %% [markdown]
# Quantization
# ------------
#
# `NNCF `__ enables
# post-training quantization by adding quantization layers into model
# graph and then using a subset of the training dataset to initialize the
# parameters of these additional quantization layers.
Quantized operations +are executed in ``INT8`` instead of ``FP32``/``FP16`` making model +inference faster. + +According to ``StableDiffusionInpaintingPipeline`` structure, UNet used +for iterative denoising of input. It means that model runs in the cycle +repeating inference on each diffusion step, while other parts of +pipeline take part only once. That is why computation cost and speed of +UNet denoising becomes the critical path in the pipeline. Quantizing the +rest of the SD pipeline does not significantly improve inference +performance but can lead to a substantial degradation of accuracy. + +The optimization process contains the following steps: + +1. Create a calibration dataset for quantization. +2. Run ``nncf.quantize()`` to obtain quantized model. +3. Save the ``INT8`` model using ``openvino.save_model()`` function. + +Please select below whether you would like to run quantization to +improve model inference speed. + +.. code:: ipython3 + + UNET_INT8_OV_PATH = Path("model/unet_int8.xml") + int8_ov_pipe_inpaint = None + + + to_quantize = quantization_widget() + + to_quantize + + + + +.. parsed-literal:: + + Checkbox(value=True, description='Quantization') + + + +Let’s load ``skip magic`` extension to skip quantization if +``to_quantize`` is not selected + +.. code:: ipython3 + + # Fetch `skip_kernel_extension` module + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", + ) + open("skip_kernel_extension.py", "w").write(r.text) + + if to_quantize.value and "GPU" in device.value: + to_quantize.value = False + + %load_ext skip_kernel_extension + +Prepare calibration dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +We use 3 examples from +`Paint-by-Example `__ +to create a calibration dataset. + +.. 
# %%
import PIL.Image
import requests
from io import BytesIO


def download_image(url, timeout=60):
    """Download ``url`` and return it as an RGB ``PIL.Image``.

    Parameters:
        url (str): address of the image
        timeout (float, optional, 60): seconds to wait for the server before giving up
    Returns:
        PIL.Image.Image: decoded image converted to RGB
    Raises:
        requests.HTTPError: when the server answers with an error status, so an
            error page is never handed to the image decoder
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return PIL.Image.open(BytesIO(response.content)).convert("RGB")


example1 = [
    "https://github.com/Fantasy-Studio/Paint-by-Example/blob/main/examples/image/example_1.png?raw=true",
    "https://github.com/Fantasy-Studio/Paint-by-Example/blob/main/examples/mask/example_1.png?raw=true",
    "https://github.com/Fantasy-Studio/Paint-by-Example/blob/main/examples/reference/example_1.jpg?raw=true",
]
example2 = [
    "https://github.com/Fantasy-Studio/Paint-by-Example/blob/main/examples/image/example_2.png?raw=true",
    "https://github.com/Fantasy-Studio/Paint-by-Example/blob/main/examples/mask/example_2.png?raw=true",
    "https://github.com/Fantasy-Studio/Paint-by-Example/blob/main/examples/reference/example_2.jpg?raw=true",
]
example3 = [
    "https://github.com/Fantasy-Studio/Paint-by-Example/blob/main/examples/image/example_3.png?raw=true",
    "https://github.com/Fantasy-Studio/Paint-by-Example/blob/main/examples/mask/example_3.png?raw=true",
    "https://github.com/Fantasy-Studio/Paint-by-Example/blob/main/examples/reference/example_3.jpg?raw=true",
]
examples = [example1, example2, example3]


# Each entry is (source image, mask, reference image), all resized to 512x512.
img_examples = []
for init_image_url, mask_image_url, example_image_url in examples:
    init_image = download_image(init_image_url).resize((512, 512))
    mask_image = download_image(mask_image_url).resize((512, 512))
    example_image = download_image(example_image_url).resize((512, 512))
    img_examples.append((init_image, mask_image, example_image))

# %% [markdown]
# To collect intermediate model inputs for calibration we should customize
# ``CompiledModel``.
#
# ..
# %%
# %%skip not $to_quantize.value

from tqdm.notebook import tqdm
from transformers import set_seed
from typing import Any, Dict, List


class CompiledModelDecorator(ov.CompiledModel):
    """``ov.CompiledModel`` that records the inputs of every call in ``data_cache``."""

    def __init__(self, compiled_model, data_cache: List[Any] = None):
        super().__init__(compiled_model)
        # Test for None rather than truthiness so a caller-supplied empty list
        # is the list that gets filled.
        self.data_cache = data_cache if data_cache is not None else []

    def __call__(self, *args, **kwargs):
        # `append(*args)` raised TypeError unless exactly one positional argument
        # was given. The inputs are taken from the first positional argument, or
        # from the `inputs` keyword (assumes that is the parameter's name in
        # ov.CompiledModel.__call__ — TODO confirm).
        inputs = args[0] if args else kwargs.get("inputs")
        self.data_cache.append(inputs)
        return super().__call__(*args, **kwargs)


def collect_calibration_data(pipeline) -> List[Dict]:
    """Run ``pipeline`` over ``img_examples`` and return every input its UNet received.

    Each example is run with its own reference image and, from the second
    example on, once more with the previous example's reference image. The
    pipeline's UNet and progress bar are restored even if a run fails.
    """
    original_unet = pipeline.unet
    recording_unet = CompiledModelDecorator(original_unet)
    pipeline.unet = recording_unet
    pipeline.set_progress_bar_config(disable=True)
    try:
        prev_example_image = None
        for init_image, mask_image, example_image in img_examples:
            _ = pipeline(
                image=init_image,
                mask_image=mask_image,
                reference_image=example_image,
            )
            if prev_example_image is not None:
                _ = pipeline(
                    image=init_image,
                    mask_image=mask_image,
                    reference_image=prev_example_image,
                )
            prev_example_image = example_image
    finally:
        pipeline.set_progress_bar_config(disable=False)
        pipeline.unet = original_unet

    return recording_unet.data_cache


# %%
# %%skip not $to_quantize.value

UNET_INT8_OV_PATH = Path("model/unet_int8.xml")
if not UNET_INT8_OV_PATH.exists():
    unet_calibration_data = collect_calibration_data(ov_pipe_inpaint)

# %% [markdown]
# Run quantization
# ~~~~~~~~~~~~~~~~
#
# Create a quantized model from the pre-trained converted OpenVINO model.
#
#    **NOTE**: Quantization is time and memory consuming operation.
#    Running quantization code below may take some time.
#
# ..
# %%
# %%skip not $to_quantize.value

import nncf


def get_quantized_pipeline():
    """Build the inpainting pipeline around the INT8 UNet.

    When ``UNET_INT8_OV_PATH`` does not exist yet, the FP UNet is quantized
    with ``unet_calibration_data`` and saved there first. The UNet is then
    compiled from that file; the other three models are compiled exactly as in
    the non-quantized pipeline.
    """
    if UNET_INT8_OV_PATH.exists():
        # compile_model below reads the file itself, so no separate read_model is needed
        print("Loading quantized model")
    else:
        unet = core.read_model(UNET_OV_PATH_INPAINT)
        quantized_unet = nncf.quantize(
            model=unet,
            preset=nncf.QuantizationPreset.MIXED,
            calibration_dataset=nncf.Dataset(unet_calibration_data),
            model_type=nncf.ModelType.TRANSFORMER,
        )
        ov.save_model(quantized_unet, UNET_INT8_OV_PATH)

    unet_optimized = core.compile_model(UNET_INT8_OV_PATH, device.value)

    image_encoder_inpaint = core.compile_model(IMAGE_ENCODER_OV_PATH_INPAINT, device.value)
    vae_decoder_inpaint = core.compile_model(VAE_DECODER_OV_PATH_INPAINT, device.value, ov_config)
    vae_encoder_inpaint = core.compile_model(VAE_ENCODER_OV_PATH_INPAINT, device.value, ov_config)

    int8_ov_pipe_inpaint = OVStableDiffusionInpaintingPipeline(
        image_processor=extractor,
        image_encoder=image_encoder_inpaint,
        unet=unet_optimized,
        vae_encoder=vae_encoder_inpaint,
        vae_decoder=vae_decoder_inpaint,
        scheduler=scheduler_inpaint,
    )

    return int8_ov_pipe_inpaint


int8_ov_pipe_inpaint = get_quantized_pipeline()

# .. parsed-literal::
#
#     INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, openvino
#
# .. parsed-literal::
#
#     Output()
#
# .. parsed-literal::
#
#     Output()
#
# .. parsed-literal::
#
#     INFO:nncf:121 ignored nodes were found by name in the NNCFGraph
#
# .. parsed-literal::
#
#     Output()
#
# .. parsed-literal::
#
#     Output()

# %% [markdown]
# Run inference and compare inference time
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# OV pipeline:
#
# ..
# %%
init_image, mask_image, example_image = img_examples[1]


ov_image = ov_pipe_inpaint(image=init_image, mask_image=mask_image, reference_image=example_image, seed=2)

# %% [markdown]
# Quantized pipeline:

# %%
# %%skip not $to_quantize.value

int8_image = int8_ov_pipe_inpaint(image=init_image, mask_image=mask_image, reference_image=example_image, seed=2)

# %%
# %%skip not $to_quantize.value

import matplotlib.pyplot as plt
from PIL import Image

def visualize_results(orig_img:Image.Image, optimized_img:Image.Image):
    """
    Draw the two generated images side by side.

    Parameters:
       orig_img (Image.Image): generated image using FP16 models
       optimized_img (Image.Image): generated image using quantized models
    Returns:
       fig (matplotlib.pyplot.Figure): figure with the left panel titled
           "FP16 pipeline" and the right panel titled "INT8 pipeline"
    """
    fig, axs = plt.subplots(1, 2, figsize=(20, 20), sharex='all', sharey='all')
    panels = list(axs.flat)

    # strip every axis decoration so only the pictures remain
    for panel in panels:
        panel.set_xticklabels([])
        panel.set_yticklabels([])
        panel.get_xaxis().set_visible(False)
        panel.get_yaxis().set_visible(False)
        panel.grid(False)

    contents = ((orig_img, "FP16 pipeline"), (optimized_img, "INT8 pipeline"))
    for panel, (picture, caption) in zip(panels, contents):
        panel.imshow(np.array(picture))
        panel.set_title(caption, fontsize=15)

    fig.subplots_adjust(wspace=0.01, hspace=0.01)
    fig.tight_layout()
    return fig


visualize_results(ov_image["sample"][0], int8_image["sample"][0])

# .. image:: paint-by-example-with-output_files/paint-by-example-with-output_41_0.png

# %%
# %%skip $to_quantize.value

display(ov_image["sample"][0])

# %% [markdown]
# Compare UNet file size
# ~~~~~~~~~~~~~~~~~~~~~~
#
# ..
# %%
# %%skip not $to_quantize.value

fp16_ir_model_size = UNET_OV_PATH_INPAINT.with_suffix(".bin").stat().st_size / 1024
quantized_model_size = UNET_INT8_OV_PATH.with_suffix(".bin").stat().st_size / 1024

print(f"FP16 model size: {fp16_ir_model_size:.2f} KB")
print(f"INT8 model size: {quantized_model_size:.2f} KB")
print(f"Model compression rate: {fp16_ir_model_size / quantized_model_size:.3f}")

# .. parsed-literal::
#
#     FP16 model size: 1678780.62 KB
#     INT8 model size: 840725.98 KB
#     Model compression rate: 1.997

# %% [markdown]
# Interactive inference
# ---------------------
#
# Choose what model do you want to use in the interactive interface. You
# can choose both, FP16 and INT8.

# %%
import ipywidgets as widgets

available_models = ["FP16"]

if UNET_INT8_OV_PATH.exists():
    available_models.append("INT8")

model_to_use = widgets.Select(
    options=available_models,
    value="FP16",
    description="Select model:",
    disabled=False,
)

model_to_use

# .. parsed-literal::
#
#     Select(description='Select model:', options=('FP16', 'INT8'), value='FP16')

# %%
if "INT8" == model_to_use.value:
    chosen_pipeline = int8_ov_pipe_inpaint or get_quantized_pipeline()
    ov_pipe_inpaint = None
else:
    chosen_pipeline = ov_pipe_inpaint or get_ov_pipeline()
    int8_ov_pipe_inpaint = None


gc.collect()

# %%
# Code adapted from https://huggingface.co/spaces/Fantasy-Studio/Paint-by-Example/blob/main/app.py

import os


def predict(input_dict, reference, seed, steps):
    """
    This function runs when the 'paint' button is pressed.

    The source image is scaled so its shorter side is 512 (the longer side is
    rounded down to a multiple of 8) and then centre-cropped to 512 on the
    longer side; the mask gets the same treatment. The prepared image, mask
    and reference are saved under ``output/`` before inference, and the result
    is saved there afterwards. Inference runs on the module-level
    ``chosen_pipeline``.

    Parameters:
        input_dict (Dict):
            Contains two images in a dictionary
                'image' is the image that will be painted on
                'mask' is the black/white image specifying where to paint (white) and not to paint (black)
        reference (PIL.Image.Image):
            Reference image that will be used by the model to know what to paint in the specified area
        seed (int):
            Used to initialize the random number generator state
        steps (int):
            The number of denoising steps to run during inference. Low = fast/low quality, High = slow/higher quality
    Returns:
        image (PIL.Image.Image):
            First image of the pipeline's ``"sample"`` output
    """
    width, height = input_dict["image"].size

    # Scale so the shorter side becomes 512
    if width < height:
        factor = width / 512.0
        width = 512
        height = int((height / factor) / 8.0) * 8
    else:
        factor = height / 512.0
        height = 512
        width = int((width / factor) / 8.0) * 8

    init_image = input_dict["image"].convert("RGB").resize((width, height))
    mask = input_dict["mask"].convert("RGB").resize((width, height))

    # If the image is not a 512x512 square then crop
    if width > height:
        buffer = (width - height) / 2
        input_image = init_image.crop((buffer, 0, width - buffer, 512))
        mask = mask.crop((buffer, 0, width - buffer, 512))
    elif width < height:
        buffer = (height - width) / 2
        input_image = init_image.crop((0, buffer, 512, height - buffer))
        mask = mask.crop((0, buffer, 512, height - buffer))
    else:
        input_image = init_image

    # One directory creation serves both the inputs saved here and the result
    # saved below (the original created it twice, in two different ways).
    out_dir = Path("output")
    out_dir.mkdir(exist_ok=True)
    input_image.save("output/init.png")
    mask.save("output/mask.png")
    reference.save("output/ref.png")

    mask = [mask]

    result = chosen_pipeline(
        image=input_image,
        mask_image=mask,
        reference_image=reference,
        seed=seed,
        num_inference_steps=steps,
    )["sample"][0]

    result.save("output/result.png")

    return result


# %% [markdown]
# Choose a source image and a reference image, draw a mask in source image
# and push “Paint!”

# %%
if not Path("gradio_helper.py").exists():
    r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/paint-by-example/gradio_helper.py")
    # write_text closes the file; the original left the handle open
    Path("gradio_helper.py").write_text(r.text)

from gradio_helper import make_demo

demo = make_demo(fn=predict)

# Launching the Gradio app
try:
    demo.launch(debug=False, height=680)
except Exception:
    demo.queue().launch(share=True, debug=False, height=680)
# if you are launching remotely, specify server_name and server_port
# image_blocks.launch(server_name='your server name', server_port='server port in int')
# Read more in the docs: https://gradio.app/docs/

# %%
# please uncomment and run this cell for stopping gradio interface
# demo.close()

# ---- remainder of the enclosing multi-file diff (not notebook content), kept verbatim ----
# diff --git a/docs/notebooks/paint-by-example-with-output_files/paint-by-example-with-output_41_0.png b/docs/notebooks/paint-by-example-with-output_files/paint-by-example-with-output_41_0.png
# new file mode 100644
# index 00000000000000..be911bee3ee1a5
# --- /dev/null
# +++ b/docs/notebooks/paint-by-example-with-output_files/paint-by-example-with-output_41_0.png
# @@ -0,0 +1,3 @@
# +version https://git-lfs.github.com/spec/v1
# +oid sha256:ff1da225f5d53354c6bcdb34a891dcd1ef77e23b7bd76bee3367414d7efcde6e
# +size 2092300
# diff --git a/docs/notebooks/parler-tts-text-to-speech-with-output.rst b/docs/notebooks/parler-tts-text-to-speech-with-output.rst
# index 2be3c2a4a2c7ed..323959aa17e8ef 100644
# --- a/docs/notebooks/parler-tts-text-to-speech-with-output.rst
# +++ b/docs/notebooks/parler-tts-text-to-speech-with-output.rst
# @@ -9,7 +9,7 @@ with synthetic annotations `__
#  by Dan Lyth and Simon King, from Stability AI and Edinburgh University
#  respectively.
#
# -|image0|
# +..
image:: https://images.squarespace-cdn.com/content/v1/657816dfbefe0533e8a69d9a/30c96e25-acc5-4019-acdd-648da6142c4c/architecture_v3.png?format=2500w Text-to-speech models trained on large-scale datasets have demonstrated impressive in-context learning capabilities and naturalness. However, @@ -53,8 +53,6 @@ need a Jupyter server to start. For details, please refer to `Installation Guide `__. -.. |image0| image:: https://images.squarespace-cdn.com/content/v1/657816dfbefe0533e8a69d9a/30c96e25-acc5-4019-acdd-648da6142c4c/architecture_v3.png?format=2500w - Prerequisites ------------- @@ -66,32 +64,8 @@ Prerequisites os.environ["GIT_CLONE_PROTECTION_ACTIVE"] = "false" - %pip uninstall -q -y torch torchvision torchaudio %pip install -q "openvino>=2024.2.0" - %pip install -q git+https://github.com/huggingface/parler-tts.git "gradio>=4.19" transformers "torch>=2.2" "torchaudio" --extra-index-url https://download.pytorch.org/whl/cpu - - -.. parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - easyocr 1.7.2 requires torchvision>=0.5, which is not installed. - mobileclip 0.1.0 requires clip-benchmark>=1.4.0, which is not installed. - mobileclip 0.1.0 requires torchvision==0.14.1, which is not installed. - open-clip-torch 2.22.0 requires torchvision, which is not installed. - timm 1.0.12 requires torchvision, which is not installed. - ultralytics 8.1.24 requires torchvision>=0.9.0, which is not installed. - open-clip-torch 2.22.0 requires protobuf<4, but you have protobuf 4.25.5 which is incompatible. - tensorflow 2.12.0 requires keras<2.13,>=2.12.0, but you have keras 2.13.1 which is incompatible. - tensorflow 2.12.0 requires numpy<1.24,>=1.22, but you have numpy 1.24.4 which is incompatible. 
- tensorflow 2.12.0 requires tensorboard<2.13,>=2.12, but you have tensorboard 2.13.0 which is incompatible. - tensorflow 2.12.0 requires tensorflow-estimator<2.13,>=2.12.0, but you have tensorflow-estimator 2.13.0 which is incompatible. - tensorflow-cpu 2.13.1 requires numpy<=1.24.3,>=1.22, but you have numpy 1.24.4 which is incompatible. - tensorflow-cpu 2.13.1 requires typing-extensions<4.6.0,>=3.6.6, but you have typing-extensions 4.12.2 which is incompatible. - tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 4.25.5 which is incompatible. - Note: you may need to restart the kernel to use updated packages. - + %pip install -q git+https://github.com/huggingface/parler-tts.git "gradio>=4.19" transformers "torch>=2.2" --extra-index-url https://download.pytorch.org/whl/cpu Load the original model and inference ------------------------------------- @@ -121,135 +95,6 @@ Load the original model and inference audio_arr = generation.cpu().numpy().squeeze() sf.write("parler_tts_out.wav", audio_arr, model.config.sampling_rate) - -.. parsed-literal:: - - 2024-12-10 02:43:30.030324: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-12-10 02:43:30.055592: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- Flash attention 2 is not installed - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. - WeightNorm.apply(module, name, dim) - Config of the text_encoder: is overwritten by shared text_encoder config: T5Config { - "_name_or_path": "google/flan-t5-base", - "architectures": [ - "T5ForConditionalGeneration" - ], - "classifier_dropout": 0.0, - "d_ff": 2048, - "d_kv": 64, - "d_model": 768, - "decoder_start_token_id": 0, - "dense_act_fn": "gelu_new", - "dropout_rate": 0.1, - "eos_token_id": 1, - "feed_forward_proj": "gated-gelu", - "initializer_factor": 1.0, - "is_encoder_decoder": true, - "is_gated_act": true, - "layer_norm_epsilon": 1e-06, - "model_type": "t5", - "n_positions": 512, - "num_decoder_layers": 12, - "num_heads": 12, - "num_layers": 12, - "output_past": true, - "pad_token_id": 0, - "relative_attention_max_distance": 128, - "relative_attention_num_buckets": 32, - "task_specific_params": { - "summarization": { - "early_stopping": true, - "length_penalty": 2.0, - "max_length": 200, - "min_length": 30, - "no_repeat_ngram_size": 3, - "num_beams": 4, - "prefix": "summarize: " - }, - "translation_en_to_de": { - "early_stopping": true, - "max_length": 300, - "num_beams": 4, - "prefix": "translate English to German: " - }, - "translation_en_to_fr": { - "early_stopping": true, - "max_length": 300, - "num_beams": 4, - "prefix": "translate English to French: " - }, - "translation_en_to_ro": { - "early_stopping": true, - "max_length": 300, - "num_beams": 4, - "prefix": "translate English to Romanian: " - } - }, - "tie_word_embeddings": false, - "transformers_version": "4.46.1", - "use_cache": true, - "vocab_size": 32128 - } - - Config of the audio_encoder: is overwritten by shared audio_encoder config: DACConfig { - 
"_name_or_path": "ylacombe/dac_44khZ_8kbps", - "architectures": [ - "DACModel" - ], - "codebook_size": 1024, - "frame_rate": 86, - "latent_dim": 1024, - "model_bitrate": 8, - "model_type": "dac_on_the_hub", - "num_codebooks": 9, - "sampling_rate": 44100, - "torch_dtype": "float32", - "transformers_version": "4.46.1" - } - - Config of the decoder: is overwritten by shared decoder config: ParlerTTSDecoderConfig { - "_name_or_path": "/fsx/yoach/tmp/artefacts/decoder_400M/", - "activation_dropout": 0.0, - "activation_function": "gelu", - "add_cross_attention": true, - "architectures": [ - "ParlerTTSForCausalLM" - ], - "attention_dropout": 0.0, - "bos_token_id": 1025, - "codebook_weights": null, - "cross_attention_implementation_strategy": null, - "dropout": 0.1, - "eos_token_id": 1024, - "ffn_dim": 4096, - "hidden_size": 1024, - "initializer_factor": 0.02, - "is_decoder": true, - "layerdrop": 0.0, - "max_position_embeddings": 4096, - "model_type": "parler_tts_decoder", - "num_attention_heads": 16, - "num_codebooks": 9, - "num_cross_attention_key_value_heads": 16, - "num_hidden_layers": 24, - "num_key_value_heads": 16, - "pad_token_id": 1024, - "rope_embeddings": false, - "rope_theta": 10000.0, - "scale_embedding": false, - "tie_word_embeddings": false, - "torch_dtype": "float32", - "transformers_version": "4.46.1", - "use_cache": true, - "use_fused_lm_heads": false, - "vocab_size": 1088 - } - - You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers - The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. - - .. code:: ipython3 import IPython.display as ipd @@ -263,10 +108,10 @@ Load the original model and inference - + @@ -314,20 +159,6 @@ and Decoder (``ParlerTTSDecoder``). Lets convert them one by one. 
text_encoder_ov_model = convert(model.text_encoder, TEXT_ENCODER_OV_PATH, example_input) - -.. parsed-literal:: - - WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. - - -.. parsed-literal:: - - [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead - warnings.warn( - `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. - - The Decoder Model performs in generation pipeline and we can separate it into two stage. In the first stage the model generates ``past_key_values`` into output for the second stage. In the second @@ -362,17 +193,6 @@ stage the model produces tokens during several runs. decoder_1_ov_model = convert(DecoderStage1Wrapper(model.decoder.model.decoder), DECODER_STAGE_1_OV_PATH, example_input) - -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/parler_tts/modeling_parler_tts.py:367: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
- if seq_len > self.weights.size(0): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/parler_tts/modeling_parler_tts.py:1713: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if sequence_length != 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/parler_tts/modeling_parler_tts.py:916: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if attn_output.size() != (bsz, self.num_heads, tgt_len, self.head_dim): - - .. code:: ipython3 DECODER_STAGE_2_OV_PATH = Path("models/decoder_stage_2_ir.xml") @@ -411,15 +231,6 @@ stage the model produces tokens during several runs. decoder_2_ov_model = convert(DecoderStage2Wrapper(model.decoder.model.decoder), DECODER_STAGE_2_OV_PATH, example_input) - -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:458: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. 
- or len(self.key_cache[layer_idx]) == 0 # the layer has no cache - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:443: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. - elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors - - Compiling models and inference ------------------------------ @@ -447,7 +258,7 @@ Select device from dropdown list for running inference using OpenVINO. .. parsed-literal:: - Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + Dropdown(description='Device:', index=4, options=('CPU', 'GPU.0', 'GPU.1', 'GPU.2', 'AUTO'), value='AUTO') @@ -549,10 +360,10 @@ and run inference. - + @@ -595,27 +406,13 @@ Interactive inference demo = make_demo(fn=infer) try: - demo.queue().launch(debug=False) + demo.queue().launch(debug=True) except Exception: - demo.queue().launch(share=True, debug=False) + demo.queue().launch(share=True, debug=True) # if you are launching remotely, specify server_name and server_port # demo.launch(server_name='your server name', server_port='server port in int') # Read more in the docs: https://gradio.app/docs/ - -.. parsed-literal:: - - Running on local URL: http://127.0.0.1:7860 - - To create a public link, set `share=True` in `launch()`. - - - - - - - - .. 
code:: ipython3 # please uncomment and run this cell for stopping gradio interface diff --git a/docs/notebooks/person-tracking-with-output.rst b/docs/notebooks/person-tracking-with-output.rst index 6ac8ff43e05ab2..653a9b376edf7e 100644 --- a/docs/notebooks/person-tracking-with-output.rst +++ b/docs/notebooks/person-tracking-with-output.rst @@ -148,7 +148,7 @@ Imports import collections from pathlib import Path import time - + import numpy as np import cv2 from IPython import display @@ -158,17 +158,17 @@ Imports .. code:: ipython3 # Import local modules - + if not Path("./notebook_utils.py").exists(): # Fetch `notebook_utils` module import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) - + open("notebook_utils.py", "w").write(r.text) - + import notebook_utils as utils from deepsort_utils.tracker import Tracker from deepsort_utils.nn_matching import NearestNeighborDistanceMetric @@ -200,36 +200,36 @@ by the cosine distance. .. code:: ipython3 from notebook_utils import download_ir_model - + # A directory where the model will be downloaded. base_model_dir = "model" precision = "FP16" # The name of the model from Open Model Zoo detection_model_name = "person-detection-0202" - - + + download_det_model_url = ( f"https://storage.openvinotoolkit.org/repositories/open_model_zoo/2023.0/models_bin/1/{detection_model_name}/{precision}/{detection_model_name}.xml" ) - + detection_model_path = download_ir_model(download_det_model_url, Path(base_model_dir) / detection_model_name / precision) - + reidentification_model_name = "person-reidentification-retail-0287" download_reid_model_url = f"https://storage.openvinotoolkit.org/repositories/open_model_zoo/2023.0/models_bin/1/{reidentification_model_name}/{precision}/{reidentification_model_name}.xml" - + reidentification_model_path = download_ir_model(download_reid_model_url, Path(base_model_dir) / reidentification_model_name / precision) .. 
parsed-literal:: - person-detection-0202.bin: 0%| | 0.00/3.47M [00:00 200: processing_times.popleft() - + _, f_width = frame.shape[:2] # Mean processing time [ms]. processing_time = np.mean(processing_times) * 1100 fps = 1000 / processing_time - + # Get poses from detection results. bbox_xywh, score, label = process_results(h, w, results=output) - + img_crops = [] for box in bbox_xywh: x1, y1, x2, y2 = xywh_to_xyxy(box, h, w) img = frame[y1:y2, x1:x2] img_crops.append(img) - + # Get reidentification feature of each person. if img_crops: # preprocess @@ -614,17 +615,17 @@ video file. features = extractor.predict(img_batch) else: features = np.array([]) - + # Wrap the detection and reidentification results together bbox_tlwh = xywh_to_tlwh(bbox_xywh) detections = [Detection(bbox_tlwh[i], features[i]) for i in range(features.shape[0])] - + # predict the position of tracking target tracker.predict() - + # update tracker tracker.update(detections) - + # update bbox identities outputs = [] for track in tracker.tracks: @@ -636,14 +637,14 @@ video file. outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=np.int32)) if len(outputs) > 0: outputs = np.stack(outputs, axis=0) - + # draw box for visualization if len(outputs) > 0: bbox_tlwh = [] bbox_xyxy = outputs[:, :4] identities = outputs[:, -1] frame = draw_boxes(frame, bbox_xyxy, identities) - + cv2.putText( img=frame, text=f"Inference time: {processing_time:.1f}ms ({fps:.1f} FPS)", @@ -654,7 +655,7 @@ video file. thickness=1, lineType=cv2.LINE_AA, ) - + if use_popup: cv2.imshow(winname=title, mat=frame) key = cv2.waitKey(1) @@ -669,7 +670,7 @@ video file. # Display the image in this notebook. display.clear_output(wait=True) display.display(i) - + # ctrl-c except KeyboardInterrupt: print("Interrupted") @@ -723,11 +724,11 @@ will work. .. 
code:: ipython3 USE_WEBCAM = False - + cam_id = 0 video_file = "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/video/people.mp4" source = cam_id if USE_WEBCAM else video_file - + run_person_tracking(source=source, flip=USE_WEBCAM, use_popup=False) diff --git a/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_17_3.png b/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_17_3.png index 1be4ba9fa45c92..20280b15f5dc07 100644 --- a/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_17_3.png +++ b/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_17_3.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eacdcf6e619052ffe8bea1810c93678559cf210808d871e0e2b8a81939e1fd26 +oid sha256:3983e7e27f73b5e03e02cfb02950ce8aef26d9d6a19a7376c51a6f0b00913732 size 106259 diff --git a/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_25_0.png b/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_25_0.png index 972cc9e5977684..b5ff9a7ccdcd2c 100644 --- a/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_25_0.png +++ b/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_25_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1c04ed0e53cb210bd7853d3daa7f77a0a087b8e08099b837d3237b025c223b5d -size 218593 +oid sha256:aa4b6d563d3c164036182f80dfc247e3c19a892fae85b49f7eb51518f0bc0141 +size 219418 diff --git a/docs/notebooks/phi-3-vision-with-output.rst b/docs/notebooks/phi-3-vision-with-output.rst index dc588206768c93..778fc5aa7d6bc7 100644 --- a/docs/notebooks/phi-3-vision-with-output.rst +++ b/docs/notebooks/phi-3-vision-with-output.rst @@ -260,9 +260,10 @@ documentation 1 or self.sliding_window is not None) and self.is_causal: - 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:444: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! seq_len = seq_len or torch.max(position_ids) + 1 /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:445: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if seq_len > self.original_max_position_embeddings: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. op1 = operator(\*args, \*\*kwargs) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:443: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:443: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. 
elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:683: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len): @@ -365,7 +375,7 @@ documentation =2.1" "diffusers>=0.26,<0.30" "gradio>=4.19" "openvino>=2024.0.0" "einops" torchvision "peft>=0.6.2" "nncf>=2.9.0" "protobuf==3.20.3" "insightface" "onnxruntime" + if not Path("PhotoMaker").exists(): + !git clone https://github.com/TencentARC/PhotoMaker.git + %cd PhotoMaker + !git checkout "1e78aa6514c11a84ef1be27b56c7c72d6c70f8fc" + %cd .. .. parsed-literal:: + Cloning into 'PhotoMaker'... + remote: Enumerating objects: 306, done. + remote: Counting objects: 100% (151/151), done. + remote: Compressing objects: 100% (98/98), done. + remote: Total 306 (delta 132), reused 53 (delta 53), pack-reused 155 (from 1) + Receiving objects: 100% (306/306), 10.24 MiB | 23.03 MiB/s, done. + Resolving deltas: 100% (164/164), done. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/photo-maker/PhotoMaker Note: switching to '1e78aa6514c11a84ef1be27b56c7c72d6c70f8fc'. You are in 'detached HEAD' state. 
You can look around, make experimental @@ -136,20 +119,24 @@ Install required packages Turn off this advice by setting config variable advice.detachedHead to false HEAD is now at 1e78aa6 Update README.md + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/photo-maker + + +Install required packages + +.. code:: ipython3 + + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu \ + transformers "torch>=2.1" "diffusers>=0.26,<0.30" "gradio>=4.19" "openvino>=2024.0.0" "einops" torchvision "peft>=0.6.2" "nncf>=2.9.0" "protobuf==3.20.3" "insightface" "onnxruntime" .. parsed-literal:: ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - paddleclas 2.6.0 requires gast==0.3.3, but you have gast 0.4.0 which is incompatible. - paddleclas 2.6.0 requires opencv-python<=4.6.0.66, but you have opencv-python 4.10.0.84 which is incompatible. - parler-tts 0.2.2 requires protobuf>=4.0.0, but you have protobuf 3.20.3 which is incompatible. - tensorflow 2.12.0 requires keras<2.13,>=2.12.0, but you have keras 2.13.1 which is incompatible. + paddleclas 2.5.2 requires gast==0.3.3, but you have gast 0.4.0 which is incompatible. + paddleclas 2.5.2 requires opencv-python==4.6.0.66, but you have opencv-python 4.10.0.84 which is incompatible. + parler-tts 0.2.1 requires protobuf>=4.0.0, but you have protobuf 3.20.3 which is incompatible. tensorflow 2.12.0 requires numpy<1.24,>=1.22, but you have numpy 1.24.4 which is incompatible. - tensorflow 2.12.0 requires tensorboard<2.13,>=2.12, but you have tensorboard 2.13.0 which is incompatible. - tensorflow 2.12.0 requires tensorflow-estimator<2.13,>=2.12.0, but you have tensorflow-estimator 2.13.0 which is incompatible. - tensorflow-cpu 2.13.1 requires numpy<=1.24.3,>=1.22, but you have numpy 1.24.4 which is incompatible. 
- tensorflow-cpu 2.13.1 requires typing-extensions<4.6.0,>=3.6.6, but you have typing-extensions 4.12.2 which is incompatible. Note: you may need to restart the kernel to use updated packages. @@ -210,9 +197,10 @@ PhotoMaker to generate the original PhotoMaker pipeline. .. parsed-literal:: - 2024-12-10 02:49:18.726948: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-12-10 02:49:18.751780: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 02:22:09.727876: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 02:22:09.761823: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2024-11-05 02:22:10.482979: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. code:: ipython3 @@ -230,12 +218,6 @@ PhotoMaker to generate the original PhotoMaker pipeline. 
Loading pipeline components...: 0%| | 0/7 [00:00 1 or self.sliding_window is not None: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: @@ -584,15 +575,15 @@ original Stable Diffusion XL model. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unets/unet_2d_condition.py:1103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unets/unet_2d_condition.py:1103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! if dim % default_overall_up_factor != 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:136: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:136: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:145: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:145: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if hidden_states.shape[0] >= 64: @@ -695,6 +686,8 @@ Select inference device for Stable Diffusion pipeline .. code:: ipython3 + import requests + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) @@ -928,7 +921,7 @@ Running Text-to-Image Generation with OpenVINO -.. image:: photo-maker-with-output_files/photo-maker-with-output_34_0.png +.. image:: photo-maker-with-output_files/photo-maker-with-output_33_0.png Interactive Demo diff --git a/docs/notebooks/photo-maker-with-output_files/photo-maker-with-output_33_0.png b/docs/notebooks/photo-maker-with-output_files/photo-maker-with-output_33_0.png new file mode 100644 index 00000000000000..28ccdbf331406d --- /dev/null +++ b/docs/notebooks/photo-maker-with-output_files/photo-maker-with-output_33_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21913b4e38eb996bd7d8faedb660aa56fdbf4a6c1ef71157d5e845c9b8a31e7e +size 357743 diff --git a/docs/notebooks/photo-maker-with-output_files/photo-maker-with-output_34_0.png b/docs/notebooks/photo-maker-with-output_files/photo-maker-with-output_34_0.png deleted file mode 100644 index 5c425ae841f4c7..00000000000000 --- a/docs/notebooks/photo-maker-with-output_files/photo-maker-with-output_34_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:99978c67369aac55e26840e7e4b59aa54bcbf4cda132774760a9e3da86803cb9 -size 357743 diff --git a/docs/notebooks/pixart-with-output.rst b/docs/notebooks/pixart-with-output.rst index fed1f6b3dada41..c1c9a4b4e8ec57 100644 --- a/docs/notebooks/pixart-with-output.rst +++ b/docs/notebooks/pixart-with-output.rst @@ -118,9 +118,10 @@ directly in latent space, achieving super fast inference with few steps. .. parsed-literal:: - 2024-12-10 02:57:23.724286: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. 
You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-12-10 02:57:23.749610: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 02:30:04.644117: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 02:30:04.680089: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2024-11-05 02:30:05.360275: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -129,17 +130,17 @@ directly in latent space, achieving super fast inference with few steps. Loading pipeline components...: 0%| | 0/5 [00:00. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - Some weights of the model checkpoint were not used when initializing PixArtTransformer2DModel: - ['caption_projection.y_embedding'] +.. parsed-literal:: + Loading checkpoint shards: 0%| | 0/4 [00:00. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. 
If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 + Some weights of the model checkpoint were not used when initializing PixArtTransformer2DModel: + ['caption_projection.y_embedding'] @@ -193,13 +194,13 @@ PixArt-α consists of pure transformer blocks for latent diffusion: It can directly generate 1024px images from text prompts within a single sampling process. -|image02|. +|image01|. During inference it uses text encoder ``T5EncoderModel``, transformer ``Transformer2DModel`` and VAE decoder ``AutoencoderKL``. Let’s convert the models from the pipeline one by one. -.. |image02| image:: https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS/resolve/main/asset/images/model.png +.. |image01| image:: https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS/resolve/main/asset/images/model.png .. code:: ipython3 @@ -228,7 +229,7 @@ Convert text encoder .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. 
@@ -271,11 +272,11 @@ Convert transformer .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/embeddings.py:219: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/embeddings.py:219: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if self.height != height or self.width != width: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/attention_processor.py:682: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/attention_processor.py:682: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if current_length != target_length: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/attention_processor.py:697: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/attention_processor.py:697: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attention_mask.shape[0] < batch_size * head_size: @@ -300,9 +301,9 @@ Convert VAE decoder .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if hidden_states.shape[0] >= 64: @@ -448,7 +449,7 @@ And insert wrappers instances in the pipeline: .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. 
deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -563,7 +564,7 @@ To collect intermediate model inputs for calibration we should customize .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -1621,16 +1622,16 @@ pipelines. Loading pipeline components...: 0%| | 0/5 [00:00 0.19 and openvino version <= 2024.4. Please downgrade to tokenizers version <= 0.19 to export tokenizers to OpenVINO. 
- Traceback (most recent call last): - File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/bin/optimum-cli", line 10, in - sys.exit(main()) - File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/commands/optimum_cli.py", line 208, in main - service.run() - File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/commands/export/openvino.py", line 390, in run - main_export( - File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/exporters/openvino/__main__.py", line 476, in main_export - _weight_only_quantization(submodel, quantization_config) - File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/intel/openvino/quantization.py", line 938, in _weight_only_quantization - return nncf.compress_weights( - TypeError: compress_weights() got an unexpected keyword argument 'backup_mode' - + INFO:nncf:Statistics of the bitwidth distribution: + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 6% (1 / 281) │ 0% (0 / 280) │ + ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ 4 │ 94% (280 / 281) │ 100% (280 / 280) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:05:12 • 0:00:00 + 
INFO:nncf:Statistics of the bitwidth distribution: + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 6% (3 / 172) │ 0% (0 / 169) │ + ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ 4 │ 94% (169 / 172) │ 100% (169 / 169) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:00:12 • 0:00:00 + INFO:nncf:Statistics of the bitwidth distribution: + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (1 / 1) │ 0% (0 / 0) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:00:03 • 0:00:00 + Run model inference ------------------- @@ -528,9 +545,10 @@ Intel can be found in .. parsed-literal:: - 2024-12-10 03:48:41.700649: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-12-10 03:48:41.726260: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 03:26:01.941542: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. 
You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 03:26:01.977558: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2024-11-05 03:26:02.650242: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. code:: ipython3 @@ -574,7 +592,7 @@ Intel can be found in .. parsed-literal:: - The unusual aspect of this image is that the cat is lying on its back inside a cardboard box. This is not a typical position for a cat, as they usually prefer to curl up or lie on their sides when resting. Additionally, cats are known for their love of small, enclosed spaces, but it is less common to see a cat lying on its back in such a setting. The image captures a playful and relaxed moment, highlighting the cat's comfort and curiosity. + The unusual aspect of this image is that the cat is lying inside a cardboard box, which is not a typical setting for a cat. Cats are often known for their affinity for boxes, but it is still considered unusual to see a cat comfortably resting inside a box in a living room setting. The cat appears relaxed and content, which adds to the charm of the scene. The presence of a sofa in the background further emphasizes the domestic and cozy atmosphere of the image. Interactive demo @@ -601,6 +619,8 @@ Interactive demo Running on local URL: http://127.0.0.1:7860 + Thanks for being a Gradio user! If you have questions or feedback, please join our Discord server and chat with us: https://discord.gg/feTf9x3ZSB + To create a public link, set `share=True` in `launch()`. 
diff --git a/docs/notebooks/pose-estimation-with-output.rst b/docs/notebooks/pose-estimation-with-output.rst index 112b6037d4907f..e827bd19acfd34 100644 --- a/docs/notebooks/pose-estimation-with-output.rst +++ b/docs/notebooks/pose-estimation-with-output.rst @@ -126,13 +126,13 @@ precision in the code below. .. parsed-literal:: - human-pose-estimation-0001.xml: 0%| | 0.00/474k [00:00=0.24.0" transformers "torch>=2.1" "gradio>=4.19" qrcode opencv-python "peft>=0.6.2" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "openvino>=2023.1.0" "nncf>=2.7.0" "matplotlib>=3.4" + %pip install -q "openvino>=2023.1.0" "nncf>=2.7.0" Instantiating Generation Pipeline --------------------------------- @@ -122,30 +122,6 @@ controlnet model and ``stable-diffusion-v1-5``: controlnet=controlnet, ) - -.. parsed-literal:: - - 2024-12-05 09:21:58.637418: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-12-05 09:21:58.649752: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered - WARNING: All log messages before absl::InitializeLog() is called are written to STDERR - E0000 00:00:1733376118.663808 222102 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered - E0000 00:00:1733376118.667978 222102 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered - 2024-12-05 09:21:58.683751: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
- To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - - - -.. parsed-literal:: - - Loading pipeline components...: 0%| | 0/7 [00:00`__ - -**Table of contents:** +`NNCF `__ #### Table of +contents: - `Prerequisites <#prerequisites>`__ - `Convert and Optimize model <#convert-and-optimize-model>`__ @@ -79,11 +78,11 @@ Prerequisites from pathlib import Path import requests - + if not Path("ov_qwen2_audio_helper.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/qwen2-audio/ov_qwen2_audio_helper.py") open("ov_qwen2_audio_helper.py", "w").write(r.text) - + if not Path("notebook_utils.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py") open("notebook_utils.py", "w").write(r.text) @@ -212,13 +211,13 @@ documentation target_length: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:443: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:443: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors @@ -387,16 +396,16 @@ Intel `__ .. 
code:: ipython3 from ov_qwen2_audio_helper import OVQwen2AudioForConditionalGeneration - + # Uncomment below lines to see the model inference class code # OVQwen2AudioForConditionalGeneration?? .. code:: ipython3 from notebook_utils import device_widget - + device = device_widget(default="AUTO", exclude=["NPU"]) - + device @@ -422,20 +431,20 @@ Run model inference from transformers import AutoProcessor, TextStreamer import librosa import IPython.display as ipd - - + + processor = AutoProcessor.from_pretrained(model_dir) - + audio_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-Audio/audio/1272-128104-0000.flac" audio_chat_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-Audio/audio/guess_age_gender.wav" audio_file = Path(audio_url.split("/")[-1]) audio_chat_file = Path(audio_chat_url.split("/")[-1]) - + if not audio_file.exists(): r = requests.get(audio_url) with audio_file.open("wb") as f: f.write(r.content) - + if not audio_chat_file.exists(): r = requests.get(audio_chat_url) with audio_chat_file.open("wb") as f: @@ -457,14 +466,14 @@ Voice chat ], }, ] - + text = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False) audios = [librosa.load(audio_chat_file, sr=processor.feature_extractor.sampling_rate)[0]] - + inputs = processor(text=text, audios=audios, return_tensors="pt", padding=True) display(ipd.Audio(audio_chat_file)) print("Answer:") - + generate_ids = ov_model.generate(**inputs, max_new_tokens=50, streamer=TextStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)) @@ -476,7 +485,7 @@ Voice chat .. raw:: html - +