# .github/workflows/windows-ort.yml

# Builds the vsort plugin (vsort.dll) on Windows against ONNX Runtime,
# smoke-tests it inside a portable VapourSynth install, and optionally
# publishes a release archive.
name: Build (Windows-ORT)

on:
  # Rebuild whenever the plugin sources, the shared code, or this workflow
  # file itself change.
  push:
    paths:
      - 'common/**'
      - 'vsort/**'
      - '.github/workflows/windows-ort.yml'
  # Manual run; a non-empty `tag` additionally enables the release steps at
  # the end of the job.
  workflow_dispatch:
    inputs:
      tag:
        default: ''
        description: 'which tag to upload to'
  # Reusable-workflow entry point; the caller must pass the tag explicitly.
  workflow_call:
    inputs:
      tag:
        type: string
        required: true
        description: 'which tag to upload to'

jobs:
  build-windows:
    runs-on: windows-2022

    # Defaults for `run` steps only: commands execute with cmd.exe from the
    # vsort/ directory unless a step sets its own `shell`. `uses` steps are
    # not affected, which is why their paths are spelled `vsort/...`.
    defaults:
      run:
        working-directory: vsort
        shell: cmd

    steps:
    # Full history (fetch-depth: 0) instead of a shallow clone; the later
    # "Describe" step runs `git describe --tags --long` on this checkout.
    - name: Checkout repo
      uses: actions/checkout@v4
      with:
        fetch-depth: 0

    # Presumably exposes the MSVC compiler environment to the following
    # steps (inferred from the action name) -- TODO confirm.
    - name: Setup MSVC
      uses: ilammy/msvc-dev-cmd@v1

    # Ninja is the generator requested by every `cmake -G Ninja` call below.
    - name: Setup Ninja
      run: pip install ninja

    # protobuf is built from source and its install tree is cached. The
    # checkout/configure/build/install steps below only run on a cache miss.
    - name: Cache protobuf
      id: cache-protobuf
      uses: actions/cache@v4
      with:
        key: ${{ runner.os }}-vsort-protobuf-v4
        path: vsort/protobuf/install

    - name: Checkout protobuf
      if: steps.cache-protobuf.outputs.cache-hit != 'true'
      uses: actions/checkout@v4
      with:
        repository: protocolbuffers/protobuf
        path: vsort/protobuf
        fetch-depth: 1
        # Tracks the protobuf version used by
        # https://github.com/AmusementClub/onnxruntime/blob/master/cmake/external/onnxruntime_external_deps.cmake#L203
        # When changing this ref, also bump the version suffix of the cache key.
        ref: v3.21.12

    # Static, release-mode protobuf without its test suite. The folded scalar
    # (`>-`) joins the lines below into a single cmd command line.
    - name: Configure protobuf
      if: steps.cache-protobuf.outputs.cache-hit != 'true'
      run: >-
        cmake -S protobuf -B protobuf\build_rel -G Ninja -LA
        -D CMAKE_BUILD_TYPE=Release
        -D protobuf_BUILD_SHARED_LIBS=OFF  -D protobuf_BUILD_TESTS=OFF

    - name: Build protobuf
      if: steps.cache-protobuf.outputs.cache-hit != 'true'
      run: >-
        cmake --build protobuf\build_rel --verbose

    # The install prefix is the directory captured by the cache step above.
    - name: Install protobuf
      if: steps.cache-protobuf.outputs.cache-hit != 'true'
      run: >-
        cmake --install protobuf\build_rel --prefix protobuf\install

    # Same pattern as protobuf: the onnx install tree is cached and the
    # source build below is skipped when the cache is restored.
    - name: Cache onnx
      id: cache-onnx
      uses: actions/cache@v4
      with:
        key: ${{ runner.os }}-vsort-onnx-v5
        path: vsort/onnx/install

    - name: Checkout onnx
      if: steps.cache-onnx.outputs.cache-hit != 'true'
      uses: actions/checkout@v4
      with:
        repository: onnx/onnx
        path: vsort/onnx
        fetch-depth: 1
        # Tracks the onnx revision used by
        # https://github.com/AmusementClub/onnxruntime/tree/master/cmake/external
        # When changing this ref, also bump the version suffix of the cache key.
        ref: 990217f043af7222348ca8f0301e17fa7b841781

    # Builds onnx against the protobuf installed above: lite protobuf,
    # static protobuf libraries, static MSVC runtime, no ONNX-ML.
    # The folded scalar (`>-`) joins the lines into one cmd command line.
    - name: Configure onnx
      if: steps.cache-onnx.outputs.cache-hit != 'true'
      run: >-
        cmake -S onnx -B onnx\build -G Ninja -LA
        -D CMAKE_BUILD_TYPE=Release
        -D Protobuf_PROTOC_EXECUTABLE=protobuf\install\bin\protoc
        -D Protobuf_LITE_LIBRARY=protobuf\install\lib
        -D Protobuf_LIBRARIES=protobuf\install\lib
        -D ONNX_USE_LITE_PROTO=ON -D ONNX_USE_PROTOBUF_SHARED_LIBS=OFF
        -D ONNX_GEN_PB_TYPE_STUBS=OFF -D ONNX_ML=0
        -D ONNX_USE_MSVC_STATIC_RUNTIME=1

    - name: Build onnx
      if: steps.cache-onnx.outputs.cache-hit != 'true'
      run: >-
        cmake --build onnx\build --verbose

    # The install prefix is the directory captured by the cache step above.
    - name: Install onnx
      if: steps.cache-onnx.outputs.cache-hit != 'true'
      run: >-
        cmake --install onnx\build --prefix onnx\install

    # curl flags: -s silent, -S still print errors, -f fail on HTTP >= 400
    # (instead of saving the error page as the archive), -L follow redirects.
    # The extracted vapoursynth-* directory is renamed to a fixed name so the
    # Configure step can point at vapoursynth\include.
    - name: Download VapourSynth headers
      run: |
        curl -sSfL -o vs.zip https://github.com/vapoursynth/vapoursynth/archive/refs/tags/R54.zip
        unzip -q vs.zip
        mv vapoursynth-*/ vapoursynth/

    # Prebuilt ONNX Runtime (GPU) package; the Configure step reads its
    # headers and libraries from the extracted onnxruntime-gpu\ directory.
    # The output name is given explicitly with -o, so the former `-J -O`
    # flags (which conflict with -o and were ignored) are dropped.
    - name: Download ONNX Runtime Precompilation
      run: |
        curl -sSfL -o ortgpu.zip https://github.com/AmusementClub/onnxruntime/releases/download/orttraining_rc2-8036-geb41d57f21-240425-0428/onnxruntime-gpu-win64.zip
        unzip -q ortgpu.zip

    # The CUDA toolkit directory is cached; the network installer below only
    # runs on a cache miss. Bump the key when changing the CUDA version.
    - name: Cache CUDA
      id: cache-cuda
      uses: actions/cache@v4
      with:
        path: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA
        key: ${{ runner.os }}-cuda-12.4.1

    # Installs only the nvcc and cudart components.
    # curl uses -f so that an HTTP error aborts the download instead of
    # saving an error page as cuda_installer.exe; -S keeps the error message
    # visible despite -s.
    - name: Setup CUDA
      if: steps.cache-cuda.outputs.cache-hit != 'true'
      run: |
        curl -sSfL -o cuda_installer.exe https://developer.download.nvidia.com/compute/cuda/12.4.1/network_installers/cuda_12.4.1_windows_network.exe
        cuda_installer.exe -s nvcc_12.4 cudart_12.4

    # Configures the plugin itself (source dir is vsort/, the default working
    # directory) against the dependencies prepared above, with both the CUDA
    # and DirectML options enabled and the static MSVC runtime selected.
    # The folded scalar (`>-`) joins the lines into one cmd command line.
    - name: Configure
      run: >-
        cmake -S . -B build -G Ninja -LA
        -D CMAKE_BUILD_TYPE=Release
        -D CMAKE_MSVC_RUNTIME_LIBRARY=MultiThreaded
        -D VAPOURSYNTH_INCLUDE_DIRECTORY=vapoursynth\include
        -D protobuf_DIR=protobuf\install\cmake
        -D ONNX_DIR=onnx\install\lib\cmake\ONNX
        -D ONNX_RUNTIME_API_DIRECTORY=onnxruntime-gpu\include\onnxruntime
        -D ONNX_RUNTIME_LIB_DIRECTORY=onnxruntime-gpu\lib
        -D ENABLE_CUDA=1
        -D CUDAToolkit_ROOT="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
        -D ENABLE_DML=1
        -D CMAKE_CXX_STANDARD=20

    - name: Build
      run: >-
        cmake --build build --verbose

    # Stages the distributable layout: vsort.dll at the top of artifact\ and
    # the ONNX Runtime DLLs under artifact\vsort\.
    - name: Install
      run: |
        cmake --install build --prefix install
        mkdir artifact
        mkdir artifact\vsort
        copy install\bin\vsort.dll artifact\
        copy onnxruntime-gpu\bin\*.dll artifact\vsort\
        copy onnxruntime-gpu\lib\*.dll artifact\vsort\

    # DirectML.dll is taken from the NuGet package (a zip archive) and
    # shipped next to the ONNX Runtime DLLs.
    # Follows the DirectML version in
    # https://github.com/AmusementClub/onnxruntime/blob/master/cmake/external/dml.cmake#L44
    # curl: -f fails on HTTP errors instead of saving the error page as the
    # package; the former `-J -O` conflicted with -o and was ignored.
    - name: Download DirectML Library
      run: |
        curl -sSfL -o directml.nupkg https://www.nuget.org/api/v2/package/Microsoft.AI.DirectML/1.14.1
        unzip -q directml.nupkg -d dml
        copy dml\bin\x64-win\DirectML.dll artifact\vsort\

    # `uses` steps ignore the job's default working-directory, hence the
    # explicit vsort/ prefix.
    - name: Upload
      uses: actions/upload-artifact@v4
      with:
        name: VSORT-Windows-x64
        path: vsort/artifact

    # The steps below assemble a self-contained test environment under
    # vs_portable\: embedded Python, portable VapourSynth, the freshly built
    # plugin, a waifu2x model and an x265 encoder.
    # All downloads use `curl -sSfL -o <file>`: -f makes an HTTP error fail
    # the download instead of saving an error page as the archive, -S keeps
    # the error visible, and the former `-J -O` (ignored because -o already
    # names the output) is dropped.
    - name: Setup Python portable
      run: |
        curl -sSfL -o python.zip https://www.python.org/ftp/python/3.9.10/python-3.9.10-embed-amd64.zip
        7z x python.zip -ovs_portable

    # Extracted over the Python directory; -y answers overwrite prompts.
    - name: Install VapourSynth portable
      run: |
        curl -sSfL -o vs.7z https://github.com/vapoursynth/vapoursynth/releases/download/R54/VapourSynth64-Portable-R54.7z
        7z x vs.7z -ovs_portable -y

    # Mirrors the artifact layout inside the VapourSynth plugin directory.
    - name: Copy plugin
      run: |
        copy artifact\*.dll vs_portable\vapoursynth64\plugins
        mkdir vs_portable\vapoursynth64\plugins\vsort\
        copy artifact\vsort\*.dll vs_portable\vapoursynth64\plugins\vsort\

    # Model files go to plugins\models; the test scripts refer to them by a
    # path relative to that directory together with builtin=True.
    - name: Install waifu2x model
      run: |
        curl -sSfL -o waifu2x.7z https://github.com/AmusementClub/vs-mlrt/releases/download/model-20211209/waifu2x_v3.7z
        7z x waifu2x.7z -ovs_portable\vapoursynth64\plugins\models

    # x265 consumes the y4m stream produced by vspipe in the test steps.
    - name: Download x265
      run: |
        curl -sSfL -o x265.7z https://github.com/AmusementClub/x265/releases/download/Yuuki-3.5-AC3/x265-win64-x86-64-clang.Yuuki-3.5-AC3.7z
        7z x x265.7z -ovs_portable\

    # Baseline smoke test: writes a one-line VapourSynth script that prints
    # the plugin and its version to stderr and runs the bundled waifu2x model
    # on a blank RGBS clip.
    - name: Create script
      shell: bash
      run: |
        echo "import vapoursynth as vs;from vapoursynth import core;import sys;print(core.ort, file=sys.stderr);print(core.ort.Version(),file=sys.stderr);core.std.BlankClip(format=vs.RGBS).ort.Model(r\"waifu2x\\upconv_7_anime_style_art_rgb\\scale2.0x_model.onnx\", builtin=True).resize.Bicubic(format=vs.YUV420P10, matrix_s='709').set_output()" > test.vpy

    - name: Run vspipe
      shell: bash
      run: |
        set -ex
        # Print clip info first, then pipe the clip (up to `-e 9`) into x265.
        vs_portable/vspipe -i test.vpy -
        vs_portable/vspipe --y4m -p -e 9 test.vpy - |
          vs_portable/x265 --log-file x265.log --log-file-level info --y4m -D 10 --preset ultrafast -o out.hevc -
        ls -l out.hevc x265.log
        cat x265.log
        # Exit 2: the encoder did not report all ten frames.
        if ! grep -F 'encoded 10 frames' x265.log; then
          exit 2
        fi
        # Exit 1: the encoder log mentions an error.
        if grep -i 'error' x265.log; then
          exit 1
        fi
        exit 0

    # Same smoke test as the baseline, but the model is invoked with
    # fp16=True on an RGBS clip.
    - name: Create script (fp16)
      shell: bash
      run: |
        echo "import vapoursynth as vs;from vapoursynth import core;import sys;print(core.ort, file=sys.stderr);core.std.BlankClip(format=vs.RGBS).ort.Model(r\"waifu2x\\upconv_7_anime_style_art_rgb\\scale2.0x_model.onnx\", builtin=True, fp16=True).resize.Bicubic(format=vs.YUV420P10, matrix_s='709').set_output()" > test_fp16.vpy

    - name: Run vspipe (fp16)
      shell: bash
      run: |
        set -ex
        # Print clip info first, then pipe the clip (up to `-e 9`) into x265.
        vs_portable/vspipe -i test_fp16.vpy -
        vs_portable/vspipe --y4m -p -e 9 test_fp16.vpy - |
          vs_portable/x265 --log-file x265.log --log-file-level info --y4m -D 10 --preset ultrafast -o out.hevc -
        ls -l out.hevc x265.log
        cat x265.log
        # Exit 2: the encoder did not report all ten frames.
        if ! grep -F 'encoded 10 frames' x265.log; then
          exit 2
        fi
        # Exit 1: the encoder log mentions an error.
        if grep -i 'error' x265.log; then
          exit 1
        fi
        exit 0

    # fp16=True with a half-precision (RGBH) input clip; the resulting clip
    # object is also printed to stderr for inspection.
    - name: Create script (fp16 input)
      shell: bash
      run: |
        echo "import vapoursynth as vs;from vapoursynth import core;import sys;print(core.ort, file=sys.stderr);flt=core.std.BlankClip(format=vs.RGBH).ort.Model(r\"waifu2x\\upconv_7_anime_style_art_rgb\\scale2.0x_model.onnx\", builtin=True, fp16=True);print(flt,file=sys.stderr);flt.resize.Bicubic(format=vs.YUV420P10, matrix_s='709').set_output()" > test_fp16_input.vpy

    - name: Run vspipe (fp16 input)
      shell: bash
      run: |
        set -ex
        # Print clip info first, then pipe the clip (up to `-e 9`) into x265.
        vs_portable/vspipe -i test_fp16_input.vpy -
        vs_portable/vspipe --y4m -p -e 9 test_fp16_input.vpy - |
          vs_portable/x265 --log-file x265.log --log-file-level info --y4m -D 10 --preset ultrafast -o out.hevc -
        ls -l out.hevc x265.log
        cat x265.log
        # Exit 2: the encoder did not report all ten frames.
        if ! grep -F 'encoded 10 frames' x265.log; then
          exit 2
        fi
        # Exit 1: the encoder log mentions an error.
        if grep -i 'error' x265.log; then
          exit 1
        fi
        exit 0

    # fp16=True on an RGBS clip with output_format=1 (presumably selects
    # half-precision output, going by the step name -- TODO confirm); the
    # resulting clip object is also printed to stderr.
    - name: Create script (fp16 output)
      shell: bash
      run: |
        echo "import vapoursynth as vs;from vapoursynth import core;import sys;print(core.ort, file=sys.stderr);flt=core.std.BlankClip(format=vs.RGBS).ort.Model(r\"waifu2x\\upconv_7_anime_style_art_rgb\\scale2.0x_model.onnx\", builtin=True, fp16=True, output_format=1);print(flt,file=sys.stderr);flt.resize.Bicubic(format=vs.YUV420P10, matrix_s='709').set_output()" > test_fp16_output.vpy

    - name: Run vspipe (fp16 output)
      shell: bash
      run: |
        set -ex
        # Print clip info first, then pipe the clip (up to `-e 9`) into x265.
        vs_portable/vspipe -i test_fp16_output.vpy -
        vs_portable/vspipe --y4m -p -e 9 test_fp16_output.vpy - |
          vs_portable/x265 --log-file x265.log --log-file-level info --y4m -D 10 --preset ultrafast -o out.hevc -
        ls -l out.hevc x265.log
        cat x265.log
        # Exit 2: the encoder did not report all ten frames.
        if ! grep -F 'encoded 10 frames' x265.log; then
          exit 2
        fi
        # Exit 1: the encoder log mentions an error.
        if grep -i 'error' x265.log; then
          exit 1
        fi
        exit 0

    # Exercises flexible_output_prop: the model call returns a mapping with
    # 'clip' and 'num_planes'; each plane is read back from the frame props
    # named '<prop><i>' via PropToClip and recombined with ShufflePlanes.
    - name: Create script (flexible output)
      shell: bash
      run: |
        echo "import vapoursynth as vs;from vapoursynth import core;import sys;print(core.ort, file=sys.stderr);print(core.ort.Version(),file=sys.stderr);prop='test';output=core.std.BlankClip(format=vs.RGBS).ort.Model(r\"waifu2x\\upconv_7_anime_style_art_rgb\\scale2.0x_model.onnx\", builtin=True, flexible_output_prop=prop);core.std.ShufflePlanes([output['clip'].std.PropToClip(prop=f'{prop}{i}') for i in range(output['num_planes'])], [0, 0, 0], vs.RGB).resize.Bicubic(format=vs.YUV420P10, matrix_s='709').set_output()" > test_flexible_output.vpy

    - name: Run vspipe (flexible output)
      shell: bash
      run: |
        set -ex
        # Print clip info first, then pipe the clip (up to `-e 9`) into x265.
        vs_portable/vspipe -i test_flexible_output.vpy -
        vs_portable/vspipe --y4m -p -e 9 test_flexible_output.vpy - |
          vs_portable/x265 --log-file x265.log --log-file-level info --y4m -D 10 --preset ultrafast -o out.hevc -
        ls -l out.hevc x265.log
        cat x265.log
        # Exit 2: the encoder did not report all ten frames.
        if ! grep -F 'encoded 10 frames' x265.log; then
          exit 2
        fi
        # Exit 1: the encoder log mentions an error.
        if grep -i 'error' x265.log; then
          exit 1
        fi
        exit 0

    # Prints the tag-based description of the checked-out commit to the log;
    # the output is not consumed by any later step.
    - name: Describe
      run: git describe --tags --long

    # Lists the DLLs the built plugin depends on, for inspection in the log.
    - name: Dump dependencies
      run: dumpbin /dependents artifact\vsort.dll

    # Release packaging: runs only when the triggering event is a manual
    # workflow_dispatch and a non-empty tag was supplied.
    # The tag is read from the `inputs` context everywhere, so the condition,
    # the archive name and the release tag always agree. `inputs.tag` is
    # populated both for a direct workflow_dispatch run and when this
    # workflow is invoked through workflow_call with its `tag` input.
    - name: Compress artifact for release
      if: github.event_name == 'workflow_dispatch' && inputs.tag != ''
      # Runs from vsort\artifact, so ..\..\ places the archive in the
      # workspace root.
      run: |
        cd artifact
        7z a -t7z -mx=7 ../../VSORT-Windows-x64.${{ inputs.tag }}.7z .

    # Attaches the archive to a prerelease for the given tag; the step fails
    # if the archive is missing (fail_on_unmatched_files).
    - name: Release
      if: github.event_name == 'workflow_dispatch' && inputs.tag != ''
      uses: softprops/action-gh-release@v2
      with:
        tag_name: ${{ inputs.tag }}
        files: VSORT-Windows-x64.${{ inputs.tag }}.7z
        fail_on_unmatched_files: true
        generate_release_notes: false
        prerelease: true