Merge pull request #591 from chachaleo/feat/QlinearConv
feat: qlinear conv
Showing 35 changed files with 1,073 additions and 27 deletions.
@@ -0,0 +1,159 @@
# tensor.qlinear_conv

```rust
qlinear_conv(
    self: @Tensor<Q>,
    X_scale: @Tensor<T>,
    X_zero_point: @Tensor<T>,
    W: @Tensor<Q>,
    W_scale: @Tensor<T>,
    W_zero_point: @Tensor<T>,
    B: Option<Span<Q>>,
    auto_pad: Option<AUTO_PAD>,
    dilations: Option<Span<usize>>,
    group: Option<usize>,
    kernel_shape: Option<Span<usize>>,
    pads: Option<Span<usize>>,
    strides: Option<Span<usize>>,
    y_scale: @Tensor<T>,
    y_zero_point: @Tensor<T>,
) -> Tensor<Q>
```

Performs convolution on quantized tensors.

The convolution operator consumes a quantized input tensor, its scale and zero point, a quantized filter, its scale and zero point, and the output's scale and zero point, and computes the quantized output. Each scale and zero-point pair must have the same shape: either scalars (per tensor) or 1-D tensors (per output channel). Each input or output and its related zero point must have the same type.
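
Conceptually, the operator shifts the quantized inputs by their zero points, convolves in integer arithmetic, then rescales the accumulator into the output's quantized domain. A minimal NumPy sketch of that identity for the 1x1-kernel case (an illustration, not the Orion implementation):

```python
import numpy as np

# Minimal sketch, assuming scalar scales/zero points, no bias, a 1x1 kernel
# (so the convolution reduces to an elementwise product), and int8 output.
def qlinear_conv_1x1(x, x_scale, x_zp, w, w_scale, w_zp, y_scale, y_zp):
    acc = (x.astype(np.int32) - x_zp) * (int(w) - w_zp)     # integer accumulation
    r = acc * (x_scale * w_scale / y_scale) + y_zp          # rescale into output domain
    return np.clip(np.rint(r), -128, 127).astype(np.int8)  # round and saturate

x = np.arange(1, 10, dtype=np.int8).reshape(1, 1, 3, 3)
# Same parameters as the example at the end of this page.
print(qlinear_conv_1x1(x, 0.5, 2, np.int8(0), 0.4, 3, 0.2, 4))
```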

## Args

* `X`(`@Tensor<i8>`) - Quantized input data tensor, of size (N x C x H x W), where N is the batch size, C is the number of channels, and H and W are the height and width. Note that this is for the 2D image; otherwise the size is (N x C x D1 x D2 ... x Dn).
* `X_scale`(`@Tensor<T>`) - Scale for input `X`.
* `X_zero_point`(`@Tensor<T>`) - Zero point for input `X`.
* `W`(`@Tensor<i8>`) - Quantized weight tensor used in the convolution, of size (M x C/group x kH x kW), where C is the number of channels, kH and kW are the height and width of the kernel, and M is the number of feature maps. For more than 2 dimensions, the kernel shape is (M x C/group x k1 x k2 x ... x kn), where (k1 x k2 x ... kn) is the dimension of the kernel.
* `W_scale`(`@Tensor<T>`) - Scale for input `W`.
* `W_zero_point`(`@Tensor<T>`) - Zero point for input `W`.
* `B`(`Option<@Tensor<T>>`) - Optional 1-D bias to be added to the convolution, of size M. The bias must be quantized using scale = x_scale * w_scale and zero_point = 0 (see the sketch after this list).
* `auto_pad`(`Option<AUTO_PAD>`) - Default is NOTSET; auto_pad must be either NOTSET, SAME_UPPER, SAME_LOWER or VALID. NOTSET means explicit padding is used. SAME_UPPER or SAME_LOWER mean pad the input so that `output_shape[i] = ceil(input_shape[i] / strides[i])` for each axis `i`.
* `dilations`(`Option<Span<usize>>`) - Dilation value along each spatial axis of the filter. If not present, the dilation defaults to 1 along each spatial axis.
* `group`(`Option<usize>`) - Number of groups that input and output channels are divided into. Default is 1.
* `kernel_shape`(`Option<Span<usize>>`) - The shape of the convolution kernel. If not present, it is inferred from input `W`.
* `pads`(`Option<Span<usize>>`) - Padding at the beginning and end of each spatial axis; each value must be greater than or equal to 0 and represents the number of pixels added at the beginning or end of the corresponding axis. The `pads` format is [x1_begin, x2_begin, ..., x1_end, x2_end, ...], where xi_begin is the number of pixels added at the beginning of axis `i` and xi_end the number added at its end. This attribute cannot be used simultaneously with the auto_pad attribute. If not present, the padding defaults to 0 at the start and end of each spatial axis (see the output-size sketch after this list).
* `strides`(`Option<Span<usize>>`) - Stride along each spatial axis. If not present, the stride defaults to 1 along each spatial axis.
* `y_scale`(`@Tensor<T>`) - Scale for output.
* `y_zero_point`(`@Tensor<T>`) - Zero point for output.
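
For explicit padding, the output spatial size follows the standard ONNX convolution formula, and the bias constraint above can be checked the same way. The helpers below (`conv_out_size`, `quantize_bias`) are illustrative assumptions, not part of the Orion API:

```python
import math
import numpy as np

# Standard ONNX output-size formula for explicit padding (illustrative helper).
def conv_out_size(input_size, kernel, stride=1, dilation=1, pad_begin=0, pad_end=0):
    effective_kernel = dilation * (kernel - 1) + 1
    return math.floor((input_size + pad_begin + pad_end - effective_kernel) / stride) + 1

# Bias quantization with scale = x_scale * w_scale and zero_point = 0
# (illustrative helper).
def quantize_bias(b, x_scale, w_scale):
    return np.rint(b / (x_scale * w_scale)).astype(np.int32)

assert conv_out_size(3, 1) == 3                          # 1x1 kernel on a 3x3 input
assert conv_out_size(5, 3, stride=2, pad_begin=1, pad_end=1) == 3
assert quantize_bias(np.array([0.6]), 0.5, 0.4)[0] == 3  # 0.6 / 0.2 == 3
```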

## Returns

A new `Tensor<i8>`, containing the quantized result of the convolution of the dequantized inputs.

## Type Constraints

u32 tensor, not supported.
fp8x23wide tensor, not supported.
fp16x16wide tensor, not supported.

## Example

```rust
use orion::operators::tensor::{TensorTrait, Tensor};
use orion::operators::tensor::I8TensorPartialEq;
use orion::utils::{assert_eq, assert_seq_eq};
use orion::operators::tensor::{I8Tensor, I8TensorAdd};
use orion::operators::tensor::FP16x16TensorPartialEq;
use orion::operators::tensor::{FP16x16Tensor, FP16x16TensorAdd};
use core::array::{ArrayTrait, SpanTrait};
use orion::operators::tensor::implementations::tensor_fp16x16::{TensorI8IntoTensorFP16x16, FP16x16TensorSub, FP16x16TensorDiv, FP16x16TensorMul};
use orion::numbers::{FP16x16, I8IntoFP16x16};

fn qlinear_conv_example() -> Tensor<i8> {
    let mut shape = ArrayTrait::<usize>::new();
    shape.append(1);
    shape.append(1);
    shape.append(3);
    shape.append(3);

    let mut data = ArrayTrait::new();
    data.append(1);
    data.append(2);
    data.append(3);
    data.append(4);
    data.append(5);
    data.append(6);
    data.append(7);
    data.append(8);
    data.append(9);
    let mut X = TensorTrait::new(shape.span(), data.span());

    let mut shape = ArrayTrait::<usize>::new();
    shape.append(1);
    shape.append(1);
    shape.append(1);
    shape.append(1);

    let mut data = ArrayTrait::new();
    data.append(0_i8);
    let mut W = TensorTrait::new(shape.span(), data.span());

    let mut shape = ArrayTrait::<usize>::new();
    shape.append(6);

    // Scales and zero points packed as FP16x16 values:
    // [x_scale = 0.5, x_zero_point = 2, w_scale = 0.4, w_zero_point = 3, y_scale = 0.2, y_zero_point = 4]
    let mut data = ArrayTrait::new();
    data.append(FP16x16 { mag: 32768, sign: false });
    data.append(FP16x16 { mag: 131072, sign: false });
    data.append(FP16x16 { mag: 26214, sign: false });
    data.append(FP16x16 { mag: 196608, sign: false });
    data.append(FP16x16 { mag: 13107, sign: false });
    data.append(FP16x16 { mag: 262144, sign: false });
    let mut param = TensorTrait::new(shape.span(), data.span());

    let X_scale = TensorTrait::new(
        shape: array![1].span(), data: array![*param.data.at(0)].span(),
    );
    let X_zero_point = TensorTrait::new(
        shape: array![1].span(), data: array![*param.data.at(1)].span(),
    );
    let W_scale = TensorTrait::new(
        shape: array![1].span(), data: array![*param.data.at(2)].span(),
    );
    let W_zero_point = TensorTrait::new(
        shape: array![1].span(), data: array![*param.data.at(3)].span(),
    );
    let y_scale = TensorTrait::new(
        shape: array![1].span(), data: array![*param.data.at(4)].span(),
    );
    let y_zero_point = TensorTrait::new(
        shape: array![1].span(), data: array![*param.data.at(5)].span(),
    );

    return X
        .qlinear_conv(
            @X_scale,
            @X_zero_point,
            @W,
            @W_scale,
            @W_zero_point,
            Option::None,
            Option::None,
            Option::None,
            Option::None,
            Option::None,
            Option::None,
            Option::None,
            @y_scale,
            @y_zero_point,
        );
}

>>> [
      [
        [
          [  7,   4,   1],
          [ -2,  -5,  -8],
          [-11, -14, -17],
        ]
      ]
    ]
```
@@ -0,0 +1,101 @@
import numpy as np
from nodegen.node import RunAll
from ..helpers import make_test, to_fp, Tensor, Dtype, FixedImpl
from .conv import conv


def qlinear_conv(
    x,
    x_scale,
    x_zero_point,
    w,
    w_scale,
    w_zero_point,
    y_scale,
    y_zero_point,
    B=None,
    auto_pad=None,
    dilations=None,
    group=None,
    kernel_shape=None,
    pads=None,
    strides=None,
):
    # Shift the quantized inputs by their zero points, accumulating in int32.
    X = x.astype(np.int32)
    if x_zero_point is not None:
        X -= x_zero_point
    W = w.astype(np.int32)
    if w_zero_point is not None:
        if len(w_zero_point.shape) == 1 and w_zero_point.shape[0] == W.shape[0]:
            # Per-output-channel zero point: broadcast along the remaining axes.
            missing = (w_zero_point.shape[0],) + (1,) * (len(W.shape) - 1)
            W -= w_zero_point.reshape(missing)
        else:
            W -= w_zero_point
    res = conv(
        X, W, B, auto_pad, dilations, group, kernel_shape, pads, strides
    ).astype(np.int32)
    # Requantize: rescale the integer accumulator, add the output zero point,
    # then round and saturate to the output type's range.
    R = res * (x_scale * w_scale / y_scale)
    if y_zero_point is not None:
        R += y_zero_point
        if y_zero_point.dtype == np.int8:
            R = np.clip(R, -128, 127)
        else:
            R = np.clip(R, 0, 255)
        return (np.rint(R).astype(y_zero_point.dtype),)
    if x.dtype == np.int8:
        R = np.clip(R, -128, 127)
    else:
        R = np.clip(R, 0, 255)
    return (np.rint(R).astype(x.dtype),)
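
# Hand check under the test's parameters below: the rescale multiplier is
# x_scale * w_scale / y_scale = 0.5 * 0.4 / 0.2 = 1.0, so requantization
# reduces to adding y_zero_point. For the first input pixel and the 1x1
# kernel: (1 - 2) * (0 - 3) = 3, then 3 + 4 = 7, the top-left output value.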


class Qlinear_conv(RunAll):
    @staticmethod
    def export_qlinear_conv() -> None:
        x = np.array(
            [[1, 2, 3],
             [4, 5, 6],
             [7, 8, 9]],
            dtype=np.int8,
        ).reshape((1, 1, 3, 3))
        x_scale = np.float32(0.5)
        x_zero_point = np.int8(2)

        w = np.array([0], dtype=np.int8).reshape((1, 1, 1, 1))
        w_scale = np.array([0.4], dtype=np.float32)
        w_zero_point = np.array([3], dtype=np.int8)

        y_scale = np.float32(0.2)
        y_zero_point = np.int8(4)

        # Scales and zero points packed into one tensor; the generated Cairo
        # test unpacks them element by element (see func_sig below).
        param = np.array([0.5, 2, 0.4, 3, 0.2, 4])

        y = qlinear_conv(x, x_scale, x_zero_point, w, w_scale, w_zero_point, y_scale, y_zero_point)
        y = np.array(y)

        x = Tensor(Dtype.I8, x.shape, x.flatten())
        w = Tensor(Dtype.I8, w.shape, w.flatten())
        y = Tensor(Dtype.I8, y.shape, y.flatten())
        param = Tensor(Dtype.FP16x16, param.shape, to_fp(param.flatten(), FixedImpl.FP16x16))

        name = "qlinear_conv"
        func_sig = "qlinear_conv("
        func_sig += "@input_0,"
        func_sig += "@TensorTrait::new(shape: array![1].span(), data: array![*input_2.data.at(0)].span(),),"
        func_sig += "@TensorTrait::new(shape: array![1].span(), data: array![*input_2.data.at(1)].span(),),"
        func_sig += "@input_1,"
        func_sig += "@TensorTrait::new(shape: array![1].span(), data: array![*input_2.data.at(2)].span(),),"
        func_sig += "@TensorTrait::new(shape: array![1].span(), data: array![*input_2.data.at(3)].span(),),"
        func_sig += "Option::None,"
        func_sig += "Option::None,"
        func_sig += "Option::None,"
        func_sig += "Option::None,"
        func_sig += "Option::None,"
        func_sig += "Option::None,"
        func_sig += "Option::None,"
        func_sig += "@TensorTrait::new(shape: array![1].span(), data: array![*input_2.data.at(4)].span(),),"
        func_sig += "@TensorTrait::new(shape: array![1].span(), data: array![*input_2.data.at(5)].span(),))"
        make_test([x, w, param], y, func_sig, name)