
Commit

Merge branch 'kws/dynamicaug_nas' of github.com:alicangok/ai8x-synthesis into kws/dynamicaug_nas
alicangok committed Jan 21, 2024
2 parents 3a46729 + bc21d2f commit b4a5096
Showing 28 changed files with 25,060 additions and 4,663 deletions.
4 changes: 3 additions & 1 deletion README.md
@@ -1,6 +1,6 @@
# ADI MAX78000/MAX78002 Model Training and Synthesis

-November 13, 2023
+December 22, 2023

ADI’s MAX78000/MAX78002 project is comprised of five repositories:

@@ -3340,6 +3340,8 @@ Additional information about the evaluation kits, and the software development k
[AHB Addresses for MAX78000 and MAX78002](docs/AHBAddresses.md)
[Facial Recognition System](https://github.com/MaximIntegratedAI/ai8x-training/blob/develop/docs/FacialRecognitionSystem.md)
---
Binary file modified README.pdf
16 changes: 12 additions & 4 deletions izer/add_fake_passthrough.py
@@ -28,17 +28,22 @@ def parse_arguments():
                        help='depth of the passthrough layer')
    parser.add_argument('--layer-name-after-pt', metavar='S', required=True,
                        help='name of the layer just after the passthrough layer is added')
+    parser.add_argument('--low-memory-footprint', action='store_true', default=False,
+                        help='enables 2-bit quantization for weights')

    args = parser.parse_args()
    return args


-def passthrough_faker(n_channels):
+def passthrough_faker(n_channels, low_memory_footprint=False):
    """Creates passthrough layer"""
    a = nn.Conv2d(in_channels=n_channels, out_channels=n_channels, kernel_size=1, bias=False)
    a.weight.data = torch.zeros_like(a.weight.data)
    for i in range(a.weight.data.shape[0]):
-        a.weight.data[i, i, :, :] = 64
+        if low_memory_footprint:
+            a.weight.data[i, i, :, :] = 1
+        else:
+            a.weight.data[i, i, :, :] = 64
    return a


@@ -48,7 +53,7 @@ def main():
    device = torch.device('cpu')

    checkpoint = torch.load(args.input_checkpoint_path)
-    passthrough_kernel = passthrough_faker(args.layer_depth)
+    passthrough_kernel = passthrough_faker(args.layer_depth, args.low_memory_footprint)

    new_checkpoint = copy.deepcopy(checkpoint)

@@ -61,7 +66,10 @@ def main():
        new_state_dict[name] = v

    new_state_dict[f'{args.layer_name}.output_shift'] = torch.Tensor([1.]).to(device)
-    new_state_dict[f'{args.layer_name}.weight_bits'] = torch.Tensor([8.]).to(device)
+    if args.low_memory_footprint:
+        new_state_dict[f'{args.layer_name}.weight_bits'] = torch.Tensor([2.]).to(device)
+    else:
+        new_state_dict[f'{args.layer_name}.weight_bits'] = torch.Tensor([8.]).to(device)
    new_state_dict[f'{args.layer_name}.bias_bits'] = torch.Tensor([8.]).to(device)
    new_state_dict[f'{args.layer_name}.quantize_activation'] = torch.Tensor([1.]).to(device)
    new_state_dict[f'{args.layer_name}.adjust_output_shift'] = torch.Tensor([0.]).to(device)
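
The new --low-memory-footprint option changes only two things: the identity value written into the fake passthrough kernel (1 instead of 64) and the weight_bits metadata recorded in the checkpoint (2 instead of 8). A minimal, self-contained sketch of the updated passthrough_faker behavior, assuming only torch is installed (tensor shapes below are illustrative):

import torch
from torch import nn

def passthrough_faker(n_channels, low_memory_footprint=False):
    """Create a 1x1 identity convolution used as a fake passthrough layer."""
    conv = nn.Conv2d(in_channels=n_channels, out_channels=n_channels, kernel_size=1, bias=False)
    conv.weight.data = torch.zeros_like(conv.weight.data)
    # Identity weight: 1 fits the 2-bit range, 64 is the 8-bit convention used before this change.
    ident = 1 if low_memory_footprint else 64
    for i in range(n_channels):
        conv.weight.data[i, i, :, :] = ident
    return conv

# Quick check: each output channel is the matching input channel scaled by the identity weight.
x = torch.randn(1, 8, 5, 5)
assert torch.allclose(passthrough_faker(8, low_memory_footprint=True)(x), x)
assert torch.allclose(passthrough_faker(8)(x), 64 * x)

Storing these passthrough weights as 2-bit instead of 8-bit values cuts their share of the accelerator's weight memory to a quarter.
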
11 changes: 11 additions & 0 deletions networks/ai85-dotprod.yaml
@@ -0,0 +1,11 @@
---
arch: AI85DotProd
dataset: dotprod_test

layers:
  # layer0: mlp
  - op: mlp
    activate: none
    out_offset: 0x4000
    processors: 0xffff.ffff.ffff.ffff
    data_format: HWC
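
In these network descriptions an mlp op corresponds to a fully connected (Linear) operation, and data_format: HWC sets the layout of the input data. A rough PyTorch analogue of what such a layer computes, with placeholder sizes (the real dimensions of the dotprod_test dataset are not shown in this diff):

import torch
from torch import nn

# Placeholder sizes; one dot product per output neuron, matching a fully connected layer.
fc = nn.Linear(in_features=64, out_features=16, bias=False)
x = torch.randn(1, 64)       # flattened input vector
y = fc(x)                    # y[0, j] = dot(fc.weight[j], x[0])
print(y.shape)               # torch.Size([1, 16])
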
159 changes: 159 additions & 0 deletions networks/ai85-faceid_112.yaml
@@ -0,0 +1,159 @@
---
# Faceid_112 for face recognition. Compatible with MAX78000.

arch: ai85faceidnet_112
dataset: vggface2_faceid

layers:
  # Layer 0: pre_stage. in 3ch, out 32 ch
  - processors: 0x0000000000000007
    in_offset: 0x0000
    out_offset: 0x2000
    operation: Conv2d
    kernel_size: 3x3
    pad: 1
    activate: ReLU
    streaming: true
  # Layer 1: pre_stage_2. in 32ch, out 32 ch
  - processors: 0xffffffff00000000
    output_processors: 0x00000000ffffffff
    out_offset: 0x4000
    operation: Conv2d
    kernel_size: 3x3
    pad: 1
    activate: ReLU
    max_pool: 2
    pool_stride: 2
    streaming: true
  # Layer 2: Bottleneck-0, n=0, conv1. in 32ch, out 64 ch
  - processors: 0x00000000ffffffff
    output_processors: 0xffffffffffffffff
    out_offset: 0x0000
    operation: Conv2d
    kernel_size: 1x1
    pad: 0
    activate: ReLU
  # Layer 3: Bottleneck-0, n=0, conv2. in 64ch, out 48 ch
  - processors: 0xffffffffffffffff
    output_processors: 0x0000ffffffffffff
    out_offset: 0x4000
    operation: Conv2d
    kernel_size: 3x3
    pad: 1
    activate: None
    max_pool: 2
    pool_stride: 2
  # Layer 4: Bottleneck-1, n=0, conv1. in 48ch, out 192 ch
  - processors: 0x0000ffffffffffff
    output_processors: 0xffffffffffffffff
    out_offset: 0x0000
    operation: Conv2d
    kernel_size: 1x1
    pad: 0
    activate: ReLU
  # Layer 5: Bottleneck-1, n=0, conv2. in 192 ch, out 64 ch
  - processors: 0xffffffffffffffff
    output_processors: 0xffffffffffffffff
    out_offset: 0x4000
    operation: Conv2d
    kernel_size: 3x3
    pad: 1
    activate: None
    max_pool: 2
    pool_stride: 2
  # Layer 6: Bottleneck-2, n=0, conv1. in 64ch, out 128 ch
  - processors: 0xffffffffffffffff
    output_processors: 0xffffffffffffffff
    out_offset: 0x2000
    operation: Conv2d
    kernel_size: 1x1
    pad: 0
    activate: ReLU
  # Layer 7: Bottleneck-2, n=0, conv2. in 128 ch, out 64 ch
  - processors: 0xffffffffffffffff
    output_processors: 0xffffffffffffffff
    out_offset: 0x0000
    operation: Conv2d
    kernel_size: 3x3
    write_gap: 1
    pad: 1
    activate: None
  # Layer 8: Bottleneck-2, n=0, Reform input layer
  - in_offset: 0x4000
    out_offset: 0x0004
    processors: 0xffffffffffffffff
    operation: passthrough
    write_gap: 1
    in_sequences: [5]
  # Layer 9: Bottleneck-2, n=0, Residual add
  - in_offset: 0x0000
    out_offset: 0x2000
    processors: 0xffffffffffffffff
    operation: none
    eltwise: add
    in_sequences: [7, 8]
  # Layer 10: Bottleneck-3, n=0, conv1. in 64, out 256 ch
  - processors: 0xffffffffffffffff
    output_processors: 0xffffffffffffffff
    out_offset: 0x0000
    operation: Conv2d
    kernel_size: 1x1
    pad: 0
    activate: ReLU
  # Layer 11: Bottleneck-3, n=0, conv2. in 256 ch, out 96 ch
  - processors: 0xffffffffffffffff
    output_processors: 0xffffffffffff0000
    out_offset: 0x4000
    operation: Conv2d
    kernel_size: 3x3
    pad: 1
    activate: None
    max_pool: 2
    pool_stride: 2
  # Layer 12: Bottleneck-4, n=0, conv1. in 96 ch, out 192 ch
  - processors: 0xffffffffffff0000
    output_processors: 0xffffffffffffffff
    out_offset: 0x0000
    operation: Conv2d
    kernel_size: 1x1
    pad: 0
    activate: ReLU
  # Layer 13: Bottleneck-4, n=0, conv2. in 192 ch, out 128 ch
  - processors: 0xffffffffffffffff
    output_processors: 0xffffffffffffffff
    out_offset: 0x4000
    operation: Conv2d
    kernel_size: 3x3
    pad: 1
    activate: None
  # Layer 14: post_stage in 128 ch, out 128 ch
  - processors: 0xffffffffffffffff
    output_processors: 0xffffffffffffffff
    out_offset: 0x0000
    operation: Conv2d
    kernel_size: 1x1
    pad: 0
    activate: ReLU
  # Layer 15: pre_avg in 128 ch, out 128 ch
  - processors: 0xffffffffffffffff
    output_processors: 0xffffffffffffffff
    out_offset: 0x4000
    operation: Conv2d
    kernel_size: 3x3
    pad: 1
    activate: None
  # Layer 16: Fake Fused Avg Pool
  - processors: 0xffffffffffffffff
    output_processors: 0xffffffffffffffff
    out_offset: 0x0000
    operation: Conv2d
    kernel_size: 1x1
    pad: 0
    activate: None
    avg_pool: [7, 7]
    pool_stride: 1
  # Layer 17: output layer in 128 features, out 64 features
  - out_offset: 0x2000
    processors: 0xffffffffffffffff
    operation: MLP
    activate: None
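
Layers 7 through 9 above describe a residual connection: Layer 7 writes its 3x3 convolution output with write_gap: 1, Layer 8 re-reads Layer 5's activation through a passthrough (also with write_gap: 1) so the two operands interleave in memory, and Layer 9 performs the element-wise add over in_sequences [7, 8]. A rough PyTorch-level sketch of the dataflow these layers express, with illustrative module names and spatial sizes (not taken from the repository):

import torch
from torch import nn

class BottleneckSketch(nn.Module):
    """Illustrative equivalent of Layers 6-9: 1x1 expand, 3x3 project, residual add."""

    def __init__(self, channels=64, expand=128):
        super().__init__()
        self.conv1 = nn.Conv2d(channels, expand, kernel_size=1, bias=False)              # Layer 6
        self.conv2 = nn.Conv2d(expand, channels, kernel_size=3, padding=1, bias=False)   # Layer 7
        self.relu = nn.ReLU()

    def forward(self, x):
        shortcut = x                      # Layer 8: passthrough copy of the block input (Layer 5 output)
        out = self.relu(self.conv1(x))    # Layer 6: 1x1 conv + ReLU
        out = self.conv2(out)             # Layer 7: 3x3 conv, activate: None
        return out + shortcut             # Layer 9: eltwise add of in_sequences [7, 8]

# Shape check only; weights are random here, unlike the trained checkpoint.
y = BottleneckSketch()(torch.randn(1, 64, 14, 14))
print(y.shape)  # torch.Size([1, 64, 14, 14])
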

0 comments on commit b4a5096
