
Commit

Merge branch 'kws/dynamicaug_nas' of github.com:alicangok/ai8x-synthesis into kws/dynamicaug_nas
alicangok committed Jan 21, 2024
2 parents 3a46729 + bc21d2f commit b4a5096
Showing 28 changed files with 25,060 additions and 4,663 deletions.
4 changes: 3 additions & 1 deletion README.md
@@ -1,6 +1,6 @@
# ADI MAX78000/MAX78002 Model Training and Synthesis

-November 13, 2023
+December 22, 2023

ADI’s MAX78000/MAX78002 project is comprised of five repositories:

@@ -3340,6 +3340,8 @@ Additional information about the evaluation kits, and the software development k
[AHB Addresses for MAX78000 and MAX78002](docs/AHBAddresses.md)
[Facial Recognition System](https://github.com/MaximIntegratedAI/ai8x-training/blob/develop/docs/FacialRecognitionSystem.md)
---
Binary file modified README.pdf
16 changes: 12 additions & 4 deletions izer/add_fake_passthrough.py
@@ -28,17 +28,22 @@ def parse_arguments():
                        help='depth of the passthrough layer')
    parser.add_argument('--layer-name-after-pt', metavar='S', required=True,
                        help='name of the layer just after the passthrough layer is added')
+    parser.add_argument('--low-memory-footprint', action='store_true', default=False,
+                        help='enables 2-bit quantization for weights')

    args = parser.parse_args()
    return args


-def passthrough_faker(n_channels):
+def passthrough_faker(n_channels, low_memory_footprint=False):
    """Creates passthrough layer"""
    a = nn.Conv2d(in_channels=n_channels, out_channels=n_channels, kernel_size=1, bias=False)
    a.weight.data = torch.zeros_like(a.weight.data)
    for i in range(a.weight.data.shape[0]):
-        a.weight.data[i, i, :, :] = 64
+        if low_memory_footprint:
+            a.weight.data[i, i, :, :] = 1
+        else:
+            a.weight.data[i, i, :, :] = 64
    return a


@@ -48,7 +53,7 @@ def main():
    device = torch.device('cpu')

    checkpoint = torch.load(args.input_checkpoint_path)
-    passthrough_kernel = passthrough_faker(args.layer_depth)
+    passthrough_kernel = passthrough_faker(args.layer_depth, args.low_memory_footprint)

    new_checkpoint = copy.deepcopy(checkpoint)

@@ -61,7 +66,10 @@ def main():
        new_state_dict[name] = v

    new_state_dict[f'{args.layer_name}.output_shift'] = torch.Tensor([1.]).to(device)
-    new_state_dict[f'{args.layer_name}.weight_bits'] = torch.Tensor([8.]).to(device)
+    if args.low_memory_footprint:
+        new_state_dict[f'{args.layer_name}.weight_bits'] = torch.Tensor([2.]).to(device)
+    else:
+        new_state_dict[f'{args.layer_name}.weight_bits'] = torch.Tensor([8.]).to(device)
    new_state_dict[f'{args.layer_name}.bias_bits'] = torch.Tensor([8.]).to(device)
    new_state_dict[f'{args.layer_name}.quantize_activation'] = torch.Tensor([1.]).to(device)
    new_state_dict[f'{args.layer_name}.adjust_output_shift'] = torch.Tensor([0.]).to(device)
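
The new --low-memory-footprint option changes only two things: the identity value written into the fake passthrough kernel (1 instead of 64) and the weight_bits metadata recorded in the checkpoint (2 instead of 8). A minimal, self-contained sketch of the updated passthrough_faker behavior, assuming only torch is installed (tensor shapes below are illustrative):

import torch
from torch import nn

def passthrough_faker(n_channels, low_memory_footprint=False):
    """Create a 1x1 identity convolution used as a fake passthrough layer."""
    conv = nn.Conv2d(in_channels=n_channels, out_channels=n_channels, kernel_size=1, bias=False)
    conv.weight.data = torch.zeros_like(conv.weight.data)
    # Identity weight: 1 fits the 2-bit range, 64 is the 8-bit convention used before this change.
    ident = 1 if low_memory_footprint else 64
    for i in range(n_channels):
        conv.weight.data[i, i, :, :] = ident
    return conv

# Quick check: each output channel is the matching input channel scaled by the identity weight.
x = torch.randn(1, 8, 5, 5)
assert torch.allclose(passthrough_faker(8, low_memory_footprint=True)(x), x)
assert torch.allclose(passthrough_faker(8)(x), 64 * x)

Storing these passthrough weights as 2-bit instead of 8-bit values cuts their share of the accelerator's weight memory to a quarter.
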
11 changes: 11 additions & 0 deletions networks/ai85-dotprod.yaml
@@ -0,0 +1,11 @@
---
arch: AI85DotProd
dataset: dotprod_test

layers:
  # layer0: mlp
  - op: mlp
    activate: none
    out_offset: 0x4000
    processors: 0xffff.ffff.ffff.ffff
    data_format: HWC
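
In these network descriptions an mlp op corresponds to a fully connected (Linear) operation, and data_format: HWC sets the layout of the input data. A rough PyTorch analogue of what such a layer computes, with placeholder sizes (the real dimensions of the dotprod_test dataset are not shown in this diff):

import torch
from torch import nn

# Placeholder sizes; one dot product per output neuron, matching a fully connected layer.
fc = nn.Linear(in_features=64, out_features=16, bias=False)
x = torch.randn(1, 64)       # flattened input vector
y = fc(x)                    # y[0, j] = dot(fc.weight[j], x[0])
print(y.shape)               # torch.Size([1, 16])
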
159 changes: 159 additions & 0 deletions networks/ai85-faceid_112.yaml
@@ -0,0 +1,159 @@
---
# Faceid_112 for face recognition. Compatible with MAX78000.

arch: ai85faceidnet_112
dataset: vggface2_faceid

layers:
  # Layer 0: pre_stage. in 3ch, out 32 ch
  - processors: 0x0000000000000007
    in_offset: 0x0000
    out_offset: 0x2000
    operation: Conv2d
    kernel_size: 3x3
    pad: 1
    activate: ReLU
    streaming: true
  # Layer 1: pre_stage_2. in 32ch, out 32 ch
  - processors: 0xffffffff00000000
    output_processors: 0x00000000ffffffff
    out_offset: 0x4000
    operation: Conv2d
    kernel_size: 3x3
    pad: 1
    activate: ReLU
    max_pool: 2
    pool_stride: 2
    streaming: true
  # Layer 2: Bottleneck-0, n=0, conv1. in 32ch, out 64 ch
  - processors: 0x00000000ffffffff
    output_processors: 0xffffffffffffffff
    out_offset: 0x0000
    operation: Conv2d
    kernel_size: 1x1
    pad: 0
    activate: ReLU
  # Layer 3: Bottleneck-0, n=0, conv2. in 64ch, out 48 ch
  - processors: 0xffffffffffffffff
    output_processors: 0x0000ffffffffffff
    out_offset: 0x4000
    operation: Conv2d
    kernel_size: 3x3
    pad: 1
    activate: None
    max_pool: 2
    pool_stride: 2
  # Layer 4: Bottleneck-1, n=0, conv1. in 48ch, out 192 ch
  - processors: 0x0000ffffffffffff
    output_processors: 0xffffffffffffffff
    out_offset: 0x0000
    operation: Conv2d
    kernel_size: 1x1
    pad: 0
    activate: ReLU
  # Layer 5: Bottleneck-1, n=0, conv2. in 192 ch, out 64 ch
  - processors: 0xffffffffffffffff
    output_processors: 0xffffffffffffffff
    out_offset: 0x4000
    operation: Conv2d
    kernel_size: 3x3
    pad: 1
    activate: None
    max_pool: 2
    pool_stride: 2
  # Layer 6: Bottleneck-2, n=0, conv1. in 64ch, out 128 ch
  - processors: 0xffffffffffffffff
    output_processors: 0xffffffffffffffff
    out_offset: 0x2000
    operation: Conv2d
    kernel_size: 1x1
    pad: 0
    activate: ReLU
  # Layer 7: Bottleneck-2, n=0, conv2. in 128 ch, out 64 ch
  - processors: 0xffffffffffffffff
    output_processors: 0xffffffffffffffff
    out_offset: 0x0000
    operation: Conv2d
    kernel_size: 3x3
    write_gap: 1
    pad: 1
    activate: None
  # Layer 8: Bottleneck-2, n=0, Reform input layer
  - in_offset: 0x4000
    out_offset: 0x0004
    processors: 0xffffffffffffffff
    operation: passthrough
    write_gap: 1
    in_sequences: [5]
  # Layer 9: Bottleneck-2, n=0, Residual add
  - in_offset: 0x0000
    out_offset: 0x2000
    processors: 0xffffffffffffffff
    operation: none
    eltwise: add
    in_sequences: [7, 8]
  # Layer 10: Bottleneck-3, n=0, conv1. in 64, out 256 ch
  - processors: 0xffffffffffffffff
    output_processors: 0xffffffffffffffff
    out_offset: 0x0000
    operation: Conv2d
    kernel_size: 1x1
    pad: 0
    activate: ReLU
  # Layer 11: Bottleneck-3, n=0, conv2. in 256 ch, out 96 ch
  - processors: 0xffffffffffffffff
    output_processors: 0xffffffffffff0000
    out_offset: 0x4000
    operation: Conv2d
    kernel_size: 3x3
    pad: 1
    activate: None
    max_pool: 2
    pool_stride: 2
  # Layer 12: Bottleneck-4, n=0, conv1. in 96 ch, out 192 ch
  - processors: 0xffffffffffff0000
    output_processors: 0xffffffffffffffff
    out_offset: 0x0000
    operation: Conv2d
    kernel_size: 1x1
    pad: 0
    activate: ReLU
  # Layer 13: Bottleneck-4, n=0, conv2. in 192 ch, out 128 ch
  - processors: 0xffffffffffffffff
    output_processors: 0xffffffffffffffff
    out_offset: 0x4000
    operation: Conv2d
    kernel_size: 3x3
    pad: 1
    activate: None
  # Layer 14: post_stage in 128 ch, out 128 ch
  - processors: 0xffffffffffffffff
    output_processors: 0xffffffffffffffff
    out_offset: 0x0000
    operation: Conv2d
    kernel_size: 1x1
    pad: 0
    activate: ReLU
  # Layer 15: pre_avg in 128 ch, out 128 ch
  - processors: 0xffffffffffffffff
    output_processors: 0xffffffffffffffff
    out_offset: 0x4000
    operation: Conv2d
    kernel_size: 3x3
    pad: 1
    activate: None
  # Layer 16: Fake Fused Avg Pool
  - processors: 0xffffffffffffffff
    output_processors: 0xffffffffffffffff
    out_offset: 0x0000
    operation: Conv2d
    kernel_size: 1x1
    pad: 0
    activate: None
    avg_pool: [7, 7]
    pool_stride: 1
  # Layer 17: output layer in 128 features, out 64 features
  - out_offset: 0x2000
    processors: 0xffffffffffffffff
    operation: MLP
    activate: None
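
Layers 7 through 9 above describe a residual connection: Layer 7 writes its 3x3 convolution output with write_gap: 1, Layer 8 re-reads Layer 5's activation through a passthrough (also with write_gap: 1) so the two operands interleave in memory, and Layer 9 performs the element-wise add over in_sequences [7, 8]. A rough PyTorch-level sketch of the dataflow these layers express, with illustrative module names and spatial sizes (not taken from the repository):

import torch
from torch import nn

class BottleneckSketch(nn.Module):
    """Illustrative equivalent of Layers 6-9: 1x1 expand, 3x3 project, residual add."""

    def __init__(self, channels=64, expand=128):
        super().__init__()
        self.conv1 = nn.Conv2d(channels, expand, kernel_size=1, bias=False)              # Layer 6
        self.conv2 = nn.Conv2d(expand, channels, kernel_size=3, padding=1, bias=False)   # Layer 7
        self.relu = nn.ReLU()

    def forward(self, x):
        shortcut = x                      # Layer 8: passthrough copy of the block input (Layer 5 output)
        out = self.relu(self.conv1(x))    # Layer 6: 1x1 conv + ReLU
        out = self.conv2(out)             # Layer 7: 3x3 conv, activate: None
        return out + shortcut             # Layer 9: eltwise add of in_sequences [7, 8]

# Shape check only; weights are random here, unlike the trained checkpoint.
y = BottleneckSketch()(torch.randn(1, 64, 14, 14))
print(y.shape)  # torch.Size([1, 64, 14, 14])
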

0 comments on commit b4a5096
