-
Notifications
You must be signed in to change notification settings - Fork 0
/
folding.yaml
90 lines (90 loc) · 3.84 KB
/
folding.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# Per operator type default configurations
defaults:
  # Scaled dot-product attention head implemented via HLS
  ScaledDotProductAttention_hls:
    # Type of memory to be used for internal buffer storage
    # Options: auto, block, distributed, ultra
    ram_style: block
    # Type of memory to be used for threshold storage
    # Options: auto, block, distributed
    ram_style_thresholds: block
    # Type of memory to be used for the attention mask (if present)
    # Options: auto, block, distributed
    ram_style_mask: block
    # Resource type to be used for implementing multiplications/MACs
    # Options: auto, lut or dsp
    mac_resource: lut
  # Addition of two inputs (constants or streamed) implemented via HLS
  ElementwiseAdd_hls:
    # Type of memory to be used for internal buffer storage and/or constant
    # parameter tensors
    # Options: auto, block, distributed, ultra
    ram_style: distributed
  # Matrix vector activation unit implemented via HLS
  MVAU_hls:
    # Resource type to be used for implementing multiplications/MACs
    # Options: auto, lut or dsp
    resType: dsp
    # Memory mode for weight storage
    # Options: internal_embedded, internal_decoupled, external
    mem_mode: internal_decoupled
    # Type of memory to be used for weight storage if "internal_decoupled"
    # Options: auto, block, distributed, ultra
    ram_style: block
    # Type of memory to be used for threshold storage
    # Options: auto, block, distributed
    ram_style_thresholds: block
    # Makes weights writeable through AXI-lite interface at runtime
    runtime_writeable_weights: 0
  # Matrix vector activation unit implemented via RTL
  MVAU_rtl:
    # Resource type to be used for implementing multiplications/MACs
    # Options: auto, lut or dsp
    # Note: RTL MVAU currently does not support LUT-based implementation
    resType: dsp
    # Memory mode for weight storage
    # Options: internal_embedded, internal_decoupled, external
    mem_mode: internal_decoupled
    # Type of memory to be used for weight storage if "internal_decoupled"
    # Options: auto, block, distributed, ultra
    ram_style: block
    # Makes weights writeable through AXI-lite interface at runtime
    runtime_writeable_weights: 0
  # Multi-thresholds implemented via HLS (applies to standalone thresholds)
  Thresholding_hls:
    # Memory mode for threshold storage
    # Options: internal_embedded, internal_decoupled
    mem_mode: internal_decoupled
    # Type of memory to be used for threshold storage if "internal_decoupled"
    # Options: distributed, block
    ram_style: distributed
    # Makes thresholds writeable through AXI-lite interface at runtime
    runtime_writeable_weights: 0
  # Multi-thresholds implemented via RTL (applies to standalone thresholds)
  Thresholding_rtl:
    # Decides to use BRAM, URAM or LUTs for threshold memory, depending on the
    # depth of the thresholds
    # Note: This combination forces "distributed" LUT implementation
    depth_trigger_uram: 2147483647  # "infinity"
    depth_trigger_bram: 2147483647  # "infinity"
    # # Note: This combination forces "block" RAM implementation
    # depth_trigger_uram: 0
    # depth_trigger_bram: 1
    # # Note: This combination forces "ultra" RAM implementation
    # depth_trigger_uram: 1
    # depth_trigger_bram: 0
    # # Note: This combination is equivalent to "auto"
    # depth_trigger_uram: 0
    # depth_trigger_bram: 0
    # Makes thresholds writeable through AXI-lite interface at runtime
    runtime_writeable_weights: 0
  # FIFO implemented via RTL (there is no HLS FIFO implementation in FINN)
  StreamingFIFO_rtl:
    # RTL vs. IPI implementation of FIFOs
    # Options: rtl, vivado
    impl_style: rtl
    # Resource type for FIFOs when impl_style is vivado
    # Options: auto, block, distributed, ultra
    ram_style: distributed
# Individual, named node-specific configurations here
# ...