diff --git a/Bender.yml b/Bender.yml index 0bb27dd515..d1f22a9f2a 100644 --- a/Bender.yml +++ b/Bender.yml @@ -28,6 +28,8 @@ dependencies: riscv-dbg: { git: https://github.com/pulp-platform/riscv-dbg, version: 0.8.0 } cluster_icache: { git: https://github.com/pulp-platform/cluster_icache.git, rev: 64e21ae455bbdde850c4df13bef86ea55ac42537 } idma: { git: https://github.com/pulp-platform/iDMA, version: 0.6.4 } + redmule: { git: https://github.com/pulp-platform/redmule, rev: 6c2a569 } #branch: picobello + hci: { git: https://github.com/pulp-platform/hci, rev: 06fcba671e060f2e1b03b7ebe2d3e719f1557099 } export_include_dirs: - hw/reqrsp_interface/include @@ -170,6 +172,7 @@ sources: - hw/snitch_cluster/src/snitch_clkdiv2.sv # Level 3 - hw/snitch_cluster/src/snitch_hive.sv + - hw/snitch_cluster/src/snitch_hwpe_subsystem.sv # Level 4 - hw/snitch_cluster/src/snitch_cluster.sv - target: test diff --git a/hw/mem_interface/src/mem_wide_narrow_mux.sv b/hw/mem_interface/src/mem_wide_narrow_mux.sv index 716393c8d4..ac08adb53b 100644 --- a/hw/mem_interface/src/mem_wide_narrow_mux.sv +++ b/hw/mem_interface/src/mem_wide_narrow_mux.sv @@ -7,11 +7,9 @@ `include "common_cells/assertions.svh" -/// This module multiplexes many narrow ports and one wide port onto many narrow -/// ports. The wide or narrow ports can be selected by using the `sel_wide_i` signal. -/// -/// `1` selects the wide port. -/// `0` selects the narrow port. +/// This module multiplexes many narrow ports, one wide port, and one hwpe port onto many narrow +/// ports. Arbitration is done statically: the wide port has the highest priority, +/// followed by the hwpe port, and finally the narrow ports. /// /// ## Constraint /// @@ -21,7 +19,7 @@ /// ## Caution /// /// As of now this module's request always need an immediate grant in case -/// `sel_wide_i` is high. Any delayed grant will break the module's behavior. +/// `in_wide_req_i.q_valid` is high. Any delayed grant will break the module's behavior. 
module mem_wide_narrow_mux #(
   /// Width of narrow data.
   parameter int unsigned NarrowDataWidth = 0,
@@ -37,6 +35,10 @@ module mem_wide_narrow_mux #(
   parameter type mem_wide_req_t = logic,
   /// Response type of wide inputs.
   parameter type mem_wide_rsp_t = logic,
+  /// Request type of hwpe inputs.
+  parameter type mem_hwpe_req_t = logic,
+  /// Response type of hwpe inputs.
+  parameter type mem_hwpe_rsp_t = logic,
   /// Derived. *Do not override*
   /// Number of narrow inputs.
   parameter int unsigned NrPorts = WideDataWidth / NarrowDataWidth
@@ -50,11 +52,12 @@ module mem_wide_narrow_mux #(
   /// Wide side.
   input mem_wide_req_t in_wide_req_i,
   output mem_wide_rsp_t in_wide_rsp_o,
+  /// HWPE side.
+  input mem_hwpe_req_t in_hwpe_req_i,
+  output mem_hwpe_rsp_t in_hwpe_rsp_o,
   // Multiplexed output.
   output mem_narrow_req_t [NrPorts-1:0] out_req_o,
-  input mem_narrow_rsp_t [NrPorts-1:0] out_rsp_i,
-  /// `0`: Use narrow port, `1`: Use wide port
-  input logic sel_wide_i
+  input mem_narrow_rsp_t [NrPorts-1:0] out_rsp_i
 );

   localparam int unsigned NarrowStrbWidth = NarrowDataWidth/8;
@@ -67,6 +70,7 @@ module mem_wide_narrow_mux #(
     // Broadcast data from all banks.
     for (int i = 0; i < NrPorts; i++) begin
       in_wide_rsp_o.p[i*NarrowDataWidth+:NarrowDataWidth] = out_rsp_i[i].p.data;
+      in_hwpe_rsp_o.p[i*NarrowDataWidth+:NarrowDataWidth] = out_rsp_i[i].p.data;
     end

     // ---------------
@@ -74,11 +78,12 @@ module mem_wide_narrow_mux #(
     // ---------------
     // By default feed through narrow requests.
     out_req_o = in_narrow_req_i;
-    // Tie-off wide by default.
+    // Tie-off wide and hwpe by default.
     in_wide_rsp_o.q_ready = 1'b0;
+    in_hwpe_rsp_o.q_ready = 1'b0;

-    // The wide port is selected.
-    if (sel_wide_i) begin
+    // The wide port has the highest priority.
+    if (in_wide_req_i.q_valid) begin
       for (int i = 0; i < NrPorts; i++) begin
         out_req_o[i].q_valid = in_wide_req_i.q_valid;
         // Block access from narrow ports.
@@ -92,9 +97,25 @@ module mem_wide_narrow_mux #(
           user: in_wide_req_i.q.user
         };
         // The protocol requires that the response is always granted
-        // immediately (at least when `sel_wide_i` is high).
+        // immediately (at least when `in_wide_req_i.q_valid` is high).
         in_wide_rsp_o.q_ready = 1'b1;
       end
+    end else if (in_hwpe_req_i.q_valid) begin // The hwpe port has the second highest priority
+      for (int i = 0; i < NrPorts; i++) begin
+        out_req_o[i].q_valid = in_hwpe_req_i.q_valid;
+        // Block access from narrow ports.
+        in_narrow_rsp_o[i].q_ready = 1'b0;
+        out_req_o[i].q = '{
+          addr: in_hwpe_req_i.q.addr,
+          write: in_hwpe_req_i.q.write,
+          amo: reqrsp_pkg::AMONone,
+          data: in_hwpe_req_i.q.data[i*NarrowDataWidth+:NarrowDataWidth],
+          strb: in_hwpe_req_i.q.strb[i*NarrowStrbWidth+:NarrowStrbWidth],
+          user: in_hwpe_req_i.q.user
+        };
+        // As for the wide port, the selected hwpe request is granted unconditionally.
+        in_hwpe_rsp_o.q_ready = 1'b1;
+      end
     end
   end

@@ -108,21 +129,23 @@ module mem_wide_narrow_mux #(
   logic [NrPorts-1:0] q_valid_flat;
   logic [NrPorts-1:0][NarrowDataWidth-1:0] q_data;
   logic [NrPorts-1:0][NarrowStrbWidth-1:0] q_strb;

+  // verilog_lint: waive-start line-length
   `ASSERT(ImmediateGrantWide, in_wide_req_i.q_valid |-> in_wide_rsp_o.q_ready)
   for (genvar i = 0; i < NrPorts; i++) begin : gen_per_port
     assign q_valid_flat[i] = out_req_o[i].q_valid;
     assign q_data[i] = out_req_o[i].q.data;
     assign q_strb[i] = out_req_o[i].q.strb;
-    `ASSERT(ImmediateGrantOut, sel_wide_i & out_req_o[i].q_valid |-> out_rsp_i[i].q_ready)
-    `ASSERT(SilentNarrow, sel_wide_i |-> !in_narrow_rsp_o[i].q_ready)
-    `ASSERT(NarrowPassThrough, !sel_wide_i & in_narrow_req_i[i].q_valid |-> out_req_o[i].q_valid)
+    `ASSERT(ImmediateGrantOut, (in_wide_req_i.q_valid | in_hwpe_req_i.q_valid) & out_req_o[i].q_valid |-> out_rsp_i[i].q_ready)
+    `ASSERT(SilentNarrow, (in_wide_req_i.q_valid | in_hwpe_req_i.q_valid) |-> !in_narrow_rsp_o[i].q_ready)
+    `ASSERT(NarrowPassThrough, !in_wide_req_i.q_valid & !in_hwpe_req_i.q_valid & in_narrow_req_i[i].q_valid |-> out_req_o[i].q_valid)
   end
-  `ASSERT(DmaSelected, sel_wide_i & in_wide_req_i.q_valid |-> &q_valid_flat)
+  `ASSERT(DmaSelected, in_wide_req_i.q_valid |-> &q_valid_flat)
   `ASSERT(DmaSelectedReadyWhenValid,
-    sel_wide_i & in_wide_req_i.q_valid |-> in_wide_rsp_o.q_ready)
+    in_wide_req_i.q_valid |-> in_wide_rsp_o.q_ready)
   `ASSERT(DMAWriteDataCorrect,
     in_wide_req_i.q_valid & in_wide_rsp_o.q_ready |->
     (in_wide_req_i.q.data == q_data) && (in_wide_req_i.q.strb == q_strb))
+  // verilog_lint: waive-stop line-length

 endmodule
diff --git a/hw/snitch/src/snitch_pkg.sv b/hw/snitch/src/snitch_pkg.sv
index 6918a0a773..bda091edd0 100644
--- a/hw/snitch/src/snitch_pkg.sv
+++ b/hw/snitch/src/snitch_pkg.sv
@@ -126,7 +126,8 @@ package snitch_pkg;
   typedef enum integer {
     TCDM = 0,
     ClusterPeripherals = 1,
-    SoC = 2
+    SoC = 2,
+    HWPE = 3
   } cluster_slave_e;

   typedef enum integer {
diff --git a/hw/snitch_cluster/src/snitch_cluster.sv b/hw/snitch_cluster/src/snitch_cluster.sv
index 1222bb4a19..c13e19edfe 100644
--- a/hw/snitch_cluster/src/snitch_cluster.sv
+++ b/hw/snitch_cluster/src/snitch_cluster.sv
@@ -57,6 +57,12 @@ module snitch_cluster
   /// as cores. If SSRs are enabled, we recommend 4 times the the number of
   /// banks.
   parameter int unsigned NrBanks = NrCores,
+  /// Address width of the HWPE control port.
+  parameter int unsigned HwpeCtrlAddrWidth = 32,
+  /// Data width of the HWPE control port.
+  parameter int unsigned HwpeCtrlDataWidth = 32,
+  /// Data width of the HWPE data port (the TCDM mux slices it like the `WideDataWidth` port).
+  parameter int unsigned HwpeDataWidth = WideDataWidth,
   /// Size of DMA AXI buffer.
   parameter int unsigned DMANumAxInFlight = 3,
   /// Size of DMA request fifo.
@@ -280,7 +286,7 @@ module snitch_cluster
   localparam int unsigned NrNarrowMasters = 3;
   localparam int unsigned NarrowIdWidthOut = $clog2(NrNarrowMasters) + NarrowIdWidthIn;

-  localparam int unsigned NrSlaves = 3;
+  localparam int unsigned NrSlaves = 4;
   localparam int unsigned NrRuleIdcs = NrSlaves - 1;
   localparam int unsigned NrRules = (1 + AliasRegionEnable) * NrRuleIdcs;

@@ -350,6 +356,8 @@ module snitch_cluster
   typedef logic [NarrowDataWidth/8-1:0] strb_t;
   typedef logic [WideDataWidth-1:0] data_dma_t;
   typedef logic [WideDataWidth/8-1:0] strb_dma_t;
+  typedef logic [WideDataWidth-1:0] data_hwpe_t;
+  typedef logic [WideDataWidth/8-1:0] strb_hwpe_t;
   typedef logic [NarrowIdWidthIn-1:0] id_mst_t;
   typedef logic [NarrowIdWidthOut-1:0] id_slv_t;
   typedef logic [WideIdWidthIn-1:0] id_dma_mst_t;
@@ -360,6 +368,10 @@ module snitch_cluster
   typedef logic [TCDMMemAddrWidth-1:0] tcdm_mem_addr_t;
   typedef logic [TCDMAddrWidth-1:0] tcdm_addr_t;

+  typedef logic [HwpeCtrlAddrWidth-1:0] addr_hwpe_ctrl_t;
+  typedef logic [HwpeCtrlDataWidth-1:0] data_hwpe_ctrl_t;
+  typedef logic [HwpeCtrlDataWidth/8-1:0] strb_hwpe_ctrl_t;
+
   typedef struct packed {
     logic [CoreIDWidth-1:0] core_id;
     bit is_core;
@@ -370,14 +382,20 @@ module snitch_cluster
   `AXI_TYPEDEF_ALL(axi_slv, addr_t, id_slv_t, data_t, strb_t, user_t)
   `AXI_TYPEDEF_ALL(axi_mst_dma, addr_t, id_dma_mst_t, data_dma_t, strb_dma_t, user_dma_t)
   `AXI_TYPEDEF_ALL(axi_slv_dma, addr_t, id_dma_slv_t, data_dma_t, strb_dma_t, user_dma_t)
+  // verilog_lint: waive-start line-length
+  `AXI_TYPEDEF_ALL(axi_hwpe_mst, addr_hwpe_ctrl_t, id_slv_t, data_hwpe_ctrl_t, strb_hwpe_ctrl_t, user_t)
+  // verilog_lint: waive-stop line-length

   `REQRSP_TYPEDEF_ALL(reqrsp, addr_t, data_t, strb_t)

   `MEM_TYPEDEF_ALL(mem, tcdm_mem_addr_t, data_t, strb_t, tcdm_user_t)
   `MEM_TYPEDEF_ALL(mem_dma, tcdm_mem_addr_t, data_dma_t, strb_dma_t, logic)
+  `MEM_TYPEDEF_ALL(mem_hwpe, tcdm_mem_addr_t, data_hwpe_t, strb_hwpe_t, logic)

   `TCDM_TYPEDEF_ALL(tcdm, tcdm_addr_t, data_t, strb_t,
tcdm_user_t)
   `TCDM_TYPEDEF_ALL(tcdm_dma, tcdm_addr_t, data_dma_t, strb_dma_t, logic)
+  `TCDM_TYPEDEF_ALL(tcdm_hwpe, tcdm_addr_t, data_hwpe_t, strb_hwpe_t, logic)
+  `TCDM_TYPEDEF_ALL(hwpectrl, addr_hwpe_ctrl_t, data_hwpe_ctrl_t, strb_hwpe_ctrl_t, logic)

   `REG_BUS_TYPEDEF_REQ(reg_req_t, addr_t, data_t, strb_t)
   `REG_BUS_TYPEDEF_RSP(reg_rsp_t, data_t)
@@ -480,6 +498,10 @@ module snitch_cluster
   assign zero_mem_start_address = cluster_periph_end_address;
   assign zero_mem_end_address = cluster_periph_end_address + ZeroMemorySize * 1024;

+  addr_t hwpe_mem_start_address, hwpe_mem_end_address;
+  assign hwpe_mem_start_address = zero_mem_end_address;
+  assign hwpe_mem_end_address = hwpe_mem_start_address + 'h100;  // = REDMULE_ADDR_SPACE (data.h)
+
   localparam addr_t TCDMAliasStart = AliasRegionBase & TCDMMask;
   localparam addr_t TCDMAliasEnd = (TCDMAliasStart + TCDMSize) & TCDMMask;

@@ -492,6 +514,9 @@ module snitch_cluster
   localparam addr_t ZeroMemAliasStart = PeriphAliasEnd;
   localparam addr_t ZeroMemAliasEnd = PeriphAliasEnd + ZeroMemorySize * 1024;

+  localparam addr_t HWPEAliasStart = ZeroMemAliasEnd;
+  localparam addr_t HWPEAliasEnd = HWPEAliasStart + 'h100;  // same window size as above
+
   // ----------------
   // Wire Definitions
   // ----------------
@@ -502,6 +527,11 @@ module snitch_cluster
   axi_mst_req_t [NrNarrowMasters-1:0] narrow_axi_mst_req;
   axi_mst_resp_t [NrNarrowMasters-1:0] narrow_axi_mst_rsp;

+  axi_hwpe_mst_req_t axi_hwpe_mst_req;
+  axi_hwpe_mst_resp_t axi_hwpe_mst_rsp;
+  axi_hwpe_mst_req_t axi_hwpe_cut_mst_req;
+  axi_hwpe_mst_resp_t axi_hwpe_cut_mst_rsp;
+
   // DMA AXI buses
   axi_mst_dma_req_t [NrWideMasters-1:0] wide_axi_mst_req;
   axi_mst_dma_resp_t [NrWideMasters-1:0] wide_axi_mst_rsp;
@@ -515,10 +545,16 @@ module snitch_cluster
   mem_dma_req_t [NrSuperBanks-1:0] sb_dma_req;
   mem_dma_rsp_t [NrSuperBanks-1:0] sb_dma_rsp;

+  mem_hwpe_req_t [NrSuperBanks-1:0] sb_hwpe_req;
+  mem_hwpe_rsp_t [NrSuperBanks-1:0] sb_hwpe_rsp;
+
   // 3. Memory Subsystem (Interconnect)
   tcdm_dma_req_t ext_dma_req;
   tcdm_dma_rsp_t ext_dma_rsp;

+  tcdm_hwpe_req_t tcdm_hwpe_req;
+  tcdm_hwpe_rsp_t tcdm_hwpe_rsp;
+
   // AXI Ports into TCDM (from SoC).
   tcdm_req_t axi_soc_req;
   tcdm_rsp_t axi_soc_rsp;
@@ -541,7 +577,14 @@ module snitch_cluster
   reg_req_t reg_req;
   reg_rsp_t reg_rsp;

-  // 5. Misc. Wires.
+  // 6. HWPE Control & Events
+  hwpectrl_req_t hwpe_ctrl_req;
+  hwpectrl_rsp_t hwpe_ctrl_rsp;
+
+  logic [NrCores-1:0][1:0] hwpe_evt;
+  logic hwpe_busy;
+
+  // 7. Misc. Wires.
   logic icache_prefetch_enable;
   logic [NrCores-1:0] cl_interrupt;
   logic [NrCores-1:0] barrier_in;
@@ -713,6 +756,26 @@ module snitch_cluster
     .mem_rsp_i (sb_dma_rsp)
   );

+  snitch_tcdm_interconnect #(
+    .NumInp (1),
+    .NumOut (NrSuperBanks),
+    .tcdm_req_t (tcdm_hwpe_req_t),
+    .tcdm_rsp_t (tcdm_hwpe_rsp_t),
+    .mem_req_t (mem_hwpe_req_t),
+    .mem_rsp_t (mem_hwpe_rsp_t),
+    .user_t (logic),
+    .MemAddrWidth (TCDMMemAddrWidth),
+    .DataWidth (HwpeDataWidth),
+    .MemoryResponseLatency (MemoryMacroLatency)
+  ) i_hwpe_interconnect (
+    .clk_i,
+    .rst_ni,
+    .req_i (tcdm_hwpe_req),
+    .rsp_o (tcdm_hwpe_rsp),
+    .mem_req_o (sb_hwpe_req),
+    .mem_rsp_i (sb_hwpe_rsp)
+  );
+
   // ----------------
   // Memory Subsystem
   // ----------------
@@ -727,7 +790,9 @@ module snitch_cluster
       .mem_narrow_req_t (mem_req_t),
       .mem_narrow_rsp_t (mem_rsp_t),
       .mem_wide_req_t (mem_dma_req_t),
-      .mem_wide_rsp_t (mem_dma_rsp_t)
+      .mem_wide_rsp_t (mem_dma_rsp_t),
+      .mem_hwpe_req_t (mem_hwpe_req_t),
+      .mem_hwpe_rsp_t (mem_hwpe_rsp_t)
     ) i_tcdm_mux (
       .clk_i,
       .rst_ni,
@@ -735,9 +800,10 @@ module snitch_cluster
       .in_narrow_rsp_o (ic_rsp [i]),
       .in_wide_req_i (sb_dma_req [i]),
       .in_wide_rsp_o (sb_dma_rsp [i]),
+      .in_hwpe_req_i (sb_hwpe_req [i]),
+      .in_hwpe_rsp_o (sb_hwpe_rsp [i]),
       .out_req_o (amo_req),
-      .out_rsp_i (amo_rsp),
-      .sel_wide_i (sb_dma_req[i].q_valid)
+      .out_rsp_i (amo_rsp)
     );

     // generate banks of the superbank
@@ -1139,6 +1205,11 @@ module snitch_cluster
         idx: ClusterPeripherals,
         start_addr: cluster_periph_start_address,
         end_addr: cluster_periph_end_address
+      },
+      '{
+        idx: HWPE,
+        start_addr: hwpe_mem_start_address,
+        end_addr: hwpe_mem_end_address
       }
     };
     if (AliasRegionEnable) begin : gen_cluster_xbar_alias
@@ -1152,6 +1223,11 @@ module snitch_cluster
         idx: ClusterPeripherals,
         start_addr: PeriphAliasStart,
         end_addr: PeriphAliasEnd
+      },
+      '{
+        idx: HWPE,
+        start_addr: HWPEAliasStart,
+        end_addr: HWPEAliasEnd
       }
     };
   end
@@ -1317,6 +1393,94 @@ module snitch_cluster
     .icache_events_i (icache_events)
   );

+  // 3. Hardware Processing Engine
+
+  // Convert the narrow AXI data width to the HWPE control data width (HwpeCtrlDataWidth)
+  axi_dw_converter #(
+    .AxiMaxReads ( 1 ),
+    .AxiSlvPortDataWidth ( NarrowDataWidth ),
+    .AxiMstPortDataWidth ( HwpeCtrlDataWidth ),
+    .AxiAddrWidth ( PhysicalAddrWidth ),
+    .AxiIdWidth ( NarrowIdWidthOut ),
+    .aw_chan_t ( axi_slv_aw_chan_t ),
+    .mst_w_chan_t ( axi_hwpe_mst_w_chan_t ),
+    .slv_w_chan_t ( axi_slv_w_chan_t ),
+    .b_chan_t ( axi_slv_b_chan_t ),
+    .ar_chan_t ( axi_slv_ar_chan_t ),
+    .mst_r_chan_t ( axi_hwpe_mst_r_chan_t ),
+    .slv_r_chan_t ( axi_slv_r_chan_t ),
+    .axi_mst_req_t ( axi_hwpe_mst_req_t ),
+    .axi_mst_resp_t ( axi_hwpe_mst_resp_t ),
+    .axi_slv_req_t ( axi_slv_req_t ),
+    .axi_slv_resp_t ( axi_slv_resp_t )
+  ) i_axi_dw_hwpe (
+    .clk_i ( clk_i ),
+    .rst_ni ( rst_ni ),
+    .slv_req_i ( narrow_axi_slv_req[HWPE] ),
+    .slv_resp_o ( narrow_axi_slv_rsp[HWPE] ),
+    .mst_req_o ( axi_hwpe_mst_req ),
+    .mst_resp_i ( axi_hwpe_mst_rsp )
+  );
+
+  // Decouple the AXI master ports of HWPE
+  axi_cut #(
+    .Bypass ( 1'b0 ),
+    .aw_chan_t ( axi_hwpe_mst_aw_chan_t ),
+    .w_chan_t ( axi_hwpe_mst_w_chan_t ),
+    .b_chan_t ( axi_hwpe_mst_b_chan_t ),
+    .ar_chan_t ( axi_hwpe_mst_ar_chan_t ),
+    .r_chan_t ( axi_hwpe_mst_r_chan_t ),
+    .axi_req_t ( axi_hwpe_mst_req_t ),
+    .axi_resp_t ( axi_hwpe_mst_resp_t )
+  ) i_axi_cut_hwpe_mst (
+    .clk_i ( clk_i ),
+    .rst_ni ( rst_ni ),
+    .slv_req_i ( axi_hwpe_mst_req ),
+    .slv_resp_o ( axi_hwpe_mst_rsp ),
+    .mst_req_o ( axi_hwpe_cut_mst_req ),
+    .mst_resp_i ( axi_hwpe_cut_mst_rsp )
+  );
+
+  // Convert the AXI protocol to the request/grant protocol used by the control interface
+  axi_to_tcdm #(
+    .axi_req_t ( axi_hwpe_mst_req_t ),
+    .axi_rsp_t ( axi_hwpe_mst_resp_t ),
+    .tcdm_req_t ( hwpectrl_req_t ),
+    .tcdm_rsp_t ( hwpectrl_rsp_t ),
+    .IdWidth ( NarrowIdWidthOut ),
+    .AddrWidth ( HwpeCtrlAddrWidth ),
+    .DataWidth ( HwpeCtrlDataWidth )
+  ) i_axi_to_hwpe_ctrl (
+    .clk_i ( clk_i ),
+    .rst_ni ( rst_ni ),
+    .axi_req_i ( axi_hwpe_cut_mst_req ),
+    .axi_rsp_o ( axi_hwpe_cut_mst_rsp ),
+    .tcdm_req_o ( hwpe_ctrl_req ),
+    .tcdm_rsp_i ( hwpe_ctrl_rsp )
+  );
+
+  snitch_hwpe_subsystem #(
+    .tcdm_req_t ( tcdm_hwpe_req_t ),
+    .tcdm_rsp_t ( tcdm_hwpe_rsp_t ),
+    .periph_req_t ( hwpectrl_req_t ),
+    .periph_rsp_t ( hwpectrl_rsp_t ),
+    .HwpeDataWidth ( HwpeDataWidth ),
+    .IdWidth ( 8 ),
+    .NrCores ( NrCores ),
+    .TCDMDataWidth ( NarrowDataWidth )
+  ) i_snitch_hwpe_subsystem (
+    .clk_i ( clk_i ),
+    .rst_ni ( rst_ni ),
+    .test_mode_i ( 1'b0 ),
+    .tcdm_req_o ( tcdm_hwpe_req ),
+    .tcdm_rsp_i ( tcdm_hwpe_rsp ),
+    .hwpe_ctrl_req_i ( hwpe_ctrl_req ),
+    .hwpe_ctrl_rsp_o ( hwpe_ctrl_rsp )
+    // FIXME: snitch_hwpe_subsystem has no evt/busy ports yet; hwpe_evt and hwpe_busy are undriven
+    //.hwpe_evt_o (hwpe_evt),
+    //.hwpe_busy_o (hwpe_busy)
+  );
+
   // Optionally decouple the external narrow AXI master ports.
   axi_cut #(
     .Bypass ( !RegisterExtNarrow ),
diff --git a/hw/snitch_cluster/src/snitch_hwpe_subsystem.sv b/hw/snitch_cluster/src/snitch_hwpe_subsystem.sv
new file mode 100644
index 0000000000..3c0221fd3f
--- /dev/null
+++ b/hw/snitch_cluster/src/snitch_hwpe_subsystem.sv
@@ -0,0 +1,138 @@
+// Copyright 2025 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+
+`include "hci_helpers.svh"
+
+/// Wraps `redmule_top`: bridges its HCI and peripheral interfaces to TCDM-style request/response
+/// structs and clock-gates the accelerator under software control (register `ClkGateRegOffset`).
+module snitch_hwpe_subsystem
+  import hci_package::*;
+  import hwpe_ctrl_package::*;
+  import reqrsp_pkg::amo_op_e;
+#(
+  parameter type tcdm_req_t = logic,
+  parameter type tcdm_rsp_t = logic,
+  parameter type periph_req_t = logic,
+  parameter type periph_rsp_t = logic,
+  parameter int unsigned HwpeDataWidth = 256,
+  parameter int unsigned IdWidth = 8,
+  parameter int unsigned NrCores = 8,
+  parameter int unsigned TCDMDataWidth = 64
+) (
+  input logic clk_i,
+  input logic rst_ni,
+  input logic test_mode_i,
+
+  // TCDM interface (Master)
+  output tcdm_req_t tcdm_req_o,
+  input tcdm_rsp_t tcdm_rsp_i,
+
+  // HWPE control interface (Slave)
+  input periph_req_t hwpe_ctrl_req_i,
+  output periph_rsp_t hwpe_ctrl_rsp_o
+);
+
+  localparam logic [7:0] ClkGateRegOffset = 8'h9C;  // offset of the clock-gate register
+
+  localparam hci_size_parameter_t `HCI_SIZE_PARAM(tcdm) = '{
+    DW: HwpeDataWidth,
+    AW: DEFAULT_AW,
+    BW: DEFAULT_BW,
+    UW: DEFAULT_UW,
+    IW: DEFAULT_IW,
+    EW: 0,
+    EHW: 0
+  };
+
+  logic hwpe_clk;
+  logic clk_en;
+
+  // Currently unused
+  logic [NrCores-1:0][1:0] evt;
+  logic busy;
+
+  hwpe_ctrl_intf_periph #(.ID_WIDTH(IdWidth)) periph (.clk(clk_i));
+
+  hci_core_intf #(
+  `ifndef SYNTHESIS
+    .WAIVE_RSP3_ASSERT ( 1'b1 ),
+  `endif
+    .DW ( HwpeDataWidth )
+  ) tcdm (.clk(clk_i));
+
+  // request channel
+  assign tcdm_req_o.q_valid = tcdm.req;
+  assign tcdm_req_o.q.addr = tcdm.add;
+  assign tcdm_req_o.q.write = ~tcdm.wen;
+  assign tcdm_req_o.q.strb = tcdm.be;
+  assign tcdm_req_o.q.data = tcdm.data;
+  assign tcdm_req_o.q.amo = reqrsp_pkg::AMONone;
+  assign tcdm_req_o.q.user = '0;
+  // response channel
+  assign tcdm.gnt = tcdm_rsp_i.q_ready;
+  assign tcdm.r_valid = tcdm_rsp_i.p_valid;
+  assign tcdm.r_data = tcdm_rsp_i.p.data;
+  assign tcdm.r_opc = '0;
+  assign tcdm.r_user = '0;
+
+  always_comb begin
+    periph.req = '0;
+    periph.add = '0;
+    periph.wen = '0;
+    periph.be = '0;
+    periph.data = '0;
+    periph.id = '0;
+    hwpe_ctrl_rsp_o.q_ready = '0;
+    hwpe_ctrl_rsp_o.p.data = '0;
+    hwpe_ctrl_rsp_o.p_valid = '0;
+    // Accesses to the clock-gate register are served here; all others are forwarded to the HWPE.
+    if (hwpe_ctrl_req_i.q_valid && hwpe_ctrl_req_i.q.addr[7:0] == ClkGateRegOffset) begin
+      hwpe_ctrl_rsp_o.q_ready = '1;
+      hwpe_ctrl_rsp_o.p_valid = '1;  // NOTE(review): same-cycle response, read data is '0
+    end else begin
+      periph.req = hwpe_ctrl_req_i.q_valid;
+      periph.add = hwpe_ctrl_req_i.q.addr;
+      periph.wen = ~hwpe_ctrl_req_i.q.write;
+      periph.be = hwpe_ctrl_req_i.q.strb;
+      periph.data = hwpe_ctrl_req_i.q.data;
+      periph.id = hwpe_ctrl_req_i.q.user;
+      hwpe_ctrl_rsp_o.q_ready = periph.gnt;
+      hwpe_ctrl_rsp_o.p.data = periph.r_data;
+      hwpe_ctrl_rsp_o.p_valid = periph.r_valid;
+    end
+  end
+
+  // Clock-enable register: bit 0 of any write to `ClkGateRegOffset`; cleared on reset.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      clk_en <= '0;
+    end else if (hwpe_ctrl_req_i.q_valid && hwpe_ctrl_req_i.q.write &&
+                 hwpe_ctrl_req_i.q.addr[7:0] == ClkGateRegOffset) begin
+      clk_en <= hwpe_ctrl_req_i.q.data[0];
+    end
+  end
+
+  tc_clk_gating i_hwpe_clk_gate (
+    .clk_i ( clk_i ),
+    .en_i ( clk_en ),
+    .test_en_i ( test_mode_i ),  // keep the HWPE clocked in test mode regardless of clk_en
+    .clk_o ( hwpe_clk )
+  );
+
+  redmule_top #(
+    .ID_WIDTH ( IdWidth ),
+    .N_CORES ( NrCores ),
+    .DW ( HwpeDataWidth ),
+    .`HCI_SIZE_PARAM(tcdm) ( `HCI_SIZE_PARAM(tcdm) )
+  ) i_redmule_top (
+    .clk_i ( hwpe_clk ),
+    .rst_ni ( rst_ni ),
+    .test_mode_i ( test_mode_i ),
+    .evt_o ( evt ),
+    .busy_o ( busy ),
+    .tcdm ( tcdm ),
+    .periph ( periph )
+  );
+
+endmodule : snitch_hwpe_subsystem
diff --git a/target/snitch_cluster/sw.mk b/target/snitch_cluster/sw.mk
index c8121962db..4e642bd72d 100644
--- a/target/snitch_cluster/sw.mk
+++ b/target/snitch_cluster/sw.mk
@@ -75,6 +75,7 @@ APPS += sw/apps/log
 APPS += sw/apps/kbpcpa
 APPS += sw/apps/box3d1r
 APPS += sw/apps/j3d27pt
+APPS += sw/apps/redmule

 # Include Makefile from each app subdirectory
 $(foreach app,$(APPS), \
diff --git a/target/snitch_cluster/sw/apps/redmule/app.mk b/target/snitch_cluster/sw/apps/redmule/app.mk
new file mode 100644
index 0000000000..b4c4938143
--- /dev/null
+++ b/target/snitch_cluster/sw/apps/redmule/app.mk
@@ -0,0 +1,12
@@
+# Copyright 2023 ETH Zurich and University of Bologna.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Andrea Belano
+
+APP := redmule
+$(APP)_BUILD_DIR := $(ROOT)/target/snitch_cluster/sw/apps/$(APP)/build
+SRCS := $(ROOT)/target/snitch_cluster/sw/apps/$(APP)/src/$(APP).c
+$(APP)_INCDIRS := $(ROOT)/target/snitch_cluster/sw/apps/$(APP)/data
+
+include $(ROOT)/target/snitch_cluster/sw/apps/common.mk
diff --git a/target/snitch_cluster/sw/apps/redmule/data/data.h b/target/snitch_cluster/sw/apps/redmule/data/data.h
new file mode 100644
index 0000000000..bff2748273
--- /dev/null
+++ b/target/snitch_cluster/sw/apps/redmule/data/data.h
@@ -0,0 +1,922 @@
+// Copyright 2023 ETH Zurich and University of Bologna.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Yvan Tortorella
+//
+
+#ifndef __ARCHI_REDMULE_H__
+#define __ARCHI_REDMULE_H__
+
+#include <stdint.h>
+
+/*
+ * |========================================================================|
+ * ||                                                                      ||
+ * ||Control and generic configuration register layout                     ||
+ * |========================================================================|
+ * || # reg |  offset  |  bits   |   bitmask    ||  content                ||
+ * ||-------+----------+---------+--------------++-------------------------||
+ * ||    0  |  0x0000  |  31: 0  |  0xFFFFFFFF  ||  TRIGGER                ||
+ * ||    1  |  0x0004  |  31: 0  |  0xFFFFFFFF  ||  ACQUIRE                ||
+ * ||    2  |  0x0008  |  31: 0  |  0xFFFFFFFF  ||  EVT_ENABLE             ||
+ * ||    3  |  0x000c  |  31: 0  |  0xFFFFFFFF  ||  STATUS                 ||
+ * ||    4  |  0x0010  |  31: 0  |  0xFFFFFFFF  ||  RUNNING_JOB            ||
+ * ||    5  |  0x0014  |  31: 0  |  0xFFFFFFFF  ||  SOFT_CLEAR             ||
+ * |========================================================================|
+ * ||                                                                      ||
+ * ||Job-dependent registers layout                                        ||
+ * |========================================================================|
+ * || # reg |  offset  |  bits   |   bitmask    ||  content                ||
+ * ||-------+----------+---------+--------------++-------------------------||
+ * ||    0  |  0x0040  |  31: 0  |  0xFFFFFFFF  ||  X_ADDR                 ||
+ * ||-------+----------+---------+--------------++-------------------------||
+ * ||    1  |  0x0044  |  31: 0  |  0xFFFFFFFF  ||  W_ADDR                 ||
+ * ||-------+----------+---------+--------------++-------------------------||
+ * ||    2  |  0x0048  |  31: 0  |  0xFFFFFFFF  ||  Z_ADDR                 ||
+ * ||-------+----------+---------+--------------++-------------------------||
+ * ||    3  |  0x004C  |         |              ||  Matrix Config 0 Reg    ||
+ * ||       |          |  31:16  |  0xFFFF0000  ||  K Size (W Columns)     ||
+ * ||       |          |  15: 0  |  0x0000FFFF  ||  M Size (X Rows)        ||
+ * ||-------+----------+---------+--------------++-------------------------||
+ * ||    4  |  0x0050  |         |              ||  Matrix Config 1 Reg    ||
+ * ||       |          |  31: 0  |  0xFFFFFFFF  ||  N Size (X Cols/W Rows) ||
+ * ||-------+----------+---------+--------------++-------------------------||
+ * ||    5  |  0x0054  |         |              ||  Matrix Arithmetic Reg  ||
+ * ||       |          |  12:10  |  0x00001C00  ||  Operation selection    ||
+ * ||       |          |   9: 7  |  0x00000380  ||  Input/Output format    ||
+ * |========================================================================|
+ *
+ */
+
+#define ARCHI_CL_EVT_ACC0 0
+#define ARCHI_CL_EVT_ACC1 1
+
+// Base address
+#define REDMULE_BASE_ADD 0x10040000
+
+// Commands
+#define REDMULE_TRIGGER 0x00
+#define REDMULE_ACQUIRE 0x04
+#define REDMULE_FINISHED 0x08
+#define REDMULE_STATUS 0x0C
+#define REDMULE_RUNNING_JOB 0x10
+#define REDMULE_SOFT_CLEAR 0x14
+
+// Registers
+#define REDMULE_REG_OFFS 0x40
+#define REDMULE_REG_X_PTR 0x00
+#define REDMULE_REG_W_PTR 0x04
+#define REDMULE_REG_Z_PTR 0x08
+#define REDMULE_MCFG0_PTR 0x0C
+#define REDMULE_MCFG1_PTR 0x10
+#define REDMULE_ARITH_PTR 0x14
+
+#define CK_GATE_OFFS 0x9C
+
+// OPs definition
+#define MATMUL 0x0
+#define GEMM 0x1
+#define ADDMAX 0x2
+#define ADDMIN 0x3
+#define MULMAX 0x4
+#define MULMIN 0x5
+#define MAXMIN 0x6
+#define MINMAX 0x7
+
+// GEMM formats
+#define Float8 0x0
+#define Float16 0x1
+#define Float8Alt 0x2
+#define Float16Alt 0x3
+
+// FP Formats encoding
+#define FP16 0x2
+#define FP8 0x3
+#define FP16ALT 0x4
+#define FP8ALT 0x5
+
+#endif
+
+#ifndef __HAL_REDMULE_H__
+#define __HAL_REDMULE_H__
+
+/* LOW-LEVEL HAL */
+#define REDMULE_ADDR_BASE REDMULE_BASE_ADD
+#define REDMULE_ADDR_SPACE 0x00000100
+
+// Memory-mapped register accessors; arguments are parenthesized so expressions expand safely.
+#define HWPE_WRITE(value, offset) (*(volatile int *)(REDMULE_ADDR_BASE + (offset)) = (value))
+#define HWPE_READ(offset) (*(volatile int *)(REDMULE_ADDR_BASE + (offset)))
+
+static inline void redmule_x_add_set(unsigned int value) {
+  HWPE_WRITE(value, REDMULE_REG_OFFS + REDMULE_REG_X_PTR);
+}
+
+static inline void redmule_w_add_set(unsigned int value) {
+  HWPE_WRITE(value, REDMULE_REG_OFFS + REDMULE_REG_W_PTR);
+}
+
+static inline void redmule_z_add_set(unsigned int value) {
+  HWPE_WRITE(value, REDMULE_REG_OFFS + REDMULE_REG_Z_PTR);
+}
+
+static inline void redmule_mcfg_set(uint32_t mcfg0, uint32_t mcfg1) {
+  HWPE_WRITE(mcfg0, REDMULE_REG_OFFS + REDMULE_MCFG0_PTR);
+  HWPE_WRITE(mcfg1, REDMULE_REG_OFFS + REDMULE_MCFG1_PTR);
+}
+
+static inline void redmule_arith_set(uint32_t arith) {
+  HWPE_WRITE(arith, REDMULE_REG_OFFS + REDMULE_ARITH_PTR);
+}
+
+static inline void hwpe_trigger_job(void) { HWPE_WRITE(0, REDMULE_TRIGGER); }
+
+static inline int hwpe_acquire_job(void) { return HWPE_READ(REDMULE_ACQUIRE); }
+
+static inline unsigned int hwpe_get_status(void) { return HWPE_READ(REDMULE_STATUS); }
+
+static inline void hwpe_soft_clear(void) { HWPE_WRITE(0, REDMULE_SOFT_CLEAR); }
+
+static inline void hwpe_cg_enable(void) { HWPE_WRITE(1, CK_GATE_OFFS); }
+
+static inline void hwpe_cg_disable(void) { HWPE_WRITE(0, CK_GATE_OFFS); }
+
+// Programs one RedMulE job: the X, W and Z addresses, the matrix sizes and the arithmetic
+// configuration. Defined (not only declared) here, so include this header from a single
+// translation unit.
+void redmule_cfg(unsigned int x, unsigned int w, unsigned int z, uint16_t m_size, uint16_t n_size,
+                 uint16_t k_size, uint8_t gemm_op, uint8_t gemm_fmt) {
+  // Widen to uint32_t before shifting: a uint16_t promotes to (signed) int, and shifting a
+  // value >= 0x8000 left by 16 would overflow it, which is undefined behavior.
+  const uint32_t mcfg_reg0 = ((uint32_t)k_size << 16) | (uint32_t)m_size;
+  const uint32_t mcfg_reg1 = (uint32_t)n_size;
+  const uint32_t arith_reg = ((uint32_t)gemm_op << 10) | ((uint32_t)gemm_fmt << 7);
+
+  redmule_x_add_set(x);
+  redmule_w_add_set(w);
+  redmule_z_add_set(z);
+  redmule_mcfg_set(mcfg_reg0, mcfg_reg1);
+  redmule_arith_set(arith_reg);
+}
+
+#endif
+
+#ifndef REDMULE_UTILS_H
+#define REDMULE_UTILS_H
+
+#define ERR 0x0011
+
+// Compares `len` 32-bit words as pairs of 16-bit values. A word counts as one error when either
+// half-word differs from the golden one by more than ERR. Returns the number of such words.
+int redmule16_compare_int(uint32_t *actual_z, uint32_t *golden_z, int len) {
+  int errors = 0;
+
+  for (int i = 0; i < len; i++) {
+    const uint32_t actual_word = actual_z[i];
+    const uint32_t golden_word = golden_z[i];
+    int error = 0;
+
+    // Lane 0 is the least significant half-word, lane 1 the most significant one.
+    for (int lane = 0; lane < 2; lane++) {
+      const uint16_t actual_hw = (uint16_t)(actual_word >> (16 * lane));
+      const uint16_t golden_hw = (uint16_t)(golden_word >> (16 * lane));
+      const uint16_t diff =
+          (uint16_t)((actual_hw > golden_hw) ? (actual_hw - golden_hw) : (golden_hw - actual_hw));
+
+      if (diff > ERR) {
+        error = 1;
+#ifdef VERBOSE
+        printf("diff: 0x%08x\n", diff);
+        printf("%s: Error!\n", lane ? "MSW" : "LSW");
+#endif
+      }
+    }
+
+    errors += error;
+
+#ifdef DEBUG
+    printf("Golden: 0x%08x; Actual: 0x%08x,\n", golden_word, actual_word);
+#endif
+
+#ifdef VERBOSE
+    if (error) {
+      if (errors == 1) printf(" golden <- actual @ address @ index\n");
+      printf("0x%08x <- 0x%08x @ 0x%08x @ 0x%08x\n", golden_word, actual_word,
+             (unsigned int)(uintptr_t)(actual_z + i), i * 4);
+    }
+#endif
+  }
+  return errors;
+}
+
+// Byte-wise variant of redmule16_compare_int: each 32-bit word is checked as four 8-bit values
+// (Byte0 is the least significant). Returns the number of words with at least one byte off by
+// more than ERR.
+int redmule8_compare_int(uint32_t *actual_z, uint32_t *golden_z, int len) {
+  int errors = 0;
+
+  for (int i = 0; i < len; i++) {
+    const uint32_t actual_word = actual_z[i];
+    const uint32_t golden_word = golden_z[i];
+    int error = 0;
+
+    for (int byte = 0; byte < 4; byte++) {
+      const uint8_t actual_b = (uint8_t)(actual_word >> (8 * byte));
+      const uint8_t golden_b = (uint8_t)(golden_word >> (8 * byte));
+      const uint8_t diff =
+          (uint8_t)((actual_b > golden_b) ? (actual_b - golden_b) : (golden_b - actual_b));
+
+      if (diff > ERR) {
+        error = 1;
+#ifdef VERBOSE
+        printf("diff: 0x%08x\n", diff);
+        printf("Byte%d: Error!\n", byte);
+#endif
+      }
+    }
+
+    errors += error;
+
+#ifdef DEBUG
+    printf("Golden: 0x%08x; Actual: 0x%08x,\n", golden_word, actual_word);
+#endif
+
+#ifdef VERBOSE
+    if (error) {
+      if (errors == 1) printf(" golden <- actual @ address @ index\n");
+      printf(" 0x%08x <- 0x%08x @ 0x%08x @ 0x%08x\n", golden_word, actual_word,
+             (unsigned int)(uintptr_t)(actual_z + i), i * 4);
+    }
+#endif
+  }
+  return errors;
+}
+
+#endif
+
+/* Header file generated by RedMulE Golden Model */
+uint32_t golden [512] = {
+  0x476e47e1,
+  0x478847ea,
+  0x47274860,
+  0x47cf46be,
+  0x485747dc,
+  0x479147a8,
+  0x4564459c,
+  0x468a4743,
+  0x47264840,
+  0x483a47b7,
+  0x478747e6,
+  0x47844688,
+  0x476646a2,
+  0x490547f8,
+  0x484d4788,
+  0x4812483e,
+  0x486d482c,
+  0x48a647d0,
+  0x481f494d,
+  0x489047b6,
+  0x48024757,
+  0x47f74872,
+  0x47b54691,
+  0x48034856,
+  0x483c486e,
+  0x4807486b,
+  0x47714880,
+  0x48ef479b,
+  0x481f47a5,
+  0x488747fe,
+  0x48d748cf,
+  0x486c4878,
+  0x479a483f,
+  0x48834797,
+  0x484948cc,
+  0x486346e0,
+
0x46b3477f, + 0x47c548d1, + 0x48524835, + 0x4700471e, + 0x47df46c7, + 0x48d44695, + 0x487d484f, + 0x481848ab, + 0x476c48d4, + 0x48544861, + 0x484248a2, + 0x490a48ea, + 0x4851481d, + 0x488348c4, + 0x48644825, + 0x487146d1, + 0x480e4839, + 0x4832493b, + 0x482f4919, + 0x4757482b, + 0x488946f6, + 0x48d24721, + 0x49284885, + 0x493f48f6, + 0x480348d0, + 0x46c14786, + 0x48ec4802, + 0x487f4974, + 0x48e14742, + 0x4891480f, + 0x48284863, + 0x481546ae, + 0x486d4834, + 0x48164875, + 0x481047c8, + 0x48674795, + 0x481146a9, + 0x48204761, + 0x49064835, + 0x488348df, + 0x488648cb, + 0x481a482c, + 0x487c48b6, + 0x48f5495b, + 0x481c47be, + 0x496b484b, + 0x48824856, + 0x4847474d, + 0x481e482f, + 0x484d4920, + 0x476848b7, + 0x481047f9, + 0x486b480d, + 0x489746b3, + 0x4959485a, + 0x48a7490c, + 0x4843489c, + 0x479447f5, + 0x480d4804, + 0x4890496a, + 0x486747bf, + 0x488f48e3, + 0x4858485a, + 0x477a461c, + 0x479a470c, + 0x48644848, + 0x47b74869, + 0x46ec47c7, + 0x486f46de, + 0x483f47f4, + 0x490b483f, + 0x48fb48e4, + 0x481c48c4, + 0x46a44678, + 0x47bf46fb, + 0x470e48be, + 0x4822463e, + 0x47894809, + 0x470d4808, + 0x477f4566, + 0x47f6485d, + 0x47c947d4, + 0x475b480b, + 0x46f1473e, + 0x469a46b0, + 0x485046ad, + 0x48804778, + 0x482c4824, + 0x46c747dd, + 0x47614805, + 0x48724806, + 0x482148e8, + 0x47af456e, + 0x4864471a, + 0x481c4792, + 0x484b4606, + 0x4765476d, + 0x472148af, + 0x48444891, + 0x47324819, + 0x483046ac, + 0x48334686, + 0x48d747a7, + 0x48784846, + 0x47ba4883, + 0x461946b1, + 0x475246a4, + 0x4780482a, + 0x47934572, + 0x47ed4670, + 0x46604682, + 0x472044d2, + 0x462c468f, + 0x469047ce, + 0x470847c8, + 0x46804645, + 0x46a546d4, + 0x487545e3, + 0x48bd4657, + 0x483d4854, + 0x472c4701, + 0x486a4880, + 0x485a4862, + 0x48d349b3, + 0x489c473e, + 0x485448bb, + 0x487747f0, + 0x47c3467e, + 0x48af4803, + 0x487948a9, + 0x47a14867, + 0x4815484f, + 0x48574808, + 0x484e47d6, + 0x48ae48d0, + 0x48e34862, + 0x48ea48bb, + 0x469d4717, + 0x477a47e3, + 0x46dc4853, + 0x470a46e1, + 0x485b46c9, + 0x47904699, 
+ 0x46ee4526, + 0x470846e3, + 0x4727480b, + 0x472d4818, + 0x459d47de, + 0x4837475d, + 0x48164648, + 0x488f4715, + 0x485047b4, + 0x47c54762, + 0x46cb46b6, + 0x478646ea, + 0x467647e4, + 0x47004645, + 0x46ba46a7, + 0x463a4711, + 0x46e644be, + 0x468846c7, + 0x46ea4812, + 0x47394852, + 0x46494751, + 0x47df45fe, + 0x47a6455a, + 0x48174728, + 0x477147a0, + 0x46bb47fc, + 0x47ae4700, + 0x4812480a, + 0x47fb4958, + 0x475a4836, + 0x48804822, + 0x48004830, + 0x48494687, + 0x46a046f0, + 0x483748b3, + 0x473e48fd, + 0x47744806, + 0x48f547b1, + 0x484446d2, + 0x498647e9, + 0x488b48b4, + 0x47a5486b, + 0x47c84787, + 0x47e74817, + 0x47c148b2, + 0x483247ad, + 0x48754776, + 0x48094771, + 0x463d460a, + 0x47ec46ca, + 0x485348b5, + 0x46e4489a, + 0x47994728, + 0x485a47bc, + 0x482345b1, + 0x48784802, + 0x481448dc, + 0x483a473d, + 0x46f14773, + 0x47724843, + 0x48204904, + 0x484447ae, + 0x48cb481b, + 0x47da4828, + 0x46bb4555, + 0x46ec4732, + 0x47a3484d, + 0x4792489f, + 0x473a47a9, + 0x480c4767, + 0x480c4666, + 0x48ec4806, + 0x49004823, + 0x484c480a, + 0x48e64866, + 0x48d048a3, + 0x484f494c, + 0x4884479c, + 0x491448f7, + 0x48614898, + 0x4880473a, + 0x48a148d3, + 0x487d48d3, + 0x488848be, + 0x48b9483f, + 0x48f44877, + 0x492347fd, + 0x49e54871, + 0x493c4910, + 0x48f44889, + 0x47bb46cc, + 0x47a647e2, + 0x45f148af, + 0x48034596, + 0x47f446bc, + 0x472046d2, + 0x48014577, + 0x47af4699, + 0x47424818, + 0x470a46fa, + 0x46c34654, + 0x48054659, + 0x4726469e, + 0x48724702, + 0x4775482a, + 0x4793478b, + 0x4778478e, + 0x485f4881, + 0x47f5492e, + 0x489246ff, + 0x488c4828, + 0x47714770, + 0x482c45c0, + 0x48de4880, + 0x487148d3, + 0x48594895, + 0x481b47d0, + 0x48324696, + 0x48df470a, + 0x48d047b1, + 0x48bb48ca, + 0x47b9484f, + 0x483c4848, + 0x4867484c, + 0x483a491a, + 0x486b4786, + 0x48c1483f, + 0x486947f2, + 0x47584616, + 0x480e485e, + 0x48a248f4, + 0x48c148d7, + 0x47b24861, + 0x48e4485c, + 0x48e34721, + 0x49804896, + 0x493448e7, + 0x48494854, + 0x484447ec, + 0x47f6484b, + 0x48a4486f, + 0x48154754, + 
0x48834856, + 0x48314785, + 0x47a84660, + 0x47f24828, + 0x480d48f3, + 0x46f648c4, + 0x468847ca, + 0x48bb45dc, + 0x48554607, + 0x48dd478a, + 0x48d54831, + 0x4794487a, + 0x478446de, + 0x4834475d, + 0x480348b9, + 0x480a46c8, + 0x48074774, + 0x4817478e, + 0x47a24735, + 0x475046e6, + 0x4792486b, + 0x48084873, + 0x473d478c, + 0x48a146aa, + 0x47ca4600, + 0x48b2476e, + 0x480248c6, + 0x47934858, + 0x480a476e, + 0x47d54838, + 0x48534970, + 0x484746fc, + 0x492b483f, + 0x489747c3, + 0x47b34731, + 0x47a84825, + 0x484e4862, + 0x48564935, + 0x470e48dd, + 0x48e24829, + 0x484f4703, + 0x498447ad, + 0x48b948aa, + 0x48124876, + 0x46e6476b, + 0x47c046e6, + 0x476a4839, + 0x47a74688, + 0x478a46de, + 0x473246b8, + 0x47db448b, + 0x47c246f6, + 0x47064817, + 0x471947c5, + 0x46fc4798, + 0x47af45e3, + 0x47cc4605, + 0x482547b0, + 0x484e486b, + 0x4709471a, + 0x47da4825, + 0x47c247bb, + 0x47b64904, + 0x485c45da, + 0x486e4891, + 0x4755478b, + 0x486d463f, + 0x4788479a, + 0x4856489e, + 0x48684867, + 0x46dd4800, + 0x4837474d, + 0x481d46d5, + 0x49014810, + 0x481a4879, + 0x48754883, + 0x489b4899, + 0x494648c5, + 0x48f849a5, + 0x489547b1, + 0x48ae48c9, + 0x4845481c, + 0x48c546ba, + 0x48844898, + 0x489b492a, + 0x488c4904, + 0x48424888, + 0x48e747d6, + 0x48e147f9, + 0x49354897, + 0x4988490d, + 0x483f48da, + 0x452546b8, + 0x465e4791, + 0x4731478c, + 0x470b452d, + 0x473445ba, + 0x46a04632, + 0x467444f7, + 0x470a464e, + 0x46bc4764, + 0x46db466a, + 0x45ae46ba, + 0x45c6458c, + 0x4685464f, + 0x4770468e, + 0x480c4761, + 0x47224789, + 0x46f4467b, + 0x47d2471d, + 0x47954878, + 0x476e45c3, + 0x47fb475e, + 0x47be46da, + 0x469f45bf, + 0x46c64635, + 0x471b4802, + 0x47ab47d4, + 0x4631460c, + 0x48014623, + 0x4817462b, + 0x48544675, + 0x47e14762, + 0x47914722, + 0x48694879, + 0x48c448ad, + 0x48574907, + 0x4884474a, + 0x485a489b, + 0x4848484d, + 0x489346a6, + 0x485e4833, + 0x48f24892, + 0x481a491c, + 0x48974770, + 0x48764785, + 0x489e475f, + 0x48e44811, + 0x4909491c, + 0x48364899, + 0x4901490d, + 0x496c491c, + 0x494b4a1e, 
+ 0x49be4868, + 0x49ea497c, + 0x498948fd, + 0x489247a9, + 0x490e4982, + 0x498549c2, + 0x499e4a42, + 0x48bf496a, + 0x493848b6, + 0x49aa48fb, + 0x49fd4984, + 0x4a2849c3, + 0x498449f9, + 0x461646bc, + 0x474846eb, + 0x46a5483e, + 0x47a74743, + 0x482e4767, + 0x48344701, + 0x46944515, + 0x47cb46ec, + 0x468c4838, + 0x471b4803, + 0x471c46f2, + 0x481e4616, + 0x481245ef, + 0x4874477c, + 0x47e247f0, + 0x47c1482a, + 0x47684782, + 0x46e747c6, + 0x475e490d, + 0x4806472b, + 0x47ce4727, + 0x47af4842, + 0x472c466c, + 0x47d446cc, + 0x47fd484b, + 0x46b74881, + 0x466c477d, + 0x48534692, + 0x47d645e2, + 0x489447e4, + 0x48084860, + 0x46f4484f, + }; + /* Header file generated by RedMulE Golden Model */ + #ifndef __TENSOR_DIM__ + #define __TENSOR_DIM__ + + #define M_SIZE 32 + #define N_SIZE 32 + #define K_SIZE 32 + #define SRC_FMT FP16 + #define DST_FMT FP16 + #define FPFORMAT 16 + uint8_t gemm_ops = GEMM; + + #endif + /* Header file generated by RedMulE Golden Model */ + uint16_t w_inp [1024] = { + 0x39a9, 0x2125, 0x34df, 0x3860, 0x3a54, 0x3a7d, 0x326d, 0x3a7c, 0x3960, 0x358d, 0x3a38, 0x304c, 0x2fb3, 0x35b0, 0x3855, 0x3b07, 0x3118, 0x3265, 0x38c6, 0x3af3, 0x3503, 0x35e1, 0x3920, 0x29e5, 0x3a90, 0x3806, 0x31c5, 0x3b36, 0x38cd, 0x3b8f, 0x3882, 0x3a04, + 0x35a7, 0x349d, 0x3ae0, 0x374d, 0x2dfc, 0x3859, 0x3afa, 0x3019, 0x38b3, 0x3be0, 0x352c, 0x374d, 0x3473, 0x35ae, 0x38b9, 0x344f, 0x3bf7, 0x37ba, 0x3acc, 0x3757, 0x3ba3, 0x3a5e, 0x3bac, 0x39b3, 0x38c7, 0x392e, 0x3938, 0x3b71, 0x3002, 0x3a3a, 0x3b95, 0x3a6b, + 0x29aa, 0x3990, 0x388a, 0x390a, 0x3656, 0x3478, 0x3a76, 0x352a, 0x1fdf, 0x3859, 0x3afe, 0x357b, 0x3850, 0x3b0e, 0x3788, 0x3499, 0x357f, 0x354c, 0x3bdc, 0x30b8, 0x3aaa, 0x346d, 0x384c, 0x3bc2, 0x36b8, 0x2db6, 0x2ae2, 0x3670, 0x3ac0, 0x382b, 0x38a1, 0x3170, + 0x3360, 0x33d9, 0x326d, 0x39ae, 0x3962, 0x3439, 0x26d8, 0x3a3d, 0x3125, 0x3933, 0x2caf, 0x30e5, 0x2c6a, 0x3bfa, 0x3a15, 0x38eb, 0x396a, 0x260d, 0x2813, 0x30d0, 0x3962, 0x3818, 0x2d15, 0x1a6b, 0x3a39, 0x3a17, 0x392d, 0x38c2, 0x3535, 
0x3992, 0x379a, 0x361c, + 0x3a49, 0x340b, 0x39e8, 0x2f26, 0x33a4, 0x3a37, 0x388d, 0x31af, 0x3918, 0x3875, 0x303b, 0x3aed, 0x3027, 0x365e, 0x3b0c, 0x3540, 0x38d3, 0x3ab6, 0x2c2f, 0x3b91, 0x3a55, 0x2a7f, 0x3125, 0x3744, 0x3995, 0x34c4, 0x39e4, 0x3854, 0x3318, 0x3114, 0x3adf, 0x39d0, + 0x390c, 0x3b88, 0x2f0d, 0x3abc, 0x2dd9, 0x3983, 0x1dc9, 0x374e, 0x3b5e, 0x39bd, 0x290d, 0x384f, 0x1106, 0x343c, 0x3a37, 0x37b0, 0x39fa, 0x39d6, 0x3842, 0x3984, 0x329d, 0x2f8d, 0x3bb3, 0x39c6, 0x2a41, 0x3b58, 0x3a3d, 0x35ab, 0x38be, 0x3a28, 0x34da, 0x3864, + 0x3a98, 0x35fb, 0x39e3, 0x38f8, 0x394b, 0x3432, 0x3290, 0x3a3c, 0x2dc4, 0x38f5, 0x3a1e, 0x3b47, 0x3203, 0x304a, 0x33a3, 0x3492, 0x3930, 0x3060, 0x398d, 0x3b70, 0x349f, 0x3893, 0x26e9, 0x3697, 0x2d88, 0x3926, 0x2d0a, 0x2911, 0x3731, 0x35f6, 0x3b6f, 0x3ad9, + 0x3462, 0x3a7b, 0x3859, 0x388a, 0x3bf1, 0x3205, 0x2a98, 0x3bcb, 0x3999, 0x36aa, 0x3a70, 0x3957, 0x3439, 0x301e, 0x39de, 0x39c9, 0x2cb2, 0x364d, 0x341a, 0x3b13, 0x37a4, 0x39ec, 0x363d, 0x335e, 0x399e, 0x3819, 0x319b, 0x3b3d, 0x31c4, 0x26a2, 0x300d, 0x3b63, + 0x3a5c, 0x35ea, 0x3b6e, 0x3973, 0x2eee, 0x3b6a, 0x31c2, 0x32bd, 0x3bdb, 0x39fe, 0x249b, 0x3818, 0x34a2, 0x35b2, 0x381d, 0x2bc9, 0x3a1f, 0x3655, 0x38f2, 0x3580, 0x3707, 0x37a8, 0x3316, 0x31e6, 0x36bc, 0x3887, 0x383d, 0x2c9f, 0x2d54, 0x3bf5, 0x36f7, 0x31cb, + 0x3927, 0x3a8b, 0x3b75, 0x33fb, 0x395d, 0x281a, 0x361f, 0x35f3, 0x367d, 0x3b90, 0x3886, 0x2ee1, 0x3b67, 0x3214, 0x39d4, 0x3535, 0x2d50, 0x2da5, 0x3728, 0x37c9, 0x366f, 0x3187, 0x3abd, 0x3533, 0x3413, 0x37e7, 0x36e9, 0x3a5b, 0x3a8e, 0x36a8, 0x3510, 0x3431, + 0x30a8, 0x3197, 0x38d3, 0x1988, 0x3bbe, 0x396b, 0x3b5b, 0x3905, 0x3a2f, 0x3a54, 0x391e, 0x3b91, 0x3298, 0x37d6, 0x3393, 0x3bdf, 0x3a52, 0x3970, 0x3b81, 0x346e, 0x3432, 0x367c, 0x3503, 0x3ad8, 0x3152, 0x39cb, 0x3520, 0x39eb, 0x3333, 0x39b9, 0x2fad, 0x33e4, + 0x38b6, 0x3bca, 0x34b8, 0x3535, 0x39e0, 0x32cc, 0x3150, 0x3a03, 0x3b72, 0x3010, 0x29e7, 0x3477, 0x38c9, 0x36ca, 0x38a8, 0x3bb2, 0x2daf, 0x3b30, 0x3372, 0x351a, 0x3487, 
0x3940, 0x31c3, 0x3819, 0x3809, 0x37cf, 0x399e, 0x2141, 0x350c, 0x3174, 0x3982, 0x33a2, + 0x398a, 0x362a, 0x364d, 0x3b8a, 0x3a9e, 0x389f, 0x3432, 0x388f, 0x36ae, 0x3652, 0x3a68, 0x335f, 0x30b4, 0x395c, 0x36c3, 0x3a86, 0x3720, 0x3894, 0x35ae, 0x34e0, 0x391d, 0x372c, 0x3376, 0x372d, 0x35f2, 0x37c4, 0x3bef, 0x39e5, 0x382c, 0x3b61, 0x3b91, 0x3019, + 0x3a56, 0x3931, 0x3b7c, 0x33b5, 0x3a8e, 0x2820, 0x38ab, 0x322f, 0x30fe, 0x3725, 0x372e, 0x3901, 0x3472, 0x2e00, 0x2d33, 0x3248, 0x3b9e, 0x39ac, 0x3a47, 0x3aa5, 0x3ba9, 0x2d3b, 0x3bc3, 0x3a9b, 0x3756, 0x34ed, 0x3b26, 0x399a, 0x3925, 0x3ad7, 0x37bd, 0x3992, + 0x360d, 0x28f3, 0x3808, 0x3850, 0x392f, 0x3742, 0x3b59, 0x3943, 0x3833, 0x3b9e, 0x3960, 0x31d0, 0x3780, 0x25d7, 0x3017, 0x2c59, 0x370e, 0x3bc2, 0x389f, 0x2b94, 0x38c6, 0x3a1b, 0x3901, 0x38a4, 0x367d, 0x31bb, 0x39be, 0x3bc8, 0x3843, 0x3864, 0x38c6, 0x39c1, + 0x38e5, 0x3b7e, 0x3330, 0x330b, 0x35c9, 0x3b82, 0x3af8, 0x34a1, 0x3a70, 0x2182, 0x3ad6, 0x36e7, 0x3584, 0x349e, 0x3886, 0x3856, 0x335f, 0x31a4, 0x304b, 0x3104, 0x363b, 0x3bc1, 0x2f35, 0x3964, 0x32e2, 0x30bc, 0x355b, 0x3b66, 0x38e3, 0x3a3c, 0x2d59, 0x3a42, + 0x3277, 0x35fc, 0x3a66, 0x3746, 0x3a13, 0x3235, 0x2e67, 0x36f0, 0x3021, 0x3b60, 0x3a14, 0x35ff, 0x31be, 0x3885, 0x2ff8, 0x3786, 0x382e, 0x2d2b, 0x3888, 0x3bd0, 0x3367, 0x391d, 0x37de, 0x3810, 0x346f, 0x395e, 0x31f3, 0x3bda, 0x3b70, 0x326f, 0x36a6, 0x34dc, + 0x34bc, 0x3ac7, 0x30fb, 0x3876, 0x3bee, 0x350d, 0x33ca, 0x3a97, 0x369b, 0x3842, 0x3bca, 0x3a87, 0x3810, 0x3aac, 0x368b, 0x20d0, 0x35b1, 0x38d7, 0x2e4d, 0xd6a, 0x37bc, 0x2da8, 0x36aa, 0x340c, 0x395a, 0x34a1, 0x3b22, 0x3901, 0x3bfe, 0x34b6, 0x3b31, 0x3a98, + 0x331f, 0x33cf, 0x3837, 0x2e1d, 0x35f7, 0x369f, 0x303e, 0x32f5, 0x1769, 0x397d, 0x2c0d, 0x39b9, 0x3573, 0x3ae1, 0x372b, 0x3b06, 0x3a1e, 0x3965, 0x3862, 0x381c, 0x39c7, 0x2eae, 0x3b34, 0x3998, 0x2ab4, 0x3840, 0x203f, 0x3b33, 0x365d, 0x38a2, 0x3512, 0x3a33, + 0x399c, 0x36e7, 0x3886, 0x3451, 0x328b, 0x38f6, 0x38e1, 0x386e, 0x32f9, 0x2ef1, 0x36f8, 0x3776, 0x36d7, 
0x340e, 0x39de, 0x3520, 0x3b2f, 0x3b49, 0x3afc, 0x3909, 0x386f, 0x355f, 0x33f0, 0x3bf1, 0x3328, 0x371e, 0x2f04, 0x39ad, 0x394f, 0x3ae7, 0x3784, 0x2a89, + 0x237d, 0x38f9, 0x3a20, 0x39be, 0x396f, 0x2f16, 0x3297, 0x38a9, 0x3b3b, 0x3b65, 0x30e6, 0x3252, 0x27c1, 0x3825, 0x3970, 0x32d7, 0x31ac, 0x3a6b, 0x3a44, 0x342e, 0x284c, 0x35dc, 0x3974, 0x3208, 0x3834, 0x3ba4, 0x3b5c, 0x35e2, 0x3b99, 0x368e, 0x2c44, 0x3a94, + 0x3ab3, 0x397f, 0x3aca, 0x3a83, 0x3bdf, 0x38e0, 0x342e, 0x31f0, 0x3b28, 0x23cd, 0x3622, 0x32e8, 0x306e, 0x3ac4, 0x2a3b, 0x344d, 0x3a90, 0x332d, 0x3970, 0x25ee, 0x34a7, 0x37d1, 0x280f, 0x393e, 0x383d, 0x33e9, 0x3804, 0x347b, 0x3bd1, 0x381e, 0x3a11, 0x3903, + 0x3b65, 0x317c, 0x3044, 0x3705, 0x3bc9, 0x3a16, 0x3255, 0x28d9, 0x2d5a, 0x3806, 0x3502, 0x3818, 0x341b, 0x3417, 0x34d7, 0x2ea3, 0x3651, 0x2ff3, 0x3a75, 0x3796, 0x3b5e, 0x319d, 0x3966, 0x3b37, 0x302f, 0x34f9, 0x379e, 0x3628, 0x39c0, 0x3831, 0x351d, 0x3858, + 0x36c9, 0x3674, 0x35ef, 0x36e0, 0x39fa, 0x3517, 0x3a6b, 0x3a48, 0x37a1, 0x2cb4, 0x3956, 0x3919, 0x2eb4, 0x2f5c, 0x3b23, 0x3b46, 0x376e, 0x3bb3, 0x3484, 0x3645, 0x36b1, 0x3a9e, 0x3b18, 0x3103, 0x3add, 0x3425, 0x3b68, 0x308c, 0x38aa, 0x3a9a, 0x38ec, 0x3be4, + 0x3a93, 0x39c8, 0x390d, 0x3423, 0x3962, 0x38e5, 0x3755, 0x381a, 0x3b28, 0x3ac9, 0x3a4d, 0x39f1, 0x3a4d, 0x317b, 0x24a6, 0x342e, 0x38cc, 0x30fd, 0x3547, 0x32fd, 0x31bd, 0x356e, 0x33c7, 0x34c4, 0x3aaf, 0x3918, 0x37c2, 0x39ab, 0x3781, 0x348c, 0x341f, 0x34b1, + 0x31d1, 0x37b6, 0x2ee3, 0x395b, 0x3927, 0x2b41, 0x266f, 0x337a, 0x3a35, 0x374e, 0x379c, 0x2f40, 0x3884, 0x3bd0, 0x3358, 0x2d89, 0x3416, 0x3943, 0x3bbf, 0x3a46, 0x353f, 0x3748, 0x3a1b, 0x2461, 0x3654, 0x3b69, 0x2fab, 0x3bc3, 0x34af, 0x2e72, 0x39fd, 0x344a, + 0x3711, 0x36a1, 0x39c6, 0x2c9d, 0x3b6a, 0x3b4b, 0x3608, 0x3893, 0x2022, 0x2e97, 0x3925, 0x3523, 0x3a8a, 0x3aa6, 0x33cd, 0x3a7d, 0x3bac, 0x3a77, 0x3417, 0x2794, 0x3499, 0x3494, 0x3027, 0x3ae3, 0x2f21, 0x36de, 0x3290, 0x3035, 0x394b, 0x2f89, 0x38ba, 0x2c23, + 0x347e, 0x319d, 0x322e, 0x3be6, 0x3811, 
0x2d80, 0x39e0, 0x39f0, 0x2a17, 0x38dd, 0x377e, 0x39e3, 0x362a, 0x3bcb, 0x2861, 0x3ae3, 0x39d5, 0x3083, 0x3081, 0x3ae9, 0x2cbd, 0x3bb9, 0x3941, 0x3b0d, 0x2e28, 0x38e1, 0x29c2, 0x3b19, 0x39dd, 0x2de1, 0x3778, 0x33fb, + 0x37a0, 0x374d, 0x38ab, 0x3847, 0x384a, 0x3b4b, 0x2c76, 0x396d, 0x3893, 0x3612, 0x3133, 0x3612, 0x3aff, 0x35cc, 0x3505, 0x39c5, 0x32f7, 0x3a19, 0x38bc, 0x3401, 0x3382, 0x3a9f, 0x3346, 0x36d6, 0x2ef7, 0x3858, 0x3373, 0x3a01, 0x2acd, 0x3b76, 0x2a49, 0x3986, + 0x30a0, 0x371e, 0x35e9, 0x3a74, 0x3298, 0x380c, 0x3ac0, 0x3bd4, 0x387d, 0x3a55, 0x38ad, 0x3aa6, 0x37c3, 0x390c, 0x3b54, 0x3bc4, 0x39e6, 0x3ab6, 0x39ab, 0x3729, 0x38a1, 0x3aee, 0x244c, 0x30d2, 0x37ec, 0x3994, 0x3a76, 0x343f, 0x39f7, 0x399b, 0x399e, 0x35b8, + 0x330c, 0x3370, 0x2ecc, 0x32ac, 0x349d, 0x3a7f, 0x3afe, 0x3676, 0x351c, 0x3729, 0x37a6, 0x39a6, 0x321a, 0x2c5a, 0x39fa, 0x308d, 0x3aa9, 0x2d95, 0x3aa5, 0x3841, 0x3911, 0x249b, 0x214f, 0x373e, 0x21c2, 0x3bf4, 0x3a03, 0x3b5c, 0x3a5f, 0x3b16, 0x3a3f, 0x324d, + 0x22ff, 0x330e, 0x2e28, 0x3b56, 0x3a5a, 0x37ae, 0x3760, 0x372e, 0x3aac, 0x3867, 0x34e4, 0x3640, 0x38df, 0x3642, 0x3513, 0x357c, 0x33e8, 0x35c4, 0x3868, 0x39fe, 0x38cc, 0x200a, 0x2ef3, 0x3b91, 0x3b07, 0x33ac, 0x379d, 0x36a6, 0x3418, 0x366f, 0x3a14, 0x3544 + }; /* Header file generated by RedMulE Golden Model */ + uint16_t x_inp [1024] = { + 0x3807, 0x3a98, 0x3244, 0x3913, 0x3944, 0x3627, 0x3971, 0x3aa4, 0x3562, 0x3a1e, 0x2ef7, 0x3641, 0x332e, 0x3bdf, 0x3a94, 0x3685, 0x38a5, 0x31ad, 0x2842, 0x37fb, 0x3019, 0x3801, 0x2ccd, 0x388e, 0x38af, 0x3437, 0x2ce3, 0x36f8, 0x38ab, 0x2dad, 0x391f, 0x3401, + 0x3921, 0x39f6, 0x3bd9, 0x390f, 0x27d4, 0x3ae8, 0x3995, 0x38b7, 0x330a, 0x389d, 0x326d, 0x3754, 0x368b, 0x3bcd, 0x2824, 0x3978, 0x348b, 0x3ba5, 0x2c9e, 0x3baa, 0x3799, 0x33d2, 0x38bb, 0x3890, 0x2d36, 0x3086, 0x398f, 0x3587, 0x34ff, 0x3887, 0x31d3, 0x3ace, + 0x26ad, 0x3409, 0x30e1, 0x3a37, 0x3b31, 0x3bc5, 0x3945, 0x362e, 0x39a6, 0x3820, 0x3378, 0x3791, 0x3276, 0x3345, 0x34dc, 0x3254, 0x38fd, 0x3910, 
0x2b99, 0x3729, 0x3696, 0x387b, 0x38c0, 0x37a5, 0x3a4b, 0x3a93, 0x3a56, 0x393e, 0x2e45, 0x3805, 0x3b80, 0x3795, + 0x3637, 0x38be, 0x3b6c, 0x3907, 0x3bba, 0x3919, 0x3857, 0x2cbf, 0x3bae, 0x37fb, 0x38f4, 0x3143, 0x381b, 0x31b9, 0x24ae, 0x3a10, 0x3710, 0x3861, 0x2f44, 0x3759, 0x3321, 0x3b8e, 0x3320, 0x344c, 0x3b0c, 0x39fa, 0x3a84, 0x3455, 0x3aee, 0x377e, 0x3bdd, 0x3792, + 0x3a22, 0x2ee2, 0x2fc6, 0x3b64, 0x385c, 0x3193, 0x38f0, 0x37d9, 0x37e5, 0x3870, 0x31aa, 0x3513, 0x38a7, 0x27e6, 0x3b6d, 0x3911, 0x3ab8, 0x3806, 0x3337, 0x30a7, 0x37ce, 0x3872, 0x3b66, 0x38cb, 0x2ca7, 0x3031, 0x3a71, 0x3b22, 0x3810, 0x3b5e, 0x3546, 0x3af9, + 0x27b0, 0x35d7, 0x38a7, 0x3744, 0x3b7e, 0x3590, 0x36a0, 0x32a4, 0x3b94, 0x3928, 0x3ab4, 0x3173, 0x3ade, 0x2984, 0x376a, 0x3ada, 0x3a64, 0x3821, 0x3b7e, 0x2c87, 0x39d1, 0x360f, 0x3908, 0x3896, 0x3abd, 0x38bd, 0x3a70, 0x37d5, 0x3a26, 0x351c, 0x3853, 0x33a3, + 0x3a6d, 0x37df, 0x355b, 0x3324, 0x3a5a, 0x357e, 0x37cc, 0x2bfa, 0x361c, 0x29c0, 0x390c, 0x3735, 0x3b31, 0x294e, 0x3a20, 0x388c, 0x3ad6, 0x3ba0, 0x3551, 0x34be, 0x34e4, 0x3adb, 0x30e7, 0x3aeb, 0x3a51, 0x36be, 0x33d1, 0x3302, 0x371c, 0x2fd2, 0x3b35, 0x3bbb, + 0x3acb, 0x2e24, 0x38db, 0x396b, 0x38b2, 0x34e9, 0x2dda, 0x39ae, 0x351d, 0x39cc, 0x378c, 0x3706, 0x3846, 0x30b7, 0x2c2b, 0x3241, 0x38db, 0x3191, 0x2d7a, 0x38fe, 0x39b6, 0x35ea, 0x3420, 0x3a49, 0x3277, 0x38e9, 0x379b, 0x3466, 0x29a1, 0x3af0, 0x3ab3, 0x390f, + 0x3590, 0x37a9, 0x2d02, 0x3921, 0x30c7, 0x3360, 0x3b6c, 0x38e4, 0x3912, 0x3739, 0x30ea, 0x340a, 0x3b56, 0x38a7, 0x3147, 0x35fc, 0x3a02, 0x359c, 0x38c0, 0x390c, 0x332d, 0x3b2e, 0x3ba2, 0x302d, 0x290e, 0x3a16, 0x3851, 0x3823, 0x3864, 0x38f6, 0x3843, 0x3619, + 0x3ac1, 0x35b7, 0x3571, 0x3bfd, 0x2de9, 0x3a6a, 0x393b, 0x2c3e, 0x3854, 0x3488, 0x25ef, 0x1cb6, 0x2cb6, 0x2e9a, 0x3865, 0x3993, 0x39fe, 0x3163, 0x3846, 0x3953, 0x38f3, 0x33ea, 0x35ac, 0x33b2, 0x3206, 0x3a99, 0x37d7, 0x3561, 0x3565, 0x33fe, 0x3b49, 0x3350, + 0x3871, 0x36ed, 0x3093, 0x3a45, 0x39d5, 0x3ae7, 0x35a0, 0x39d4, 0x38b6, 0x2c88, 
0x344e, 0x3a66, 0x349d, 0x3a1c, 0x38bc, 0x3a5b, 0x362e, 0x38c8, 0x3ba5, 0x3130, 0x296c, 0x3a6e, 0x3bb3, 0x3ae3, 0x3bda, 0x1e13, 0x38aa, 0x300b, 0x3830, 0x39b2, 0x249b, 0x384d, + 0x390e, 0x3930, 0x3b19, 0x3b6e, 0x34f0, 0x3a6a, 0x3559, 0x37cd, 0x38f4, 0x3933, 0x3a5e, 0x3504, 0x35ea, 0x3991, 0x37b7, 0x3098, 0x3464, 0x3036, 0x3b8e, 0x355e, 0x2a4b, 0x3029, 0x3905, 0x3854, 0x3856, 0x351d, 0x34e3, 0x2fa3, 0x2e34, 0x2cda, 0x33cd, 0x3878, + 0x3578, 0x39ad, 0x3b43, 0x35ce, 0x3534, 0x3692, 0x391b, 0x35c9, 0x3121, 0x3087, 0x3397, 0x3911, 0x3be2, 0x3a35, 0x33c0, 0x2f6c, 0x2669, 0x19d9, 0x33fb, 0x3aae, 0x3947, 0x36a5, 0x3631, 0x321b, 0x30a7, 0x392c, 0x3471, 0x382c, 0x3107, 0x34fe, 0x3693, 0x34eb, + 0x297b, 0x3ba2, 0x3950, 0x304e, 0x31c8, 0x32bf, 0x2c72, 0x33b5, 0x32c3, 0x2d9e, 0x3ba4, 0x2e74, 0x397d, 0x3865, 0x3841, 0x3aaf, 0x300a, 0x3aa5, 0x3758, 0x3a35, 0x3905, 0x389b, 0x3b6d, 0x2ed1, 0x3934, 0x3ba3, 0x3819, 0x3b71, 0x32d0, 0x2da0, 0x39e2, 0x39df, + 0x24a2, 0x38d5, 0x3491, 0x2bfb, 0x34e8, 0x3910, 0x368a, 0x39c1, 0x30e2, 0x3807, 0x35ca, 0x1984, 0x35a1, 0x3b4e, 0x37b0, 0x30a2, 0x357e, 0x3460, 0x2e8c, 0x386e, 0x3af0, 0x398c, 0x399a, 0x3a4d, 0x368f, 0xf02, 0x3a56, 0x3b02, 0x39e7, 0x3a94, 0x39dd, 0x2b38, + 0x3a70, 0x3a20, 0x3a1d, 0x3619, 0x3428, 0x3775, 0x3bb8, 0x34b5, 0x304c, 0x396b, 0x3acf, 0x2c1a, 0x35ef, 0x3698, 0x3bb7, 0x38db, 0x21ba, 0x32c2, 0x3547, 0x3001, 0x3a32, 0x36df, 0x399b, 0x3698, 0x398b, 0x2ac1, 0x2e5a, 0x3846, 0x3aa7, 0x27be, 0x3b24, 0x3385, + 0x2e7c, 0x389b, 0x36a7, 0x3995, 0x3882, 0x3ba7, 0x31cf, 0x39d5, 0x3910, 0x3a4d, 0x3516, 0x38cb, 0x3a1d, 0x2f77, 0x31be, 0x3be8, 0x3b52, 0x38c3, 0x396b, 0x3b6d, 0x3a8e, 0x34e2, 0x38a3, 0x3bc2, 0x3a05, 0x38ee, 0x2d99, 0x3ab7, 0x3a23, 0x231b, 0x35f4, 0x3314, + 0x3687, 0x3152, 0x2c5d, 0x358a, 0x2c66, 0x37d7, 0x3408, 0x394c, 0x3782, 0x390d, 0x2f1b, 0x39f1, 0x3bf3, 0x3ada, 0x2659, 0x363a, 0x3b69, 0x38fb, 0x3b4c, 0x2ee8, 0x381d, 0x3955, 0x2d26, 0x3599, 0x2c73, 0x306b, 0x3999, 0x39a5, 0x34b4, 0x34f1, 0x3195, 0x3a25, + 0x36dd, 0x36b7, 
0x359c, 0x3a96, 0x3574, 0x3571, 0x303e, 0x377b, 0x3af2, 0x370d, 0x3601, 0x39ed, 0x3aca, 0x2ef0, 0x2d82, 0x34fa, 0x3b40, 0x2127, 0x3bc3, 0x3abc, 0x3a31, 0x3999, 0x2f9f, 0x3a3f, 0x2d3f, 0x3325, 0x3789, 0x3a6a, 0x38fa, 0x37f6, 0x3987, 0x39d0, + 0x34ce, 0x3b36, 0x38df, 0x3987, 0x3895, 0x3adf, 0x378d, 0x3871, 0x39a4, 0x3516, 0x3403, 0x393d, 0x337e, 0x3b42, 0x343b, 0x3719, 0x3b43, 0x2c12, 0x3809, 0x3bdc, 0x380c, 0x22d6, 0x3ae4, 0x3ae2, 0x39ba, 0x3a08, 0x335c, 0x3659, 0x30d5, 0x33ec, 0x39d3, 0x3477, + 0x3736, 0x3bed, 0x3a0b, 0x331e, 0x3543, 0x3b15, 0x3a3e, 0x3628, 0x3965, 0x2e44, 0x35b7, 0x3974, 0x2736, 0xdc3, 0x309b, 0x3869, 0x310a, 0x384d, 0x3be8, 0x3a32, 0x286d, 0x3874, 0x38b2, 0x298b, 0x3bd7, 0x1c1d, 0x3b21, 0x1d82, 0x3a61, 0x3644, 0x3bd2, 0x394e, + 0x3345, 0x302f, 0x38d1, 0x21a9, 0x36b6, 0x3222, 0x2812, 0x3516, 0x290f, 0x3876, 0x2e57, 0x3499, 0x3083, 0x3182, 0x36cd, 0x30b9, 0x3bb0, 0x34e1, 0x38be, 0x3b15, 0x301d, 0x3be8, 0x3b3f, 0x37eb, 0x3a71, 0x386b, 0x39b1, 0x3832, 0x3a58, 0x3b82, 0x3a44, 0x3bce, + 0x38b5, 0x3b1b, 0x3874, 0x2a51, 0x39a4, 0x34de, 0x27b0, 0x3a89, 0x38bb, 0x3b01, 0x394b, 0x3496, 0x36ac, 0x39c2, 0x3465, 0x30b3, 0x34cb, 0x395e, 0x3b8e, 0x3633, 0x359a, 0x2cdd, 0x3bb7, 0x2ee6, 0x3756, 0x396e, 0x3a4b, 0x3166, 0x389e, 0x38d9, 0x389b, 0x3b25, + 0x322f, 0x298f, 0x34f3, 0x3841, 0x34a6, 0x39bf, 0x2c87, 0x32ec, 0x2ced, 0x32bc, 0x357c, 0x3247, 0x3b9a, 0x32e9, 0x1c09, 0x3998, 0x34f6, 0x2df1, 0x3a80, 0x38ab, 0x3806, 0x3b82, 0x3b81, 0x3aef, 0x3775, 0x309d, 0x32f6, 0x39ab, 0x3704, 0x3b16, 0x37a3, 0x3882, + 0x39fe, 0x3410, 0x2604, 0x3acc, 0x3b39, 0x3b30, 0x36c6, 0x34a9, 0x3648, 0x1e60, 0x34a5, 0x3b94, 0x30dc, 0x3a09, 0x38ca, 0x34a9, 0x3a7d, 0x3947, 0x3b94, 0x3920, 0x31f5, 0x3af9, 0x310e, 0x33a2, 0x30b9, 0x3b30, 0x3934, 0x32bb, 0x358a, 0x3065, 0x3486, 0x3aca, + 0x334b, 0x3532, 0x3a7b, 0x3b3e, 0x3612, 0x3b1c, 0x39a3, 0x3848, 0x3a14, 0x3810, 0x32e6, 0x27a1, 0x3bfb, 0x3287, 0x2a6f, 0x38da, 0x350f, 0x313a, 0x3afb, 0x3b79, 0x38da, 0x3bf2, 0x376a, 0x3a67, 0x3582, 0x38d7, 
0x3a7f, 0x352e, 0x38cb, 0x3a9a, 0x3206, 0x3bab, + 0x3bf4, 0x2856, 0x3529, 0x3b86, 0x3a1b, 0x3532, 0x3b00, 0x3082, 0x3aa9, 0x34ed, 0x2a27, 0x2ffb, 0x370b, 0x3418, 0x3b5a, 0x32c2, 0x3443, 0x3401, 0x3bf4, 0x31cf, 0x344c, 0x3222, 0x30be, 0x3996, 0x3073, 0x26e2, 0x3adb, 0x2b54, 0x33c5, 0x3579, 0x3185, 0x3610, + 0x3481, 0x2b07, 0x3419, 0x380b, 0x3318, 0x3a0f, 0x3bfa, 0x38f5, 0x3a04, 0x3432, 0x3a0f, 0x3323, 0x2ac5, 0x349c, 0x3108, 0x37c5, 0x3534, 0x370a, 0x355e, 0x38b0, 0x3a2f, 0x2c6c, 0x3a3f, 0x2d8c, 0x3823, 0x395e, 0x3898, 0x36de, 0x317f, 0x2ad7, 0x3758, 0x3be4, + 0x36e4, 0x3800, 0x376e, 0x3308, 0x35d6, 0x3443, 0x3bea, 0x33a3, 0x3b3d, 0x39ba, 0x3661, 0x399b, 0x3b32, 0x3179, 0x363c, 0x3a28, 0x2da6, 0x378a, 0x3907, 0x3bb9, 0x3bf4, 0x3a6e, 0x2ca0, 0x398a, 0x3470, 0x37fc, 0x39b2, 0x3855, 0x39e4, 0x3898, 0x32ac, 0x37e6, + 0x3baa, 0x3a10, 0x38b2, 0x39c0, 0x39d6, 0x39d0, 0x3bb3, 0x3922, 0x3a72, 0x382e, 0x36ca, 0x3a89, 0x3736, 0x3698, 0x37d4, 0x3894, 0x38fe, 0x37a9, 0x3916, 0x3923, 0x3ae1, 0x384d, 0x3bf3, 0x3be2, 0x387c, 0x38f1, 0x303f, 0x2d34, 0x3902, 0x3b26, 0x39b0, 0x3aee, + 0x319a, 0x3b83, 0x3054, 0x36fa, 0x3748, 0x3273, 0x38df, 0x3850, 0x33e9, 0x2cb5, 0x3a76, 0x375e, 0x3b74, 0x3457, 0x31ca, 0x34f4, 0x351a, 0x3811, 0x3ae3, 0x34ea, 0x3961, 0x323e, 0x361a, 0x3042, 0x39cc, 0x2118, 0x3026, 0x3bc8, 0x2aa4, 0x37ac, 0x3af3, 0x3782, + 0x3333, 0x32b7, 0x3753, 0x35f4, 0x2cae, 0x3215, 0x3913, 0x3665, 0x34d8, 0x3b62, 0x3be1, 0x38da, 0x3a12, 0x3605, 0x369c, 0x390c, 0x2ec0, 0x390b, 0x2e08, 0x3981, 0x2824, 0x31b1, 0x3b5f, 0x3960, 0x38f3, 0x3abb, 0x3b22, 0x3299, 0x301e, 0x2e72, 0x3a41, 0x370c + }; /* Header file generated by RedMulE Golden Model */ + uint16_t y_inp [1024] = { + 0x3941, 0x39e2, 0x2d14, 0x39a6, 0x3502, 0x3a65, 0x37a8, 0x3848, 0x3a44, 0x3a93, 0x38fd, 0x39e6, 0x38e0, 0x3366, 0x3989, 0x3622, 0x3a7f, 0x385e, 0x3619, 0x3bfb, 0x3af5, 0x3a03, 0x2cc8, 0x38e4, 0x3542, 0x30f5, 0x3b13, 0x3a26, 0x33be, 0x399f, 0x3b50, 0x362b, + 0x3beb, 0x3831, 0x3148, 0x3b24, 0x3a82, 0x3b59, 0x3b3a, 
0x38e3, 0x2cae, 0x1165, 0x38c4, 0x34df, 0x39f8, 0x3745, 0x39ea, 0x3868, 0x35b3, 0x3879, 0x308f, 0x377e, 0x3b07, 0x3983, 0x34d4, 0x3b13, 0x3a52, 0x382e, 0x2ff2, 0x31eb, 0x34af, 0x37d1, 0x3553, 0x39fd, + 0x3b7e, 0x26c9, 0x30fe, 0x3b9e, 0x34fc, 0x3b5e, 0x39bf, 0x3abb, 0x39f9, 0x3b9b, 0x3813, 0x33a5, 0x3983, 0x3aaa, 0x10e8, 0x2bb8, 0x389d, 0x3575, 0x3769, 0x3ad6, 0x2cae, 0x38be, 0x38d3, 0x3357, 0x3136, 0x3950, 0x3870, 0x346a, 0x3814, 0x382f, 0x3aeb, 0x3435, + 0x3815, 0x36bf, 0x37a1, 0x2f5d, 0x3407, 0x3747, 0x3ab6, 0x3932, 0x370a, 0x2fa8, 0x181c, 0x3571, 0x34b6, 0x3724, 0x383d, 0x3950, 0x3962, 0x38b8, 0x3ac6, 0x3bb7, 0x3621, 0x31ae, 0x3bcc, 0x3665, 0x33c8, 0x384d, 0x3bf3, 0x36f3, 0x3a95, 0x3a3d, 0x348f, 0x36ee, + 0x32d6, 0x315b, 0x3840, 0x3b08, 0x3577, 0x3b93, 0x38c3, 0x396e, 0x38c8, 0x3655, 0x3933, 0x3a88, 0x3b6f, 0x35ad, 0x3bae, 0x388d, 0x3a73, 0x3a4b, 0x2fd2, 0x385c, 0x3149, 0x3a77, 0x3866, 0x3406, 0x379a, 0x345a, 0x386b, 0x398a, 0x37c0, 0x388f, 0x3b5b, 0x3af4, + 0x372c, 0x34d3, 0x35b8, 0x3b73, 0x3671, 0x3a69, 0x37e8, 0x2d1d, 0x19fd, 0x3b89, 0x3530, 0x34fd, 0x3aad, 0x24a0, 0x3917, 0x26d4, 0x3b8e, 0x315c, 0x385a, 0x2adc, 0x2d74, 0x37ac, 0x3b83, 0x2fde, 0x3248, 0x2c9a, 0x37b2, 0x3223, 0x3b44, 0x2a05, 0x39c8, 0x3382, + 0x38ce, 0x3904, 0x387e, 0x260c, 0x39c0, 0x3918, 0x389a, 0x38ee, 0x3b93, 0x3a7f, 0x3256, 0x3a81, 0x39ce, 0x349d, 0x2d52, 0x34df, 0x3247, 0x3bb1, 0x38af, 0x383a, 0x3792, 0x2c04, 0x3a5f, 0x3b87, 0x3884, 0x388d, 0x2e1a, 0x33ef, 0x3bb5, 0x3c00, 0x2f22, 0x2c22, + 0x3905, 0x2f44, 0x3387, 0x360c, 0x3950, 0x3762, 0x2fec, 0x3533, 0x39bf, 0x2a47, 0x3ae3, 0x3816, 0x386d, 0x39a9, 0x3b03, 0x380c, 0x39d2, 0x3a52, 0x34d9, 0x2e73, 0x38c6, 0x3aa3, 0x3b64, 0x30df, 0x2b24, 0x39d8, 0x350c, 0x38b9, 0x25ca, 0x38a0, 0x3257, 0x3575, + 0x3610, 0x3868, 0x3491, 0x38b8, 0x2c96, 0x3b25, 0x1dc0, 0x30da, 0x3514, 0x3ad4, 0x325c, 0x3bb4, 0x39d5, 0x3be2, 0x3a60, 0x3476, 0x39a2, 0x3533, 0x374a, 0x39ed, 0x38eb, 0x3126, 0x3822, 0x383a, 0x3b46, 0x2fdc, 0x3a05, 0x378c, 0x2bc5, 0x36a5, 0x37d9, 
0x3597, + 0x3a15, 0x3586, 0x3841, 0x355a, 0x3bf2, 0x394b, 0x3529, 0x3a93, 0x30d2, 0x3743, 0x348b, 0x384b, 0x382b, 0x3976, 0x2073, 0x355d, 0x350b, 0x3970, 0x2e3b, 0x3588, 0x32b2, 0x3836, 0x3b42, 0x34ba, 0x3b9d, 0x3b8a, 0x37e5, 0x3abf, 0x3b90, 0x380e, 0x2f83, 0x38db, + 0x310e, 0x3805, 0x3848, 0x38c2, 0x3ab6, 0x39aa, 0x392e, 0x3904, 0x383d, 0x2cba, 0x32b9, 0x312c, 0x38c4, 0x362e, 0x30cc, 0x39e5, 0x2a19, 0x3371, 0x3ae7, 0x2e62, 0x3130, 0x3837, 0x3aaa, 0x2ba7, 0x2db8, 0x38f1, 0x3a8b, 0x25ce, 0x2bdc, 0x362c, 0x3b4e, 0x356d, + 0x3760, 0x3495, 0x393e, 0x39cd, 0x2d92, 0x2d19, 0x3bad, 0x33b0, 0x2dc2, 0x34f6, 0x29d6, 0x3953, 0x3380, 0x378e, 0x2d65, 0x2ee7, 0x2dae, 0x35f7, 0x2f11, 0x34ea, 0x3198, 0x2e1b, 0x2a9d, 0x3979, 0x30dc, 0x3abe, 0x36fa, 0x3685, 0x38d3, 0x31be, 0x3229, 0x3a31, + 0x39ae, 0x3a1f, 0x3712, 0x3864, 0x361e, 0x3b3c, 0x3998, 0x39a2, 0x39ba, 0x3058, 0x3beb, 0x35c5, 0x3813, 0x3995, 0x3a56, 0x387f, 0x3b9e, 0x3116, 0x3a9c, 0x3a67, 0x3950, 0x39c1, 0x30fd, 0x3915, 0x3358, 0x3aa8, 0x3b0c, 0x39d8, 0x38f0, 0x31c0, 0x3855, 0x3aca, + 0x3549, 0x392c, 0x3bc4, 0x36cb, 0x3bb3, 0x33a4, 0x3946, 0x35d9, 0x391a, 0x391b, 0x34a5, 0x3245, 0x396b, 0x3994, 0x3a74, 0x3285, 0x37d8, 0x3aa8, 0x39bc, 0x3883, 0x3292, 0x3ab6, 0x3903, 0x3624, 0x3b03, 0x3869, 0x38bd, 0x38b5, 0x38b8, 0x3986, 0x387c, 0x3736, + 0x3949, 0x3aa4, 0x3411, 0x37ea, 0x36d5, 0x3a83, 0x39f3, 0x3992, 0x3a28, 0x3be9, 0x3391, 0x3606, 0x3acd, 0x315e, 0x2834, 0x3b6f, 0x39d0, 0x3b0f, 0x3b8e, 0x23b0, 0x3565, 0x3970, 0x3b44, 0x390c, 0x3456, 0x3744, 0x3516, 0x3702, 0x3b66, 0x2747, 0x20b3, 0x3a98, + 0x34ce, 0x3752, 0x3abd, 0x3055, 0x3aa8, 0x38c0, 0x363d, 0x3b23, 0x3a86, 0x39dd, 0x3844, 0x3703, 0x2d2f, 0x3b21, 0x38fb, 0x3330, 0x3a1b, 0x3ba2, 0x362b, 0x3b15, 0x3856, 0x362e, 0x372d, 0x2d26, 0x3950, 0x321d, 0x3b09, 0x316e, 0x25ab, 0x3b5a, 0x39b6, 0x34ca, + 0x36b5, 0x3809, 0x38d4, 0x386b, 0x3392, 0x335b, 0x399f, 0x218d, 0x390e, 0x380d, 0x3ac1, 0x31d4, 0x3b8c, 0x39ac, 0x3993, 0x38bd, 0x3943, 0x3308, 0x390f, 0x383b, 0x388a, 0x3b23, 0x3725, 
0x3b6a, 0x3a59, 0x38cd, 0x33d2, 0x36d1, 0x3636, 0x399c, 0x3977, 0x3b15, + 0x3813, 0x38ed, 0x3991, 0x3167, 0x250c, 0x2588, 0x3b08, 0x3992, 0x2cee, 0x3a34, 0x312e, 0x398a, 0x3775, 0x3ab5, 0x38a1, 0x2ede, 0x3b74, 0x37df, 0x35f7, 0x2f55, 0x2df4, 0x39d0, 0x2c4d, 0x39ec, 0x3b5d, 0x283b, 0x2dda, 0x3926, 0x343b, 0x3405, 0x32e1, 0x39e2, + 0x39ba, 0x3546, 0x3ab5, 0x3193, 0x3b9d, 0x3089, 0x3902, 0x3966, 0x3578, 0x3985, 0x38dd, 0x24c7, 0x3529, 0x2d88, 0x3b26, 0x3a72, 0x39fb, 0x3a7d, 0x3985, 0x3b5c, 0x380e, 0x389b, 0x224b, 0x3084, 0x376b, 0x3af6, 0x2aa4, 0x3009, 0x3b1f, 0x300a, 0x34d5, 0x3654, + 0x3790, 0x3811, 0x3107, 0x3abe, 0x3b29, 0x39bd, 0x372f, 0x3bdb, 0x3748, 0x319f, 0x3640, 0x371c, 0x38cc, 0x35ed, 0x36ba, 0x39c6, 0x34f7, 0x3b9b, 0x2d4b, 0x3804, 0x3132, 0x363a, 0x3646, 0x3b99, 0x32af, 0x37e7, 0x3ba6, 0x3a79, 0x3b4f, 0x3b80, 0x24ed, 0x35a3, + 0x34de, 0x3b9e, 0x3ae7, 0x3b40, 0x3732, 0x20c2, 0x3a31, 0x394a, 0x3a5f, 0x3b16, 0x36aa, 0x2da5, 0x2f60, 0x3aa3, 0x3bbc, 0x39cc, 0x392f, 0x38d1, 0x397f, 0x31d1, 0x2e65, 0x344f, 0x18e9, 0x3686, 0x32f8, 0x378a, 0x3a8b, 0x3bf6, 0x375f, 0x3952, 0x3958, 0x2c36, + 0x3185, 0x39e5, 0x2f46, 0x3b81, 0x2de9, 0x3672, 0x31f9, 0x39d9, 0x3847, 0x3586, 0x2bf8, 0x3b18, 0x3970, 0x36b8, 0x3790, 0x34d0, 0x393f, 0x3469, 0x340c, 0x3b3c, 0x3653, 0x3aed, 0x3a15, 0x3973, 0x25d5, 0x35aa, 0x3b6a, 0x343b, 0x3770, 0x3664, 0x39f3, 0x3bef, + 0x31bc, 0x39ee, 0x2bff, 0x36a7, 0x3ad0, 0x374a, 0x309b, 0x3b9c, 0x383d, 0x3b45, 0x2389, 0x3ba4, 0x3765, 0x344a, 0x38db, 0x310c, 0x3009, 0x3407, 0x3bed, 0x3898, 0x3b81, 0x3bf2, 0x37d4, 0x3b37, 0x3465, 0x332d, 0x298e, 0x3943, 0x3bda, 0x3991, 0x36e8, 0x3223, + 0x3ab7, 0x36ae, 0x3ad4, 0x309f, 0x2465, 0x3228, 0x385f, 0x3948, 0x2dd8, 0x3a5c, 0x34fe, 0x3339, 0x2e2b, 0x3b26, 0x3188, 0x2e6e, 0x36a9, 0x32ab, 0x375b, 0x3ad3, 0x3bcf, 0x391c, 0x2e7e, 0x2439, 0x389d, 0x391b, 0x3855, 0x34a9, 0x3854, 0x34c2, 0x311c, 0x3416, + 0x3ad7, 0x38a9, 0x3391, 0x1989, 0x348d, 0x30d1, 0x3827, 0x3b7b, 0x3bae, 0x3bcd, 0x3bcf, 0x25e0, 0x3b6e, 0x3b09, 0x38d6, 
0x2b31, 0x36fc, 0x3186, 0x3a1e, 0x39d7, 0x384f, 0x3959, 0x386f, 0x349d, 0x1ed0, 0x28db, 0x393e, 0x3946, 0x3a11, 0x2d72, 0x30cb, 0x3b1b, + 0x3bf9, 0x396d, 0x3946, 0x3946, 0x3b30, 0x3b8e, 0x3af9, 0x32c0, 0x3aa9, 0x35fd, 0x24d2, 0x32e1, 0x3785, 0x364e, 0x35bf, 0x3035, 0x35d2, 0x34a7, 0x3530, 0x3be8, 0x385e, 0x397a, 0x399e, 0x379e, 0x3957, 0x387f, 0x3b2d, 0x3a4d, 0x36b9, 0x39fd, 0x34c4, 0x2776, + 0x37ab, 0x35e9, 0x3b6b, 0x318f, 0x22a2, 0x381a, 0x3534, 0x31c7, 0x30f2, 0x3491, 0x351f, 0x392e, 0x3a93, 0x3859, 0x27fd, 0x3882, 0x3539, 0x3101, 0x35c6, 0x3bb2, 0x36de, 0x3586, 0x34ba, 0x2b1a, 0x39e1, 0x3581, 0x37fb, 0x388c, 0x3b10, 0x394f, 0x36cd, 0x3410, + 0x333b, 0x381e, 0x3987, 0x3a0d, 0x3441, 0x3a8f, 0x39aa, 0x366e, 0x3260, 0x3297, 0x36ac, 0x38a9, 0x3b1b, 0x3779, 0x2e12, 0x3b2e, 0x399d, 0x3a87, 0x1f2b, 0x3841, 0x28cc, 0x3a9a, 0x3828, 0x389c, 0x3aff, 0x35ab, 0x3816, 0x3b48, 0x31d2, 0x3ab7, 0x37c1, 0x3ba8, + 0x39e3, 0x37cc, 0x3286, 0x3926, 0x244f, 0x33dc, 0x3079, 0x2a3a, 0x333c, 0x309d, 0x3723, 0x38bf, 0x31dc, 0x3bed, 0x33e5, 0x343c, 0x309b, 0x398b, 0x3bd3, 0x3a8d, 0x293a, 0x3bc7, 0x3a1e, 0x3498, 0x38dd, 0x37d2, 0x29b3, 0x386b, 0x3ba2, 0x3532, 0x29cd, 0x2392, + 0x35ef, 0x37fb, 0x31cd, 0x33be, 0x1d1c, 0x3679, 0x37bb, 0x3ac4, 0x2413, 0x383b, 0x369f, 0x3bb3, 0x39fb, 0x35c1, 0x3817, 0x36b8, 0x3be3, 0x3a3a, 0x3b80, 0x3bfc, 0x39b9, 0x3503, 0x383d, 0x38b7, 0x3a73, 0x3630, 0x38e1, 0x3513, 0x38af, 0x36d1, 0x3b62, 0x2c02, + 0x3aed, 0x32ac, 0x342a, 0x35cb, 0x351b, 0x305b, 0x3aae, 0x3680, 0x3a4b, 0x2fb3, 0x3564, 0x38a4, 0x39b5, 0x20c1, 0x3919, 0x39c0, 0x350d, 0x3106, 0x3a56, 0x35ca, 0x3328, 0x3b92, 0x36a5, 0x3a7d, 0x36d3, 0x341e, 0x3803, 0x2c17, 0x395d, 0x3761, 0x3a30, 0x3b96, + 0x34c1, 0x34cb, 0x3b84, 0x35fc, 0x2e6c, 0x2e81, 0x3766, 0x3837, 0x31b9, 0x37fc, 0x3491, 0x37d1, 0x3570, 0x39f5, 0x327e, 0x3bef, 0x3b2e, 0x3b29, 0x3afb, 0x358e, 0x3602, 0x3574, 0x3811, 0x396a, 0x3430, 0x3527, 0x3b24, 0x3656, 0x38a8, 0x2edc, 0x396b, 0x38bc + }; /* Header file generated by RedMulE Golden Model */ + 
uint16_t z_oup [1024] = { + 0x47e1, 0x476e, 0x47ea, 0x4788, 0x4860, 0x4727, 0x46be, 0x47cf, 0x47dc, 0x4857, 0x47a8, 0x4791, 0x459c, 0x4564, 0x4743, 0x468a, 0x4840, 0x4726, 0x47b7, 0x483a, 0x47e6, 0x4787, 0x4688, 0x4784, 0x46a2, 0x4766, 0x47f8, 0x4905, 0x4788, 0x484d, 0x483e, 0x4812, + 0x482c, 0x486d, 0x47d0, 0x48a6, 0x494d, 0x481f, 0x47b6, 0x4890, 0x4757, 0x4802, 0x4872, 0x47f7, 0x4691, 0x47b5, 0x4856, 0x4803, 0x486e, 0x483c, 0x486b, 0x4807, 0x4880, 0x4771, 0x479b, 0x48ef, 0x47a5, 0x481f, 0x47fe, 0x4887, 0x48cf, 0x48d7, 0x4878, 0x486c, + 0x483f, 0x479a, 0x4797, 0x4883, 0x48cc, 0x4849, 0x46e0, 0x4863, 0x4866, 0x48d4, 0x47b0, 0x4829, 0x4646, 0x4820, 0x477f, 0x46b3, 0x48d1, 0x47c5, 0x4835, 0x4852, 0x471e, 0x4700, 0x46c7, 0x47df, 0x4695, 0x48d4, 0x484f, 0x487d, 0x48ab, 0x4818, 0x48d4, 0x476c, + 0x4861, 0x4854, 0x48a2, 0x4842, 0x48ea, 0x490a, 0x481d, 0x4851, 0x48c4, 0x4883, 0x4825, 0x4864, 0x46d1, 0x4871, 0x4839, 0x480e, 0x493b, 0x4832, 0x4919, 0x482f, 0x482b, 0x4757, 0x46f6, 0x4889, 0x4721, 0x48d2, 0x4885, 0x4928, 0x48f6, 0x493f, 0x48d0, 0x4803, + 0x4786, 0x46c1, 0x4802, 0x48ec, 0x4974, 0x487f, 0x4742, 0x48e1, 0x480f, 0x4891, 0x4863, 0x4828, 0x46ae, 0x4815, 0x4834, 0x486d, 0x4875, 0x4816, 0x47c8, 0x4810, 0x4795, 0x4867, 0x46a9, 0x4811, 0x4761, 0x4820, 0x4835, 0x4906, 0x48df, 0x4883, 0x48cb, 0x4886, + 0x482c, 0x481a, 0x48b6, 0x487c, 0x495b, 0x48f5, 0x47be, 0x481c, 0x484b, 0x496b, 0x4856, 0x4882, 0x474d, 0x4847, 0x482f, 0x481e, 0x4920, 0x484d, 0x48b7, 0x4768, 0x47f9, 0x4810, 0x480d, 0x486b, 0x46b3, 0x4897, 0x485a, 0x4959, 0x490c, 0x48a7, 0x489c, 0x4843, + 0x47f5, 0x4794, 0x4804, 0x480d, 0x496a, 0x4890, 0x47bf, 0x4867, 0x48e3, 0x488f, 0x485a, 0x4858, 0x461c, 0x477a, 0x470c, 0x479a, 0x4848, 0x4864, 0x4869, 0x47b7, 0x47c7, 0x46ec, 0x46de, 0x486f, 0x47f4, 0x483f, 0x483f, 0x490b, 0x48e4, 0x48fb, 0x48c4, 0x481c, + 0x4678, 0x46a4, 0x46fb, 0x47bf, 0x48be, 0x470e, 0x463e, 0x4822, 0x4809, 0x4789, 0x4808, 0x470d, 0x4566, 0x477f, 0x485d, 0x47f6, 0x47d4, 0x47c9, 0x480b, 0x475b, 
0x473e, 0x46f1, 0x46b0, 0x469a, 0x46ad, 0x4850, 0x4778, 0x4880, 0x4824, 0x482c, 0x47dd, 0x46c7, + 0x4805, 0x4761, 0x4806, 0x4872, 0x48e8, 0x4821, 0x456e, 0x47af, 0x471a, 0x4864, 0x4792, 0x481c, 0x4606, 0x484b, 0x476d, 0x4765, 0x48af, 0x4721, 0x4891, 0x4844, 0x4819, 0x4732, 0x46ac, 0x4830, 0x4686, 0x4833, 0x47a7, 0x48d7, 0x4846, 0x4878, 0x4883, 0x47ba, + 0x46b1, 0x4619, 0x46a4, 0x4752, 0x482a, 0x4780, 0x4572, 0x4793, 0x4670, 0x47ed, 0x4682, 0x4660, 0x44d2, 0x4720, 0x468f, 0x462c, 0x47ce, 0x4690, 0x47c8, 0x4708, 0x4645, 0x4680, 0x46d4, 0x46a5, 0x45e3, 0x4875, 0x4657, 0x48bd, 0x4854, 0x483d, 0x4701, 0x472c, + 0x4880, 0x486a, 0x4862, 0x485a, 0x49b3, 0x48d3, 0x473e, 0x489c, 0x48bb, 0x4854, 0x47f0, 0x4877, 0x467e, 0x47c3, 0x4803, 0x48af, 0x48a9, 0x4879, 0x4867, 0x47a1, 0x484f, 0x4815, 0x4808, 0x4857, 0x47d6, 0x484e, 0x48d0, 0x48ae, 0x4862, 0x48e3, 0x48bb, 0x48ea, + 0x4717, 0x469d, 0x47e3, 0x477a, 0x4853, 0x46dc, 0x46e1, 0x470a, 0x46c9, 0x485b, 0x4699, 0x4790, 0x4526, 0x46ee, 0x46e3, 0x4708, 0x480b, 0x4727, 0x4818, 0x472d, 0x47de, 0x459d, 0x475d, 0x4837, 0x4648, 0x4816, 0x4715, 0x488f, 0x47b4, 0x4850, 0x4762, 0x47c5, + 0x46b6, 0x46cb, 0x46ea, 0x4786, 0x47e4, 0x4676, 0x4645, 0x4700, 0x46a7, 0x46ba, 0x4711, 0x463a, 0x44be, 0x46e6, 0x46c7, 0x4688, 0x4812, 0x46ea, 0x4852, 0x4739, 0x4751, 0x4649, 0x45fe, 0x47df, 0x455a, 0x47a6, 0x4728, 0x4817, 0x47a0, 0x4771, 0x47fc, 0x46bb, + 0x4700, 0x47ae, 0x480a, 0x4812, 0x4958, 0x47fb, 0x4836, 0x475a, 0x4822, 0x4880, 0x4830, 0x4800, 0x4687, 0x4849, 0x46f0, 0x46a0, 0x48b3, 0x4837, 0x48fd, 0x473e, 0x4806, 0x4774, 0x47b1, 0x48f5, 0x46d2, 0x4844, 0x47e9, 0x4986, 0x48b4, 0x488b, 0x486b, 0x47a5, + 0x4787, 0x47c8, 0x4817, 0x47e7, 0x48b2, 0x47c1, 0x47ad, 0x4832, 0x4776, 0x4875, 0x4771, 0x4809, 0x460a, 0x463d, 0x46ca, 0x47ec, 0x48b5, 0x4853, 0x489a, 0x46e4, 0x4728, 0x4799, 0x47bc, 0x485a, 0x45b1, 0x4823, 0x4802, 0x4878, 0x48dc, 0x4814, 0x473d, 0x483a, + 0x4773, 0x46f1, 0x4843, 0x4772, 0x4904, 0x4820, 0x47ae, 0x4844, 0x481b, 0x48cb, 0x4828, 0x47da, 
0x4555, 0x46bb, 0x4732, 0x46ec, 0x484d, 0x47a3, 0x489f, 0x4792, 0x47a9, 0x473a, 0x4767, 0x480c, 0x4666, 0x480c, 0x4806, 0x48ec, 0x4823, 0x4900, 0x480a, 0x484c, + 0x4866, 0x48e6, 0x48a3, 0x48d0, 0x494c, 0x484f, 0x479c, 0x4884, 0x48f7, 0x4914, 0x4898, 0x4861, 0x473a, 0x4880, 0x48d3, 0x48a1, 0x48d3, 0x487d, 0x48be, 0x4888, 0x483f, 0x48b9, 0x4877, 0x48f4, 0x47fd, 0x4923, 0x4871, 0x49e5, 0x4910, 0x493c, 0x4889, 0x48f4, + 0x46cc, 0x47bb, 0x47e2, 0x47a6, 0x48af, 0x45f1, 0x4596, 0x4803, 0x46bc, 0x47f4, 0x46d2, 0x4720, 0x4577, 0x4801, 0x4699, 0x47af, 0x4818, 0x4742, 0x46fa, 0x470a, 0x4654, 0x46c3, 0x4659, 0x4805, 0x469e, 0x4726, 0x4702, 0x4872, 0x482a, 0x4775, 0x478b, 0x4793, + 0x478e, 0x4778, 0x4881, 0x485f, 0x492e, 0x47f5, 0x46ff, 0x4892, 0x4828, 0x488c, 0x4770, 0x4771, 0x45c0, 0x482c, 0x4880, 0x48de, 0x48d3, 0x4871, 0x4895, 0x4859, 0x47d0, 0x481b, 0x4696, 0x4832, 0x470a, 0x48df, 0x47b1, 0x48d0, 0x48ca, 0x48bb, 0x484f, 0x47b9, + 0x4848, 0x483c, 0x484c, 0x4867, 0x491a, 0x483a, 0x4786, 0x486b, 0x483f, 0x48c1, 0x47f2, 0x4869, 0x4616, 0x4758, 0x485e, 0x480e, 0x48f4, 0x48a2, 0x48d7, 0x48c1, 0x4861, 0x47b2, 0x485c, 0x48e4, 0x4721, 0x48e3, 0x4896, 0x4980, 0x48e7, 0x4934, 0x4854, 0x4849, + 0x47ec, 0x4844, 0x484b, 0x47f6, 0x486f, 0x48a4, 0x4754, 0x4815, 0x4856, 0x4883, 0x4785, 0x4831, 0x4660, 0x47a8, 0x4828, 0x47f2, 0x48f3, 0x480d, 0x48c4, 0x46f6, 0x47ca, 0x4688, 0x45dc, 0x48bb, 0x4607, 0x4855, 0x478a, 0x48dd, 0x4831, 0x48d5, 0x487a, 0x4794, + 0x46de, 0x4784, 0x475d, 0x4834, 0x48b9, 0x4803, 0x46c8, 0x480a, 0x4774, 0x4807, 0x478e, 0x4817, 0x4735, 0x47a2, 0x46e6, 0x4750, 0x486b, 0x4792, 0x4873, 0x4808, 0x478c, 0x473d, 0x46aa, 0x48a1, 0x4600, 0x47ca, 0x476e, 0x48b2, 0x48c6, 0x4802, 0x4858, 0x4793, + 0x476e, 0x480a, 0x4838, 0x47d5, 0x4970, 0x4853, 0x46fc, 0x4847, 0x483f, 0x492b, 0x47c3, 0x4897, 0x4731, 0x47b3, 0x4825, 0x47a8, 0x4862, 0x484e, 0x4935, 0x4856, 0x48dd, 0x470e, 0x4829, 0x48e2, 0x4703, 0x484f, 0x47ad, 0x4984, 0x48aa, 0x48b9, 0x4876, 0x4812, + 0x476b, 0x46e6, 0x46e6, 0x47c0, 
0x4839, 0x476a, 0x4688, 0x47a7, 0x46de, 0x478a, 0x46b8, 0x4732, 0x448b, 0x47db, 0x46f6, 0x47c2, 0x4817, 0x4706, 0x47c5, 0x4719, 0x4798, 0x46fc, 0x45e3, 0x47af, 0x4605, 0x47cc, 0x47b0, 0x4825, 0x486b, 0x484e, 0x471a, 0x4709, + 0x4825, 0x47da, 0x47bb, 0x47c2, 0x4904, 0x47b6, 0x45da, 0x485c, 0x4891, 0x486e, 0x478b, 0x4755, 0x463f, 0x486d, 0x479a, 0x4788, 0x489e, 0x4856, 0x4867, 0x4868, 0x4800, 0x46dd, 0x474d, 0x4837, 0x46d5, 0x481d, 0x4810, 0x4901, 0x4879, 0x481a, 0x4883, 0x4875, + 0x4899, 0x489b, 0x48c5, 0x4946, 0x49a5, 0x48f8, 0x47b1, 0x4895, 0x48c9, 0x48ae, 0x481c, 0x4845, 0x46ba, 0x48c5, 0x4898, 0x4884, 0x492a, 0x489b, 0x4904, 0x488c, 0x4888, 0x4842, 0x47d6, 0x48e7, 0x47f9, 0x48e1, 0x4897, 0x4935, 0x490d, 0x4988, 0x48da, 0x483f, + 0x46b8, 0x4525, 0x4791, 0x465e, 0x478c, 0x4731, 0x452d, 0x470b, 0x45ba, 0x4734, 0x4632, 0x46a0, 0x44f7, 0x4674, 0x464e, 0x470a, 0x4764, 0x46bc, 0x466a, 0x46db, 0x46ba, 0x45ae, 0x458c, 0x45c6, 0x464f, 0x4685, 0x468e, 0x4770, 0x4761, 0x480c, 0x4789, 0x4722, + 0x467b, 0x46f4, 0x471d, 0x47d2, 0x4878, 0x4795, 0x45c3, 0x476e, 0x475e, 0x47fb, 0x46da, 0x47be, 0x45bf, 0x469f, 0x4635, 0x46c6, 0x4802, 0x471b, 0x47d4, 0x47ab, 0x460c, 0x4631, 0x4623, 0x4801, 0x462b, 0x4817, 0x4675, 0x4854, 0x4762, 0x47e1, 0x4722, 0x4791, + 0x4879, 0x4869, 0x48ad, 0x48c4, 0x4907, 0x4857, 0x474a, 0x4884, 0x489b, 0x485a, 0x484d, 0x4848, 0x46a6, 0x4893, 0x4833, 0x485e, 0x4892, 0x48f2, 0x491c, 0x481a, 0x4770, 0x4897, 0x4785, 0x4876, 0x475f, 0x489e, 0x4811, 0x48e4, 0x491c, 0x4909, 0x4899, 0x4836, + 0x490d, 0x4901, 0x491c, 0x496c, 0x4a1e, 0x494b, 0x4868, 0x49be, 0x497c, 0x49ea, 0x48fd, 0x4989, 0x47a9, 0x4892, 0x4982, 0x490e, 0x49c2, 0x4985, 0x4a42, 0x499e, 0x496a, 0x48bf, 0x48b6, 0x4938, 0x48fb, 0x49aa, 0x4984, 0x49fd, 0x49c3, 0x4a28, 0x49f9, 0x4984, + 0x46bc, 0x4616, 0x46eb, 0x4748, 0x483e, 0x46a5, 0x4743, 0x47a7, 0x4767, 0x482e, 0x4701, 0x4834, 0x4515, 0x4694, 0x46ec, 0x47cb, 0x4838, 0x468c, 0x4803, 0x471b, 0x46f2, 0x471c, 0x4616, 0x481e, 0x45ef, 0x4812, 0x477c, 0x4874, 
0x47f0, 0x47e2, 0x482a, 0x47c1,
+ 0x4782, 0x4768, 0x47c6, 0x46e7, 0x490d, 0x475e, 0x472b, 0x4806, 0x4727, 0x47ce, 0x4842, 0x47af, 0x466c, 0x472c, 0x46cc, 0x47d4, 0x484b, 0x47fd, 0x4881, 0x46b7, 0x477d, 0x466c, 0x4692, 0x4853, 0x45e2, 0x47d6, 0x47e4, 0x4894, 0x4860, 0x4808, 0x484f, 0x46f4
+ };
\ No newline at end of file
diff --git a/target/snitch_cluster/sw/apps/redmule/src/redmule.c b/target/snitch_cluster/sw/apps/redmule/src/redmule.c
new file mode 100644
index 0000000000..50d5cec74e
--- /dev/null
+++ b/target/snitch_cluster/sw/apps/redmule/src/redmule.c
@@ -0,0 +1,103 @@
+// Copyright 2023 ETH Zurich and University of Bologna.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// RedMulE test application: copies the x, w and y operands into TCDM with the
+// DMA, configures and triggers a Float16 GEMM job on RedMulE, and compares
+// the contents of the y buffer against `golden`.
+
+#include <stdint.h>
+
+#include "snrt.h"
+#include "data.h"
+
+#define VERBOSE
+
+// TCDM operand buffers: allocated and filled in the DM-core branch of main(),
+// then passed to RedMulE and checked in the core-0 branches.
+uint16_t *local_x;
+uint16_t *local_w;
+uint16_t *local_y;
+
+// Runs the test. Core 0 returns the value produced by redmule16_compare_int()
+// so that a failing comparison yields a non-zero exit code; every other core
+// returns 0.
+int main() {
+    uint32_t errors = 0;
+    int offload_id_tmp;
+
+    uint32_t core_idx = snrt_global_core_idx();
+
+    // Operand sizes in bytes. Held in 32 bits: a uint16_t would silently wrap
+    // once an operand reaches 32768 16-bit elements (64 KiB).
+    uint32_t x_size = M_SIZE * N_SIZE * sizeof(uint16_t);
+    uint32_t w_size = N_SIZE * K_SIZE * sizeof(uint16_t);
+    uint32_t y_size = M_SIZE * K_SIZE * sizeof(uint16_t);
+
+    // Allocate space in TCDM and copy inputs to TCDM
+    if (snrt_is_dm_core()) {
+        local_x = snrt_l1_alloc(x_size);
+        local_w = snrt_l1_alloc(w_size);
+        local_y = snrt_l1_alloc(y_size);
+        snrt_dma_start_1d(local_x, x_inp, x_size);
+        snrt_dma_start_1d(local_w, w_inp, w_size);
+        snrt_dma_start_1d(local_y, y_inp, y_size);
+        snrt_dma_wait_all();
+    }
+
+    // Synchronize the cluster before core 0 touches the buffers.
+    snrt_cluster_hw_barrier();
+
+    if (core_idx == 0) {
+        printf("Starting RedMulE from core %u\n", (unsigned int)core_idx);
+        printf("x dim: %u\n", (unsigned int)x_size);
+        printf("w dim: %u\n", (unsigned int)w_size);
+        printf("y dim: %u\n", (unsigned int)y_size);
+
+        printf("local_x: %p\n", local_x);
+        printf("local_w: %p\n", local_w);
+        printf("local_y: %p\n", local_y);
+
+        // Enable RedMulE
+        hwpe_cg_enable();
+
+        hwpe_soft_clear();
+
+        // Spin until hwpe_acquire_job() returns a non-negative value.
+        while ((offload_id_tmp = hwpe_acquire_job()) < 0)
+            ;
+
+        redmule_cfg((unsigned int)local_x, (unsigned int)local_w,
+                    (unsigned int)local_y, M_SIZE, N_SIZE, K_SIZE,
+                    (uint8_t)GEMM, (uint8_t)Float16);
+
+        // Start RedMulE operation
+        hwpe_trigger_job();
+    }
+
+    // NOTE(review): nothing visible in this file waits for the RedMulE job to
+    // finish before the check below; confirm that this barrier (or
+    // hwpe_trigger_job() itself) guarantees completion.
+    snrt_cluster_hw_barrier();
+
+    if (core_idx == 0) {
+        printf("Checking RedMulE from core %u\n", (unsigned int)core_idx);
+
+        // Disable RedMulE
+        hwpe_cg_disable();
+
+        // Check computation is correct: the 16-bit results are compared as
+        // 32-bit words, hence half as many words as output elements.
+        errors = redmule16_compare_int((uint32_t *)local_y, golden,
+                                       M_SIZE * K_SIZE / 2);
+
+        if (errors == 0)
+            printf("No errors!\n");
+        else
+            printf("Errors: %u\n", (unsigned int)errors);
+    }
+
+    // Propagate the error count as the exit code so that a mismatch fails
+    // the run instead of being reported only in the log.
+    return (int)errors;
+}