From 3f319c78e6189f4293a27300013f9cf3d8c641ad Mon Sep 17 00:00:00 2001 From: Tobias Platen Date: Thu, 23 Jan 2020 11:51:52 +0100 Subject: [PATCH] add more converted header files --- src/iommu/axi_rab/axi4_ar_sender.py | 232 ++ src/iommu/axi_rab/axi4_aw_buffer.py | 157 ++ src/iommu/axi_rab/axi4_aw_sender.py | 252 +++ src/iommu/axi_rab/axi4_b_buffer.py | 94 + src/iommu/axi_rab/axi4_b_sender.py | 136 ++ src/iommu/axi_rab/axi4_r_buffer.py | 120 + src/iommu/axi_rab/axi4_r_sender.py | 206 ++ src/iommu/axi_rab/axi4_w_buffer.py | 777 +++++++ src/iommu/axi_rab/axi4_w_sender.py | 78 + src/iommu/axi_rab/axi_buffer_rab.py | 151 ++ src/iommu/axi_rab/axi_buffer_rab_bram.py | 209 ++ src/iommu/axi_rab/axi_rab_cfg.py | 707 ++++++ src/iommu/axi_rab/axi_rab_top.py | 2642 ++++++++++++++++++++++ src/iommu/axi_rab/check_ram.py | 240 ++ src/iommu/axi_rab/fsm.py | 243 ++ src/iommu/axi_rab/l2_tlb.py | 550 +++++ src/iommu/axi_rab/rab_slice.py | 76 + src/iommu/axi_rab/ram_tp_no_change.py | 81 + src/iommu/axi_rab/ram_tp_write_first.py | 79 + src/iommu/axi_rab/slice_top.py | 115 + 20 files changed, 7145 insertions(+) create mode 100644 src/iommu/axi_rab/axi4_ar_sender.py create mode 100644 src/iommu/axi_rab/axi4_aw_buffer.py create mode 100644 src/iommu/axi_rab/axi4_aw_sender.py create mode 100644 src/iommu/axi_rab/axi4_b_buffer.py create mode 100644 src/iommu/axi_rab/axi4_b_sender.py create mode 100644 src/iommu/axi_rab/axi4_r_buffer.py create mode 100644 src/iommu/axi_rab/axi4_r_sender.py create mode 100644 src/iommu/axi_rab/axi4_w_buffer.py create mode 100644 src/iommu/axi_rab/axi4_w_sender.py create mode 100644 src/iommu/axi_rab/axi_buffer_rab.py create mode 100644 src/iommu/axi_rab/axi_buffer_rab_bram.py create mode 100644 src/iommu/axi_rab/axi_rab_cfg.py create mode 100644 src/iommu/axi_rab/axi_rab_top.py create mode 100644 src/iommu/axi_rab/check_ram.py create mode 100644 src/iommu/axi_rab/fsm.py create mode 100644 src/iommu/axi_rab/l2_tlb.py create mode 100644 
src/iommu/axi_rab/rab_slice.py create mode 100644 src/iommu/axi_rab/ram_tp_no_change.py create mode 100644 src/iommu/axi_rab/ram_tp_write_first.py create mode 100644 src/iommu/axi_rab/slice_top.py diff --git a/src/iommu/axi_rab/axi4_ar_sender.py b/src/iommu/axi_rab/axi4_ar_sender.py new file mode 100644 index 00000000..4cbd97d5 --- /dev/null +++ b/src/iommu/axi_rab/axi4_ar_sender.py @@ -0,0 +1,232 @@
+# this file has been generated by sv2nmigen
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+from nmigen import ClockDomain, Mux
+
+
+class axi4_ar_sender(Elaboratable):
+    """Forward AXI4 read-address (AR) requests after address translation.
+
+    Translated by hand from the SystemVerilog module quoted in the comment
+    below this class.
+
+    A request on the slave port is forwarded to the master port when L1
+    accepts it, and is consumed without being forwarded when L1 drops or
+    saves it. With ``ENABLE_L2TLB == 1`` one saved request is kept in
+    registers and sent later using the address supplied on ``l2_araddr_i``;
+    with any other value the L2 path is tied off.
+
+    The constructor arguments carry the names and default values of the
+    SystemVerilog parameters.
+    """
+
+    def __init__(self, AXI_ADDR_WIDTH=40, AXI_ID_WIDTH=4, AXI_USER_WIDTH=4,
+                 ENABLE_L2TLB=0):
+        self.ENABLE_L2TLB = ENABLE_L2TLB
+
+        self.axi4_aclk = Signal()  # input
+        self.axi4_arstn = Signal()  # input
+        self.l1_done_o = Signal()  # output
+        self.l1_accept_i = Signal()  # input
+        self.l1_drop_i = Signal()  # input
+        self.l1_save_i = Signal()  # input
+        self.l2_done_o = Signal()  # output
+        self.l2_accept_i = Signal()  # input
+        self.l2_drop_i = Signal()  # input
+        self.l2_sending_o = Signal()  # output
+        self.l1_araddr_i = Signal(AXI_ADDR_WIDTH)  # input
+        self.l2_araddr_i = Signal(AXI_ADDR_WIDTH)  # input
+        self.s_axi4_arid = Signal(AXI_ID_WIDTH)  # input
+        self.s_axi4_arvalid = Signal()  # input
+        self.s_axi4_arready = Signal()  # output
+        self.s_axi4_arlen = Signal(8)  # input
+        self.s_axi4_arsize = Signal(3)  # input
+        self.s_axi4_arburst = Signal(2)  # input
+        self.s_axi4_arlock = Signal()  # input
+        self.s_axi4_arprot = Signal(3)  # input
+        self.s_axi4_arcache = Signal(4)  # input
+        self.s_axi4_aruser = Signal(AXI_USER_WIDTH)  # input
+        self.m_axi4_arid = Signal(AXI_ID_WIDTH)  # output
+        self.m_axi4_araddr = Signal(AXI_ADDR_WIDTH)  # output
+        self.m_axi4_arvalid = Signal()  # output
+        self.m_axi4_arready = Signal()  # input
+        self.m_axi4_arlen = Signal(8)  # output
+        self.m_axi4_arsize = Signal(3)  # output
+        self.m_axi4_arburst = Signal(2)  # output
+        self.m_axi4_arlock = Signal()  # output
+        self.m_axi4_arprot = Signal(3)  # output
+        self.m_axi4_arcache = Signal(4)  # output
+        self.m_axi4_aruser = Signal(AXI_USER_WIDTH)  # output
+
+    def elaborate(self, platform=None):
+        """Build the sender logic; ``platform`` is not used."""
+        m = Module()
+
+        # (suffix, slave-side input, master-side output) of every field that
+        # is forwarded unchanged; the address is handled separately.
+        names = ("aruser", "arcache", "arprot", "arlock",
+                 "arburst", "arsize", "arlen", "arid")
+        fields = [(name,
+                   getattr(self, "s_axi4_" + name),
+                   getattr(self, "m_axi4_" + name)) for name in names]
+
+        l2_enabled = self.ENABLE_L2TLB == 1
+        if l2_enabled:
+            # reset value 1: the L2 slot is free after reset
+            l2_available_q = Signal(reset=1)
+        else:
+            # without L2 the slot is never available, so l1_save stays 0
+            l2_available_q = Const(0, 1)
+
+        l1_save = Signal()
+        m.d.comb += [
+            l1_save.eq(self.l1_save_i & l2_available_q),
+            self.l1_done_o.eq(self.s_axi4_arvalid & self.s_axi4_arready),
+            self.m_axi4_arvalid.eq(
+                (self.s_axi4_arvalid & self.l1_accept_i)
+                | self.l2_sending_o),
+            # ready when 1: a transaction translated by L1 is handed to the
+            # master port, or 2: the request is dropped or saved
+            self.s_axi4_arready.eq(
+                (self.m_axi4_arvalid & self.m_axi4_arready
+                 & ~self.l2_sending_o)
+                | (self.s_axi4_arvalid & (self.l1_drop_i | l1_save))),
+        ]
+
+        if not l2_enabled:
+            m.d.comb += [m_sig.eq(s_sig) for _, s_sig, m_sig in fields]
+            m.d.comb += [
+                self.m_axi4_araddr.eq(self.l1_araddr_i),
+                self.l2_sending_o.eq(0),
+                self.l2_done_o.eq(0),
+            ]
+            return m
+
+        # The registers are clocked by this module's own clock port and
+        # cleared by its active-low asynchronous reset port, so they get a
+        # private clock domain driven from those two ports.
+        cd = ClockDomain("axi4", async_reset=True, local=True)
+        m.domains += cd
+        m.d.comb += [
+            cd.clk.eq(self.axi4_aclk),
+            cd.rst.eq(~self.axi4_arstn),
+        ]
+
+        l2_sent = Signal()
+        sending = self.l2_sending_o
+
+        for name, s_sig, m_sig in fields:
+            # buffer AXI signals in case of L1 miss
+            saved = Signal.like(s_sig, name="l2_axi4_" + name)
+            with m.If(l1_save):
+                m.d.axi4 += saved.eq(s_sig)
+            m.d.comb += m_sig.eq(Mux(sending, saved, s_sig))
+        m.d.comb += self.m_axi4_araddr.eq(
+            Mux(sending, self.l2_araddr_i, self.l1_araddr_i))
+
+        # signal that an l1_save_i can be accepted
+        with m.If(l2_sent | self.l2_drop_i):
+            m.d.axi4 += l2_available_q.eq(1)
+        with m.Elif(l1_save):
+            m.d.axi4 += l2_available_q.eq(0)
+
+        m.d.comb += [
+            sending.eq(self.l2_accept_i & ~l2_available_q),
+            l2_sent.eq(sending & self.m_axi4_arvalid & self.m_axi4_arready),
+            # done after 1: sending out a transaction translated by L2, or
+            # 2: a drop request (the L2 slot is available again)
+            self.l2_done_o.eq(l2_sent | self.l2_drop_i),
+        ]
+        return m
+
+# // Copyright 2018 ETH Zurich and University of Bologna. +# // Copyright and related rights are licensed under the Solderpad Hardware +# // License, Version 0.51 (the "License"); you may not use this file except in +# // compliance with the License. You may obtain a copy of the License at +# // http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law +# // or agreed to in writing, software, hardware and materials distributed under +# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# // CONDITIONS OF ANY KIND, either express or implied. See the License for the +# // specific language governing permissions and limitations under the License. +# +# module axi4_ar_sender +# #( +# parameter AXI_ADDR_WIDTH = 40, +# parameter AXI_ID_WIDTH = 4, +# parameter AXI_USER_WIDTH = 4, +# parameter ENABLE_L2TLB = 0 +# ) +# ( +# input logic axi4_aclk, +# input logic axi4_arstn, +# +# output logic l1_done_o, +# input logic l1_accept_i, +# input logic l1_drop_i, +# input logic l1_save_i, +# +# output logic l2_done_o, +# input logic l2_accept_i, +# input logic l2_drop_i, +# output logic l2_sending_o, +# +# input logic [AXI_ADDR_WIDTH-1:0] l1_araddr_i, +# input logic [AXI_ADDR_WIDTH-1:0] l2_araddr_i, +# +# input logic [AXI_ID_WIDTH-1:0] s_axi4_arid, +# input logic s_axi4_arvalid, +# output logic s_axi4_arready, +# input logic [7:0] s_axi4_arlen, +# input logic [2:0] s_axi4_arsize, +# input logic [1:0] s_axi4_arburst, +# input logic s_axi4_arlock, +# input logic [2:0] s_axi4_arprot, +# input logic [3:0] s_axi4_arcache, +# input logic [AXI_USER_WIDTH-1:0] s_axi4_aruser, +# +# output logic [AXI_ID_WIDTH-1:0] m_axi4_arid, +# output logic [AXI_ADDR_WIDTH-1:0] m_axi4_araddr, +# output logic m_axi4_arvalid, +# input logic m_axi4_arready, +# output logic [7:0] m_axi4_arlen, +# output logic [2:0] m_axi4_arsize, +# output logic [1:0] m_axi4_arburst, +# output logic m_axi4_arlock, +# output logic [2:0] m_axi4_arprot, +# output logic [3:0] m_axi4_arcache, +# output logic [AXI_USER_WIDTH-1:0] m_axi4_aruser +# ); +# +# logic l1_save; +# +# logic l2_sent; +# logic l2_available_q; +# +# assign l1_save = l1_save_i & l2_available_q; +# +# assign l1_done_o = s_axi4_arvalid & s_axi4_arready ; +# +# // if 1: accept and forward a transaction translated by L1 +# // 2: drop or save request (if L2 
slot not occupied already) +# assign m_axi4_arvalid = (s_axi4_arvalid & l1_accept_i) | +# l2_sending_o; +# assign s_axi4_arready = (m_axi4_arvalid & m_axi4_arready & ~l2_sending_o) | +# (s_axi4_arvalid & (l1_drop_i | l1_save)); +# +# generate +# if (ENABLE_L2TLB == 1) begin +# logic [AXI_USER_WIDTH-1:0] l2_axi4_aruser ; +# logic [3:0] l2_axi4_arcache ; +# logic [3:0] l2_axi4_arregion; +# logic [3:0] l2_axi4_arqos ; +# logic [2:0] l2_axi4_arprot ; +# logic l2_axi4_arlock ; +# logic [1:0] l2_axi4_arburst ; +# logic [2:0] l2_axi4_arsize ; +# logic [7:0] l2_axi4_arlen ; +# logic [AXI_ID_WIDTH-1:0] l2_axi4_arid ; +# +# assign m_axi4_aruser = l2_sending_o ? l2_axi4_aruser : s_axi4_aruser; +# assign m_axi4_arcache = l2_sending_o ? l2_axi4_arcache : s_axi4_arcache; +# assign m_axi4_arprot = l2_sending_o ? l2_axi4_arprot : s_axi4_arprot; +# assign m_axi4_arlock = l2_sending_o ? l2_axi4_arlock : s_axi4_arlock; +# assign m_axi4_arburst = l2_sending_o ? l2_axi4_arburst : s_axi4_arburst; +# assign m_axi4_arsize = l2_sending_o ? l2_axi4_arsize : s_axi4_arsize; +# assign m_axi4_arlen = l2_sending_o ? l2_axi4_arlen : s_axi4_arlen; +# assign m_axi4_araddr = l2_sending_o ? l2_araddr_i : l1_araddr_i; +# assign m_axi4_arid = l2_sending_o ? 
l2_axi4_arid : s_axi4_arid; +# +# // Buffer AXI signals in case of L1 miss +# always @(posedge axi4_aclk or negedge axi4_arstn) begin +# if (axi4_arstn == 1'b0) begin +# l2_axi4_aruser <= 'b0; +# l2_axi4_arcache <= 'b0; +# l2_axi4_arprot <= 'b0; +# l2_axi4_arlock <= 1'b0; +# l2_axi4_arburst <= 'b0; +# l2_axi4_arsize <= 'b0; +# l2_axi4_arlen <= 'b0; +# l2_axi4_arid <= 'b0; +# end else if (l1_save) begin +# l2_axi4_aruser <= s_axi4_aruser; +# l2_axi4_arcache <= s_axi4_arcache; +# l2_axi4_arprot <= s_axi4_arprot; +# l2_axi4_arlock <= s_axi4_arlock; +# l2_axi4_arburst <= s_axi4_arburst; +# l2_axi4_arsize <= s_axi4_arsize; +# l2_axi4_arlen <= s_axi4_arlen; +# l2_axi4_arid <= s_axi4_arid; +# end +# end +# +# // signal that an l1_save_i can be accepted +# always @(posedge axi4_aclk or negedge axi4_arstn) begin +# if (axi4_arstn == 1'b0) begin +# l2_available_q <= 1'b1; +# end else if (l2_sent | l2_drop_i) begin +# l2_available_q <= 1'b1; +# end else if (l1_save) begin +# l2_available_q <= 1'b0; +# end +# end +# +# assign l2_sending_o = l2_accept_i & ~l2_available_q; +# assign l2_sent = l2_sending_o & m_axi4_arvalid & m_axi4_arready; +# +# // if 1: having sent out a transaction translated by L2 +# // 2: drop request (L2 slot is available again) +# assign l2_done_o = l2_sent | l2_drop_i; +# +# end else begin // !`ifdef ENABLE_L2TLB +# assign m_axi4_aruser = s_axi4_aruser; +# assign m_axi4_arcache = s_axi4_arcache; +# assign m_axi4_arprot = s_axi4_arprot; +# assign m_axi4_arlock = s_axi4_arlock; +# assign m_axi4_arburst = s_axi4_arburst; +# assign m_axi4_arsize = s_axi4_arsize; +# assign m_axi4_arlen = s_axi4_arlen; +# assign m_axi4_araddr = l1_araddr_i; +# assign m_axi4_arid = s_axi4_arid; +# +# assign l2_sending_o = 1'b0; +# assign l2_available_q = 1'b0; +# assign l2_done_o = 1'b0; +# end // else: !if(ENABLE_L2TLB == 1) +# endgenerate +# +# endmodule +# +# diff --git a/src/iommu/axi_rab/axi4_aw_buffer.py b/src/iommu/axi_rab/axi4_aw_buffer.py new file mode 100644 index 
00000000..f5ca37d1 --- /dev/null +++ b/src/iommu/axi_rab/axi4_aw_buffer.py @@ -0,0 +1,157 @@
+# this file has been generated by sv2nmigen
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+from nmigen import Instance
+
+
+class axi4_aw_buffer(Elaboratable):
+    """Pass the AXI4 write-address (AW) channel through an axi_buffer_rab.
+
+    Translated by hand from the SystemVerilog module quoted in the comment
+    below this class. All AW payload fields of the slave port are packed
+    into one word, handed to the buffer, and unpacked again on the master
+    port; the valid/ready handshakes are connected to the buffer directly.
+
+    The constructor arguments carry the names and default values of the
+    SystemVerilog parameters.
+    """
+
+    def __init__(self, AXI_ID_WIDTH=4, AXI_USER_WIDTH=4):
+        # width of the packed word: 61 bits of fixed-size fields + id + user
+        self.DATA_WIDTH = AXI_ID_WIDTH + AXI_USER_WIDTH + 61
+
+        self.axi4_aclk = Signal()  # input
+        self.axi4_arstn = Signal()  # input
+        self.s_axi4_awid = Signal(AXI_ID_WIDTH)  # input
+        self.s_axi4_awaddr = Signal(32)  # input
+        self.s_axi4_awvalid = Signal()  # input
+        self.s_axi4_awready = Signal()  # output
+        self.s_axi4_awlen = Signal(8)  # input
+        self.s_axi4_awsize = Signal(3)  # input
+        self.s_axi4_awburst = Signal(2)  # input
+        self.s_axi4_awlock = Signal()  # input
+        self.s_axi4_awprot = Signal(3)  # input
+        self.s_axi4_awcache = Signal(4)  # input
+        self.s_axi4_awregion = Signal(4)  # input
+        self.s_axi4_awqos = Signal(4)  # input
+        self.s_axi4_awuser = Signal(AXI_USER_WIDTH)  # input
+        self.m_axi4_awid = Signal(AXI_ID_WIDTH)  # output
+        self.m_axi4_awaddr = Signal(32)  # output
+        self.m_axi4_awvalid = Signal()  # output
+        self.m_axi4_awready = Signal()  # input
+        self.m_axi4_awlen = Signal(8)  # output
+        self.m_axi4_awsize = Signal(3)  # output
+        self.m_axi4_awburst = Signal(2)  # output
+        self.m_axi4_awlock = Signal()  # output
+        self.m_axi4_awprot = Signal(3)  # output
+        self.m_axi4_awcache = Signal(4)  # output
+        self.m_axi4_awregion = Signal(4)  # output
+        self.m_axi4_awqos = Signal(4)  # output
+        self.m_axi4_awuser = Signal(AXI_USER_WIDTH)  # output
+
+    def elaborate(self, platform=None):
+        """Build the buffer wiring; ``platform`` is not used."""
+        m = Module()
+
+        data_in = Signal(self.DATA_WIDTH)
+        data_out = Signal(self.DATA_WIDTH)
+
+        # Field order from bit 0 upwards, as in the SystemVerilog assigns:
+        # cache[3:0] prot[6:4] lock[7] burst[9:8] size[12:10] len[20:13]
+        # addr[52:21] region[56:53] qos[60:57], then id, then user.
+        order = ("awcache", "awprot", "awlock", "awburst", "awsize",
+                 "awlen", "awaddr", "awregion", "awqos", "awid", "awuser")
+        s_fields = [getattr(self, "s_axi4_" + name) for name in order]
+        m_fields = [getattr(self, "m_axi4_" + name) for name in order]
+        m.d.comb += [
+            data_in.eq(Cat(*s_fields)),
+            Cat(*m_fields).eq(data_out),
+        ]
+
+        # axi_buffer_rab is instantiated as an external module, using the
+        # parameter and port names of the SystemVerilog instantiation; its
+        # implementation has to be supplied to the toolchain.
+        m.submodules.u_buffer = Instance(
+            "axi_buffer_rab",
+            p_DATA_WIDTH=self.DATA_WIDTH,
+            p_BUFFER_DEPTH=4,
+            i_clk=self.axi4_aclk,
+            i_rstn=self.axi4_arstn,
+            o_valid_out=self.m_axi4_awvalid,
+            o_data_out=data_out,
+            i_ready_in=self.m_axi4_awready,
+            i_valid_in=self.s_axi4_awvalid,
+            i_data_in=data_in,
+            o_ready_out=self.s_axi4_awready,
+        )
+        return m
+
+# // Copyright 2018 ETH Zurich and University of Bologna. +# // Copyright and related rights are licensed under the Solderpad Hardware +# // License, Version 0.51 (the "License"); you may not use this file except in +# // compliance with the License. You may obtain a copy of the License at +# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# // or agreed to in writing, software, hardware and materials distributed under +# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# // CONDITIONS OF ANY KIND, either express or implied. See the License for the +# // specific language governing permissions and limitations under the License.
+# +# module axi4_aw_buffer +# #( +# parameter AXI_ID_WIDTH = 4, +# parameter AXI_USER_WIDTH = 4 +# ) +# ( +# input logic axi4_aclk, +# input logic axi4_arstn, +# +# input logic [AXI_ID_WIDTH-1:0] s_axi4_awid, +# input logic [31:0] s_axi4_awaddr, +# input logic s_axi4_awvalid, +# output logic s_axi4_awready, +# input logic [7:0] s_axi4_awlen, +# input logic [2:0] s_axi4_awsize, +# input logic [1:0] s_axi4_awburst, +# input logic s_axi4_awlock, +# input logic [2:0] s_axi4_awprot, +# input logic [3:0] s_axi4_awcache, +# input logic [3:0] s_axi4_awregion, +# input logic [3:0] s_axi4_awqos, +# input logic [AXI_USER_WIDTH-1:0] s_axi4_awuser, +# +# output logic [AXI_ID_WIDTH-1:0] m_axi4_awid, +# output logic [31:0] m_axi4_awaddr, +# output logic m_axi4_awvalid, +# input logic m_axi4_awready, +# output logic [7:0] m_axi4_awlen, +# output logic [2:0] m_axi4_awsize, +# output logic [1:0] m_axi4_awburst, +# output logic m_axi4_awlock, +# output logic [2:0] m_axi4_awprot, +# output logic [3:0] m_axi4_awcache, +# output logic [3:0] m_axi4_awregion, +# output logic [3:0] m_axi4_awqos, +# output logic [AXI_USER_WIDTH-1:0] m_axi4_awuser +# ); +# +# wire [AXI_USER_WIDTH+AXI_ID_WIDTH+60:0] data_in; +# wire [AXI_USER_WIDTH+AXI_ID_WIDTH+60:0] data_out; +# +# assign data_in [3:0] = s_axi4_awcache; +# assign data_in [6:4] = s_axi4_awprot; +# assign data_in [7] = s_axi4_awlock; +# assign data_in [9:8] = s_axi4_awburst; +# assign data_in [12:10] = s_axi4_awsize; +# assign data_in [20:13] = s_axi4_awlen; +# assign data_in [52:21] = s_axi4_awaddr; +# assign data_in [56:53] = s_axi4_awregion; +# assign data_in [60:57] = s_axi4_awqos; +# assign data_in [60+AXI_ID_WIDTH:61] = s_axi4_awid; +# assign data_in [60+AXI_ID_WIDTH+AXI_USER_WIDTH:61+AXI_ID_WIDTH] = s_axi4_awuser; +# +# assign m_axi4_awcache = data_out[3:0]; +# assign m_axi4_awprot = data_out[6:4]; +# assign m_axi4_awlock = data_out[7]; +# assign m_axi4_awburst = data_out[9:8]; +# assign m_axi4_awsize = data_out[12:10]; +# assign 
m_axi4_awlen = data_out[20:13]; +# assign m_axi4_awaddr = data_out[52:21]; +# assign m_axi4_awregion = data_out[56:53]; +# assign m_axi4_awqos = data_out[60:57]; +# assign m_axi4_awid = data_out[60+AXI_ID_WIDTH:61]; +# assign m_axi4_awuser = data_out[60+AXI_ID_WIDTH+AXI_USER_WIDTH:61+AXI_ID_WIDTH]; +# +# axi_buffer_rab +# #( +# .DATA_WIDTH ( AXI_ID_WIDTH+AXI_USER_WIDTH+61 ), +# .BUFFER_DEPTH ( 4 ) +# ) +# u_buffer +# ( +# .clk ( axi4_aclk ), +# .rstn ( axi4_arstn ), +# .valid_out ( m_axi4_awvalid ), +# .data_out ( data_out ), +# .ready_in ( m_axi4_awready ), +# .valid_in ( s_axi4_awvalid ), +# .data_in ( data_in ), +# .ready_out ( s_axi4_awready ) +# ); +# endmodule +# +# diff --git a/src/iommu/axi_rab/axi4_aw_sender.py b/src/iommu/axi_rab/axi4_aw_sender.py new file mode 100644 index 00000000..fbc917df --- /dev/null +++ b/src/iommu/axi_rab/axi4_aw_sender.py @@ -0,0 +1,252 @@
+# this file has been generated by sv2nmigen
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+from nmigen import ClockDomain, Mux
+
+
+class axi4_aw_sender(Elaboratable):
+    """Forward AXI4 write-address (AW) requests after address translation.
+
+    Translated by hand from the SystemVerilog module quoted in the comment
+    below this class.
+
+    A request on the slave port is forwarded to the master port when L1
+    accepts it, and is consumed without being forwarded when L1 drops or
+    saves it. With ``ENABLE_L2TLB == 1`` one saved request is kept in
+    registers and sent later using the address supplied on ``l2_awaddr_i``;
+    with any other value the L2 path is tied off.
+
+    The constructor arguments carry the names and default values of the
+    SystemVerilog parameters.
+    """
+
+    def __init__(self, AXI_ADDR_WIDTH=40, AXI_ID_WIDTH=4, AXI_USER_WIDTH=4,
+                 ENABLE_L2TLB=0):
+        self.ENABLE_L2TLB = ENABLE_L2TLB
+
+        self.axi4_aclk = Signal()  # input
+        self.axi4_arstn = Signal()  # input
+        self.l1_done_o = Signal()  # output
+        self.l1_accept_i = Signal()  # input
+        self.l1_drop_i = Signal()  # input
+        self.l1_save_i = Signal()  # input
+        self.l2_done_o = Signal()  # output
+        self.l2_accept_i = Signal()  # input
+        self.l2_drop_i = Signal()  # input
+        self.l2_sending_o = Signal()  # output
+        self.l1_awaddr_i = Signal(AXI_ADDR_WIDTH)  # input
+        self.l2_awaddr_i = Signal(AXI_ADDR_WIDTH)  # input
+        self.s_axi4_awid = Signal(AXI_ID_WIDTH)  # input
+        self.s_axi4_awvalid = Signal()  # input
+        self.s_axi4_awready = Signal()  # output
+        self.s_axi4_awlen = Signal(8)  # input
+        self.s_axi4_awsize = Signal(3)  # input
+        self.s_axi4_awburst = Signal(2)  # input
+        self.s_axi4_awlock = Signal()  # input
+        self.s_axi4_awprot = Signal(3)  # input
+        self.s_axi4_awcache = Signal(4)  # input
+        self.s_axi4_awregion = Signal(4)  # input
+        self.s_axi4_awqos = Signal(4)  # input
+        self.s_axi4_awuser = Signal(AXI_USER_WIDTH)  # input
+        self.m_axi4_awid = Signal(AXI_ID_WIDTH)  # output
+        self.m_axi4_awaddr = Signal(AXI_ADDR_WIDTH)  # output
+        self.m_axi4_awvalid = Signal()  # output
+        self.m_axi4_awready = Signal()  # input
+        self.m_axi4_awlen = Signal(8)  # output
+        self.m_axi4_awsize = Signal(3)  # output
+        self.m_axi4_awburst = Signal(2)  # output
+        self.m_axi4_awlock = Signal()  # output
+        self.m_axi4_awprot = Signal(3)  # output
+        self.m_axi4_awcache = Signal(4)  # output
+        self.m_axi4_awregion = Signal(4)  # output
+        self.m_axi4_awqos = Signal(4)  # output
+        self.m_axi4_awuser = Signal(AXI_USER_WIDTH)  # output
+
+    def elaborate(self, platform=None):
+        """Build the sender logic; ``platform`` is not used."""
+        m = Module()
+
+        # (suffix, slave-side input, master-side output) of every field that
+        # is forwarded unchanged; the address is handled separately.
+        names = ("awuser", "awcache", "awregion", "awqos", "awprot",
+                 "awlock", "awburst", "awsize", "awlen", "awid")
+        fields = [(name,
+                   getattr(self, "s_axi4_" + name),
+                   getattr(self, "m_axi4_" + name)) for name in names]
+
+        l2_enabled = self.ENABLE_L2TLB == 1
+        if l2_enabled:
+            # reset value 1: the L2 slot is free after reset
+            l2_available_q = Signal(reset=1)
+        else:
+            # without L2 the slot is never available, so l1_save stays 0
+            l2_available_q = Const(0, 1)
+
+        l1_save = Signal()
+        m.d.comb += [
+            l1_save.eq(self.l1_save_i & l2_available_q),
+            self.l1_done_o.eq(self.s_axi4_awvalid & self.s_axi4_awready),
+            self.m_axi4_awvalid.eq(
+                (self.s_axi4_awvalid & self.l1_accept_i)
+                | self.l2_sending_o),
+            # ready when 1: a transaction translated by L1 is handed to the
+            # master port, or 2: the request is dropped or saved
+            self.s_axi4_awready.eq(
+                (self.m_axi4_awvalid & self.m_axi4_awready
+                 & ~self.l2_sending_o)
+                | (self.s_axi4_awvalid & (self.l1_drop_i | l1_save))),
+        ]
+
+        if not l2_enabled:
+            m.d.comb += [m_sig.eq(s_sig) for _, s_sig, m_sig in fields]
+            m.d.comb += [
+                self.m_axi4_awaddr.eq(self.l1_awaddr_i),
+                self.l2_sending_o.eq(0),
+                self.l2_done_o.eq(0),
+            ]
+            return m
+
+        # The registers are clocked by this module's own clock port and
+        # cleared by its active-low asynchronous reset port, so they get a
+        # private clock domain driven from those two ports.
+        cd = ClockDomain("axi4", async_reset=True, local=True)
+        m.domains += cd
+        m.d.comb += [
+            cd.clk.eq(self.axi4_aclk),
+            cd.rst.eq(~self.axi4_arstn),
+        ]
+
+        l2_sent = Signal()
+        sending = self.l2_sending_o
+
+        for name, s_sig, m_sig in fields:
+            # buffer AXI signals in case of L1 miss
+            saved = Signal.like(s_sig, name="l2_axi4_" + name)
+            with m.If(l1_save):
+                m.d.axi4 += saved.eq(s_sig)
+            m.d.comb += m_sig.eq(Mux(sending, saved, s_sig))
+        m.d.comb += self.m_axi4_awaddr.eq(
+            Mux(sending, self.l2_awaddr_i, self.l1_awaddr_i))
+
+        # signal that an l1_save_i can be accepted
+        with m.If(l2_sent | self.l2_drop_i):
+            m.d.axi4 += l2_available_q.eq(1)
+        with m.Elif(l1_save):
+            m.d.axi4 += l2_available_q.eq(0)
+
+        m.d.comb += [
+            sending.eq(self.l2_accept_i & ~l2_available_q),
+            l2_sent.eq(sending & self.m_axi4_awvalid & self.m_axi4_awready),
+            # done after 1: sending out a transaction translated by L2, or
+            # 2: a drop request (the L2 slot is available again)
+            self.l2_done_o.eq(l2_sent | self.l2_drop_i),
+        ]
+        return m
+
+# // Copyright 2018 ETH Zurich and University of Bologna. +# // Copyright and related rights are licensed under the Solderpad Hardware +# // License, Version 0.51 (the "License"); you may not use this file except in +# // compliance with the License. You may obtain a copy of the License at +# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# // or agreed to in writing, software, hardware and materials distributed under +# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# // CONDITIONS OF ANY KIND, either express or implied. See the License for the +# // specific language governing permissions and limitations under the License. +# +# module axi4_aw_sender +# #( +# parameter AXI_ADDR_WIDTH = 40, +# parameter AXI_ID_WIDTH = 4, +# parameter AXI_USER_WIDTH = 4, +# parameter ENABLE_L2TLB = 0 +# ) +# ( +# input logic axi4_aclk, +# input logic axi4_arstn, +# +# output logic l1_done_o, +# input logic l1_accept_i, +# input logic l1_drop_i, +# input logic l1_save_i, +# +# output logic l2_done_o, +# input logic l2_accept_i, +# input logic l2_drop_i, +# output logic l2_sending_o, +# +# input logic [AXI_ADDR_WIDTH-1:0] l1_awaddr_i, +# input logic [AXI_ADDR_WIDTH-1:0] l2_awaddr_i, +# +# input logic [AXI_ID_WIDTH-1:0] s_axi4_awid, +# input logic s_axi4_awvalid, +# output logic s_axi4_awready, +# input logic [7:0] s_axi4_awlen, +# input logic [2:0] s_axi4_awsize, +# input logic [1:0] s_axi4_awburst, +# input logic s_axi4_awlock, +# input logic [2:0] s_axi4_awprot, +# input logic [3:0] s_axi4_awcache, +# input logic [3:0] s_axi4_awregion, +# input logic [3:0] s_axi4_awqos, +# input logic [AXI_USER_WIDTH-1:0] s_axi4_awuser, +# +#
output logic [AXI_ID_WIDTH-1:0] m_axi4_awid, +# output logic [AXI_ADDR_WIDTH-1:0] m_axi4_awaddr, +# output logic m_axi4_awvalid, +# input logic m_axi4_awready, +# output logic [7:0] m_axi4_awlen, +# output logic [2:0] m_axi4_awsize, +# output logic [1:0] m_axi4_awburst, +# output logic m_axi4_awlock, +# output logic [2:0] m_axi4_awprot, +# output logic [3:0] m_axi4_awcache, +# output logic [3:0] m_axi4_awregion, +# output logic [3:0] m_axi4_awqos, +# output logic [AXI_USER_WIDTH-1:0] m_axi4_awuser +# ); +# +# logic l1_save; +# +# logic l2_sent; +# logic l2_available_q; +# +# assign l1_save = l1_save_i & l2_available_q; +# +# assign l1_done_o = s_axi4_awvalid & s_axi4_awready ; +# +# // if 1: accept and forward a transaction translated by L1 +# // 2: drop or save request (if L2 slot not occupied already) +# assign m_axi4_awvalid = (s_axi4_awvalid & l1_accept_i) | +# l2_sending_o; +# assign s_axi4_awready = (m_axi4_awvalid & m_axi4_awready & ~l2_sending_o) | +# (s_axi4_awvalid & (l1_drop_i | l1_save)); +# +# generate +# if (ENABLE_L2TLB == 1) begin +# logic [AXI_USER_WIDTH-1:0] l2_axi4_awuser ; +# logic [3:0] l2_axi4_awcache ; +# logic [3:0] l2_axi4_awregion; +# logic [3:0] l2_axi4_awqos ; +# logic [2:0] l2_axi4_awprot ; +# logic l2_axi4_awlock ; +# logic [1:0] l2_axi4_awburst ; +# logic [2:0] l2_axi4_awsize ; +# logic [7:0] l2_axi4_awlen ; +# logic [AXI_ID_WIDTH-1:0] l2_axi4_awid ; +# +# assign m_axi4_awuser = l2_sending_o ? l2_axi4_awuser : s_axi4_awuser; +# assign m_axi4_awcache = l2_sending_o ? l2_axi4_awcache : s_axi4_awcache; +# assign m_axi4_awregion = l2_sending_o ? l2_axi4_awregion : s_axi4_awregion; +# assign m_axi4_awqos = l2_sending_o ? l2_axi4_awqos : s_axi4_awqos; +# assign m_axi4_awprot = l2_sending_o ? l2_axi4_awprot : s_axi4_awprot; +# assign m_axi4_awlock = l2_sending_o ? l2_axi4_awlock : s_axi4_awlock; +# assign m_axi4_awburst = l2_sending_o ? l2_axi4_awburst : s_axi4_awburst; +# assign m_axi4_awsize = l2_sending_o ? 
l2_axi4_awsize : s_axi4_awsize; +# assign m_axi4_awlen = l2_sending_o ? l2_axi4_awlen : s_axi4_awlen; +# assign m_axi4_awaddr = l2_sending_o ? l2_awaddr_i : l1_awaddr_i; +# assign m_axi4_awid = l2_sending_o ? l2_axi4_awid : s_axi4_awid; +# +# // buffer AXI signals in case of L1 miss +# always @(posedge axi4_aclk or negedge axi4_arstn) begin +# if (axi4_arstn == 1'b0) begin +# l2_axi4_awuser <= 'b0; +# l2_axi4_awcache <= 'b0; +# l2_axi4_awregion <= 'b0; +# l2_axi4_awqos <= 'b0; +# l2_axi4_awprot <= 'b0; +# l2_axi4_awlock <= 1'b0; +# l2_axi4_awburst <= 'b0; +# l2_axi4_awsize <= 'b0; +# l2_axi4_awlen <= 'b0; +# l2_axi4_awid <= 'b0; +# end else if (l1_save) begin +# l2_axi4_awuser <= s_axi4_awuser; +# l2_axi4_awcache <= s_axi4_awcache; +# l2_axi4_awregion <= s_axi4_awregion; +# l2_axi4_awqos <= s_axi4_awqos; +# l2_axi4_awprot <= s_axi4_awprot; +# l2_axi4_awlock <= s_axi4_awlock; +# l2_axi4_awburst <= s_axi4_awburst; +# l2_axi4_awsize <= s_axi4_awsize; +# l2_axi4_awlen <= s_axi4_awlen; +# l2_axi4_awid <= s_axi4_awid; +# end +# end +# +# // signal that an l1_save_i can be accepted +# always @(posedge axi4_aclk or negedge axi4_arstn) begin +# if (axi4_arstn == 1'b0) begin +# l2_available_q <= 1'b1; +# end else if (l2_sent | l2_drop_i) begin +# l2_available_q <= 1'b1; +# end else if (l1_save) begin +# l2_available_q <= 1'b0; +# end +# end +# +# assign l2_sending_o = l2_accept_i & ~l2_available_q; +# assign l2_sent = l2_sending_o & m_axi4_awvalid & m_axi4_awready; +# +# // if 1: having sent out a transaction translated by L2 +# // 2: drop request (L2 slot is available again) +# assign l2_done_o = l2_sent | l2_drop_i; +# +# end else begin // !`ifdef ENABLE_L2TLB +# assign m_axi4_awuser = s_axi4_awuser; +# assign m_axi4_awcache = s_axi4_awcache; +# assign m_axi4_awregion = s_axi4_awregion; +# assign m_axi4_awqos = s_axi4_awqos; +# assign m_axi4_awprot = s_axi4_awprot; +# assign m_axi4_awlock = s_axi4_awlock; +# assign m_axi4_awburst = s_axi4_awburst; +# assign m_axi4_awsize = 
s_axi4_awsize; +# assign m_axi4_awlen = s_axi4_awlen; +# assign m_axi4_awaddr = l1_awaddr_i; +# assign m_axi4_awid = s_axi4_awid; +# +# assign l2_sending_o = 1'b0; +# assign l2_available_q = 1'b0; +# assign l2_done_o = 1'b0; +# end // !`ifdef ENABLE_L2TLB +# endgenerate +# +# endmodule +# +# diff --git a/src/iommu/axi_rab/axi4_b_buffer.py b/src/iommu/axi_rab/axi4_b_buffer.py new file mode 100644 index 00000000..42fce1ad --- /dev/null +++ b/src/iommu/axi_rab/axi4_b_buffer.py @@ -0,0 +1,94 @@
+# this file has been generated by sv2nmigen
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+from nmigen import Instance
+
+
+class axi4_b_buffer(Elaboratable):
+    """Pass the AXI4 write-response (B) channel through an axi_buffer_rab.
+
+    Translated by hand from the SystemVerilog module quoted in the comment
+    below this class. The response fields of the master port are packed
+    into one word, handed to the buffer, and unpacked again on the slave
+    port; the valid/ready handshakes are connected to the buffer directly.
+
+    The constructor arguments carry the names and default values of the
+    SystemVerilog parameters.
+    """
+
+    def __init__(self, AXI_ID_WIDTH=4, AXI_USER_WIDTH=4):
+        # width of the packed word: 2 bits of bresp + id + user
+        self.DATA_WIDTH = AXI_ID_WIDTH + AXI_USER_WIDTH + 2
+
+        self.axi4_aclk = Signal()  # input
+        self.axi4_arstn = Signal()  # input
+        self.s_axi4_bid = Signal(AXI_ID_WIDTH)  # output
+        self.s_axi4_bresp = Signal(2)  # output
+        self.s_axi4_bvalid = Signal()  # output
+        self.s_axi4_buser = Signal(AXI_USER_WIDTH)  # output
+        self.s_axi4_bready = Signal()  # input
+        self.m_axi4_bid = Signal(AXI_ID_WIDTH)  # input
+        self.m_axi4_bresp = Signal(2)  # input
+        self.m_axi4_bvalid = Signal()  # input
+        self.m_axi4_buser = Signal(AXI_USER_WIDTH)  # input
+        self.m_axi4_bready = Signal()  # output
+
+    def elaborate(self, platform=None):
+        """Build the buffer wiring; ``platform`` is not used."""
+        m = Module()
+
+        data_in = Signal(self.DATA_WIDTH)
+        data_out = Signal(self.DATA_WIDTH)
+
+        # Field order from bit 0 upwards, as in the SystemVerilog assigns:
+        # bresp[1:0], then bid, then buser.
+        m.d.comb += [
+            data_in.eq(Cat(self.m_axi4_bresp,
+                           self.m_axi4_bid,
+                           self.m_axi4_buser)),
+            Cat(self.s_axi4_bresp,
+                self.s_axi4_bid,
+                self.s_axi4_buser).eq(data_out),
+        ]
+
+        # axi_buffer_rab is instantiated as an external module, using the
+        # parameter and port names of the SystemVerilog instantiation; its
+        # implementation has to be supplied to the toolchain.
+        m.submodules.u_buffer = Instance(
+            "axi_buffer_rab",
+            p_DATA_WIDTH=self.DATA_WIDTH,
+            p_BUFFER_DEPTH=4,
+            i_clk=self.axi4_aclk,
+            i_rstn=self.axi4_arstn,
+            o_valid_out=self.s_axi4_bvalid,
+            o_data_out=data_out,
+            i_ready_in=self.s_axi4_bready,
+            i_valid_in=self.m_axi4_bvalid,
+            i_data_in=data_in,
+            o_ready_out=self.m_axi4_bready,
+        )
+        return m
+
+# // Copyright 2018 ETH Zurich and University of Bologna. +# // Copyright and related rights are licensed under the Solderpad Hardware +# // License, Version 0.51 (the "License"); you may not use this file except in +# // compliance with the License. You may obtain a copy of the License at +# // http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law +# // or agreed to in writing, software, hardware and materials distributed under +# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# // CONDITIONS OF ANY KIND, either express or implied. See the License for the +# // specific language governing permissions and limitations under the License. +# +# module axi4_b_buffer +# #( +# parameter AXI_ID_WIDTH = 4, +# parameter AXI_USER_WIDTH = 4 +# ) +# ( +# input logic axi4_aclk, +# input logic axi4_arstn, +# +# output logic [AXI_ID_WIDTH-1:0] s_axi4_bid, +# output logic [1:0] s_axi4_bresp, +# output logic s_axi4_bvalid, +# output logic [AXI_USER_WIDTH-1:0] s_axi4_buser, +# input logic s_axi4_bready, +# +# input logic [AXI_ID_WIDTH-1:0] m_axi4_bid, +# input logic [1:0] m_axi4_bresp, +# input logic m_axi4_bvalid, +# input logic [AXI_USER_WIDTH-1:0] m_axi4_buser, +# output logic m_axi4_bready +# ); +# +# wire [AXI_ID_WIDTH+AXI_USER_WIDTH+1:0] data_in; +# wire [AXI_ID_WIDTH+AXI_USER_WIDTH+1:0] data_out; +# +# assign data_in [1:0] = m_axi4_bresp; +# assign data_in [AXI_ID_WIDTH+1:2] = m_axi4_bid; +# assign data_in[AXI_ID_WIDTH+AXI_USER_WIDTH+1:AXI_ID_WIDTH+2] = m_axi4_buser; +# +# assign s_axi4_buser = data_out[AXI_ID_WIDTH+AXI_USER_WIDTH+1:AXI_ID_WIDTH+2]; +# assign s_axi4_bid = data_out[AXI_ID_WIDTH+1:2]; +# assign s_axi4_bresp = data_out[1:0]; +# +# axi_buffer_rab +# #( +# .DATA_WIDTH ( AXI_ID_WIDTH+AXI_USER_WIDTH+2 ), +# .BUFFER_DEPTH ( 4 ) +# ) +# u_buffer +# ( +# .clk ( axi4_aclk ), +# .rstn ( axi4_arstn ), +# .valid_out( s_axi4_bvalid ), +# .data_out ( data_out ), +# .ready_in ( s_axi4_bready ), +# .valid_in ( m_axi4_bvalid ), +# .data_in ( data_in ), +# .ready_out( m_axi4_bready ) +# ); +# +# endmodule +# +# diff --git a/src/iommu/axi_rab/axi4_b_sender.py b/src/iommu/axi_rab/axi4_b_sender.py new file mode 100644 index 00000000..1c61a2a5 --- /dev/null +++ b/src/iommu/axi_rab/axi4_b_sender.py @@ -0,0 +1,136 @@ +# this file has been generated by sv2nmigen + 
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+from nmigen import ClockDomain, Instance, Mux
+
+
+class axi4_b_sender(Elaboratable):
+    """Send AXI4 write responses (B), including those for dropped requests.
+
+    Translated by hand from the SystemVerilog module quoted in the comment
+    below this class.
+
+    Each drop request (``drop_i``) pushes ``{prefetch_i, hit_i, id_i}``
+    into an axi_buffer_rab FIFO. While the ``dropping`` register is set the
+    slave port shows a response built from the FIFO output and the master
+    port is stalled; otherwise the master-port response is passed through.
+
+    The constructor arguments carry the names and default values of the
+    SystemVerilog parameters.
+    """
+
+    def __init__(self, AXI_ID_WIDTH=10, AXI_USER_WIDTH=4):
+        self.AXI_ID_WIDTH = AXI_ID_WIDTH
+
+        self.axi4_aclk = Signal()  # input
+        self.axi4_arstn = Signal()  # input
+        self.drop_i = Signal()  # input
+        self.done_o = Signal()  # output
+        self.id_i = Signal(AXI_ID_WIDTH)  # input
+        self.prefetch_i = Signal()  # input
+        self.hit_i = Signal()  # input
+        self.s_axi4_bid = Signal(AXI_ID_WIDTH)  # output
+        self.s_axi4_bresp = Signal(2)  # output
+        self.s_axi4_bvalid = Signal()  # output
+        self.s_axi4_buser = Signal(AXI_USER_WIDTH)  # output
+        self.s_axi4_bready = Signal()  # input
+        self.m_axi4_bid = Signal(AXI_ID_WIDTH)  # input
+        self.m_axi4_bresp = Signal(2)  # input
+        self.m_axi4_bvalid = Signal()  # input
+        self.m_axi4_buser = Signal(AXI_USER_WIDTH)  # input
+        self.m_axi4_bready = Signal()  # output
+
+    def elaborate(self, platform=None):
+        """Build the sender logic; ``platform`` is not used."""
+        m = Module()
+
+        # `dropping` is clocked by this module's own clock port and cleared
+        # by its active-low asynchronous reset port, so it gets a private
+        # clock domain driven from those two ports.
+        cd = ClockDomain("axi4", async_reset=True, local=True)
+        m.domains += cd
+        m.d.comb += [
+            cd.clk.eq(self.axi4_aclk),
+            cd.rst.eq(~self.axi4_arstn),
+        ]
+
+        fifo_valid = Signal()
+        fifo_pop = Signal()
+        fifo_push = Signal()
+        fifo_ready = Signal()
+        fifo_out = Signal(2 + self.AXI_ID_WIDTH)
+        drop_id = Signal(self.AXI_ID_WIDTH)
+        prefetch = Signal()
+        hit = Signal()
+        dropping = Signal()
+
+        # axi_buffer_rab is instantiated as an external module, using the
+        # parameter and port names of the SystemVerilog instantiation; its
+        # implementation has to be supplied to the toolchain.
+        m.submodules.u_fifo = Instance(
+            "axi_buffer_rab",
+            p_DATA_WIDTH=2 + self.AXI_ID_WIDTH,
+            p_BUFFER_DEPTH=4,
+            i_clk=self.axi4_aclk,
+            i_rstn=self.axi4_arstn,
+            # Pop
+            o_data_out=fifo_out,
+            o_valid_out=fifo_valid,
+            i_ready_in=fifo_pop,
+            # Push; {prefetch_i, hit_i, id_i}: Cat() takes the least
+            # significant field first
+            i_valid_in=fifo_push,
+            i_data_in=Cat(self.id_i, self.hit_i, self.prefetch_i),
+            o_ready_out=fifo_ready,
+        )
+
+        m.d.comb += [
+            Cat(drop_id, hit, prefetch).eq(fifo_out),
+            fifo_push.eq(self.drop_i & fifo_ready),
+            self.done_o.eq(fifo_push),
+            fifo_pop.eq(dropping & self.s_axi4_bready),
+            self.s_axi4_bvalid.eq(dropping | self.m_axi4_bvalid),
+            self.m_axi4_bready.eq(~dropping & self.s_axi4_bready),
+        ]
+
+        with m.If(fifo_valid & ~dropping):
+            m.d.axi4 += dropping.eq(1)
+        with m.Elif(fifo_pop):
+            m.d.axi4 += dropping.eq(0)
+
+        with m.If(dropping):
+            m.d.comb += [
+                self.s_axi4_buser.eq(0),
+                self.s_axi4_bid.eq(drop_id),
+                # The SystemVerilog priority chain yields 2'b00 only when
+                # prefetch and hit are both set; every other dropped
+                # request is answered with 2'b10.
+                self.s_axi4_bresp.eq(
+                    Mux(prefetch & hit, Const(0b00, 2), Const(0b10, 2))),
+            ]
+        with m.Else():
+            m.d.comb += [
+                self.s_axi4_buser.eq(self.m_axi4_buser),
+                self.s_axi4_bid.eq(self.m_axi4_bid),
+                self.s_axi4_bresp.eq(self.m_axi4_bresp),
+            ]
+        return m
+
+# // Copyright 2018 ETH Zurich and University of Bologna. +# // Copyright and related rights are licensed under the Solderpad Hardware +# // License, Version 0.51 (the "License"); you may not use this file except in +# // compliance with the License. You may obtain a copy of the License at +# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# // or agreed to in writing, software, hardware and materials distributed under +# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# // CONDITIONS OF ANY KIND, either express or implied. See the License for the +# // specific language governing permissions and limitations under the License.
+# +# module axi4_b_sender +# #( +# parameter AXI_ID_WIDTH = 10, +# parameter AXI_USER_WIDTH = 4 +# ) +# ( +# input logic axi4_aclk, +# input logic axi4_arstn, +# +# input logic drop_i, +# output logic done_o, +# input logic [AXI_ID_WIDTH-1:0] id_i, +# input logic prefetch_i, +# input logic hit_i, +# +# output logic [AXI_ID_WIDTH-1:0] s_axi4_bid, +# output logic [1:0] s_axi4_bresp, +# output logic s_axi4_bvalid, +# output logic [AXI_USER_WIDTH-1:0] s_axi4_buser, +# input logic s_axi4_bready, +# +# input logic [AXI_ID_WIDTH-1:0] m_axi4_bid, +# input logic [1:0] m_axi4_bresp, +# input logic m_axi4_bvalid, +# input logic [AXI_USER_WIDTH-1:0] m_axi4_buser, +# output logic m_axi4_bready +# ); +# +# logic fifo_valid; +# logic fifo_pop; +# logic fifo_push; +# logic fifo_ready; +# logic [AXI_ID_WIDTH-1:0] id; +# logic prefetch; +# logic hit; +# +# logic dropping; +# +# axi_buffer_rab +# #( +# .DATA_WIDTH ( 2+AXI_ID_WIDTH ), +# .BUFFER_DEPTH ( 4 ) +# ) +# u_fifo +# ( +# .clk ( axi4_aclk ), +# .rstn ( axi4_arstn ), +# // Pop +# .data_out ( {prefetch, hit, id} ), +# .valid_out ( fifo_valid ), +# .ready_in ( fifo_pop ), +# // Push +# .valid_in ( fifo_push ), +# .data_in ( {prefetch_i, hit_i, id_i} ), +# .ready_out ( fifo_ready ) +# ); +# +# assign fifo_push = drop_i & fifo_ready; +# assign done_o = fifo_push; +# +# assign fifo_pop = dropping & s_axi4_bready; +# +# always @ (posedge axi4_aclk or negedge axi4_arstn) begin +# if (axi4_arstn == 1'b0) begin +# dropping <= 1'b0; +# end else begin +# if (fifo_valid && ~dropping) +# dropping <= 1'b1; +# else if (fifo_pop) +# dropping <= 1'b0; +# end +# end +# +# assign s_axi4_buser = dropping ? {AXI_USER_WIDTH{1'b0}} : m_axi4_buser; +# assign s_axi4_bid = dropping ? id : m_axi4_bid; +# +# assign s_axi4_bresp = (dropping & prefetch & hit) ? 2'b00 : // prefetch hit, mutli, prot +# (dropping & prefetch ) ? 2'b10 : // prefetch miss +# (dropping & hit) ? 2'b10 : // non-prefetch multi, prot +# (dropping ) ? 
2'b10 : // non-prefetch miss
+#                         m_axi4_bresp;
+#
+#   assign s_axi4_bvalid = dropping | m_axi4_bvalid;
+#   assign m_axi4_bready = ~dropping & s_axi4_bready;
+#
+# endmodule
+#
+#
diff --git a/src/iommu/axi_rab/axi4_r_buffer.py b/src/iommu/axi_rab/axi4_r_buffer.py
new file mode 100644
index 00000000..91bdf0a5
--- /dev/null
+++ b/src/iommu/axi_rab/axi4_r_buffer.py
@@ -0,0 +1,120 @@
+# this file has been generated by sv2nmigen
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+
+
+class axi4_r_buffer(Elaboratable):
+    """Buffer stage for the AXI4 read-data (R) channel.
+
+    Packs the master-side R signals into one ``data_in`` word and unpacks
+    the slave-side R signals from ``data_out``.  Word layout, LSB first:
+    ``rresp`` (2 bits), ``rlast`` (1 bit), ``rid``, ``rdata``, ``ruser``.
+
+    The logic follows the SystemVerilog module kept as a comment at the
+    end of this file.
+
+    Parameters
+    ----------
+    AXI_DATA_WIDTH : int
+        Width of ``*rdata`` (reference default: 32).
+    AXI_ID_WIDTH : int
+        Width of ``*rid`` (reference default: 4).
+    AXI_USER_WIDTH : int
+        Width of ``*ruser`` (reference default: 4).
+    """
+
+    def __init__(self, AXI_DATA_WIDTH=32, AXI_ID_WIDTH=4, AXI_USER_WIDTH=4):
+        self.AXI_DATA_WIDTH = AXI_DATA_WIDTH
+        self.AXI_ID_WIDTH = AXI_ID_WIDTH
+        self.AXI_USER_WIDTH = AXI_USER_WIDTH
+
+        self.axi4_aclk = Signal()  # input
+        self.axi4_arstn = Signal()  # input
+        self.s_axi4_rid = Signal(AXI_ID_WIDTH)  # output
+        self.s_axi4_rresp = Signal(2)  # output
+        self.s_axi4_rdata = Signal(AXI_DATA_WIDTH)  # output
+        self.s_axi4_rlast = Signal()  # output
+        self.s_axi4_rvalid = Signal()  # output
+        self.s_axi4_ruser = Signal(AXI_USER_WIDTH)  # output
+        self.s_axi4_rready = Signal()  # input
+        self.m_axi4_rid = Signal(AXI_ID_WIDTH)  # input
+        self.m_axi4_rresp = Signal(2)  # input
+        self.m_axi4_rdata = Signal(AXI_DATA_WIDTH)  # input
+        self.m_axi4_rlast = Signal()  # input
+        self.m_axi4_rvalid = Signal()  # input
+        self.m_axi4_ruser = Signal(AXI_USER_WIDTH)  # input
+        self.m_axi4_rready = Signal()  # output
+
+        # Packed buffer words (the two `wire` declarations of the reference).
+        buffer_width = AXI_DATA_WIDTH + AXI_ID_WIDTH + AXI_USER_WIDTH + 3
+        self.data_in = Signal(buffer_width)
+        self.data_out = Signal(buffer_width)
+
+    def elaborate(self, platform=None):
+        m = Module()
+
+        # Field boundaries as half-open Python slices; they correspond to
+        # the ID_START..USER_END localparams of the reference.
+        id_start = 3
+        data_start = id_start + self.AXI_ID_WIDTH
+        user_start = data_start + self.AXI_DATA_WIDTH
+        user_stop = user_start + self.AXI_USER_WIDTH
+
+        # TODO(review): the reference instantiates `axi_buffer_rab u_buffer`
+        # (DATA_WIDTH = width of data_in, BUFFER_DEPTH = 4) between data_in
+        # and data_out; it also drives s_axi4_rvalid and m_axi4_rready.  It
+        # is not instantiated here, so data_out and both handshake outputs
+        # are undriven.
+
+        # Cat() places its first argument in the least significant bits.
+        m.d.comb += self.data_in.eq(Cat(self.m_axi4_rresp,
+                                        self.m_axi4_rlast,
+                                        self.m_axi4_rid,
+                                        self.m_axi4_rdata,
+                                        self.m_axi4_ruser))
+
+        m.d.comb += self.s_axi4_rresp.eq(self.data_out[0:2])
+        m.d.comb += self.s_axi4_rlast.eq(self.data_out[2])
+        m.d.comb += self.s_axi4_rid.eq(self.data_out[id_start:data_start])
+        m.d.comb += self.s_axi4_rdata.eq(self.data_out[data_start:user_start])
+        m.d.comb += self.s_axi4_ruser.eq(self.data_out[user_start:user_stop])
+        return m
+
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware +# // License, Version 0.51 (the "License"); you may not use this file except in +# // compliance with the License. You may obtain a copy of the License at +# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# // or agreed to in writing, software, hardware and materials distributed under +# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# // CONDITIONS OF ANY KIND, either express or implied. See the License for the +# // specific language governing permissions and limitations under the License. +# +# module axi4_r_buffer +# #( +# parameter AXI_DATA_WIDTH = 32, +# parameter AXI_ID_WIDTH = 4, +# parameter AXI_USER_WIDTH = 4 +# ) +# ( +# input logic axi4_aclk, +# input logic axi4_arstn, +# +# output logic [AXI_ID_WIDTH-1:0] s_axi4_rid, +# output logic [1:0] s_axi4_rresp, +# output logic [AXI_DATA_WIDTH-1:0] s_axi4_rdata, +# output logic s_axi4_rlast, +# output logic s_axi4_rvalid, +# output logic [AXI_USER_WIDTH-1:0] s_axi4_ruser, +# input logic s_axi4_rready, +# +# input logic [AXI_ID_WIDTH-1:0] m_axi4_rid, +# input logic [1:0] m_axi4_rresp, +# input logic [AXI_DATA_WIDTH-1:0] m_axi4_rdata, +# input logic m_axi4_rlast, +# input logic m_axi4_rvalid, +# input logic [AXI_USER_WIDTH-1:0] m_axi4_ruser, +# output logic m_axi4_rready +# ); +# +# wire [AXI_DATA_WIDTH+AXI_ID_WIDTH+AXI_USER_WIDTH+3-1:0] data_in; +# wire [AXI_DATA_WIDTH+AXI_ID_WIDTH+AXI_USER_WIDTH+3-1:0] data_out; +# +# localparam ID_START = 3; +# localparam ID_END = AXI_ID_WIDTH-1 + ID_START; +# localparam DATA_START = ID_END + 1; +# localparam DATA_END = AXI_DATA_WIDTH-1 + DATA_START; +# localparam USER_START = DATA_END + 1; +# localparam USER_END = AXI_USER_WIDTH-1 + USER_START; +# +# assign data_in [1:0] = m_axi4_rresp; +# assign data_in [2] = m_axi4_rlast; +# assign data_in [ID_END:ID_START] = m_axi4_rid; +# assign data_in[DATA_END:DATA_START] = m_axi4_rdata; +# assign 
data_in[USER_END:USER_START] = m_axi4_ruser;
+#
+#   assign s_axi4_rresp = data_out [1:0];
+#   assign s_axi4_rlast = data_out [2];
+#   assign s_axi4_rid = data_out [ID_END:ID_START];
+#   assign s_axi4_rdata = data_out[DATA_END:DATA_START];
+#   assign s_axi4_ruser = data_out[USER_END:USER_START];
+#
+#   axi_buffer_rab
+#     #(
+#       .DATA_WIDTH ( AXI_DATA_WIDTH+AXI_ID_WIDTH+AXI_USER_WIDTH+3 ),
+#       .BUFFER_DEPTH ( 4 )
+#       )
+#     u_buffer
+#     (
+#       .clk ( axi4_aclk ),
+#       .rstn ( axi4_arstn ),
+#       // Pop
+#       .valid_out ( s_axi4_rvalid ),
+#       .data_out ( data_out ),
+#       .ready_in ( s_axi4_rready ),
+#       // Push
+#       .valid_in ( m_axi4_rvalid ),
+#       .data_in ( data_in ),
+#       .ready_out ( m_axi4_rready )
+#     );
+#
+# endmodule
+#
+#
diff --git a/src/iommu/axi_rab/axi4_r_sender.py b/src/iommu/axi_rab/axi4_r_sender.py
new file mode 100644
index 00000000..d4e22bb2
--- /dev/null
+++ b/src/iommu/axi_rab/axi4_r_sender.py
@@ -0,0 +1,206 @@
+# this file has been generated by sv2nmigen
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+
+
+class axi4_r_sender(Elaboratable):
+    """AXI4 read-data (R) channel sender.
+
+    In the FORWARDING state R beats pass from the master side to the slave
+    side.  When no burst is in flight and the drop FIFO holds a request,
+    the module switches to DROPPING, stalls the master side and emits
+    ``len + 1`` locally generated beats (``rlast`` on the final one) with
+    ``rid`` from the FIFO entry, ``ruser`` zero and ``rresp`` 0b00 for a
+    prefetch hit, 0b10 otherwise.  ``rdata`` is always forwarded.
+
+    The logic follows the SystemVerilog module kept as a comment at the
+    end of this file.
+
+    Parameters
+    ----------
+    AXI_DATA_WIDTH : int
+        Width of ``*rdata`` (reference default: 32).
+    AXI_ID_WIDTH : int
+        Width of ``*rid`` / ``id_i`` (reference default: 4).
+    AXI_USER_WIDTH : int
+        Width of ``*ruser`` (reference default: 4).
+    """
+
+    # `localparam BUFFER_DEPTH` of the reference (depth of the drop FIFO).
+    BUFFER_DEPTH = 16
+
+    # Encoding of `enum logic [1:0] { FORWARDING, DROPPING }`.
+    FORWARDING = 0
+    DROPPING = 1
+
+    def __init__(self, AXI_DATA_WIDTH=32, AXI_ID_WIDTH=4, AXI_USER_WIDTH=4):
+        self.AXI_DATA_WIDTH = AXI_DATA_WIDTH
+        self.AXI_ID_WIDTH = AXI_ID_WIDTH
+        self.AXI_USER_WIDTH = AXI_USER_WIDTH
+
+        self.axi4_aclk = Signal()  # input
+        self.axi4_arstn = Signal()  # input
+        self.drop_i = Signal()  # input
+        self.drop_len_i = Signal(8)  # input
+        self.done_o = Signal()  # output
+        self.id_i = Signal(AXI_ID_WIDTH)  # input
+        self.prefetch_i = Signal()  # input
+        self.hit_i = Signal()  # input
+        self.s_axi4_rid = Signal(AXI_ID_WIDTH)  # output
+        self.s_axi4_rresp = Signal(2)  # output
+        self.s_axi4_rdata = Signal(AXI_DATA_WIDTH)  # output
+        self.s_axi4_rlast = Signal()  # output
+        self.s_axi4_rvalid = Signal()  # output
+        self.s_axi4_ruser = Signal(AXI_USER_WIDTH)  # output
+        self.s_axi4_rready = Signal()  # input
+        self.m_axi4_rid = Signal(AXI_ID_WIDTH)  # input
+        self.m_axi4_rresp = Signal(2)  # input
+        self.m_axi4_rdata = Signal(AXI_DATA_WIDTH)  # input
+        self.m_axi4_rlast = Signal()  # input
+        self.m_axi4_rvalid = Signal()  # input
+        self.m_axi4_ruser = Signal(AXI_USER_WIDTH)  # input
+        self.m_axi4_rready = Signal()  # output
+
+        # Internal signals (the `logic` declarations of the reference).
+        self.fifo_valid = Signal()
+        self.fifo_pop = Signal()
+        self.fifo_push = Signal()
+        self.fifo_ready = Signal()
+        self.id = Signal(AXI_ID_WIDTH)
+        self.len = Signal(8)
+        self.prefetch = Signal()
+        self.hit = Signal()
+        self.dropping = Signal()
+        # *_d is the combinational next value, *_q the registered value.
+        self.state_d = Signal(2)
+        self.state_q = Signal(2)
+        self.burst_ongoing_d = Signal()
+        self.burst_ongoing_q = Signal()
+        self.drop_cnt_d = Signal(8)
+        self.drop_cnt_q = Signal(8)
+
+    def elaborate(self, platform=None):
+        m = Module()
+
+        # TODO(review): the reference instantiates `axi_buffer_rab u_fifo`
+        # (DATA_WIDTH = 2+AXI_ID_WIDTH+8, BUFFER_DEPTH = BUFFER_DEPTH) that
+        # pushes {prefetch_i, hit_i, id_i, drop_len_i} and pops
+        # {prefetch, hit, id, len}.  It is not instantiated here, so
+        # fifo_ready, fifo_valid, prefetch, hit, id and len are undriven and
+        # the FSM never leaves FORWARDING.
+
+        # Accept a drop request whenever the FIFO has room.
+        m.d.comb += self.fifo_push.eq(self.drop_i & self.fifo_ready)
+        m.d.comb += self.done_o.eq(self.fifo_push)
+
+        # Defaults of the always_comb block.  dropping, s_axi4_rlast and
+        # fifo_pop default to 0, which is what an unassigned combinational
+        # nMigen signal already evaluates to.
+        m.d.comb += self.burst_ongoing_d.eq(self.burst_ongoing_q)
+        m.d.comb += self.drop_cnt_d.eq(self.drop_cnt_q)
+        m.d.comb += self.state_d.eq(self.state_q)
+
+        with m.Switch(self.state_q):
+            with m.Case(self.FORWARDING):
+                m.d.comb += self.s_axi4_rlast.eq(self.m_axi4_rlast)
+                # Remember whether there is currently a burst ongoing.
+                with m.If(self.m_axi4_rvalid & self.m_axi4_rready):
+                    with m.If(self.m_axi4_rlast):
+                        m.d.comb += self.burst_ongoing_d.eq(0)
+                    with m.Else():
+                        m.d.comb += self.burst_ongoing_d.eq(1)
+                # If no burst is ongoing (using the value just computed, as
+                # the reference does) and the FIFO holds a drop request,
+                # process it.
+                with m.If(~self.burst_ongoing_d & self.fifo_valid):
+                    m.d.comb += self.drop_cnt_d.eq(self.len)
+                    m.d.comb += self.state_d.eq(self.DROPPING)
+
+            with m.Case(self.DROPPING):
+                m.d.comb += self.dropping.eq(1)
+                m.d.comb += self.s_axi4_rlast.eq(self.drop_cnt_q == 0)
+                # Handshake on slave interface.
+                with m.If(self.s_axi4_rready):
+                    m.d.comb += self.drop_cnt_d.eq(self.drop_cnt_q - 1)
+                    with m.If(self.drop_cnt_q == 0):
+                        # Last beat sent: keep the counter at zero, retire
+                        # the FIFO entry and resume forwarding.
+                        m.d.comb += self.drop_cnt_d.eq(0)
+                        m.d.comb += self.fifo_pop.eq(1)
+                        m.d.comb += self.state_d.eq(self.FORWARDING)
+
+            with m.Case():
+                # Unused encodings of the 2-bit state fall back to FORWARDING.
+                m.d.comb += self.state_d.eq(self.FORWARDING)
+
+        m.d.comb += self.s_axi4_rdata.eq(self.m_axi4_rdata)
+
+        with m.If(self.dropping):
+            m.d.comb += self.s_axi4_ruser.eq(0)
+            m.d.comb += self.s_axi4_rid.eq(self.id)
+            with m.If(self.prefetch & self.hit):
+                # prefetch hit
+                m.d.comb += self.s_axi4_rresp.eq(0b00)
+            with m.Else():
+                # prefetch miss and every non-prefetch drop
+                m.d.comb += self.s_axi4_rresp.eq(0b10)
+        with m.Else():
+            m.d.comb += self.s_axi4_ruser.eq(self.m_axi4_ruser)
+            m.d.comb += self.s_axi4_rid.eq(self.m_axi4_rid)
+            m.d.comb += self.s_axi4_rresp.eq(self.m_axi4_rresp)
+
+        m.d.comb += self.s_axi4_rvalid.eq(self.dropping | self.m_axi4_rvalid)
+        m.d.comb += self.m_axi4_rready.eq(~self.dropping & self.s_axi4_rready)
+
+        # NOTE(review): the reference clocks these registers on axi4_aclk
+        # with an asynchronous active-low reset axi4_arstn.  The default
+        # `sync` domain is used here; presumably the parent maps it onto
+        # those ports - confirm.  All three reset to 0 (state FORWARDING).
+        m.d.sync += self.burst_ongoing_q.eq(self.burst_ongoing_d)
+        m.d.sync += self.drop_cnt_q.eq(self.drop_cnt_d)
+        m.d.sync += self.state_q.eq(self.state_d)
+        return m
+
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+# +# //import CfMath::log2; +# +# module axi4_r_sender +# #( +# parameter AXI_DATA_WIDTH = 32, +# parameter AXI_ID_WIDTH = 4, +# parameter AXI_USER_WIDTH = 4 +# ) +# ( +# input logic axi4_aclk, +# input logic axi4_arstn, +# +# input logic drop_i, +# input logic [7:0] drop_len_i, +# output logic done_o, +# input logic [AXI_ID_WIDTH-1:0] id_i, +# input logic prefetch_i, +# input logic hit_i, +# +# output logic [AXI_ID_WIDTH-1:0] s_axi4_rid, +# output logic [1:0] s_axi4_rresp, +# output logic [AXI_DATA_WIDTH-1:0] s_axi4_rdata, +# output logic s_axi4_rlast, +# output logic s_axi4_rvalid, +# output logic [AXI_USER_WIDTH-1:0] s_axi4_ruser, +# input logic s_axi4_rready, +# +# input logic [AXI_ID_WIDTH-1:0] m_axi4_rid, +# input logic [1:0] m_axi4_rresp, +# input logic [AXI_DATA_WIDTH-1:0] m_axi4_rdata, +# input logic m_axi4_rlast, +# input logic m_axi4_rvalid, +# input logic [AXI_USER_WIDTH-1:0] m_axi4_ruser, +# output logic m_axi4_rready +# ); +# +# localparam BUFFER_DEPTH = 16; +# +# logic fifo_valid; +# logic fifo_pop; +# logic fifo_push; +# logic fifo_ready; +# logic [AXI_ID_WIDTH-1:0] id; +# logic [7:0] len; +# logic prefetch; +# logic hit; +# +# logic dropping; +# +# enum logic [1:0] { FORWARDING, DROPPING } +# state_d, state_q; +# logic burst_ongoing_d, burst_ongoing_q; +# logic [7:0] drop_cnt_d, drop_cnt_q; +# +# axi_buffer_rab +# #( +# .DATA_WIDTH ( 2+AXI_ID_WIDTH+8 ), +# .BUFFER_DEPTH ( BUFFER_DEPTH ) +# ) +# u_fifo +# ( +# .clk ( axi4_aclk ), +# .rstn ( axi4_arstn ), +# // Pop +# .data_out ( {prefetch, hit, id, len} ), +# .valid_out ( fifo_valid ), +# .ready_in ( fifo_pop ), +# // Push +# .valid_in ( fifo_push ), +# .data_in ( {prefetch_i, hit_i, id_i, drop_len_i} ), +# .ready_out ( fifo_ready ) +# ); +# +# assign fifo_push = drop_i & fifo_ready; +# assign done_o = fifo_push; +# +# always_comb begin +# burst_ongoing_d = burst_ongoing_q; +# drop_cnt_d = drop_cnt_q; +# dropping = 1'b0; +# s_axi4_rlast = 1'b0; +# fifo_pop = 1'b0; +# state_d = state_q; +# +# case 
(state_q) +# FORWARDING: begin +# s_axi4_rlast = m_axi4_rlast; +# // Remember whether there is currently a burst ongoing. +# if (m_axi4_rvalid && m_axi4_rready) begin +# if (m_axi4_rlast) begin +# burst_ongoing_d = 1'b0; +# end else begin +# burst_ongoing_d = 1'b1; +# end +# end +# // If there is no burst ongoing and the FIFO has a drop request ready, process it. +# if (!burst_ongoing_d && fifo_valid) begin +# drop_cnt_d = len; +# state_d = DROPPING; +# end +# end +# +# DROPPING: begin +# dropping = 1'b1; +# s_axi4_rlast = (drop_cnt_q == '0); +# // Handshake on slave interface +# if (s_axi4_rready) begin +# drop_cnt_d -= 1; +# if (drop_cnt_q == '0) begin +# drop_cnt_d = '0; +# fifo_pop = 1'b1; +# state_d = FORWARDING; +# end +# end +# end +# +# default: begin +# state_d = FORWARDING; +# end +# endcase +# end +# +# assign s_axi4_rdata = m_axi4_rdata; +# +# assign s_axi4_ruser = dropping ? {AXI_USER_WIDTH{1'b0}} : m_axi4_ruser; +# assign s_axi4_rid = dropping ? id : m_axi4_rid; +# +# assign s_axi4_rresp = (dropping & prefetch & hit) ? 2'b00 : // prefetch hit, mutli, prot +# (dropping & prefetch ) ? 2'b10 : // prefetch miss +# (dropping & hit) ? 2'b10 : // non-prefetch multi, prot +# (dropping ) ? 2'b10 : // non-prefetch miss +# m_axi4_rresp; +# +# assign s_axi4_rvalid = dropping | m_axi4_rvalid; +# assign m_axi4_rready = ~dropping & s_axi4_rready; +# +# always_ff @(posedge axi4_aclk, negedge axi4_arstn) begin +# if (axi4_arstn == 1'b0) begin +# burst_ongoing_q <= 1'b0; +# drop_cnt_q <= 'b0; +# state_q <= FORWARDING; +# end else begin +# burst_ongoing_q <= burst_ongoing_d; +# drop_cnt_q <= drop_cnt_d; +# state_q <= state_d; +# end +# end +# +# endmodule +# +# +# +# diff --git a/src/iommu/axi_rab/axi4_w_buffer.py b/src/iommu/axi_rab/axi4_w_buffer.py new file mode 100644 index 00000000..aa06dc22 --- /dev/null +++ b/src/iommu/axi_rab/axi4_w_buffer.py @@ -0,0 +1,777 @@ +# this file has been generated by sv2nmigen +# // Copyright 2018 ETH Zurich and University of Bologna. 
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+
+
+class axi4_w_buffer(Elaboratable):
+    """Port-level skeleton of the AXI4 write-data (W) channel buffer.
+
+    Only the port list of the SystemVerilog module has been translated.
+    ``elaborate`` returns an empty module, so none of the outputs is
+    driven; the behaviour exists only in the SystemVerilog reference kept
+    after this class (port list as a comment, body as a string literal).
+
+    Parameters
+    ----------
+    AXI_DATA_WIDTH : int
+        Width of ``*wdata``; ``*wstrb`` is ``AXI_DATA_WIDTH // 8`` bits
+        wide (reference default: 32).
+    AXI_ID_WIDTH : int
+        Width of ``l1_id_i``, ``l2_id_i`` and ``id_o`` (reference
+        default: 4).
+    AXI_USER_WIDTH : int
+        Width of ``*wuser`` (reference default: 4).
+    ENABLE_L2TLB : int
+        Stored only; selects the HUM-buffer variant in the reference
+        (reference default: 0).
+    HUM_BUFFER_DEPTH : int
+        Stored only; depth of the HUM buffer in the reference (reference
+        default: 16).
+    """
+
+    def __init__(self, AXI_DATA_WIDTH=32, AXI_ID_WIDTH=4, AXI_USER_WIDTH=4,
+                 ENABLE_L2TLB=0, HUM_BUFFER_DEPTH=16):
+        self.AXI_DATA_WIDTH = AXI_DATA_WIDTH
+        self.AXI_ID_WIDTH = AXI_ID_WIDTH
+        self.AXI_USER_WIDTH = AXI_USER_WIDTH
+        self.ENABLE_L2TLB = ENABLE_L2TLB
+        self.HUM_BUFFER_DEPTH = HUM_BUFFER_DEPTH
+
+        # The reference declares the strobes as [AXI_DATA_WIDTH/8-1:0],
+        # i.e. one strobe bit per data byte.
+        strb_width = AXI_DATA_WIDTH // 8
+
+        self.axi4_aclk = Signal()  # input
+        self.axi4_arstn = Signal()  # input
+
+        # L1 & L2 interfaces
+        self.l1_done_o = Signal()  # output
+        self.l1_accept_i = Signal()  # input
+        self.l1_save_i = Signal()  # input
+        self.l1_drop_i = Signal()  # input
+        self.l1_master_i = Signal()  # input
+        self.l1_id_i = Signal(AXI_ID_WIDTH)  # input
+        self.l1_len_i = Signal(8)  # input
+        self.l1_prefetch_i = Signal()  # input
+        self.l1_hit_i = Signal()  # input
+        self.l2_done_o = Signal()  # output
+        self.l2_accept_i = Signal()  # input
+        self.l2_drop_i = Signal()  # input
+        self.l2_master_i = Signal()  # input
+        self.l2_id_i = Signal(AXI_ID_WIDTH)  # input
+        self.l2_len_i = Signal(8)  # input
+        self.l2_prefetch_i = Signal()  # input
+        self.l2_hit_i = Signal()  # input
+        self.master_select_o = Signal()  # output
+        self.input_stall_o = Signal()  # output
+        self.output_stall_o = Signal()  # output
+
+        # B sender interface
+        self.b_drop_o = Signal()  # output
+        self.b_done_i = Signal()  # input
+        self.id_o = Signal(AXI_ID_WIDTH)  # output
+        self.prefetch_o = Signal()  # output
+        self.hit_o = Signal()  # output
+
+        # AXI W channel interfaces
+        self.s_axi4_wdata = Signal(AXI_DATA_WIDTH)  # input
+        self.s_axi4_wvalid = Signal()  # input
+        self.s_axi4_wready = Signal()  # output
+        self.s_axi4_wstrb = Signal(strb_width)  # input
+        self.s_axi4_wlast = Signal()  # input
+        self.s_axi4_wuser = Signal(AXI_USER_WIDTH)  # input
+        self.m_axi4_wdata = Signal(AXI_DATA_WIDTH)  # output
+        self.m_axi4_wvalid = Signal()  # output
+        self.m_axi4_wready = Signal()  # input
+        self.m_axi4_wstrb = Signal(strb_width)  # output
+        self.m_axi4_wlast = Signal()  # output
+        self.m_axi4_wuser = Signal(AXI_USER_WIDTH)  # output
+
+    def elaborate(self, platform=None):
+        # TODO(review): the buffers, FIFOs and the HUM-buffer FSM of the
+        # reference are not translated yet.
+        m = Module()
+        return m
+
+
+#
+# //import CfMath::log2;
+#
+# module axi4_w_buffer
+#   #(
+#     parameter AXI_DATA_WIDTH = 32,
+#     parameter AXI_ID_WIDTH = 4,
+#     parameter AXI_USER_WIDTH = 4,
+#     parameter ENABLE_L2TLB = 0,
+#     parameter HUM_BUFFER_DEPTH = 16
+#   )
+#   (
+#     input logic axi4_aclk,
+#     input logic axi4_arstn,
+#
+#     // L1 & L2 interfaces
+#     output logic l1_done_o,
+#     input logic l1_accept_i,
+#     input logic l1_save_i,
+#     input logic l1_drop_i,
+#     input logic l1_master_i,
+#     input logic [AXI_ID_WIDTH-1:0] l1_id_i,
+#     input logic [7:0] l1_len_i,
+#     input logic l1_prefetch_i,
+#     input logic l1_hit_i,
+#
+#     output logic l2_done_o,
+#     input logic l2_accept_i,
+#     input logic l2_drop_i,
+#     input logic l2_master_i,
+#     input logic [AXI_ID_WIDTH-1:0] l2_id_i,
+#     input logic [7:0] l2_len_i,
+#     input logic l2_prefetch_i,
+#     input logic l2_hit_i,
+#
+#     output logic master_select_o,
+#     output logic input_stall_o,
+#     output logic output_stall_o,
+#
+#     // B sender interface
+#     output logic b_drop_o,
+#     input logic b_done_i,
+#     output logic [AXI_ID_WIDTH-1:0] id_o,
+#     output logic prefetch_o,
+#     output logic hit_o,
+#
+#     // AXI W channel interfaces
+#     input logic [AXI_DATA_WIDTH-1:0] s_axi4_wdata,
+#     input logic s_axi4_wvalid,
+#     output logic s_axi4_wready,
+#     input logic [AXI_DATA_WIDTH/8-1:0] s_axi4_wstrb,
+#     input logic s_axi4_wlast,
+#     input logic [AXI_USER_WIDTH-1:0] s_axi4_wuser,
+#
+#     output logic [AXI_DATA_WIDTH-1:0] m_axi4_wdata,
+#     output logic m_axi4_wvalid,
+#     input logic m_axi4_wready,
+#     output logic [AXI_DATA_WIDTH/8-1:0] m_axi4_wstrb,
+#     output
logic m_axi4_wlast, +# output logic [AXI_USER_WIDTH-1:0] m_axi4_wuser +# ); +# +""" + + localparam BUFFER_WIDTH = AXI_DATA_WIDTH+AXI_USER_WIDTH+AXI_DATA_WIDTH/8+1; + + localparam INPUT_BUFFER_DEPTH = 4; + localparam L1_FIFO_DEPTH = 8; + localparam L2_FIFO_DEPTH = 4; + + logic [AXI_DATA_WIDTH-1:0] axi4_wdata; + logic axi4_wvalid; + logic axi4_wready; + logic [AXI_DATA_WIDTH/8-1:0] axi4_wstrb; + logic axi4_wlast; + logic [AXI_USER_WIDTH-1:0] axi4_wuser; + + logic l1_fifo_valid_out; + logic l1_fifo_ready_in; + logic l1_fifo_valid_in; + logic l1_fifo_ready_out; + + logic l1_req; + logic l1_accept_cur, l1_save_cur, l1_drop_cur; + logic l1_master_cur; + logic [AXI_ID_WIDTH-1:0] l1_id_cur; + logic [7:0] l1_len_cur; + logic l1_hit_cur, l1_prefetch_cur; + logic l1_save_in, l1_save_out; + logic [log2(L1_FIFO_DEPTH)-1:0] n_l1_save_SP; + + logic l2_fifo_valid_out; + logic l2_fifo_ready_in; + logic l2_fifo_valid_in; + logic l2_fifo_ready_out; + + logic l2_req; + logic l2_accept_cur, l2_drop_cur; + logic l2_master_cur; + logic [AXI_ID_WIDTH-1:0] l2_id_cur; + logic [7:0] l2_len_cur; + logic l2_hit_cur, l2_prefetch_cur; + + logic fifo_select, fifo_select_SN, fifo_select_SP; + logic w_done; + logic b_drop_set; + + // HUM buffer signals + logic hum_buf_ready_out; + logic hum_buf_valid_in; + logic hum_buf_ready_in; + logic hum_buf_valid_out; + logic hum_buf_underfull; + + logic [AXI_DATA_WIDTH-1:0] hum_buf_wdata; + logic [AXI_DATA_WIDTH/8-1:0] hum_buf_wstrb; + logic hum_buf_wlast; + logic [AXI_USER_WIDTH-1:0] hum_buf_wuser; + + logic hum_buf_drop_req_SN, hum_buf_drop_req_SP; + logic [7:0] hum_buf_drop_len_SN, hum_buf_drop_len_SP; + logic hum_buf_almost_full; + + logic stop_store; + logic wlast_in, wlast_out; + logic signed [3:0] n_wlast_SN, n_wlast_SP; + logic block_forwarding; + + // Search FSM + typedef enum logic [3:0] {STORE, BYPASS, + WAIT_L1_BYPASS_YES, WAIT_L2_BYPASS_YES, + WAIT_L1_BYPASS_NO, WAIT_L2_BYPASS_NO, + FLUSH, DISCARD, + DISCARD_FINISH} + hum_buf_state_t; + 
hum_buf_state_t hum_buf_SP; // Present state + hum_buf_state_tbg hum_buf_SN; // Next State + + axi_buffer_rab + #( + .DATA_WIDTH ( BUFFER_WIDTH ), + .BUFFER_DEPTH ( INPUT_BUFFER_DEPTH ) + ) + u_input_buf + ( + .clk ( axi4_aclk ), + .rstn ( axi4_arstn ), + // Push + .data_in ( {s_axi4_wuser, s_axi4_wstrb, s_axi4_wdata, s_axi4_wlast} ), + .valid_in ( s_axi4_wvalid ), + .ready_out ( s_axi4_wready ), + // Pop + .data_out ( {axi4_wuser, axi4_wstrb, axi4_wdata, axi4_wlast} ), + .valid_out ( axi4_wvalid ), + .ready_in ( axi4_wready ) + ); + + axi_buffer_rab + #( + .DATA_WIDTH ( 2+AXI_ID_WIDTH+8+4 ), + .BUFFER_DEPTH ( L1_FIFO_DEPTH ) + ) + u_l1_fifo + ( + .clk ( axi4_aclk ), + .rstn ( axi4_arstn ), + // Push + .data_in ( {l1_prefetch_i, l1_hit_i, l1_id_i, l1_len_i, l1_master_i, l1_accept_i, l1_save_i, l1_drop_i} ), + .valid_in ( l1_fifo_valid_in ), + .ready_out ( l1_fifo_ready_out ), + // Pop + .data_out ( {l1_prefetch_cur, l1_hit_cur, l1_id_cur, l1_len_cur, l1_master_cur, l1_accept_cur, l1_save_cur, l1_drop_cur} ), + .valid_out ( l1_fifo_valid_out ), + .ready_in ( l1_fifo_ready_in ) + ); + + // Push upon receiving new requests from the TLB. + assign l1_req = l1_accept_i | l1_save_i | l1_drop_i; + assign l1_fifo_valid_in = l1_req & l1_fifo_ready_out; + + // Signal handshake + assign l1_done_o = l1_fifo_valid_in; + assign l2_done_o = l2_fifo_valid_in; + + // Stall AW input of L1 TLB + assign input_stall_o = ~(l1_fifo_ready_out & l2_fifo_ready_out); + + // Interface b_drop signals + handshake + always_comb begin + if (fifo_select == 1'b0) begin + prefetch_o = l1_prefetch_cur; + hit_o = l1_hit_cur; + id_o = l1_id_cur; + + l1_fifo_ready_in = w_done | b_done_i; + l2_fifo_ready_in = 1'b0; + end else begin + prefetch_o = l2_prefetch_cur; + hit_o = l2_hit_cur; + id_o = l2_id_cur; + + l1_fifo_ready_in = 1'b0; + l2_fifo_ready_in = w_done | b_done_i; + end + end + + // Detect when an L1 transaction save request enters or exits the L1 FIFO. 
+ assign l1_save_in = l1_fifo_valid_in & l1_save_i; + assign l1_save_out = l1_fifo_ready_in & l1_save_cur; + + // Count the number of L1 transaction to save in the L1 FIFO. + always_ff @(posedge axi4_aclk or negedge axi4_arstn) begin + if (axi4_arstn == 0) begin + n_l1_save_SP <= '0; + end else if (l1_save_in ^ l1_save_out) begin + if (l1_save_in) begin + n_l1_save_SP <= n_l1_save_SP + 1'b1; + end else if (l1_save_out) begin + n_l1_save_SP <= n_l1_save_SP - 1'b1; + end + end + end + + // Stall forwarding of AW L1 hits if: + // 1. The HUM buffer does not allow to be bypassed. + // 2. There are multiple L1 save requests in the FIFO, i.e., multiple L2 outputs pending. + assign output_stall_o = (n_l1_save_SP > 1) || (block_forwarding == 1'b1); + + generate + if (ENABLE_L2TLB == 1) begin : HUM_BUFFER + + axi_buffer_rab_bram + #( + .DATA_WIDTH ( BUFFER_WIDTH ), + .BUFFER_DEPTH ( HUM_BUFFER_DEPTH ) + ) + u_hum_buf + ( + .clk ( axi4_aclk ), + .rstn ( axi4_arstn ), + // Push + .data_in ( {axi4_wuser, axi4_wstrb, axi4_wdata, axi4_wlast} ), + .valid_in ( hum_buf_valid_in ), + .ready_out ( hum_buf_ready_out ), + // Pop + .data_out ( {hum_buf_wuser, hum_buf_wstrb, hum_buf_wdata, hum_buf_wlast} ), + .valid_out ( hum_buf_valid_out ), + .ready_in ( hum_buf_ready_in ), + // Clear + .almost_full ( hum_buf_almost_full ), + .underfull ( hum_buf_underfull ), + .drop_req ( hum_buf_drop_req_SP ), + .drop_len ( hum_buf_drop_len_SP ) + ); + + axi_buffer_rab + #( + .DATA_WIDTH ( 2+AXI_ID_WIDTH+8+3 ), + .BUFFER_DEPTH ( L2_FIFO_DEPTH ) + ) + u_l2_fifo + ( + .clk ( axi4_aclk ), + .rstn ( axi4_arstn ), + // Push + .data_in ( {l2_prefetch_i, l2_hit_i, l2_id_i, l2_len_i, l2_master_i, l2_accept_i, l2_drop_i} ), + .valid_in ( l2_fifo_valid_in ), + .ready_out ( l2_fifo_ready_out ), + // Pop + .data_out ( {l2_prefetch_cur, l2_hit_cur, l2_id_cur, l2_len_cur, l2_master_cur, l2_accept_cur, l2_drop_cur} ), + .valid_out ( l2_fifo_valid_out ), + .ready_in ( l2_fifo_ready_in ) + ); + + // Push upon 
receiving new result from TLB. + assign l2_req = l2_accept_i | l2_drop_i; + assign l2_fifo_valid_in = l2_req & l2_fifo_ready_out; + + assign wlast_in = axi4_wlast & hum_buf_valid_in & hum_buf_ready_out; + assign wlast_out = hum_buf_wlast & hum_buf_valid_out & hum_buf_ready_in; + + always_ff @(posedge axi4_aclk or negedge axi4_arstn) begin + if (axi4_arstn == 0) begin + fifo_select_SP <= 1'b0; + hum_buf_drop_len_SP <= 'b0; + hum_buf_drop_req_SP <= 1'b0; + hum_buf_SP <= STORE; + n_wlast_SP <= 'b0; + end else begin + fifo_select_SP <= fifo_select_SN; + hum_buf_drop_len_SP <= hum_buf_drop_len_SN; + hum_buf_drop_req_SP <= hum_buf_drop_req_SN; + hum_buf_SP <= hum_buf_SN; + n_wlast_SP <= n_wlast_SN; + end + end + + always_comb begin + n_wlast_SN = n_wlast_SP; + if (hum_buf_drop_req_SP) begin // Happens exactly once per burst to be dropped. + n_wlast_SN -= 1; + end + if (wlast_in) begin + n_wlast_SN += 1; + end + if (wlast_out) begin + n_wlast_SN -= 1; + end + end + + always_comb begin : HUM_BUFFER_FSM + hum_buf_SN = hum_buf_SP; + + m_axi4_wlast = 1'b0; + m_axi4_wdata = 'b0; + m_axi4_wstrb = 'b0; + m_axi4_wuser = 'b0; + + m_axi4_wvalid = 1'b0; + axi4_wready = 1'b0; + + hum_buf_valid_in = 1'b0; + hum_buf_ready_in = 1'b0; + + hum_buf_drop_req_SN = hum_buf_drop_req_SP; + hum_buf_drop_len_SN = hum_buf_drop_len_SP; + master_select_o = 1'b0; + + w_done = 1'b0; // read from FIFO without handshake with B sender + b_drop_o = 1'b0; // send data from FIFO to B sender (with handshake) + fifo_select = 1'b0; + + fifo_select_SN = fifo_select_SP; + stop_store = 1'b0; + + block_forwarding = 1'b0; + + unique case (hum_buf_SP) + + STORE : begin + // Simply store the data in the buffer. + hum_buf_valid_in = axi4_wvalid & hum_buf_ready_out; + axi4_wready = hum_buf_ready_out; + + // We have got a full burst in the HUM buffer, thus stop storing. 
+ if (wlast_in & !hum_buf_underfull | (n_wlast_SP > $signed(0))) begin + hum_buf_SN = WAIT_L1_BYPASS_YES; + + // The buffer is full, thus wait for decision. + end else if (~hum_buf_ready_out) begin + hum_buf_SN = WAIT_L1_BYPASS_NO; + end + + // Avoid the forwarding of L1 hits until we know whether we can bypass. + if (l1_fifo_valid_out & l1_save_cur) begin + block_forwarding = 1'b1; + end + end + + WAIT_L1_BYPASS_YES : begin + // Wait for orders from L1 TLB. + if (l1_fifo_valid_out) begin + + // L1 hit - forward data from buffer + if (l1_accept_cur) begin + m_axi4_wlast = hum_buf_wlast; + m_axi4_wdata = hum_buf_wdata; + m_axi4_wstrb = hum_buf_wstrb; + m_axi4_wuser = hum_buf_wuser; + + m_axi4_wvalid = hum_buf_valid_out; + hum_buf_ready_in = m_axi4_wready; + + master_select_o = l1_master_cur; + + // Detect last data beat. + if (wlast_out) begin + fifo_select = 1'b0; + w_done = 1'b1; + hum_buf_SN = STORE; + end + + // L1 miss - wait for L2 + end else if (l1_save_cur) begin + fifo_select = 1'b0; + w_done = 1'b1; + hum_buf_SN = WAIT_L2_BYPASS_YES; + + // L1 prefetch, prot, multi - drop data + end else if (l1_drop_cur) begin + fifo_select_SN = 1'b0; // L1 + hum_buf_drop_req_SN = 1'b1; + hum_buf_drop_len_SN = l1_len_cur; + hum_buf_SN = FLUSH; + end + end + end + + WAIT_L2_BYPASS_YES : begin + // Wait for orders from L2 TLB. + if (l2_fifo_valid_out) begin + + // L2 hit - forward data from buffer + if (l2_accept_cur) begin + m_axi4_wlast = hum_buf_wlast; + m_axi4_wdata = hum_buf_wdata; + m_axi4_wstrb = hum_buf_wstrb; + m_axi4_wuser = hum_buf_wuser; + + m_axi4_wvalid = hum_buf_valid_out; + hum_buf_ready_in = m_axi4_wready; + + master_select_o = l2_master_cur; + + // Detect last data beat. 
+ if (wlast_out) begin + fifo_select = 1'b1; + w_done = 1'b1; + hum_buf_SN = STORE; + end + + // L2 miss/prefetch hit + end else if (l2_drop_cur) begin + fifo_select_SN = 1'b1; // L2 + hum_buf_drop_req_SN = 1'b1; + hum_buf_drop_len_SN = l2_len_cur; + hum_buf_SN = FLUSH; + end + + // While we wait for orders from L2 TLB, we can still drop and accept L1 transactions. + end else if (l1_fifo_valid_out) begin + + // L1 hit + if (l1_accept_cur) begin + hum_buf_SN = BYPASS; + + // L1 prefetch/prot/multi + end else if (l1_drop_cur) begin + hum_buf_SN = DISCARD; + end + end + end + + FLUSH : begin + // Clear HUM buffer flush request. + hum_buf_drop_req_SN = 1'b0; + + // perform handshake with B sender + fifo_select = fifo_select_SP; + b_drop_o = 1'b1; + if (b_done_i) begin + hum_buf_SN = STORE; + end + end + + BYPASS : begin + // Forward one full transaction from input buffer. + m_axi4_wlast = axi4_wlast; + m_axi4_wdata = axi4_wdata; + m_axi4_wstrb = axi4_wstrb; + m_axi4_wuser = axi4_wuser; + + m_axi4_wvalid = axi4_wvalid; + axi4_wready = m_axi4_wready; + + master_select_o = l1_master_cur; + + // We have got a full transaction. + if (axi4_wlast & axi4_wready & axi4_wvalid) begin + fifo_select = 1'b0; + w_done = 1'b1; + hum_buf_SN = WAIT_L2_BYPASS_YES; + end + end + + DISCARD : begin + // Discard one full transaction from input buffer. + axi4_wready = 1'b1; + + // We have got a full transaction. + if (axi4_wlast & axi4_wready & axi4_wvalid) begin + // Try to perform handshake with B sender. + fifo_select = 1'b0; + b_drop_o = 1'b1; + // We cannot wait here due to axi4_wready. + if (b_done_i) begin + hum_buf_SN = WAIT_L2_BYPASS_YES; + end else begin + hum_buf_SN = DISCARD_FINISH; + end + end + end + + DISCARD_FINISH : begin + // Perform handshake with B sender. + fifo_select = 1'b0; + b_drop_o = 1'b1; + if (b_done_i) begin + hum_buf_SN = WAIT_L2_BYPASS_YES; + end + end + + WAIT_L1_BYPASS_NO : begin + // Do not allow the forwarding of L1 hits. 
+ block_forwarding = 1'b1; + + // Wait for orders from L1 TLB. + if (l1_fifo_valid_out) begin + + // L1 hit - forward data from/through HUM buffer and refill the buffer + if (l1_accept_cur) begin + // Forward data from HUM buffer. + m_axi4_wlast = hum_buf_wlast; + m_axi4_wdata = hum_buf_wdata; + m_axi4_wstrb = hum_buf_wstrb; + m_axi4_wuser = hum_buf_wuser; + + m_axi4_wvalid = hum_buf_valid_out; + hum_buf_ready_in = m_axi4_wready; + + master_select_o = l1_master_cur; + + // Refill the HUM buffer. Stop when buffer full. + stop_store = ~hum_buf_ready_out; + hum_buf_valid_in = stop_store ? 1'b0 : axi4_wvalid ; + axi4_wready = stop_store ? 1'b0 : hum_buf_ready_out; + + // Detect last data beat. + if (wlast_out) begin + fifo_select = 1'b0; + w_done = 1'b1; + if (~hum_buf_ready_out | hum_buf_almost_full) begin + hum_buf_SN = WAIT_L1_BYPASS_NO; + end else begin + hum_buf_SN = STORE; + end + end + + // Allow the forwarding of L1 hits. + block_forwarding = 1'b0; + + // L1 miss - wait for L2 + end else if (l1_save_cur) begin + fifo_select = 1'b0; + w_done = 1'b1; + hum_buf_SN = WAIT_L2_BYPASS_NO; + + // L1 prefetch, prot, multi - drop data + end else if (l1_drop_cur) begin + fifo_select_SN = 1'b0; // L1 + hum_buf_drop_req_SN = 1'b1; + hum_buf_drop_len_SN = l1_len_cur; + hum_buf_SN = FLUSH; + + // Allow the forwarding of L1 hits. + block_forwarding = 1'b0; + end + end + end + + WAIT_L2_BYPASS_NO : begin + // Do not allow the forwarding of L1 hits. + block_forwarding = 1'b1; + + // Wait for orders from L2 TLB. + if (l2_fifo_valid_out) begin + + // L2 hit - forward first part from HUM buffer, rest from input buffer + if (l2_accept_cur) begin + // Forward data from HUM buffer. + m_axi4_wlast = hum_buf_wlast; + m_axi4_wdata = hum_buf_wdata; + m_axi4_wstrb = hum_buf_wstrb; + m_axi4_wuser = hum_buf_wuser; + + m_axi4_wvalid = hum_buf_valid_out; + hum_buf_ready_in = m_axi4_wready; + + master_select_o = l2_master_cur; + + // Refill the HUM buffer. Stop when buffer full. 
+ stop_store = ~hum_buf_ready_out; + hum_buf_valid_in = stop_store ? 1'b0 : axi4_wvalid ; + axi4_wready = stop_store ? 1'b0 : hum_buf_ready_out; + + // Detect last data beat. + if (wlast_out) begin + fifo_select = 1'b1; + w_done = 1'b1; + if (~hum_buf_ready_out | hum_buf_almost_full) begin + hum_buf_SN = WAIT_L1_BYPASS_NO; + end else begin + hum_buf_SN = STORE; + end + end + + // Allow the forwarding of L1 hits. + block_forwarding = 1'b0; + + // L2 miss/prefetch hit - drop data + end else if (l2_drop_cur) begin + fifo_select_SN = 1'b1; // L2 + hum_buf_drop_req_SN = 1'b1; + hum_buf_drop_len_SN = l2_len_cur; + hum_buf_SN = FLUSH; + + // Allow the forwarding of L1 hits. + block_forwarding = 1'b0; + end + end + end + + + default: begin + hum_buf_SN = STORE; + end + + endcase // hum_buf_SP + end // HUM_BUFFER_FSM + + assign b_drop_set = 1'b0; + + end else begin // HUM_BUFFER + + // register to perform the handshake with B sender + always_ff @(posedge axi4_aclk or negedge axi4_arstn) begin + if (axi4_arstn == 0) begin + b_drop_o <= 1'b0; + end else if (b_done_i) begin + b_drop_o <= 1'b0; + end else if (b_drop_set) begin + b_drop_o <= 1'b1;; + end + end + + always_comb begin : OUTPUT_CTRL + + fifo_select = 1'b0; + w_done = 1'b0; + b_drop_set = 1'b0; + + m_axi4_wlast = 1'b0; + m_axi4_wdata = 'b0; + m_axi4_wstrb = 'b0; + m_axi4_wuser = 'b0; + + m_axi4_wvalid = 1'b0; + axi4_wready = 1'b0; + + if (l1_fifo_valid_out) begin + // forward data + if (l1_accept_cur) begin + m_axi4_wlast = axi4_wlast; + m_axi4_wdata = axi4_wdata; + m_axi4_wstrb = axi4_wstrb; + m_axi4_wuser = axi4_wuser; + + m_axi4_wvalid = axi4_wvalid; + axi4_wready = m_axi4_wready; + + // Simply pop from FIFO upon last data beat. + w_done = axi4_wlast & axi4_wvalid & axi4_wready; + + // discard entire burst + end else if (b_drop_o == 1'b0) begin + axi4_wready = 1'b1; + + // Simply pop from FIFO upon last data beat. Perform handshake with B sender. 
class axi4_w_sender(Elaboratable):
    """Combinational pass-through for the AXI4 write-data (W) channel.

    Every slave-side W signal (``s_axi4_*``) is forwarded unchanged to the
    matching master-side signal (``m_axi4_*``); the master-side ``wready`` is
    returned to the slave side.

    Parameters
    ----------
    AXI_DATA_WIDTH : int
        Width of ``wdata`` in bits.  ``wstrb`` carries one bit per byte and
        is therefore ``AXI_DATA_WIDTH // 8`` bits wide.
    AXI_USER_WIDTH : int
        Width of ``wuser`` in bits.

    The defaults (32 and 2) are the parameter defaults of the reference
    SystemVerilog module.  The generated code declared ``wdata``, ``wstrb``
    and ``wuser`` as single-bit signals, which silently truncated the bus.
    """

    def __init__(self, AXI_DATA_WIDTH=32, AXI_USER_WIDTH=2):
        self.AXI_DATA_WIDTH = AXI_DATA_WIDTH
        self.AXI_USER_WIDTH = AXI_USER_WIDTH
        strb_width = AXI_DATA_WIDTH // 8  # one strobe bit per data byte

        # Clock and reset are part of the port list of the reference module
        # but are not used by this purely combinational block.
        self.axi4_aclk = Signal()  # input
        self.axi4_arstn = Signal()  # input

        # Slave side (towards the write-data source).
        self.s_axi4_wdata = Signal(AXI_DATA_WIDTH)  # input
        self.s_axi4_wvalid = Signal()  # input
        self.s_axi4_wready = Signal()  # output
        self.s_axi4_wstrb = Signal(strb_width)  # input
        self.s_axi4_wlast = Signal()  # input
        self.s_axi4_wuser = Signal(AXI_USER_WIDTH)  # input

        # Master side (towards the write-data sink).
        self.m_axi4_wdata = Signal(AXI_DATA_WIDTH)  # output
        self.m_axi4_wvalid = Signal()  # output
        self.m_axi4_wready = Signal()  # input
        self.m_axi4_wstrb = Signal(strb_width)  # output
        self.m_axi4_wlast = Signal()  # output
        self.m_axi4_wuser = Signal(AXI_USER_WIDTH)  # output

    def elaborate(self, platform=None):
        """Build the module: six combinational wire-through assignments."""
        m = Module()
        m.d.comb += [
            # payload: slave -> master
            self.m_axi4_wdata.eq(self.s_axi4_wdata),
            self.m_axi4_wstrb.eq(self.s_axi4_wstrb),
            self.m_axi4_wlast.eq(self.s_axi4_wlast),
            self.m_axi4_wuser.eq(self.s_axi4_wuser),
            # handshake: valid flows downstream, ready flows upstream
            self.m_axi4_wvalid.eq(self.s_axi4_wvalid),
            self.s_axi4_wready.eq(self.m_axi4_wready),
        ]
        return m
class axi_buffer_rab(Elaboratable):
    """Register-based FIFO with valid/ready handshakes on both sides.

    Port of the reference SystemVerilog module ``axi_buffer_rab``.  The
    generated code contained ``self.None`` placeholders (a Python syntax
    error), referenced a non-existent ``self.full`` and an undefined
    ``DATA_WIDTH``; this implementation replaces them with the actual logic.

    Parameters
    ----------
    DATA_WIDTH : int
        Width of ``data_in`` / ``data_out`` in bits.
    BUFFER_DEPTH : int
        Number of entries the buffer can hold (at least 1).

    NOTE(review): the reference module declares no parameter defaults; the
    defaults used here (32 bits, 4 entries) are placeholders that only keep
    the zero-argument constructor usable -- confirm against the instantiating
    module.

    Ports
    -----
    clk, rstn : clock and active-low asynchronous reset.
    data_out, valid_out, ready_in : downstream port.
    valid_in, data_in, ready_out : upstream port.
    """

    def __init__(self, DATA_WIDTH=32, BUFFER_DEPTH=4):
        if BUFFER_DEPTH < 1:
            raise ValueError("BUFFER_DEPTH must be at least 1, got {!r}"
                             .format(BUFFER_DEPTH))
        self.DATA_WIDTH = DATA_WIDTH
        self.BUFFER_DEPTH = BUFFER_DEPTH

        self.clk = Signal()  # input
        self.rstn = Signal()  # input
        # Downstream port
        self.data_out = Signal(DATA_WIDTH)  # output
        self.valid_out = Signal()  # output
        self.ready_in = Signal()  # input
        # Upstream port
        self.valid_in = Signal()  # input
        self.data_in = Signal(DATA_WIDTH)  # input
        self.ready_out = Signal()  # output

    def elaborate(self, platform=None):
        """Build the FIFO: storage, read/write pointers and fill counter."""
        # Imported locally because the module-level import list of this file
        # does not provide these two names.
        from nmigen import Array, ClockDomain

        m = Module()
        depth = self.BUFFER_DEPTH

        # Local clock domain driven by the module's own ports, mirroring
        # `always @(posedge clk or negedge rstn)` of the reference design.
        cd = ClockDomain("rab_buf", async_reset=True, local=True)
        m.domains += cd
        m.d.comb += [
            cd.clk.eq(self.clk),
            cd.rst.eq(~self.rstn),  # rstn is active low
        ]

        # Internal state; every register resets to 0 like in the reference.
        ptr_width = max(1, (depth - 1).bit_length())
        pointer_in = Signal(ptr_width)   # next location to write
        pointer_out = Signal(ptr_width)  # location currently presented
        elements = Signal(depth.bit_length())  # holds 0 .. depth inclusive
        buffer = Array(
            Signal(self.DATA_WIDTH, name="buffer_{}".format(i))
            for i in range(depth)
        )

        full = Signal()
        push = Signal()  # a word is accepted from upstream this cycle
        pop = Signal()   # a word is taken by downstream this cycle

        m.d.comb += [
            full.eq(elements == depth),
            push.eq(self.valid_in & ~full),
            pop.eq(self.ready_in & self.valid_out),
            # Output ports
            self.data_out.eq(buffer[pointer_out]),
            self.valid_out.eq(elements != 0),
            self.ready_out.eq(~full),
        ]

        # Fill counter: changes only when exactly one of push/pop happens.
        with m.If(pop & ~push):
            m.d.rab_buf += elements.eq(elements - 1)
        with m.Elif(push & ~pop):
            m.d.rab_buf += elements.eq(elements + 1)

        # Input side: store the word and advance the write pointer with
        # wrap-around at the last entry.
        with m.If(push):
            m.d.rab_buf += buffer[pointer_in].eq(self.data_in)
            with m.If(pointer_in == depth - 1):
                m.d.rab_buf += pointer_in.eq(0)
            with m.Else():
                m.d.rab_buf += pointer_in.eq(pointer_in + 1)

        # Output side: advance the read pointer once the word was taken.
        with m.If(pop):
            with m.If(pointer_out == depth - 1):
                m.d.rab_buf += pointer_out.eq(0)
            with m.Else():
                m.d.rab_buf += pointer_out.eq(pointer_out + 1)

        return m
+# // Copyright and related rights are licensed under the Solderpad Hardware +# // License, Version 0.51 (the "License"); you may not use this file except in +# // compliance with the License. You may obtain a copy of the License at +# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# // or agreed to in writing, software, hardware and materials distributed under +# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# // CONDITIONS OF ANY KIND, either express or implied. See the License for the +# // specific language governing permissions and limitations under the License. +# +# //import CfMath::log2; +# +# module axi_buffer_rab +# //#( +# // parameter DATA_WIDTH, +# // parameter BUFFER_DEPTH +# //) +# ( +# input logic clk, +# input logic rstn, +# +# // Downstream port +# output logic [DATA_WIDTH-1:0] data_out, +# output logic valid_out, +# input logic ready_in, +# +# // Upstream port +# input logic valid_in, +# input logic [DATA_WIDTH-1:0] data_in, +# output logic ready_out +# ); +# +# localparam integer LOG_BUFFER_DEPTH = log2(BUFFER_DEPTH); +# +# // Internal data structures +# reg [LOG_BUFFER_DEPTH - 1 : 0] pointer_in; // location to which we last wrote +# reg [LOG_BUFFER_DEPTH - 1 : 0] pointer_out; // location from which we last sent +# reg [LOG_BUFFER_DEPTH : 0] elements; // number of elements in the buffer +# reg [DATA_WIDTH - 1 : 0] buffer [BUFFER_DEPTH - 1 : 0]; +# +# wire full; +# +# integer loop1; +# +# assign full = (elements == BUFFER_DEPTH); +# +# always @(posedge clk or negedge rstn) +# begin: elements_sequential +# if (rstn == 1'b0) +# elements <= 0; +# else +# begin +# // ------------------ +# // Are we filling up? 
+# // ------------------ +# // One out, none in +# if (ready_in && valid_out && (!valid_in || full)) +# elements <= elements - 1; +# // None out, one in +# else if ((!valid_out || !ready_in) && valid_in && !full) +# elements <= elements + 1; +# // Else, either one out and one in, or none out and none in - stays unchanged +# end +# end +# +# always @(posedge clk or negedge rstn) +# begin: buffers_sequential +# if (rstn == 1'b0) +# begin +# for (loop1 = 0 ; loop1 < BUFFER_DEPTH ; loop1 = loop1 + 1) +# buffer[loop1] <= 0; +# end +# else +# begin +# // Update the memory +# if (valid_in && !full) +# buffer[pointer_in] <= data_in; +# end +# end +# +# always @(posedge clk or negedge rstn) +# begin: sequential +# if (rstn == 1'b0) +# begin +# pointer_out <= 0; +# pointer_in <= 0; +# end +# else +# begin +# // ------------------------------------ +# // Check what to do with the input side +# // ------------------------------------ +# // We have some input, increase by 1 the input pointer +# if (valid_in && !full) +# begin +# if (pointer_in == $unsigned(BUFFER_DEPTH - 1)) +# pointer_in <= 0; +# else +# pointer_in <= pointer_in + 1; +# end +# // Else we don't have any input, the input pointer stays the same +# +# // ------------------------------------- +# // Check what to do with the output side +# // ------------------------------------- +# // We had pushed one flit out, we can try to go for the next one +# if (ready_in && valid_out) +# begin +# if (pointer_out == $unsigned(BUFFER_DEPTH - 1)) +# pointer_out <= 0; +# else +# pointer_out <= pointer_out + 1; +# end +# // Else stay on the same output location +# end +# end +# +# // Update output ports +# assign data_out = buffer[pointer_out]; +# assign valid_out = (elements != 0); +# +# assign ready_out = ~full; +# +# endmodule +# +# diff --git a/src/iommu/axi_rab/axi_buffer_rab_bram.py b/src/iommu/axi_rab/axi_buffer_rab_bram.py new file mode 100644 index 00000000..349b314e --- /dev/null +++ 
class axi_buffer_rab_bram(Elaboratable):
    """Port declaration for the BRAM-backed buffer ``axi_buffer_rab_bram``.

    Only the interface is available: ``elaborate`` returns an empty module
    because the logic of the reference SystemVerilog design (which
    instantiates ``ram_tp_write_first``) has not been ported yet.

    Parameters
    ----------
    DATA_WIDTH : int
        Width of ``data_in`` / ``data_out`` in bits.
    BUFFER_DEPTH : int
        Number of entries of the buffer.  Stored for the future
        implementation; no port width depends on it.

    NOTE(review): the reference module declares no parameter defaults; the
    defaults used here (32 bits, 4 entries) are placeholders that replace the
    previously undefined global ``DATA_WIDTH`` and keep the zero-argument
    constructor usable -- confirm against the instantiating module.
    """

    def __init__(self, DATA_WIDTH=32, BUFFER_DEPTH=4):
        self.DATA_WIDTH = DATA_WIDTH
        self.BUFFER_DEPTH = BUFFER_DEPTH

        self.clk = Signal()  # input
        self.rstn = Signal()  # input
        # Downstream port
        self.data_out = Signal(DATA_WIDTH)  # output
        self.valid_out = Signal()  # output
        self.ready_in = Signal()  # input
        # Upstream port
        self.valid_in = Signal()  # input
        self.data_in = Signal(DATA_WIDTH)  # input
        self.ready_out = Signal()  # output
        # Status and drop control
        self.almost_full = Signal()  # output
        self.underfull = Signal()  # output
        self.drop_req = Signal()  # input
        # Number of items to drop; per the reference design counting starts
        # at zero, i.e. drop_len == 0 together with drop_req drops one item.
        self.drop_len = Signal(8)  # input

    def elaborate(self, platform=None):
        """Return an empty module (logic not ported yet, see class docstring)."""
        m = Module()
        return m
+# +# ////import CfMath::log2; +# +# module axi_buffer_rab_bram +# //#( +# // parameter DATA_WIDTH, +# // parameter BUFFER_DEPTH +# // ) +# ( +# input logic clk, +# input logic rstn, +# +# // Downstream port +# output logic [DATA_WIDTH-1:0] data_out, +# output logic valid_out, +# input logic ready_in, +# +# // Upstream port +# input logic valid_in, +# input logic [DATA_WIDTH-1:0] data_in, +# output logic ready_out, +# +# // Status and drop control +# output logic almost_full, +# output logic underfull, +# input logic drop_req, +# // Number of items to drop. As for AXI lengths, counting starts at zero, i.e., `drop_len == 0` +# // and `drop_req` means drop one item. +# input logic [7:0] drop_len +# ); +# +""" #docstring_begin + // The BRAM needs to be in "write-first" mode for first-word fall-through FIFO behavior. + // To still push and pop simultaneously if the buffer is full, we internally increase the + // buffer depth by 1. + localparam ACT_BUFFER_DEPTH = BUFFER_DEPTH+1; + localparam ACT_LOG_BUFFER_DEPTH = log2(ACT_BUFFER_DEPTH+1); + + /** + * Internal data structures + */ + // Location to which we last wrote + logic [ACT_LOG_BUFFER_DEPTH-1:0] ptr_in_d, ptr_in_q; + // Location from which we last sent + logic [ACT_LOG_BUFFER_DEPTH-1:0] ptr_out_d, ptr_out_q; + // Required for fall-through behavior on the first word + logic [ACT_LOG_BUFFER_DEPTH-1:0] ptr_out_bram; + // Number of elements in the buffer. Can be negative if elements that have been dropped have not + // yet been written. 
+ logic signed [ACT_LOG_BUFFER_DEPTH:0] n_elems_d, n_elems_q; + + logic [DATA_WIDTH-1:0] data_out_bram, data_out_q; + logic valid_out_q; + + logic full; + + assign almost_full = (n_elems_q == BUFFER_DEPTH-1); + assign full = (n_elems_q == BUFFER_DEPTH); + + always_ff @(posedge clk, negedge rstn) begin + if (~rstn) begin + n_elems_q <= '0; + ptr_in_q <= '0; + ptr_out_q <= '0; + end else begin + n_elems_q <= n_elems_d; + ptr_in_q <= ptr_in_d; + ptr_out_q <= ptr_out_d; + end + end + + // Update the number of elements. + always_comb begin + n_elems_d = n_elems_q; + if (drop_req) begin + n_elems_d -= (drop_len + 1); + end + if (valid_in && ready_out) begin + n_elems_d += 1; + end + if (valid_out && ready_in) begin + n_elems_d -= 1; + end + end + + // Update the output pointer. + always_comb begin + ptr_out_d = ptr_out_q; + if (drop_req) begin + if ((ptr_out_q + drop_len + 1) > (ACT_BUFFER_DEPTH - 1)) begin + ptr_out_d = drop_len + 1 - (ACT_BUFFER_DEPTH - ptr_out_q); + end else begin + ptr_out_d += (drop_len + 1); + end + end + if (valid_out && ready_in) begin + if (ptr_out_d == (ACT_BUFFER_DEPTH - 1)) begin + ptr_out_d = '0; + end else begin + ptr_out_d += 1; + end + end + end + + // The BRAM has a read latency of one cycle, so apply the new address one cycle earlier for + // first-word fall-through FIFO behavior. + //assign ptr_out_bram = (ptr_out_q == (ACT_BUFFER_DEPTH-1)) ? '0 : (ptr_out_q + 1); + assign ptr_out_bram = ptr_out_d; + + // Update the input pointer. + always_comb begin + ptr_in_d = ptr_in_q; + if (valid_in && ready_out) begin + if (ptr_in_d == (ACT_BUFFER_DEPTH - 1)) begin + ptr_in_d = '0; + end else begin + ptr_in_d += 1; + end + end + end + + // Update output ports. 
class axi_rab_cfg(Elaboratable):
    """Port declaration for ``axi_rab_cfg``, the AXI4-Lite configuration and
    miss-handling interface of the RAB.

    Only the interface is available: ``elaborate`` returns an empty module
    because the logic of the reference SystemVerilog design has not been
    ported yet.

    The parameters and their defaults are those of the reference module.
    The generated code referenced them as undefined globals, contained the
    converter error text ``1+ERROR p_expression_25`` as the ``s_axi_wstrb``
    width (a syntax error) and collapsed every packed multi-dimensional port
    to a single bit.  Packed arrays are represented here as one flat signal
    of the total bit width, element 0 in the least significant bits.

    Parameters
    ----------
    N_PORTS : int
        Number of RAB ports; one L2 write interface is provided per port.
    N_REGS : int
        Number of 64-bit L1 configuration registers.
    N_L2_SETS, N_L2_SET_ENTRIES : int
        L2 TLB geometry (not used by the port list).
    ADDR_WIDTH_PHYS, ADDR_WIDTH_VIRT : int
        Physical / virtual address widths in bits.
    N_FLAGS : int
        Number of flag bits per slice (not used by the port list).
    AXI_DATA_WIDTH, AXI_ADDR_WIDTH : int
        Data and address widths of the AXI4-Lite interface.
    MISS_META_WIDTH : int
        Width of the miss meta data in bits.
    MH_FIFO_DEPTH : int
        Depth of the miss-handling FIFOs (not used by the port list).
    """

    def __init__(self, N_PORTS=3, N_REGS=196, N_L2_SETS=32,
                 N_L2_SET_ENTRIES=32, ADDR_WIDTH_PHYS=40, ADDR_WIDTH_VIRT=32,
                 N_FLAGS=4, AXI_DATA_WIDTH=64, AXI_ADDR_WIDTH=32,
                 MISS_META_WIDTH=10, MH_FIFO_DEPTH=16):
        self.N_PORTS = N_PORTS
        self.N_REGS = N_REGS
        self.N_L2_SETS = N_L2_SETS
        self.N_L2_SET_ENTRIES = N_L2_SET_ENTRIES
        self.ADDR_WIDTH_PHYS = ADDR_WIDTH_PHYS
        self.ADDR_WIDTH_VIRT = ADDR_WIDTH_VIRT
        self.N_FLAGS = N_FLAGS
        self.AXI_DATA_WIDTH = AXI_DATA_WIDTH
        self.AXI_ADDR_WIDTH = AXI_ADDR_WIDTH
        self.MISS_META_WIDTH = MISS_META_WIDTH
        self.MH_FIFO_DEPTH = MH_FIFO_DEPTH

        n_bytes = AXI_DATA_WIDTH // 8  # byte lanes of the AXI-Lite data bus

        self.Clk_CI = Signal()  # input
        self.Rst_RBI = Signal()  # input

        # AXI Lite interface
        self.s_axi_awaddr = Signal(AXI_ADDR_WIDTH)  # input
        self.s_axi_awvalid = Signal()  # input
        self.s_axi_awready = Signal()  # output
        # [AXI_DATA_WIDTH/8-1:0][7:0] in the reference: n_bytes bytes, flat
        self.s_axi_wdata = Signal(n_bytes * 8)  # input
        self.s_axi_wstrb = Signal(n_bytes)  # input
        self.s_axi_wvalid = Signal()  # input
        self.s_axi_wready = Signal()  # output
        self.s_axi_bresp = Signal(2)  # output
        self.s_axi_bvalid = Signal()  # output
        self.s_axi_bready = Signal()  # input
        self.s_axi_araddr = Signal(AXI_ADDR_WIDTH)  # input
        self.s_axi_arvalid = Signal()  # input
        self.s_axi_arready = Signal()  # output
        self.s_axi_rdata = Signal(AXI_DATA_WIDTH)  # output
        self.s_axi_rresp = Signal(2)  # output
        self.s_axi_rvalid = Signal()  # output
        self.s_axi_rready = Signal()  # input

        # Slice configuration: [N_REGS-1:0][63:0] in the reference, flat
        self.L1Cfg_DO = Signal(N_REGS * 64)  # output
        self.L1AllowMultiHit_SO = Signal()  # output

        # Miss handling
        self.MissAddr_DI = Signal(ADDR_WIDTH_VIRT)  # input
        self.MissMeta_DI = Signal(MISS_META_WIDTH)  # input
        self.Miss_SI = Signal()  # input
        self.MhFifoFull_SO = Signal()  # output

        # L2 TLB: one data word / address / write enable per port, flat
        self.wdata_l2 = Signal(N_PORTS * AXI_DATA_WIDTH)  # output
        self.waddr_l2 = Signal(N_PORTS * AXI_ADDR_WIDTH)  # output
        self.wren_l2 = Signal(N_PORTS)  # output

    def elaborate(self, platform=None):
        """Return an empty module (logic not ported yet, see class docstring)."""
        m = Module()
        return m
+# +# // --=========================================================================-- +# // +# // █████╗ ██╗ ██╗██╗ ██████╗ █████╗ ██████╗ ██████╗███████╗ ██████╗ +# // ██╔══██╗╚██╗██╔╝██║ ██╔══██╗██╔══██╗██╔══██╗ ██╔════╝██╔════╝██╔════╝ +# // ███████║ ╚███╔╝ ██║ ██████╔╝███████║██████╔╝ ██║ █████╗ ██║ ███╗ +# // ██╔══██║ ██╔██╗ ██║ ██╔══██╗██╔══██║██╔══██╗ ██║ ██╔══╝ ██║ ██║ +# // ██║ ██║██╔╝ ██╗██║ ██║ ██║██║ ██║██████╔╝ ╚██████╗██║ ╚██████╔╝ +# // ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚═════╝ ╚═════╝╚═╝ ╚═════╝ +# // +# // +# // Author: Pirmin Vogel - vogelpi@iis.ee.ethz.ch +# // +# // Purpose : AXI4-Lite configuration and miss handling interface for RAB +# // +# // --=========================================================================-- +# +# //import CfMath::log2; +# +# module axi_rab_cfg +# #( +# parameter N_PORTS = 3, +# parameter N_REGS = 196, +# parameter N_L2_SETS = 32, +# parameter N_L2_SET_ENTRIES= 32, +# parameter ADDR_WIDTH_PHYS = 40, +# parameter ADDR_WIDTH_VIRT = 32, +# parameter N_FLAGS = 4, +# parameter AXI_DATA_WIDTH = 64, +# parameter AXI_ADDR_WIDTH = 32, +# parameter MISS_META_WIDTH = 10, // <= FIFO_WIDTH +# parameter MH_FIFO_DEPTH = 16 +# ) +# ( +# input logic Clk_CI, +# input logic Rst_RBI, +# +# // AXI Lite interface +# input logic [AXI_ADDR_WIDTH-1:0] s_axi_awaddr, +# input logic s_axi_awvalid, +# output logic s_axi_awready, +# input logic [AXI_DATA_WIDTH/8-1:0][7:0] s_axi_wdata, +# input logic [AXI_DATA_WIDTH/8-1:0] s_axi_wstrb, +# input logic s_axi_wvalid, +# output logic s_axi_wready, +# output logic [1:0] s_axi_bresp, +# output logic s_axi_bvalid, +# input logic s_axi_bready, +# input logic [AXI_ADDR_WIDTH-1:0] s_axi_araddr, +# input logic s_axi_arvalid, +# output logic s_axi_arready, +# output logic [AXI_DATA_WIDTH-1:0] s_axi_rdata, +# output logic [1:0] s_axi_rresp, +# output logic s_axi_rvalid, +# input logic s_axi_rready, +# +# // Slice configuration +# output logic [N_REGS-1:0][63:0] L1Cfg_DO, +# output logic L1AllowMultiHit_SO, 
+# +# // Miss handling +# input logic [ADDR_WIDTH_VIRT-1:0] MissAddr_DI, +# input logic [MISS_META_WIDTH-1:0] MissMeta_DI, +# input logic Miss_SI, +# output logic MhFifoFull_SO, +# +# // L2 TLB +# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] wdata_l2, +# output logic [N_PORTS-1:0] [AXI_ADDR_WIDTH-1:0] waddr_l2, +# output logic [N_PORTS-1:0] wren_l2 +# ); +# +""" #docstring_begin + + localparam ADDR_LSB = log2(64/8); // 64 even if the AXI Lite interface is 32, + // because RAB slices are 64 bit wide. + localparam ADDR_MSB = log2(N_REGS)+ADDR_LSB-1; + + localparam L2SINGLE_AMAP_SIZE = 16'h4000; // Maximum 2048 TLB entries in L2 + + localparam integer N_L2_ENTRIES = N_L2_SETS * N_L2_SET_ENTRIES; + + localparam logic [AXI_ADDR_WIDTH-1:0] L2_VA_MAX_ADDR = (N_L2_ENTRIES-1) << 2; + + logic [AXI_DATA_WIDTH/8-1:0][7:0] L1Cfg_DP[N_REGS]; // [Byte][Bit] + genvar j; + + // █████╗ ██╗ ██╗██╗██╗ ██╗ ██╗ ██╗████████╗███████╗ + // ██╔══██╗╚██╗██╔╝██║██║ ██║ ██║ ██║╚══██╔══╝██╔════╝ + // ███████║ ╚███╔╝ ██║███████║█████╗██║ ██║ ██║ █████╗ + // ██╔══██║ ██╔██╗ ██║╚════██║╚════╝██║ ██║ ██║ ██╔══╝ + // ██║ ██║██╔╝ ██╗██║ ██║ ███████╗██║ ██║ ███████╗ + // ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝╚═╝ ╚═╝ ╚══════╝ + // + logic [AXI_ADDR_WIDTH-1:0] awaddr_reg; + logic awaddr_done_rise; + logic awaddr_done_reg; + logic awaddr_done_reg_dly; + + logic [AXI_DATA_WIDTH/8-1:0][7:0] wdata_reg; + logic [AXI_DATA_WIDTH/8-1:0] wstrb_reg; + logic wdata_done_rise; + logic wdata_done_reg; + logic wdata_done_reg_dly; + + logic wresp_done_reg; + logic wresp_running_reg; + + logic [AXI_ADDR_WIDTH-1:0] araddr_reg; + logic araddr_done_reg; + + logic [AXI_DATA_WIDTH-1:0] rdata_reg; + logic rresp_done_reg; + logic rresp_running_reg; + + logic awready; + logic wready; + logic bvalid; + + logic arready; + logic rvalid; + + logic wren; + logic wren_l1; + + assign wren = ( wdata_done_rise & awaddr_done_reg ) | ( awaddr_done_rise & wdata_done_reg ); + assign wdata_done_rise = wdata_done_reg & ~wdata_done_reg_dly; + 
assign awaddr_done_rise = awaddr_done_reg & ~awaddr_done_reg_dly; + + // reg_dly + always @(posedge Clk_CI or negedge Rst_RBI) + begin + if (!Rst_RBI) + begin + wdata_done_reg_dly <= 1'b0; + awaddr_done_reg_dly <= 1'b0; + end + else + begin + wdata_done_reg_dly <= wdata_done_reg; + awaddr_done_reg_dly <= awaddr_done_reg; + end + end + + // AW Channel + always @(posedge Clk_CI or negedge Rst_RBI) + begin + if (!Rst_RBI) + begin + awaddr_done_reg <= 1'b0; + awaddr_reg <= '0; + awready <= 1'b1; + end + else + begin + if (awready && s_axi_awvalid) + begin + awready <= 1'b0; + awaddr_done_reg <= 1'b1; + awaddr_reg <= s_axi_awaddr; + end + else if (awaddr_done_reg && wresp_done_reg) + begin + awready <= 1'b1; + awaddr_done_reg <= 1'b0; + end + end + end + + // W Channel + always @(posedge Clk_CI or negedge Rst_RBI) + begin + if (!Rst_RBI) + begin + wdata_done_reg <= 1'b0; + wready <= 1'b1; + wdata_reg <= '0; + wstrb_reg <= '0; + end + else + begin + if (wready && s_axi_wvalid) + begin + wready <= 1'b0; + wdata_done_reg <= 1'b1; + wdata_reg <= s_axi_wdata; + wstrb_reg <= s_axi_wstrb; + end + else if (wdata_done_reg && wresp_done_reg) + begin + wready <= 1'b1; + wdata_done_reg <= 1'b0; + end + end + end + + // B Channel + always @(posedge Clk_CI or negedge Rst_RBI) + begin + if (!Rst_RBI) + begin + bvalid <= 1'b0; + wresp_done_reg <= 1'b0; + wresp_running_reg <= 1'b0; + end + else + begin + if (awaddr_done_reg && wdata_done_reg && !wresp_done_reg) + begin + if (!wresp_running_reg) + begin + bvalid <= 1'b1; + wresp_running_reg <= 1'b1; + end + else if (s_axi_bready) + begin + bvalid <= 1'b0; + wresp_done_reg <= 1'b1; + wresp_running_reg <= 1'b0; + end + end + else + begin + bvalid <= 1'b0; + wresp_done_reg <= 1'b0; + wresp_running_reg <= 1'b0; + end + end + end + + // AR Channel + always @(posedge Clk_CI or negedge Rst_RBI) + begin + if (!Rst_RBI) + begin + araddr_done_reg <= 1'b0; + arready <= 1'b1; + araddr_reg <= '0; + end + else + begin + if (arready && s_axi_arvalid) + 
begin + arready <= 1'b0; + araddr_done_reg <= 1'b1; + araddr_reg <= s_axi_araddr; + end + else if (araddr_done_reg && rresp_done_reg) + begin + arready <= 1'b1; + araddr_done_reg <= 1'b0; + end + end + end + + // R Channel + always @(posedge Clk_CI or negedge Rst_RBI) + begin + if (!Rst_RBI) + begin + rresp_done_reg <= 1'b0; + rvalid <= 1'b0; + rresp_running_reg <= 1'b0; + end + else + begin + if (araddr_done_reg && !rresp_done_reg) + begin + if (!rresp_running_reg) + begin + rvalid <= 1'b1; + rresp_running_reg <= 1'b1; + end + else if (s_axi_rready) + begin + rvalid <= 1'b0; + rresp_done_reg <= 1'b1; + rresp_running_reg <= 1'b0; + end + end + else + begin + rvalid <= 1'b0; + rresp_done_reg <= 1'b0; + rresp_running_reg <= 1'b0; + end + end + end + + // ██╗ ██╗ ██████╗███████╗ ██████╗ ██████╗ ███████╗ ██████╗ + // ██║ ███║ ██╔════╝██╔════╝██╔════╝ ██╔══██╗██╔════╝██╔════╝ + // ██║ ╚██║ ██║ █████╗ ██║ ███╗ ██████╔╝█████╗ ██║ ███╗ + // ██║ ██║ ██║ ██╔══╝ ██║ ██║ ██╔══██╗██╔══╝ ██║ ██║ + // ███████╗██║ ╚██████╗██║ ╚██████╔╝ ██║ ██║███████╗╚██████╔╝ + // ╚══════╝╚═╝ ╚═════╝╚═╝ ╚═════╝ ╚═╝ ╚═╝╚══════╝ ╚═════╝ + // + assign wren_l1 = wren && (awaddr_reg < L2SINGLE_AMAP_SIZE); + + always @( posedge Clk_CI or negedge Rst_RBI ) + begin + var integer idx_reg, idx_byte; + if ( Rst_RBI == 1'b0 ) + begin + for ( idx_reg = 0; idx_reg < N_REGS; idx_reg++ ) + L1Cfg_DP[idx_reg] <= '0; + end + else if ( wren_l1 ) + begin + if ( awaddr_reg[ADDR_LSB+1] == 1'b0 ) begin // VIRT_ADDR + for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8; idx_byte++ ) begin + if ( (idx_byte < ADDR_WIDTH_VIRT/8) ) begin + if ( wstrb_reg[idx_byte] ) begin + L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= wdata_reg[idx_byte]; + end + end + else begin // Let synthesizer optimize away unused registers. 
+ L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= '0; + end + end + end + else if ( awaddr_reg[ADDR_LSB+1:ADDR_LSB] == 2'b10 ) begin // PHYS_ADDR + for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8; idx_byte++ ) begin + if ( (idx_byte < ADDR_WIDTH_PHYS/8) ) begin + if ( wstrb_reg[idx_byte] ) begin + L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= wdata_reg[idx_byte]; + end + end + else begin // Let synthesizer optimize away unused registers. + L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= '0; + end + end + end + else begin // ( awaddr_reg[ADDR_LSB+1:ADDR_LSB] == 2'b11 ) // FLAGS + for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8; idx_byte++ ) begin + if ( (idx_byte < 1) ) begin + if ( wstrb_reg[idx_byte] ) begin + L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= wdata_reg[idx_byte] & { {{8-N_FLAGS}{1'b0}}, {{N_FLAGS}{1'b1}} }; + end + end + else begin // Let synthesizer optimize away unused registers. + L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= '0; + end + end + end + end + end // always @ ( posedge Clk_CI or negedge Rst_RBI ) + + generate + // Mask unused bits -> Synthesizer should optimize away unused registers + for( j=0; j= (j+1)*L2SINGLE_AMAP_SIZE) && (awaddr_reg[log2(L2SINGLE_AMAP_SIZE)-1:0] <= L2_VA_MAX_ADDR); + assign upper_word_is_written[j] = (wstrb_reg[7:4] != 4'b0000); + assign lower_word_is_written[j] = (wstrb_reg[3:0] != 4'b0000); + end else begin + assign l2_addr_is_in_va_rams[j] = 1'b0; + assign upper_word_is_written[j] = 1'b0; + assign lower_word_is_written[j] = 1'b0; + end + + always @( posedge Clk_CI or negedge Rst_RBI ) begin + var integer idx_byte, off_byte; + if ( Rst_RBI == 1'b0 ) + begin + wren_l2[j] <= 1'b0; + wdata_l2[j] <= '0; + end + else if (wren) + begin + if ( (awaddr_reg >= (j+1)*L2SINGLE_AMAP_SIZE) && (awaddr_reg < (j+2)*L2SINGLE_AMAP_SIZE) && (|wstrb_reg) ) + wren_l2[j] <= 1'b1; + if (AXI_DATA_WIDTH == 32) begin + for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8; idx_byte++ ) + wdata_l2[j][idx_byte*8 +: 
8] <= wdata_reg[idx_byte] & {8{wstrb_reg[idx_byte]}}; + end + else if (AXI_DATA_WIDTH == 64) begin + if (lower_word_is_written[j] == 1'b1) + off_byte = 0; + else + off_byte = 4; + // always put the payload in the lower word and set upper word to 0 + for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8/2; idx_byte++ ) + wdata_l2[j][idx_byte*8 +: 8] <= wdata_reg[idx_byte+off_byte] & {8{wstrb_reg[idx_byte+off_byte]}}; + wdata_l2[j][AXI_DATA_WIDTH-1:AXI_DATA_WIDTH/2] <= 'b0; + end + // pragma translate_off + else + $fatal(1, "Unsupported AXI_DATA_WIDTH!"); + // pragma translate_on + end + else + wren_l2[j] <= '0; + end // always @ ( posedge Clk_CI or negedge Rst_RBI ) + + // Properly align the 32-bit word address when writing from 64-bit interface: + // Depending on the system, the incoming address is (non-)aligned to the 64-bit + // word when writing the upper 32-bit word. + always_comb begin + waddr_l2[j] = (awaddr_reg -(j+1)*L2SINGLE_AMAP_SIZE)/4; + if (wren_l2[j]) begin + if (AXI_DATA_WIDTH == 64) begin + if (upper_word_is_written[j] == 1'b1) begin + // address must be non-aligned + waddr_l2[j][0] = 1'b1; + end + end + // pragma translate_off + else if (AXI_DATA_WIDTH != 32) begin + $fatal(1, "Unsupported AXI_DATA_WIDTH!"); + end + // pragma translate_on + end + end + + // Assert that only one 32-bit word is ever written at a time to VA RAMs on 64-bit data + // systems. 
+ // pragma translate_off + always_ff @ (posedge Clk_CI) begin + if (AXI_DATA_WIDTH == 64) begin + if (l2_addr_is_in_va_rams[j]) begin + if (upper_word_is_written[j]) begin + assert (!lower_word_is_written[j]) + else $error("Unsupported write across two 32-bit words to VA RAMs!"); + end + else if (lower_word_is_written[j]) begin + assert (!upper_word_is_written[j]) + else $error("Unsupported write across two 32-bit words to VA RAMs!"); + end + end + end + end + // pragma translate_on + + end // for (j=0; j< N_PORTS; j++) + endgenerate + + // ███╗ ███╗██╗ ██╗ ███████╗██╗███████╗ ██████╗ ███████╗ + // ████╗ ████║██║ ██║ ██╔════╝██║██╔════╝██╔═══██╗██╔════╝ + // ██╔████╔██║███████║ █████╗ ██║█████╗ ██║ ██║███████╗ + // ██║╚██╔╝██║██╔══██║ ██╔══╝ ██║██╔══╝ ██║ ██║╚════██║ + // ██║ ╚═╝ ██║██║ ██║ ██║ ██║██║ ╚██████╔╝███████║ + // ╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ ╚═╝╚═╝ ╚═════╝ ╚══════╝ + // + logic [ADDR_WIDTH_VIRT-1:0] AddrFifoDin_D; + logic AddrFifoWen_S; + logic AddrFifoRen_S; + logic [ADDR_WIDTH_VIRT-1:0] AddrFifoDout_D; + logic AddrFifoFull_S; + logic AddrFifoEmpty_S; + logic AddrFifoEmpty_SB; + logic AddrFifoFull_SB; + + logic [MISS_META_WIDTH-1:0] MetaFifoDin_D; + logic MetaFifoWen_S; + logic MetaFifoRen_S; + logic [MISS_META_WIDTH-1:0] MetaFifoDout_D; + logic MetaFifoFull_S; + logic MetaFifoEmpty_S; + logic MetaFifoEmpty_SB; + logic MetaFifoFull_SB; + + logic FifosDisabled_S; + logic ConfRegWen_S; + logic [1:0] ConfReg_DN; + logic [1:0] ConfReg_DP; + + logic [AXI_DATA_WIDTH-1:0] wdata_reg_vec; + + assign FifosDisabled_S = ConfReg_DP[0]; + assign L1AllowMultiHit_SO = ConfReg_DP[1]; + + assign AddrFifoEmpty_S = ~AddrFifoEmpty_SB; + assign MetaFifoEmpty_S = ~MetaFifoEmpty_SB; + + assign AddrFifoFull_S = ~AddrFifoFull_SB; + assign MetaFifoFull_S = ~MetaFifoFull_SB; + + assign MhFifoFull_SO = (AddrFifoWen_S & AddrFifoFull_S) | (MetaFifoWen_S & MetaFifoFull_S); + + generate + for ( j=0; j +# * Conrad Burchert +# * Maheshwara Sharma +# * Andreas Kurth +# * Johannes Weinbuch +# * 
Pirmin Vogel +# */ +# +# //`include "pulp_soc_defines.sv" +# +# ////import CfMath::log2; +# +# module axi_rab_top +# +# // Parameters {{{ +# #( +# parameter N_PORTS = 2, +# parameter N_L2_SETS = 32, +# parameter N_L2_SET_ENTRIES = 32, +# parameter AXI_DATA_WIDTH = 64, +# parameter AXI_S_ADDR_WIDTH = 32, +# parameter AXI_M_ADDR_WIDTH = 40, +# parameter AXI_LITE_DATA_WIDTH = 64, +# parameter AXI_LITE_ADDR_WIDTH = 32, +# parameter AXI_ID_WIDTH = 10, +# parameter AXI_USER_WIDTH = 6, +# parameter MH_FIFO_DEPTH = 16 +# ) +# // }}} +# +# // Ports {{{ +# ( +# +# input logic Clk_CI, // This clock may be gated. +# input logic NonGatedClk_CI, +# input logic Rst_RBI, +# +# // For every slave port there are two master ports. The master +# // port to use can be set using the master_select flag of the protection +# // bits of a slice +# +# // AXI4 Slave {{{ +# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] s_axi4_awid, +# input logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] s_axi4_awaddr, +# input logic [N_PORTS-1:0] s_axi4_awvalid, +# output logic [N_PORTS-1:0] s_axi4_awready, +# input logic [N_PORTS-1:0] [7:0] s_axi4_awlen, +# input logic [N_PORTS-1:0] [2:0] s_axi4_awsize, +# input logic [N_PORTS-1:0] [1:0] s_axi4_awburst, +# input logic [N_PORTS-1:0] s_axi4_awlock, +# input logic [N_PORTS-1:0] [2:0] s_axi4_awprot, +# input logic [N_PORTS-1:0] [3:0] s_axi4_awcache, +# input logic [N_PORTS-1:0] [3:0] s_axi4_awregion, +# input logic [N_PORTS-1:0] [3:0] s_axi4_awqos, +# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_awuser, +# +# input logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] s_axi4_wdata, +# input logic [N_PORTS-1:0] s_axi4_wvalid, +# output logic [N_PORTS-1:0] s_axi4_wready, +# input logic [N_PORTS-1:0] [AXI_DATA_WIDTH/8-1:0] s_axi4_wstrb, +# input logic [N_PORTS-1:0] s_axi4_wlast, +# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_wuser, +# +# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] s_axi4_bid, +# output logic [N_PORTS-1:0] [1:0] s_axi4_bresp, +# output logic 
[N_PORTS-1:0] s_axi4_bvalid, +# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_buser, +# input logic [N_PORTS-1:0] s_axi4_bready, +# +# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] s_axi4_arid, +# input logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] s_axi4_araddr, +# input logic [N_PORTS-1:0] s_axi4_arvalid, +# output logic [N_PORTS-1:0] s_axi4_arready, +# input logic [N_PORTS-1:0] [7:0] s_axi4_arlen, +# input logic [N_PORTS-1:0] [2:0] s_axi4_arsize, +# input logic [N_PORTS-1:0] [1:0] s_axi4_arburst, +# input logic [N_PORTS-1:0] s_axi4_arlock, +# input logic [N_PORTS-1:0] [2:0] s_axi4_arprot, +# input logic [N_PORTS-1:0] [3:0] s_axi4_arcache, +# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_aruser, +# +# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] s_axi4_rid, +# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] s_axi4_rdata, +# output logic [N_PORTS-1:0] [1:0] s_axi4_rresp, +# output logic [N_PORTS-1:0] s_axi4_rvalid, +# input logic [N_PORTS-1:0] s_axi4_rready, +# output logic [N_PORTS-1:0] s_axi4_rlast, +# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_ruser, +# // }}} +# +# // AXI4 Master 0 {{{ +# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m0_axi4_awid, +# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] m0_axi4_awaddr, +# output logic [N_PORTS-1:0] m0_axi4_awvalid, +# input logic [N_PORTS-1:0] m0_axi4_awready, +# output logic [N_PORTS-1:0] [7:0] m0_axi4_awlen, +# output logic [N_PORTS-1:0] [2:0] m0_axi4_awsize, +# output logic [N_PORTS-1:0] [1:0] m0_axi4_awburst, +# output logic [N_PORTS-1:0] m0_axi4_awlock, +# output logic [N_PORTS-1:0] [2:0] m0_axi4_awprot, +# output logic [N_PORTS-1:0] [3:0] m0_axi4_awcache, +# output logic [N_PORTS-1:0] [3:0] m0_axi4_awregion, +# output logic [N_PORTS-1:0] [3:0] m0_axi4_awqos, +# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_awuser, +# +# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] m0_axi4_wdata, +# output logic [N_PORTS-1:0] m0_axi4_wvalid, +# input logic [N_PORTS-1:0] m0_axi4_wready, 
+# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH/8-1:0] m0_axi4_wstrb, +# output logic [N_PORTS-1:0] m0_axi4_wlast, +# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_wuser, +# +# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m0_axi4_bid, +# input logic [N_PORTS-1:0] [1:0] m0_axi4_bresp, +# input logic [N_PORTS-1:0] m0_axi4_bvalid, +# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_buser, +# output logic [N_PORTS-1:0] m0_axi4_bready, +# +# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m0_axi4_arid, +# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] m0_axi4_araddr, +# output logic [N_PORTS-1:0] m0_axi4_arvalid, +# input logic [N_PORTS-1:0] m0_axi4_arready, +# output logic [N_PORTS-1:0] [7:0] m0_axi4_arlen, +# output logic [N_PORTS-1:0] [2:0] m0_axi4_arsize, +# output logic [N_PORTS-1:0] [1:0] m0_axi4_arburst, +# output logic [N_PORTS-1:0] m0_axi4_arlock, +# output logic [N_PORTS-1:0] [2:0] m0_axi4_arprot, +# output logic [N_PORTS-1:0] [3:0] m0_axi4_arcache, +# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_aruser, +# +# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m0_axi4_rid, +# input logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] m0_axi4_rdata, +# input logic [N_PORTS-1:0] [1:0] m0_axi4_rresp, +# input logic [N_PORTS-1:0] m0_axi4_rvalid, +# output logic [N_PORTS-1:0] m0_axi4_rready, +# input logic [N_PORTS-1:0] m0_axi4_rlast, +# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_ruser, +# // }}} +# +# // AXI4 Master 1 {{{ +# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m1_axi4_awid, +# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] m1_axi4_awaddr, +# output logic [N_PORTS-1:0] m1_axi4_awvalid, +# input logic [N_PORTS-1:0] m1_axi4_awready, +# output logic [N_PORTS-1:0] [7:0] m1_axi4_awlen, +# output logic [N_PORTS-1:0] [2:0] m1_axi4_awsize, +# output logic [N_PORTS-1:0] [1:0] m1_axi4_awburst, +# output logic [N_PORTS-1:0] m1_axi4_awlock, +# output logic [N_PORTS-1:0] [2:0] m1_axi4_awprot, +# output logic [N_PORTS-1:0] [3:0] m1_axi4_awcache, 
+# output logic [N_PORTS-1:0] [3:0] m1_axi4_awregion, +# output logic [N_PORTS-1:0] [3:0] m1_axi4_awqos, +# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_awuser, +# +# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] m1_axi4_wdata, +# output logic [N_PORTS-1:0] m1_axi4_wvalid, +# input logic [N_PORTS-1:0] m1_axi4_wready, +# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH/8-1:0] m1_axi4_wstrb, +# output logic [N_PORTS-1:0] m1_axi4_wlast, +# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_wuser, +# +# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m1_axi4_bid, +# input logic [N_PORTS-1:0] [1:0] m1_axi4_bresp, +# input logic [N_PORTS-1:0] m1_axi4_bvalid, +# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_buser, +# output logic [N_PORTS-1:0] m1_axi4_bready, +# +# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m1_axi4_arid, +# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] m1_axi4_araddr, +# output logic [N_PORTS-1:0] m1_axi4_arvalid, +# input logic [N_PORTS-1:0] m1_axi4_arready, +# output logic [N_PORTS-1:0] [7:0] m1_axi4_arlen, +# output logic [N_PORTS-1:0] [2:0] m1_axi4_arsize, +# output logic [N_PORTS-1:0] [1:0] m1_axi4_arburst, +# output logic [N_PORTS-1:0] m1_axi4_arlock, +# output logic [N_PORTS-1:0] [2:0] m1_axi4_arprot, +# output logic [N_PORTS-1:0] [3:0] m1_axi4_arcache, +# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_aruser, +# +# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m1_axi4_rid, +# input logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] m1_axi4_rdata, +# input logic [N_PORTS-1:0] [1:0] m1_axi4_rresp, +# input logic [N_PORTS-1:0] m1_axi4_rvalid, +# output logic [N_PORTS-1:0] m1_axi4_rready, +# input logic [N_PORTS-1:0] m1_axi4_rlast, +# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_ruser, +# // }}} +# +# // AXI 4 Lite Slave (Configuration Interface) {{{ +# // AXI4-Lite port to setup the rab slices +# // use this to program the configuration registers +# input logic [AXI_LITE_ADDR_WIDTH-1:0] s_axi4lite_awaddr, +# input 
logic s_axi4lite_awvalid, +# output logic s_axi4lite_awready, +# +# input logic [AXI_LITE_DATA_WIDTH-1:0] s_axi4lite_wdata, +# input logic s_axi4lite_wvalid, +# output logic s_axi4lite_wready, +# input logic [AXI_LITE_DATA_WIDTH/8-1:0] s_axi4lite_wstrb, +# +# output logic [1:0] s_axi4lite_bresp, +# output logic s_axi4lite_bvalid, +# input logic s_axi4lite_bready, +# +# input logic [AXI_LITE_ADDR_WIDTH-1:0] s_axi4lite_araddr, +# input logic s_axi4lite_arvalid, +# output logic s_axi4lite_arready, +# +# output logic [AXI_LITE_DATA_WIDTH-1:0] s_axi4lite_rdata, +# output logic [1:0] s_axi4lite_rresp, +# output logic s_axi4lite_rvalid, +# input logic s_axi4lite_rready, +# // }}} +# +# // BRAMs {{{ +# //`ifdef RAB_AX_LOG_EN +# // BramPort.Slave ArBram_PS, +# // BramPort.Slave AwBram_PS, +# //`endif +# // }}} +# +# // Logger Control {{{ +# //`ifdef RAB_AX_LOG_EN +# // input logic LogEn_SI, +# // input logic ArLogClr_SI, +# // input logic AwLogClr_SI, +# // output logic ArLogRdy_SO, +# // output logic AwLogRdy_SO, +# //`endif +# // }}} +# +# // Interrupt Outputs {{{ +# // Interrupt lines to handle misses, collisions of slices/multiple hits, +# // protection faults and overflow of the miss handling fifo +# //`ifdef RAB_AX_LOG_EN +# // output logic int_ar_log_full, +# // output logic int_aw_log_full, +# //`endif +# output logic [N_PORTS-1:0] int_miss, +# output logic [N_PORTS-1:0] int_multi, +# output logic [N_PORTS-1:0] int_prot, +# output logic int_mhf_full +# // }}} +# +# ); +# +"""#docstring_begin + + // }}} + + // Signals {{{ + // ███████╗██╗ ██████╗ ███╗ ██╗ █████╗ ██╗ ███████╗ + // ██╔════╝██║██╔════╝ ████╗ ██║██╔══██╗██║ ██╔════╝ + // ███████╗██║██║ ███╗██╔██╗ ██║███████║██║ ███████╗ + // ╚════██║██║██║ ██║██║╚██╗██║██╔══██║██║ ╚════██║ + // ███████║██║╚██████╔╝██║ ╚████║██║ ██║███████╗███████║ + // ╚══════╝╚═╝ ╚═════╝ ╚═╝ ╚═══╝╚═╝ ╚═╝╚══════╝╚══════╝ + // + + // Internal AXI4 lines, these connect buffers on the slave side to the rab core and + // multiplexers which 
switch between the two master outputs + logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_awid; + logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] int_awaddr; + logic [N_PORTS-1:0] int_awvalid; + logic [N_PORTS-1:0] int_awready; + logic [N_PORTS-1:0] [7:0] int_awlen; + logic [N_PORTS-1:0] [2:0] int_awsize; + logic [N_PORTS-1:0] [1:0] int_awburst; + logic [N_PORTS-1:0] int_awlock; + logic [N_PORTS-1:0] [2:0] int_awprot; + logic [N_PORTS-1:0] [3:0] int_awcache; + logic [N_PORTS-1:0] [3:0] int_awregion; + logic [N_PORTS-1:0] [3:0] int_awqos; + logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_awuser; + + logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] int_wdata; + logic [N_PORTS-1:0] int_wvalid; + logic [N_PORTS-1:0] int_wready; + logic [N_PORTS-1:0] [AXI_DATA_WIDTH/8-1:0] int_wstrb; + logic [N_PORTS-1:0] int_wlast; + logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_wuser; + + logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_bid; + logic [N_PORTS-1:0] [1:0] int_bresp; + logic [N_PORTS-1:0] int_bvalid; + logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_buser; + logic [N_PORTS-1:0] int_bready; + + logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_arid; + logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] int_araddr; + logic [N_PORTS-1:0] int_arvalid; + logic [N_PORTS-1:0] int_arready; + logic [N_PORTS-1:0] [7:0] int_arlen; + logic [N_PORTS-1:0] [2:0] int_arsize; + logic [N_PORTS-1:0] [1:0] int_arburst; + logic [N_PORTS-1:0] int_arlock; + logic [N_PORTS-1:0] [2:0] int_arprot; + logic [N_PORTS-1:0] [3:0] int_arcache; + logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_aruser; + + logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_rid; + logic [N_PORTS-1:0] [1:0] int_rresp; + logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] int_rdata; + logic [N_PORTS-1:0] int_rlast; + logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_ruser; + logic [N_PORTS-1:0] int_rvalid; + logic [N_PORTS-1:0] int_rready; + + // rab_core outputs + logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] int_wtrans_addr; + logic [N_PORTS-1:0] int_wtrans_accept; + logic [N_PORTS-1:0] 
int_wtrans_drop; + logic [N_PORTS-1:0] int_wtrans_miss; + logic [N_PORTS-1:0] int_wtrans_sent; + logic [N_PORTS-1:0] int_wtrans_cache_coherent; + logic [N_PORTS-1:0] int_wmaster_select; + + logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] int_rtrans_addr; + logic [N_PORTS-1:0] int_rtrans_accept; + logic [N_PORTS-1:0] int_rtrans_drop; + logic [N_PORTS-1:0] int_rtrans_miss; + logic [N_PORTS-1:0] int_rtrans_sent; + logic [N_PORTS-1:0] int_rtrans_cache_coherent; + logic [N_PORTS-1:0] int_rmaster_select; + + logic [N_PORTS-1:0] w_master_select; + + // Internal master0 AXI4 lines. These connect the first master port to the + // multiplexers + // For channels read address, write address and write data the other lines + // are ignored if valid is not set, therefore we only need to multiplex those + logic [N_PORTS-1:0] int_m0_awvalid; + logic [N_PORTS-1:0] int_m0_awready; + + logic [N_PORTS-1:0] int_m0_wvalid; + logic [N_PORTS-1:0] int_m0_wready; + + logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_m0_bid; + logic [N_PORTS-1:0] [1:0] int_m0_bresp; + logic [N_PORTS-1:0] int_m0_bvalid; + logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_m0_buser; + logic [N_PORTS-1:0] int_m0_bready; + + logic [N_PORTS-1:0] int_m0_arvalid; + logic [N_PORTS-1:0] int_m0_arready; + + logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_m0_rid; + logic [N_PORTS-1:0] [1:0] int_m0_rresp; + logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] int_m0_rdata; + logic [N_PORTS-1:0] int_m0_rlast; + logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_m0_ruser; + logic [N_PORTS-1:0] int_m0_rready; + logic [N_PORTS-1:0] int_m0_rvalid; + + logic [N_PORTS-1:0] l1_m0_ar_accept; + logic [N_PORTS-1:0] l1_m0_ar_drop; + logic [N_PORTS-1:0] l1_m0_ar_save; + logic [N_PORTS-1:0] l1_m0_ar_done; + logic [N_PORTS-1:0] l2_m0_ar_accept; + logic [N_PORTS-1:0] l2_m0_ar_drop; + logic [N_PORTS-1:0] l2_m0_ar_done; + logic [N_PORTS-1:0] l2_m0_ar_sending; + + logic [N_PORTS-1:0] l1_m0_aw_accept; + logic [N_PORTS-1:0] l1_m0_aw_drop; + logic [N_PORTS-1:0] l1_m0_aw_save; + 
logic [N_PORTS-1:0] l1_m0_aw_done; + logic [N_PORTS-1:0] l2_m0_aw_accept; + logic [N_PORTS-1:0] l2_m0_aw_drop; + logic [N_PORTS-1:0] l2_m0_aw_done; + logic [N_PORTS-1:0] l2_m0_aw_sending; + + // Internal master1 AXI4 lines. These connect the second master port to the + // multiplexers + // For channels read address, write address and write data the other lines + // are ignored if valid is not set, therefore we only need to multiplex those + logic [N_PORTS-1:0] int_m1_awvalid; + logic [N_PORTS-1:0] int_m1_awready; + + logic [N_PORTS-1:0] int_m1_wvalid; + logic [N_PORTS-1:0] int_m1_wready; + + logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_m1_bid; + logic [N_PORTS-1:0] [1:0] int_m1_bresp; + logic [N_PORTS-1:0] int_m1_bvalid; + logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_m1_buser; + logic [N_PORTS-1:0] int_m1_bready; + + logic [N_PORTS-1:0] int_m1_arvalid; + logic [N_PORTS-1:0] int_m1_arready; + + logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_m1_rid; + logic [N_PORTS-1:0] [1:0] int_m1_rresp; + logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] int_m1_rdata; + logic [N_PORTS-1:0] int_m1_rlast; + logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_m1_ruser; + logic [N_PORTS-1:0] int_m1_rvalid; + logic [N_PORTS-1:0] int_m1_rready; + + logic [N_PORTS-1:0] l1_m1_ar_accept; + logic [N_PORTS-1:0] l1_m1_ar_drop; + logic [N_PORTS-1:0] l1_m1_ar_save; + logic [N_PORTS-1:0] l1_m1_ar_done; + logic [N_PORTS-1:0] l2_m1_ar_accept; + logic [N_PORTS-1:0] l2_m1_ar_drop; + logic [N_PORTS-1:0] l2_m1_ar_done; + + logic [N_PORTS-1:0] l1_m1_aw_accept; + logic [N_PORTS-1:0] l1_m1_aw_drop; + logic [N_PORTS-1:0] l1_m1_aw_save; + logic [N_PORTS-1:0] l1_m1_aw_done; + logic [N_PORTS-1:0] l2_m1_aw_accept; + logic [N_PORTS-1:0] l2_m1_aw_drop; + logic [N_PORTS-1:0] l2_m1_aw_done; + + // L1 outputs + logic [N_PORTS-1:0] rab_miss; // L1 RAB miss + logic [N_PORTS-1:0] rab_prot; + logic [N_PORTS-1:0] rab_multi; + logic [N_PORTS-1:0] rab_prefetch; + + // + // Signals used to support L2 TLB + // + // L2 RAM configuration signals 
+ logic [N_PORTS-1:0] [AXI_LITE_DATA_WIDTH-1:0] L2CfgWData_D; + logic [N_PORTS-1:0] [AXI_LITE_ADDR_WIDTH-1:0] L2CfgWAddr_D; + logic [N_PORTS-1:0] L2CfgWE_S; + + // L1 output and drop Buffer + logic [N_PORTS-1:0] L1OutRwType_D, L1DropRwType_DP; + logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] L1OutUser_D, L1DropUser_DP; + logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] L1OutId_D, L1DropId_DP; + logic [N_PORTS-1:0] [7:0] L1OutLen_D, L1DropLen_DP; + logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] L1OutAddr_D, L1DropAddr_DP; + logic [N_PORTS-1:0] L1OutProt_D, L1DropProt_DP; + logic [N_PORTS-1:0] L1OutMulti_D, L1DropMulti_DP; + logic [N_PORTS-1:0] L1DropEn_S; + logic [N_PORTS-1:0] L1DropPrefetch_S; + + logic [N_PORTS-1:0] L1DropValid_SN, L1DropValid_SP; + + // L2 input Buffer + logic [N_PORTS-1:0] L2InRwType_DP; + logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] L2InUser_DP; + logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] L2InId_DP; + logic [N_PORTS-1:0] [7:0] L2InLen_DP; + logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] L2InAddr_DP; + logic [N_PORTS-1:0] L2InEn_S; + + // L2 output Buffer + logic [N_PORTS-1:0] L2OutRwType_DP; + logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] L2OutUser_DP; + logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] L2OutId_DP; + logic [N_PORTS-1:0] [7:0] L2OutLen_DP; + logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] L2OutInAddr_DP; + + logic [N_PORTS-1:0] L2OutHit_SN, L2OutHit_SP; + logic [N_PORTS-1:0] L2OutMiss_SN, L2OutMiss_SP; + logic [N_PORTS-1:0] L2OutProt_SN, L2OutProt_SP; + logic [N_PORTS-1:0] L2OutMulti_SN, L2OutMulti_SP; + logic [N_PORTS-1:0] L2OutCC_SN, L2OutCC_SP; + logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] L2OutAddr_DN, L2OutAddr_DP; + + logic [N_PORTS-1:0] L2OutValid_SN, L2OutValid_SP; + logic [N_PORTS-1:0] L2OutPrefetch_S; + logic [N_PORTS-1:0] L2OutReady_S; + logic [N_PORTS-1:0] L2OutEn_S; + + // L2 outputs + logic [N_PORTS-1:0] L2Busy_S; + logic [N_PORTS-1:0] L2OutValid_S; + + logic [N_PORTS-1:0] L2Miss_S; + + // Signals for interfacing the AXI modules + logic [N_PORTS-1:0] l1_ar_accept; + 
logic [N_PORTS-1:0] l1_aw_accept; + logic [N_PORTS-1:0] l1_w_accept; + logic [N_PORTS-1:0] l1_xw_accept; + + logic [N_PORTS-1:0] l1_ar_drop; + logic [N_PORTS-1:0] l1_aw_drop; + logic [N_PORTS-1:0] l1_w_drop; + logic [N_PORTS-1:0] l1_xw_drop; + + logic [N_PORTS-1:0] l1_ar_save; + logic [N_PORTS-1:0] l1_aw_save; + logic [N_PORTS-1:0] l1_w_save; + logic [N_PORTS-1:0] l1_xw_save; + + logic [N_PORTS-1:0] l1_ar_done; + logic [N_PORTS-1:0] l1_r_done; + logic [N_PORTS-1:0] l1_r_drop; + logic [N_PORTS-1:0] lx_r_drop; + logic [N_PORTS-1:0] lx_r_done; + + logic [N_PORTS-1:0] l1_aw_done; + logic [N_PORTS-1:0] l1_w_done; + logic [N_PORTS-1:0] l1_xw_done; + logic [N_PORTS-1:0] l1_aw_done_SP; + logic [N_PORTS-1:0] l1_w_done_SP; + + logic [N_PORTS-1:0] l2_ar_accept; + logic [N_PORTS-1:0] l2_aw_accept; + logic [N_PORTS-1:0] l2_w_accept; + logic [N_PORTS-1:0] l2_xw_accept; + + logic [N_PORTS-1:0] l2_ar_drop; + logic [N_PORTS-1:0] l2_r_drop; + logic [N_PORTS-1:0] l2_xr_drop; + logic [N_PORTS-1:0] l2_aw_drop; + logic [N_PORTS-1:0] l2_w_drop; + logic [N_PORTS-1:0] l2_xw_drop; + + logic [N_PORTS-1:0] l2_aw_done; + logic [N_PORTS-1:0] l2_w_done; + logic [N_PORTS-1:0] l2_xw_done; + logic [N_PORTS-1:0] l2_aw_done_SP; + logic [N_PORTS-1:0] l2_w_done_SP; + + logic [N_PORTS-1:0] l2_ar_done; + logic [N_PORTS-1:0] l2_r_done; + logic [N_PORTS-1:0] l2_xr_done; + logic [N_PORTS-1:0] l2_ar_done_SP; + logic [N_PORTS-1:0] l2_r_done_SP; + + logic [N_PORTS-1:0] l1_mx_aw_done; + logic [N_PORTS-1:0] l1_mx_ar_done; + logic [N_PORTS-1:0] l1_m0_aw_done_SP; + logic [N_PORTS-1:0] l1_m0_ar_done_SP; + logic [N_PORTS-1:0] l1_m1_aw_done_SP; + logic [N_PORTS-1:0] l1_m1_ar_done_SP; + + logic [N_PORTS-1:0] l2_mx_aw_done; + logic [N_PORTS-1:0] l2_mx_ar_done; + logic [N_PORTS-1:0] l2_m0_aw_done_SP; + logic [N_PORTS-1:0] l2_m0_ar_done_SP; + logic [N_PORTS-1:0] l2_m1_aw_done_SP; + logic [N_PORTS-1:0] l2_m1_ar_done_SP; + + logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] l1_id_drop, lx_id_drop, b_id_drop; + logic [N_PORTS-1:0] 
[7:0] l1_len_drop, lx_len_drop; + logic [N_PORTS-1:0] l1_prefetch_drop, lx_prefetch_drop, b_prefetch_drop; + logic [N_PORTS-1:0] l1_hit_drop, lx_hit_drop, b_hit_drop; + + logic [N_PORTS-1:0] b_drop; + logic [N_PORTS-1:0] b_done; + + logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] l2_aw_addr; + logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] l2_ar_addr; + + logic [N_PORTS-1:0] l2_cache_coherent; + logic [N_PORTS-1:0] l2_master_select; + + logic [N_PORTS-1:0] aw_in_stall; + logic [N_PORTS-1:0] aw_out_stall; + + genvar i; + + // RRESP FSM + typedef enum logic {IDLE, BUSY} r_resp_mux_ctrl_state_t; + r_resp_mux_ctrl_state_t [N_PORTS-1:0] RRespMuxCtrl_SN, RRespMuxCtrl_SP; + logic [N_PORTS-1:0] RRespSel_SN, RRespSel_SP; + logic [N_PORTS-1:0] RRespBurst_S; + logic [N_PORTS-1:0] RRespSelIm_S; + + // }}} + + // Local parameters {{{ + + // Enable L2 for select ports + localparam integer ENABLE_L2TLB[N_PORTS-1:0] = `EN_L2TLB_ARRAY; + + // L2TLB parameters + localparam integer HUM_BUFFER_DEPTH = (N_L2_SET_ENTRIES/2/`RAB_L2_N_PAR_VA_RAMS)+13; + + // }}} + + // Derive `master_select` from cache coherency flag. 
{{{ + `ifdef EN_ACP + assign int_wmaster_select = int_wtrans_cache_coherent; + assign int_rmaster_select = int_rtrans_cache_coherent; + assign l2_master_select = l2_cache_coherent; + `else + assign int_wmaster_select = '0; + assign int_rmaster_select = '0; + assign l2_master_select = '0; + `endif + // }}} + + // Buf and Send {{{ + // ██████╗ ██╗ ██╗███████╗ ██╗ ███████╗███████╗███╗ ██╗██████╗ + // ██╔══██╗██║ ██║██╔════╝ ██║ ██╔════╝██╔════╝████╗ ██║██╔══██╗ + // ██████╔╝██║ ██║█████╗ ████████╗ ███████╗█████╗ ██╔██╗ ██║██║ ██║ + // ██╔══██╗██║ ██║██╔══╝ ██╔═██╔═╝ ╚════██║██╔══╝ ██║╚██╗██║██║ ██║ + // ██████╔╝╚██████╔╝██║ ██████║ ███████║███████╗██║ ╚████║██████╔╝ + // ╚═════╝ ╚═════╝ ╚═╝ ╚═════╝ ╚══════╝╚══════╝╚═╝ ╚═══╝╚═════╝ + // + logic[N_PORTS-1:0] m0_write_is_burst, m0_read_is_burst; + logic[N_PORTS-1:0] m1_write_is_burst, m1_read_is_burst; + + generate for (i = 0; i < N_PORTS; i++) begin : BUF_AND_SEND + + // Write Address channel (aw) {{{ + /* + * write address channel (aw) + * + * ██╗ ██╗██████╗ ██╗████████╗███████╗ █████╗ ██████╗ ██████╗ ██████╗ + * ██║ ██║██╔══██╗██║╚══██╔══╝██╔════╝ ██╔══██╗██╔══██╗██╔══██╗██╔══██╗ + * ██║ █╗ ██║██████╔╝██║ ██║ █████╗ ███████║██║ ██║██║ ██║██████╔╝ + * ██║███╗██║██╔══██╗██║ ██║ ██╔══╝ ██╔══██║██║ ██║██║ ██║██╔══██╗ + * ╚███╔███╔╝██║ ██║██║ ██║ ███████╗ ██║ ██║██████╔╝██████╔╝██║ ██║ + * ╚══╝╚══╝ ╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝ ╚═╝ ╚═╝╚═════╝ ╚═════╝ ╚═╝ ╚═╝ + * + */ + + axi4_aw_buffer + #( + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) + u_aw_buffer + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + .s_axi4_awid ( s_axi4_awid[i] ), + .s_axi4_awaddr ( s_axi4_awaddr[i] ), + .s_axi4_awvalid ( s_axi4_awvalid[i] ), + .s_axi4_awready ( s_axi4_awready[i] ), + .s_axi4_awlen ( s_axi4_awlen[i] ), + .s_axi4_awsize ( s_axi4_awsize[i] ), + .s_axi4_awburst ( s_axi4_awburst[i] ), + .s_axi4_awlock ( s_axi4_awlock[i] ), + .s_axi4_awprot ( s_axi4_awprot[i] ), + .s_axi4_awcache ( s_axi4_awcache[i] ), + 
.s_axi4_awregion ( s_axi4_awregion[i] ), + .s_axi4_awqos ( s_axi4_awqos[i] ), + .s_axi4_awuser ( s_axi4_awuser[i] ), + .m_axi4_awid ( int_awid[i] ), + .m_axi4_awaddr ( int_awaddr[i] ), + .m_axi4_awvalid ( int_awvalid[i] ), + .m_axi4_awready ( int_awready[i] ), + .m_axi4_awlen ( int_awlen[i] ), + .m_axi4_awsize ( int_awsize[i] ), + .m_axi4_awburst ( int_awburst[i] ), + .m_axi4_awlock ( int_awlock[i] ), + .m_axi4_awprot ( int_awprot[i] ), + .m_axi4_awcache ( int_awcache[i] ), + .m_axi4_awregion ( int_awregion[i] ), + .m_axi4_awqos ( int_awqos[i] ), + .m_axi4_awuser ( int_awuser[i] ) + ); + + axi4_aw_sender + #( + .AXI_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ), + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ), + .ENABLE_L2TLB ( ENABLE_L2TLB[i] ) + ) + u_aw_sender_m0 + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + .l1_done_o ( l1_m0_aw_done[i] ), + .l1_accept_i ( l1_m0_aw_accept[i] ), + .l1_drop_i ( l1_m0_aw_drop[i] ), + .l1_save_i ( l1_m0_aw_save[i] ), + .l2_done_o ( l2_m0_aw_done[i] ), + .l2_accept_i ( l2_m0_aw_accept[i] ), + .l2_drop_i ( l2_m0_aw_drop[i] ), + .l2_sending_o ( l2_m0_aw_sending[i] ), + .l1_awaddr_i ( int_wtrans_addr[i] ), + .l2_awaddr_i ( l2_aw_addr[i] ), + .s_axi4_awid ( int_awid[i] ), + .s_axi4_awvalid ( int_m0_awvalid[i] ), + .s_axi4_awready ( int_m0_awready[i] ), + .s_axi4_awlen ( int_awlen[i] ), + .s_axi4_awsize ( int_awsize[i] ), + .s_axi4_awburst ( int_awburst[i] ), + .s_axi4_awlock ( int_awlock[i] ), + .s_axi4_awprot ( int_awprot[i] ), + .s_axi4_awcache ( int_awcache[i] ), + .s_axi4_awregion ( int_awregion[i] ), + .s_axi4_awqos ( int_awqos[i] ), + .s_axi4_awuser ( int_awuser[i] ), + .m_axi4_awid ( m0_axi4_awid[i] ), + .m_axi4_awaddr ( m0_axi4_awaddr[i] ), + .m_axi4_awvalid ( m0_axi4_awvalid[i] ), + .m_axi4_awready ( m0_axi4_awready[i] ), + .m_axi4_awlen ( m0_axi4_awlen[i] ), + .m_axi4_awsize ( m0_axi4_awsize[i] ), + .m_axi4_awburst ( m0_axi4_awburst[i] ), + .m_axi4_awlock ( m0_axi4_awlock[i] ), + .m_axi4_awprot ( 
m0_axi4_awprot[i] ), + .m_axi4_awcache ( ), + .m_axi4_awregion ( m0_axi4_awregion[i] ), + .m_axi4_awqos ( m0_axi4_awqos[i] ), + .m_axi4_awuser ( m0_axi4_awuser[i] ) + ); + + // The AXCACHE signals are set according to burstiness and cache coherence or statically + // when not connected to ACP on Zynq (implemented below). + assign m0_write_is_burst[i] = (m0_axi4_awlen[i] != {8{1'b0}}) && (m0_axi4_awburst[i] != 2'b00); + `ifndef EN_ACP + always_comb begin + if ( (l2_m0_aw_sending[i] & l2_cache_coherent[i]) | int_wtrans_cache_coherent[i]) begin + if (m0_write_is_burst[i]) begin + m0_axi4_awcache[i] = 4'b0111; + end else begin + m0_axi4_awcache[i] = 4'b1111; + end + end else begin + m0_axi4_awcache[i] = 4'b0011; + end + end + `else + assign m0_axi4_awcache[i] = 4'b0011; + `endif + + axi4_aw_sender + #( + .AXI_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ), + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ), + .ENABLE_L2TLB ( ENABLE_L2TLB[i] ) + ) + u_aw_sender_m1 + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + .l1_accept_i ( l1_m1_aw_accept[i] ), + .l1_drop_i ( l1_m1_aw_drop[i] ), + .l1_save_i ( l1_m1_aw_save[i] ), + .l1_done_o ( l1_m1_aw_done[i] ), + .l2_accept_i ( l2_m1_aw_accept[i] ), + .l2_drop_i ( l2_m1_aw_drop[i] ), + .l2_done_o ( l2_m1_aw_done[i] ), + .l2_sending_o ( ), // just helps to set axcache + .l1_awaddr_i ( int_wtrans_addr[i] ), + .l2_awaddr_i ( l2_aw_addr[i] ), + .s_axi4_awid ( int_awid[i] ), + .s_axi4_awvalid ( int_m1_awvalid[i] ), + .s_axi4_awready ( int_m1_awready[i] ), + .s_axi4_awlen ( int_awlen[i] ), + .s_axi4_awsize ( int_awsize[i] ), + .s_axi4_awburst ( int_awburst[i] ), + .s_axi4_awlock ( int_awlock[i] ), + .s_axi4_awprot ( int_awprot[i] ), + .s_axi4_awcache ( int_awcache[i] ), + .s_axi4_awregion ( int_awregion[i] ), + .s_axi4_awqos ( int_awqos[i] ), + .s_axi4_awuser ( int_awuser[i] ), + .m_axi4_awid ( m1_axi4_awid[i] ), + .m_axi4_awaddr ( m1_axi4_awaddr[i] ), + .m_axi4_awvalid ( m1_axi4_awvalid[i] ), + .m_axi4_awready ( 
m1_axi4_awready[i] ), + .m_axi4_awlen ( m1_axi4_awlen[i] ), + .m_axi4_awsize ( m1_axi4_awsize[i] ), + .m_axi4_awburst ( m1_axi4_awburst[i] ), + .m_axi4_awlock ( m1_axi4_awlock[i] ), + .m_axi4_awprot ( m1_axi4_awprot[i] ), + .m_axi4_awcache ( ), + .m_axi4_awregion ( m1_axi4_awregion[i] ), + .m_axi4_awqos ( m1_axi4_awqos[i] ), + .m_axi4_awuser ( m1_axi4_awuser[i] ) + ); + + // The AXCACHE signals are set according to burstiness and cache coherence or statically + // when not connected to ACP on Zynq (implemented below). + assign m1_write_is_burst[i] = (m1_axi4_awlen[i] != {8{1'b0}}) && (m1_axi4_awburst[i] != 2'b00); + `ifdef EN_ACP + always_comb begin + if (m1_write_is_burst[i]) begin + m1_axi4_awcache[i] = 4'b1011; + end else begin + m1_axi4_awcache[i] = 4'b1111; + end + end + `else + assign m1_axi4_awcache[i] = 4'b0011; + `endif + + // }}} + + // Write Data channel (w) {{{ + /* + * write data channel (w) + * + * ██╗ ██╗██████╗ ██╗████████╗███████╗ ██████╗ █████╗ ████████╗ █████╗ + * ██║ ██║██╔══██╗██║╚══██╔══╝██╔════╝ ██╔══██╗██╔══██╗╚══██╔══╝██╔══██╗ + * ██║ █╗ ██║██████╔╝██║ ██║ █████╗ ██║ ██║███████║ ██║ ███████║ + * ██║███╗██║██╔══██╗██║ ██║ ██╔══╝ ██║ ██║██╔══██║ ██║ ██╔══██║ + * ╚███╔███╔╝██║ ██║██║ ██║ ███████╗ ██████╔╝██║ ██║ ██║ ██║ ██║ + * ╚══╝╚══╝ ╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝ ╚═════╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝ + * + */ + axi4_w_buffer + #( + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ), + .ENABLE_L2TLB ( ENABLE_L2TLB[i] ), + .HUM_BUFFER_DEPTH ( HUM_BUFFER_DEPTH ) + ) + u_w_buffer + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + + // L1 interface + .l1_done_o ( l1_w_done[i] ), + .l1_accept_i ( l1_w_accept[i] ), + .l1_save_i ( l1_w_save[i] ), + .l1_drop_i ( l1_w_drop[i] ), + .l1_master_i ( int_wmaster_select[i] ), + .l1_id_i ( l1_id_drop[i] ), + .l1_len_i ( l1_len_drop[i] ), + .l1_prefetch_i ( l1_prefetch_drop[i] ), + .l1_hit_i ( l1_hit_drop[i] ), + + // L2 interface + .l2_done_o ( l2_w_done[i] ), 
+ .l2_accept_i ( l2_w_accept[i] ), + .l2_drop_i ( l2_w_drop[i] ), + .l2_master_i ( l2_master_select[i] ), + .l2_id_i ( lx_id_drop[i] ), + .l2_len_i ( lx_len_drop[i] ), + .l2_prefetch_i ( lx_prefetch_drop[i] ), + .l2_hit_i ( lx_hit_drop[i] ), + + // Top-level control outputs + .master_select_o ( w_master_select[i] ), + .input_stall_o ( aw_in_stall[i] ), // stall L1 AW input if request buffers full + .output_stall_o ( aw_out_stall[i] ), // stall L1 AW hit forwarding if bypass not possible + + // B sender interface + .b_drop_o ( b_drop[i] ), + .b_done_i ( b_done[i] ), + .id_o ( b_id_drop[i] ), + .prefetch_o ( b_prefetch_drop[i] ), + .hit_o ( b_hit_drop[i] ), + + // AXI W channel interfaces + .s_axi4_wdata ( s_axi4_wdata[i] ), + .s_axi4_wvalid ( s_axi4_wvalid[i] ), + .s_axi4_wready ( s_axi4_wready[i] ), + .s_axi4_wstrb ( s_axi4_wstrb[i] ), + .s_axi4_wlast ( s_axi4_wlast[i] ), + .s_axi4_wuser ( s_axi4_wuser[i] ), + .m_axi4_wdata ( int_wdata[i] ), + .m_axi4_wvalid ( int_wvalid[i] ), + .m_axi4_wready ( int_wready[i] ), + .m_axi4_wstrb ( int_wstrb[i] ), + .m_axi4_wlast ( int_wlast[i] ), + .m_axi4_wuser ( int_wuser[i] ) + ); + + axi4_w_sender + #( + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) + u_w_sender_m0 + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + .s_axi4_wdata ( int_wdata[i] ), + .s_axi4_wvalid ( int_m0_wvalid[i] ), + .s_axi4_wready ( int_m0_wready[i] ), + .s_axi4_wstrb ( int_wstrb[i] ), + .s_axi4_wlast ( int_wlast[i] ), + .s_axi4_wuser ( int_wuser[i] ), + .m_axi4_wdata ( m0_axi4_wdata[i] ), + .m_axi4_wvalid ( m0_axi4_wvalid[i] ), + .m_axi4_wready ( m0_axi4_wready[i] ), + .m_axi4_wstrb ( m0_axi4_wstrb[i] ), + .m_axi4_wlast ( m0_axi4_wlast[i] ), + .m_axi4_wuser ( m0_axi4_wuser[i] ) + ); + + axi4_w_sender + #( + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + + ) + u_w_sender_m1 + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + .s_axi4_wdata ( int_wdata[i] ), + .s_axi4_wvalid ( 
int_m1_wvalid[i] ), + .s_axi4_wready ( int_m1_wready[i] ), + .s_axi4_wstrb ( int_wstrb[i] ), + .s_axi4_wlast ( int_wlast[i] ), + .s_axi4_wuser ( int_wuser[i] ), + .m_axi4_wdata ( m1_axi4_wdata[i] ), + .m_axi4_wvalid ( m1_axi4_wvalid[i] ), + .m_axi4_wready ( m1_axi4_wready[i] ), + .m_axi4_wstrb ( m1_axi4_wstrb[i] ), + .m_axi4_wlast ( m1_axi4_wlast[i] ), + .m_axi4_wuser ( m1_axi4_wuser[i] ) + ); + + /* + * Multiplexer to switch between the two output master ports on the write data (w) channel + */ + always_comb begin + /* Only one output can be selected at any time */ + if (w_master_select[i] == 1'b0) begin + int_m0_wvalid[i] = int_wvalid[i]; + int_m1_wvalid[i] = 1'b0; + int_wready[i] = int_m0_wready[i]; + end else begin + int_m0_wvalid[i] = 1'b0; + int_m1_wvalid[i] = int_wvalid[i]; + int_wready[i] = int_m1_wready[i]; + end + end + + // }}} + + // Write Response channel (b) {{{ + /* + * write response channel (b) + * + * ██╗ ██╗██████╗ ██╗████████╗███████╗ ██████╗ ███████╗███████╗██████╗ + * ██║ ██║██╔══██╗██║╚══██╔══╝██╔════╝ ██╔══██╗██╔════╝██╔════╝██╔══██╗ + * ██║ █╗ ██║██████╔╝██║ ██║ █████╗ ██████╔╝█████╗ ███████╗██████╔╝ + * ██║███╗██║██╔══██╗██║ ██║ ██╔══╝ ██╔══██╗██╔══╝ ╚════██║██╔═══╝ + * ╚███╔███╔╝██║ ██║██║ ██║ ███████╗ ██║ ██║███████╗███████║██║ + * ╚══╝╚══╝ ╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝ + * + */ + axi4_b_buffer + #( + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) + u_b_buffer_m0 + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + .s_axi4_bid ( int_m0_bid[i] ), + .s_axi4_bresp ( int_m0_bresp[i] ), + .s_axi4_bvalid ( int_m0_bvalid[i] ), + .s_axi4_buser ( int_m0_buser[i] ), + .s_axi4_bready ( int_m0_bready[i] ), + .m_axi4_bid ( m0_axi4_bid[i] ), + .m_axi4_bresp ( m0_axi4_bresp[i] ), + .m_axi4_bvalid ( m0_axi4_bvalid[i] ), + .m_axi4_buser ( m0_axi4_buser[i] ), + .m_axi4_bready ( m0_axi4_bready[i] ) + ); + + axi4_b_buffer + #( + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) + 
u_b_buffer_m1 + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + .s_axi4_bid ( int_m1_bid[i] ), + .s_axi4_bresp ( int_m1_bresp[i] ), + .s_axi4_bvalid ( int_m1_bvalid[i] ), + .s_axi4_buser ( int_m1_buser[i] ), + .s_axi4_bready ( int_m1_bready[i] ), + .m_axi4_bid ( m1_axi4_bid[i] ), + .m_axi4_bresp ( m1_axi4_bresp[i] ), + .m_axi4_bvalid ( m1_axi4_bvalid[i] ), + .m_axi4_buser ( m1_axi4_buser[i] ), + .m_axi4_bready ( m1_axi4_bready[i] ) + ); + + axi4_b_sender + #( + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) + u_b_sender + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + .drop_i ( b_drop[i] ), + .done_o ( b_done[i] ), + .id_i ( b_id_drop[i] ), + .prefetch_i ( b_prefetch_drop[i] ), + .hit_i ( b_hit_drop[i] ), + .s_axi4_bid ( s_axi4_bid[i] ), + .s_axi4_bresp ( s_axi4_bresp[i] ), + .s_axi4_bvalid ( s_axi4_bvalid[i] ), + .s_axi4_buser ( s_axi4_buser[i] ), + .s_axi4_bready ( s_axi4_bready[i] ), + .m_axi4_bid ( int_bid[i] ), + .m_axi4_bresp ( int_bresp[i] ), + .m_axi4_bvalid ( int_bvalid[i] ), + .m_axi4_buser ( int_buser[i] ), + .m_axi4_bready ( int_bready[i] ) + ); + + /* + * Multiplexer to switch between the two output master ports on the write response (b) channel + */ + always_comb begin + /* Output 1 always gets priority, so if it has something to send connect + it and let output 0 wait using bready = 0 */ + if (int_m1_bvalid[i] == 1'b1) begin + int_m0_bready[i] = 1'b0; + int_m1_bready[i] = int_bready[i]; + + int_bid[i] = int_m1_bid[i]; + int_bresp[i] = int_m1_bresp[i]; + int_buser[i] = int_m1_buser[i]; + int_bvalid[i] = int_m1_bvalid[i]; + end else begin + int_m0_bready[i] = int_bready[i]; + int_m1_bready[i] = 1'b0; + + int_bid[i] = int_m0_bid[i]; + int_bresp[i] = int_m0_bresp[i]; + int_buser[i] = int_m0_buser[i]; + int_bvalid[i] = int_m0_bvalid[i]; + end + end + + // }}} + + // Read Address channel (ar) {{{ + /* + * read address channel (ar) + * + * ██████╗ ███████╗ █████╗ ██████╗ █████╗ ██████╗ ██████╗ ██████╗ + * 
██╔══██╗██╔════╝██╔══██╗██╔══██╗ ██╔══██╗██╔══██╗██╔══██╗██╔══██╗ + * ██████╔╝█████╗ ███████║██║ ██║ ███████║██║ ██║██║ ██║██████╔╝ + * ██╔══██╗██╔══╝ ██╔══██║██║ ██║ ██╔══██║██║ ██║██║ ██║██╔══██╗ + * ██║ ██║███████╗██║ ██║██████╔╝ ██║ ██║██████╔╝██████╔╝██║ ██║ + * ╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═════╝ ╚═╝ ╚═╝╚═════╝ ╚═════╝ ╚═╝ ╚═╝ + * + */ + axi4_ar_buffer + #( + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) + u_ar_buffer + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + .s_axi4_arid ( s_axi4_arid[i] ), + .s_axi4_araddr ( s_axi4_araddr[i] ), + .s_axi4_arvalid ( s_axi4_arvalid[i] ), + .s_axi4_arready ( s_axi4_arready[i] ), + .s_axi4_arlen ( s_axi4_arlen[i] ), + .s_axi4_arsize ( s_axi4_arsize[i] ), + .s_axi4_arburst ( s_axi4_arburst[i] ), + .s_axi4_arlock ( s_axi4_arlock[i] ), + .s_axi4_arprot ( s_axi4_arprot[i] ), + .s_axi4_arcache ( s_axi4_arcache[i] ), + .s_axi4_aruser ( s_axi4_aruser[i] ), + .m_axi4_arid ( int_arid[i] ), + .m_axi4_araddr ( int_araddr[i] ), + .m_axi4_arvalid ( int_arvalid[i] ), + .m_axi4_arready ( int_arready[i] ), + .m_axi4_arlen ( int_arlen[i] ), + .m_axi4_arsize ( int_arsize[i] ), + .m_axi4_arburst ( int_arburst[i] ), + .m_axi4_arlock ( int_arlock[i] ), + .m_axi4_arprot ( int_arprot[i] ), + .m_axi4_arcache ( int_arcache[i] ), + .m_axi4_aruser ( int_aruser[i] ) + ); + + axi4_ar_sender + #( + .AXI_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ), + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ), + .ENABLE_L2TLB ( ENABLE_L2TLB[i] ) + ) + u_ar_sender_m0 + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + .l1_done_o ( l1_m0_ar_done[i] ), + .l1_accept_i ( l1_m0_ar_accept[i] ), + .l1_drop_i ( l1_m0_ar_drop[i] ), + .l1_save_i ( l1_m0_ar_save[i] ), + .l2_done_o ( l2_m0_ar_done[i] ), + .l2_accept_i ( l2_m0_ar_accept[i] ), + .l2_drop_i ( l2_m0_ar_drop[i] ), + .l2_sending_o ( l2_m0_ar_sending[i] ), + .l1_araddr_i ( int_rtrans_addr[i] ), + .l2_araddr_i ( l2_ar_addr[i] ), + .s_axi4_arid ( int_arid[i] ), + 
.s_axi4_arvalid ( int_m0_arvalid[i] ), + .s_axi4_arready ( int_m0_arready[i] ), + .s_axi4_arlen ( int_arlen[i] ), + .s_axi4_arsize ( int_arsize[i] ), + .s_axi4_arburst ( int_arburst[i] ), + .s_axi4_arlock ( int_arlock[i] ), + .s_axi4_arprot ( int_arprot[i] ), + .s_axi4_arcache ( int_arcache[i] ), + .s_axi4_aruser ( int_aruser[i] ), + .m_axi4_arid ( m0_axi4_arid[i] ), + .m_axi4_araddr ( m0_axi4_araddr[i] ), + .m_axi4_arvalid ( m0_axi4_arvalid[i] ), + .m_axi4_arready ( m0_axi4_arready[i] ), + .m_axi4_arlen ( m0_axi4_arlen[i] ), + .m_axi4_arsize ( m0_axi4_arsize[i] ), + .m_axi4_arburst ( m0_axi4_arburst[i] ), + .m_axi4_arlock ( m0_axi4_arlock[i] ), + .m_axi4_arprot ( m0_axi4_arprot[i] ), + .m_axi4_arcache ( ), + .m_axi4_aruser ( m0_axi4_aruser[i] ) + ); + + // The AXCACHE signals are set according to burstiness and cache coherence or statically + // when not connected to ACP on Zynq (implemented below). + assign m0_read_is_burst[i] = (m0_axi4_arlen[i] != {8{1'b0}}) && (m0_axi4_arburst[i] != 2'b00); + `ifndef EN_ACP + always_comb begin + if ( (l2_m0_ar_sending[i] & l2_cache_coherent[i]) | int_rtrans_cache_coherent[i]) begin + if (m0_read_is_burst[i]) begin + m0_axi4_arcache[i] = 4'b1011; + end else begin + m0_axi4_arcache[i] = 4'b1111; + end + end else begin + m0_axi4_arcache[i] = 4'b0011; + end + end + `else + assign m0_axi4_arcache[i] = 4'b0011; + `endif + + axi4_ar_sender + #( + .AXI_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ), + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ), + .ENABLE_L2TLB ( ENABLE_L2TLB[i] ) + ) + u_ar_sender_m1 + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + .l1_done_o ( l1_m1_ar_done[i] ), + .l1_accept_i ( l1_m1_ar_accept[i] ), + .l1_drop_i ( l1_m1_ar_drop[i] ), + .l1_save_i ( l1_m1_ar_save[i] ), + .l2_done_o ( l2_m1_ar_done[i] ), + .l2_accept_i ( l2_m1_ar_accept[i] ), + .l2_drop_i ( l2_m1_ar_drop[i] ), + .l2_sending_o ( ), // just helps to set axcache + .l1_araddr_i ( int_rtrans_addr[i] ), + .l2_araddr_i ( l2_ar_addr[i] ), 
+ .s_axi4_arid ( int_arid[i] ), + .s_axi4_arvalid ( int_m1_arvalid[i] ), + .s_axi4_arready ( int_m1_arready[i] ), + .s_axi4_arlen ( int_arlen[i] ), + .s_axi4_arsize ( int_arsize[i] ), + .s_axi4_arburst ( int_arburst[i] ), + .s_axi4_arlock ( int_arlock[i] ), + .s_axi4_arprot ( int_arprot[i] ), + .s_axi4_arcache ( int_arcache[i] ), + .s_axi4_aruser ( int_aruser[i] ), + .m_axi4_arid ( m1_axi4_arid[i] ), + .m_axi4_araddr ( m1_axi4_araddr[i] ), + .m_axi4_arvalid ( m1_axi4_arvalid[i] ), + .m_axi4_arready ( m1_axi4_arready[i] ), + .m_axi4_arlen ( m1_axi4_arlen[i] ), + .m_axi4_arsize ( m1_axi4_arsize[i] ), + .m_axi4_arburst ( m1_axi4_arburst[i] ), + .m_axi4_arlock ( m1_axi4_arlock[i] ), + .m_axi4_arprot ( m1_axi4_arprot[i] ), + .m_axi4_arcache ( ), + .m_axi4_aruser ( m1_axi4_aruser[i] ) + ); + + // The AXCACHE signals are set according to burstiness and cache coherence or statically + // when not connected to ACP on Zynq (implemented below). + assign m1_read_is_burst[i] = (m1_axi4_arlen[i] != {8{1'b0}}) && (m1_axi4_arburst[i] != 2'b00); + `ifdef EN_ACP + always_comb begin + if (m1_read_is_burst[i]) begin + m1_axi4_arcache[i] = 4'b1011; + end else begin + m1_axi4_arcache[i] = 4'b1111; + end + end + `else + assign m1_axi4_arcache[i] = 4'b0011; + `endif + + // }}} + + // Read Response channel (r) {{{ + /* + * read response channel (r) + * + * ██████╗ ███████╗ █████╗ ██████╗ ██████╗ ███████╗███████╗██████╗ + * ██╔══██╗██╔════╝██╔══██╗██╔══██╗ ██╔══██╗██╔════╝██╔════╝██╔══██╗ + * ██████╔╝█████╗ ███████║██║ ██║ ██████╔╝█████╗ ███████╗██████╔╝ + * ██╔══██╗██╔══╝ ██╔══██║██║ ██║ ██╔══██╗██╔══╝ ╚════██║██╔═══╝ + * ██║ ██║███████╗██║ ██║██████╔╝ ██║ ██║███████╗███████║██║ + * ╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═════╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝ + * + */ + axi4_r_buffer + #( + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) + u_r_buffer_m0 + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + .s_axi4_rid ( int_m0_rid[i] ), + 
.s_axi4_rresp ( int_m0_rresp[i] ), + .s_axi4_rdata ( int_m0_rdata[i] ), + .s_axi4_rlast ( int_m0_rlast[i] ), + .s_axi4_rvalid ( int_m0_rvalid[i] ), + .s_axi4_ruser ( int_m0_ruser[i] ), + .s_axi4_rready ( int_m0_rready[i] ), + .m_axi4_rid ( m0_axi4_rid[i] ), + .m_axi4_rresp ( m0_axi4_rresp[i] ), + .m_axi4_rdata ( m0_axi4_rdata[i] ), + .m_axi4_rlast ( m0_axi4_rlast[i] ), + .m_axi4_rvalid ( m0_axi4_rvalid[i] ), + .m_axi4_ruser ( m0_axi4_ruser[i] ), + .m_axi4_rready ( m0_axi4_rready[i] ) + ); + + axi4_r_buffer + #( + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) + u_r_buffer_m1 + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + .s_axi4_rid ( int_m1_rid[i] ), + .s_axi4_rresp ( int_m1_rresp[i] ), + .s_axi4_rdata ( int_m1_rdata[i] ), + .s_axi4_rlast ( int_m1_rlast[i] ), + .s_axi4_rvalid ( int_m1_rvalid[i] ), + .s_axi4_ruser ( int_m1_ruser[i] ), + .s_axi4_rready ( int_m1_rready[i] ), + .m_axi4_rid ( m1_axi4_rid[i] ), + .m_axi4_rresp ( m1_axi4_rresp[i] ), + .m_axi4_rdata ( m1_axi4_rdata[i] ), + .m_axi4_rlast ( m1_axi4_rlast[i] ), + .m_axi4_rvalid ( m1_axi4_rvalid[i] ), + .m_axi4_ruser ( m1_axi4_ruser[i] ), + .m_axi4_rready ( m1_axi4_rready[i] ) + ); + + axi4_r_sender + #( + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) + u_r_sender + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + .drop_i ( lx_r_drop[i] ), + .drop_len_i ( lx_len_drop[i] ), + .done_o ( lx_r_done[i] ), + .id_i ( lx_id_drop[i] ), + .prefetch_i ( lx_prefetch_drop[i] ), + .hit_i ( lx_hit_drop[i] ), + .s_axi4_rid ( s_axi4_rid[i] ), + .s_axi4_rresp ( s_axi4_rresp[i] ), + .s_axi4_rdata ( s_axi4_rdata[i] ), + .s_axi4_rlast ( s_axi4_rlast[i] ), + .s_axi4_rvalid ( s_axi4_rvalid[i] ), + .s_axi4_ruser ( s_axi4_ruser[i] ), + .s_axi4_rready ( s_axi4_rready[i] ), + .m_axi4_rid ( int_rid[i] ), + .m_axi4_rresp ( int_rresp[i] ), + .m_axi4_rdata ( int_rdata[i] ), + .m_axi4_rlast ( 
int_rlast[i] ), + .m_axi4_rvalid ( int_rvalid[i] ), + .m_axi4_ruser ( int_ruser[i] ), + .m_axi4_rready ( int_rready[i] ) + ); + + /* + * Multiplexer to switch between the two output master ports on the read response (r) channel + * + * Do not perform read burst interleaving as the DMA does not support it. This means we can only + * switch between the two masters upon sending rlast or when idle. + * + * However, if the downstream already performs burst interleaving, this cannot be undone here. + * Also, the downstream may interleave a burst response with a single-beat transaction. In this + * case, the FSM below falls out of the burst mode. To avoid it performing burst interleaving + * after such an event, it gives priority to the master which received the last burst in case + * both have a burst ready (rvalid). + * + * Order of priority: + * 1. Ongoing burst transaction + * 2. Single-beat transaction on Master 1. + * 3. Single-beat transaction on Master 0. + * 4. Burst transaction on master that received the last burst. 
+ */ + // Select signal + always_ff @(posedge Clk_CI) begin + if (Rst_RBI == 0) begin + RRespSel_SP[i] <= 1'b0; + end else begin + RRespSel_SP[i] <= RRespSel_SN[i]; + end + end + + // FSM + always_comb begin : RRespMuxFsm + RRespMuxCtrl_SN[i] = RRespMuxCtrl_SP[i]; + RRespSel_SN[i] = RRespSel_SP[i]; + + RRespBurst_S[i] = 1'b0; + RRespSelIm_S[i] = 1'b0; + + unique case (RRespMuxCtrl_SP[i]) + + IDLE: begin + // immediately forward single-beat transactions + if (int_m1_rvalid[i] && int_m1_rlast[i]) + RRespSelIm_S[i] = 1'b1; + else if (int_m0_rvalid[i] && int_m0_rlast[i]) + RRespSelIm_S[i] = 1'b0; + + // bursts - they also start immediately + else if (int_m1_rvalid[i] || int_m0_rvalid[i]) begin + RRespMuxCtrl_SN[i] = BUSY; + + // in case both are ready, continue with the master that had the last burst + if (int_m1_rvalid[i] && int_m0_rvalid[i]) begin + RRespSel_SN[i] = RRespSel_SP[i]; + RRespSelIm_S[i] = RRespSel_SP[i]; + end else if (int_m1_rvalid[i]) begin + RRespSel_SN[i] = 1'b1; + RRespSelIm_S[i] = 1'b1; + end else begin + RRespSel_SN[i] = 1'b0; + RRespSelIm_S[i] = 1'b0; + end + end + end + + BUSY: begin + RRespBurst_S[i] = 1'b1; + // detect last handshake of currently ongoing transfer + if (int_rvalid[i] && int_rready[i] && int_rlast[i]) + RRespMuxCtrl_SN[i] = IDLE; + end + + default: begin + RRespMuxCtrl_SN[i] = IDLE; + end + + endcase + end + + // FSM state + always_ff @(posedge Clk_CI) begin + if (Rst_RBI == 0) begin + RRespMuxCtrl_SP[i] <= IDLE; + end else begin + RRespMuxCtrl_SP[i] <= RRespMuxCtrl_SN[i]; + end + end + + // Actual multiplexer + always_comb begin + if ( (RRespBurst_S[i] && RRespSel_SP[i]) || (!RRespBurst_S[i] && RRespSelIm_S[i]) ) begin + int_m0_rready[i] = 1'b0; + int_m1_rready[i] = int_rready[i]; + + int_rid[i] = int_m1_rid[i]; + int_rresp[i] = int_m1_rresp[i]; + int_rdata[i] = int_m1_rdata[i]; + int_rlast[i] = int_m1_rlast[i]; + int_ruser[i] = int_m1_ruser[i]; + int_rvalid[i] = int_m1_rvalid[i]; + end else begin + int_m0_rready[i] = 
int_rready[i]; + int_m1_rready[i] = 1'b0; + + int_rid[i] = int_m0_rid[i]; + int_rresp[i] = int_m0_rresp[i]; + int_rdata[i] = int_m0_rdata[i]; + int_rlast[i] = int_m0_rlast[i]; + int_ruser[i] = int_m0_ruser[i]; + int_rvalid[i] = int_m0_rvalid[i]; + end + end + + end // BUF & SEND + + // }}} + + endgenerate // BUF & SEND }}} + + // Log {{{ + +`ifdef RAB_AX_LOG_EN + AxiBramLogger + #( + .AXI_ID_BITW ( AXI_ID_WIDTH ), + .AXI_ADDR_BITW ( AXI_S_ADDR_WIDTH ), + .NUM_LOG_ENTRIES ( `RAB_AX_LOG_ENTRIES ) + ) + u_aw_logger + ( + .Clk_CI ( NonGatedClk_CI ), + .TimestampClk_CI ( Clk_CI ), + .Rst_RBI ( Rst_RBI ), + .AxiValid_SI ( s_axi4_awvalid[1] ), + .AxiReady_SI ( s_axi4_awready[1] ), + .AxiId_DI ( s_axi4_awid[1] ), + .AxiAddr_DI ( s_axi4_awaddr[1] ), + .AxiLen_DI ( s_axi4_awlen[1] ), + .Clear_SI ( AwLogClr_SI ), + .LogEn_SI ( LogEn_SI ), + .Full_SO ( int_aw_log_full ), + .Ready_SO ( AwLogRdy_SO ), + .Bram_PS ( AwBram_PS ) + ); + + AxiBramLogger + #( + .AXI_ID_BITW ( AXI_ID_WIDTH ), + .AXI_ADDR_BITW ( AXI_S_ADDR_WIDTH ), + .NUM_LOG_ENTRIES ( `RAB_AX_LOG_ENTRIES ) + ) + u_ar_logger + ( + .Clk_CI ( NonGatedClk_CI ), + .TimestampClk_CI ( Clk_CI ), + .Rst_RBI ( Rst_RBI ), + .AxiValid_SI ( s_axi4_arvalid[1] ), + .AxiReady_SI ( s_axi4_arready[1] ), + .AxiId_DI ( s_axi4_arid[1] ), + .AxiAddr_DI ( s_axi4_araddr[1] ), + .AxiLen_DI ( s_axi4_arlen[1] ), + .Clear_SI ( ArLogClr_SI ), + .LogEn_SI ( LogEn_SI ), + .Full_SO ( int_ar_log_full ), + .Ready_SO ( ArLogRdy_SO ), + .Bram_PS ( ArBram_PS ) + ); +`endif + + // }}} + + // RAB Core {{{ + // ██████╗ █████╗ ██████╗ ██████╗ ██████╗ ██████╗ ███████╗ + // ██╔══██╗██╔══██╗██╔══██╗ ██╔════╝██╔═══██╗██╔══██╗██╔════╝ + // ██████╔╝███████║██████╔╝ ██║ ██║ ██║██████╔╝█████╗ + // ██╔══██╗██╔══██║██╔══██╗ ██║ ██║ ██║██╔══██╗██╔══╝ + // ██║ ██║██║ ██║██████╔╝ ╚██████╗╚██████╔╝██║ ██║███████╗ + // ╚═╝ ╚═╝╚═╝ ╚═╝╚═════╝ ╚═════╝ ╚═════╝ ╚═╝ ╚═╝╚══════╝ + // + /* + * rab_core + * + * The rab core translates addresses. 
It has two ports, which can be used + * independently, however they will compete for time internally, as lookups + * are serialized. + * + * type is the read(0) or write(1) used to check the protection flags. If they + * don't match an interrupt is created on the int_prot line. + */ + + rab_core + #( + .N_PORTS ( N_PORTS ), + .N_L2_SETS ( N_L2_SETS ), + .N_L2_SET_ENTRIES ( N_L2_SET_ENTRIES ), + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_S_ADDR_WIDTH ( AXI_S_ADDR_WIDTH ), + .AXI_M_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ), + .AXI_LITE_DATA_WIDTH ( AXI_LITE_DATA_WIDTH ), + .AXI_LITE_ADDR_WIDTH ( AXI_LITE_ADDR_WIDTH ), + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ), + .MH_FIFO_DEPTH ( MH_FIFO_DEPTH ) + ) + u_rab_core + ( + .Clk_CI ( Clk_CI ), + .Rst_RBI ( Rst_RBI ), + + // Config IF + .s_axi_awaddr ( s_axi4lite_awaddr ), + .s_axi_awvalid ( s_axi4lite_awvalid ), + .s_axi_awready ( s_axi4lite_awready ), + .s_axi_wdata ( s_axi4lite_wdata ), + .s_axi_wstrb ( s_axi4lite_wstrb ), + .s_axi_wvalid ( s_axi4lite_wvalid ), + .s_axi_wready ( s_axi4lite_wready ), + .s_axi_bresp ( s_axi4lite_bresp ), + .s_axi_bvalid ( s_axi4lite_bvalid ), + .s_axi_bready ( s_axi4lite_bready ), + .s_axi_araddr ( s_axi4lite_araddr ), + .s_axi_arvalid ( s_axi4lite_arvalid ), + .s_axi_arready ( s_axi4lite_arready ), + .s_axi_rready ( s_axi4lite_rready ), + .s_axi_rdata ( s_axi4lite_rdata ), + .s_axi_rresp ( s_axi4lite_rresp ), + .s_axi_rvalid ( s_axi4lite_rvalid ), + + // L1 miss info outputs -> L2 TLB arbitration + .int_miss ( rab_miss ), + .int_multi ( rab_multi ), + .int_prot ( rab_prot ), + .int_prefetch ( rab_prefetch ), + .int_mhf_full ( int_mhf_full ), + + // L1 transaction info outputs -> L2 TLB arbitration + .int_axaddr_o ( L1OutAddr_D ), + .int_axid_o ( L1OutId_D ), + .int_axlen_o ( L1OutLen_D ), + .int_axuser_o ( L1OutUser_D ), + + // Write Req IF + .port1_addr ( int_awaddr ), + .port1_id ( int_awid ), + .port1_len ( int_awlen ), + .port1_size ( int_awsize ), + 
.port1_addr_valid ( int_awvalid & ~aw_in_stall ), // avoid the FSM accepting new AW requests + .port1_type ( {N_PORTS{1'b1}} ), + .port1_user ( int_awuser ), + .port1_sent ( int_wtrans_sent ), // signal done to L1 FSM + .port1_out_addr ( int_wtrans_addr ), + .port1_cache_coherent ( int_wtrans_cache_coherent ), + .port1_accept ( int_wtrans_accept ), + .port1_drop ( int_wtrans_drop ), + .port1_miss ( int_wtrans_miss ), + + // Read Req IF + .port2_addr ( int_araddr ), + .port2_id ( int_arid ), + .port2_len ( int_arlen ), + .port2_size ( int_arsize ), + .port2_addr_valid ( int_arvalid ), + .port2_type ( {N_PORTS{1'b0}} ), + .port2_user ( int_aruser ), + .port2_sent ( int_rtrans_sent ), // signal done to L1 FSM + .port2_out_addr ( int_rtrans_addr ), + .port2_cache_coherent ( int_rtrans_cache_coherent ), + .port2_accept ( int_rtrans_accept ), + .port2_drop ( int_rtrans_drop ), + .port2_miss ( int_rtrans_miss ), + + // L2 miss info inputs -> axi_rab_cfg + .miss_l2_i ( L2Miss_S ), + .miss_l2_addr_i ( L2OutInAddr_DP ), + .miss_l2_id_i ( L2OutId_DP ), + .miss_l2_user_i ( L2OutUser_DP ), + + // L2 config outputs + .wdata_l2_o ( L2CfgWData_D ), + .waddr_l2_o ( L2CfgWAddr_D ), + .wren_l2_o ( L2CfgWE_S ) + ); + + // }}} + + // AX SPLITS {{{ + // █████╗ ██╗ ██╗ ███████╗██████╗ ██╗ ██╗████████╗ + // ██╔══██╗╚██╗██╔╝ ██╔════╝██╔══██╗██║ ██║╚══██╔══╝ + // ███████║ ╚███╔╝ ███████╗██████╔╝██║ ██║ ██║ + // ██╔══██║ ██╔██╗ ╚════██║██╔═══╝ ██║ ██║ ██║ + // ██║ ██║██╔╝ ██╗ ███████║██║ ███████╗██║ ██║ + // ╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝╚═╝ ╚══════╝╚═╝ ╚═╝ + // + /** + * Multiplex the two output master ports of the Read Address and Write Address (AR/AW) channels. + * + * Use the `int_xmaster_select` signal to route the signals to either Master 0 (to memory) or + * Master 1 (to ACP). In case of an L1 miss: Route the signals to both masters. They shall be + * saved until the L2 outputs are available. 
+ */ + generate for (i = 0; i < N_PORTS; i++) begin : AX_SPLIT + + /* + * When accepting L1 transactions, we must just do so on the selected master. Drop requests must + * be performed on any one of the two masters. Save requests must be performed by both masters. + */ + always_comb begin : AW_L1_SPLIT + + // TLB handshake + l1_m0_aw_accept[i] = 1'b0; + l1_m1_aw_accept[i] = 1'b0; + l1_m0_aw_drop[i] = 1'b0; + l1_m1_aw_drop[i] = 1'b0; + l1_m0_aw_save[i] = 1'b0; + l1_m1_aw_save[i] = 1'b0; + + l1_mx_aw_done[i] = 1'b0; + + // AXI sender input handshake + int_m0_awvalid[i] = 1'b0; + int_m1_awvalid[i] = 1'b0; + int_awready[i] = 1'b0; + + // accept on selected master only + if (l1_aw_accept[i]) begin + if (int_wmaster_select[i]) begin + l1_m1_aw_accept[i] = 1'b1; + l1_mx_aw_done[i] = l1_m1_aw_done[i]; + + int_m1_awvalid[i] = int_awvalid[i]; + int_awready[i] = int_m1_awready[i]; + + end else begin + l1_m0_aw_accept[i] = 1'b1; + l1_mx_aw_done[i] = l1_m0_aw_done[i]; + + int_m0_awvalid[i] = int_awvalid[i]; + int_awready[i] = int_m0_awready[i]; + end + + // drop on Master 0 only + end else if (l1_aw_drop[i]) begin + l1_m0_aw_drop[i] = 1'b1; + l1_mx_aw_done[i] = l1_m0_aw_done[i]; + + int_m0_awvalid[i] = int_awvalid[i]; + int_awready[i] = l1_m0_aw_done[i]; + + // save on both masters + end else if (l1_aw_save[i]) begin + // split save + l1_m0_aw_save[i] = ~l1_m0_aw_done_SP[i]; + l1_m1_aw_save[i] = ~l1_m1_aw_done_SP[i]; + + // combine done + l1_mx_aw_done[i] = l1_m0_aw_done_SP[i] & l1_m1_aw_done_SP[i]; + + int_m0_awvalid[i] = int_awvalid[i]; + int_m1_awvalid[i] = int_awvalid[i]; + int_awready[i] = l1_mx_aw_done[i]; + end + end + + // signal back to handshake splitter + assign l1_aw_done[i] = l1_mx_aw_done[i]; + + always_ff @(posedge Clk_CI) begin : L1_MX_AW_DONE_REG + if (Rst_RBI == 0) begin + l1_m0_aw_done_SP[i] <= 1'b0; + l1_m1_aw_done_SP[i] <= 1'b0; + end else if (l1_mx_aw_done[i]) begin + l1_m0_aw_done_SP[i] <= 1'b0; + l1_m1_aw_done_SP[i] <= 1'b0; + end else begin + 
l1_m0_aw_done_SP[i] <= l1_m0_aw_done_SP[i] | l1_m0_aw_done[i]; + l1_m1_aw_done_SP[i] <= l1_m1_aw_done_SP[i] | l1_m1_aw_done[i]; + end + end + + /* + * When accepting L2 transactions, we must drop the corresponding transaction from the other + * master to make it available again for save requests from L1_DROP_SAVE. + */ + always_comb begin : AW_L2_SPLIT + + l2_m0_aw_accept[i] = 1'b0; + l2_m1_aw_accept[i] = 1'b0; + l2_m0_aw_drop[i] = 1'b0; + l2_m1_aw_drop[i] = 1'b0; + + // de-assert request signals individually upon handshakes + if (l2_aw_accept[i]) begin + if (l2_master_select[i]) begin + l2_m1_aw_accept[i] = ~l2_m1_aw_done_SP[i]; + l2_m0_aw_drop[i] = ~l2_m0_aw_done_SP[i]; + + end else begin + l2_m0_aw_accept[i] = ~l2_m0_aw_done_SP[i]; + l2_m1_aw_drop[i] = ~l2_m1_aw_done_SP[i]; + + end + end else begin + l2_m0_aw_drop[i] = ~l2_m0_aw_done_SP[i] ? l2_aw_drop[i] : 1'b0; + l2_m1_aw_drop[i] = ~l2_m1_aw_done_SP[i] ? l2_aw_drop[i] : 1'b0; + + end + + // combine done + l2_mx_aw_done[i] = l2_m0_aw_done_SP[i] & l2_m1_aw_done_SP[i]; + + l2_aw_done[i] = l2_mx_aw_done[i]; + end + + always_ff @(posedge Clk_CI) begin : L2_MX_AW_DONE_REG + if (Rst_RBI == 0) begin + l2_m0_aw_done_SP[i] <= 1'b0; + l2_m1_aw_done_SP[i] <= 1'b0; + end else if (l2_mx_aw_done[i]) begin + l2_m0_aw_done_SP[i] <= 1'b0; + l2_m1_aw_done_SP[i] <= 1'b0; + end else begin + l2_m0_aw_done_SP[i] <= l2_m0_aw_done_SP[i] | l2_m0_aw_done[i]; + l2_m1_aw_done_SP[i] <= l2_m1_aw_done_SP[i] | l2_m1_aw_done[i]; + end + end + + /* + * When accepting L1 transactions, we must just do so on the selected master. Drop requests must + * be performed on any one of the two masters. Save requests must be performed by both masters. 
+ */ + always_comb begin : AR_L1_SPLIT + + // TLB handshake + l1_m0_ar_accept[i] = 1'b0; + l1_m1_ar_accept[i] = 1'b0; + l1_m0_ar_drop[i] = 1'b0; + l1_m1_ar_drop[i] = 1'b0; + l1_m0_ar_save[i] = 1'b0; + l1_m1_ar_save[i] = 1'b0; + + l1_mx_ar_done[i] = 1'b0; + + // AXI sender input handshake + int_m0_arvalid[i] = 1'b0; + int_m1_arvalid[i] = 1'b0; + int_arready[i] = 1'b0; + + // accept on selected master only + if (l1_ar_accept[i]) begin + if (int_rmaster_select[i]) begin + l1_m1_ar_accept[i] = 1'b1; + l1_mx_ar_done[i] = l1_m1_ar_done[i]; + + int_m1_arvalid[i] = int_arvalid[i]; + int_arready[i] = int_m1_arready[i]; + + end else begin + l1_m0_ar_accept[i] = 1'b1; + l1_mx_ar_done[i] = l1_m0_ar_done[i]; + + int_m0_arvalid[i] = int_arvalid[i]; + int_arready[i] = int_m0_arready[i]; + end + + // drop on Master 0 only + end else if (l1_ar_drop[i]) begin + l1_m0_ar_drop[i] = 1'b1; + l1_mx_ar_done[i] = l1_m0_ar_done[i]; + + int_m0_arvalid[i] = int_arvalid[i]; + int_arready[i] = l1_m0_ar_done[i]; + + // save on both masters + end else if (l1_ar_save[i]) begin + // split save + l1_m0_ar_save[i] = ~l1_m0_ar_done_SP[i]; + l1_m1_ar_save[i] = ~l1_m1_ar_done_SP[i]; + + // combine done + l1_mx_ar_done[i] = l1_m0_ar_done_SP[i] & l1_m1_ar_done_SP[i]; + + int_m0_arvalid[i] = int_arvalid[i]; + int_m1_arvalid[i] = int_arvalid[i]; + int_arready[i] = l1_mx_ar_done[i]; + end + end + + // signal back to handshake splitter + assign l1_ar_done[i] = l1_mx_ar_done[i]; + + always_ff @(posedge Clk_CI) begin : L1_MX_AR_DONE_REG + if (Rst_RBI == 0) begin + l1_m0_ar_done_SP[i] <= 1'b0; + l1_m1_ar_done_SP[i] <= 1'b0; + end else if (l1_mx_ar_done[i]) begin + l1_m0_ar_done_SP[i] <= 1'b0; + l1_m1_ar_done_SP[i] <= 1'b0; + end else begin + l1_m0_ar_done_SP[i] <= l1_m0_ar_done_SP[i] | l1_m0_ar_done[i]; + l1_m1_ar_done_SP[i] <= l1_m1_ar_done_SP[i] | l1_m1_ar_done[i]; + end + end + + /* + * When accepting L2 transactions, we must drop the corresponding transaction from the other + * master to make it available 
again for save requests from L1_DROP_SAVE. + */ + always_comb begin : AR_L2_SPLIT + + l2_m0_ar_accept[i] = 1'b0; + l2_m1_ar_accept[i] = 1'b0; + l2_m0_ar_drop[i] = 1'b0; + l2_m1_ar_drop[i] = 1'b0; + + // de-assert request signals individually upon handshakes + if (l2_ar_accept[i]) begin + if (l2_master_select[i]) begin + l2_m1_ar_accept[i] = ~l2_m1_ar_done_SP[i]; + l2_m0_ar_drop[i] = ~l2_m0_ar_done_SP[i]; + + end else begin + l2_m0_ar_accept[i] = ~l2_m0_ar_done_SP[i]; + l2_m1_ar_drop[i] = ~l2_m1_ar_done_SP[i]; + + end + end else if (l2_ar_drop[i]) begin + l2_m0_ar_drop[i] = ~l2_m0_ar_done_SP[i] ? l2_ar_drop[i] : 1'b0; + l2_m1_ar_drop[i] = ~l2_m1_ar_done_SP[i] ? l2_ar_drop[i] : 1'b0; + + end + + // combine done + l2_mx_ar_done[i] = l2_m0_ar_done_SP[i] & l2_m1_ar_done_SP[i]; + + l2_ar_done[i] = l2_mx_ar_done[i]; + end + + always_ff @(posedge Clk_CI) begin : L2_MX_AR_DONE_REG + if (Rst_RBI == 0) begin + l2_m0_ar_done_SP[i] <= 1'b0; + l2_m1_ar_done_SP[i] <= 1'b0; + end else if (l2_mx_ar_done[i]) begin + l2_m0_ar_done_SP[i] <= 1'b0; + l2_m1_ar_done_SP[i] <= 1'b0; + end else begin + l2_m0_ar_done_SP[i] <= l2_m0_ar_done_SP[i] | l2_m0_ar_done[i]; + l2_m1_ar_done_SP[i] <= l2_m1_ar_done_SP[i] | l2_m1_ar_done[i]; + end + end + + end // AX_SPLIT + endgenerate // AX_SPLIT + + // }}} + + // HANDSHAKE SPLITS {{{ + // ██╗ ██╗███████╗ ███████╗██████╗ ██╗ ██╗████████╗ + // ██║ ██║██╔════╝ ██╔════╝██╔══██╗██║ ██║╚══██╔══╝ + // ███████║███████╗ ███████╗██████╔╝██║ ██║ ██║ + // ██╔══██║╚════██║ ╚════██║██╔═══╝ ██║ ██║ ██║ + // ██║ ██║███████║ ███████║██║ ███████╗██║ ██║ + // ╚═╝ ╚═╝╚══════╝ ╚══════╝╚═╝ ╚══════╝╚═╝ ╚═╝ + // + /* + * We need to perform combined handshakes with multiple AXI modules + * upon transactions drops, accepts, saves etc. from two TLBs. 
+ */ + generate for (i = 0; i < N_PORTS; i++) begin : HANDSHAKE_SPLIT + + assign l1_xw_accept[i] = int_wtrans_accept[i] & ~aw_out_stall[i]; + assign int_wtrans_sent[i] = l1_xw_done[i]; + + assign l1_ar_accept[i] = int_rtrans_accept[i]; + assign int_rtrans_sent[i] = l1_ar_done[i]; + + /* + * L1 AW sender + W buffer handshake split + */ + // forward + assign l1_aw_accept[i] = l1_xw_accept[i] & ~l1_aw_done_SP[i]; + assign l1_w_accept[i] = l1_xw_accept[i] & ~l1_w_done_SP[i]; + + assign l1_aw_save[i] = l1_xw_save[i] & ~l1_aw_done_SP[i]; + assign l1_w_save[i] = l1_xw_save[i] & ~l1_w_done_SP[i]; + + assign l1_aw_drop[i] = l1_xw_drop[i] & ~l1_aw_done_SP[i]; + assign l1_w_drop[i] = l1_xw_drop[i] & ~l1_w_done_SP[i]; + + // backward + assign l1_xw_done[i] = l1_aw_done_SP[i] & l1_w_done_SP[i]; + + always_ff @(posedge Clk_CI) begin : L1_XW_HS_SPLIT + if (Rst_RBI == 0) begin + l1_aw_done_SP[i] <= 1'b0; + l1_w_done_SP[i] <= 1'b0; + end else if (l1_xw_done[i]) begin + l1_aw_done_SP[i] <= 1'b0; + l1_w_done_SP[i] <= 1'b0; + end else begin + l1_aw_done_SP[i] <= l1_aw_done_SP[i] | l1_aw_done[i]; + l1_w_done_SP[i] <= l1_w_done_SP[i] | l1_w_done[i]; + end + end + + if (ENABLE_L2TLB[i] == 1) begin : L2_HS_SPLIT + + /* + * L1 AR sender + R sender handshake split + * + * AR and R do not need to be strictly in sync. We thus use separate handshakes. + * But the handshake signals for the R sender are multiplexed with those for + * the L2. However, L2_ACCEPT_DROP_SAVE always has higher priority. + */ + assign lx_r_drop[i] = l2_r_drop[i] | l1_r_drop[i]; + assign l1_r_done[i] = l2_r_drop[i] ? 1'b0 : lx_r_done[i]; + assign l2_r_done[i] = l2_r_drop[i] ? 
lx_r_done[i] : 1'b0; + + /* + * L2 AW sender + W buffer handshake split + */ + // forward + assign l2_aw_accept[i] = l2_xw_accept[i] & ~l2_aw_done_SP[i]; + assign l2_w_accept[i] = l2_xw_accept[i] & ~l2_w_done_SP[i]; + + assign l2_aw_drop[i] = l2_xw_drop[i] & ~l2_aw_done_SP[i]; + assign l2_w_drop[i] = l2_xw_drop[i] & ~l2_w_done_SP[i]; + + // backward + assign l2_xw_done[i] = l2_aw_done_SP[i] & l2_w_done_SP[i]; + + always_ff @(posedge Clk_CI) begin : L2_XW_HS_SPLIT + if (Rst_RBI == 0) begin + l2_aw_done_SP[i] <= 1'b0; + l2_w_done_SP[i] <= 1'b0; + end else if (l2_xw_done[i]) begin + l2_aw_done_SP[i] <= 1'b0; + l2_w_done_SP[i] <= 1'b0; + end else begin + l2_aw_done_SP[i] <= l2_aw_done_SP[i] | l2_aw_done[i]; + l2_w_done_SP[i] <= l2_w_done_SP[i] | l2_w_done[i]; + end + end + + /* + * L2 AR + R sender handshake split + */ + // forward + assign l2_ar_drop[i] = l2_xr_drop[i] & ~l2_ar_done_SP[i]; + assign l2_r_drop[i] = l2_xr_drop[i] & ~l2_r_done_SP[i]; + + // backward - make sure to always clear L2_XR_HS_SPLIT + always_comb begin + if (l2_xr_drop[i]) begin + l2_xr_done[i] = l2_ar_done_SP[i] & l2_r_done_SP[i]; + end else begin + l2_xr_done[i] = l2_ar_done_SP[i]; + end + end + + always_ff @(posedge Clk_CI) begin : L2_XR_HS_SPLIT + if (Rst_RBI == 0) begin + l2_ar_done_SP[i] <= 1'b0; + l2_r_done_SP[i] <= 1'b0; + end else if (l2_xr_done[i]) begin + l2_ar_done_SP[i] <= 1'b0; + l2_r_done_SP[i] <= 1'b0; + end else begin + l2_ar_done_SP[i] <= l2_ar_done_SP[i] | l2_ar_done[i]; + l2_r_done_SP[i] <= l2_r_done_SP[i] | l2_r_done[i]; + end + end + + end else begin // if (ENABLE_L2TLB[i] == 1) + + assign lx_r_drop[i] = l1_r_drop[i]; + assign l1_r_done[i] = lx_r_done[i]; + + assign l2_aw_accept[i] = 1'b0; + assign l2_w_accept[i] = 1'b0; + assign l2_aw_drop[i] = 1'b0; + assign l2_w_drop[i] = 1'b0; + assign l2_xw_done[i] = 1'b0; + assign l2_aw_done_SP[i] = 1'b0; + assign l2_w_done_SP[i] = 1'b0; + + assign l2_ar_accept[i] = 1'b0; + assign l2_ar_drop[i] = 1'b0; + assign l2_r_drop[i] = 1'b0; + 
assign l2_xr_done[i] = 1'b0; + assign l2_r_done[i] = 1'b0; + assign l2_ar_done_SP[i] = 1'b0; + assign l2_r_done_SP[i] = 1'b0; + + end // if (ENABLE_L2TLB[i] == 1) + + end // HANDSHAKE_SPLIT + endgenerate // HANDSHAKE_SPLIT + + // }}} + + // L2 TLB {{{ + // ██╗ ██████╗ ████████╗██╗ ██████╗ + // ██║ ╚════██╗ ╚══██╔══╝██║ ██╔══██╗ + // ██║ █████╔╝ ██║ ██║ ██████╔╝ + // ██║ ██╔═══╝ ██║ ██║ ██╔══██╗ + // ███████╗███████╗ ██║ ███████╗██████╔╝ + // ╚══════╝╚══════╝ ╚═╝ ╚══════╝╚═════╝ + // + /* + * l2_tlb + * + * The L2 TLB translates addresses upon misses in the L1 TLB (rab_core). + * + * It supports one ongoing translation at a time. If an L1 miss occurs while the L2 is busy, + * the L1 is stalled until the L2 is available again. + * + */ + generate for (i = 0; i < N_PORTS; i++) begin : L2_TLB + if (ENABLE_L2TLB[i] == 1) begin : L2_TLB + + /* + * L1 output selector + */ + assign L1OutRwType_D[i] = int_wtrans_drop[i] ? 1'b1 : 1'b0; + assign L1OutProt_D[i] = rab_prot[i]; + assign L1OutMulti_D[i] = rab_multi[i]; + + /* + * L1 output control + L1_DROP_BUF, L2_IN_BUF management + * + * Forward the L1 drop request to AR/AW sender modules if + * 1. the transaction needs to be dropped (L1 multi, prot, prefetch), or + * 2. a lookup in the L2 TLB is required (L1 miss) and the input buffer is not full. + * + * The AR/AW senders do not support more than 1 outstanding L1 miss. The push back towards + * the upstream is realized by not accepting the save request (saving the L1 transaction) + * in the senders as long as the L2 TLB is busy or has valid output. This ultimately + * blocks the L1 TLB. + * + * Together with the AW drop/save, we also perform the W drop/save as AW and W need to + * absolutely remain in order. 
In contrast, the R drop is performed separately by L2_ACCEPT_DROP_SAVE using the data held in L1_DROP_BUF. + */ + always_comb begin : L1_DROP_SAVE + + l1_ar_drop[i] = 1'b0; + l1_ar_save[i] = 1'b0; + l1_xw_drop[i] = 1'b0; + l1_xw_save[i] = 1'b0; + + l1_id_drop[i] = L1OutId_D[i]; + l1_len_drop[i] = L1OutLen_D[i]; + l1_prefetch_drop[i] = rab_prefetch[i]; + l1_hit_drop[i] = 1'b1; // there are no drops for L1 misses + + L1DropEn_S[i] = 1'b0; + L2InEn_S[i] = 1'b0; + + if ( rab_prot[i] | rab_multi[i] | rab_prefetch[i] ) begin + // 1. Drop + l1_ar_drop[i] = int_rtrans_drop[i] & ~L1DropValid_SP[i]; + l1_xw_drop[i] = int_wtrans_drop[i] & ~L1DropValid_SP[i]; + + // Store to L1_DROP_BUF upon handshake + L1DropEn_S[i] = (l1_ar_drop[i] & l1_ar_done[i]) | + (l1_xw_drop[i] & l1_xw_done[i]); + + end else if ( rab_miss[i] ) begin + // 2. Save - Make sure L2 is really available. + l1_ar_save[i] = int_rtrans_drop[i] & ~L2Busy_S[i]; + l1_xw_save[i] = int_wtrans_drop[i] & ~L2Busy_S[i]; + + // Store to L2_IN_BUF upon handshake - triggers the L2 TLB + L2InEn_S[i] = (l1_ar_save[i] & l1_ar_done[i]) | + (l1_xw_save[i] & l1_xw_done[i]); + end + end + + /* + * L2 output control + L2_OUT_BUF management + R/B sender control + W buffer control + * + * Perform L1 R transaction drops unless the L2 output buffer holds valid data. The AXI specs + * require the B response to be sent only after consuming/discarding the corresponding data + * in the W channel. Thus, we only send the L2 drop request to the W buffer here. The drop + * request to the B sender is then sent by the W buffer autonomously. + * + * L1 AW/W drop requests are managed by L1_DROP_SAVE. 
+ */ + always_comb begin : L2_ACCEPT_DROP_SAVE + + l2_ar_addr[i] = 'b0; + l2_aw_addr[i] = 'b0; + l2_ar_accept[i] = 1'b0; + l2_xr_drop[i] = 1'b0; + l2_xw_accept[i] = 1'b0; + l2_xw_drop[i] = 1'b0; + + l1_r_drop[i] = 1'b0; + + lx_id_drop[i] = 'b0; + lx_len_drop[i] = 'b0; + lx_prefetch_drop[i] = 1'b0; + lx_hit_drop[i] = 1'b0; + + L1DropValid_SN[i] = L1DropValid_SP[i] | L1DropEn_S[i]; + L2OutValid_SN[i] = L2OutValid_SP[i]; + L2OutReady_S[i] = 1'b0; + L2OutEn_S[i] = 1'b0; + + L2Miss_S[i] = 1'b0; + int_multi[i] = 1'b0; + int_prot[i] = 1'b0; + + if (L2OutValid_SP[i] == 1'b0) begin + + // Drop L1 from R senders + if (L1DropValid_SP[i] == 1'b1) begin + + // Only perform the R sender drop here. + if (~L1DropRwType_DP[i]) begin + + l1_r_drop[i] = 1'b1; + lx_id_drop[i] = L1DropId_DP[i]; + lx_len_drop[i] = L1DropLen_DP[i]; + lx_prefetch_drop[i] = L1DropPrefetch_S[i]; + lx_hit_drop[i] = 1'b1; // there are no drops for L1 misses + + // Invalidate L1_DROP_BUF upon handshake + if ( l1_r_drop[i] & l1_r_done[i] ) begin + + L1DropValid_SN[i] = 1'b0; + int_prot[i] = L1DropProt_DP[i]; + int_multi[i] = L1DropMulti_DP[i]; + end + + end else begin + // Invalidate L1_DROP_BUF + L1DropValid_SN[i] = 1'b0; + int_prot[i] = L1DropProt_DP[i]; + int_multi[i] = L1DropMulti_DP[i]; + end + end + + end else begin // L2_OUT_BUF has valid data + + if ( L2OutHit_SP[i] & ~(L2OutPrefetch_S[i] | L2OutProt_SP[i] | L2OutMulti_SP[i]) ) begin + + l2_ar_addr[i] = L2OutAddr_DP[i]; + l2_aw_addr[i] = L2OutAddr_DP[i]; + + l2_ar_accept[i] = L2OutRwType_DP[i] ? 1'b0 : 1'b1; + l2_xw_accept[i] = L2OutRwType_DP[i] ? 
1'b1 : 1'b0; + + // Invalidate L2_OUT_BUF upon handshake + L2OutValid_SN[i] = ~( (l2_ar_accept[i] & l2_ar_done[i]) | + (l2_xw_accept[i] & l2_xw_done[i]) ); + end else begin + + lx_id_drop[i] = L2OutId_DP[i]; + lx_len_drop[i] = L2OutLen_DP[i]; + lx_prefetch_drop[i] = L2OutPrefetch_S[i]; + lx_hit_drop[i] = L2OutHit_SP[i]; + + // The l2_xr_drop will also perform the handshake with the R sender + l2_xr_drop[i] = L2OutRwType_DP[i] ? 1'b0 : 1'b1; + l2_xw_drop[i] = L2OutRwType_DP[i] ? 1'b1 : 1'b0; + + // Invalidate L2_OUT_BUF upon handshake + if ( (l2_xr_drop[i] & l2_xr_done[i]) | (l2_xw_drop[i] & l2_xw_done[i]) ) begin + + L2OutValid_SN[i] = 1'b0; + L2Miss_S[i] = ~L2OutHit_SP[i]; + int_prot[i] = L2OutProt_SP[i]; + int_multi[i] = L2OutMulti_SP[i]; + end + end + end + + // Only accept new L2 output after ongoing drops have finished. + if ( (l2_xr_drop[i] == l2_xr_done[i]) & + (l2_xw_drop[i] == l2_xw_done[i]) & + (l1_r_drop[i] == l1_r_done[i] ) ) begin + // Store to L2_OUT_BUF upon handshake with L2 TLB module + if ( (L2OutValid_SP[i] == 1'b0) && (L2OutValid_S[i] == 1'b1) ) begin + L2OutValid_SN[i] = 1'b1; + L2OutReady_S[i] = 1'b1; + L2OutEn_S[i] = 1'b1; + end + end + end + + /* + * L1 drop buffer + * + * Used in case of multi, prot and prefetch hits in the L1 TLB. 
+ */ + always_ff @(posedge Clk_CI) begin : L1_DROP_BUF + if (Rst_RBI == 0) begin + L1DropProt_DP[i] <= 1'b0; + L1DropMulti_DP[i] <= 1'b0; + L1DropRwType_DP[i] <= 1'b0; + L1DropUser_DP[i] <= 'b0; + L1DropId_DP[i] <= 'b0; + L1DropLen_DP[i] <= 'b0; + L1DropAddr_DP[i] <= 'b0; + end else if (L1DropEn_S[i] == 1'b1) begin + L1DropProt_DP[i] <= L1OutProt_D[i] ; + L1DropMulti_DP[i] <= L1OutMulti_D[i] ; + L1DropRwType_DP[i] <= L1OutRwType_D[i]; + L1DropUser_DP[i] <= L1OutUser_D[i] ; + L1DropId_DP[i] <= L1OutId_D[i] ; + L1DropLen_DP[i] <= L1OutLen_D[i] ; + L1DropAddr_DP[i] <= L1OutAddr_D[i] ; + end + end // always_ff @ (posedge Clk_CI) + + /* + * L2 input buffer + * + * Make sure there are no combinational paths between L1 TLB/inputs and L2 TLB. + */ + always_ff @(posedge Clk_CI) begin : L2_IN_BUF + if (Rst_RBI == 0) begin + L2InRwType_DP[i] <= 1'b0; + L2InUser_DP[i] <= 'b0; + L2InId_DP[i] <= 'b0; + L2InLen_DP[i] <= 'b0; + L2InAddr_DP[i] <= 'b0; + end else if (L2InEn_S[i] == 1'b1) begin + L2InRwType_DP[i] <= L1OutRwType_D[i]; + L2InUser_DP[i] <= L1OutUser_D[i] ; + L2InId_DP[i] <= L1OutId_D[i] ; + L2InLen_DP[i] <= L1OutLen_D[i] ; + L2InAddr_DP[i] <= L1OutAddr_D[i] ; + end + end // always_ff @ (posedge Clk_CI) + + l2_tlb + #( + .AXI_S_ADDR_WIDTH ( AXI_S_ADDR_WIDTH ), + .AXI_M_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ), + .AXI_LITE_DATA_WIDTH ( AXI_LITE_DATA_WIDTH ), + .AXI_LITE_ADDR_WIDTH ( AXI_LITE_ADDR_WIDTH ), + .N_SETS ( `RAB_L2_N_SETS ), + .N_OFFSETS ( `RAB_L2_N_SET_ENTRIES/2/`RAB_L2_N_PAR_VA_RAMS ), + .N_PAR_VA_RAMS ( `RAB_L2_N_PAR_VA_RAMS ), + .HIT_OFFSET_STORE_WIDTH ( log2(`RAB_L2_N_SET_ENTRIES/2/`RAB_L2_N_PAR_VA_RAMS) ) + ) + u_l2_tlb + ( + .clk_i ( Clk_CI ), + .rst_ni ( Rst_RBI ), + + // Config inputs + .we_i ( L2CfgWE_S[i] ), + .waddr_i ( L2CfgWAddr_D[i] ), + .wdata_i ( L2CfgWData_D[i] ), + + // Request input + .start_i ( L2InEn_S[i] ), + .busy_o ( L2Busy_S[i] ), + .rw_type_i ( L2InRwType_DP[i] ), + .in_addr_i ( L2InAddr_DP[i] ), + + // Response output + .out_ready_i ( 
L2OutReady_S[i] ), + .out_valid_o ( L2OutValid_S[i] ), + .hit_o ( L2OutHit_SN[i] ), + .miss_o ( L2OutMiss_SN[i] ), + .prot_o ( L2OutProt_SN[i] ), + .multi_o ( L2OutMulti_SN[i] ), + .cache_coherent_o ( L2OutCC_SN[i] ), + .out_addr_o ( L2OutAddr_DN[i] ) + ); + + /* + * L2 output buffer + * + * Make sure there are no combinational paths between L1 TLB/inputs and L2 TLB. + */ + always_ff @(posedge Clk_CI) begin : L2_OUT_BUF + if (Rst_RBI == 0) begin + L2OutRwType_DP[i] <= 1'b0; + L2OutUser_DP[i] <= 'b0; + L2OutLen_DP[i] <= 'b0; + L2OutId_DP[i] <= 'b0; + L2OutInAddr_DP[i] <= 'b0; + + L2OutHit_SP[i] <= 1'b0; + L2OutMiss_SP[i] <= 1'b0; + L2OutProt_SP[i] <= 1'b0; + L2OutMulti_SP[i] <= 1'b0; + L2OutCC_SP[i] <= 1'b0; + L2OutAddr_DP[i] <= 'b0; + end else if (L2OutEn_S[i] == 1'b1) begin + L2OutRwType_DP[i] <= L2InRwType_DP[i]; + L2OutUser_DP[i] <= L2InUser_DP[i] ; + L2OutLen_DP[i] <= L2InLen_DP[i] ; + L2OutId_DP[i] <= L2InId_DP[i] ; + L2OutInAddr_DP[i] <= L2InAddr_DP[i] ; + + L2OutHit_SP[i] <= L2OutHit_SN[i] ; + L2OutMiss_SP[i] <= L2OutMiss_SN[i] ; + L2OutProt_SP[i] <= L2OutProt_SN[i] ; + L2OutMulti_SP[i] <= L2OutMulti_SN[i]; + L2OutCC_SP[i] <= L2OutCC_SN[i] ; + L2OutAddr_DP[i] <= L2OutAddr_DN[i] ; + end + end // always_ff @ (posedge Clk_CI) + + always_ff @(posedge Clk_CI) begin : BUF_VALID + if (Rst_RBI == 0) begin + L1DropValid_SP[i] = 1'b0; + L2OutValid_SP[i] = 1'b0; + end else begin + L1DropValid_SP[i] = L1DropValid_SN[i]; + L2OutValid_SP[i] = L2OutValid_SN[i]; + end + end + + always_comb begin : BUF_TO_PREFETCH + // L1 Drop Buf + if (L1DropUser_DP[i] == {AXI_USER_WIDTH{1'b1}}) + L1DropPrefetch_S[i] = 1'b1; + else + L1DropPrefetch_S[i] = 1'b0; + + // L2 Out Buf + if (L2OutUser_DP[i] == {AXI_USER_WIDTH{1'b1}}) + L2OutPrefetch_S[i] = 1'b1; + else + L2OutPrefetch_S[i] = 1'b0; + end + + assign l2_cache_coherent[i] = L2OutCC_SP[i]; + assign int_miss[i] = L2Miss_S[i]; + + end else begin : L2_TLB_STUB // if (ENABLE_L2TLB[i] == 1) + + assign l1_ar_drop[i] = int_rtrans_drop[i]; + 
assign l1_r_drop[i] = int_rtrans_drop[i]; + assign l1_xw_drop[i] = int_wtrans_drop[i]; + + assign l1_ar_save[i] = 1'b0; + assign l1_xw_save[i] = 1'b0; + assign l2_xw_accept[i] = 1'b0; + assign l2_xr_drop[i] = 1'b0; + assign l2_xw_drop[i] = 1'b0; + + assign l2_ar_addr[i] = 'b0; + assign l2_aw_addr[i] = 'b0; + + assign l1_id_drop[i] = int_wtrans_drop[i] ? int_awid[i] : + int_rtrans_drop[i] ? int_arid[i] : + '0; + assign l1_len_drop[i] = int_wtrans_drop[i] ? int_awlen[i] : + int_rtrans_drop[i] ? int_arlen[i] : + '0; + assign l1_prefetch_drop[i] = rab_prefetch[i]; + assign l1_hit_drop[i] = ~rab_miss[i]; + + assign lx_id_drop[i] = int_wtrans_drop[i] ? int_awid[i] : + int_rtrans_drop[i] ? int_arid[i] : + '0; + assign lx_len_drop[i] = int_wtrans_drop[i] ? int_awlen[i] : + int_rtrans_drop[i] ? int_arlen[i] : + '0; + assign lx_prefetch_drop[i] = rab_prefetch[i]; + assign lx_hit_drop[i] = ~rab_miss[i]; + + assign l2_cache_coherent[i] = 1'b0; + + assign int_miss[i] = rab_miss[i]; + assign int_prot[i] = rab_prot[i]; + assign int_multi[i] = rab_multi[i]; + + // unused signals + assign L2Miss_S[i] = 1'b0; + + assign L1OutRwType_D[i] = 1'b0; + assign L1OutProt_D[i] = 1'b0; + assign L1OutMulti_D[i] = 1'b0; + + assign L1DropRwType_DP[i] = 1'b0; + assign L1DropUser_DP[i] = 'b0; + assign L1DropId_DP[i] = 'b0; + assign L1DropLen_DP[i] = 'b0; + assign L1DropAddr_DP[i] = 'b0; + assign L1DropProt_DP[i] = 1'b0; + assign L1DropMulti_DP[i] = 1'b0; + + assign L1DropEn_S[i] = 1'b0; + assign L1DropPrefetch_S[i] = 1'b0; + assign L1DropValid_SN[i] = 1'b0; + assign L1DropValid_SP[i] = 1'b0; + + assign L2InRwType_DP[i] = 1'b0; + assign L2InUser_DP[i] = 'b0; + assign L2InId_DP[i] = 'b0; + assign L2InLen_DP[i] = 'b0; + assign L2InAddr_DP[i] = 'b0; + + assign L2InEn_S[i] = 1'b0; + + assign L2OutHit_SN[i] = 1'b0; + assign L2OutMiss_SN[i] = 1'b0; + assign L2OutProt_SN[i] = 1'b0; + assign L2OutMulti_SN[i] = 1'b0; + assign L2OutCC_SN[i] = 1'b0; + assign L2OutAddr_DN[i] = 'b0; + + assign L2OutRwType_DP[i] 
= 1'b0; + assign L2OutUser_DP[i] = 'b0; + assign L2OutId_DP[i] = 'b0; + assign L2OutLen_DP[i] = 'b0; + assign L2OutInAddr_DP[i] = 'b0; + assign L2OutHit_SP[i] = 1'b0; + assign L2OutMiss_SP[i] = 1'b0; + assign L2OutProt_SP[i] = 1'b0; + assign L2OutMulti_SP[i] = 1'b0; + assign L2OutCC_SP[i] = 1'b0; + assign L2OutAddr_DP[i] = 'b0; + + assign L2OutEn_S[i] = 1'b0; + assign L2OutPrefetch_S[i] = 1'b0; + assign L2Busy_S[i] = 1'b0; + assign L2OutValid_S[i] = 1'b0; + assign L2OutValid_SN[i] = 1'b0; + assign L2OutValid_SP[i] = 1'b0; + assign L2OutReady_S[i] = 1'b0; + + end // !`ifdef ENABLE_L2TLB + end // for (i = 0; i < N_PORTS; i++) + endgenerate + +// }}} +""" +# endmodule +# +# +# // vim: ts=2 sw=2 sts=2 et nosmartindent autoindent foldmethod=marker +# +# diff --git a/src/iommu/axi_rab/check_ram.py b/src/iommu/axi_rab/check_ram.py new file mode 100644 index 00000000..31bf32ea --- /dev/null +++ b/src/iommu/axi_rab/check_ram.py @@ -0,0 +1,240 @@ +# this file has been generated by sv2nmigen + +from nmigen import Signal, Module, Const, Cat, Elaboratable + + +class check_ram(Elaboratable): + + def __init__(self): + self.clk_i = Signal() # input + self.rst_ni = Signal() # input + self.in_addr = Signal(ADDR_WIDTH) # input + self.rw_type = Signal() # input + self.ram_we = Signal() # input + self.port0_addr = Signal(SET_WIDTH+OFFSET_WIDTH+1) # input + self.port1_addr = Signal(SET_WIDTH+OFFSET_WIDTH+1) # input + self.ram_wdata = Signal(RAM_DATA_WIDTH) # input + self.output_sent = Signal() # input + self.output_valid = Signal() # input + self.offset_addr_d = Signal(OFFSET_WIDTH) # input + self.hit_addr = Signal(SET_WIDTH+OFFSET_WIDTH+1) # output + self.master = Signal() # output + self.hit = Signal() # output + self.multi_hit = Signal() # output + self.prot = Signal() # output + + def elaborate(self, platform=None): + m = Module() + return m + + +# // Copyright 2018 ETH Zurich and University of Bologna. 
+# // Copyright and related rights are licensed under the Solderpad Hardware +# // License, Version 0.51 (the "License"); you may not use this file except in +# // compliance with the License. You may obtain a copy of the License at +# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# // or agreed to in writing, software, hardware and materials distributed under +# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# // CONDITIONS OF ANY KIND, either express or implied. See the License for the +# // specific language governing permissions and limitations under the License. +# +# //import CfMath::log2; +# +# //`define MULTI_HIT_FULL_SET +# +# module check_ram +# //#( +# // parameter ADDR_WIDTH = 32, +# // parameter RAM_DATA_WIDTH = 32, +# // parameter PAGE_SIZE = 4096, // 4kB +# // parameter SET_WIDTH = 5, +# // parameter OFFSET_WIDTH = 4 +# // ) +# ( +# input logic clk_i, +# input logic rst_ni, +# input logic [ADDR_WIDTH-1:0] in_addr, +# input logic rw_type, // 1 => write, 0=> read +# input logic ram_we, +# input logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port0_addr, +# input logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port1_addr, +# input logic [RAM_DATA_WIDTH-1:0] ram_wdata, +# input logic output_sent, +# input logic output_valid, +# input logic [OFFSET_WIDTH-1:0] offset_addr_d, +# output logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] hit_addr, +# output logic master, +# output logic hit, +# output logic multi_hit, +# output logic prot +# ); +# +""" #docstring_begin + + localparam IGNORE_LSB = log2(PAGE_SIZE); // 12 + + logic [RAM_DATA_WIDTH-1:0] port0_data_o, port1_data_o; // RAM read data outputs + logic port0_hit, port1_hit; // Ram output matches in_addr + + logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port0_addr_saved, port1_addr_saved; + + // Hit FSM Signals + typedef enum logic {SEARCH, HIT} hit_state_t; + hit_state_t hit_SP; // Hit FSM state + hit_state_t hit_SN; // Hit FSM next state + + // Multi Hit FSM signals +`ifdef 
MULTI_HIT_FULL_SET + typedef enum logic[1:0] {NO_HITS, ONE_HIT, MULTI_HIT} multi_state_t; + multi_state_t multi_SP; // Multi Hit FSM state + multi_state_t multi_SN; // Multi Hit FSM next state + + logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] hit_addr_saved; + logic master_saved; +`endif + + //// --------------- Block RAM (Dual Port) -------------- //// + + // The outputs of the BRAMs are only valid if in the previous cycle: + // 1. the inputs were valid, and + // 2. the BRAM was not written to. + // Otherwise, the outputs must be ignored, which is controlled by the output_valid signal. + // This signal is driven by the upper-level L2 TLB module. + ram_tp_no_change #( + .ADDR_WIDTH( SET_WIDTH+OFFSET_WIDTH+1 ), + .DATA_WIDTH( RAM_DATA_WIDTH ) + ) + ram_tp_no_change_0 + ( + .clk ( clk_i ), + .we ( ram_we ), + .addr0 ( port0_addr ), + .addr1 ( port1_addr ), + .d_i ( ram_wdata ), + .d0_o ( port0_data_o ), + .d1_o ( port1_data_o ) + ); + + //// Check Ram Outputs + assign port0_hit = (port0_data_o[0] == 1'b1) && (in_addr[ADDR_WIDTH-1: IGNORE_LSB] == port0_data_o[RAM_DATA_WIDTH-1:4]); + assign port1_hit = (port1_data_o[0] == 1'b1) && (in_addr[ADDR_WIDTH-1: IGNORE_LSB] == port1_data_o[RAM_DATA_WIDTH-1:4]); + //// ----------------------------------------------------- ///// + + //// ------------------- Check if Hit ------------------------ //// + // FSM + always_ff @(posedge clk_i) begin + if (rst_ni == 0) begin + hit_SP <= SEARCH; + end else begin + hit_SP <= hit_SN; + end + end + + always_ff @(posedge clk_i, negedge rst_ni) begin + if (!rst_ni) begin + port0_addr_saved <= '0; + port1_addr_saved <= '0; + end else begin + port0_addr_saved <= port0_addr; + port1_addr_saved <= port1_addr; + end + end + + always_comb begin + hit_SN = hit_SP; + hit = 1'b0; + hit_addr = 0; + master = 1'b0; + unique case(hit_SP) + SEARCH : + if (output_valid) + if (port0_hit || port1_hit) begin + hit_SN = HIT; + hit = 1'b1; + hit_addr = port0_hit ? 
{port0_addr_saved[SET_WIDTH+OFFSET_WIDTH:OFFSET_WIDTH], offset_addr_d} : + port1_hit ? {port1_addr_saved[SET_WIDTH+OFFSET_WIDTH:OFFSET_WIDTH], offset_addr_d} : + 0; + master = port0_hit ? port0_data_o[3] : + port1_hit ? port1_data_o[3] : + 1'b0; + end + + HIT : begin +`ifdef MULTI_HIT_FULL_SET // Since the search continues after the first hit, it needs to be saved to be accessed later. + hit = 1'b1; + hit_addr = hit_addr_saved; + master = master_saved; +`endif + if (output_sent) + hit_SN = SEARCH; + end + + default : begin + hit_SN = SEARCH; + end + endcase // case (hit_SP) + end // always_comb begin + + //// ------------------------------------------- //// + + assign prot = output_valid && port0_hit ? ((~port0_data_o[2] && rw_type) || (~port0_data_o[1] && ~rw_type)) : + output_valid && port1_hit ? ((~port1_data_o[2] && rw_type) || (~port1_data_o[1] && ~rw_type)) : + 1'b0; + + //// ------------------- Multi ------------------- //// +`ifdef MULTI_HIT_FULL_SET + + always_ff @(posedge clk_i) begin + if (rst_ni == 0) begin + hit_addr_saved <= 0; + master_saved <= 1'b0; + end else if (output_valid) begin + hit_addr_saved <= hit_addr; + master_saved <= master; + end + end + + // FSM + always_ff @(posedge clk_i) begin + if (rst_ni == 0) begin + multi_SP <= NO_HITS; + end else begin + multi_SP <= multi_SN; + end + end + + always_comb begin + multi_SN = multi_SP; + multi_hit = 1'b0; + unique case(multi_SP) + NO_HITS : + if(output_valid && (port0_hit && port1_hit)) begin + multi_SN = MULTI_HIT; + multi_hit = 1'b1; + end else if(output_valid && (port0_hit || port1_hit)) + multi_SN = ONE_HIT; + + ONE_HIT : + if(output_valid && (port0_hit || port1_hit)) begin + multi_SN = MULTI_HIT; + multi_hit = 1'b1; + end else if (output_sent) + multi_SN = NO_HITS; + + MULTI_HIT : begin + multi_hit = 1'b1; + if (output_sent) + multi_SN = NO_HITS; + end + + endcase // case (multi_SP) + end // always_comb begin + +`else // !`ifdef MULTI_HIT_FULL_SET + assign multi_hit = output_valid && 
port0_hit && port1_hit; +`endif // !`ifdef MULTI_HIT_FULL_SET + //// ------------------------------------------- //// +""" +# endmodule +# +# diff --git a/src/iommu/axi_rab/fsm.py b/src/iommu/axi_rab/fsm.py new file mode 100644 index 00000000..d64b1cb4 --- /dev/null +++ b/src/iommu/axi_rab/fsm.py @@ -0,0 +1,243 @@ +# this file has been generated by sv2nmigen + +from nmigen import Signal, Module, Const, Cat, Elaboratable + + +class fsm(Elaboratable): + + def __init__(self): + self.Clk_CI = Signal() # input + self.Rst_RBI = Signal() # input + self.port1_addr_valid_i = Signal() # input + self.port2_addr_valid_i = Signal() # input + self.port1_sent_i = Signal() # input + self.port2_sent_i = Signal() # input + self.select_i = Signal() # input + self.no_hit_i = Signal() # input + self.multi_hit_i = Signal() # input + self.no_prot_i = Signal() # input + self.prefetch_i = Signal() # input + self.out_addr_i = Signal(AXI_M_ADDR_WIDTH) # input + self.cache_coherent_i = Signal() # input + self.port1_accept_o = Signal() # output + self.port1_drop_o = Signal() # output + self.port1_miss_o = Signal() # output + self.port2_accept_o = Signal() # output + self.port2_drop_o = Signal() # output + self.port2_miss_o = Signal() # output + self.out_addr_o = Signal(AXI_M_ADDR_WIDTH) # output + self.cache_coherent_o = Signal() # output + self.miss_o = Signal() # output + self.multi_o = Signal() # output + self.prot_o = Signal() # output + self.prefetch_o = Signal() # output + self.in_addr_i = Signal(AXI_S_ADDR_WIDTH) # input + self.in_id_i = Signal(AXI_ID_WIDTH) # input + self.in_len_i = Signal(8) # input + self.in_user_i = Signal(AXI_USER_WIDTH) # input + self.in_addr_o = Signal(AXI_S_ADDR_WIDTH) # output + self.in_id_o = Signal(AXI_ID_WIDTH) # output + self.in_len_o = Signal(8) # output + self.in_user_o = Signal(AXI_USER_WIDTH) # output + + def elaborate(self, platform=None): + m = Module() + return m + + +# // Copyright 2018 ETH Zurich and University of Bologna. 
+# // Copyright and related rights are licensed under the Solderpad Hardware +# // License, Version 0.51 (the "License"); you may not use this file except in +# // compliance with the License. You may obtain a copy of the License at +# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# // or agreed to in writing, software, hardware and materials distributed under +# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# // CONDITIONS OF ANY KIND, either express or implied. See the License for the +# // specific language governing permissions and limitations under the License. +# +# //`timescale 1ns / 1ps +# +# module fsm +# #( +# parameter AXI_M_ADDR_WIDTH = 40, +# parameter AXI_S_ADDR_WIDTH = 32, +# parameter AXI_ID_WIDTH = 8, +# parameter AXI_USER_WIDTH = 6 +# ) +# ( +# input logic Clk_CI, +# input logic Rst_RBI, +# +# input logic port1_addr_valid_i, +# input logic port2_addr_valid_i, +# input logic port1_sent_i, +# input logic port2_sent_i, +# input logic select_i, +# input logic no_hit_i, +# input logic multi_hit_i, +# input logic no_prot_i, +# input logic prefetch_i, +# input logic [AXI_M_ADDR_WIDTH-1:0] out_addr_i, +# input logic cache_coherent_i, +# output logic port1_accept_o, +# output logic port1_drop_o, +# output logic port1_miss_o, +# output logic port2_accept_o, +# output logic port2_drop_o, +# output logic port2_miss_o, +# output logic [AXI_M_ADDR_WIDTH-1:0] out_addr_o, +# output logic cache_coherent_o, +# output logic miss_o, +# output logic multi_o, +# output logic prot_o, +# output logic prefetch_o, +# input logic [AXI_S_ADDR_WIDTH-1:0] in_addr_i, +# input logic [AXI_ID_WIDTH-1:0] in_id_i, +# input logic [7:0] in_len_i, +# input logic [AXI_USER_WIDTH-1:0] in_user_i, +# output logic [AXI_S_ADDR_WIDTH-1:0] in_addr_o, +# output logic [AXI_ID_WIDTH-1:0] in_id_o, +# output logic [7:0] in_len_o, +# output logic [AXI_USER_WIDTH-1:0] in_user_o +# ); +# +""" #docstring_begin + + //-------------Internal 
Signals---------------------- + + typedef enum logic {IDLE, WAIT} state_t; + logic state_SP; // Present state + logic state_SN; // Next State + + logic port1_accept_SN; + logic port1_drop_SN; + logic port1_miss_SN; + logic port2_accept_SN; + logic port2_drop_SN; + logic port2_miss_SN; + logic miss_SN; + logic multi_SN; + logic prot_SN; + logic prefetch_SN; + logic cache_coherent_SN; + logic [AXI_M_ADDR_WIDTH-1:0] out_addr_DN; + + logic out_reg_en_S; + + //----------FSM comb------------------------------ + + always_comb begin: FSM_COMBO + state_SN = state_SP; + + port1_accept_SN = 1'b0; + port1_drop_SN = 1'b0; + port1_miss_SN = 1'b0; + port2_accept_SN = 1'b0; + port2_drop_SN = 1'b0; + port2_miss_SN = 1'b0; + miss_SN = 1'b0; + multi_SN = 1'b0; + prot_SN = 1'b0; + prefetch_SN = 1'b0; + cache_coherent_SN = 1'b0; + out_addr_DN = '0; + + out_reg_en_S = 1'b0; // by default hold register output + + unique case(state_SP) + IDLE : + if ( (port1_addr_valid_i & select_i) | (port2_addr_valid_i & ~select_i) ) begin + out_reg_en_S = 1'b1; + state_SN = WAIT; + + // Select inputs for output registers + if (port1_addr_valid_i & select_i) begin + port1_accept_SN = ~(no_hit_i | multi_hit_i | ~no_prot_i | prefetch_i); + port1_drop_SN = (no_hit_i | multi_hit_i | ~no_prot_i | prefetch_i); + port1_miss_SN = no_hit_i; + port2_accept_SN = 1'b0; + port2_drop_SN = 1'b0; + port2_miss_SN = 1'b0; + end else if (port2_addr_valid_i & ~select_i) begin + port1_accept_SN = 1'b0; + port1_drop_SN = 1'b0; + port1_miss_SN = 1'b0; + port2_accept_SN = ~(no_hit_i | multi_hit_i | ~no_prot_i | prefetch_i); + port2_drop_SN = (no_hit_i | multi_hit_i | ~no_prot_i | prefetch_i); + port2_miss_SN = no_hit_i; + end + + miss_SN = port1_miss_SN | port2_miss_SN; + multi_SN = multi_hit_i; + prot_SN = ~no_prot_i; + prefetch_SN = ~no_hit_i & prefetch_i; + + cache_coherent_SN = cache_coherent_i; + out_addr_DN = out_addr_i; + end + + WAIT : + if ( port1_sent_i | port2_sent_i ) begin + out_reg_en_S = 1'b1; // "clear" the 
register + state_SN = IDLE; + end + + default : begin + state_SN = IDLE; + end + endcase + end + + //----------FSM seq------------------------------- + + always_ff @(posedge Clk_CI, negedge Rst_RBI) begin: FSM_SEQ + if (Rst_RBI == 1'b0) + state_SP <= IDLE; + else + state_SP <= state_SN; + end + + //----------Output seq-------------------------- + + always_ff @(posedge Clk_CI, negedge Rst_RBI) begin: OUTPUT_SEQ + if (Rst_RBI == 1'b0) begin + port1_accept_o = 1'b0; + port1_drop_o = 1'b0; + port1_miss_o = 1'b0; + port2_accept_o = 1'b0; + port2_drop_o = 1'b0; + port2_miss_o = 1'b0; + miss_o = 1'b0; + multi_o = 1'b0; + prot_o = 1'b0; + prefetch_o = 1'b0; + cache_coherent_o = 1'b0; + out_addr_o = '0; + in_addr_o = '0; + in_id_o = '0; + in_len_o = '0; + in_user_o = '0; + end else if (out_reg_en_S == 1'b1) begin + port1_accept_o = port1_accept_SN; + port1_drop_o = port1_drop_SN; + port1_miss_o = port1_miss_SN; + port2_accept_o = port2_accept_SN; + port2_drop_o = port2_drop_SN; + port2_miss_o = port2_miss_SN; + miss_o = miss_SN; + multi_o = multi_SN; + prot_o = prot_SN; + prefetch_o = prefetch_SN; + cache_coherent_o = cache_coherent_SN; + out_addr_o = out_addr_DN; + in_addr_o = in_addr_i; + in_id_o = in_id_i; + in_len_o = in_len_i; + in_user_o = in_user_i; + end + end // block: OUTPUT_SEQ +""" +# +# endmodule +# +# diff --git a/src/iommu/axi_rab/l2_tlb.py b/src/iommu/axi_rab/l2_tlb.py new file mode 100644 index 00000000..11983f64 --- /dev/null +++ b/src/iommu/axi_rab/l2_tlb.py @@ -0,0 +1,550 @@ +# this file has been generated by sv2nmigen + +from nmigen import Signal, Module, Const, Cat, Elaboratable + + +class l2_tlb(Elaboratable): + + def __init__(self): + self.clk_i = Signal() # input + self.rst_ni = Signal() # input + self.we_i = Signal() # input + self.waddr_i = Signal(AXI_LITE_ADDR_WIDTH) # input + self.wdata_i = Signal(AXI_LITE_DATA_WIDTH) # input + self.start_i = Signal() # input + self.busy_o = Signal() # output + self.in_addr_i = Signal(AXI_S_ADDR_WIDTH) # input + 
self.rw_type_i = Signal() # input + self.out_ready_i = Signal() # input + self.out_valid_o = Signal() # output + self.hit_o = Signal() # output + self.miss_o = Signal() # output + self.prot_o = Signal() # output + self.multi_o = Signal() # output + self.cache_coherent_o = Signal() # output + self.out_addr_o = Signal(AXI_M_ADDR_WIDTH) # output + + def elaborate(self, platform=None): + m = Module() + return m + + +# // Copyright 2018 ETH Zurich and University of Bologna. +# // Copyright and related rights are licensed under the Solderpad Hardware +# // License, Version 0.51 (the "License"); you may not use this file except in +# // compliance with the License. You may obtain a copy of the License at +# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# // or agreed to in writing, software, hardware and materials distributed under +# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# // CONDITIONS OF ANY KIND, either express or implied. See the License for the +# // specific language governing permissions and limitations under the License. +# +# //`include "pulp_soc_defines.sv" +# +# ////import CfMath::log2; +# +# //`define MULTI_HIT_FULL_SET // Enable full multi hit detection. Always the entire set is searched. +# //`define MULTI_HIT_CUR_CYCLE // Enable partial multi hit detection. Only multi hits in the same search cycle are detected. +# +# //`ifdef MULTI_HIT_FULL_SET +# // `ifndef MULTI_HIT_CUR_CYCLE +# // `define MULTI_HIT_CUR_CYCLE +# // `endif +# //`endif +# +# module l2_tlb +# //#( +# // parameter AXI_S_ADDR_WIDTH = 32, +# // parameter AXI_M_ADDR_WIDTH = 40, +# // parameter AXI_LITE_DATA_WIDTH = 64, +# // parameter AXI_LITE_ADDR_WIDTH = 32, +# // parameter N_SETS = 32, +# // parameter N_OFFSETS = 4, //per port. There are 2 ports. +# // parameter PAGE_SIZE = 4096, // 4kB +# // parameter N_PAR_VA_RAMS = 4, +# // parameter HIT_OFFSET_STORE_WIDTH = 2 // Num of bits of VA RAM offset stored. 
This should not be greater than OFFSET_WIDTH +# // ) +# ( +# input logic clk_i, +# input logic rst_ni, +# +# input logic we_i, +# input logic [AXI_LITE_ADDR_WIDTH-1:0] waddr_i, +# input logic [AXI_LITE_DATA_WIDTH-1:0] wdata_i, +# +# input logic start_i, +# output logic busy_o, +# input logic [AXI_S_ADDR_WIDTH-1:0] in_addr_i, +# input logic rw_type_i, //1 => write, 0=> read +# +# input logic out_ready_i, +# output logic out_valid_o, +# output logic hit_o, +# output logic miss_o, +# output logic prot_o, +# output logic multi_o, +# output logic cache_coherent_o, +# output logic [AXI_M_ADDR_WIDTH-1:0] out_addr_o +# ); +# +""" #docstring_begin + + localparam VA_RAM_DEPTH = N_SETS * N_OFFSETS * 2; + localparam PA_RAM_DEPTH = VA_RAM_DEPTH * N_PAR_VA_RAMS; + localparam VA_RAM_ADDR_WIDTH = log2(VA_RAM_DEPTH); + localparam PA_RAM_ADDR_WIDTH = log2(PA_RAM_DEPTH); + localparam SET_WIDTH = log2(N_SETS); + localparam OFFSET_WIDTH = log2(N_OFFSETS); + localparam LL_WIDTH = log2(N_PAR_VA_RAMS); + localparam IGNORE_LSB = log2(PAGE_SIZE); + + localparam VA_RAM_DATA_WIDTH = AXI_S_ADDR_WIDTH - IGNORE_LSB + 4; + localparam PA_RAM_DATA_WIDTH = AXI_M_ADDR_WIDTH - IGNORE_LSB; + + logic [N_PAR_VA_RAMS-1:0] hit, prot, multi_hit, cache_coherent; + logic [N_PAR_VA_RAMS-1:0] ram_we; + logic last_search, last_search_next; + logic first_search, first_search_next; + logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] ram_waddr; + logic [N_PAR_VA_RAMS-1:0][SET_WIDTH+OFFSET_WIDTH+1-1:0] hit_addr; + logic pa_ram_we; + logic [PA_RAM_ADDR_WIDTH-1:0] pa_port0_raddr, pa_port0_waddr; // PA RAM read, Write addr; + logic [PA_RAM_ADDR_WIDTH-1:0] pa_port0_raddr_reg_SN, pa_port0_raddr_reg_SP; // registered addresses, needed for WAIT_ON_WRITE; + logic [PA_RAM_ADDR_WIDTH-1:0] pa_port0_addr; // PA RAM addr + logic [PA_RAM_DATA_WIDTH-1:0] pa_port0_data, pa_data, pa_port0_data_reg; // PA RAM data + logic pa_ram_store_data_SN, pa_ram_store_data_SP; + logic hit_top, prot_top, multi_hit_top, first_hit_top; + logic output_sent; + 
int hit_block_num; + + logic searching, search_done; + logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port0_addr, port0_raddr; // VA RAM port0 addr + logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port1_addr; // VA RAM port1 addr + logic [OFFSET_WIDTH-1:0] offset_addr, offset_addr_d; + logic [OFFSET_WIDTH-1:0] offset_start_addr, offset_end_addr; + logic [SET_WIDTH-1:0] set_num; + + logic va_output_valid; + logic searching_q; + + genvar z; + + // Search FSM + typedef enum logic [1:0] {IDLE, SEARCH, DONE} search_state_t; + search_state_t search_SP; // Present state + search_state_t search_SN; // Next State + + // Output FSM + typedef enum logic [1:0] {OUT_IDLE, SEND_OUTPUT, WAIT_ON_WRITE} out_state_t; + out_state_t out_SP; // Present state + out_state_t out_SN; // Next State + + logic miss_next; + logic hit_next; + logic prot_next; + logic multi_next; + logic cache_coherent_next; + + // Generate the VA Block rams and their surrounding logic + generate + for (z = 0; z < N_PAR_VA_RAMS; z++) begin : VA_RAMS + check_ram + #( + .ADDR_WIDTH ( AXI_S_ADDR_WIDTH ), + .RAM_DATA_WIDTH ( VA_RAM_DATA_WIDTH ), + .PAGE_SIZE ( PAGE_SIZE ), + .SET_WIDTH ( SET_WIDTH ), + .OFFSET_WIDTH ( OFFSET_WIDTH ) + ) + u_check_ram + ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .in_addr ( in_addr_i ), + .rw_type ( rw_type_i ), + .ram_we ( ram_we[z] ), + .port0_addr ( port0_addr ), + .port1_addr ( port1_addr ), + .ram_wdata ( wdata_i[VA_RAM_DATA_WIDTH-1:0] ), + .output_sent ( output_sent ), + .output_valid ( va_output_valid ), + .offset_addr_d ( offset_addr_d ), + .hit_addr ( hit_addr[z] ), + .master ( cache_coherent[z] ), + .hit ( hit[z] ), + .multi_hit ( multi_hit[z] ), + .prot ( prot[z] ) + ); + end // for (z = 0; z < N_PORTS; z++) + endgenerate + + ////////////////// ---------------- Control and Address --------------- //////////////////////// + // FSM + always_ff @(posedge clk_i) begin + if (rst_ni == 0) begin + search_SP <= IDLE; + end else begin + search_SP <= search_SN; + end + end + + always_comb begin : 
SEARCH_FSM + search_SN = search_SP; + busy_o = 1'b0; + searching = 1'b0; + search_done = 1'b0; + last_search_next = 1'b0; + first_search_next = first_search; + + unique case (search_SP) + IDLE : begin + if (start_i) begin + search_SN = SEARCH; + first_search_next = 1'b1; + end + end + + SEARCH : begin + busy_o = 1'b1; + + // detect last search cycle + if ( (first_search == 1'b0) && (offset_addr == offset_end_addr) ) + last_search_next = 1'b1; + + // pause search during VA RAM reconfigration + if (|ram_we) begin + searching = 1'b0; + end else begin + searching = 1'b1; + first_search_next = 1'b0; + end + + if (va_output_valid) begin + // stop search +`ifdef MULTI_HIT_FULL_SET + if (last_search | prot_top | multi_hit_top) begin +`else + if (last_search | prot_top | multi_hit_top | hit_top ) begin +`endif + search_SN = DONE; + search_done = 1'b1; + end + end + end + + DONE : begin + busy_o = 1'b1; + if (out_valid_o & out_ready_i) + search_SN = IDLE; + end + + default : begin + search_SN = IDLE; + end + endcase // case (prot_SP) + end // always_comb begin + + always_ff @(posedge clk_i) begin + if (rst_ni == 0) begin + last_search <= 1'b0; + first_search <= 1'b0; + end else begin + last_search <= last_search_next; + first_search <= first_search_next; + end + end + + /* + * VA RAM address generation + * + * The input address and set number, and thus the offset start address, are available in the + * cycle after the start signal. The buffered offset_addr becomes available one cycle later. + * During the first search cycle, we therefore directly use offset_addr_start for the lookup. + */ + assign set_num = in_addr_i[SET_WIDTH+IGNORE_LSB -1 : IGNORE_LSB]; + + assign port0_raddr[OFFSET_WIDTH] = 1'b0; + assign port1_addr [OFFSET_WIDTH] = 1'b1; + + assign port0_raddr[OFFSET_WIDTH-1:0] = first_search ? offset_start_addr : offset_addr; + assign port1_addr [OFFSET_WIDTH-1:0] = first_search ? 
offset_start_addr : offset_addr; + + assign port0_raddr[SET_WIDTH+OFFSET_WIDTH : OFFSET_WIDTH+1] = set_num; + assign port1_addr [SET_WIDTH+OFFSET_WIDTH : OFFSET_WIDTH+1] = set_num; + + assign port0_addr = ram_we ? ram_waddr : port0_raddr; + + // The outputs of the BRAMs are only valid if in the previous cycle: + // 1. the inputs were valid, and + // 2. the BRAMs were not written to. + // Otherwise, the outputs must be ignored. + always_ff @(posedge clk_i) begin + if (rst_ni == 0) begin + searching_q <= 1'b0; + end else begin + searching_q <= searching; + end + end + assign va_output_valid = searching_q; + + // Address offset for looking up the VA RAMs + always_ff @(posedge clk_i) begin + if (rst_ni == 0) begin + offset_addr <= 0; + end else if (first_search) begin + offset_addr <= offset_start_addr + 1'b1; + end else if (searching) begin + offset_addr <= offset_addr + 1'b1; + end + end + + // Delayed address offest for looking up the PA RAM upon a hit in the VA RAMs + always_ff @(posedge clk_i) begin + if (rst_ni == 0) begin + offset_addr_d <= 0; + end else if (first_search) begin + offset_addr_d <= offset_start_addr; + end else if (searching) begin + offset_addr_d <= offset_addr_d + 1'b1; + end + end + + // Store the offset addr for hit to reduce latency for next search. + generate + if (HIT_OFFSET_STORE_WIDTH > 0) begin : OFFSET_STORE +`ifndef MULTI_HIT_FULL_SET + logic [N_SETS-1:0][HIT_OFFSET_STORE_WIDTH-1:0] hit_offset_addr; // Contains offset addr for previous hit for every SET. + logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] hit_addr_reg; + + assign offset_start_addr = { hit_offset_addr[set_num] , {{OFFSET_WIDTH-HIT_OFFSET_STORE_WIDTH}{1'b0}} }; + assign offset_end_addr = hit_offset_addr[set_num]-1'b1; + + // Register the hit addr + always_ff @(posedge clk_i) begin + if (rst_ni == 0) begin + hit_addr_reg <= 0; + end else if (hit_top) begin + hit_addr_reg <= hit_addr[hit_block_num]; + end + end + + // Store hit addr for each set. 
The next search in the same set will start from the saved addr. + always_ff @(posedge clk_i) begin + if (rst_ni == 0) begin + hit_offset_addr <= 0; + end else if (hit_o) begin + hit_offset_addr[set_num][HIT_OFFSET_STORE_WIDTH-1:0] <= hit_addr_reg[OFFSET_WIDTH-1 : (OFFSET_WIDTH - HIT_OFFSET_STORE_WIDTH)]; + end + end +`else // No need to store offset if full multi hit detection is enabled because the entire SET is searched. + assign offset_start_addr = 0; + assign offset_end_addr = {OFFSET_WIDTH{1'b1}}; +`endif + end else begin // if (HIT_OFFSET_STORE_WIDTH > 0) + assign offset_start_addr = 0; + assign offset_end_addr = {OFFSET_WIDTH{1'b1}}; + end + endgenerate + + assign prot_top = |prot; + + ////////////////////////////////////////////////////////////////////////////////////// + // check for hit, multi hit + // In case of a multi hit, the hit_block_num indicates the lowest VA RAM with a hit. + // In case of a multi hit in the same VA RAM, Port 0 is given priority. + always_comb begin : HIT_CHECK + hit_top = |hit; + hit_block_num = 0; + first_hit_top = 1'b0; + multi_hit_top = 1'b0; + for (int i=N_PAR_VA_RAMS-1; i>=0; i--) begin + if (hit[i] == 1'b1) begin +`ifdef MULTI_HIT_CUR_CYCLE + if (multi_hit[i] | first_hit_top ) begin + multi_hit_top = 1'b1; + end +`endif + first_hit_top = 1'b1; + hit_block_num = i; + end + end // for (int i=0; i= cfg_min) ? 1'b1 : 1'b0; +# assign min_below_max = (in_addr_min <= cfg_max) ? 1'b1 : 1'b0; +# assign max_below_max = (in_addr_max <= cfg_max) ? 
1'b1 : 1'b0;
+#
+# assign out_hit = cfg_en & min_above_min & min_below_max & max_below_max;
+# assign out_prot = out_hit & ((in_trans_type & ~cfg_wen) | (~in_trans_type & ~cfg_ren));
+# assign out_addr = in_addr_min - cfg_min + cfg_offset;
+#
+# endmodule
+#
+#
diff --git a/src/iommu/axi_rab/ram_tp_no_change.py b/src/iommu/axi_rab/ram_tp_no_change.py
new file mode 100644
index 00000000..958ffde3
--- /dev/null
+++ b/src/iommu/axi_rab/ram_tp_no_change.py
@@ -0,0 +1,111 @@
+# this file has been generated by sv2nmigen
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable, Memory
+
+
+class ram_tp_no_change(Elaboratable):
+    """Two-port synchronous RAM with port 0 in "no change" mode.
+
+    Port 0 reads and writes; port 1 only reads.  While ``we`` is high,
+    ``d_i`` is stored at ``addr0`` and ``d0_o`` keeps its last read value.
+    Port 1 is registered every cycle and returns the old contents when it
+    reads the address being written, as in the SystemVerilog reference
+    kept in the comments below.
+
+    ``addr_width`` and ``data_width`` default to the reference module's
+    ``ADDR_WIDTH`` (10) and ``DATA_WIDTH`` (36) parameters.
+    """
+
+    def __init__(self, addr_width=10, data_width=36):
+        self.addr_width = addr_width
+        self.data_width = data_width
+        # The memory is clocked by the ``sync`` domain; ``clk`` is kept only
+        # so that code referring to the generated port list still works.
+        self.clk = Signal()  # input
+        self.we = Signal()  # input
+        self.addr0 = Signal(addr_width)  # input
+        self.addr1 = Signal(addr_width)  # input
+        self.d_i = Signal(data_width)  # input
+        self.d0_o = Signal(data_width)  # output
+        self.d1_o = Signal(data_width)  # output
+
+    def elaborate(self, platform=None):
+        """Build the memory, one write port and two registered read ports."""
+        m = Module()
+        ram = Memory(width=self.data_width, depth=2**self.addr_width)
+        m.submodules.wr0 = wr0 = ram.write_port()
+        # Non-transparent read ports return the old contents on a write.
+        m.submodules.rd0 = rd0 = ram.read_port(transparent=False)
+        m.submodules.rd1 = rd1 = ram.read_port(transparent=False)
+        m.d.comb += [
+            wr0.addr.eq(self.addr0),
+            wr0.data.eq(self.d_i),
+            wr0.en.eq(self.we),
+            rd0.addr.eq(self.addr0),
+            # "No change": d0_o is only reloaded in cycles without a write.
+            rd0.en.eq(~self.we),
+            rd1.addr.eq(self.addr1),
+            self.d0_o.eq(rd0.data),
+            self.d1_o.eq(rd1.data),
+        ]
+        return m
+
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+#
+# /*
+# * ram_tp_no_change
+# *
+# * This code implements a parameterizable two-port memory. Port 0 can read and
+# * write while Port 1 can read only.
The Xilinx tools will infer a BRAM with
+# * Port 0 in "no change" mode, i.e., during a write, it retains the last read
+# * value on the output. Port 1 (read-only) is in "write first" mode. Still, it
+# * outputs the old data during the write cycle. Note: Port 1 outputs invalid
+# * data in the cycle after the write when reading the same address.
+# *
+# * For more information, see Xilinx PG058 Block Memory Generator Product Guide.
+# */
+#
+# module ram_tp_no_change
+# #(
+# parameter ADDR_WIDTH = 10,
+# parameter DATA_WIDTH = 36
+# )
+# (
+# input clk,
+# input we,
+# input [ADDR_WIDTH-1:0] addr0,
+# input [ADDR_WIDTH-1:0] addr1,
+# input [DATA_WIDTH-1:0] d_i,
+# output [DATA_WIDTH-1:0] d0_o,
+# output [DATA_WIDTH-1:0] d1_o
+# );
+#
+# localparam DEPTH = 2**ADDR_WIDTH;
+#
+# (* ram_style = "block" *) reg [DATA_WIDTH-1:0] ram[DEPTH];
+# reg [DATA_WIDTH-1:0] d0;
+# reg [DATA_WIDTH-1:0] d1;
+#
+# always_ff @(posedge clk) begin
+# if(we == 1'b1) begin
+# ram[addr0] <= d_i;
+# end else begin
+# d0 <= ram[addr0];
+# end
+# d1 <= ram[addr1];
+# end
+#
+# assign d0_o = d0;
+# assign d1_o = d1;
+#
+#endmodule // ram_tp_no_change
+#
+#
diff --git a/src/iommu/axi_rab/ram_tp_write_first.py b/src/iommu/axi_rab/ram_tp_write_first.py
new file mode 100644
index 00000000..e1f5c029
--- /dev/null
+++ b/src/iommu/axi_rab/ram_tp_write_first.py
@@ -0,0 +1,106 @@
+# this file has been generated by sv2nmigen
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable, Memory
+
+
+class ram_tp_write_first(Elaboratable):
+    """Two-port synchronous RAM in "write first" mode.
+
+    Port 0 reads and writes; port 1 only reads.  The read addresses are
+    registered and the outputs show the current memory contents at the
+    registered address, so a read of the address being written returns
+    the new value, as in the SystemVerilog reference in the comments below.
+
+    ``addr_width`` and ``data_width`` default to the reference module's
+    ``ADDR_WIDTH`` (10) and ``DATA_WIDTH`` (36) parameters.
+    """
+
+    def __init__(self, addr_width=10, data_width=36):
+        self.addr_width = addr_width
+        self.data_width = data_width
+        # The memory is clocked by the ``sync`` domain; ``clk`` is kept only
+        # so that code referring to the generated port list still works.
+        self.clk = Signal()  # input
+        self.we = Signal()  # input
+        self.addr0 = Signal(addr_width)  # input
+        self.addr1 = Signal(addr_width)  # input
+        self.d_i = Signal(data_width)  # input
+        self.d0_o = Signal(data_width)  # output
+        self.d1_o = Signal(data_width)  # output
+
+    def elaborate(self, platform=None):
+        """Build the memory, one write port and two transparent read ports."""
+        m = Module()
+        ram = Memory(width=self.data_width, depth=2**self.addr_width)
+        m.submodules.wr0 = wr0 = ram.write_port()
+        # Transparent read ports return freshly written data ("write first").
+        m.submodules.rd0 = rd0 = ram.read_port(transparent=True)
+        m.submodules.rd1 = rd1 = ram.read_port(transparent=True)
+        m.d.comb += [
+            wr0.addr.eq(self.addr0),
+            wr0.data.eq(self.d_i),
+            wr0.en.eq(self.we),
+            rd0.addr.eq(self.addr0),
+            rd1.addr.eq(self.addr1),
+            self.d0_o.eq(rd0.data),
+            self.d1_o.eq(rd1.data),
+        ]
+        return m
+
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware +# // License, Version 0.51 (the "License"); you may not use this file except in +# // compliance with the License. You may obtain a copy of the License at +# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# // or agreed to in writing, software, hardware and materials distributed under +# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# // CONDITIONS OF ANY KIND, either express or implied. See the License for the +# // specific language governing permissions and limitations under the License. +# +# /* +# * ram_tp_write_first +# * +# * This code implements a parameterizable two-port memory. Port 0 can read and +# * write while Port 1 can read only. Xilinx Vivado will infer a BRAM in +# * "write first" mode, i.e., upon a read and write to the same address, the +# * new value is read. Note: Port 1 outputs invalid data in the cycle after +# * the write when reading the same address. +# * +# * For more information, see Xilinx PG058 Block Memory Generator Product Guide. 
+# */
+#
+# module ram_tp_write_first
+# #(
+# parameter ADDR_WIDTH = 10,
+# parameter DATA_WIDTH = 36
+# )
+# (
+# input clk,
+# input we,
+# input [ADDR_WIDTH-1:0] addr0,
+# input [ADDR_WIDTH-1:0] addr1,
+# input [DATA_WIDTH-1:0] d_i,
+# output [DATA_WIDTH-1:0] d0_o,
+# output [DATA_WIDTH-1:0] d1_o
+# );
+#
+# localparam DEPTH = 2**ADDR_WIDTH;
+#
+# (* ram_style = "block" *) reg [DATA_WIDTH-1:0] ram[DEPTH];
+# reg [ADDR_WIDTH-1:0] raddr0;
+# reg [ADDR_WIDTH-1:0] raddr1;
+#
+# always_ff @(posedge clk) begin
+# if(we == 1'b1) begin
+# ram[addr0] <= d_i;
+# end
+# raddr0 <= addr0;
+# raddr1 <= addr1;
+# end
+#
+# assign d0_o = ram[raddr0];
+# assign d1_o = ram[raddr1];
+#
+#endmodule // ram
+#
+#
diff --git a/src/iommu/axi_rab/slice_top.py b/src/iommu/axi_rab/slice_top.py
new file mode 100644
index 00000000..0419d21f
--- /dev/null
+++ b/src/iommu/axi_rab/slice_top.py
@@ -0,0 +1,131 @@
+# this file has been generated by sv2nmigen
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+
+
+class slice_top(Elaboratable):
+    """Port list of the RAB ``slice_top`` module.
+
+    The keyword arguments mirror the parameters of the SystemVerilog
+    reference kept in the comments below and use the same defaults;
+    ``n_regs`` defaults to ``4 * n_slices``.
+    """
+
+    def __init__(self, n_slices=16, n_regs=None, addr_width_phys=40,
+                 addr_width_virt=32):
+        if n_regs is None:
+            n_regs = 4 * n_slices
+        self.n_slices = n_slices
+        self.n_regs = n_regs
+        self.addr_width_phys = addr_width_phys
+        self.addr_width_virt = addr_width_virt
+        # Packed [N_REGS-1:0][63:0] array flattened into one signal:
+        # register i occupies bits 64*i .. 64*i+63.
+        self.int_cfg_regs = Signal(n_regs * 64)  # input
+        self.int_rw = Signal()  # input
+        self.int_addr_min = Signal(addr_width_virt)  # input
+        self.int_addr_max = Signal(addr_width_virt)  # input
+        self.multi_hit_allow = Signal()  # input
+        self.multi_hit = Signal()  # output
+        self.prot = Signal(n_slices)  # output
+        self.hit = Signal(n_slices)  # output
+        self.cache_coherent = Signal()  # output
+        self.out_addr = Signal(addr_width_phys)  # output
+
+    def elaborate(self, platform=None):
+        """Return an empty module; the slice logic is not converted yet."""
+        m = Module()
+        return m
+
+
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law +# // or agreed to in writing, software, hardware and materials distributed under +# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# // CONDITIONS OF ANY KIND, either express or implied. See the License for the +# // specific language governing permissions and limitations under the License. +# +# module slice_top +# //#( +# // parameter N_SLICES = 16, +# // parameter N_REGS = 4*N_SLICES, +# // parameter ADDR_WIDTH_PHYS = 40, +# // parameter ADDR_WIDTH_VIRT = 32 +# // ) +# ( +# input logic [N_REGS-1:0] [63:0] int_cfg_regs, +# input logic int_rw, +# input logic [ADDR_WIDTH_VIRT-1:0] int_addr_min, +# input logic [ADDR_WIDTH_VIRT-1:0] int_addr_max, +# input logic multi_hit_allow, +# output logic multi_hit, +# output logic [N_SLICES-1:0] prot, +# output logic [N_SLICES-1:0] hit, +# output logic cache_coherent, +# output logic [ADDR_WIDTH_PHYS-1:0] out_addr +# ); +# +""" #docstring_begin + + logic first_hit; + + genvar i; + integer j; + + logic [ADDR_WIDTH_PHYS*N_SLICES-1:0] slice_out_addr; + + generate + for ( i=0; i