add working preprocessor (creates docstrings)
[sv2nmigen.git] / examples / load_store_unit.sv
diff --git a/examples/load_store_unit.sv b/examples/load_store_unit.sv
new file mode 100644 (file)
index 0000000..1d15633
--- /dev/null
@@ -0,0 +1,481 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 19.04.2017
+// Description: Load Store Unit, handles address calculation and memory interface signals
+
+//import ariane_pkg::*;
+
+module load_store_unit #( // LSU top level: address generation, MMU, load unit, store unit and the lsu_bypass request buffer
+    parameter int ASID_WIDTH = 1 // width of asid_i; also forwarded to the MMU's ASID_WIDTH parameter
+ //   parameter ariane_pkg::ariane_cfg_t ArianeCfg = ariane_pkg::ArianeDefaultConfig
+)(
+    input  logic                     clk_i,
+    input  logic                     rst_ni,                   // used as asynchronous, active-low reset in lsu_bypass
+    input  logic                     flush_i,                  // forwarded to the store unit and (via .*) to lsu_bypass
+    output logic                     no_st_pending_o,          // driven by i_store_unit
+    input  logic                     amo_valid_commit_i,       // forwarded to i_store_unit
+
+
+    //input  fu_data_t                 fu_data_i,
+    output logic                     lsu_ready_o,              // FU is ready e.g. not busy
+    input  logic                     lsu_valid_i,              // Input is valid
+
+    output logic [TRANS_ID_BITS-1:0] load_trans_id_o,          // ID of scoreboard entry at which to write back
+    output logic [63:0]              load_result_o,            // output of i_pipe_reg_load
+    output logic                     load_valid_o,             // output of i_pipe_reg_load
+    //output exception_t               load_exception_o,         // to WB, signal exception status LD exception
+
+    output logic [TRANS_ID_BITS-1:0] store_trans_id_o,         // ID of scoreboard entry at which to write back
+    output logic [63:0]              store_result_o,           // output of i_pipe_reg_store
+    output logic                     store_valid_o,            // output of i_pipe_reg_store
+    //output exception_t               store_exception_o,        // to WB, signal exception status ST exception
+
+    input  logic                     commit_i,                 // commit the pending store
+    output logic                     commit_ready_o,           // commit queue is ready to accept another commit request
+
+    input  logic                     enable_translation_i,     // enable virtual memory translation
+    input  logic                     en_ld_st_translation_i,   // enable virtual memory translation for load/stores
+
+    // icache translation requests
+    //input  icache_areq_o_t           icache_areq_i,
+    //output icache_areq_i_t           icache_areq_o,
+
+    //input  riscv::priv_lvl_t         priv_lvl_i,               // From CSR register file
+    //input  riscv::priv_lvl_t         ld_st_priv_lvl_i,         // From CSR register file
+    input  logic                     sum_i,                    // From CSR register file
+    input  logic                     mxr_i,                    // From CSR register file
+    input  logic [43:0]              satp_ppn_i,               // From CSR register file
+    input  logic [ASID_WIDTH-1:0]    asid_i,                   // From CSR register file
+    input  logic                     flush_tlb_i,              // not referenced by name below; presumably bound through .* on i_mmu
+    // Performance counters
+    output logic                     itlb_miss_o,              // not referenced by name below; presumably bound through .* on i_mmu
+    output logic                     dtlb_miss_o               // not referenced by name below; presumably bound through .* on i_mmu
+
+    // interface to dcache (the body uses index 0 for the MMU/PTW, 1 for the load unit, 2 for the store unit)
+    //input  dcache_req_o_t [2:0]      dcache_req_ports_i,
+    //output dcache_req_i_t [2:0]      dcache_req_ports_o,
+    // AMO interface
+    //output amo_req_t                 amo_req_o,
+    //input  amo_resp_t                amo_resp_i
+);
+    #docstring_begin
+    // set when the address in lsu_ctrl is not aligned to the size of the requested operation
+    logic data_misaligned;
+    // --------------------------------------
+    // 1st register stage - (stall registers)
+    // --------------------------------------
+    // lsu_ctrl is the request the units actually operate on: it is the output of lsu_bypass,
+    // i.e. either the incoming request (bypass) or a buffered one, so it keeps its value in the stall case
+    lsu_ctrl_t lsu_ctrl;
+
+    logic      pop_st; // from i_store_unit (pop_st_o) to lsu_bypass (pop_st_i)
+    logic      pop_ld; // from i_load_unit (pop_ld_o) to lsu_bypass (pop_ld_i)
+
+    // ------------------------------
+    // Address Generation Unit (AGU)
+    // ------------------------------
+    // virtual address as calculated by the AGU in the first cycle
+    logic [63:0] vaddr_i;
+    logic [7:0]  be_i; // byte enable, generated from vaddr_i[2:0] and the transfer size (see "Byte Enable" below)
+    // signed add of immediate and operand A; NOTE(review): fu_data_i is commented out of the port list above
+    assign vaddr_i = $unsigned($signed(fu_data_i.imm) + $signed(fu_data_i.operand_a));
+
+    logic                     st_valid_i;          // set in which_op when lsu_ctrl targets the store unit
+    logic                     ld_valid_i;          // set in which_op when lsu_ctrl targets the load unit
+    logic                     ld_translation_req;
+    logic                     st_translation_req;
+    logic [63:0]              ld_vaddr;
+    logic [63:0]              st_vaddr;
+    logic                     translation_req;     // request muxed from the active unit, sent to the MMU
+    logic                     translation_valid;
+    logic [63:0]              mmu_vaddr;           // address muxed from the active unit, sent to the MMU
+    logic [63:0]              mmu_paddr;           // MMU result, shared by load and store unit
+    exception_t               mmu_exception;       // MMU result, shared by load and store unit
+    logic                     dtlb_hit;
+
+    logic                     ld_valid;            // ld_* / st_*: unit results before the output pipeline registers
+    logic [TRANS_ID_BITS-1:0] ld_trans_id;
+    logic [63:0]              ld_result;
+    logic                     st_valid;
+    logic [TRANS_ID_BITS-1:0] st_trans_id;
+    logic [63:0]              st_result;
+
+    logic [11:0]              page_offset;         // from load unit to store unit
+    logic                     page_offset_matches; // from store unit back to load unit
+
+    exception_t               misaligned_exception; // built in data_misaligned_detection, handed to the MMU
+    exception_t               ld_ex;
+    exception_t               st_ex;
+
+    // -------------------
+    // MMU e.g.: TLBs/PTW
+    // -------------------
+    mmu #(
+        .INSTR_TLB_ENTRIES      ( 16                     ),
+        .DATA_TLB_ENTRIES       ( 16                     ),
+        .ASID_WIDTH             ( ASID_WIDTH             ),
+        .ArianeCfg              ( ArianeCfg              ) // NOTE(review): the ArianeCfg parameter is commented out in this module's header
+    ) i_mmu (
+            // misaligned bypass
+        .misaligned_ex_i        ( misaligned_exception   ),
+        .lsu_is_store_i         ( st_translation_req     ),
+        .lsu_req_i              ( translation_req        ),
+        .lsu_vaddr_i            ( mmu_vaddr              ),
+        .lsu_valid_o            ( translation_valid      ),
+        .lsu_paddr_o            ( mmu_paddr              ),
+        .lsu_exception_o        ( mmu_exception          ),
+        .lsu_dtlb_hit_o         ( dtlb_hit               ), // send in the same cycle as the request
+        // connecting PTW to D$ IF
+        .req_port_i             ( dcache_req_ports_i [0] ),
+        .req_port_o             ( dcache_req_ports_o [0] ),
+        // icache address translation requests
+        .icache_areq_i          ( icache_areq_i          ),
+        .icache_areq_o          ( icache_areq_o          ),
+        .* // every remaining MMU port binds to the identically named signal of this module
+    );
+    // ------------------
+    // Store Unit
+    // ------------------
+    store_unit i_store_unit (
+        .clk_i,
+        .rst_ni,
+        .flush_i,
+        .no_st_pending_o,
+
+        .valid_i               ( st_valid_i           ),
+        .lsu_ctrl_i            ( lsu_ctrl             ),
+        .pop_st_o              ( pop_st               ),
+        .commit_i,
+        .commit_ready_o,
+        .amo_valid_commit_i,
+
+        .valid_o               ( st_valid             ),
+        .trans_id_o            ( st_trans_id          ),
+        .result_o              ( st_result            ),
+        .ex_o                  ( st_ex                ),
+        // MMU port
+        .translation_req_o     ( st_translation_req   ),
+        .vaddr_o               ( st_vaddr             ),
+        .paddr_i               ( mmu_paddr            ),
+        .ex_i                  ( mmu_exception        ),
+        .dtlb_hit_i            ( dtlb_hit             ),
+        // Load Unit
+        .page_offset_i         ( page_offset          ),
+        .page_offset_matches_o ( page_offset_matches  ),
+        // AMOs
+        .amo_req_o,
+        .amo_resp_i,
+        // to memory arbiter
+        .req_port_i             ( dcache_req_ports_i [2] ),
+        .req_port_o             ( dcache_req_ports_o [2] )
+    );
+
+    // ------------------
+    // Load Unit
+    // ------------------
+    load_unit i_load_unit (
+        .valid_i               ( ld_valid_i           ),
+        .lsu_ctrl_i            ( lsu_ctrl             ),
+        .pop_ld_o              ( pop_ld               ),
+
+        .valid_o               ( ld_valid             ),
+        .trans_id_o            ( ld_trans_id          ),
+        .result_o              ( ld_result            ),
+        .ex_o                  ( ld_ex                ),
+        // MMU port
+        .translation_req_o     ( ld_translation_req   ),
+        .vaddr_o               ( ld_vaddr             ),
+        .paddr_i               ( mmu_paddr            ),
+        .ex_i                  ( mmu_exception        ),
+        .dtlb_hit_i            ( dtlb_hit             ),
+        // to store unit
+        .page_offset_o         ( page_offset          ),
+        .page_offset_matches_i ( page_offset_matches  ),
+        // to memory arbiter
+        .req_port_i            ( dcache_req_ports_i [1] ),
+        .req_port_o            ( dcache_req_ports_o [1] ),
+        .* // remaining load_unit ports (no clk/rst/flush are listed explicitly here) bind by name
+    );
+
+    // ----------------------------
+    // Output Pipeline Registers: unit results pass through shift_reg with Depth = NR_LOAD_PIPE_REGS / NR_STORE_PIPE_REGS
+    // ----------------------------
+    shift_reg #(
+        .dtype ( logic[$bits(ld_valid) + $bits(ld_trans_id) + $bits(ld_result) + $bits(ld_ex) - 1: 0]),
+        .Depth ( NR_LOAD_PIPE_REGS )
+    ) i_pipe_reg_load (
+        .clk_i,
+        .rst_ni,
+        .d_i ( {ld_valid, ld_trans_id, ld_result, ld_ex} ),
+        .d_o ( {load_valid_o, load_trans_id_o, load_result_o, load_exception_o} ) // NOTE(review): load_exception_o is commented out of the port list
+    );
+
+    shift_reg #(
+        .dtype ( logic[$bits(st_valid) + $bits(st_trans_id) + $bits(st_result) + $bits(st_ex) - 1: 0]),
+        .Depth ( NR_STORE_PIPE_REGS )
+    ) i_pipe_reg_store (
+        .clk_i,
+        .rst_ni,
+        .d_i ( {st_valid, st_trans_id, st_result, st_ex} ),
+        .d_o ( {store_valid_o, store_trans_id_o, store_result_o, store_exception_o} ) // NOTE(review): store_exception_o is commented out of the port list
+    );
+
+    // determine whether this is a load or store and route the matching unit's translation request to the MMU
+    always_comb begin : which_op
+
+        ld_valid_i = 1'b0; // defaults: neither unit is selected ...
+        st_valid_i = 1'b0;
+
+        translation_req      = 1'b0; // ... and nothing is requested from the MMU
+        mmu_vaddr            = 64'b0;
+
+        // check the operator to activate the right functional unit accordingly
+        unique case (lsu_ctrl.fu)
+            // all loads go here
+            LOAD:  begin
+                ld_valid_i           = lsu_ctrl.valid;
+                translation_req      = ld_translation_req;
+                mmu_vaddr            = ld_vaddr;
+            end
+            // all stores go here
+            STORE: begin
+                st_valid_i           = lsu_ctrl.valid;
+                translation_req      = st_translation_req;
+                mmu_vaddr            = st_vaddr;
+            end
+            // not relevant for the LSU
+            default: ;
+        endcase
+    end
+
+
+    // ---------------
+    // Byte Enable
+    // ---------------
+    // we can generate the byte enable from the virtual address since the last
+    // 12 bits are the same anyway
+    // and we can always generate the byte enable from the address at hand (be_gen / extract_transfer_size are not defined in this file)
+    assign be_i = be_gen(vaddr_i[2:0], extract_transfer_size(fu_data_i.operator));
+
+    // ------------------------
+    // Misaligned Exception
+    // ------------------------
+    // we can detect a misaligned exception immediately
+    // the misaligned exception is passed to the functional units via the MMU (misaligned_ex_i), which in turn
+    // can augment the exception if other memory-related exceptions like a page fault or access error occur
+    always_comb begin : data_misaligned_detection
+
+        misaligned_exception = { // default: no exception; fields are presumably {cause, tval, valid} - confirm against exception_t
+            64'b0,
+            64'b0,
+            1'b0
+        };
+
+        data_misaligned = 1'b0;
+
+        if (lsu_ctrl.valid) begin
+            case (lsu_ctrl.operator)
+                // double word
+                LD, SD, FLD, FSD,
+                AMO_LRD, AMO_SCD,
+                AMO_SWAPD, AMO_ADDD, AMO_ANDD, AMO_ORD,
+                AMO_XORD, AMO_MAXD, AMO_MAXDU, AMO_MIND,
+                AMO_MINDU: begin
+                    if (lsu_ctrl.vaddr[2:0] != 3'b000) begin
+                        data_misaligned = 1'b1;
+                    end
+                end
+                // word
+                LW, LWU, SW, FLW, FSW,
+                AMO_LRW, AMO_SCW,
+                AMO_SWAPW, AMO_ADDW, AMO_ANDW, AMO_ORW,
+                AMO_XORW, AMO_MAXW, AMO_MAXWU, AMO_MINW,
+                AMO_MINWU: begin
+                    if (lsu_ctrl.vaddr[1:0] != 2'b00) begin
+                        data_misaligned = 1'b1;
+                    end
+                end
+                // half word
+                LH, LHU, SH, FLH, FSH: begin
+                    if (lsu_ctrl.vaddr[0] != 1'b0) begin
+                        data_misaligned = 1'b1;
+                    end
+                end
+                // byte -> is always aligned
+                default:;
+            endcase
+        end
+
+        if (data_misaligned) begin // other lsu_ctrl.fu values leave the default (no exception) in place
+
+            if (lsu_ctrl.fu == LOAD) begin
+                misaligned_exception = {
+                    riscv::LD_ADDR_MISALIGNED,
+                    lsu_ctrl.vaddr,
+                    1'b1
+                };
+
+            end else if (lsu_ctrl.fu == STORE) begin
+                misaligned_exception = {
+                    riscv::ST_ADDR_MISALIGNED,
+                    lsu_ctrl.vaddr,
+                    1'b1
+                };
+            end
+        end
+
+        // we work with SV39, so if VM is enabled, check that all bits [63:38] are equal; being assigned last, this access fault overrides a misaligned exception set above
+        if (en_ld_st_translation_i && !((&lsu_ctrl.vaddr[63:38]) == 1'b1 || (|lsu_ctrl.vaddr[63:38]) == 1'b0)) begin
+
+            if (lsu_ctrl.fu == LOAD) begin
+                misaligned_exception = {
+                    riscv::LD_ACCESS_FAULT,
+                    lsu_ctrl.vaddr,
+                    1'b1
+                };
+
+            end else if (lsu_ctrl.fu == STORE) begin
+                misaligned_exception = {
+                    riscv::ST_ACCESS_FAULT,
+                    lsu_ctrl.vaddr,
+                    1'b1
+                };
+            end
+        end
+    end
+
+    // ------------------
+    // LSU Control
+    // ------------------
+    // new data arrives here; the concatenation order below presumably mirrors the field order of lsu_ctrl_t - confirm against its declaration
+    lsu_ctrl_t lsu_req_i;
+
+    assign lsu_req_i = {lsu_valid_i, vaddr_i, fu_data_i.operand_b, be_i, fu_data_i.fu, fu_data_i.operator, fu_data_i.trans_id};
+
+    lsu_bypass lsu_bypass_i (
+        .lsu_req_i          ( lsu_req_i   ),
+        .lus_req_valid_i    ( lsu_valid_i ), // "lus" (sic) matches the port name declared by lsu_bypass
+        .pop_ld_i           ( pop_ld      ),
+        .pop_st_i           ( pop_st      ),
+
+        .lsu_ctrl_o         ( lsu_ctrl    ),
+        .ready_o            ( lsu_ready_o ),
+        .* // implicitly connects the remaining lsu_bypass ports: clk_i, rst_ni and flush_i
+    );
+#docstring_end
+endmodule
+
+#docstring_begin
+// ------------------
+// LSU Control
+// ------------------
+// The LSU consists of two independent blocks which share a common address translation block.
+// One block is the load unit, the other one is the store unit. They signal their readiness
+// with separate signals. If they are not ready the LSU control should keep the last applied signals stable.
+// Furthermore, another request for one of the two units may arrive in the meantime, in which case
+// the LSU control should sample it and store it for later application to the units. It does so by storing it in a
+// two-element FIFO. This is necessary as we only know very late in the cycle whether the load/store will succeed (address check,
+// TLB hit mainly). So we unconditionally allow another request to arrive and store it in case we need to.
+module lsu_bypass (
+    input  logic      clk_i,
+    input  logic      rst_ni,          // asynchronous, active-low reset (see the always_ff block below)
+    input  logic      flush_i,         // clears FIFO contents, both pointers and the fill count on the next clock edge
+
+    input  lsu_ctrl_t lsu_req_i,       // incoming request
+    input  logic      lus_req_valid_i, // incoming request is valid: it is written into the FIFO ("lus" sic)
+    input  logic      pop_ld_i,        // pop the head entry (driven by the load unit's pop_ld_o in load_store_unit)
+    input  logic      pop_st_i,        // pop the head entry (driven by the store unit's pop_st_o in load_store_unit)
+
+    output lsu_ctrl_t lsu_ctrl_o,      // incoming request while the FIFO is empty, otherwise the entry at the read pointer
+    output logic      ready_o          // high only while the FIFO holds no entries
+    );
+
+    lsu_ctrl_t [1:0] mem_n, mem_q; // two-entry storage (_n: next state, _q: registered state)
+    logic read_pointer_n, read_pointer_q; // 1-bit pointers: they wrap between the two entries
+    logic write_pointer_n, write_pointer_q;
+    logic [1:0] status_cnt_n, status_cnt_q; // number of entries currently stored
+
+    logic  empty;
+    assign empty = (status_cnt_q == 0);
+    assign ready_o = empty;
+
+    always_comb begin // next-state logic; later statements intentionally override earlier ones
+        automatic logic [1:0] status_cnt; // working copies, so a push and pops in the same evaluation accumulate
+        automatic logic write_pointer;
+        automatic logic read_pointer;
+
+        status_cnt = status_cnt_q;
+        write_pointer = write_pointer_q;
+        read_pointer = read_pointer_q;
+
+        mem_n = mem_q;
+        // we've got a valid LSU request: store it at the write pointer (this also happens while the FIFO is empty and the request is bypassed to the output)
+        if (lus_req_valid_i) begin
+            mem_n[write_pointer_q] = lsu_req_i;
+            write_pointer++;
+            status_cnt++;
+        end
+
+        if (pop_ld_i) begin
+            // pop requested via pop_ld_i: invalidate the head entry and advance the read pointer
+            mem_n[read_pointer_q].valid = 1'b0;
+            read_pointer++;
+            status_cnt--;
+        end
+
+        if (pop_st_i) begin
+            // pop requested via pop_st_i: same handling; note it indexes with read_pointer_q, i.e. the same entry as the pop above
+            mem_n[read_pointer_q].valid = 1'b0;
+            read_pointer++;
+            status_cnt--;
+        end
+
+        if (pop_st_i && pop_ld_i) // both pops in the same cycle: clear both entries entirely
+            mem_n = '0;
+
+        if (flush_i) begin // flush overrides everything computed above
+            status_cnt = '0;
+            write_pointer = '0;
+            read_pointer = '0;
+            mem_n = '0;
+        end
+        // hand the working copies over to the next-state signals
+        read_pointer_n  = read_pointer;
+        write_pointer_n = write_pointer;
+        status_cnt_n    = status_cnt;
+    end
+
+    // output assignment: bypass the incoming request while nothing is buffered, otherwise present the buffered head entry
+    always_comb begin : output_assignments
+        if (empty) begin
+            lsu_ctrl_o = lsu_req_i;
+        end else begin
+            lsu_ctrl_o = mem_q[read_pointer_q];
+        end
+    end
+
+    // registers: cleared on reset, otherwise loaded with the next-state values every rising clock edge
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (~rst_ni) begin
+            mem_q           <= '0;
+            status_cnt_q    <= '0;
+            write_pointer_q <= '0;
+            read_pointer_q  <= '0;
+        end else begin
+            mem_q           <= mem_n;
+            status_cnt_q    <= status_cnt_n;
+            write_pointer_q <= write_pointer_n;
+            read_pointer_q  <= read_pointer_n;
+        end
+    end
+endmodule
+#docstring_end