// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 19.04.2017
// Description: Load Store Unit, handles address calculation and memory interface signals

//import ariane_pkg::*;

// Load/store unit top level.
//
// Instantiates the MMU, one store unit, one load unit, the two write-back
// pipeline registers and the request bypass buffer (lsu_bypass), and steers
// the shared MMU port towards whichever unit the current request targets.
//
// NOTE(review): several port declarations (fu_data_i, load_exception_o,
// store_exception_o, icache_areq_i/o, priv_lvl_i, ld_st_priv_lvl_i,
// dcache_req_ports_i/o, amo_req_o, amo_resp_i), the ArianeCfg parameter and
// the ariane_pkg import are commented out in this file, yet some of these
// names are still referenced in the body below. Confirm where they are
// expected to come from before compiling this module.
module load_store_unit #(
    parameter int ASID_WIDTH = 1
    // parameter ariane_pkg::ariane_cfg_t ArianeCfg = ariane_pkg::ArianeDefaultConfig
)(
    input  logic                     clk_i,
    input  logic                     rst_ni,
    input  logic                     flush_i,
    output logic                     no_st_pending_o,
    input  logic                     amo_valid_commit_i,

    //input  fu_data_t                 fu_data_i,
    output logic                     lsu_ready_o,            // FU is ready, i.e. not busy
    input  logic                     lsu_valid_i,            // incoming request is valid

    output logic [TRANS_ID_BITS-1:0] load_trans_id_o,        // scoreboard entry the load writes back to
    output logic [63:0]              load_result_o,
    output logic                     load_valid_o,
    //output exception_t               load_exception_o,       // to WB, signal exception status LD exception

    output logic [TRANS_ID_BITS-1:0] store_trans_id_o,       // scoreboard entry the store writes back to
    output logic [63:0]              store_result_o,
    output logic                     store_valid_o,
    //output exception_t               store_exception_o,      // to WB, signal exception status ST exception

    input  logic                     commit_i,               // commit the pending store
    output logic                     commit_ready_o,         // commit queue can accept another commit request

    input  logic                     enable_translation_i,   // enable virtual memory translation
    input  logic                     en_ld_st_translation_i, // enable virtual memory translation for load/stores

    // icache translation requests
    //input  icache_areq_o_t           icache_areq_i,
    //output icache_areq_i_t           icache_areq_o,

    //input  riscv::priv_lvl_t         priv_lvl_i,             // From CSR register file
    //input  riscv::priv_lvl_t         ld_st_priv_lvl_i,       // From CSR register file
    input  logic                     sum_i,                  // From CSR register file
    input  logic                     mxr_i,                  // From CSR register file
    input  logic [43:0]              satp_ppn_i,             // From CSR register file
    input  logic [ASID_WIDTH-1:0]    asid_i,                 // From CSR register file
    input  logic                     flush_tlb_i,

    // Performance counters
    output logic                     itlb_miss_o,
    output logic                     dtlb_miss_o

    // interface to dcache
    //input  dcache_req_o_t [2:0]      dcache_req_ports_i,
    //output dcache_req_i_t [2:0]      dcache_req_ports_o,

    // AMO interface
    //output amo_req_t                 amo_req_o,
    //input  amo_resp_t                amo_resp_i
);
// #docstring_begin
    // set when the current request's address violates its natural alignment
    logic      data_misaligned;

    // --------------------------------------
    // 1st register stage - (stall registers)
    // --------------------------------------
    // Request as seen by the load/store units; it is driven by lsu_bypass,
    // which holds the value while a unit stalls.
    lsu_ctrl_t lsu_ctrl;
    logic      pop_st;
    logic      pop_ld;

    // ------------------------------
    // Address Generation Unit (AGU)
    // ------------------------------
    // virtual address of the incoming request: signed sum of immediate and operand A
    logic [63:0] vaddr_i;
    logic [7:0]  be_i;

    assign vaddr_i = $unsigned($signed(fu_data_i.imm) + $signed(fu_data_i.operand_a));

    // per-unit request valids
    logic        st_valid_i;
    logic        ld_valid_i;

    // translation requests / addresses produced by the two units
    logic        ld_translation_req;
    logic        st_translation_req;
    logic [63:0] ld_vaddr;
    logic [63:0] st_vaddr;

    // shared MMU port
    logic        translation_req;
    logic        translation_valid;
    logic [63:0] mmu_vaddr;
    logic [63:0] mmu_paddr;
    exception_t  mmu_exception;
    logic        dtlb_hit;

    // load unit results (before the output pipeline register)
    logic                     ld_valid;
    logic [TRANS_ID_BITS-1:0] ld_trans_id;
    logic [63:0]              ld_result;

    // store unit results (before the output pipeline register)
    logic                     st_valid;
    logic [TRANS_ID_BITS-1:0] st_trans_id;
    logic [63:0]              st_result;

    // page offset exchanged between load unit and store unit
    logic [11:0] page_offset;
    logic        page_offset_matches;

    exception_t  misaligned_exception;
    exception_t  ld_ex;
    exception_t  st_ex;

    // -------------------
    // MMU e.g.: TLBs/PTW
    // -------------------
    mmu #(
        .INSTR_TLB_ENTRIES ( 16         ),
        .DATA_TLB_ENTRIES  ( 16         ),
        .ASID_WIDTH        ( ASID_WIDTH ),
        .ArianeCfg         ( ArianeCfg  )
    ) i_mmu (
        // misaligned bypass
        .misaligned_ex_i ( misaligned_exception   ),
        .lsu_is_store_i  ( st_translation_req     ),
        .lsu_req_i       ( translation_req        ),
        .lsu_vaddr_i     ( mmu_vaddr              ),
        .lsu_valid_o     ( translation_valid      ),
        .lsu_paddr_o     ( mmu_paddr              ),
        .lsu_exception_o ( mmu_exception          ),
        .lsu_dtlb_hit_o  ( dtlb_hit               ), // send in the same cycle as the request
        // connecting PTW to D$ IF
        .req_port_i      ( dcache_req_ports_i [0] ),
        .req_port_o      ( dcache_req_ports_o [0] ),
        // icache address translation requests
        .icache_areq_i   ( icache_areq_i          ),
        .icache_areq_o   ( icache_areq_o          ),
        // every remaining port is bound by name
        .*
    );

    // ------------------
    // Store Unit
    // ------------------
    store_unit i_store_unit (
        .clk_i,
        .rst_ni,
        .flush_i,
        .no_st_pending_o,

        .valid_i               ( st_valid_i             ),
        .lsu_ctrl_i            ( lsu_ctrl               ),
        .pop_st_o              ( pop_st                 ),
        .commit_i,
        .commit_ready_o,
        .amo_valid_commit_i,

        .valid_o               ( st_valid               ),
        .trans_id_o            ( st_trans_id            ),
        .result_o              ( st_result              ),
        .ex_o                  ( st_ex                  ),
        // MMU port
        .translation_req_o     ( st_translation_req     ),
        .vaddr_o               ( st_vaddr               ),
        .paddr_i               ( mmu_paddr              ),
        .ex_i                  ( mmu_exception          ),
        .dtlb_hit_i            ( dtlb_hit               ),
        // Load Unit
        .page_offset_i         ( page_offset            ),
        .page_offset_matches_o ( page_offset_matches    ),
        // AMOs
        .amo_req_o,
        .amo_resp_i,
        // to memory arbiter
        .req_port_i            ( dcache_req_ports_i [2] ),
        .req_port_o            ( dcache_req_ports_o [2] )
    );

    // ------------------
    // Load Unit
    // ------------------
    load_unit i_load_unit (
        .valid_i               ( ld_valid_i             ),
        .lsu_ctrl_i            ( lsu_ctrl               ),
        .pop_ld_o              ( pop_ld                 ),

        .valid_o               ( ld_valid               ),
        .trans_id_o            ( ld_trans_id            ),
        .result_o              ( ld_result              ),
        .ex_o                  ( ld_ex                  ),
        // MMU port
        .translation_req_o     ( ld_translation_req     ),
        .vaddr_o               ( ld_vaddr               ),
        .paddr_i               ( mmu_paddr              ),
        .ex_i                  ( mmu_exception          ),
        .dtlb_hit_i            ( dtlb_hit               ),
        // to store unit
        .page_offset_o         ( page_offset            ),
        .page_offset_matches_i ( page_offset_matches    ),
        // to memory arbiter
        .req_port_i            ( dcache_req_ports_i [1] ),
        .req_port_o            ( dcache_req_ports_o [1] ),
        // every remaining port is bound by name
        .*
    );

    // ----------------------------
    // Output Pipeline Register
    // ----------------------------
    // Load write-back path: {valid, trans_id, result, exception} delayed by
    // NR_LOAD_PIPE_REGS stages.
    shift_reg #(
        .dtype ( logic[$bits(ld_valid) + $bits(ld_trans_id) + $bits(ld_result) + $bits(ld_ex) - 1: 0]),
        .Depth ( NR_LOAD_PIPE_REGS )
    ) i_pipe_reg_load (
        .clk_i,
        .rst_ni,
        .d_i ( {ld_valid, ld_trans_id, ld_result, ld_ex} ),
        .d_o ( {load_valid_o, load_trans_id_o, load_result_o, load_exception_o} )
    );

    // Store write-back path: same layout, delayed by NR_STORE_PIPE_REGS stages.
    shift_reg #(
        .dtype ( logic[$bits(st_valid) + $bits(st_trans_id) + $bits(st_result) + $bits(st_ex) - 1: 0]),
        .Depth ( NR_STORE_PIPE_REGS )
    ) i_pipe_reg_store (
        .clk_i,
        .rst_ni,
        .d_i ( {st_valid, st_trans_id, st_result, st_ex} ),
        .d_o ( {store_valid_o, store_trans_id_o, store_result_o, store_exception_o} )
    );

    // Route the request to the load or the store unit and hand the shared
    // MMU port to that same unit.
    always_comb begin : which_op
        // nothing is selected unless the functional unit field says so
        mmu_vaddr       = 64'b0;
        translation_req = 1'b0;
        st_valid_i      = 1'b0;
        ld_valid_i      = 1'b0;

        unique case (lsu_ctrl.fu)
            // all loads go here
            LOAD: begin
                ld_valid_i      = lsu_ctrl.valid;
                translation_req = ld_translation_req;
                mmu_vaddr       = ld_vaddr;
            end
            // all stores go here
            STORE: begin
                st_valid_i      = lsu_ctrl.valid;
                translation_req = st_translation_req;
                mmu_vaddr       = st_vaddr;
            end
            // not relevant for the LSU
            default: ;
        endcase
    end

    // ---------------
    // Byte Enable
    // ---------------
    // The byte enable is derived from the low three bits of the virtual
    // address and the transfer size of the operation.
    assign be_i = be_gen(vaddr_i[2:0], extract_transfer_size(fu_data_i.operator));

    // ------------------------
    // Misaligned Exception
    // ------------------------
    // Alignment can be checked immediately. The resulting exception is passed
    // to the MMU (misaligned_ex_i), which feeds the units through
    // lsu_exception_o.
    always_comb begin : data_misaligned_detection
        data_misaligned      = 1'b0;
        misaligned_exception = { 64'b0, 64'b0, 1'b0 };

        if (lsu_ctrl.valid) begin
            case (lsu_ctrl.operator)
                // double word: low three address bits must be zero
                LD, SD, FLD, FSD,
                AMO_LRD, AMO_SCD,
                AMO_SWAPD, AMO_ADDD, AMO_ANDD, AMO_ORD,
                AMO_XORD, AMO_MAXD, AMO_MAXDU, AMO_MIND,
                AMO_MINDU: begin
                    if (lsu_ctrl.vaddr[2:0] != 3'b000) begin
                        data_misaligned = 1'b1;
                    end
                end
                // word: low two address bits must be zero
                LW, LWU, SW, FLW, FSW,
                AMO_LRW, AMO_SCW,
                AMO_SWAPW, AMO_ADDW, AMO_ANDW, AMO_ORW,
                AMO_XORW, AMO_MAXW, AMO_MAXWU, AMO_MINW,
                AMO_MINWU: begin
                    if (lsu_ctrl.vaddr[1:0] != 2'b00) begin
                        data_misaligned = 1'b1;
                    end
                end
                // half word: lowest address bit must be zero
                LH, LHU, SH, FLH, FSH: begin
                    if (lsu_ctrl.vaddr[0] != 1'b0) begin
                        data_misaligned = 1'b1;
                    end
                end
                // byte accesses are always aligned
                default: ;
            endcase
        end

        if (data_misaligned) begin
            if (lsu_ctrl.fu == LOAD) begin
                misaligned_exception = {
                    riscv::LD_ADDR_MISALIGNED,
                    lsu_ctrl.vaddr,
                    1'b1
                };
            end else if (lsu_ctrl.fu == STORE) begin
                misaligned_exception = {
                    riscv::ST_ADDR_MISALIGNED,
                    lsu_ctrl.vaddr,
                    1'b1
                };
            end
        end

        // SV39: with load/store translation enabled, bits [63:38] must be all
        // ones or all zeros. Being assigned last, this access fault replaces
        // a misaligned exception raised above.
        if (en_ld_st_translation_i && !(&lsu_ctrl.vaddr[63:38]) && (|lsu_ctrl.vaddr[63:38])) begin
            if (lsu_ctrl.fu == LOAD) begin
                misaligned_exception = {
                    riscv::LD_ACCESS_FAULT,
                    lsu_ctrl.vaddr,
                    1'b1
                };
            end else if (lsu_ctrl.fu == STORE) begin
                misaligned_exception = {
                    riscv::ST_ACCESS_FAULT,
                    lsu_ctrl.vaddr,
                    1'b1
                };
            end
        end
    end

    // ------------------
    // LSU Control
    // ------------------
    // incoming request, packed in lsu_ctrl_t field order
    lsu_ctrl_t lsu_req_i;

    assign lsu_req_i = {lsu_valid_i, vaddr_i, fu_data_i.operand_b, be_i, fu_data_i.fu, fu_data_i.operator, fu_data_i.trans_id};

    lsu_bypass lsu_bypass_i (
        .lsu_req_i       ( lsu_req_i   ),
        .lus_req_valid_i ( lsu_valid_i ),
        .pop_ld_i        ( pop_ld      ),
        .pop_st_i        ( pop_st      ),

        .lsu_ctrl_o      ( lsu_ctrl    ),
        .ready_o         ( lsu_ready_o ),
        // clk_i, rst_ni and flush_i are bound by name
        .*
    );
// #docstring_end
endmodule

// #docstring_begin
// ------------------
// LSU Control
// ------------------
// The LSU consists of two independent blocks which share a common address translation block.
// One block is the load unit, the other one is the store unit. They signal their readiness
// with separate signals. If they are not ready the LSU control should keep the last applied signals stable.
// Furthermore it can be the case that another request for one of the two units arrives, in which case
// the LSU control should sample it and store it for later application to the units. It does so by storing it in a
// two element FIFO. This is necessary as we only know very late in the cycle whether the load/store will succeed
// (address check, TLB hit mainly). So we better unconditionally allow another request to arrive and store this
// request in case we need to.
module lsu_bypass (
    input  logic      clk_i,
    input  logic      rst_ni,
    input  logic      flush_i,

    input  lsu_ctrl_t lsu_req_i,
    input  logic      lus_req_valid_i,
    input  logic      pop_ld_i,
    input  logic      pop_st_i,

    output lsu_ctrl_t lsu_ctrl_o,
    output logic      ready_o
);
    // Two-entry request buffer with one-bit read/write pointers and a
    // two-bit fill counter. `_q` is the registered state, `_n` the next state.
    lsu_ctrl_t [1:0] mem_n, mem_q;
    logic            read_pointer_n,  read_pointer_q;
    logic            write_pointer_n, write_pointer_q;
    logic [1:0]      status_cnt_n,    status_cnt_q;

    // ready is signalled only while nothing is buffered
    logic empty;
    assign empty   = (status_cnt_q == 0);
    assign ready_o = empty;

    // Next-state computation. The order of the updates matters: later
    // assignments to mem_n override earlier ones.
    always_comb begin
        automatic logic [1:0] fill;
        automatic logic       wr_ptr;
        automatic logic       rd_ptr;

        // start from the registered state
        mem_n  = mem_q;
        fill   = status_cnt_q;
        wr_ptr = write_pointer_q;
        rd_ptr = read_pointer_q;

        // push: store the incoming request at the registered write pointer
        if (lus_req_valid_i) begin
            mem_n[write_pointer_q] = lsu_req_i;
            wr_ptr = wr_ptr + 1'b1;
            fill   = fill + 2'd1;
        end

        // pop requested by the load unit: invalidate the head entry
        if (pop_ld_i) begin
            mem_n[read_pointer_q].valid = 1'b0;
            rd_ptr = rd_ptr + 1'b1;
            fill   = fill - 2'd1;
        end

        // pop requested by the store unit: invalidate the head entry
        if (pop_st_i) begin
            mem_n[read_pointer_q].valid = 1'b0;
            rd_ptr = rd_ptr + 1'b1;
            fill   = fill - 2'd1;
        end

        // both units pop in the same cycle: clear the whole buffer
        if (pop_st_i && pop_ld_i) begin
            mem_n = '0;
        end

        // a flush has the last word and resets all state
        if (flush_i) begin
            mem_n  = '0;
            fill   = '0;
            wr_ptr = '0;
            rd_ptr = '0;
        end

        // hand the results to the registers
        status_cnt_n    = fill;
        write_pointer_n = wr_ptr;
        read_pointer_n  = rd_ptr;
    end

    // Output selection: the buffered head entry, unless the buffer is empty,
    // in which case the incoming request is forwarded directly.
    always_comb begin : output_assignments
        lsu_ctrl_o = mem_q[read_pointer_q];
        if (empty) begin
            lsu_ctrl_o = lsu_req_i;
        end
    end

    // state registers, asynchronous active-low reset
    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (!rst_ni) begin
            read_pointer_q  <= '0;
            write_pointer_q <= '0;
            status_cnt_q    <= '0;
            mem_q           <= '0;
        end else begin
            read_pointer_q  <= read_pointer_n;
            write_pointer_q <= write_pointer_n;
            status_cnt_q    <= status_cnt_n;
            mem_q           <= mem_n;
        end
    end
endmodule
// #docstring_end