From 2181f86483fad92e5d250df5990e0b4f2adf3c08 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Wed, 25 Jul 2018 04:29:58 +0100 Subject: [PATCH] add core --- src/core/PTWalk.bsv | 277 +++++++ src/core/alu.bsv | 154 ++++ src/core/branchpredictor.bsv | 92 +++ src/core/core.bsv | 274 +++++++ src/core/csr.bsv | 1129 ++++++++++++++++++++++++++ src/core/dTLB.bsv | 393 +++++++++ src/core/dcache_asic.bsv | 745 +++++++++++++++++ src/core/dcache_asic_generic.bsv | 652 +++++++++++++++ src/core/decode.defines | 204 +++++ src/core/decode_opfetch.bsv | 190 +++++ src/core/decoder.bsv | 215 +++++ src/core/defined_parameters.bsv | 485 +++++++++++ src/core/defined_types.bsv | 562 +++++++++++++ src/core/dmem.bsv | 179 ++++ src/core/execute_stage.bsv | 374 +++++++++ src/core/fetch_stage.bsv | 175 ++++ src/core/fpu/fpu.bsv | 581 +++++++++++++ src/core/fpu/fpu_compare_min_max.bsv | 220 +++++ src/core/fpu/fpu_convert_sp_dp.bsv | 282 +++++++ src/core/fpu/fpu_divider.bsv | 667 +++++++++++++++ src/core/fpu/fpu_dp_to_int.bsv | 311 +++++++ src/core/fpu/fpu_fclass.bsv | 197 +++++ src/core/fpu/fpu_fm_add_sub.bsv | 1037 +++++++++++++++++++++++ src/core/fpu/fpu_int_to_dp.bsv | 140 ++++ src/core/fpu/fpu_int_to_sp.bsv | 152 ++++ src/core/fpu/fpu_sign_injection.bsv | 102 +++ src/core/fpu/fpu_sp_to_int.bsv | 305 +++++++ src/core/fpu/fpu_sqrt.bsv | 511 ++++++++++++ src/core/fpu/integer_divider.bsv | 186 +++++ src/core/fpu/integermultiplier.bsv | 83 ++ src/core/iTLB.bsv | 307 +++++++ src/core/icache.bsv | 484 +++++++++++ src/core/icache_asic.bsv | 468 +++++++++++ src/core/imem.bsv | 180 ++++ src/core/mem_config1.bsv | 92 +++ src/core/memory_stage.bsv | 179 ++++ src/core/muldiv.bsv | 319 ++++++++ src/core/prf.bsv | 131 +++ src/core/registerfile.bsv | 136 ++++ src/core/riscv.bsv | 357 ++++++++ 40 files changed, 13527 insertions(+) create mode 100644 src/core/PTWalk.bsv create mode 100644 src/core/alu.bsv create mode 100644 src/core/branchpredictor.bsv create mode 100644 src/core/core.bsv 
create mode 100644 src/core/csr.bsv create mode 100755 src/core/dTLB.bsv create mode 100644 src/core/dcache_asic.bsv create mode 100644 src/core/dcache_asic_generic.bsv create mode 100644 src/core/decode.defines create mode 100644 src/core/decode_opfetch.bsv create mode 100644 src/core/decoder.bsv create mode 100644 src/core/defined_parameters.bsv create mode 100644 src/core/defined_types.bsv create mode 100644 src/core/dmem.bsv create mode 100644 src/core/execute_stage.bsv create mode 100644 src/core/fetch_stage.bsv create mode 100644 src/core/fpu/fpu.bsv create mode 100644 src/core/fpu/fpu_compare_min_max.bsv create mode 100644 src/core/fpu/fpu_convert_sp_dp.bsv create mode 100644 src/core/fpu/fpu_divider.bsv create mode 100644 src/core/fpu/fpu_dp_to_int.bsv create mode 100755 src/core/fpu/fpu_fclass.bsv create mode 100644 src/core/fpu/fpu_fm_add_sub.bsv create mode 100644 src/core/fpu/fpu_int_to_dp.bsv create mode 100644 src/core/fpu/fpu_int_to_sp.bsv create mode 100644 src/core/fpu/fpu_sign_injection.bsv create mode 100644 src/core/fpu/fpu_sp_to_int.bsv create mode 100644 src/core/fpu/fpu_sqrt.bsv create mode 100644 src/core/fpu/integer_divider.bsv create mode 100644 src/core/fpu/integermultiplier.bsv create mode 100644 src/core/iTLB.bsv create mode 100644 src/core/icache.bsv create mode 100644 src/core/icache_asic.bsv create mode 100644 src/core/imem.bsv create mode 100644 src/core/mem_config1.bsv create mode 100644 src/core/memory_stage.bsv create mode 100644 src/core/muldiv.bsv create mode 100644 src/core/prf.bsv create mode 100644 src/core/registerfile.bsv create mode 100644 src/core/riscv.bsv diff --git a/src/core/PTWalk.bsv b/src/core/PTWalk.bsv new file mode 100644 index 0000000..7e7ff67 --- /dev/null +++ b/src/core/PTWalk.bsv @@ -0,0 +1,277 @@ +/* +Copyright (c) 2013, IIT Madras +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +* Neither the name of IIT Madras nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+*/
+package PTWalk;
+
+import ConfigReg:: *;
+import defined_types::*;
+import GetPut::*;
+
+    // Unpacks PTE permission bits [7:0] into a TLB_permissions struct (v,r,w,x,u,g,a,d); bits [9:8] of perms are not used.
+    function TLB_permissions ptw_to_tlb_perms(Bit#(10) perms);
+        return TLB_permissions { v : perms[0],
+                                 r : perms[1],
+                                 w : perms[2],
+                                 x : perms[3],
+                                 u : perms[4],
+                                 g : perms[5],
+                                 a : perms[6],
+                                 d : perms[7]};
+    endfunction
+
+    //interface Ifc_TLB#(numeric type vaddr, numeric type page_size, numeric type paddr, numeric type asid_width);
+    //  method Action get_vpn(Request_PPN_PTW#(vaddr,page_size) req);
+    //  method ActionValue#(Response_PPN_TLB#(paddr, page_size, asid_width)) send_ppn;
+    //endinterface
+    // Memory side of the walker: send_PTE_pointer emits the address of the next PTE to fetch, get_PTE accepts the fetched PTE.
+    interface Ifc_memory#(numeric type data_width);
+        method ActionValue#(Request_PTE_memory#(data_width)) send_PTE_pointer;
+        method Action get_PTE(Bit#(data_width) pte);
+    endinterface
+    // Page-table-walker interface: a walk request comes in on frm_TLB and the (page_fault, To_TLB) result leaves on to_TLB.
+    interface Ifc_PTWalk#(numeric type data_width,
+                          numeric type vaddr,
+                          numeric type paddr,
+                          numeric type real_paddr,
+                          numeric type asid_width,
+                          numeric type page_size);
+        interface Put#(Request_PPN_PTW#(vaddr,page_size)) frm_TLB;
+        interface Get#(Tuple2#(Bool,To_TLB#(real_paddr,page_size,asid_width))) to_TLB;
+        interface Ifc_memory#(data_width) ifc_memory;
+        method Action satp_frm_csr(Bit#(data_width) satp); // loads the root page-table PPN and the ASID from satp
+        method Action flush(Translation_type _flush); // aborts the walk in progress when it is of the matching kind
+        //method Bool ptwalkdone;
+        //method Maybe#(Translation_type) page_fault;
+    endinterface
+    // Walker state machine; states used below: PTW_ready, Handling_PTW, Send_to_memory, Wait_for_memory, PTW_done.
+    module mkPTWalk(Ifc_PTWalk#(data_width,vaddr_width,paddr_width, real_paddr, asid_width,page_size_bits))
+        provisos(
+            Mul#(8,no_of_bytes, data_width),
+            Log#(no_of_bytes, addressable_bits),
+            Log#(data_width, data_width_bits),
+            Add#(vpn_width, page_size_bits, vaddr_width),
+            Add#(vpn_split, addressable_bits, page_size_bits),
+            Add#(ppn_width, page_size_bits, paddr_width),
+            Add#(real_ppn_width, page_size_bits, real_paddr),
+            Mul#(vpn_split, levels, vpn_width),
+            Add#(10, ppn_width, ppn_perm),
+            Add#(y_, ppn_width, data_width),
+            Add#(x_, real_ppn_width, data_width),
+            Add#(a_, ppn_perm, data_width),
+            Add#(b_, paddr_width, data_width),
+            Add#(c_, 10, data_width),
+            Add#(d_, vpn_width, data_width),
+            Add#(1, sub_levels, levels)
+        );
+        // Value-level copies of the numeric type parameters, used for bit indexing below.
+        let v_data_width = valueOf(data_width);
+        let v_data_width_bits = valueOf(data_width_bits);
+        let v_vpn_width = valueOf(vpn_width);
+        let v_vpn_split = valueOf(vpn_split);
+        let v_ppn_width = valueOf(ppn_width);
+        let v_real_ppn_width = valueOf(real_ppn_width);
+        let v_asid_width = valueOf(asid_width);
+        let v_levels = valueOf(levels);
+        let v_sub_levels = valueOf(sub_levels);
+        let v_addressable_bits = valueOf(addressable_bits);
+        // Builds {ppn,perm} for a leaf PTE covering `levels` VPN fields: low PPN bits come from vpn; fault if they are non-zero in the PTE or V is clear.
+        function Tuple2#(Bool, Bit#(ppn_perm)) fn_super_page_physical_address(Bit#(vpn_width) vpn,
+                                                                             Bit#(data_width) pte,
+                                                                             Int#(32) levels);
+            Bool page_fault = !unpack(pte[0]);
+            Bit#(ppn_width) step_ppn = 0; // stays 0 when levels is not 1, 2 or 3
+            if(levels==3) begin
+                Bit#(TMul#(3,vpn_split)) offset_3 = pte[3*v_vpn_split+9:10];
+                Bit#(TSub#(ppn_width,TMul#(3,vpn_split))) ppn= pte[v_ppn_width+9:3*v_vpn_split+10];
+                Bit#(TMul#(3,vpn_split)) vpn_offset = vpn[3*v_vpn_split-1:0];
+                step_ppn = {ppn,vpn_offset};
+                if(offset_3!=0)
+                    page_fault = True;
+            end
+            else if(levels==2) begin
+                Bit#(TMul#(2,vpn_split)) offset_2 = pte[2*v_vpn_split+9:10];
+                Bit#(TSub#(ppn_width,TMul#(2,vpn_split))) ppn= pte[v_ppn_width+9:2*v_vpn_split+10];
+                Bit#(TMul#(2,vpn_split)) vpn_offset = vpn[2*v_vpn_split-1:0];
+                step_ppn = {ppn,vpn_offset};
+                if(offset_2!=0)
+                    page_fault = True;
+            end
+            else if(levels==1) begin
+                Bit#(TMul#(1,vpn_split)) offset_1 = pte[1*v_vpn_split+9:10];
+                Bit#(TSub#(ppn_width,TMul#(1,vpn_split))) ppn= pte[v_ppn_width+9:1*v_vpn_split+10];
+                Bit#(TMul#(1,vpn_split)) vpn_offset = vpn[1*v_vpn_split-1:0];
+                step_ppn = {ppn,vpn_offset};
+                if(offset_1!=0)
+                    page_fault = True;
+            end
+            return tuple2(page_fault,{step_ppn,pte[9:0]});
+        endfunction
+        // Walker state.
+        Reg#(Bit#(vpn_width)) rg_vpn <- mkReg(0); // VPN of the request being walked (written by frm_TLB.put)
+        Reg#(Int#(32)) rg_levels <- mkConfigReg(fromInteger(v_sub_levels)); // current level; starts at sub_levels and counts down in get_PTE
+        Reg#(Bit#(data_width)) rg_ppn[2] <- mkCReg(2,0); // not referenced elsewhere in this module
+        Reg#(Bit#(asid_width)) rg_asid[2] <- mkCReg(2,0); // ASID captured from satp, returned with every response
+        Reg#(Bit#(data_width)) rg_pte <- mkReg(0); // current PTE; seeded from satp with only the V bit set
+        Reg#(Bit#(data_width)) rg_pte_pointer <- mkReg(0); // address of the next PTE to fetch
+        Reg#(PTW_state) rg_ptw_state[2] <- mkCReg(2,PTW_ready);
+        Reg#(Bit#(data_width)) rg_satp <- mkReg(0); // not referenced elsewhere in this module
+        Reg#(Bool) rg_page_fault <- mkReg(False);
+        Reg#(Bit#(10)) rg_permission_bits <- mkReg(0); // written in rl_computer_next_pointer, never read in this module
+        Reg#(Translation_type) rg_page_type <- mkConfigReg(Load); // kind of access that started the walk
+        Wire#(Bool) wr_flush <- mkDWire(False); // True only in a cycle where flush() hits the walk in progress
+        // Examines rg_pte: raises a fault, finishes on a leaf (super page), or forms the pointer to the next-level PTE.
+        //(*conflict_free="rl_computer_next_pointer, rl_return_from_page_fault"*)
+        rule rl_computer_next_pointer(rg_ptw_state[1] == Handling_PTW && !wr_flush);
+            Int#(32) vpn_trnct = (rg_levels+1)*fromInteger(v_vpn_split);
+            Bit#(vpn_split) lv_vpn_split = rg_vpn[vpn_trnct-1:vpn_trnct-fromInteger(v_vpn_split)]; // VPN field for the current level
+            Bit#(page_size_bits) vpn_addr = zeroExtend(lv_vpn_split);
+            vpn_addr = vpn_addr << v_addressable_bits; // scale the index by the PTE size in bytes
+            `ifdef verbose $display($time, "\tPTW: The VPN split bits are %d split is %h", vpn_trnct, vpn_addr); `endif
+            Int#(32) ppn_trnct = (rg_levels)*fromInteger(v_vpn_split) + 10;
+            Bit#(ppn_width) p_pte= rg_pte[v_ppn_width+9:10];
+            Bit#(data_width) lv_zeros = 0;
+            rg_permission_bits <= rg_pte[9:0];
+            `ifdef verbose $display($time, "\tPTW: page table entry %h and page level is %d", rg_pte, rg_levels); `endif
+            if(rg_pte[0]==0 || (rg_pte[1]==0 && rg_pte[2]==1)) begin // V clear, or W set without R
+                rg_ptw_state[1] <= PTW_done;
+                `ifdef verbose $display($time,"\tPTW: Page Fault due to reason1 "); `endif
+                rg_page_fault <= True;
+            end
+            else if((rg_pte[3]==1 || rg_pte[1]==1)) begin //if either the execute or the read permission bit is set this is a leaf PTE found above the last level, i.e. a super page
+                rg_ptw_state[1] <= PTW_done;
+                match{.x,.y} = fn_super_page_physical_address(rg_vpn,rg_pte,rg_levels+1);
+                if(x) begin
+                    rg_page_fault <= True;
+                    `ifdef verbose $display($time,"Page Fault due to reason2 "); `endif
+                end
+                else begin
+                    rg_pte <= zeroExtend(y);
+                    `ifdef verbose $display($time,"Superpage has been found"); `endif
+                    if(rg_pte[6]==0 || (rg_page_type==Store && rg_pte[7]==0)) // A clear, or a store to a page with D clear
+                        rg_page_fault <= True;
+                end
+                //if(rg_pte[ppn_trnct-1:10]!=0) begin TODO
+                    //Bit#(data_width) pte_paddr = rg_pte << ppn_trnct;
+                    //Bit#(vpn_width) pte_vpn = rg_vpn << rg_levels*(fromInteger(v_vpn_split));
+                    //Bit#(data_width) pte_vaddr = zeroExtend(pte_vpn);
+                    //pte_vaddr = pte_vaddr >> rg_levels*fromInteger(v_vpn_split);
+                    //rg_pte_pointer <= pte_paddr | pte_vaddr;
+                    //`ifdef verbose $display($time, "\t It's is a superpage %h", pte_paddr | pte_vaddr); `endif
+                //end
+                //else
+                //  rg_page_fault <= True;
+            end
+            else begin/* if(rg_levels == 0) begin
+                rg_ptw_state[1] <= Wait_for_memory;
+                rg_pte_pointer <= {rg_pte[v_data_width-1:10],lv_zeros[9:0]};
+                `ifdef verbose
+                Bit#(data_width) pte_pointer = {rg_pte[v_data_width-1:10],lv_zeros[9:0]};
+                $display($time, "\t %h", pte_pointer); `endif
+            end
+            else if(rg_levels != 0) begin*/
+                Bit#(paddr_width) lv_pte_pointer = {p_pte,vpn_addr}; // next PTE address = {PPN of this PTE, scaled VPN index}
+                rg_pte_pointer <= zeroExtend(lv_pte_pointer);
+                rg_ptw_state[1] <= Send_to_memory;
+                `ifdef verbose $display($time, "\t next page table pointer %h", lv_pte_pointer); `endif
+            end
+
+        endrule
+        // On a flush, drop the walk in progress and return to the ready state.
+        rule rl_return_from_page_fault(wr_flush);
+            `ifdef verbose $display($time, "\tPTW: Flushed page table walk"); `endif
+            rg_ptw_state[1] <= PTW_ready;
+            rg_page_fault <= False;
+            rg_levels <= fromInteger(v_sub_levels);
+        endrule
+        // Accepts a walk request only while the walker is ready and no flush is asserted.
+        interface frm_TLB = interface Put
+            method Action put(Request_PPN_PTW#(vaddr_width,page_size_bits) req) if(rg_ptw_state[1]==PTW_ready && !wr_flush);
+                rg_vpn <= req.vpn;
+                rg_page_type <= req.page_type;
+                `ifdef verbose $display($time, "\tPTW: vpn obtained is %h", req.vpn); `endif
+                rg_ptw_state[1] <= Handling_PTW;
+            endmethod
+        endinterface;
+        // Returns (page_fault, translation) once the walk is done and re-arms the walker.
+        interface to_TLB = interface Get
+            method ActionValue#(Tuple2#(Bool,To_TLB#(real_paddr, page_size_bits, asid_width))) get if(rg_ptw_state[1]==PTW_done && !wr_flush);
+                rg_page_fault <= False;
+                rg_ptw_state[1] <= PTW_ready;
+                Bit#(2) pg_levels = truncate(pack(rg_levels) + 1);
+                rg_levels <= fromInteger(v_sub_levels);
+                if(rg_levels==fromInteger(v_sub_levels)) // get_PTE resets rg_levels to sub_levels after a last-level fetch; report level 0 then
+                    pg_levels=0;
+                `ifdef verbose $display($time, "\tPTW: physical page number %h with permission bits %b", rg_pte, rg_page_fault); `endif
+                return tuple2(rg_page_fault, To_TLB { ppn : rg_pte[v_real_ppn_width+9:10],
+                                                     tlb_perm : ptw_to_tlb_perms(rg_pte[9:0]),
+                                                     asid : rg_asid[1],
+                                                     levels : pg_levels});
+            endmethod
+        endinterface;
+        // Memory side: hands out the pending PTE address, then takes back the fetched PTE.
+        interface ifc_memory = interface Ifc_memory
+            method ActionValue#(Request_PTE_memory#(data_width)) send_PTE_pointer if(!wr_flush && rg_ptw_state[1]== Send_to_memory);
+                `ifdef verbose $display($time, "\tPTW: Sending from PTW to dcache for address %h", rg_pte_pointer); `endif
+                rg_ptw_state[1] <= Wait_for_memory;
+                return Request_PTE_memory{ptwdone : (rg_ptw_state[1]==PTW_done), address : rg_pte_pointer, page_type : rg_page_type}; // ptwdone is always False here: the guard requires Send_to_memory
+            endmethod
+            method Action get_PTE(Bit#(data_width) pte) if(rg_ptw_state[1]== Wait_for_memory && !wr_flush);
+                `ifdef verbose $display($time, "\tPTW: pte obtained from memory %h", pte); `endif
+                rg_pte <= pte;
+                if(rg_levels == 0) begin // last level: the fetched PTE is the final answer
+                    `ifdef verbose $display($time, "\tPTW: Last level PTW with pte %h and page fault", pte, rg_page_fault); `endif
+                    rg_ptw_state[1] <= PTW_done;
+                    if(pte[0]==0 || pte[6]==0 || (rg_page_type==Store && pte[7]==0)) begin // V clear, A clear, or store with D clear
+                        `ifdef verbose $display($time, "\tPTW: Access and dirty page fault %h", pte); `endif
+                        rg_page_fault <= True;
+                    end
+                    rg_levels <= fromInteger(v_sub_levels);
+                end
+                else begin // more levels to go: let rl_computer_next_pointer examine this PTE
+                    rg_ptw_state[1] <= Handling_PTW;
+                    rg_levels <= rg_levels-1;
+                end
+            endmethod
+        endinterface;
+        // While the walker is idle, seeds rg_pte with {satp PPN, perm bits 'b1} and captures the ASID field of satp.
+        method Action satp_frm_csr(Bit#(data_width) satp) if(rg_ptw_state[1] == PTW_ready);
+            Bit#(ppn_width) p_pte = satp[v_ppn_width-1:0];
+            //Bit#(data_width) pte = zeroExtend(p_pte);
+            Bit#(10) perm_bits = 'b1; // only V set, so the first rule firing treats it as a pointer to the root table
+            Bit#(ppn_perm) pte ={p_pte,perm_bits};
+            rg_pte <= zeroExtend(pte);
+            //`ifdef verbose $display($time, "\t page table pointer %h", p_pte); `endif
+            rg_asid[0] <= satp[v_asid_width+v_ppn_width-1:v_ppn_width];
+        endmethod
+        // Raises wr_flush when the walk in progress and the flush are both Execution, or both non-Execution.
+        method Action flush(Translation_type _flush);
+            if((rg_page_type==Execution && _flush==Execution) || (rg_page_type!=Execution && _flush!=Execution))
+                wr_flush <= True;
+        endmethod
+
+        //method Bool ptwalkdone;
+        //  Bool done = False;
+        //  if(rg_ptw_state[1]==PTW_done)
+        //      done = True;
+        //  return done;
+        //endmethod
+
+        //method Maybe#(Translation_type) page_fault /*if(rg_ptw_state[1]==PTW_done)*/;
+        //  if(rg_page_fault)
+        //      return tagged Valid rg_page_type;
+        //  else
+        //      return tagged Invalid;
+        //endmethod
+
+    endmodule
+endpackage
diff --git a/src/core/alu.bsv b/src/core/alu.bsv
new file mode 100644
index 0000000..ba56e46
--- /dev/null
+++ b/src/core/alu.bsv
@@ -0,0 +1,154 @@
+/*
+Copyright (c) 2013, IIT Madras
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+* Neither the name of IIT Madras nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+Module name: Riscv_arithmetic_unit.
+author name: Neel Gala
+Email id: neelgala@gmail.com
+
+This module is the arithmetic execution unit for the RISCV ISA. It is a 64 bit implementation which is named as RV64.
+The instructions with a "W" suffix are RV64 instructions which ignore the upper 32 bits and operate on the lower 32 bits.
+The arithmetic unit is implemented as a single function, fn_alu, in which the 4-bit "fn" code selects the operation to be executed.
+
+fn_alu returns a 7-tuple: (execution result, effective/jump address, flush request, branch-predictor training data, return-address-stack push, trap, performance-monitor bits).
+
+*/
+
+package alu;
+
+import defined_types::*;
+`include "defined_parameters.bsv"
+`include "decode.defines"
+    (*noinline*)
+    function Tuple7#(Execution_output, Bit#(`VADDR), Flush_type, Maybe#(Training_data#(`VADDR)),Maybe#(Bit#(`VADDR)), Trap_type, Bit#(`PERFMONITORS)) fn_alu(Bit#(4) fn, Bit#(64) op1, Bit#(64) op2, Bit#(64) immediate_value, Bit#(`VADDR) pc ,
+        Instruction_type inst_type, Bit#(`VADDR) npc, Bit#(3) funct3, Access_type mem_access, Bit#(5) rd, Bit#(2) prediction, // npc: next pc the front end actually followed; prediction: 2-bit predictor state
+        Bit#(`PERFMONITORS) perfmonitors // incoming event bits; branch-taken / mispredicted bits are set below
+        `ifdef RV64 ,Bool word32 `endif );
+// TODO: use the pc of the previous stage for next-pc. This will save space in the FIFOs
+// But what if the instruction in the previous stage has not yet been enqueued (in cases of page/cache misses)
+// In this case you will have to wait untill that instruction arrives before progressing. NEED TO THINK THIS THROUGH
+        /*========= Perform all the arithmetic ===== */
+        // ADD* ADDI* SUB*
+        let inv_op2=(fn[3]==1)?~op2:op2; // fn[3] set: op2 is inverted and fn[3] is added as carry-in, giving op1-op2
+        let op1_xor_op2=op1^inv_op2;
+        let adder_output=op1+inv_op2+zeroExtend(fn[3]);
+        // SLT SLTU
+        Bit#(1) compare_out=fn[0]^(
+                        (fn[3]==0)?pack(op1_xor_op2==0):
+                        (op1[64-1]==op2[64-1])?adder_output[64-1]:
+                        (fn[1]==1)?op2[64-1]:op1[64-1]);
+        // SLL SRL SRA
+        Bit#(6) shift_amt={((!word32)?op2[5]:0),op2[4:0]}; // word operations use only a 5-bit shift amount
+        Bit#(32) upper_bits=word32?signExtend(fn[3]&op1[31]):op1[63:32];
+        Bit#(64) shift_inright={upper_bits,op1[31:0]};
+        let shin = (fn==`FNSR || fn==`FNSRA)?shift_inright:reverseBits(shift_inright); // left shifts: right-shift the bit-reversed operand, reverse again below
+        Int#(TAdd#(64,1)) t=unpack({(fn[3]&shin[64-1]),shin}); // extra top bit supplies the sign fill when fn[3] is set
+        Int#(64) shift_r=unpack(pack(t>>shift_amt)[64-1:0]);
+        let shift_l=reverseBits(pack(shift_r));
+        Bit#(64) shift_output=((fn==`FNSR || fn==`FNSRA)?pack(shift_r):0) |
+                              ((fn==`FNSL)? pack(shift_l):0);
+        // AND OR XOR
+        let logic_output= ((fn==`FNXOR || fn==`FNOR)?op1_xor_op2:0) |
+                          ((fn==`FNOR || fn==`FNAND)?op1&op2:0);
+        let shift_logic=zeroExtend(pack(fn==`FNSEQ || fn==`FNSNE || fn >= `FNSLT)&compare_out) |
+                        logic_output|shift_output;
+        Bit#(64) final_output = (fn==`FNADD || fn==`FNSUB)?adder_output:shift_logic;
+        if(word32)
+            final_output=signExtend(final_output[31:0]);
+        if(inst_type==MEMORY && mem_access==Atomic) // TODO see if this can be avoided
+            final_output=op1;
+        /*============================================ */
+        /*====== generate the effective address to jump to ====== */
+        Bit#(`VADDR) branch_address=truncate(immediate_value)+pc;
+        Bit#(`VADDR) next_pc=pc+4;
+        Bit#(`VADDR) effective_address=0;
+        Bit#(2) new_state=prediction; // 2-bit saturating counter: forced to 3 on jumps, otherwise stepped by the compare result
+        if(inst_type==JAL || inst_type==JALR)
+            new_state='b11;
+        else if(final_output[0]==1)begin
+            if(new_state<3)
+                new_state=new_state+1;
+        end
+        else begin
+            if(new_state>0)
+                new_state=new_state-1;
+        end
+        Training_data#(`VADDR) bp_train = Training_data{pc:pc,branch_address:branch_address,state:new_state};
+        Maybe#(Training_data#(`VADDR)) training_data=tagged Invalid;
+        Maybe#(Bit#(`VADDR)) ras_push=tagged Invalid;
+        // final_output[0] carries the branch condition result for BRANCH instructions
+        if(inst_type==BRANCH && final_output[0]==1)
+            perfmonitors[`COND_BRANCH_TAKEN]=1;
+        // taken branch / JAL: pc+imm; FENCEI / not-taken branch: pc+4; everything else (JALR): the ALU result
+        if((inst_type==BRANCH && final_output[0]==1) || inst_type==JAL)
+            effective_address=branch_address;
+        else if(inst_type==FENCEI || (inst_type==BRANCH && final_output[0]==0))
+            effective_address=next_pc;
+        else begin
+            effective_address=truncate(final_output);
+            bp_train.branch_address=truncate(final_output);
+        end
+        if(inst_type==JAL || inst_type==JALR)
+            final_output=signExtend(next_pc); // link value written to rd
+        `ifdef simulate
+            if(inst_type==BRANCH)
+                final_output=0;
+        `endif
+        /*======================================================== */
+        /*==== Generate flush if prediction was wrong or FENCEI ========== */
+        if(inst_type==BRANCH || inst_type==JAL || ((rd != 'b00101 && rd!='b00001) && inst_type==JALR)) // JALR trains only when rd is neither x1 nor x5; the former "||" was always true
+            training_data=tagged Valid bp_train;
+        Flush_type flush=None;
+        if((inst_type==BRANCH || inst_type==JAL || inst_type==JALR) && effective_address!=npc)begin // resolved target differs from the pc that was followed
+            if(inst_type==BRANCH)
+                perfmonitors[`COND_BRANCH_MISPREDICTED]=1;
+            flush=AccessFlush;
+        end
+        else if(inst_type==FENCEI)
+            flush=Fence;
+        if((inst_type==JAL||inst_type==JALR) &&& rd matches 'b00?01) // TODO put on RAS only if rd = ra
+            ras_push=tagged Valid next_pc;
+        /*================================================================ */
+        Trap_type exception=tagged None;
+        if((inst_type==JALR || inst_type==JAL) && effective_address[1]!=0)
+            exception=tagged Exception Inst_addr_misaligned;
+        Execution_output result;
+        if(inst_type==MEMORY || inst_type==FENCE || inst_type == FENCEI)begin
+            result= tagged MEMORY (Memout{
+                address:final_output,
+                memory_data:immediate_value,
+                transfer_size:zeroExtend(funct3[1:0]),
+                signextend:~funct3[2],
+                mem_type:(inst_type==FENCE || inst_type==FENCEI)?Fence:mem_access
+                `ifdef atomic ,atomic_op:{pack(word32),fn} `endif });
+        end
+        else if(inst_type==SYSTEM_INSTR)begin
+            result=tagged SYSTEM (CSRInputs{rs1:op1,rs2:op2,rs1_addr:immediate_value[16:12],funct3:funct3,csr_address:immediate_value[11:0]});
+        end
+        else
+            result=tagged RESULT (Arithout{aluresult:final_output,fflags:0});
+        return tuple7(result,effective_address,flush, training_data,ras_push,exception,perfmonitors);
+    endfunction
+
+// module mkTb(Empty);
+//
+// rule test_alu;
+//  Bit#(64) op1='h8000000000004123;
+//  Int#(64) in1=unpack(op1);
+//  Bit#(64) op2='h8000000000004123;
+//  let {x,ea,flush}<-fn_alu(`FNSNE,op1,op2,'d0,'d0,BRANCH,'h800,'d3,Load,False);
+//  //$display("Output is: :%h Excepted: %h",x,(op1!=op2));
+//  $finish(0);
+// endrule
+// endmodule
+
+endpackage
diff --git a/src/core/branchpredictor.bsv b/src/core/branchpredictor.bsv
new file mode 100644
index 0000000..68cd7c9
--- /dev/null
+++ b/src/core/branchpredictor.bsv
@@ -0,0 +1,92 @@
+/*
+Copyright (c) 2013, IIT Madras
+All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +* Neither the name of IIT Madras nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+*/
+package branchpredictor;
+    /*===== Package imports ===== */
+    import BRAMCore::*;
+    import FIFO::*;
+    import FIFOF::*;
+    import SpecialFIFOs::*;
+    import LFSR::*;
+    import ConfigReg::*;
+    import DReg::*;
+    import Connectable::*;
+    import GetPut::*;
+    /*===== project imports==== */
+    import defined_types::*;
+    `include "defined_parameters.bsv"
+    /*========================= */
+    // Predictor interface: request carries (epoch, pc); response carries (epoch, pc, stored target, 2-bit state).
+    interface Ifc_branchpredictor;
+        interface Put#(Tuple2#(Bit#(3),Bit#(`VADDR))) send_prediction_request;
+        interface Get#(Tuple4#(Bit#(3),Bit#(`VADDR),Bit#(`VADDR),Bit#(2))) prediction_response;
+        method Action training (Maybe#(Training_data#(`VADDR)) training_data);
+    endinterface
+    // BTB_DEPTH-entry predictor held in two dual-port BRAMs, indexed by pc[btb_sizebits+1:2]; each tag entry is {valid, state[1:0], tag}.
+    (*synthesize*)
+    module mkbranchpredictor(Ifc_branchpredictor);
+        let btb_sizebits=valueOf(TLog#(`BTB_DEPTH));
+        let tag_sizebits=(`VADDR-(btb_sizebits+2));
+        let max_size=tag_sizebits+3;
+        BRAM_DUAL_PORT#(Bit#(TLog#(`BTB_DEPTH)),Bit#(`VADDR)) rg_target_addr <- mkBRAMCore2(valueOf(`BTB_DEPTH),False); // target address per entry
+        BRAM_DUAL_PORT#(Bit#(TLog#(`BTB_DEPTH)),Bit#(TAdd#(3,TSub#(TSub#(`VADDR, TLog#(`BTB_DEPTH)),2)))) rg_tag <- mkBRAMCore2(valueOf(`BTB_DEPTH),False); // {valid, state, tag} per entry
+        Reg#(Bit#(TSub#(TSub#(`VADDR,TLog#(`BTB_DEPTH)),2))) training_tag <-mkReg(0);
+        Reg#(Bit#(TLog#(`BTB_DEPTH))) training_index <-mkReg(0);
+        Reg#(Bool) rg_initialize <-mkReg(True); // True after reset until every tag entry has been written once
+        Reg#(Bit#(TAdd#(1,TLog#(`BTB_DEPTH)))) rg_index<-mkReg(0);
+        FIFOF#(Tuple2#(Bit#(3),Bit#(`VADDR))) capture_prediction_request <-mkLFIFOF(); // holds the request whose BRAM read is outstanding
+        rule initialize_brams(rg_initialize); // walks all entries through port b, writing valid=0, state='b01, tag=0
+            rg_tag.b.put(True,truncate(rg_index),{3'b001,'d0});
+            if(rg_index==(`BTB_DEPTH-1))begin
+                rg_initialize<=False;
+                rg_index<=0;
+            end
+            else
+                rg_index<=rg_index+1;
+        endrule
+        interface send_prediction_request = interface Put
+            method Action put(Tuple2#(Bit#(3),Bit#(`VADDR)) req)if(!rg_initialize);
+                let {epoch,vaddress} = req;
+                `ifdef verbose $display($time,"\tBPU: Prediction Request for Address: %h",vaddress); `endif
+                rg_target_addr.a.put(False,vaddress[btb_sizebits+1:2],?); // start the reads on port a of both BRAMs
+                rg_tag.a.put(False,vaddress[btb_sizebits+1:2],?);
+                capture_prediction_request.enq(req);
+            endmethod
+        endinterface;
+        interface prediction_response = interface Get
+            method ActionValue#(Tuple4#(Bit#(3),Bit#(`VADDR),Bit#(`VADDR),Bit#(2))) get if(!rg_initialize);
+                let {epoch,vaddress} = capture_prediction_request.first;
+                Bit#(`VADDR) target_address=rg_target_addr.a.read;
+                let info=rg_tag.a.read;
+                Bit#(TSub#(TSub#(`VADDR, TLog#(`BTB_DEPTH)),2)) tag=info[tag_sizebits-1:0]; // type spelled with TLog#(`BTB_DEPTH): btb_sizebits is a value, not a numeric type
+                Bit#(TSub#(TSub#(`VADDR, TLog#(`BTB_DEPTH)),2)) cpu_tag=vaddress[`VADDR-1:btb_sizebits+2];
+                Bit#(1) valid=info[tag_sizebits+2];
+                Bit#(1) tag_match=pack(tag==cpu_tag)&valid;
+                Bit#(2) state=(tag_match==1)?info[tag_sizebits+1:tag_sizebits]:'b01; // on a miss report the default state 'b01
+                let x= tuple4(epoch,vaddress,target_address,state);
+                capture_prediction_request.deq;
+                return x;
+            endmethod
+        endinterface;
+        method Action training (Maybe#(Training_data#(`VADDR)) training_data)if(!rg_initialize); //to train the bpu;
+            if(training_data matches tagged Valid .td)begin
+                let addr=td.branch_address;
+                Bit#(TLog#(`BTB_DEPTH)) index=td.pc[btb_sizebits+1:2];
+                Bit#(TSub#(TSub#(`VADDR, TLog#(`BTB_DEPTH)),2)) tag=td.pc[`VADDR-1:btb_sizebits+2];
+                `ifdef verbose $display($time,"\tBPU: training for PC: %h JumpAddr: %h index: %d State:",td.pc,addr,index,fshow(td.state)); `endif
+                rg_target_addr.b.put(True,index,addr); // port b carries the training writes
+                rg_tag.b.put(True,index,{1'b1,td.state,tag}); // valid=1 with an explicit 1-bit literal
+            end
+        endmethod
+    endmodule
+endpackage
diff --git a/src/core/core.bsv b/src/core/core.bsv
new file mode 100644
index 0000000..54902ff
--- /dev/null
+++ b/src/core/core.bsv
@@ -0,0 +1,274 @@
+/*
+Copyright (c) 2013, IIT Madras
+All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +* Neither the name of IIT Madras nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
*/
// Package core: top level of the processor core. It instantiates the pipeline
// (riscv), the instruction and data memory subsystems (imem, dmem) and the
// page-table walker (PTWalk), and exposes two AXI4 master ports.
package core;

  /* ======== Package imports ======= */
  import Vector :: *;
  import FIFO :: *;
  import ConfigReg ::*;
  import Connectable :: *;
  /*================================== */

  /*========= Project imports ======== */
  `include "defined_parameters.bsv"
  import defined_types ::*;
  import Semi_FIFOF ::*;
  import AXI4_Types :: *;
  import AXI4_Fabric :: *;
  import riscv :: *;
  import imem ::*;
  import dmem ::*;
  import GetPut ::*;
  import PTWalk ::*;
  /*================================== */


  // Declared here but not implemented or referenced anywhere else in this package.
  interface Ifc_external_interrupt;
    method Action enqueueInterrupts;
  endinterface

  // External interface of the core: one AXI4 master for instruction fetches, one
  // for data accesses, interrupt/boot inputs, and optional debug and CLINT ports.
  interface Ifc_core_AXI4;
    interface AXI4_Master_IFC#(`PADDR, `Reg_width, `USERSPACE) imem_master;
    interface AXI4_Master_IFC#(`PADDR, `Reg_width, `USERSPACE) dmem_master;
    method Action set_external_interrupt(Tuple2#(Bool,Bool) i);
    method Action boot_sequence(Bit#(1) bootseq);
    /* =========================== Debug Interface ===================== */
    `ifdef Debug
      method Action reset;
      method Action run_continue; // Execute all instructions until the end of instruction stream
      method Bool reset_complete;
      method Action stop; // Stop CPU
      method Bool halted ();
      method Bit#(`Reg_width) read_igpr (Bit#(5) r); // Read a General-Purpose Register
      method Action write_igpr (Bit#(5) r, Bit#(`Reg_width) d); // Write into a General-Purpose Register
      `ifdef spfpu
        method Bit#(`Reg_width) read_fgpr (Bit#(5) r); // Read register r through riscv.read_debug_fgpr (presumably the FP register file)
        method Action write_fgpr (Bit#(5) r, Bit#(`Reg_width) d); // Write register r through riscv.write_debug_fgpr (presumably the FP register file)
      `endif
      method ActionValue#(Bit#(`Reg_width)) rw_csr (Bit#(12) r, Bool write, Bit#(`Reg_width) data); // CSR access, forwarded to riscv.rw_csr
    `endif
    `ifdef CLINT
      method Action clint_msip(Bit#(1) intrpt);
      method Action clint_mtip(Bit#(1) intrpt);
      method Action clint_mtime(Bit#(`Reg_width) c_mtime);
    `endif
    /*-========================================================================== */
  endinterface

  // Not referenced by mkcore_AXI4 below.
  typedef enum {Handling_Dcache,Handling_Icache,Idle,Handling_Uncacheable} Controller_State deriving (Bits, Eq, FShow);

  // mkcore_AXI4: instantiates the pipeline (mkriscv), the instruction and data
  // memory subsystems (mkimem, mkdmem), the page-table walker (mkPTWalk) and one
  // AXI4 master transactor each for instruction and data traffic, then wires
  // them together with mkConnection and the rules below.
  //   reset_vector : passed unchanged to mkriscv.
  // With MMU defined, the preempts attribute makes dtlb_to_ptw block itlb_to_ptw
  // in any cycle where dtlb_to_ptw fires, so only one TLB request reaches the
  // walker per cycle and the dTLB wins.
  //(*mutually_exclusive="mkConnectionGetPut_7,mkConnectionGetPut_8")
  `ifdef MMU (*preempts="dtlb_to_ptw,itlb_to_ptw"*) `endif
  //`ifdef MMU (*preempts="mkConnectionGetPut_4,mkConnectionGetPut_3"*) `endif
  (*synthesize*)
  module mkcore_AXI4#(Bit#(`VADDR) reset_vector)(Ifc_core_AXI4);
    Ifc_riscv riscv <-mkriscv(reset_vector);
    AXI4_Master_Xactor_IFC #(`PADDR,`Reg_width,`USERSPACE) imem_xactor <- mkAXI4_Master_Xactor;
    AXI4_Master_Xactor_IFC #(`PADDR,`Reg_width,`USERSPACE) dmem_xactor <- mkAXI4_Master_Xactor;
    Ifc_imem imem <-mkimem();
    Ifc_dmem dmem <- mkdmem;
    Ifc_PTWalk#(`ADDR, `VADDR, 56, `PADDR, `ASID, `OFFSET) ptw <- mkPTWalk;
    // Carries a page-table entry from dmem (get_pte_entry) to the walker
    // (send_pte_entry) within the same cycle.
    Wire#(Bit#(`Reg_width)) wr_pte <- mkWire();
    // True when the request last forwarded to the walker came from the dTLB,
    // False when it came from the iTLB; selects where the walker's reply goes.
    Reg#(Bool) rg_serve_dTLB <- mkReg(False);

    // Pipeline <-> instruction memory and pipeline <-> data memory connections.
    mkConnection(riscv.request_to_imem,imem.request_from_core);
    mkConnection(imem.instruction_response_to_core,riscv.instruction_response_from_imem);
    mkConnection(riscv.request_to_dmem, dmem.request_from_cpu);
    mkConnection(dmem.response_to_cpu, riscv.response_from_dmem);
    `ifdef MMU
      // Forward an iTLB request to the walker and remember its origin.
      rule itlb_to_ptw;
        let x <- imem.to_PTW.get;
        ptw.frm_TLB.put(x);
        rg_serve_dTLB <=False;
      endrule
      // Forward a dTLB request to the walker and remember its origin.
      rule dtlb_to_ptw;
        let x <- dmem.to_PTW.get;
        ptw.frm_TLB.put(x);
        rg_serve_dTLB <=True;
      endrule
      // Return the walker's reply to whichever TLB issued the last request.
      rule ptw_to_itlb(!rg_serve_dTLB);
        let x <- ptw.to_TLB.get;
        imem.refill_TLB.put(x);
      endrule
      rule ptw_to_dtlb(rg_serve_dTLB);
        let x <- ptw.to_TLB.get;
        dmem.refill_TLB.put(x);
      endrule
      //mkConnection(imem.to_PTW, ptw.frm_TLB);
      //mkConnection(dmem.to_PTW, ptw.frm_TLB);
      //mkConnection(ptw.to_TLB, imem.refill_TLB);
      //mkConnection(ptw.to_TLB, dmem.refill_TLB);
    `endif
    // Branch-prediction request/response paths between pipeline and imem.
    mkConnection(imem.prediction_response,riscv.prediction_response);
    mkConnection(riscv.send_prediction_request,imem.send_prediction_request);
    // Pass the pipeline's training data to imem every cycle the rule can fire.
    rule connect_training;
      imem.training(riscv.training_data);
    endrule
//  (*conflict_free="connect_flush_to_imem,connect_flush_to_dmem"*)
//  rule connect_flush_to_imem;
//    Translation_type page_type = Execution;
//    if(riscv.flush_imem!=None) begin
//      ptw.flush(page_type);
//    end
//  endrule
    // Flush dmem whenever the pipeline asserts flush_dmem.
    rule connect_flush_to_dmem;
      Translation_type page_type = Load; // only used by the commented-out ptw.flush below
      if(riscv.flush_dmem)
        dmem.flush();
      // if(riscv.flush_dmem) begin
      //   ptw.flush(page_type);
      // end
    endrule

    `ifdef MMU
      //rule fence_iTLB;
      // `ifdef verbose $display($time ,"\tCORE: iTLB is being flushed"); `endif
      // imem.fence_itlb(dmem.fence_itlb);
      //endrule
      // Distribute satp, the permission bits and bit 0 of mmu_cache_disable from
      // the pipeline to the walker and both memory subsystems.
      rule send_permissions_to_tlb;
        ptw.satp_frm_csr(riscv.send_satp);
        imem.translation_protection_frm_csr(riscv.mmu_cache_disable[0],
                                            riscv.perm_to_TLB,
                                            riscv.send_satp[`Reg_width-1:`Reg_width-`ASID-4]);
        dmem.translation_protection_frm_csr(riscv.mmu_cache_disable[0],
                                            riscv.perm_to_TLB,
                                            riscv.send_satp[`Reg_width-1:`Reg_width-`ASID-4]);
      endrule
      // The walker fetches page-table entries through dmem: pointer out ...
      rule send_pte_pointer;
        let x <- ptw.ifc_memory.send_PTE_pointer;
        dmem.get_pte_pointer(x);
      endrule
      // ... entry back, via wr_pte.
      rule get_pte_entry;
        let x <- dmem.send_pte;
        wr_pte <= x;
      endrule
      rule send_pte_entry;
        ptw.ifc_memory.get_PTE(wr_pte);
      endrule
      // Propagate the pipeline's TLB fence signal to both TLBs.
      rule fence_tlbs;
        dmem.fence_dtlb(riscv.fence_tlbs);
        imem.fence_itlb(riscv.fence_tlbs);
      endrule
    `endif
    // dmem's stall_fetch value is forwarded to imem.stall_fetch.
    rule fence_stall_icache;
      imem.stall_fetch(dmem.stall_fetch);
    endrule
    // rg_update_a_bit and rg_update_b_bit are not read or written anywhere in this module.
    Reg#(Bool) rg_update_a_bit <- mkReg(False);
    Reg#(Bool) rg_update_b_bit <- mkReg(False);
    // Remaining data of a burst (line) write; Valid only while a burst is being sent.
    Reg#(Maybe#(Bit#(TMul#(8,TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE))))) rg_data_line <-mkReg(tagged Invalid);
    // Number of write-data beats already sent for the current burst.
    Reg#(Bit#(8)) rg_burst_count<-mkReg(0);
    // Set when a write is issued, cleared by its response. Port [0] (clear) is
    // scheduled before port [1] (test and set), so a new write may be issued in
    // the cycle the previous response is consumed.
    Reg#(Bool) rg_wait_for_response[2]<-mkCReg(2,False);
    // Turn a dcache read request into an AXI4 read-address beat (arid 0).
    // Single-beat requests use INCR, multi-beat requests use WRAP.
    rule check_read_request_to_memory_from_dcache;
      let info<-dmem.request_to_memory_read;
      Bit#(2) arburst=2;
      if(info.burst_length==1)
        arburst=1;
      let read_request = AXI4_Rd_Addr {araddr: truncate(info.address), aruser: 0, arlen: info.burst_length-1, arsize: zeroExtend(info.transfer_size), arburst: arburst, arid:'d0}; // arburst: 00-FIXED 01-INCR 10-WRAP
      dmem_xactor.i_rd_addr.enq(read_request);
      `ifdef verbose $display($time,"\tCORE: Sending Read Request from DCACHE for Address: %h Burst Length: %h",info.address,info.burst_length); `endif
    endrule
    // Issue a dcache write: the address beat plus the first data beat. Fires only
    // when no burst is in progress and no write response is outstanding.
    rule check_write_request_to_memory_from_dcache(rg_data_line matches tagged Invalid &&& !rg_wait_for_response[1]);
      let info<-dmem.request_to_memory_write;
      /*=== Need to shift the data appropriately while sending write requests===== */
      Bit#(`Reg_width) actual_data=info.data_line[`Reg_width-1:0];
      // Strobe for 1, 2, 4 or 8 bytes, selected by transfer_size (0, 1, 2, otherwise).
      Bit#(8) write_strobe=info.transfer_size==0?8'b1:info.transfer_size==1?8'b11:info.transfer_size==2?8'hf:8'hff;
      if(info.transfer_size!=3)begin // narrower than a full word: move the strobe to the addressed byte lanes
        write_strobe=write_strobe<<(info.address[`byte_offset:0]);
      end
//    info.address[2:0]=0; // also make the address 64-bit aligned
      /*========================================================================= */
      let aw = AXI4_Wr_Addr {awaddr: truncate(info.address), awuser:0, awlen: info.burst_length-1, awsize: zeroExtend(info.transfer_size), awburst: 'b01, awid:'d0}; // awburst: 00-FIXED 01-INCR 10-WRAP
      let w = AXI4_Wr_Data {wdata: actual_data, wstrb: write_strobe, wlast:info.burst_length>1?False:True, wid:'d0};
      dmem_xactor.i_wr_addr.enq(aw);
      dmem_xactor.i_wr_data.enq(w);
      `ifdef verbose $display($time,"\tCORE: Sending Write Request from DCACHE for Address: %h BurstLength: %h Data: %h WriteStrobe: %b",info.address,info.burst_length,info.data_line, write_strobe); `endif
      if(info.burst_length>1)begin // only enable the next rule when doing a line write in burst mode.
        rg_data_line<=tagged Valid (info.data_line>>`Reg_width);
        rg_burst_count<=rg_burst_count+1;
      end
      rg_wait_for_response[1]<=True;
    endrule
    // Send the remaining beats of a burst write, one word per firing; the final
    // beat (count == DCACHE_BLOCK_SIZE-1) carries wlast and ends the burst.
    rule send_burst_write_data(rg_data_line matches tagged Valid .data_line);
      /*== Since this is going to always be a line write request in burst mode No need of shifting data and address=== */
      let w = AXI4_Wr_Data {wdata: truncate(data_line), wstrb: 8'hff , wlast:(rg_burst_count==`DCACHE_BLOCK_SIZE-1), wid:'d0};
      dmem_xactor.i_wr_data.enq(w);
      `ifdef verbose $display($time,"\tCORE: Sending DCACHE Write Data: %h Burst: %d",data_line,rg_burst_count); `endif
      if(rg_burst_count==`DCACHE_BLOCK_SIZE-1)begin
        rg_burst_count<=0;
        rg_data_line<=tagged Invalid;
      end
      else begin
        rg_data_line<=tagged Valid (data_line>>`Reg_width);
        rg_burst_count<=rg_burst_count+1;
      end
    endrule
    // Turn an icache read request into an AXI4 WRAP read-address beat (arid 1).
    rule check_read_request_to_memory_from_icache;
      let info <-imem.request_to_memory;
      let read_request = AXI4_Rd_Addr {araddr: truncate(info.address), aruser: 0, arlen: info.burst_length-1, arsize: info.transfer_size, arburst: 'b10, arid:'d1}; // arburst: 00-FIXED 01-INCR 10-WRAP
      imem_xactor.i_rd_addr.enq(read_request);
      `ifdef verbose $display($time,"\tCORE: Sending Read Request from ICACHE for Address: %h Burst Length: %h",info.address,info.burst_length); `endif
    endrule
    // Hand a read-data beat (rid 0) to the dcache; any response other than
    // AXI4_OKAY is reported as a bus error.
    rule send_read_response_from_memory_to_dcache(dmem_xactor.o_rd_data.first.rid == 'd0);
      let response <- pop_o (dmem_xactor.o_rd_data);
      let bus_error_from_memory = (response.rresp==AXI4_OKAY) ? 0 : 1;
      `ifdef verbose $display($time,"\tCORE: Sending Response to DCACHE: ",fshow(response)); `endif
      dmem.response_from_memory_read(From_Memory{data_line:response.rdata,bus_error:bus_error_from_memory,last_word:response.rlast});
    endrule
    // Same as above for the icache (rid 1).
    rule send_read_response_from_memory_to_icache(imem_xactor.o_rd_data.first.rid == 'd1);
      let response <- pop_o (imem_xactor.o_rd_data);
      let bus_error_from_memory = (response.rresp==AXI4_OKAY) ? 0 : 1;
      `ifdef verbose $display($time,"\tCORE: Sending Response to ICACHE: ",fshow(response)); `endif
      imem.response_from_memory(From_Memory{data_line:response.rdata,bus_error:bus_error_from_memory, last_word:response.rlast});
    endrule
    // Consume the write response (bid 0), report it to the dcache and allow the
    // next write to be issued.
    rule send_write_response_to_dcache(rg_wait_for_response[0] && dmem_xactor.o_wr_resp.first.bid == 'd0);
      let response<-pop_o(dmem_xactor.o_wr_resp);
      let bus_error_from_memory = (response.bresp==AXI4_OKAY) ? 0 : 1;
      `ifdef verbose $display($time,"\tCORE: Received Write Response:",fshow(response)); `endif
      dmem.response_from_memory_write(From_Memory{data_line:0,bus_error:bus_error_from_memory,last_word:True});
      rg_wait_for_response[0]<=False;
    endrule

    // Interface definitions: the AXI sides of the two transactors; every method
    // is a direct forward to the corresponding riscv method.
    interface imem_master = imem_xactor.axi_side;
    interface dmem_master = dmem_xactor.axi_side;
    `ifdef Debug
      method run_continue=riscv.run_continue;
      method reset_complete=riscv.reset_complete;
      method stop=riscv.stop;
      method halted=riscv.halted;
      method Bit#(`Reg_width)read_igpr(Bit#(5) r);
        return riscv.read_debug_igpr(r);
      endmethod
      method Action write_igpr(Bit#(5) r, Bit#(`Reg_width)d);
        riscv.write_debug_igpr(r,d);
      endmethod
      `ifdef spfpu
        method Bit#(`Reg_width) read_fgpr(Bit#(5) r);
          return riscv.read_debug_fgpr(r);
        endmethod
        method Action write_fgpr(Bit#(5) r, Bit#(`Reg_width)d);
          riscv.write_debug_fgpr(r,d);
        endmethod
      `endif
      method ActionValue#(Bit#(`Reg_width)) rw_csr (Bit#(12) r, Bool write, Bit#(`Reg_width) data)=riscv.rw_csr(r,write,data);
      method Action reset=riscv.reset;
    `endif
    method Action boot_sequence(Bit#(1) bootseq)=riscv.boot_sequence(bootseq);
    method Action set_external_interrupt(Tuple2#(Bool,Bool) i)=riscv.set_external_interrupt(i);
    `ifdef CLINT
      method Action clint_msip(Bit#(1) intrpt)=riscv.clint_msip(intrpt);
      method Action clint_mtip(Bit#(1) intrpt)=riscv.clint_mtip(intrpt);
      method Action clint_mtime(Bit#(`Reg_width) c_mtime)=riscv.clint_mtime(c_mtime);
    `endif
  endmodule
endpackage
diff --git 
a/src/core/csr.bsv b/src/core/csr.bsv new file mode 100644 index 0000000..704db61 --- /dev/null +++ b/src/core/csr.bsv @@ -0,0 +1,1129 @@ +/* +Copyright (c) 2013, IIT Madras +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +* Neither the name of IIT Madras nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +*/ +package csr; + +/* ############################ changes from 1.9.1 ############################ + 1. 
base field in misa is changed to readOnlyfield of mxl as per the encoding
     and the same is reflected in sxl and uxl fields of mstatus
  2. change of fields in mstatus.
*/

  import defined_types::*;
  `include "defined_parameters.bsv"
  import ConfigReg::*;
  import GetPut ::*;
  import ConcatReg ::*;
  import Vector::*;

  // Interface of the CSR unit (mkcsr). Method groups are compiled in or out
  // with the MMU, Debug, simulate and CLINT macros.
  interface Ifc_csr;
    method Bit#(3) roundingmode;
    method Action set_external_interrupt(Tuple2#(Bool,Bool) i);
    method Action flush;
    (*always_ready,always_enabled*)
    method Action boot_sequence(Bit#(1) bootseq);
    /*======= MMU related interfaces ===== */
    `ifdef MMU
      method Bit#(`Reg_width) send_satp;
      method Chmod perm_to_TLB;
    `endif
    method Bit#(`Reg_width) mmu_cache_disable;
    /*=========================================== */
    /*=========== Debug related interfaces ====== */
    `ifdef Debug
      method Bool halted;
      method ActionValue#(Bit#(`Reg_width)) rw_debug_csr(Bit#(12) r, Bool write, Bit#(`Reg_width) data);
      method TriggerData load_triggerdata;
      method TriggerData store_triggerdata;
      method Bool step_now;
      method Bool reset_mode;
    `endif
    /*=========================================== */
    // The three debug request arguments of check_for_trap exist only when Debug is defined.
    method ActionValue#(Tuple2#(Bit#(3),Trap_type)) check_for_trap(`ifdef Debug Bool haltreq, Bool resumereq, Bool resetreq, `endif Bit#(`VADDR) pc, Bit#(32) instruction);
    // The instruction/rd_type/destination arguments of system_instruction exist only when simulate is defined.
    method ActionValue#(Tuple4#(Bool,Bit#(`VADDR),Bit#(`Reg_width),Bool)) system_instruction(WriteBackType wbdata ,Bit#(`VADDR) pc, Bit#(`PERFMONITORS) perfmonitor_incr `ifdef simulate , Bit#(32) instruction, Operand_type rd_type, Bit#(5) destination `endif );
    method ActionValue#(Tuple2#(Bit#(`VADDR), Bool)) take_trap(Trap_type exception, Bit#(3) lv_debugcause, Bit#(`VADDR) pc, Bit#(`VADDR) badaddr);
    method Bit#(`Reg_width) misa;
    method Bit#(2) powercontrol;
    method Action poweracknowledge(Bit#(2) pa);
    `ifdef CLINT
      method Action clint_msip(Bit#(1) intrpt);
      method Action clint_mtip(Bit#(1) intrpt);
      method Action clint_mtime(Bit#(`Reg_width) c_mtime);
    `endif
    method Bit#(2) inferred_xlen;
  endinterface

  // Map a CSR instruction's funct3 field to its mnemonic (for trace output).
  // Values 0 and 4 have no CSR mnemonic and yield "NOIDEA".
  function String csr_funct(Bit#(3) funct3);
    case(funct3)
      'd1:return "CSRRW";
      'd2:return "CSRRS";
      'd3:return "CSRRC";
      'd5:return "CSRRWI" ;
      'd6:return "CSRRSI";
      'd7:return "CSRRCI" ;
      default: return "NOIDEA";
    endcase
  endfunction

  // Wrap a value as a register interface: reads return r, writes are ignored.
  function Reg#(t) readOnlyReg(t r);
    return (interface Reg;
      method t _read = r;
      method Action _write(t x) = noAction;
    endinterface);
  endfunction

  // Combine two registers into one view: reads return r1 | r2, writes update
  // r1 only (r2 is left untouched).
  function Reg#(Bit#(a)) extInterruptReg(Reg#(Bit#(a)) r1, Reg#(Bit#(a)) r2);
    return (interface Reg;
      method Bit#(a) _read = r1 | r2;
      method Action _write(Bit#(a) x);
        r1._write(x);
      endmethod
    endinterface);
  endfunction

  // Wrap register r so that every write also performs action a; reads are
  // passed straight through to r.
  function Reg#(t) writeSideEffect(Reg#(t) r, Action a);
    return (interface Reg;
      method t _read = r._read;
      method Action _write(t x);
        r._write(x);
        a;
      endmethod
    endinterface);
endfunction

  // mkcsr: the CSR unit. Only the opening of the module is part of this span;
  // the code below is unchanged.
  (*synthesize*)
  (*conflict_free="take_trap, set_external_interrupt"*)
  (*conflict_free="check_for_trap,system_instruction"*)
  module mkcsr(Ifc_csr);

    Reg#(Bool) rg_initialize[2]<-mkCReg(2,False);
    Reg#(Bit#(12)) rg_index<-mkReg(0);
    `ifdef simulate
      Reg#(Bool) wr_endsimulation <-mkReg(False);
      Reg#(Bit#(1)) rg_cnt <-mkReg(0);
      let dump <- mkReg(InvalidFile) ;
      // Fires once (while rg_cnt is 0): opens the dump file "rtl.dump" for
      // writing and ends the simulation if that fails.
      rule open_file(rg_cnt==0);
        String dumpFile = "rtl.dump" ;
        File lfh <- $fopen( dumpFile, "w" ) ;
        if ( lfh == InvalidFile )begin
          `ifdef verbose $display("cannot open %s", dumpFile); `endif
          $finish(0);
        end
        dump <= lfh ;
        rg_cnt <= 1 ;
      endrule
    `endif
    /////////////////////////////// Machine level register /////////////////////////
    // Current Privilege Level
    Reg#(Privilege_mode) rg_prv <- mkConfigReg(Machine); // resets to machine mode


    Reg#(Bit#(`Reg_width)) csr_mvendorid = readOnlyReg(0);
    Reg#(Bit#(`Reg_width)) csr_marchid = readOnlyReg(0);
    Reg#(Bit#(`Reg_width)) csr_mimpid = readOnlyReg(0);
    Reg#(Bit#(`Reg_width)) csr_mhartid = readOnlyReg(0);
    //misa fields
    Reg#(Bit#(2)) rg_mxl
<- mkReg(2); + Bit#(26) temp_misa='d0; + temp_misa[8]=1; + temp_misa[20]=1; + `ifdef atomic temp_misa[0]=1; `endif + `ifdef dpfpu temp_misa[3]=1; `endif + `ifdef spfpu temp_misa[5]=1; `endif + `ifdef muldiv temp_misa[12]=1; `endif + `ifdef MMU temp_misa[18]=1; `endif + `ifdef Openocd + Reg#(Bit#(26)) rg_misa <- mkReg(temp_misa); + `else + Reg#(Bit#(26)) rg_misa <- mkReg(`MISA_BITS); + `endif + Reg#(Bit#(`Reg_width)) csr_misa = concatReg3(rg_mxl,readOnlyReg(0),rg_misa); + + // trap vector fields (same as CSR without bottom 2 bits) + Reg#(Bit#(2)) rg_mode_m <- mkReg(0); //default value 0 if pc to base or 1 if pc to base + 4xcause + Reg#(Bit#(TSub#(`PADDR,2))) rg_mtvec <- mkReg(`MTVEC_DEFAULT); + Reg#(Bit#(`Reg_width)) csr_mtvec=concatReg3(readOnlyReg(0),rg_mtvec, rg_mode_m); + + // mstatus fields + Reg#(Bit#(1)) rg_tsr <- mkReg(0); // WARL + Reg#(Bit#(1)) rg_tw <- mkReg(0); // WARL + Reg#(Bit#(1)) rg_tvm <- mkReg(0); // WARL + Reg#(Bit#(1)) rg_mxr <- mkReg(0); // Not required + Reg#(Bit#(1)) rg_sum <- mkReg(0); // Not required + Reg#(Bit#(1)) rg_mprv <- mkReg(0); + Reg#(Bit#(2)) rg_xs = readOnlyReg(0); + Reg#(Bit#(2)) rg_fs <- mkReg(2'b00); + Reg#(Bit#(2)) rg_mpp <- mkReg(2'b0); + Reg#(Bit#(2)) rg_hpp = readOnlyReg(0); + Reg#(Bit#(1)) rg_spp <- mkReg(0); + Reg#(Bit#(1)) rg_mpie <- mkReg(0); + Reg#(Bit#(1)) rg_hpie = readOnlyReg(0); + Reg#(Bit#(1)) rg_spie <- mkReg(0); + Reg#(Bit#(1)) rg_upie <- mkReg(0); + `ifdef Openocd + Reg#(Bit#(1)) rg_mie <- mkReg(1); + `else + Reg#(Bit#(1)) rg_mie <- mkReg(0); + `endif + Reg#(Bit#(1)) rg_hie = readOnlyReg(0); + Reg#(Bit#(1)) rg_sie <- mkReg(0); + Reg#(Bit#(1)) rg_uie <- mkReg(0); + Reg#(Bit#(1)) rg_sd = readOnlyReg(pack((rg_xs == 2'b11) || (rg_fs == 2'b11))); + Reg#(Bit#(`Reg_width)) csr_mstatus = concatReg24( + rg_sd, + readOnlyReg(0), + readOnlyReg(rg_mxl), readOnlyReg(rg_mxl), //sxl and uxl fields are hardwired to mxl in misa + readOnlyReg(9'b0), + rg_tsr, rg_tw, rg_tvm, + rg_mxr, rg_sum, rg_mprv, // memory privilege + 
rg_xs, rg_fs, // coprocessor states + rg_mpp, rg_hpp, rg_spp, // previous privileges + rg_mpie, rg_hpie, rg_spie, rg_upie, // previous interrupt enables + rg_mie, rg_hie, rg_sie, rg_uie); // interrupt enables + + // trap delegation fields + Reg#(Bit#(16)) rg_medeleg<-mkReg(0); + Reg#(Bit#(15)) rg_mideleg<-mkReg(0); + Reg#(Bit#(`Reg_width)) csr_medeleg = concatReg2(readOnlyReg(0),rg_medeleg); + Reg#(Bit#(`Reg_width)) csr_mideleg = concatReg2(readOnlyReg(0),rg_mideleg); + + // mie fields + Reg#(Bit#(1)) rg_meie <- mkReg(0); + Reg#(Bit#(1)) rg_heie = readOnlyReg(0); + Reg#(Bit#(1)) rg_seie <- mkReg(0); + Reg#(Bit#(1)) rg_ueie <- mkReg(0); + Reg#(Bit#(1)) rg_mtie <- mkReg(0); + Reg#(Bit#(1)) rg_htie = readOnlyReg(0); + Reg#(Bit#(1)) rg_stie <- mkReg(0); + Reg#(Bit#(1)) rg_utie <- mkReg(0); + Reg#(Bit#(1)) rg_msie <- mkReg(0); + Reg#(Bit#(1)) rg_hsie = readOnlyReg(0); + Reg#(Bit#(1)) rg_ssie <- mkReg(0); + Reg#(Bit#(1)) rg_usie <- mkReg(0); + `ifdef Openocd + Reg#(Bit#(1)) rg_dhalt<-mkReg(1); + Reg#(Bit#(1)) rg_dresume<-mkReg(1); + `else + Reg#(Bit#(1)) rg_dhalt<-mkReg(0); + Reg#(Bit#(1)) rg_dresume<-mkReg(0); + `endif + Reg#(Bit#(1)) rg_dreset<-mkReg(0); + Reg#(Bit#(`Reg_width)) csr_mie = concatReg16( + readOnlyReg(0), + rg_dreset,rg_dresume,rg_dhalt, + rg_meie, rg_heie, rg_seie, readOnlyReg(rg_ueie), + rg_mtie, rg_htie, rg_stie, readOnlyReg(rg_utie), + rg_msie, rg_hsie, rg_ssie, readOnlyReg(rg_usie)); + Reg#(Bool) rg_nmi <- mkReg(True); + + // mip fields + Reg#(Bit#(1)) rg_meip <- mkConfigReg(0); + Reg#(Bit#(1)) rg_heip = readOnlyReg(0); + Reg#(Bit#(1)) rg_seips <- mkReg(0); + Reg#(Bit#(1)) rg_seipe <- mkReg(0); + Reg#(Bit#(1)) rg_ueips <- mkReg(0); + Reg#(Bit#(1)) rg_ueipe <- mkReg(0); + Reg#(Bit#(1)) rg_seip = extInterruptReg(rg_seips,rg_seipe); + Reg#(Bit#(1)) rg_ueip = extInterruptReg(rg_ueips,rg_ueipe); + Reg#(Bit#(1)) rg_mtip <-mkReg(0); + Reg#(Bit#(1)) rg_htip = readOnlyReg(0); + Reg#(Bit#(1)) rg_stip <- mkReg(0); + Reg#(Bit#(1)) rg_utip <- mkReg(0); + 
Reg#(Bit#(1)) rg_msip <- mkReg(0); + Reg#(Bit#(1)) rg_hsip = readOnlyReg(0); + Reg#(Bit#(1)) rg_ssip <- mkReg(0); + Reg#(Bit#(1)) rg_usip <- mkReg(0); + + `ifdef RV64 + Reg#(Bit#(`Reg_width)) csr_mcycle[2]<-mkCReg(2,0); + Reg#(Bit#(`Reg_width)) csr_minstret[2]<-mkCReg(2,0); + `else + Reg#(Bit#(`Reg_width)) csr_mcycle[2]<-mkCReg(2,0); + Reg#(Bit#(`Reg_width)) csr_minstret[2]<-mkCReg(2,0); + Reg#(Bit#(`Reg_width)) csr_mcycleh[2]<-mkCReg(2,0); + Reg#(Bit#(`Reg_width)) csr_minstreth[2]<-mkCReg(2,0); + `endif + + // Machine Trap Handling + Reg#(Bit#(`Reg_width)) rg_mepc <- mkReg(0); + Reg#(Bit#(`VADDR)) rg_mtval <- mkReg(0); + Reg#(Bit#(`Reg_width)) csr_mscratch <- mkReg(0); + Reg#(Bit#(`Reg_width)) csr_mepc = rg_mepc; + Reg#(Bit#(1)) rg_interrupt <- mkReg(0); + Reg#(Bit#(31)) rg_lower_cause <- mkReg(0); + Reg#(Bit#(32)) rg_upper_cause <- mkReg(0); + Reg#(Bit#(`Reg_width)) csr_mcause= concatReg3(rg_interrupt, rg_upper_cause, rg_lower_cause); + Reg#(Bit#(`Reg_width)) csr_mtval = concatReg2(readOnlyReg(0), rg_mtval); + Reg#(Bit#(`Reg_width)) csr_mip = concatReg13( + readOnlyReg(0), + readOnlyReg(rg_meip), readOnlyReg(rg_heip), rg_seip, rg_ueip, + readOnlyReg(rg_mtip), readOnlyReg(rg_htip), rg_stip, rg_utip, + readOnlyReg(rg_msip), readOnlyReg(rg_hsip), rg_ssip, rg_usip); + + Reg#(Bit#(`Reg_width)) mip = concatReg13( + readOnlyReg(0), + rg_meip, rg_heip, rg_seip, rg_ueip, + rg_mtip, rg_htip, rg_stip, rg_utip, + rg_msip, rg_hsip, rg_ssip, rg_usip); + ////////////////////////////////////////////////////////////////////////////////////////// + ///////////////////////////////////Physical Memory Protection//////////////////////////// + Reg#(Bit#(`Reg_width)) csr_pmpcfg0 <- mkReg(0); + `ifndef RV64 + Reg#(Bit#(`Reg_width)) csr_pmpcfg1 <- mkReg(0); + `endif + Reg#(Bit#(`Reg_width)) csr_pmpcfg2 <- mkReg(0); + `ifndef RV64 + Reg#(Bit#(`Reg_width)) csr_pmpcfg3 <- mkReg(0); + `endif + + Reg#(Bit#(TSub#(`PADDR,2))) rg_pmpaddr[`PMPADDREND - `PMPADDRSTART +1]; + Reg#(Bit#(`Reg_width)) 
csr_pmpaddr[`PMPADDREND - `PMPADDRSTART +1]; + for(Integer i=0; i<(`PMPADDREND - `PMPADDRSTART +1); i =i+1) begin + rg_pmpaddr[i] <- mkReg(0); + csr_pmpaddr[i] = concatReg2(readOnlyReg(0), rg_pmpaddr[i]); + end + ////////////////////////////////////////////////////////////////////////////////////////// + // Counter enables + Reg#(Bit#(1)) rg_u_ir <- mkReg(0); + Reg#(Bit#(1)) rg_u_tm <- mkReg(0); + Reg#(Bit#(1)) rg_u_cy <- mkReg(0); + // Machine Counter Setup + Reg#(Bit#(32)) reg_mcounteren<-mkReg(0); + Reg#(Bit#(`Reg_width)) csr_mcounteren=concatReg2(readOnlyReg(32'd0),reg_mcounteren); + Reg#(Bit#(1)) rg_boot_seq<-mkReg(0); + Reg#(Bit#(`Reg_width)) csr_boot_seq =concatReg2(readOnlyReg(0),readOnlyReg(rg_boot_seq)); + Reg#(Bit#(2)) power_control_out <-mkReg(0); + Reg#(Bit#(2)) power_control_in <-mkReg(0); + Reg#(Bit#(`Reg_width)) csr_power_control = concatReg3(readOnlyReg(0),readOnlyReg(power_control_in),power_control_out); + + ////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////SUPERVISOR LEVEL REGISTERS////////////////////////////////// +`ifdef MMU + Reg#(Bit#(`Reg_width)) csr_sstatus = concatReg20( + rg_sd, + readOnlyReg(0), readOnlyReg(rg_mxl), readOnlyReg(12'b0), //uxl field + rg_mxr, rg_sum, readOnlyReg(1'b0), // memory privilege // + rg_xs, rg_fs, // coprocessor states + readOnlyReg(2'b0), readOnlyReg(2'b0), rg_spp, // previous privileges + readOnlyReg(1'b0), readOnlyReg(1'b0), rg_spie, rg_upie, // previous interrupt enables + readOnlyReg(1'b0), readOnlyReg(1'b0), rg_sie, rg_uie); // interrupt enables + + Reg#(Bit#(12)) rg_sedeleg<-mkReg(0); + Reg#(Bit#(15)) rg_sideleg<-mkReg(0); + Reg#(Bit#(`Reg_width)) csr_sedeleg = concatReg2(readOnlyReg(0),rg_sedeleg); + Reg#(Bit#(`Reg_width)) csr_sideleg = concatReg2(readOnlyReg(0),rg_sideleg); + + Reg#(Bit#(`Reg_width)) csr_sie = concatReg13( + readOnlyReg(0), + readOnlyReg(1'b0), readOnlyReg(1'b0), rg_seie, readOnlyReg(rg_ueie), + readOnlyReg(1'b0), 
readOnlyReg(1'b0), rg_stie, readOnlyReg(rg_utie), + readOnlyReg(1'b0), readOnlyReg(1'b0), rg_ssie, readOnlyReg(rg_usie)); + + Reg#(Bit#(2)) rg_mode_s <- mkReg(0); //default value 0 if pc to base or 1 if pc to base + 4xcause + Reg#(Bit#(TSub#(`Reg_width,2))) rg_stvec <- mkReg(`STVEC_DEFAULT); + Reg#(Bit#(`Reg_width)) csr_stvec=concatReg2(rg_stvec,rg_mode_s); + Reg#(Bit#(32)) rg_scounteren <- mkReg(0); + Reg#(Bit#(`Reg_width)) csr_scounteren = concatReg2(readOnlyReg(0),rg_scounteren); + + //Supervisor Trap Handling Register + Reg#(Bit#(`VADDR)) rg_stval <- mkReg(0); + Reg#(Bit#(`Reg_width)) csr_sscratch <- mkReg(0); + Reg#(Bit#(`Reg_width)) csr_sepc <- mkReg(0); + Reg#(Bit#(`Reg_width)) csr_scause <- mkReg(0); + Reg#(Bit#(`Reg_width)) csr_stval = concatReg2(readOnlyReg(0), rg_stval); + Reg#(Bit#(`Reg_width)) csr_sip = concatReg13( + readOnlyReg(0), + readOnlyReg(1'b0), readOnlyReg(1'b0), readOnlyReg(rg_seip), rg_ueip, + readOnlyReg(1'b0), readOnlyReg(1'b0), readOnlyReg(rg_stip), readOnlyReg(rg_utip), + readOnlyReg(1'b0), readOnlyReg(1'b0), rg_ssip, readOnlyReg(rg_usip)); + + //Supervisor Protection and Translation + Reg#(Bit#(`Reg_width)) csr_satp <- mkReg(0); +`endif + ////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////// User level registers /////////////////////////////////// + `ifdef RV64 + Reg#(Bit#(`Reg_width)) csr_uinstret=readOnlyReg(csr_minstret[1]); + Reg#(Bit#(`Reg_width)) csr_ucycle=readOnlyReg(csr_mcycle[1]); + `else + Reg#(Bit#(`Reg_width)) csr_uinstret=readOnlyReg(csr_minstret[1]); + Reg#(Bit#(`Reg_width)) csr_ucycle=readOnlyReg(csr_mcycle[1]); + Reg#(Bit#(`Reg_width)) csr_uinstreth=readOnlyReg(csr_minstreth[1]); + Reg#(Bit#(`Reg_width)) csr_ucycleh=readOnlyReg(csr_mcycleh[1]); + `endif + + Reg#(Bit#(`Reg_width)) rg_clint_mtime <-mkReg(0); + Reg#(Bit#(5)) rg_fflags<-mkReg(0); + Reg#(Bit#(3)) rg_frm<-mkReg(0); + Reg#(Bit#(`Reg_width)) csr_fcsr = 
writeSideEffect(concatReg3(readOnlyReg(0),rg_frm,rg_fflags),rg_fs._write(2'b11)); + Reg#(Bit#(`Reg_width)) csr_fflags=writeSideEffect(concatReg2(readOnlyReg(0),rg_fflags),rg_fs._write(2'b11)); + Reg#(Bit#(`Reg_width)) csr_frm = writeSideEffect(concatReg2(readOnlyReg(0),rg_frm),rg_fs._write(2'b11)); + Reg#(Bit#(4)) rg_memse <- mkReg(0); //0th-bit set -> immu disable,1th-bit set -> icache,2nd-bit -> dmmu, 3rd bit -> dcache + Reg#(Bit#(`Reg_width)) csr_memse = concatReg2(readOnlyReg(0),rg_memse); + + ////////////////////////////////////////////////////////////////////////////////////////// + ///////////////////////////////////////// PErformance Counters ////////////////////////// + `ifdef perf + Array#(Reg#(Bit#(64))) csr_mhpmcounter[`MHPMCOUNTEND-`MHPMCOUNTSTART+1]; + Reg#(Bit#(`Reg_width)) csr_mhpmevent[`MHPMCOUNTEND-`MHPMCOUNTSTART+1]; + for(Integer i=0;i<=(`MHPMCOUNTEND-`MHPMCOUNTSTART);i=i+1)begin + csr_mhpmcounter[i]<-mkCReg(2,0); + end + csr_mhpmevent[0]<-mkReg('h20000000); + csr_mhpmevent[1]<-mkReg('h20000); + csr_mhpmevent[2]<-mkReg('h4000); + `endif + + ////////////////////////////////////////////////////////////////////////////////////////// + ///////////////////////////////////////// Debug Registers ///////////////////////////////// + `ifdef Debug + `ifdef Openocd + Reg#(Bool) resetmode[2]<-mkCReg(2,False); // TODO + Reg#(Bit#(1)) ebreakm<-mkReg(1); + Reg#(Bit#(1)) ebreaks<-mkReg(1); + Reg#(Bit#(1)) ebreaku<-mkReg(1); + `else + Reg#(Bool) resetmode[2]<-mkCReg(2,False); // TODO + Reg#(Bit#(1)) ebreakm<-mkReg(0); + Reg#(Bit#(1)) ebreaks<-mkReg(0); + Reg#(Bit#(1)) ebreaku<-mkReg(0); + `endif + Reg#(Bit#(1)) stopcount<-mkReg(0); + Reg#(Bit#(1)) stoptime <-mkReg(0); + Reg#(Bit#(3)) debugcause<-mkReg(0); + Reg#(Bit#(1)) step<-mkReg(0); + Reg#(Bit#(2)) debugprv<-mkReg(0); + Reg#(Bit#(32)) dcsr=concatReg13(readOnlyReg(4'd4), readOnlyReg(12'd0), + ebreakm,readOnlyReg(1'b0),ebreaks,ebreaku, + readOnlyReg(1'b0), + stopcount,stoptime, + 
readOnlyReg(debugcause),readOnlyReg(3'd0), + step,debugprv); + Reg#(Bit#(`VADDR)) dpc <-mkReg(0); + Reg#(Bit#(`Reg_width)) csr_dpc=concatReg2(readOnlyReg('d0),dpc); + Reg#(Bit#(`Reg_width)) dscratch0<-mkReg(0); + Reg#(Bool) debugmode_active[2]<-mkCReg(2,False); + Reg#(Bit#(`Reg_width)) debugentry<-mkReg(`DebugBase); + ////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////// Trigger Registers //////////////////////////////////////// + Reg#(Bit#(`Reg_width)) tselect=readOnlyReg('d0); + Reg#(Bit#(4)) trigger_type=readOnlyReg('d2); + Reg#(Bit#(1)) dmode=readOnlyReg(1'd0); + Reg#(Bit#(6)) maskmax=readOnlyReg(6'd0); + Reg#(Bit#(1)) select<-mkReg(0); + Reg#(Bit#(1)) timing=readOnlyReg(1'b0); + Reg#(Bit#(6)) triggeraction=readOnlyReg(6'd0); // always enter debug mode. + Reg#(Bit#(1)) chain<-mkReg(0); + Reg#(Bit#(4)) triggermatch<-mkReg(0); + Reg#(Bit#(1)) triggerm<-mkReg(0); + Reg#(Bit#(1)) triggers<-mkReg(0); + Reg#(Bit#(1)) triggeru<-mkReg(0); + Reg#(Bit#(1)) execute<-mkReg(0); + Reg#(Bit#(1)) store<-mkReg(0); + Reg#(Bit#(1)) load<-mkReg(0); + Reg#(Bit#(`Reg_width)) tdata1=concatReg16(trigger_type,dmode,maskmax, + readOnlyReg(33'd0),select,timing, + triggeraction,chain,triggermatch, + triggerm,readOnlyReg(1'b0),triggers,triggeru, + execute,store,load); + Reg#(Bit#(`Reg_width)) tdata2<-mkReg(0); + Wire#(TriggerData) executetrigger<-mkDWire(TriggerData{ttype:tagged None,matchscheme:0}); + Wire#(TriggerData) loadtrigger<-mkDWire(TriggerData{ttype:tagged None,matchscheme:0}); + Wire#(TriggerData) storetrigger<-mkDWire(TriggerData{ttype:tagged None,matchscheme:0}); + Reg#(Bool) rg_step_now<-mkReg(False); + rule generate_trigger_info_decode; + if(execute==1 && ((rg_prv==User &&triggeru==1)||(rg_prv==Supervisor &&triggers==1)||(rg_prv==Machine &&triggerm==1)) ) + executetrigger<=TriggerData{ttype:select==0?tagged Address tdata2:tagged Data tdata2,matchscheme:triggermatch}; + if(load==1&& ((rg_prv==User 
&&triggeru==1)||(rg_prv==Supervisor &&triggers==1)||(rg_prv==Machine &&triggerm==1)) )
        loadtrigger<=TriggerData{ttype:select==0?tagged Address tdata2:tagged Data tdata2,matchscheme:triggermatch};
      if(store==1&& ((rg_prv==User &&triggeru==1)||(rg_prv==Supervisor &&triggers==1)||(rg_prv==Machine &&triggerm==1)) )
        storetrigger<=TriggerData{ttype:select==0?tagged Address tdata2:tagged Data tdata2,matchscheme:triggermatch};
    endrule
  `endif
`ifdef Debug
  // Decide whether an execute trigger matches.
  //   tdata       : trigger to evaluate. ttype carries tdata2 tagged as Address
  //                 or Data; matchscheme is the mcontrol "match" field.
  //   pc1         : program counter, zero-extended to Reg_width before comparing.
  //   instruction : instruction word, compared against the low 32 bits of a
  //                 Data trigger.
  // Address triggers follow the debug-spec encoding of "match", where the
  // compared value is the pc and tdata2 is the reference:
  //   0: pc == tdata2
  //   2: pc >= tdata2 (unsigned)
  //   3: pc <  tdata2 (unsigned)
  //   4: (low half of pc  & high half of tdata2) == low half of tdata2
  //   5: (high half of pc & high half of tdata2) == low half of tdata2
  // Any other scheme, and a trigger of any other ttype, never matches.
  function Bool checktrigger(TriggerData tdata, Bit#(`VADDR) pc1, Bit#(32) instruction);
    Bit#(`Reg_width) pc=zeroExtend(pc1);
    if(tdata.ttype matches tagged Address .addr) begin
      if(tdata.matchscheme==0 && addr==pc)
        return True;
      else if(tdata.matchscheme==2 && pc>=addr) // value >= tdata2
        return True;
      else if(tdata.matchscheme==3 && pc<addr)  // value < tdata2
        return True;
      else if(tdata.matchscheme==4 && addr[31:0]==(addr[63:32]&pc[31:0]))
        return True;
      else if(tdata.matchscheme==5 && addr[31:0]==(addr[`Reg_width-1:32]&pc[`Reg_width-1:32]))
        return True;
      else
        return False;
    end
    else if(tdata.ttype matches tagged Data .data) begin
      if(data[31:0]==instruction)
        return True;
      else
        return False;
    end
    else
      return False;
  endfunction
`endif
/////////// Functions to access CSRs /////////////////////////////////////////////////////////////
  // User-level read-only counters, selected by the low 8 bits of the CSR
  // address. Unknown addresses yield a register that reads 0 and ignores writes.
  function Reg#(Bit#(`Reg_width)) read_user_sro_registers(Bit#(8) addr);
    Reg#(Bit#(`Reg_width)) csr=(case(addr)
      `UCYCLE :csr_ucycle;
      `UTIME :readOnlyReg(rg_clint_mtime);
      `UINSTRET :csr_uinstret;
      `ifndef RV64
        `UCYCLEH :csr_ucycleh;
        `UINSTRETH :csr_uinstreth;
      `endif
      default:readOnlyReg(0);
    endcase);
    return csr;
  endfunction
  // User-level read-write CSRs (floating-point status and UMEMSE), selected by
  // the low 8 bits of the CSR address.
  function Reg#(Bit#(`Reg_width)) read_user_srw_registers(Bit#(8) addr);
    Reg#(Bit#(`Reg_width)) csr=(case(addr)
      `FFLAGS :csr_fflags ;
      `FRM :csr_frm ;
      `FCSR :csr_fcsr ;
      `UMEMSE :csr_memse;
      default: readOnlyReg(0);
    endcase);
    return csr;
  endfunction
  `ifdef MMU
  function Reg#(Bit#(`Reg_width)) read_supervisor_srw_registers(Bit#(8) addr);
Reg#(Bit#(`Reg_width)) csr=(case(addr) + `SSTATUS :csr_sstatus; + `SEDELEG :csr_sedeleg; + `SIDELEG :csr_sideleg; + `SIE :csr_sie; + `STVEC :csr_stvec; + `SCOUNTEREN :csr_scounteren; + `SSCRATCH :csr_sscratch; + `SEPC :csr_sepc; + `SCAUSE :csr_scause; + `STVAL :csr_stval; + `SIP :csr_sip; + `SATP :csr_satp; + default:readOnlyReg(0); + endcase); + return csr; + endfunction + `endif + function Reg#(Bit#(`Reg_width)) read_machine_srw_registers(Bit#(8) address); + Reg#(Bit#(`Reg_width)) csr=(case(address) + `MSTATUS :csr_mstatus; + `MISA :csr_misa; + `MEDELEG :csr_medeleg; + `MIDELEG :csr_mideleg; + `MIE :csr_mie; + `MTVEC :csr_mtvec; + `MCOUNTEREN :csr_mcounteren; + `MSCRATCH :csr_mscratch; + `MEPC :csr_mepc; + `MCAUSE :((rg_mxl==1)?concatReg3(readOnlyReg(32'd0), rg_interrupt, rg_lower_cause):csr_mcause); + `MTVAL :csr_mtval; + `MIP :csr_mip; + `MPOWERCONTROL :csr_power_control; + `PMPCFG0 :csr_pmpcfg0; + `ifndef RV64 + `PMPCFG1 :csr_pmpcfg1; + `endif + `PMPCFG2 :csr_pmpcfg2; + `ifndef RV64 + `PMPCFG3 :csr_pmpcfg3; + `endif + default: begin + `ifdef perf + if(address>=`PMPADDRSTART && address<=`PMPADDREND) // lower 4 bits of the counter + csr_pmpaddr[address[3:0]]; + else if(address>=`MHPMEVENTSTART && address<=`MHPMEVENTEND) // lower 32 bits of the counter + csr_mhpmcounter[(address-3)[1:0]][1]; + else + `endif + readOnlyReg(0); + end + endcase); + return csr; + endfunction + function Reg#(Bit#(`Reg_width))read_machine_sro_registers(Bit#(8) address); + Reg#(Bit#(`Reg_width)) csr=(case(address) + `MVENDORID:csr_mvendorid; + `MARCHID :csr_marchid; + `MIMPID :csr_mimpid; + `MHARTID :csr_mhartid; + `MBOOTSEQ : csr_boot_seq; + default:readOnlyReg(0); + endcase); + return csr; + endfunction + + function Reg#(Bit#(`Reg_width)) read_machine_counters(Bit#(8) address); + Reg#(Bit#(`Reg_width)) csr=(case(address) + `MCYCLE :csr_mcycle[1]; + `MINSTRET :csr_minstret[1]; + `ifndef RV64 + `MCYCLEH :csr_mcycleh[1]; + `MINSTRETH :csr_minstreth[1]; + `endif + default: begin + `ifdef 
perf + if(address>=`MHPMCOUNTSTART && address<=`MHPMCOUNTEND) // lower 32 bits of the counter + csr_mhpmcounter[(address-3)[1:0]][1]; + else + `endif + readOnlyReg(0); + end + endcase); + return csr; + endfunction +`ifdef Debug + function Reg#(Bit#(`Reg_width)) read_debug_registers(Bit#(8) address); + Reg#(Bit#(`Reg_width)) csr=(case(address) + `DCSR: concatReg2(readOnlyReg(32'd0),dcsr); + `DPC: csr_dpc; + `DSCRATCH0: dscratch0; + `DENTRY : debugentry; + `TSELECT : tselect; + `TDATA1 : tdata1; + `TDATA2 : tdata2; + default:readOnlyReg(0); + endcase); + return csr; + endfunction +`endif + function Reg#(Bit#(`Reg_width)) read_csr(Bit#(12) addr); + Reg#(Bit#(`Reg_width)) csr=( + case(addr[11:8]) + 'h0: read_user_srw_registers(truncate(addr)); // user standard read-write + 'hC: read_user_sro_registers(truncate(addr)); // user standard read-only + `ifdef MMU 'h1: read_supervisor_srw_registers(truncate(addr)); `endif // supervisor read-write + 'h3: read_machine_srw_registers(truncate(addr)); // machine standard read-write + 'hF: read_machine_sro_registers(truncate(addr)); // machine standard read-only + 'hB: read_machine_counters(truncate(addr)); // machine standard counters + `ifdef Debug 'h7: read_debug_registers(truncate(addr)); `endif + default: readOnlyReg(0);//read_perfcounter(address); + endcase + ); + return csr; + endfunction + + function Bool hasCSRPermission(Bit#(12) address, Bool write); + Bit#(12) csr_index = pack(address); + Bool check_counter_permission = True; + if(address >= 12'hB00 && address <= 12'hB1F) begin + check_counter_permission = False; + if(pack(rg_prv) == 3) + check_counter_permission = True; + else if(pack(rg_prv) == 1 && csr_mcounteren[address[4:0]]==1) + check_counter_permission = True; + `ifdef MMU + else if(pack(rg_prv) == 0 && csr_scounteren[address[4:0]]==1) + check_counter_permission = True; + `endif + end + return ((pack(rg_prv) >= csr_index[9:8]) && check_counter_permission && !(write && csr_index[11:10]==2'b11) );//(!write || 
(csr_index[11:10] != 2'b11))) || check_counter_permission);
+  endfunction
+
+  // if the operand is not 0 then the instruction will perform a write on the CSR.
+  // An operation code of 'b01 is also treated as a write, whatever the operand.
+  function Bool valid_csr_access(Bit#(12) csr_addr, Bit#(5) operand, Bit#(2) operation);
+    Bool ret = hasCSRPermission(unpack(csr_addr), (operand != 0 || operation=='b01) ? True:False);
+    return ret;
+  endfunction
+
+  // Returns True when `csr_address` falls inside one of the address windows
+  // accepted below. Bits [11:8] select the CSR group; bits [7:0] are range
+  // checked per group. Groups that are not listed are rejected.
+  function Bool address_valid(Bit#(12) csr_address);
+    case(csr_address[11:8])
+      'h0: begin
+        if((csr_address[7:0]>'h5 && csr_address[7:0]<'h40) ||
+           (csr_address[7:0]>'h44))
+          return False;
+        else
+          return True;
+      end
+      'h1:begin
+        if((csr_address[7:0]==1)||
+           (csr_address[7:0]>'h6 && csr_address[7:0]<'h40) ||
+           (csr_address[7:0]>'h44 && csr_address[7:0]!='h80))
+          return False;
+        else
+          return True;
+      end
+      'h3: begin // machine read-write registers
+        if((csr_address[7:0]>'h6 && csr_address[7:0]<'h23) ||
+           (csr_address[7:0]>'h26 && csr_address[7:0]<'h40) ||
+           (csr_address[7:0]>'h44 && csr_address[7:0]<='hA0) ||
+           (csr_address[7:0]>'hA3 && csr_address[7:0]<'hB8) ||
+           (csr_address[7:0]>'hbf))
+          return False;
+        else
+          return True;
+      end
+      'h7: begin
+        if((csr_address[7:0]<'hA0)||
+           (csr_address[7:0]>'hA3 && csr_address[7:0]<'hB0)||
+           (csr_address[7:0]>'hB2))
+          return False;
+        else
+          return True;
+      end
+      'hB:begin
+        if((csr_address[7:0]>'h6 && csr_address[7:0]<'h80 && csr_address[7:0]!='h20)||
+           (csr_address[7:0]>'h86 && csr_address[7:0]<'hA0)||
+           (csr_address[7:0]>'hA6))
+          return False;
+        else
+          return True;
+      end
+      'hC:begin
+        if((csr_address[7:0]>'h6 && csr_address[7:0]<'h83)||
+           (csr_address[7:0]>'h86))
+          return False;
+        else
+          return True;
+      end
+      'hF:begin
+        if(csr_address[7:0]<'h11 || csr_address[7:0]>'h15)
+          return False;
+        else
+          return True;
+      end
+      default:return False;
+    endcase
+  endfunction
+
+  // Adds one to the cycle counter every time the rule fires. With Debug
+  // defined the rule is guarded by stopcount==0. On RV64 the counter is the
+  // single register csr_mcycle; otherwise csr_mcycleh:csr_mcycle are
+  // concatenated into one 64-bit value, incremented, and written back as
+  // halves. The concatenation below used to close with ')' instead of '}'.
+  rule increment_cycle_counter `ifdef Debug (stopcount==0) `endif ;
+    `ifdef RV64
+      csr_mcycle[0]<=csr_mcycle[0]+1;
+    `else
+      Bit#(64) new_cycle={csr_mcycleh[0],csr_mcycle[0]};
+
new_cycle=new_cycle+1;
+      csr_mcycle[0]<=new_cycle[31:0];
+      csr_mcycleh[0]<=new_cycle[63:32];
+    `endif
+  endrule
+
+
+  // Check pending interrupts
+  // Builds a 15-bit pending mask from csr_mip (plus, with Debug defined, bits
+  // 12/13/14 for halt/resume/reset requests), ANDs it with csr_mie, then keeps
+  // only the interrupts whose target mode currently has interrupts enabled:
+  //   * not delegated by csr_mideleg -> machine level, enabled when rg_mie==1
+  //     or the hart runs below Machine;
+  //   * delegated by csr_mideleg but not csr_sideleg -> supervisor level (MMU
+  //     builds only), enabled when the hart runs below Supervisor, or runs in
+  //     Supervisor with rg_sie==1.
+  // The supervisor enable previously demanded rg_sie==1 in User mode as well,
+  // unlike the machine-level rule directly above it.
+  function ActionValue#(Trap_type) fn_chk_pending_interrupt(`ifdef Debug Bool haltreq, Bool resumereq , Bool resetreq`endif )=
+  actionvalue
+    `ifdef Debug
+      Bit#(15) pending_debug_interrupt=0;
+      if(haltreq && !debugmode_active[0] && !resetmode[0])
+        pending_debug_interrupt[12]=1;
+      if(resumereq && debugmode_active[0] && !resetmode[0])
+        pending_debug_interrupt[13]=1;
+      if(resetreq && !resetmode[0] && debugmode_active[0])
+        pending_debug_interrupt[14]=1;
+    `endif
+    // NOTE(review): lv_csr_mip (csr_mip with rg_nmi OR-ed into bit 11) is
+    // computed here but the mask below reads csr_mip directly, so rg_nmi does
+    // not reach pending_interrupts in this function - confirm the intent.
+    Bit#(`Reg_width) lv_csr_mip = csr_mip;
+    lv_csr_mip[11]=lv_csr_mip[11]|pack(rg_nmi);
+    Bit#(15) pending_interrupts = (truncate(csr_mip)`ifdef Debug |pending_debug_interrupt `endif ) & truncate(csr_mie) ;
+    `ifdef verbose $display("Pending_interrupts in the beginning csr_mip : %b pending_interrupt: %b", csr_mip, pending_interrupts); `endif
+    // machine mode
+    let pending_machine_interrupts = pending_interrupts & ~truncate(csr_mideleg);
+    let machine_interrupts_enabled = (rg_mie == 1) || (pack(rg_prv) < pack(Machine));
+    //supervisor mode
+    `ifdef MMU
+      let pending_supervisor_interrupts = pending_interrupts & truncate(csr_mideleg) & ~truncate(csr_sideleg);
+      let supervisor_interrupts_enabled = (pack(rg_prv) < pack(Supervisor)) || (rg_prv == Supervisor && rg_sie == 1);
+    `endif
+    // user mode
+    // combined
+    pending_interrupts = (machine_interrupts_enabled ? pending_machine_interrupts : 0)
+      `ifdef MMU |(supervisor_interrupts_enabled ?
pending_supervisor_interrupts : 0) `endif ; + + // format pendingInterrupt value to return + Trap_type ret = tagged None; + if (pending_interrupts != 0) begin + ret = tagged Interrupt unpack(zeroExtend(pack(countZerosLSB(pending_interrupts)))); + end + `ifdef verbose $display("Debug interrupts: %h pending_interrupt: %h csr_mie: %h rg_mie: %b ret: ",`ifdef Debug pending_debug_interrupt `else 0 `endif ,pending_interrupts,csr_mie,rg_mie,fshow(ret)); `endif + return ret; + endactionvalue; + + + method ActionValue#(Tuple2#(Bit#(3),Trap_type)) check_for_trap(`ifdef Debug Bool haltreq, Bool resumereq, Bool resetreq, `endif Bit#(`VADDR) pc, Bit#(32) instruction)if(!rg_initialize[1] `ifdef simulate && !wr_endsimulation `endif ); + Trap_type trap_type=tagged None; + Bit#(3) lv_debugcause=0; + let opcode=instruction[6:2]; + if(opcode==`CSR_op)begin + case(instruction[14:12]) + 'd0:case (instruction[31:20]) + 'h000: // ECALL + trap_type=tagged Exception(case(rg_prv) User: Ecall_from_user; Supervisor:Ecall_from_supervisor;Machine:Ecall_from_machine;endcase); + 'h001:begin // EBREAK + `ifdef Debug + Bit#(4) ebreak={ebreakm,0,ebreaks,ebreaku}; + if((ebreak)[pack(rg_prv)]==1) + trap_type=tagged Interrupt DebugInterrupt; + else + `endif + trap_type=tagged Exception Breakpoint; + lv_debugcause=1; + end + 'h102:begin // SRET + if(pack(rg_prv)4) + trap_type=tagged Exception Illegal_inst; + end + if((opcode[4:3]=='b10 || opcode==`FSTORE_op || opcode==`FLOAD_op) && rg_fs==0) + trap_type=tagged Exception Illegal_inst; + `ifdef Debug + if(checktrigger(executetrigger,pc,instruction))begin + `ifdef verbose $display("TRAP: Trigger Fired Debug Interupt"); `endif + trap_type=tagged Exception Breakpoint; + lv_debugcause=2; + end + `endif + let pending_interrupt <- fn_chk_pending_interrupt(`ifdef Debug haltreq,resumereq, resetreq `endif );// TODO but resume request here + if(pending_interrupt matches tagged Interrupt .interrupt) begin + `ifdef verbose $display($time,"\tinterrupt injected in to 
pipeline"); `endif + trap_type=tagged Interrupt interrupt; + `ifdef Debug + if(interrupt ==DebugInterrupt) + lv_debugcause=(step==1)?4:3; + `endif + end + return tuple2(lv_debugcause,trap_type); + endmethod + + method ActionValue#(Tuple4#(Bool,Bit#(`VADDR),Bit#(`Reg_width),Bool)) system_instruction(WriteBackType wbdata ,Bit#(`VADDR) pc, Bit#(`PERFMONITORS) perfmonitor_incr `ifdef simulate , Bit#(32) instruction, Operand_type rd_type, Bit#(5) destination `endif )if(!rg_initialize[1] `ifdef simulate && !wr_endsimulation `endif ); + Bool flush=True; // TODO flush for only writting on certain csr registers + Bool commit=False; + Bit#(`VADDR) jump_address=pc+4; + Bit#(`Reg_width) destination_value=0; + /*====== Execute the current instruction and generate a halt interrupt on the next ===== */ + `ifdef Debug + `ifdef verbose $display($time,"CSR: STEP_NOW : %b",rg_step_now); `endif + if(step==1 && !debugmode_active[0] && !rg_step_now) + rg_step_now<=True; + else + rg_step_now<=False; + `endif + /*======================================================================================= */ + if(wbdata matches tagged SYSTEM .csr)begin + let csr_reg=read_csr(csr.csr_address); + if(csr.funct3==0)begin + case (csr.csr_address[11:8]) matches + 'h3:begin // MRET + Privilege_mode next_prv =unpack(rg_mpp); + rg_mpie <= 1; + rg_mpp <= pack(User); + rg_prv <= next_prv; + jump_address=truncate(csr_mepc); + rg_mie<=rg_mpie; + end + `ifdef MMU + 'h1:begin // SRET + if(csr.csr_address[5]==0)begin + Privilege_mode next_prv =unpack({1'b0,rg_spp}); + rg_spie <= 1; + rg_spp <= pack(User)[0]; + rg_prv <= next_prv; + jump_address=truncate(csr_sepc); + rg_sie<=rg_spie; + end + else begin // SFENCE + jump_address=pc+4; + `ifdef simulate + Bit#(64) pc1=signExtend(pc[38:0]); + $fwrite(dump, rg_prv," 0x%16h",pc1, " (0x%8h", instruction,")" ); + $fwrite(dump," x%d",destination," 0x%16h",destination_value,"\n"); + `endif + `ifdef Debug + if(resetmode[0]) + resetmode[0]<=False; + `endif + end + end + 
`endif + `ifdef Debug + 'h7:begin // DRET + jump_address=dpc; + rg_prv<=unpack(debugprv); + debugmode_active[0]<=False; + end + `endif + endcase + `ifdef Debug + if(stopcount==0 && !debugmode_active[0]) begin + csr_minstret[0]<=csr_minstret[0]+1; + end + `endif + for(Integer i=0;i<=(`MHPMCOUNTEND-`MHPMCOUNTSTART);i=i+1) + if((csr_mhpmevent[i]&perfmonitor_incr)!=0 `ifdef Debug && stopcount==0 `endif ) + csr_mhpmcounter[i][1]<=csr_mhpmcounter[i][1]+1; + `ifdef verbose + for(Integer i=0;i<=(`MHPMCOUNTEND-`MHPMCOUNTSTART);i=i+1)begin + $display($time,"\tEVENT: :%h %s",csr_mhpmevent[i],event_name(csr_mhpmevent[i])," : %d",csr_mhpmcounter[i][1]); + end + `endif + end + else begin + destination_value=csr_reg; + `ifdef verbose $display($time,"\tCSR: Dest: %h Value: %h rs1: %h funct3: %d rs1_addr: %d",csr.csr_address,csr_reg,csr.rs1,csr.funct3,csr.rs1_addr); `endif + commit=True; + case(csr.funct3) + 'd1: csr_reg <= csr.rs1; // CSRRW + 'd2:if(csr.rs1_addr!=0) csr_reg <= csr.rs1 | csr_reg; // CSRRS + 'd3:if(csr.rs1_addr!=0) csr_reg <= ~(csr.rs1) & csr_reg; // CSRRC + 'd5: csr_reg <= zeroExtend(csr.rs1_addr); // CSRRWI + 'd6:if(csr.rs1_addr!=0) csr_reg <= zeroExtend(csr.rs1_addr) | csr_reg; // CSRRSI + 'd7:if(csr.rs1_addr!=0) csr_reg <= ~(zeroExtend(csr.rs1_addr)) & csr_reg; //CSRRCI + endcase + `ifdef simulate + Bit#(64) pc1=signExtend(pc[38:0]); + $fwrite(dump, rg_prv," 0x%16h",pc1, " (0x%8h", instruction,")" ); + $fwrite(dump," x%d",destination," 0x%16h",destination_value); + $fwrite(dump,"\n"); + `endif + end + end + else if(wbdata matches tagged RESULT .res)begin + for(Integer i=0;i<=(`MHPMCOUNTEND-`MHPMCOUNTSTART);i=i+1) + if((csr_mhpmevent[i]&perfmonitor_incr)!=0 `ifdef Debug && stopcount==0 `endif ) + csr_mhpmcounter[i][1]<=csr_mhpmcounter[i][1]+1; + `ifdef verbose + for(Integer i=0;i<=(`MHPMCOUNTEND-`MHPMCOUNTSTART);i=i+1)begin + $display($time,"\tEVENT: :%h %s",csr_mhpmevent[i],event_name(csr_mhpmevent[i])," : %d",csr_mhpmcounter[i][1]); + end + `endif + `ifdef 
simulate + Bit#(64) pc1=signExtend(pc[38:0]); + $fwrite(dump, rg_prv," 0x%16h",pc1, " (0x%8h", instruction,")" ); + `endif + commit=True; + `ifdef Debug + if(stopcount==0 && !debugmode_active[0])begin + csr_minstret[0]<=csr_minstret[0]+1; + end + `endif + flush=False; + destination_value=res.aluresult; + let newfflags=res.fflags; + let fpudirty=False; + if((newfflags|rg_fflags)!=rg_fflags)begin + rg_fflags<=newfflags|rg_fflags; + fpudirty=True; + end + if(fpudirty) + if(rg_fs==2'b0)begin + `ifdef verbose $display("Error: FPU id Dirty and FX field is 0"); `endif + end + `ifdef simulate + Bit#(64) dat=signExtend(res.aluresult); + `ifdef spfpu + if(rd_type==FloatingRF) + `ifdef dpfpu + $fwrite(dump," f%d",destination," 0x%16h",dat); + `else + $fwrite(dump," f%d",destination," 0x%16h",{32'hffffffff,dat[31:0]}); + `endif + else + `endif + $fwrite(dump," x%d",destination," 0x%16h",dat); + $fwrite(dump,"\n"); + `endif + end + return tuple4(flush,jump_address,destination_value, commit); + endmethod + + method ActionValue#(Tuple2#(Bit#(`VADDR), Bool)) take_trap(Trap_type exception, Bit#(3) lv_debugcause, Bit#(`VADDR) pc, Bit#(`VADDR) badaddr)if(!rg_initialize[1]); + Bit#(`VADDR) jump_address=0; + Bool flush=True; + if(exception matches tagged Exception .ex)begin + if(ex==Inst_addr_misaligned || ex==Inst_access_fault || ex==Inst_pagefault) + badaddr=pc; + else if(ex==Illegal_inst) + badaddr=0; + else if(ex!=Load_pagefault && ex!=Load_access_fault && ex!=Load_addr_misaligned && ex!=Store_addr_misaligned && ex!=Store_pagefault && ex!=Store_access_fault) + badaddr=0; + end + else + badaddr=0; + `ifdef verbose $display($time,"\tTrap Type: ",fshow(exception)," debugcause: %d",lv_debugcause," BaddAddr: %h",badaddr); `endif + `ifdef Debug + if(exception matches tagged Interrupt .in &&& in==DebugResume)begin + if(debugmode_active[0])begin + rg_prv<=unpack(debugprv); + debugmode_active[0]<=False; + jump_address=truncate(dpc); + end + end + else if(exception matches tagged Interrupt 
.in &&& in==DebugInterrupt)begin + debugmode_active[0]<=True; + if(!debugmode_active[0])begin + dpc<=pc; + debugcause<=lv_debugcause; + debugprv<=pack(rg_prv); + rg_prv<=Machine; + if(lv_debugcause==4) + rg_step_now<=False; + end + jump_address=truncate(debugentry); + end + else if(exception matches tagged Interrupt .in &&& in==DebugReset)begin + resetmode[0]<=True; + jump_address='h1000; + rg_prv<=Machine; + end + else if(!debugmode_active[0] && !resetmode[0])begin + `endif + Bit#(`Reg_width) cause = 0; + Bit #(TSub #(`Reg_width, 1)) cause_code = 0; + Bit #(1) cause_type = 0; + case (exception) matches + tagged Interrupt .i: begin cause_type = 1; cause_code = zeroExtend(pack(i)); end + tagged Exception .e: begin cause_type = 0; cause_code = zeroExtend(pack(e)); + `ifdef simulate if(e==Endsimulation) begin + for(Integer i=0;i<=(`MHPMCOUNTEND-`MHPMCOUNTSTART);i=i+1)begin + $display($time,"\tEVENT: %s",event_name(csr_mhpmevent[i])," : %d",csr_mhpmcounter[i][1]); + end + $finish(0) /*wr_endsimulation <=True*/ ; + end + `endif + end + endcase + cause = {cause_type, cause_code}; + `ifdef MMU + Bool delegToS = (pack(rg_prv) <= pack(Supervisor)) && (case (exception) matches + tagged Exception .exceptionCause:begin (((csr_medeleg >> pack(exceptionCause)) & 1) != 0);end + tagged Interrupt .interruptCause: (((csr_mideleg >> pack(interruptCause)) & 1) != 0); + endcase); + if(delegToS)begin + //if(exception matches tagged Exception .ex) + // if(ex==Inst_addr_misaligned || ex==Inst_access_fault || ex==Inst_pagefault || ex==Illegal_inst + // || ex==Load_access_fault || ex==Load_addr_misaligned || ex==Load_pagefault + // || ex==Store_addr_misaligned || ex==Store_access_fault || ex==Store_pagefault) + csr_stval<=zeroExtend(badaddr); + csr_sepc<=signExtend(pc); + csr_scause<=cause; + rg_spp <= pack(rg_prv)[0]; + rg_sie <=0; + rg_spie <= rg_sie;//(case (rg_prv) User: rg_uie; Supervisor : rg_sie; endcase); + jump_address=truncate(csr_stvec); + rg_prv <= Supervisor; + end + else begin 
+ `endif + rg_prv <= Machine; + if(exception matches tagged Exception .ex) + if(ex==Inst_addr_misaligned || ex==Inst_access_fault || ex==Inst_pagefault || ex==Illegal_inst + || ex==Load_access_fault || ex==Load_addr_misaligned || ex==Load_pagefault + || ex==Store_addr_misaligned || ex==Store_access_fault || ex==Store_pagefault) + csr_mtval<=zeroExtend(badaddr); + csr_mepc<=signExtend(pc); + if(rg_mxl==1) + csr_mcause<= zeroExtend({cause[63], cause[30:1]}); + else + csr_mcause<=cause; + rg_mie <= 0; + rg_mpp <= pack(rg_prv); + jump_address=truncate(csr_mtvec); + rg_mpie <= rg_mie;//(case (rg_prv) User: rg_uie; `ifdef MMU Supervisor : rg_sie; `endif Machine: rg_mie; endcase); + `ifdef MMU + end + `endif + + `ifdef Debug + end + else begin + flush=False; + end + `endif + return tuple2(jump_address,flush); + endmethod + method Bit#(3) roundingmode if(!rg_initialize[1] `ifdef simulate && !wr_endsimulation `endif ); + return rg_frm; + endmethod + method Action set_external_interrupt(Tuple2#(Bool,Bool) ex_i) if(!rg_initialize[1] `ifdef simulate && !wr_endsimulation `endif ); + let {i,j} = ex_i; + rg_nmi <= j; + if(rg_prv == Machine) begin + `ifdef verbose $display("CSR : Machine external interrupt pending"); `endif + rg_meip <= pack(i); + end + else if(rg_prv == Supervisor) begin + rg_seipe <= pack(i); + end + else if(rg_prv == User) begin + rg_ueipe <= pack(i); + end + endmethod + method Action flush; + rg_initialize[0]<=True; + endmethod + `ifdef MMU + method Bit#(`Reg_width) send_satp; + return csr_satp; + endmethod + method Chmod perm_to_TLB; + return Chmod {mprv : rg_mprv, sum : rg_sum, mxr : rg_mxr, mpp : unpack(rg_mpp), prv : rg_prv}; + endmethod + `endif + method Bit#(`Reg_width) mmu_cache_disable; + return csr_memse; + endmethod + `ifdef Debug + method Bool halted; + return debugmode_active[1]; + endmethod + method load_triggerdata=loadtrigger; + method store_triggerdata=storetrigger; + method ActionValue#(Bit#(`Reg_width)) rw_debug_csr(Bit#(12) r, Bool write, 
Bit#(`Reg_width) data) if(!rg_initialize[1]); + let y=read_csr(r); + if(write) + y<=data; + return y._read; + endmethod + method Bool step_now=rg_step_now; + method Bool reset_mode=resetmode[1]; + `endif + method Bit#(`Reg_width) misa=csr_misa._read; + method Action boot_sequence(Bit#(1) bootseq); + rg_boot_seq<=bootseq; + endmethod + method Bit#(2) powercontrol=power_control_out; + method Action poweracknowledge(Bit#(2) pa); + power_control_in<=pa; + endmethod + `ifdef CLINT + method Action clint_msip(Bit#(1) intrpt); + rg_msip<=intrpt; + endmethod + method Action clint_mtip(Bit#(1) intrpt); + rg_mtip<=intrpt; + endmethod + method Action clint_mtime(Bit#(`Reg_width) c_mtime); + rg_clint_mtime<=c_mtime; + endmethod + `endif + method inferred_xlen=rg_mxl; + + endmodule +endpackage + diff --git a/src/core/dTLB.bsv b/src/core/dTLB.bsv new file mode 100755 index 0000000..bdd56fa --- /dev/null +++ b/src/core/dTLB.bsv @@ -0,0 +1,393 @@ +/* +Copyright (c) 2013, IIT Madras +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +* Neither the name of IIT Madras nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +*/ +package dTLB; +import defined_types::*; +import FIFO::*; +import SpecialFIFOs::*; +import GetPut::*; +import ConfigReg::*; + +`include "defined_parameters.bsv" + +`define TLB_entries 16 + +interface Ifc_TLB#(numeric type data_width, numeric type vaddr, numeric type paddr, numeric type page_size, numeric type asid_width); + method Action get_vaddr(DTLB_access#(data_width) addr `ifdef atomic , Bit#(5) atomic `endif ); + method ActionValue#(From_TLB#(data_width)) send_ppn; + method ActionValue#(Bit#(vaddr)) send_vaddress_for_cache_index; + method Action translation_protection_frm_csr(bit tlb_disable, Chmod per_bits, Bit#(TAdd#(4,asid_width)) asid); //TODO parameterise this + interface Get#(Request_PPN_PTW#(vaddr,page_size)) to_PTW; + interface Put#(Tuple2#(Bool,To_TLB#(paddr,page_size,asid_width))) refill_TLB; + method Action flush(Bool _flush); + method Action fence_TLB(Fence_VMA_type#(vaddr) rsdata); + //method ActionValue#(Bool) page_fault; + //method Action page_fault_frm_PTW; +endinterface + +module mkTLB(Ifc_TLB#(data_width,vaddr,paddr,page_size,asid_width)) +provisos( Add#(vpn, page_size, vaddr), + Mul#(8, num_bytes, data_width), + Log#(num_bytes, byte_addressable_bits), + Add#(vpn_split,byte_addressable_bits, page_size), + Mul#(2,vpn_split,intermediate1), + 
Mul#(3,vpn_split,intermediate2),
+          Add#(a_, paddr, data_width),
+          Add#(b_, vaddr, data_width),
+          Add#(c_, vpn_split, vpn),
+          Add#(d_, intermediate1, vpn),
+          Add#(e_, intermediate2, vpn),
+          Add#(ppn, page_size, paddr));
+
+  // Elaboration-time integer views of the numeric type parameters.
+  let v_vaddr = valueOf(vaddr);
+  let v_vpn = valueOf(vpn);
+  let v_ppn = valueOf(ppn);
+  let v_page_offset = valueOf(page_size);
+  let v_asid_width = valueOf(asid_width);
+  let v_vpn_split = valueOf(vpn_split);
+  let v_intermediate1 = valueOf(intermediate1);
+
+  // TLB storage: one register per field per entry, `TLB_entries entries.
+  Reg#(Bit#(vpn)) tlb_vpn[`TLB_entries];
+  Reg#(Bit#(ppn)) tlb_ppn[`TLB_entries];
+  Reg#(TLB_permissions) tlb_permissions[`TLB_entries];
+  Reg#(Bit#(asid_width)) tlb_asid[`TLB_entries];
+  Reg#(Bool) tlb_cacheable[`TLB_entries];
+  Reg#(Bit#(2)) tlb_levels[`TLB_entries];
+  for(Integer i = 0; i < `TLB_entries; i=i+1) begin
+    tlb_vpn[i] <- mkReg(0);
+    tlb_ppn[i] <- mkReg(0);
+    tlb_permissions[i] <- mkReg(TLB_permissions{v:0,r:0,w:0,x:0,u:0,g:0,a:0,d:0});
+    // tlb_asid is read by the lookup and fence logic and written on refill,
+    // so it needs a backing register like every other per-entry field; it
+    // was declared above but never instantiated in this loop.
+    tlb_asid[i] <- mkReg(0);
+    tlb_levels[i] <- mkReg(0);
+    tlb_cacheable[i] <- mkReg(True);
+  end
+  // Request in flight: vpn, page offset and access kind are enqueued together
+  // by get_vaddr and dequeued together.
+  FIFO#(Bit#(vpn)) ff_vpn <- mkBypassFIFO();
+  FIFO#(Bit#(page_size)) ff_page_offset <- mkBypassFIFO();
+  Reg#(Chmod) rg_chmod[2] <- mkCReg(2,Chmod { mprv : 0, mxr : 0, sum : 0, mpp : unpack(0), prv : unpack(0)});
+  Reg#(Bool) rg_page_fault[2] <- mkCReg(2,False);
+  Reg#(Bool) rg_hit[2] <- mkCReg(2,False);
+  Reg#(Bit#(2)) rg_levels[2] <- mkCReg(2,0);
+  Reg#(Bool) rg_handling_PTW[2] <- mkCReg(2,False);
+  Reg#(Bool) rg_tlb_disable <- mkConfigReg(False);
+  Reg#(Bool) rg_frm_ptw[2] <- mkCReg(2,False);
+  Reg#(Bit#(ppn)) rg_ppn[2] <- mkCReg(2,0);
+  Reg#(Bool) rg_cacheable[2] <- mkCReg(2,True);
+  Reg#(Bit#(asid_width)) rg_asid[2] <- mkCReg(2,0);
+  Reg#(Bit#(4)) rg_translation_mode[2] <- mkCReg(2,0);
+  FIFO#(Tuple2#(Access_type,Bit#(5))) ff_access <- mkBypassFIFO();
+  // Index of the entry overwritten by the next refill.
+  Reg#(Bit#(TLog#(`TLB_entries))) rg_slot_to_replace <- mkReg(0);
+  Wire#(Bool) wr_flush <- mkDWire(False);
+
+  rule rl_translation(!rg_handling_PTW[0] && !rg_tlb_disable && (rg_chmod[1].prv!=Machine) && !rg_page_fault[0]
+
&& (rg_translation_mode[1]!=0) && !wr_flush && tpl_1(ff_access.first())!=Fence); + Bit#(ppn) ppn = 0; + TLB_permissions perm_bits = TLB_permissions{v:0,r:0,w:0,x:0,u:0,g:0,a:0,d:0}; + Bool hit = False; + Bool page_fault = False; + Bool cacheable = False; + Bit#(vpn) vpn_bits = ff_vpn.first; + Bit#(vpn_split) lv_vpn_split= 0; + Bit#(intermediate1) lv_intermediate1 = 0; + Bit#(intermediate2) lv_intermediate2= 0; + Bit#(vpn) mask1 = {'1,lv_vpn_split}; + Bit#(vpn) vpnmask1 = vpn_bits & mask1; + Bit#(vpn) mask2 = {'1,lv_intermediate1}; + Bit#(vpn) vpnmask2 = vpn_bits & mask2; + Bit#(2) pg_levels = 0; + Integer slot = 0; + `ifdef verbose_torture $display($time, "\tThe acquired VPN in iTLB %h", ff_vpn.first); `endif + for(Integer i = 0; i < `TLB_entries; i = i + 1) begin + if((vpn_bits==tlb_vpn[i] && tlb_levels[i]==0 + || ((vpnmask1==(tlb_vpn[i] & mask1)) && tlb_levels[i]==1) + || ((vpnmask2==(tlb_vpn[i] & mask2)) && tlb_levels[i]==2)) + && (rg_asid[1]==tlb_asid[i] || tlb_permissions[i].g==1) && tlb_permissions[i].v==1) begin + + `ifdef verbose_torture $display($time, "\t tlb_permissions valid ", fshow(tlb_permissions[i])); `endif + ppn = tlb_ppn[i]; + perm_bits = tlb_permissions[i]; + pg_levels = tlb_levels[i]; + hit = True; + slot = i; + cacheable = tlb_cacheable[i]; + end + end + rg_levels[0] <= pg_levels; + if(hit) begin + if(rg_chmod[1].sum==0) begin + //if(rg_chmod[1].mprv==1) begin + // if(rg_chmod[1].mpp==unpack(1) && perm_bits.u==1) begin + // page_fault=True; + // $display($time, "\t dTLB: page fault 1"); + // end + //end + if(rg_chmod[1].prv==unpack(1) && perm_bits.u==1) begin + page_fault=True; + `ifdef verbose_torture $display($time, "\t dTLB: page fault 2"); `endif + end + end + if(tpl_1(ff_access.first())==Load `ifdef atomic || (tpl_1(ff_access.first)==Atomic && tpl_2(ff_access.first)[3:0]=='b0101) `endif ) begin + if(rg_chmod[1].mxr==1) begin + if(perm_bits.x==0 || perm_bits.r==0) begin + page_fault=True; + `ifdef verbose_torture $display($time, "\t dTLB: 
page fault 3"); `endif + end + end + else begin + if(perm_bits.r==0) begin + page_fault=True; + `ifdef verbose_torture $display($time, "\t dTLB: page fault 4"); `endif + end + end + end + else begin + if(perm_bits.w==0 || perm_bits.d==0) begin + page_fault=True; + `ifdef verbose_torture $display($time, "\t dTLB: page fault 5"); `endif + // if(perm_bits.d==0) + // perm_bits.v = 0; + // tlb_permissions[slot] <= perm_bits; + end + end + rg_ppn[0] <= ppn; + rg_cacheable[0] <= cacheable; + `ifdef verbose_torture $display($time, "\t dTLB: hit"); `endif + end + else begin + rg_handling_PTW[0] <= True; + `ifdef verbose_torture $display($time, "\t dTLB: miss"); `endif + end + rg_page_fault[0]<=page_fault; + `ifdef verbose_torture $display($time, "\t dTLB: The page fault is %b", page_fault); `endif + if(!page_fault) + rg_hit[0]<=hit; + else begin + perm_bits.v = 0; + tlb_permissions[slot] <= perm_bits; + end + endrule + + rule rl_flush_TLB(wr_flush); + ff_vpn.deq; + ff_page_offset.deq; + ff_access.deq; + rg_handling_PTW[1] <= False; + rg_hit[1] <= False; + rg_page_fault[1] <= False; + rg_frm_ptw[1] <= False; + endrule + + method Action get_vaddr(DTLB_access#(data_width) addr `ifdef atomic , Bit#(5) atomic `endif ); + `ifdef verbose_torture $display($time, "\t dTLB: Initiated translation through dTLB and disable is %b", rg_tlb_disable); `endif + ff_vpn.enq(addr.vaddr[v_vaddr-1: v_page_offset]); + ff_page_offset.enq(addr.vaddr[v_page_offset-1:0]); + ff_access.enq(tuple2(addr.ld_st_atomic, `ifdef atomic atomic `else 0 `endif )); + endmethod + + method ActionValue#(From_TLB#(data_width)) send_ppn if(rg_hit[1] || rg_tlb_disable + || (rg_chmod[1].prv==Machine) || rg_page_fault[1] || (rg_translation_mode[1]==0) + || tpl_1(ff_access.first())==Fence); + Trap_type e = tagged None; + Bit#(data_width) final_address; + Bit#(ppn) p_ppn = 0; + if(rg_levels[1]==0) begin + p_ppn = rg_ppn[1]; + end + else if(rg_levels[1]==1) begin + Bit#(TSub#(ppn,vpn_split)) lv_ppn_split = 
rg_ppn[1][v_ppn-1:v_vpn_split]; + Bit#(vpn_split) lv_vpn_split = ff_vpn.first[v_vpn_split-1:0]; + p_ppn = {lv_ppn_split,lv_vpn_split}; + end + else if(rg_levels[1]==2) begin + Bit#(TSub#(ppn,intermediate1)) lv_ppn_split = rg_ppn[1][v_ppn-1:v_intermediate1]; + Bit#(intermediate1) lv_vpn_split = ff_vpn.first[v_intermediate1-1:0]; + p_ppn = {lv_ppn_split,lv_vpn_split}; + end + if(rg_hit[1]) begin + rg_hit[1] <= False; + Bit#(paddr) paddress = {p_ppn,ff_page_offset.first()}; + final_address = zeroExtend(paddress); + rg_frm_ptw[0] <= False; + end + else if(rg_page_fault[1]) begin + `ifdef verbose_torture $display($time, "\t DTLB Page Fault"); `endif + if(tpl_1(ff_access.first())== Load `ifdef atomic || (tpl_1(ff_access.first)==Atomic && tpl_2(ff_access.first)[3:0]=='b0101) `endif ) + e = tagged Exception Load_pagefault; + else + e = tagged Exception Store_pagefault; + Bit#(vaddr) paddress = {ff_vpn.first(),ff_page_offset.first()}; + final_address = zeroExtend(paddress); + rg_page_fault[1] <= False; + rg_frm_ptw[0] <= False; + end + else begin + `ifdef verbose_torture $display($time, "\t dTLB: Bypass"); `endif + Bit#(vaddr) paddress = {ff_vpn.first(),ff_page_offset.first()}; + final_address = zeroExtend(paddress); + end + ff_page_offset.deq; + ff_vpn.deq; + ff_access.deq; + return From_TLB{exception : e, address : final_address, cacheable : rg_cacheable[1]}; + endmethod + + method ActionValue#(Bit#(vaddr)) send_vaddress_for_cache_index if(rg_frm_ptw[0]); + rg_frm_ptw[0] <= False; + return {ff_vpn.first, ff_page_offset.first}; + endmethod + + method Action translation_protection_frm_csr(bit tlb_disable, Chmod per_bits, Bit#(TAdd#(4,asid_width)) asid); + rg_tlb_disable <= unpack(tlb_disable); + rg_asid[0] <= asid[v_asid_width-1:0]; + rg_translation_mode[0] <= asid[v_asid_width+3:v_asid_width]; + per_bits.prv=per_bits.mprv==1?per_bits.mpp:per_bits.prv; + rg_chmod[0] <= per_bits; + `ifdef verbose_torture $display($time, "\t DTLB: mprv %b mxr %b sum %b mpp %b prv %b", 
per_bits.mprv, per_bits.mxr, per_bits.sum, pack(per_bits.mpp), pack(per_bits.prv)); `endif + endmethod + + interface to_PTW = interface Get + method ActionValue#(Request_PPN_PTW#(vaddr,page_size)) get if(rg_handling_PTW[1] && !rg_page_fault[1]); + return Request_PPN_PTW{ vpn : ff_vpn.first(), page_type : (tpl_1(ff_access.first())==Load `ifdef atomic || (tpl_1(ff_access.first)==Atomic && tpl_2(ff_access.first)[3:0]=='b0101) `endif )?Load:Store}; + endmethod + endinterface; + + interface refill_TLB = interface Put + method Action put(Tuple2#(Bool, To_TLB#(paddr,page_size,asid_width)) tlb_fill) if(rg_handling_PTW[0]); + let {x,tlb_structure} = tlb_fill; + rg_page_fault[1] <= x; + Bit#(paddr) paddress= {tlb_structure.ppn,ff_page_offset.first}; + Bit#(data_width) new_address = zeroExtend(paddress); + Bool cacheable = True; //!is_IO_Addr(zeroExtend(new_address)); TODO + if(!x) begin + rg_slot_to_replace <= rg_slot_to_replace + 1; + tlb_vpn[rg_slot_to_replace] <= ff_vpn.first(); + tlb_ppn[rg_slot_to_replace] <= tlb_structure.ppn; + tlb_permissions[rg_slot_to_replace] <= tlb_structure.tlb_perm; + tlb_levels[rg_slot_to_replace] <= tlb_structure.levels; + tlb_asid[rg_slot_to_replace] <= tlb_structure.asid; + tlb_cacheable[rg_slot_to_replace] <= cacheable; + end + rg_handling_PTW[0] <= False; + rg_frm_ptw[0] <= True; + `ifdef verbose_torture $display($time, "\t Filling TLB in slot %d with vpn %h with page levels i %d", rg_slot_to_replace, ff_vpn.first(), tlb_structure.levels); `endif + `ifdef verbose_torture $display($time, " dTLB: Filling TLB and page fault is %b", x); `endif + endmethod + endinterface; + + method Action flush(Bool _flush); + wr_flush <= _flush; + endmethod + + method Action fence_TLB(Fence_VMA_type#(vaddr) rsdata); + Bool flush_address = False; + Bool flush_address_space = False; + Bit#(vpn_split) lv_vpn_split= 0; + Bit#(intermediate1) lv_intermediate1 = 0; + Bit#(intermediate2) lv_intermediate2= 0; + Bit#(vpn) mask1 = {'1,lv_vpn_split}; + Bit#(vpn) 
vpnmask1 = rsdata.rs1[v_vaddr-1:v_page_offset] & mask1; + Bit#(vpn) mask2 = {'1,lv_intermediate1}; + Bit#(vpn) vpnmask2 = rsdata.rs1[v_vaddr-1:v_page_offset] & mask2; + if(rsdata.rs1!=0) begin + flush_address = True; + `ifdef verbose_torture $display($time, "\t dTLB address flush %h", rsdata.rs1); `endif + end + if(rsdata.rs2!=0) begin + flush_address_space = True; + `ifdef verbose_torture $display($time, "\t dTLB address space flush %h", rsdata.rs2); `endif + end + for(Integer i = 0; i < `TLB_entries; i = i+1) begin + if(((flush_address && ((rsdata.rs1[v_vaddr-1:v_page_offset] == tlb_vpn[i] && tlb_levels[i]==0) + || (vpnmask1 == (tlb_vpn[i] & mask1) && tlb_levels[i]==1) + || (vpnmask2 == (tlb_vpn[i] & mask2) && tlb_levels[i]==2))) + || (flush_address_space && rsdata.rs2[v_asid_width-1:0] == tlb_asid[i])) + || (!flush_address && !flush_address_space)) begin + `ifdef verbose_torture $display($time, "\t dTLB entry %d with vpn %h removed",i, tlb_vpn[i]); `endif + tlb_permissions[i] <= TLB_permissions{v : 0, r : 0, w : 0, x : 0, u : 0, g : 0, a : 0, d : 0}; + end + end + endmethod + + //method Action fence_TLB(Fence_VMA_type#(data_width) rsdata); + // Bool flush_address = False; + // Bool flush_address_space = False; + // if(rsdata.rs1!=0) begin + // flush_address = True; + // `ifdef verbose_torture $display($time, "\t dTLB address flush %h", rsdata.rs1); `endif + // end + // if(rsdata.rs2!=0) begin + // flush_address_space = True; + // `ifdef verbose_torture $display($time, "\t dTLB address space flush %h", rsdata.rs2); `endif + // end + // for(Integer i = 0; i < `TLB_entries; i = i+1) begin + // if(((flush_address && rsdata.rs1[v_vaddr-1:v_page_offset] == tlb_vpn[i]) + // || (flush_address_space && rsdata.rs2[v_asid_width-1:0] == tlb_asid[i])) + // || (flush_address && flush_address_space)) begin + // tlb_permissions[i] <= TLB_permissions{v : 0, r : 0, w : 0, x : 0, u : 0, g : 0, a : 0, d : 0}; + // end + // end + //endmethod + + //method ActionValue#(Bool) 
page_fault if(rg_page_fault[1]);
+    // ff_vpn.deq;
+    // ff_page_offset.deq;
+    // ff_access.deq;
+    // rg_hit[1] <= False;
+    // return True;
+    //endmethod
+
+    //method Action page_fault_frm_PTW if(rg_handling_PTW[0]);
+    // rg_page_fault[0] <= False;
+    //endmethod
+
+endmodule
+
+// Concrete data-TLB interface: the same methods and sub-interfaces that mkdTLB forwards to
+// the polymorphic Ifc_TLB, with every width fixed by the `ADDR / `VADDR / `PADDR / `OFFSET /
+// `ASID macros.
+interface Ifc_dTLB;
+  // Submit a virtual address (plus the atomic op code when `atomic is defined) for translation.
+  method Action get_vaddr(DTLB_access#(`ADDR) addr `ifdef atomic , Bit#(5) atomic `endif );
+  // Translation result.
+  method ActionValue#(From_TLB#(`ADDR)) send_ppn;
+  // Virtual address handed out for cache indexing.
+  method ActionValue#(Bit#(`VADDR)) send_vaddress_for_cache_index;
+  // CSR-derived translation controls: TLB disable bit, permission/privilege bits and ASID.
+  method Action translation_protection_frm_csr(bit tlb_disable, Chmod per_bits, Bit#(TAdd#(4,`ASID)) asid);
+  // Request channel towards the page-table walker.
+  interface Get#(Request_PPN_PTW#(`VADDR,`OFFSET)) to_PTW;
+  // Response channel from the page-table walker: (page-fault flag, entry to install).
+  interface Put#(Tuple2#(Bool, To_TLB#(`PADDR,`OFFSET,`ASID))) refill_TLB;
+  method Action flush(Bool _flush);
+  // Invalidate entries by address (rs1) and/or address space (rs2).
+  method Action fence_TLB(Fence_VMA_type#(`VADDR) rsdata);
+  //method ActionValue#(Bool) page_fault;
+  //method Action page_fault_frm_PTW;
+endinterface
+
+// Synthesizable wrapper: instantiates the polymorphic mkTLB with the project-wide widths and
+// forwards every method/sub-interface unchanged.
+// The mutually_exclusive attribute asserts to the compiler that refill_TLB.put and send_ppn
+// are never enabled in the same cycle.
+(*mutually_exclusive="refill_TLB_put, send_ppn"*)
+(*synthesize*)
+module mkdTLB(Ifc_dTLB);
+
+Ifc_TLB#(`ADDR,`VADDR,`PADDR,`OFFSET,`ASID) dtlb <- mkTLB();
+
+  method Action get_vaddr(DTLB_access#(`ADDR) addr `ifdef atomic , Bit#(5) atomic `endif );
+    dtlb.get_vaddr(addr `ifdef atomic ,atomic `endif );
+  endmethod
+  method ActionValue#(From_TLB#(`ADDR)) send_ppn = dtlb.send_ppn;
+  method ActionValue#(Bit#(`VADDR)) send_vaddress_for_cache_index = dtlb.send_vaddress_for_cache_index;
+  method Action translation_protection_frm_csr(bit tlb_disable, Chmod per_bits, Bit#(TAdd#(4,`ASID)) asid);
+    dtlb.translation_protection_frm_csr(tlb_disable,per_bits,asid);
+  endmethod
+  interface to_PTW = dtlb.to_PTW;
+  interface refill_TLB = dtlb.refill_TLB;
+  method Action flush(Bool _flush);
+    dtlb.flush(_flush);
+  endmethod
+  method Action fence_TLB(Fence_VMA_type#(`VADDR) rsdata);
+    dtlb.fence_TLB(rsdata);
+  endmethod
+
+  //method ActionValue#(Bool) page_fault = dtlb.page_fault;
+  //method Action page_fault_frm_PTW = dtlb.page_fault_frm_PTW;
+endmodule
+
+endpackage diff --git a/src/core/dcache_asic.bsv b/src/core/dcache_asic.bsv new file mode 100644 index 0000000..4a139f5 --- /dev/null +++ b/src/core/dcache_asic.bsv @@ -0,0 +1,745 @@ +/* +Copyright (c) 2013, IIT Madras +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +* Neither the name of IIT Madras nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+*/
+package dcache_asic;
+  /*===== Package imports ===== */
+  import BRAMCore::*;
+  import FIFO::*;
+  import FIFOF::*;
+  import SpecialFIFOs::*;
+  import LFSR::*;
+  import ConfigReg::*;
+  import DReg::*;
+  import BUtils::*;
+  import MemoryMap::*;
+  import mem_config1::*;
+  import Vector::*;
+  /*===== project imports==== */
+  import defined_types::*;
+  `include "defined_parameters.bsv"
+  import QuadMem::*;
+  import Assert::*;
+  /*========================= */
+  // Interface of the data cache as seen by the core and by the memory fabric.
+  interface Ifc_dcache;
+    // Core request: virtual address, access type, store data, transfer size, optional atomic
+    // op code (only when `atomic is defined), sign-extension flag and the instruction epoch.
+    method Action virtual_address(Bit#(`VADDR) vaddress, Access_type load_store, Bit#(TMul#(`DCACHE_WORD_SIZE,8)) writedata, Bit#(3) transfer_size, `ifdef atomic Bit#(5) atomic_op, `endif Bool signextend, Bit#(1) insnepoch);
+    // Response to the core, Invalid when nothing is available this cycle. The tuple carries a
+    // register-width data value, a trap indication, performance-monitor bits and a 1-bit epoch
+    // (same element types as the module's wr_response_to_cpu wire).
+    method Maybe#(Tuple4#(Bit#(`Reg_width), Trap_type,Bit#(`PERFMONITORS),Bit#(1))) response_to_core;
+    // Outgoing read (line fill / IO read) and write (write-back / IO write) requests.
+    method ActionValue#(To_Memory#(`PADDR)) read_request_to_memory;
+    method ActionValue#(To_Memory_Write) write_request_to_memory;
+    // Incoming responses for the requests above.
+    method Action read_response_from_memory(From_Memory#(`DCACHE_WORD_SIZE) resp);
+    method Action write_response_from_memory(From_Memory#(`DCACHE_WORD_SIZE) resp);
+    method Bool init_complete;
+    method Action flush_from_wb;
+    `ifdef MMU
+    // Translated physical address (and any translation exception) for the pending request.
+    method Action physical_address(Bit#(`PADDR) paddr, Trap_type exception);
+    `endif
+  endinterface
+
+  // Controller states. As used by the rules of mkdcache:
+  //   Initialize   - reset state; tags are cleared set by set (rule fencing_the_cache)
+  //   Dummy        - single cycle that falls through to Idle (rule dummy_cycle)
+  //   FenceStart   - drain the hit buffer before walking the cache (rule pre_fence_updating)
+  //   Fence        - walk every set/way, writing back dirty lines (rule handle_fence)
+  //   ReadingCache - tag/data lookup for the captured request (rule read_from_memory_structures)
+  //   KeepPolling  - wait for the needed bytes to arrive in the line buffer (rule keep_polling_on_stall)
+  //   Stall1       - one-cycle re-read of the RAMs after a line-buffer write (rule stall_the_next_request_by_one_cycle)
+  //   IOReadResp / IOWriteResp - waiting for an uncached read / write response
+  typedef enum {Idle,Dummy,KeepPolling,Stall1,ReadingCache,Initialize,Fence,FenceStart,IOReadResp,IOWriteResp} DcacheState deriving (Bits,Eq,FShow);
+
+  // Scheduling attributes for mkdcache (the list continues up to the module header).
+  (*synthesize*)
+  (*conflict_free="virtual_address,pre_fence_updating"*)
+  (*conflict_free="virtual_address,handle_fence"*)
+  (*conflict_free="keep_polling_on_stall,handle_fence"*)
+  (*conflict_free="keep_polling_on_stall,pre_fence_updating"*)
+  (*conflict_free="keep_polling_on_stall,wait_for_ioread_response"*)
+  (*conflict_free="keep_polling_on_stall,wait_for_iowrite_response"*)
+
(*conflict_free="dummy_cycle,read_from_lbdata_into_hold_reg"*)
+  (*preempts="virtual_address,read_from_lbdata_into_hold_reg"*)
+// (*preempts="keep_polling_on_stall,read_from_lbdata_into_hold_reg"*)
+  (*preempts="stall_the_next_request_by_one_cycle,read_from_lbdata_into_hold_reg"*)
+  (*preempts="read_from_lbdata_into_hold_reg,keep_polling_on_stall"*)
+  module mkdcache(Ifc_dcache);
+    /* VAddr = [tag_bits|set_bits|word_bits|byte_bits] */
+    let byte_bits=valueOf(TLog#(`DCACHE_WORD_SIZE)); // bits selecting a byte within a word (log2 of `DCACHE_WORD_SIZE)
+    let word_bits=valueOf(TLog#(`DCACHE_BLOCK_SIZE)); // bits selecting a word within a block (log2 of `DCACHE_BLOCK_SIZE)
+    let set_bits=valueOf(TLog#(`DCACHE_SETS)); // bits selecting a set of the cache (log2 of `DCACHE_SETS)
+    // Load-reserved reservation: (word-width flag atomic_op[4], physical address).
+    Reg#(Maybe#(Tuple2#(Bit#(1),Bit#(`PADDR)))) rg_lr_paddress<-mkReg(tagged Invalid);
+    `ifdef atomic
+    // Performs one atomic memory operation on an already-loaded value.
+    //   loaded_value : current memory contents at addr
+    //   rs2          : register operand
+    //   atomic_op    : [4]==1 selects a 32-bit operation (operands are sign-extended from
+    //                  their low 32 bits and the result is replicated across the word);
+    //                  [3:0] selects the operation (see the two case statements)
+    //   addr         : physical address, used for the LR/SC reservation check
+    // Returns tuple3(sc_done, store_result, atomic_result):
+    //   sc_done      : Invalid unless the op is SC; Valid 0 on SC success, Valid 1 on failure
+    //   store_result : whether atomic_result must be written back
+    //   atomic_result: value to store (or the loaded value for LR)
+    // Side effect: LR sets rg_lr_paddress, every other operation clears it.
+    function ActionValue#(Tuple3#(Maybe#(Bit#(1)),Bool, Bit#(TMul#(`DCACHE_WORD_SIZE,8)))) atomic_operation(Bit#(TMul#(`DCACHE_WORD_SIZE,8)) loaded_value, Bit#(TMul#(`DCACHE_WORD_SIZE,8)) rs2, Bit#(5) atomic_op, Bit#(`PADDR) addr);
+      return (
+      actionvalue
+        Bit#(TMul#(`DCACHE_WORD_SIZE,8)) atomic_result=rs2;
+        Bit#(TMul#(`DCACHE_WORD_SIZE,8)) op1;
+        Maybe#(Bit#(1)) sc_done=tagged Invalid;
+        if(atomic_op[4]==1)
+          op1=signExtend(loaded_value[31:0]);
+        else
+          op1=loaded_value;
+        Bit#(TMul#(`DCACHE_WORD_SIZE,8)) op2=(atomic_op[4]==1)?signExtend(rs2[31:0]):rs2;
+        Int#(TMul#(`DCACHE_WORD_SIZE,8)) s_op1=unpack(op1);
+        Int#(TMul#(`DCACHE_WORD_SIZE,8)) s_op2 = unpack(op2);
+        Bool store_result = True;
+        `ifdef verbose $display($time,"\tDCACHE: atomic instruction atomic op %b op1: %h op2: %h", atomic_op,op1,op2); `endif
+        // Read-modify-write operations.
+        case (atomic_op[3:0])
+          'b0011:atomic_result=op2;
+          'b0000:atomic_result= (op1+op2);
+          'b0010:atomic_result= (op1^op2);
+          'b0110:atomic_result= (op1&op2);
+          'b0100:atomic_result= (op1|op2);
+          'b1100:atomic_result= min(op1,op2);               // unsigned compare (Bit#)
+          'b1110:atomic_result= max(op1,op2);               // unsigned compare (Bit#)
+          'b1000:atomic_result= pack(min(s_op1,s_op2));     // signed compare (Int#)
+          'b1010:atomic_result= pack(max(s_op1,s_op2));     // signed compare (Int#)
+          default: begin atomic_result= op1; end
+        endcase
+        // Reservation handling; overrides atomic_result for LR and SC.
+        case (atomic_op[3:0])
+          'b0101: begin
+            rg_lr_paddress <= tagged Valid tuple2(atomic_op[4],addr);
+            atomic_result=loaded_value; // LR
+            store_result = False;
+          end
+          'b0111: begin
+            rg_lr_paddress <= tagged Invalid;
+            atomic_result=rs2; // SC
+            sc_done = tagged Valid 1;   // assume failure until the reservation check passes
+            store_result = False;
+            `ifdef verbose $display($time,"\tDCACHE: store condition instruction"); `endif
+            if(rg_lr_paddress matches tagged Valid .lr) begin
+              let {x,y} = lr;
+              if(x==atomic_op[4] && addr== y) begin
+                `ifdef verbose $display($time,"\tDCACHE: store condition satisfied"); `endif
+                sc_done = tagged Valid 0;
+                store_result = True;
+              end
+            end
+          end
+          default: begin rg_lr_paddress<=tagged Invalid ;end
+        endcase
+        if(atomic_op[4]==1)
+          atomic_result=duplicate(atomic_result[31:0]);
+
+        return tuple3(sc_done,store_result,atomic_result);
+      endactionvalue );
+    endfunction
+    `endif
+    // Byte-granular merge of two cache lines.
+    //   we       : one write-enable bit per byte of the line
+    //   data     : new data (bytes taken where we[i]==1)
+    //   data_reg : existing line (bytes kept where we[i]==0)
+    // Returns the merged line.
+    function Bit#(TMul#(TMul#(8,`DCACHE_WORD_SIZE),`DCACHE_BLOCK_SIZE)) update_line (Bit#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE)) we, Bit#(TMul#(TMul#(8,`DCACHE_WORD_SIZE),`DCACHE_BLOCK_SIZE)) data, Bit#(TMul#(TMul#(8,`DCACHE_WORD_SIZE),`DCACHE_BLOCK_SIZE)) data_reg);
+      Bit#(TMul#(TMul#(8,`DCACHE_WORD_SIZE),`DCACHE_BLOCK_SIZE)) mask=0;
+      // Expand every byte-enable into an 8-bit mask. The bound is the number of bytes in a
+      // line (width of 'we'), derived from the cache geometry instead of a literal 32.
+      for(Integer i=0;i<valueOf(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE));i=i+1)begin
+        Bit#(8) ex_we=duplicate(we[i]);
+        mask[(i*8)+7:i*8]=ex_we;
+      end
+      return (mask & data) | (~mask & data_reg);
+    endfunction
+
+    // One tag RAM and one data RAM per way.
+    Ifc_dcache_data data [`DCACHE_WAYS];
+    Ifc_dcache_tag tag [`DCACHE_WAYS];
+    for(Integer i=0;i<`DCACHE_WAYS;i=i+1)begin
+      tag[i] <- mkdcache_tag;
+      data[i] <-mkdcache_data;
+    end
+
+    /*====== Hit buffer data structure ======*/
+    Reg#(Bool) hb_valid <-mkReg(False);
+    Reg#(Bit#(`DCACHE_WAYS)) hb_way <-mkReg(0);
+    Reg#(Bit#(`DCACHE_TAG_BITS)) hb_tag <-mkReg(0);
+
Reg#(Bit#(TLog#(`DCACHE_SETS))) hb_setindex <- mkReg(0); + Ifc_QuadMem hb_data <-mkQuadMem; + /*=====================================*/ + + /*-====== Line buffer data structure ====*/ + Ifc_QuadMem lb_data <-mkQuadMem; + FIFOF#(Tuple4#(Bit#(20),Bit#(TLog#(`DCACHE_SETS)),Bit#(`DCACHE_WAYS),Bit#(TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE)))) memoperation <-mkUGSizedFIFOF(2); + Reg#(Bit#(1)) lb_dirty <-mkReg(0); + Reg#(Bit#(TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE))) line_bytes_written<-mkReg(0); + /*=====================================*/ + + /*======= Request Capture =========*/ + Reg#(Bit#(`VADDR)) rg_vaddress <-mkReg(0); + Reg#(Bit#(`PADDR)) rg_paddress <-mkReg(0); + Reg#(Bit#(`PADDR)) rg_poll_address <-mkReg(0); + Reg#(Bit#(3)) rg_transfer_size <-mkReg(0); + `ifdef atomic Reg#(Bit#(5)) rg_atomic_op <-mkReg(0); `endif + Reg#(Access_type) rg_access_type <-mkReg(Load); + Reg#(Bit#(TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE))) rg_writeenable<-mkReg(0); + Reg#(Bool) rg_signextend<-mkReg(False); + Reg#(Bool) misaligned_addr <-mkReg(False); + Reg#(Bit#(1)) rg_insn_epoch <-mkReg(0); + Reg#(Bit#(TMul#(`DCACHE_WORD_SIZE,8))) rg_writedata<-mkReg(0); + /*=================================*/ + /* storage for physical translation */ + Reg#(Bool) rg_trnslte_done[2] <- mkCReg(2,`ifdef MMU False `else True `endif ); + Reg#(Trap_type) rg_tlb_exception[2]<-mkCReg(2,tagged None); + /*==================================== */ + + /*===== registers for fencing/initializing ====*/ + Reg#(Bit#(TLog#(`DCACHE_SETS))) fence_set <-mkReg(0); + Reg#(Bit#(TLog#(`DCACHE_WAYS))) fence_way <-mkReg(0); + /*==============================================*/ + + /*========= FIFO for interfaces ================*/ + FIFOF#(To_Memory#(`PADDR)) ff_read_request_to_memory <-mkLFIFOF(); + FIFOF#(To_Memory_Write) ff_write_request_to_memory <-mkLFIFOF(); + FIFOF#(From_Memory#(`DCACHE_WORD_SIZE)) ff_read_response_from_memory <-mkSizedBypassFIFOF(1); + FIFOF#(From_Memory#(`DCACHE_WORD_SIZE)) 
ff_write_response_from_memory <-mkSizedBypassFIFOF(1); + /*===============================================*/ + + /*===== State Registers========*/ + Reg#(Bit#(1)) wbEpoch [3] <-mkCReg(3,0); + Reg#(DcacheState) rg_state[3] <-mkCReg(3,Initialize); + /*============================*/ + + /*============ globals =========*/ + Reg#(Bool) rg_global_dirty <-mkReg(False); + Wire#(Maybe#(Tuple2#(Bit#(20),Bit#(TLog#(`DCACHE_SETS))))) wr_write_info<-mkDWire(tagged Invalid); + Wire#(Maybe#(Tuple4#(Bit#(`Reg_width), Trap_type, Bit#(`PERFMONITORS),Bit#(1)))) wr_response_to_cpu<-mkDWire(tagged Invalid); + Reg#(Bit#(`PERFMONITORS)) rg_perf_monitor<-mkReg(0); + LFSR#(Bit#(2)) random_line<-mkRCounter(3); // for random line replacement + Reg#(Bool) pending_write_response[3]<-mkCReg(3,False); + Reg#(Bool) capture_counters <-mkDReg(False); + Reg#(Bool) rg_initialize <-mkReg(True); + Reg#(Bit#(TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE))) rg_we<-mkReg(0); + Reg#(Bool) rg_bus_error<-mkReg(False); + /*==============================*/ + rule display_state; + `ifdef verbose $display($time,"\tDCACHE: state ",fshow(rg_state[0])," wbEpoch: %b",wbEpoch[0]); `endif + endrule + + rule dummy_cycle(rg_state[1]==Dummy); + rg_state[1]<=Idle; + endrule + + rule deq_write_response_during_fence(pending_write_response[2]); + ff_write_response_from_memory.deq; + pending_write_response[2]<=False; + endrule + rule pre_fence_updating(rg_state[0]==FenceStart && !memoperation.notEmpty && !pending_write_response[2]); + if(wbEpoch[0]==rg_insn_epoch)begin + if(hb_valid)begin + for(Integer i=0;i<`DCACHE_WAYS;i=i+1)begin + tag[i].write_request(unpack(hb_way[i]),hb_setindex,{2'b11,hb_tag}); + data[i].write_request(duplicate(hb_way[i]),hb_setindex,hb_data.response_portA); + end + hb_valid<=False; + end + else begin + rg_state[0]<=Fence; + tag[0].read_request(0); + data[0].read_request(0); + fence_set<=0; + fence_way<=0; + end + end + else begin + wr_response_to_cpu<=tagged Valid (tuple4(0,tagged None, 0, 
rg_insn_epoch));
+      end
+    endrule
+    /*====== Invalidate all the entries in the cache on startup or during Fence ==== */
+    // Runs while the controller is in state Initialize (and no fill or write-back is pending).
+    // Each firing writes an all-zero tag word into every way of set fence_set; after the last
+    // set the controller moves to Dummy, the replacement LFSR is reseeded and, if the captured
+    // access was a Fence, a response is sent to the core.
+    rule fencing_the_cache(rg_state[0]==Initialize && !memoperation.notEmpty && !pending_write_response[2]);
+      `ifdef verbose $display($time,"\tDCACHE: Initializing index: %d",fence_set," ",fshow(rg_access_type)); `endif
+      for(Integer i=0;i<`DCACHE_WAYS;i=i+1)begin
+        tag[i].write_request(True,truncate(fence_set),0);
+      end
+      if(fence_set==fromInteger(`DCACHE_SETS-1)) begin
+        rg_state[0]<=Dummy;
+        fence_set<=0;
+        fence_way<=0;
+        random_line.seed('d3);
+        rg_global_dirty<=False;
+        rg_trnslte_done[0]<=False;
+        if(rg_access_type==Fence)
+          wr_response_to_cpu<= tagged Valid (tuple4(0,tagged None,0,rg_insn_epoch));
+      end
+      else
+        fence_set<=fence_set+1;
+    endrule
+    /*=============================================================================== */
+    // Fence walk: visits one (set, way) per firing. The tag/data read issued on the previous
+    // firing is examined; a valid and dirty line is written back to memory, and the tag entry
+    // is cleared. While a write-back response is outstanding (pending_write_response[1]) the
+    // rule does not advance and simply re-reads the same way. After the last way of the last
+    // set the controller moves to Dummy and a response is sent to the core.
+    rule handle_fence(rg_state[0]==Fence &&!memoperation.notEmpty);
+      Bit#(20) tag_values=tag[fence_way].read_response[20-1:0]; // tag bits of the entry
+      Bit#(1) dirty_value=tag[fence_way].read_response[20+1]; // dirty bit (bit 21)
+      Bit#(1) valid_value=tag[fence_way].read_response[20]; // valid bit (bit 20)
+      Bit#(TMul#(8,TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE))) data_values; // holds the cache line.
+      Bit#(TAdd#(TLog#(`DCACHE_WORD_SIZE),TLog#(`DCACHE_BLOCK_SIZE))) p_offset =0;
+      data_values=data[fence_way].read_response;
+
+      // Line-aligned address of the entry being examined: {tag, set, zero offset}.
+      Bit#(`PADDR) write_addr={tag_values,truncate(fence_set),p_offset};
+      `ifdef verbose $display($time,"\tDCACHE: Handling Fence.tag %h setindex: %d fence_way: %d Dirty: %b Valid: %b",tag_values,fence_set,fence_way,dirty_value,valid_value); `endif
+      `ifdef verbose $display($time,"\tDCACHE: Fence addr: %h line: %h ",write_addr,data_values); `endif
+      Bit#(TLog#(`DCACHE_SETS)) new_set=fence_set;
+      Bit#(TLog#(`DCACHE_SETS)) old_set=fence_set;
+      Bit#(TLog#(`DCACHE_WAYS)) next_way=fence_way;
+      if(!pending_write_response[1])begin
+        if(dirty_value==1 && valid_value==1)begin // valid and dirty
+          ff_write_request_to_memory.enq(To_Memory_Write { // write the dirty line back to memory
+            address:write_addr, data_line:data_values,
+            burst_length:`DCACHE_BLOCK_SIZE, transfer_size:3, ld_st:Store});
+          pending_write_response[1]<=True;
+        end
+        if(fence_way==fromInteger(`DCACHE_WAYS-1))begin
+          new_set=fence_set+1;
+          if(fence_set==fromInteger(`DCACHE_SETS-1))begin
+            rg_state[0]<=Dummy;
+            rg_global_dirty<=False;
+            wr_response_to_cpu<= tagged Valid (tuple4(0,tagged None,0,rg_insn_epoch));
+            rg_trnslte_done[0]<=False;
+            fence_set<=0;
+          end
+          else
+            fence_set<=new_set;
+        end
+        next_way=fence_way+1;
+        tag[fence_way].write_request(True,old_set,0); // clear the entry just processed
+      end
+      `ifdef verbose $display($time,"\tDCACHE: FENCE: sending request to setindex: %d way: %d",new_set,next_way); `endif
+      // Issue the read for the entry that the next firing will examine.
+      tag[next_way].read_request(new_set);
+      data[next_way].read_request(new_set);
+      fence_way<=next_way;
+    endrule
+
+    // Fires once every byte of the line buffer has been written (line_bytes_written is all
+    // ones) and a fill is outstanding: the completed line is copied into the way selected by
+    // replaceblock.
+    rule read_from_lbdata_into_hold_reg(line_bytes_written=='1 && memoperation.notEmpty);
+      let lb_hold_reg=lb_data.response_portB;
+      let {cputag,setindex,replaceblock,writeenable}=memoperation.first;
+      for(Integer i=0;i<`DCACHE_WAYS;i=i+1)begin
+        tag[i].write_request((unpack(replaceblock[i])&&True),setindex,{lb_dirty,1'b1,cputag});
+
data[i].write_request(duplicate(replaceblock[i]),setindex,lb_hold_reg); + end + line_bytes_written<=0; + lb_dirty<=0; + memoperation.deq; + rg_bus_error<=False; + `ifdef verbose $display($time,"\tDCACHE: capturing lbdata cpu_tag: %h setindex: %d addr: %h linenum: %b data: %h",cputag, setindex,{cputag,setindex,6'd0}, replaceblock,lb_hold_reg); `endif + if(rg_state[1]==KeepPolling) + rg_state[1]<=Stall1; + endrule + + rule fillcache(memoperation.notEmpty && line_bytes_written!='1); // need to check line_bytes_written to ensure the same response is being served. + let memresp=ff_read_response_from_memory.first; + ff_read_response_from_memory.deq; + rg_bus_error<=unpack(memresp.bus_error)||rg_bus_error; + let {cpu_tag,setindex,replaceblock,writeenable}=memoperation.first; + `ifdef verbose $display($time,"\tDCACHE: Response from Memory: %h setindex: %d cpu_tag: %h replaceblock: %b",memresp.data_line,setindex,cpu_tag,replaceblock); `endif + let we=writeenable; + if(|line_bytes_written!=0)begin + we=rg_we; + end + Bit#(TMul#(2,TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE))) extended_mask=zeroExtend(we)<<8; + lb_data.write_portB(we,duplicate(memresp.data_line)); + `ifdef verbose $display($time,"\tDCACHE: linebytes: %h currently writing into: %h",line_bytes_written,we); `endif + if(memresp.last_word)begin // if all the data words have been fetched exit + `ifdef verbose $display($time,"\tDCACHE: Received Last response from Memory set: %d ",setindex); `endif + end + line_bytes_written<=line_bytes_written|we; + rg_we<=extended_mask[2*`DCACHE_BLOCK_SIZE*`DCACHE_WORD_SIZE-1:`DCACHE_BLOCK_SIZE*`DCACHE_WORD_SIZE]|extended_mask[`DCACHE_BLOCK_SIZE*`DCACHE_WORD_SIZE-1:0]; + endrule + + rule drop_incoming_request(rg_state[0]==ReadingCache && memoperation.notFull && wbEpoch[0]!=rg_insn_epoch); + if(rg_trnslte_done[0]) + wr_response_to_cpu<=tagged Valid (tuple4(0,tagged None, 0, rg_insn_epoch)); + `ifdef verbose $display($time,"\tDCACHE: Dropping incoming request wbEpoch: %b 
rg_insn_epoch: %b",wbEpoch[0],rg_insn_epoch); `endif + rg_trnslte_done[0]<=False; + rg_state[0]<=Idle; + endrule + /*============== One cycle delay to ensure the write is reflected in the BRAM ========= */ + rule stall_the_next_request_by_one_cycle(rg_state[1]==Stall1); + Bit#(TLog#(`DCACHE_SETS)) setindex=rg_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits]; + for(Integer i=0;i<`DCACHE_WAYS;i=i+1)begin // send address to the Block_rams + tag[i].read_request(setindex); + data[i].read_request(setindex); + end + rg_state[1]<=ReadingCache; + endrule + /*===================================================================================== */ + + rule keep_polling_on_stall(rg_state[1]==KeepPolling); + Bit#(`PERFMONITORS) perf_monitor=rg_perf_monitor; + if(capture_counters)begin + `ifdef verbose $display($time,"\tDCACHE: Miss during polling for ",fshow(rg_access_type)); `endif + if(rg_access_type==Load)begin + perf_monitor[`DCACHE_LOAD_MISS]=1; + perf_monitor[`DCACHE_CACHEABLE_LOAD]=1; + end + else if(rg_access_type==Store)begin + perf_monitor[`DCACHE_STORE_MISS]=1; + perf_monitor[`DCACHE_CACHEABLE_STORE]=1; + end + else if(rg_access_type==Atomic) begin + perf_monitor[`DCACHE_ATOMIC_MISS]=1; + perf_monitor[`DCACHE_CACHEABLE_ATOMIC]=1; + end + rg_perf_monitor<=perf_monitor; + end + Bit#(TLog#(`DCACHE_SETS)) setindex=rg_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits]; + Bit#(20) cpu_tag=rg_poll_address[`PADDR-1:`PADDR-20]; + let {lbtag,lbset,lbreplaceblock,lbwriteenable}=memoperation.first; + if((line_bytes_written & rg_writeenable) == rg_writeenable && (lbset==setindex && lbtag==cpu_tag))begin + `ifdef verbose $display($time,"\tDCACHE: Accessing LB"); `endif + rg_state[1]<=ReadingCache; + for(Integer i=0;i<`DCACHE_WAYS;i=i+1)begin // send address to the Block_rams + tag[i].read_request(setindex); + data[i].read_request(setindex); + end + end + `ifdef verbose $display($time,"\tDCACHE: Polling on LB. 
cpu_tag: %h lbtag: %h required: %h bytes in Buffer: %h",cpu_tag,lbtag,rg_writeenable,line_bytes_written); `endif + endrule + + + rule read_from_memory_structures(rg_state[0]==ReadingCache && memoperation.notFull && wbEpoch[0]==rg_insn_epoch); + Bool cache_enabled = !is_IO_Addr(rg_paddress); + Trap_type exception = misaligned_addr?((rg_access_type==Load)? + tagged Exception Load_addr_misaligned:tagged Exception Store_addr_misaligned): rg_bus_error? + (rg_access_type==Load?tagged Exception Load_access_fault:tagged Exception Store_access_fault):rg_tlb_exception[0]; + /*====== Get the states of the request ======*/ + Bit#(20) cpu_tag=rg_paddress[`PADDR-1:`PADDR-20]; + Bit#(TLog#(`DCACHE_BLOCK_SIZE)) word_offset=rg_vaddress[word_bits+byte_bits-1:byte_bits]; + Bit#(TLog#(`DCACHE_WORD_SIZE)) byte_offset=rg_vaddress[byte_bits-1:0]; + Bit#(TLog#(`DCACHE_SETS)) setindex=rg_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits]; + + Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) hbdataline=0; + Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) lbdataline=0; + + /*========== Check hit on Hit buffer =======*/ + Bool hb_hit = False; + if(hb_valid && (hb_setindex==setindex) && (hb_tag==cpu_tag) && !misaligned_addr)begin + hb_hit=True; + hbdataline=hb_data.response_portA; + end + /*==========================================*/ + + /*========= Check Line buffer ==============*/ + Bool stall_on_lb=((line_bytes_written & rg_writeenable) != rg_writeenable) && memoperation.notEmpty; + Bool lb_valid=memoperation.notEmpty; + let {lb_tag,lb_setindex,lb_way,lb_we}=memoperation.first; + Bool lb_hit = False; + if(lb_valid && (lb_setindex==setindex) && (lb_tag==cpu_tag) && !misaligned_addr)begin + lb_hit=True; + lbdataline=lb_data.response_portA; + end + /*===========================================*/ + + /*======= Check SRAMS ==============*/ + Bit#(`DCACHE_WAYS) tag_hit=0; + + Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) 
dataline0=data[0].read_response; + Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) dataline1=data[1].read_response; + Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) dataline2=data[2].read_response; + Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) dataline3=data[3].read_response; + + Bit#(`DCACHE_WAYS) valid_values={tag[3].read_response[20],tag[2].read_response[20],tag[1].read_response[20],tag[0].read_response[20]}; + Bit#(`DCACHE_WAYS) dirty_values={tag[3].read_response[21],tag[2].read_response[21],tag[1].read_response[21],tag[0].read_response[21]}; + + if(cpu_tag==tag[0].read_response[19:0] && valid_values[0]==1) tag_hit[0]=1; + if(cpu_tag==tag[1].read_response[19:0] && valid_values[1]==1) tag_hit[1]=1; + if(cpu_tag==tag[2].read_response[19:0] && valid_values[2]==1) tag_hit[2]=1; + if(cpu_tag==tag[3].read_response[19:0] && valid_values[3]==1) tag_hit[3]=1; + + Bool hit=False; + hit=unpack(|(tag_hit)) && (!hb_hit) && (!lb_hit) && !misaligned_addr; + // We are not invalidating a replaced line when enquing into the linebuffer. + // So it is possible that the next request finds this to be a hit and proceeds to change the SRAM. + // While the linebuffer, having received all the bytes from the memory will simply + // go ahead and replace the dirty line without eviction. The following condition ensures + // that for the same index if the SRAM hit is to the same line as the LB treat is as a miss. 
+ if(hit && tag_hit==lb_way && lb_valid && lb_setindex==setindex) + hit=False; + dynamicAssert(!(lb_hit&&hb_hit),"ASSERT: lb_hit and hb_hit are both 1"); + + Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) temp0=duplicate(tag_hit[0]&pack(hit)); + Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) temp1=duplicate(tag_hit[1]&pack(hit)); + Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) temp2=duplicate(tag_hit[2]&pack(hit)); + Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) temp3=duplicate(tag_hit[3]&pack(hit)); + Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) hitline0=temp0&dataline0; + Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) hitline1=temp1&dataline1; + Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) hitline2=temp2&dataline2; + Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) hitline3=temp3&dataline3; + Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) sram_dataline=hitline0|hitline1|hitline2|hitline3; + `ifdef verbose $display($time,"\tDCACHE: valid_values: %b dirty_values: %b stall_on_lb: %b we: %h Access_type: ",valid_values,dirty_values,stall_on_lb,rg_writeenable,fshow(rg_access_type)); `endif + /*================================================*/ + /*===== replacement line selection ==============*/ + Bit#(`DCACHE_WAYS) replace_vec=valid_values; + if(&(valid_values)==1) + replace_vec=dirty_values; + case (replace_vec) matches + 'b???0:replace_vec='b0001; + 'b??01:replace_vec='b0010; + 'b?011:replace_vec='b0100; + 'b0111:replace_vec='b1000; + default:begin + replace_vec=0; + replace_vec[random_line.value]=1; + random_line.next; + end + endcase + if(replace_vec==lb_way && lb_setindex==setindex && lb_valid) + replace_vec=rotateBitsBy(lb_way,1); + if(replace_vec==hb_way && hb_valid && hb_setindex==setindex) + replace_vec=rotateBitsBy(replace_vec,1); + `ifdef verbose $display($time,"\tDCACHE: replacevec: %b hb_way: %b lb_way: %b",replace_vec,hb_way,lb_way); `endif + 
`ifdef verbose $display($time,"\tDCACHE: CPUTAG: %h lb_tag: %h hb_tag :%h",cpu_tag,lb_tag,hb_tag); `endif + `ifdef verbose $display($time,"\tDCACHE: CPUIndex: %d lb_index: %d hb_inex :%d",setindex,lb_setindex,hb_setindex); `endif + + Bit#(TAdd#(TLog#(`DCACHE_WORD_SIZE),TLog#(`DCACHE_BLOCK_SIZE))) offset_zeros='d0; + Bit#(`PADDR) r0=duplicate(replace_vec[0]); + Bit#(`PADDR) r1=duplicate(replace_vec[1]); + Bit#(`PADDR) r2=duplicate(replace_vec[2]); + Bit#(`PADDR) r3=duplicate(replace_vec[3]); + Bit#(`PADDR) write_address0=r0&{tag[0].read_response[20-1:0],setindex[6:0],offset_zeros}; + Bit#(`PADDR) write_address1=r1&{tag[1].read_response[20-1:0],setindex[6:0],offset_zeros}; + Bit#(`PADDR) write_address2=r2&{tag[2].read_response[20-1:0],setindex[6:0],offset_zeros}; + Bit#(`PADDR) write_address3=r3&{tag[3].read_response[20-1:0],setindex[6:0],offset_zeros}; + Bit#(`PADDR) write_address = write_address0 | write_address1 | write_address2 | write_address3; + Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) replace_dataline = + case(replace_vec) + 'b0001:dataline0; + 'b0010:dataline1; + 'b0100:dataline2; + 'b1000:dataline3; + default:0; + endcase; + `ifdef verbose $display($time,"\tDCACHE: Replace vec: %h line: %h address :%h",replace_vec,replace_dataline, write_address); `endif + /*==============================================*/ + + /*==== capture the word to be operated on and perform the atomic operation as well on it=======*/ + Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) dataline=hbdataline|lbdataline|sram_dataline; + Bit#(`Reg_width) data_word=(dataline>>{6'd0,word_offset}*64)[`Reg_width-1:0]; + data_word=data_word>>({4'b0,byte_offset}*8); + + if(!rg_signextend) + data_word=rg_transfer_size==0?zeroExtend(data_word[7:0]):rg_transfer_size==1?zeroExtend(data_word[15:0]):rg_transfer_size==2?zeroExtend(data_word[31:0]):data_word; + else + 
data_word=rg_transfer_size==0?signExtend(data_word[7:0]):rg_transfer_size==1?signExtend(data_word[15:0]):rg_transfer_size==2?signExtend(data_word[31:0]):data_word; + `ifdef atomic + let {success,storeResult,atomicdata} <- atomic_operation(data_word,rg_writedata,rg_atomic_op,rg_paddress); + if(rg_access_type==Load) + storeResult=False; + if(success matches tagged Valid .sc) + data_word = zeroExtend(sc); + `endif + Bit#(`Reg_width) final_word = `ifdef atomic (rg_access_type==Atomic)?atomicdata: `endif (rg_access_type==Store)?rg_writedata:data_word; + `ifdef verbose $display($time,"\tDCACHE: hbhit: %b hbdataline: %h",hb_hit,hbdataline); `endif + `ifdef verbose $display($time,"\tDCACHE: lb_hit: %b lbdataline: %h",lb_hit,lbdataline); `endif + `ifdef verbose $display($time,"\tDCACHE: tag_hit: %b hit : %b srdataline: %h",tag_hit,hit , sram_dataline); `endif + `ifdef verbose $display($time,"\tDCACHE: Sending to Core: %h Final line: %h",data_word,dataline); `endif + `ifdef verbose $display($time,"\tDCACHE: translation done: %b tlb_exception: ",rg_trnslte_done[0],fshow(rg_tlb_exception[0])); `endif + /*=============================================================================================*/ + /*============ perform Store/Atomic operations =============*/ + if(rg_trnslte_done[0] &&& rg_tlb_exception[0] matches tagged None)begin + if(cache_enabled)begin + /*======= Calculate the next state ===*/ + DcacheState nextstate=Idle; + if(misaligned_addr || hit || hb_hit || (lb_hit && !stall_on_lb)) + nextstate=Idle; + else if(lb_hit && stall_on_lb || (!hit && !lb_hit &&!hb_hit))begin + nextstate=KeepPolling; + rg_poll_address<=rg_paddress; + end + if(nextstate==Idle) + rg_trnslte_done[0]<=False; + rg_state[0]<=nextstate; + `ifdef verbose $display($time,"\tDCACHE: NextState: ",fshow(nextstate)); `endif + /*=====================================*/ + /*========= response to CPU =============*/ + if(rg_access_type!=Store && nextstate==Idle) + wr_response_to_cpu<= tagged Valid 
(tuple4(data_word,exception,0,rg_insn_epoch));// TODO perf + else if(nextstate==Idle) + wr_response_to_cpu<= tagged Valid (tuple4(0,exception,0,rg_insn_epoch)); // TODO perf + + if(exception matches tagged None) + wbEpoch[0]<=wbEpoch[0]; + else + wbEpoch[0]<=~wbEpoch[0]; + /*=======================================*/ + + if(rg_access_type==Store `ifdef atomic || storeResult `endif ) + rg_global_dirty<=True; + /*=============== updated hit buffer on a write =========*/ + if(hb_hit && (rg_access_type==Store `ifdef atomic || storeResult `endif ))begin + `ifdef verbose $display($time,"\tDCACHE: HB Hit. Writing Tag: %h Data: %h Way: %h",hb_tag,final_word,hb_way); `endif + hb_data.write_portA(rg_writeenable,duplicate(final_word)); + end + /*============================================*/ + /*=============== updated line buffer on a write =========*/ + if(lb_hit && !stall_on_lb && (rg_access_type==Store `ifdef atomic || storeResult `endif ))begin + `ifdef verbose $display($time,"\tDCACHE: LB Hit. 
Writing Tag: %h Data: %h Way: %h setindex: %d",lb_tag,final_word,lb_way,lb_setindex); `endif + lb_data.write_portA(rg_writeenable,duplicate(final_word)); + lb_dirty<=1; + end + if(hit && (rg_access_type==Store `ifdef atomic || storeResult `endif ) && !hb_hit)begin + `ifdef verbose $display($time,"\tDCACHE: Hit in SRAMS and writing new value :%h to HB",update_line(rg_writeenable,duplicate(final_word),dataline)); `endif + hb_tag<=cpu_tag; + hb_setindex<=setindex; + hb_data.write_portA('1,update_line(rg_writeenable,duplicate(final_word),dataline)); + hb_way<=tag_hit; + end + if(hit && (rg_access_type==Store `ifdef atomic || storeResult `endif ) && !hb_hit) + hb_valid<=True; + else if(hb_valid && !hb_hit) + hb_valid<=False; + /*=============== updated SRAM entries with Hit buffer when possible =========*/ + if(hb_valid &&!hb_hit)begin + `ifdef verbose $display($time,"\tDCACHE: HB updating SRAM Tag: %h Data: %h Way: %h setindex: %d",hb_tag,hb_data.response_portA,hb_way,hb_setindex); `endif + wr_write_info<=tagged Valid tuple2(hb_tag,hb_setindex); + for(Integer i=0;i<`DCACHE_WAYS;i=i+1)begin + tag[i].write_request(unpack(hb_way[i]),hb_setindex,{2'b11,hb_tag}); + data[i].write_request(duplicate(hb_way[i]),hb_setindex,hb_data.response_portA); + end + end + /*============================================================================*/ + if(!hit && !lb_hit && !hb_hit && !misaligned_addr)begin// a complete miss + `ifdef verbose $display($time,"\tDCACHE: A complete miss in Data Cache. 
Enquing into the memoperation FIFO"); `endif + Bit#(TLog#(`DCACHE_BLOCK_SIZE)) val1=(rg_vaddress&'hfffffff8)[word_bits+byte_bits-1:byte_bits]; + Bit#(TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE)) writeenable='hFF; + writeenable=writeenable<<{3'b0,val1}*8; + memoperation.enq(tuple4(cpu_tag,rg_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits],replace_vec,writeenable)); + ff_read_request_to_memory.enq(To_Memory {address:rg_paddress&'hfffffff8,burst_length:fromInteger(`DCACHE_BLOCK_SIZE),ld_st:Load, transfer_size:3}); + if((valid_values&dirty_values&replace_vec)==replace_vec)begin // if the replacing is dirty + `ifdef verbose $display($time,"\tDCACHE: Line being replaced is dirty. Addr: %h Data: %h",write_address,replace_vec); `endif + ff_write_request_to_memory.enq(To_Memory_Write {address:write_address,burst_length:fromInteger(`DCACHE_BLOCK_SIZE),ld_st:Load, transfer_size:3, + data_line:replace_dataline }); + pending_write_response[0]<=True; + end + end + end + else if(rg_access_type==Load || rg_access_type==Atomic)begin + ff_read_request_to_memory.enq(To_Memory {address:rg_paddress,burst_length:1,ld_st:Load,transfer_size:rg_transfer_size}); + rg_state[0]<=IOReadResp; + end + else if(rg_access_type==Store)begin + ff_write_request_to_memory.enq(To_Memory_Write{address:rg_paddress,data_line:zeroExtend(rg_writedata),burst_length:1,transfer_size:rg_transfer_size,ld_st:Store}); + rg_state[0]<=IOWriteResp; + end + end + else if(rg_trnslte_done[0])begin + rg_state[0]<=Idle; + wr_response_to_cpu<= tagged Valid (tuple4(0,rg_tlb_exception[0],0,rg_insn_epoch));//TODO perf + wbEpoch[0]<=~wbEpoch[0]; + rg_tlb_exception[0]<=tagged None; + rg_perf_monitor<=0; + rg_trnslte_done[0]<=False; + `ifdef verbose $display($time,"\tDCACHE: Exception from TLB taken"); `endif + end + else begin + `ifdef verbose $display($time,"\tDCACHE: Translation not done"); `endif + rg_state[0] <= Idle; + end + /*==========================================================*/ + endrule + rule 
wait_for_ioread_response(rg_state[0]==IOReadResp && !memoperation.notEmpty); // guard: state is IOReadResp and the memoperation FIFO is empty
+      `ifdef verbose $display($time,"\tDCACHE: Received IO Read Response"); `endif
+      Bit#(TLog#(`DCACHE_WORD_SIZE)) byte_offset=rg_vaddress[byte_bits-1:0];
+      Bit#(`Reg_width) data_value=ff_read_response_from_memory.first.data_line;
+      ff_read_response_from_memory.deq;
+      data_value=data_value>>({4'b0,byte_offset}*8); // shift the addressed byte down to bit 0
+      if(!rg_signextend) // zero- or sign-extend according to transfer size (0: 8 bits, 1: 16 bits, 2: 32 bits, otherwise unchanged)
+        data_value=rg_transfer_size==0?zeroExtend(data_value[7:0]):rg_transfer_size==1?zeroExtend(data_value[15:0]):rg_transfer_size==2?zeroExtend(data_value[31:0]):data_value;
+      else
+        data_value=rg_transfer_size==0?signExtend(data_value[7:0]):rg_transfer_size==1?signExtend(data_value[15:0]):rg_transfer_size==2?signExtend(data_value[31:0]):data_value;
+      wr_response_to_cpu<=tagged Valid (tuple4(data_value,ff_read_response_from_memory.first.bus_error==1?tagged Exception Load_access_fault:tagged None,rg_perf_monitor,rg_insn_epoch));
+      wbEpoch[0]<=ff_read_response_from_memory.first.bus_error==1?~wbEpoch[0]:wbEpoch[0]; // epoch flips only when the response carries a bus error
+      `ifdef atomic
+      if(rg_access_type==Atomic)begin // atomic: follow the read with a single-beat write of the computed value
+        let {success,storeResult,atomicdata} <- atomic_operation(data_value,rg_writedata,rg_atomic_op,rg_paddress);
+        `ifdef MMU
+          ff_write_request_to_memory.enq(To_Memory_Write{address:rg_paddress,data_line:zeroExtend(atomicdata),burst_length:1,transfer_size:rg_transfer_size,ld_st:Store});
+        `else
+          ff_write_request_to_memory.enq(To_Memory_Write{address:truncate(rg_vaddress),data_line:zeroExtend(atomicdata),burst_length:1,transfer_size:rg_transfer_size,ld_st:Store}); // fix: was new_data, which this rule never binds
+        `endif
+        rg_state[0]<=IOWriteResp;
+      end
+      else
+      `endif
+      begin
+        rg_state[0]<=Idle;
+      end
+      rg_perf_monitor<=0;
+    endrule
+    rule wait_for_iowrite_response(rg_state[0]==IOWriteResp && !memoperation.notEmpty && !pending_write_response[2]);
+      `ifdef verbose $display($time,"\tDCACHE: Received IO Write Response"); `endif
+      ff_write_response_from_memory.deq;
+      if(rg_access_type!=Atomic) begin
+        wr_response_to_cpu<=tagged Valid
(tuple4(0,ff_write_response_from_memory.first.bus_error==1?tagged Exception Store_access_fault:tagged None,rg_perf_monitor,rg_insn_epoch)); + wbEpoch[0]<=ff_write_response_from_memory.first.bus_error==1?~wbEpoch[0]:wbEpoch[0]; + end + rg_perf_monitor<=0; + rg_state[0]<=Idle; + endrule + method Action virtual_address(Bit#(`VADDR) vaddress, Access_type load_store, Bit#(TMul#(`DCACHE_WORD_SIZE,8)) writedata, Bit#(3) transfer_size, `ifdef atomic Bit#(5) atomic_op, `endif Bool signextend, Bit#(1) insnepoch) if(rg_state[1]==Idle); + if((transfer_size=='b01 && vaddress[0]!='b0) || (transfer_size=='b10 && vaddress[1:0]!=0) || (transfer_size=='b11 && vaddress[2:0]!=0)) + misaligned_addr<=True; + else + misaligned_addr<=False; + Bit#(`PERFMONITORS) perf_monitor=0; + Bit#(TLog#(`DCACHE_SETS)) setindex=vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits]; + `ifdef verbose $display($time,"\tDCACHE: ",fshow(load_store)," Request of VAddr: %h transfersize: %d signextend: %b setindex: %d data:%h",vaddress,transfer_size, signextend,setindex,writedata); `endif + Bit#(TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE)) we=transfer_size==0?'b1:transfer_size==1?'b11:transfer_size==2?'hF:'hFF; + Bit#(TLog#(`DCACHE_BLOCK_SIZE)) word_offset= vaddress[word_bits+byte_bits-1:byte_bits]; + Bit#(TLog#(`DCACHE_WORD_SIZE)) byte_offset=vaddress[byte_bits-1:0]; + we=we<<{4'b0,word_offset}*8; + we=we<>{6'd0,word_offset}*64)[`Reg_width-1:0]; + data_value=data_value>>({4'b0,byte_offset}*8); + if(!rg_signextend) + data_value=rg_transfer_size==0?zeroExtend(data_value[7:0]):rg_transfer_size==1?zeroExtend(data_value[15:0]):rg_transfer_size==2?zeroExtend(data_value[31:0]):data_value; + else + data_value=rg_transfer_size==0?signExtend(data_value[7:0]):rg_transfer_size==1?signExtend(data_value[15:0]):rg_transfer_size==2?signExtend(data_value[31:0]):data_value; + + /*====================================================== */ + /*=========== Respond to Core ============================ */ + 
if((rg_transfer_size=='b01 && rg_vaddress[0]!='b0) || (rg_transfer_size=='b10 && rg_vaddress[1:0]!=0) || (rg_transfer_size=='b11 && rg_vaddress[2:0]!=0))begin // miss-aligned error. + perf_monitor[`DCACHE_MISALIGNED]=1; // cache mis-aligned error. + if(rg_load_store==Load) + wr_response_to_cpu<= tagged Valid (tuple3(0,tagged Exception Load_addr_misaligned,perf_monitor)); + else + wr_response_to_cpu<=tagged Valid (tuple3(0,tagged Exception Store_addr_misaligned,perf_monitor)); + rg_state[0]<=Idle; + rg_perf_monitor<=0; + `ifdef MMU rg_trnslte_done[0] <= False; `endif + end + else if(hit||lbhit)begin // if there has been a hit. + let {success,storeResult,newdata} <- atomic_operation(data_value,rg_writedata,rg_atomic_op,rg_paddress); + if(rg_load_store==Load) + storeResult=False; + if(success matches tagged Valid .sc) + data_value = zeroExtend(sc); + if(lbhit && (line_bytes_written & rg_writeenable) != rg_writeenable)begin + rg_state[0]<=KeepPolling; + rg_perf_monitor<=perf_monitor; + `ifdef verbose $display($time,"\tDCACHE: Going to poll LB: %h we: %h",line_bytes_written,rg_writeenable); `endif + end + else begin + if(rg_load_store==Store) + data_value=0; + `ifdef verbose $display($time,"\tDCACHE: Hit for ",fshow(rg_load_store)," address : %h data: %h line: %d rg_writedata: %h rg_writeenable: %h lbhit: %b atomic_data %h storeResult %b",rg_vaddress,data_value,linenum,rg_writedata,rg_writeenable, lbhit, newdata, storeResult); `endif + wr_response_to_cpu<=tagged Valid (tuple3(data_value,tagged None,perf_monitor)); + rg_trnslte_done[0] <= False; + rg_perf_monitor<=0; + rg_state[0]<=Idle; + if(rg_load_store==Store || storeResult)begin //Atomic but not LR + `ifdef verbose $display("Store or atomic kuch toh ho raha hai"); `endif + wr_write_info<=tagged Valid tuple2(cpu_tag,setindex); + if(lbhit)begin + if(rg_load_store==Store) + lbdata.write_portA(rg_writeenable,duplicate(rg_writedata)); + else + lbdata.write_portA(rg_writeenable,duplicate(newdata)); + `ifdef verbose 
if(line_bytes_written!='1) + $display("WRITING ON BOTH PORTS OF LB"); `endif + lb_dirty<=1; + end + else begin + tag[linenum].write_request(rg_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits],{2'b11,tag[linenum].read_response[19:0]}); + if(rg_load_store==Store) + data[linenum].write_request(rg_writeenable,rg_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits],duplicate(rg_writedata)); + else + data[linenum].write_request(rg_writeenable,rg_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits],duplicate(newdata)); + end + rg_global_dirty[0]<=True; + end + end + end + /*====================================================== */ + /*==== Request to memory =============================== */ + else begin // miss + rg_state[0]<=KeepPolling; + if(rg_load_store==Load) + perf_monitor[`DCACHE_LOAD_MISS]=1; + else if(rg_load_store==Store) + perf_monitor[`DCACHE_STORE_MISS]=1; + else if(rg_load_store==Atomic) + perf_monitor[`DCACHE_ATOMIC_MISS]=1; + + if(valid_values=='1)begin // if all the lines are valid and no match then replace line + perf_monitor[`DCACHE_LINEREPLACE]=1; // cache line replacement increment. 
+ if(dirty_values[0]==0) + replaceblock=0; + else if(dirty_values[1]==0) + replaceblock=1; + else if(dirty_values[2]==0) + replaceblock=2; + else if(dirty_values[3]==0) + replaceblock=3; + else begin + replaceblock=truncate(random_line.value); + random_line.next; + end + `ifdef verbose $display($time,"\tDCACHE: Miss of ",fshow(rg_load_store)," address: %h Replacing line: %d valid: %b dirty_values: %b",rg_vaddress,replaceblock,valid_values,dirty_values); `endif + end + else begin + `ifdef verbose $display($time,"\tDCACHE: Miss of ",fshow(rg_load_store)," address: %h Filling line: %d",rg_vaddress,replaceblock); `endif + end + if(memoperation.notEmpty && lbset==setindex && replaceblock==lbreplaceblock)begin + replaceblock=replaceblock+1; + end + + `ifdef MMU + ff_read_request_to_memory.enq(To_Memory {address:rg_paddress&'hfffffff8,burst_length:fromInteger(`DCACHE_BLOCK_SIZE),ld_st:Load, transfer_size:3}); + `else + ff_read_request_to_memory.enq(To_Memory {address:truncate(rg_vaddress&'hfffffff8),burst_length:fromInteger(`DCACHE_BLOCK_SIZE),ld_st:Load, transfer_size:3}); + `endif + Bit#(TLog#(`DCACHE_BLOCK_SIZE)) val1=(rg_vaddress&'hfffffff8)[word_bits+byte_bits-1:byte_bits]; + Bit#(TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE)) writeenable='hFF; + writeenable=writeenable<<{3'b0,val1}*8; + if(dirty_values[replaceblock]==1)begin // if the replacing is dirty + perf_monitor[`DCACHE_WRITEBACKS]=1; + Bit#(TAdd#(TLog#(`DCACHE_WORD_SIZE),TLog#(`DCACHE_BLOCK_SIZE))) offset_zeros='d0; + Bit#(`PADDR) write_address={tag[replaceblock].read_response[20-1:0],setindex[6:0],offset_zeros}; + `ifdef verbose $display($time,"\tDCACHE: Line being replaced is dirty. 
Addr: %h Data: %h",write_address,data[replaceblock].read_response); `endif + ff_write_request_to_memory.enq(To_Memory_Write {address:write_address,burst_length:fromInteger(`DCACHE_BLOCK_SIZE),ld_st:Load, transfer_size:3, + data_line:data[replaceblock].read_response }); + pending_fence_write_response[0]<=True; + end + memoperation.enq(tuple4(cpu_tag,rg_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits],replaceblock,writeenable)); + `ifdef verbose $display($time,"\tDCACHE: mask: %h byteoffset: %h",writeenable,val1); `endif + rg_perf_monitor<=perf_monitor; + end + end + else begin + if(rg_load_store==Load || rg_load_store==Atomic)begin + `ifdef MMU + ff_read_request_to_memory.enq(To_Memory {address:rg_paddress,burst_length:1,ld_st:Load,transfer_size:rg_transfer_size}); + `else + ff_read_request_to_memory.enq(To_Memory {address:truncate(rg_vaddress),burst_length:1,ld_st:Load,transfer_size:rg_transfer_size}); + `endif + rg_state[0]<=IOReadResp; + end + else if(rg_load_store==Store)begin + `ifdef verbose $display($time,"\tDCACHE: Sending IO Write REQUEST"); `endif + `ifdef MMU + ff_write_request_to_memory.enq(To_Memory_Write{address:rg_paddress,data_line:zeroExtend(rg_writedata),burst_length:1,transfer_size:rg_transfer_size,ld_st:Store}); + `else + ff_write_request_to_memory.enq(To_Memory_Write{address:truncate(rg_vaddress),data_line:zeroExtend(rg_writedata),burst_length:1,transfer_size:rg_transfer_size,ld_st:Store}); + `endif + rg_state[0]<=IOWriteResp; + end + end + `ifdef MMU + end + else if(rg_trnslte_done[0])begin + rg_state[0]<=Idle; + wr_response_to_cpu<= tagged Valid (tuple3(0,rg_tlb_exception,perf_monitor)); + rg_tlb_exception<=tagged None; + rg_perf_monitor<=0; + rg_trnslte_done[0]<=False; + $display($time,"\tDCACHE: Exception from TLB taken"); + end + else begin + $display($time,"\tDCACHE: Translation not done"); + rg_state[0] <= Idle; + end + `endif + endrule + rule wait_for_ioread_response(rg_state[0]==IOReadResp && memoperation.notFull); + `ifdef 
verbose $display($time,"\tDCACHE: Received IO Read Response"); `endif
+      Bit#(TLog#(`DCACHE_WORD_SIZE)) byte_offset=rg_vaddress[byte_bits-1:0];
+      Bit#(`Reg_width) data_value=ff_read_response_from_memory.first.data_line;
+      ff_read_response_from_memory.deq;
+      data_value=data_value>>({4'b0,byte_offset}*8); // shift the addressed byte down to bit 0
+      if(!rg_signextend) // zero- or sign-extend according to transfer size (0: 8 bits, 1: 16 bits, 2: 32 bits, otherwise unchanged)
+        data_value=rg_transfer_size==0?zeroExtend(data_value[7:0]):rg_transfer_size==1?zeroExtend(data_value[15:0]):rg_transfer_size==2?zeroExtend(data_value[31:0]):data_value;
+      else
+        data_value=rg_transfer_size==0?signExtend(data_value[7:0]):rg_transfer_size==1?signExtend(data_value[15:0]):rg_transfer_size==2?signExtend(data_value[31:0]):data_value;
+      wr_response_to_cpu<=tagged Valid (tuple3(data_value,ff_read_response_from_memory.first.bus_error==1?tagged Exception Load_access_fault:tagged None,rg_perf_monitor));
+      if(rg_load_store==Atomic)begin // atomic: follow the read with a single-beat write of the computed value
+        let {success,storeResult,newdata} <- atomic_operation(data_value,rg_writedata,rg_atomic_op,rg_paddress);
+        `ifdef MMU
+          ff_write_request_to_memory.enq(To_Memory_Write{address:rg_paddress,data_line:zeroExtend(newdata),burst_length:1,transfer_size:rg_transfer_size,ld_st:Store});
+        `else
+          ff_write_request_to_memory.enq(To_Memory_Write{address:truncate(rg_vaddress),data_line:zeroExtend(newdata),burst_length:1,transfer_size:rg_transfer_size,ld_st:Store}); // fix: was new_data, which this rule never binds
+        `endif
+        rg_state[0]<=IOWriteResp;
+      end
+      else begin
+        rg_state[0]<=Idle;
+      end
+      rg_perf_monitor<=0;
+    endrule
+    rule wait_for_iowrite_response(rg_state[0]==IOWriteResp && !memoperation.notEmpty && !pending_fence_write_response[1]); // guard: state is IOWriteResp, memoperation FIFO empty, pending_fence_write_response[1] clear
+      `ifdef verbose $display($time,"\tDCACHE: Received IO Write Response"); `endif
+      ff_write_response_from_memory.deq;
+      if(rg_load_store!=Atomic) // an atomic already answered the CPU from wait_for_ioread_response
+        wr_response_to_cpu<=tagged Valid (tuple3(0,ff_write_response_from_memory.first.bus_error==1?tagged Exception Store_access_fault:tagged None,rg_perf_monitor));
+      rg_perf_monitor<=0;
+      rg_state[0]<=Idle;
+    endrule
+    /*============== One cycle delay to ensure the write is reflected
in the BRAM ========= */ + rule stall_the_next_request_by_one_cycle(rg_state[0]==Stall1); + Bit#(TLog#(`DCACHE_SETS)) setindex=rg_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits]; + for(Integer i=0;i<`DCACHE_WAYS;i=i+1)begin // send address to the Block_rams + tag[i].read_request(setindex); + data[i].read_request(setindex); + end + rg_state[0]<=ReadingCache; + endrule + /*===================================================================================== */ + /*======= filling up the cache from the data recieved from the external memory ======= */ + (*conflict_free="virtual_address,fillcache"*) + rule fillcache(memoperation.notEmpty && line_bytes_written!='1); + let memresp=ff_read_response_from_memory.first; + ff_read_response_from_memory.deq; + let {cpu_tag,setindex,replaceblock,writeenable}=memoperation.first; + `ifdef verbose $display($time,"\tDCACHE: Response from Memory: %h setindex: %d cpu_tag: %h replaceblock: %d",memresp.data_line,setindex,cpu_tag,replaceblock); `endif + let we=writeenable; + if(|line_bytes_written!=0)begin + we=rg_we; + end + Bit#(TMul#(2,TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE))) extended_mask=zeroExtend(we)<<8; + lbdata.write_portB(we,duplicate(memresp.data_line)); + `ifdef verbose $display($time,"\tDCACHE: linebytes: %h currently writing into: %h",line_bytes_written,we); `endif + if(memresp.last_word)begin // if all the data words have been fetched exit + `ifdef verbose $display($time,"\tDCACHE: Received Last response from Memory set: %d ",setindex); `endif + end + line_bytes_written<=line_bytes_written|we; + rg_we<=extended_mask[2*`DCACHE_BLOCK_SIZE*`DCACHE_WORD_SIZE-1:`DCACHE_BLOCK_SIZE*`DCACHE_WORD_SIZE]|extended_mask[`DCACHE_BLOCK_SIZE*`DCACHE_WORD_SIZE-1:0]; + endrule + rule read_from_lbdata_into_hold_reg(line_bytes_written=='1); + let lb_hold_reg=lbdata.response_portB; + let {cputag,setindex,replaceblock,writeenable}=memoperation.first; + data[replaceblock].write_request('1,setindex,lb_hold_reg); + 
tag[replaceblock].write_request(setindex,{lb_dirty,1'b1,cputag}); + line_bytes_written<=0; + lb_dirty<=0; + memoperation.deq; + `ifdef verbose $display($time,"\tDCACHE: capturing lbdata cpu_tag: %h setindex: %d addr: %h linenum: %d data: %h",cputag, setindex,{cputag,setindex,6'd0}, replaceblock,lb_hold_reg); `endif + if(rg_state[1]==ReadingCache) + rg_state[1]<=Stall1; + endrule + /*===================================================================================== */ + /*===================================================================================== */ + rule keep_polling_on_stall(rg_state[0]==KeepPolling); + Bit#(`PERFMONITORS) perf_monitor=rg_perf_monitor; + if(capture_counters)begin + $display($time,"\tDCACHE: Miss during polling for ",fshow(rg_load_store)); + if(rg_load_store==Load)begin + perf_monitor[`DCACHE_LOAD_MISS]=1; + perf_monitor[`DCACHE_CACHEABLE_LOAD]=1; + end + else if(rg_load_store==Store)begin + perf_monitor[`DCACHE_STORE_MISS]=1; + perf_monitor[`DCACHE_CACHEABLE_STORE]=1; + end + else if(rg_load_store==Atomic) begin + perf_monitor[`DCACHE_ATOMIC_MISS]=1; + perf_monitor[`DCACHE_CACHEABLE_ATOMIC]=1; + end + rg_perf_monitor<=perf_monitor; + end + + Bit#(TLog#(`DCACHE_SETS)) setindex=rg_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits]; + `ifdef MMU + Bit#(20) cpu_tag=rg_paddress[`PADDR-1:`PADDR-20]; + `else + Bit#(20) cpu_tag=rg_vaddress[`PADDR-1:`PADDR-20]; + `endif + let {lbtag,lbset,lbreplaceblock,lbwriteenable}=memoperation.first; + if((line_bytes_written & rg_writeenable) == rg_writeenable && (lbset==setindex && lbtag==cpu_tag))begin + `ifdef verbose $display($time,"\tDCACHE: Accessing LB"); `endif + rg_state[0]<=ReadingCache; + increment_counters<=False; + for(Integer i=0;i<`DCACHE_WAYS;i=i+1)begin // send address to the Block_rams + tag[i].read_request(setindex); + data[i].read_request(setindex); + end + end + `ifdef verbose $display($time,"\tDCACHE: Polling on LB. 
cpu_tag: %h lbtag: %h required: %h bytes in Buffer: %h",cpu_tag,lbtag,rg_writeenable,line_bytes_written); `endif + endrule + + /*============= Prediction in burst mode ================================ */ + method Action virtual_address(Bit#(`VADDR) vaddress, Access_type load_store, Bit#(TMul#(`DCACHE_WORD_SIZE,8)) writedata, Bit#(3) transfer_size, Bit#(5) atomic_op, Bool signextend)if(rg_state[1]==Idle); + Bit#(`PERFMONITORS) perf_monitor=0; + Bit#(TLog#(`DCACHE_SETS)) setindex=vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits]; + `ifdef verbose $display($time,"\tDCACHE: ",fshow(load_store)," Request of VAddr: %h transfersize: %d signextend: %b setindex: %d",vaddress,transfer_size, signextend,setindex); `endif + Bit#(TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE)) we=transfer_size==0?'b1:transfer_size==1?'b11:transfer_size==2?'hF:'hFF; + Bit#(TLog#(`DCACHE_BLOCK_SIZE)) word_offset= vaddress[word_bits+byte_bits-1:byte_bits]; + Bit#(TLog#(`DCACHE_WORD_SIZE)) byte_offset=vaddress[byte_bits-1:0]; + we=we<<{4'b0,word_offset}*8; + we=we<10) + $finish(0); + endrule + endmodule +endpackage diff --git a/src/core/decode.defines b/src/core/decode.defines new file mode 100644 index 0000000..3fd0971 --- /dev/null +++ b/src/core/decode.defines @@ -0,0 +1,204 @@ + `define FNADD 0 + `define FNSL 1 + `define FNLR 2 + `define FNSEQ 2 + `define FNSC 3 + `define FNSNE 3 + `define FNXOR 4 + `define FNSR 5 + `define FNOR 6 + `define FNAND 7 + `define FNSUB 10 + `define FNSRA 11 + `define FNSLT 12 + `define FNSGE 13 + `define FNSLTU 14 + `define FNSGEU 15 + + `define FNSWAP 1 + `define FMINU 10 + `define FMIN 11 + `define FMAXU 12 + `define FMAX 13 + + `define FNRAND 8 + + `define BEQ 'b?????????????????000?????1100011 + `define BNE 'b?????????????????001?????1100011 + `define BLT 'b?????????????????100?????1100011 + `define BGE 'b?????????????????101?????1100011 + `define BLTU 'b?????????????????110?????1100011 + `define BGEU 'b?????????????????111?????1100011 + `define JALR 
'b?????????????????000?????1100111 + `define JAL 'b?????????????????????????1101111 + `define LUI 'b?????????????????????????0110111 + `define AUIPC 'b?????????????????????????0010111 + `define ADDI 'b?????????????????000?????0010011 + `define SLLI 'b000000???????????001?????0010011 + `define SLTI 'b?????????????????010?????0010011 + `define SLTIU 'b?????????????????011?????0010011 + `define XORI 'b?????????????????100?????0010011 + `define SRLI 'b000000???????????101?????0010011 + `define SRAI 'b010000???????????101?????0010011 + `define ORI 'b?????????????????110?????0010011 + `define ANDI 'b?????????????????111?????0010011 + `define ADD 'b0000000??????????000?????0110011 + `define SUB 'b0100000??????????000?????0110011 + `define SLL 'b0000000??????????001?????0110011 + `define SLT 'b0000000??????????010?????0110011 + `define SLTU 'b0000000??????????011?????0110011 + `define XOR 'b0000000??????????100?????0110011 + `define SRL 'b0000000??????????101?????0110011 + `define SRA 'b0100000??????????101?????0110011 + `define OR 'b0000000??????????110?????0110011 + `define AND 'b0000000??????????111?????0110011 + `define ADDIW 'b?????????????????000?????0011011 + `define SLLIW 'b0000000??????????001?????0011011 + `define SRLIW 'b0000000??????????101?????0011011 + `define SRAIW 'b0100000??????????101?????0011011 + `define ADDW 'b0000000??????????000?????0111011 + `define SUBW 'b0100000??????????000?????0111011 + `define SLLW 'b0000000??????????001?????0111011 + `define SRLW 'b0000000??????????101?????0111011 + `define SRAW 'b0100000??????????101?????0111011 + `define LB 'b?????????????????000?????0000011 + `define LH 'b?????????????????001?????0000011 + `define LW 'b?????????????????010?????0000011 + `define LD 'b?????????????????011?????0000011 + `define LBU 'b?????????????????100?????0000011 + `define LHU 'b?????????????????101?????0000011 + `define LWU 'b?????????????????110?????0000011 + `define SB 'b?????????????????000?????0100011 + `define SH 
'b?????????????????001?????0100011 + `define SW 'b?????????????????010?????0100011 + `define SD 'b?????????????????011?????0100011 + `define FENCE 'b?????????????????000?????0001111 + `define FENCE_I 'b?????????????????001?????0001111 + `define MUL 'b0000001??????????000?????0110011 + `define MULH 'b0000001??????????001?????0110011 + `define MULHSU 'b0000001??????????010?????0110011 + `define MULHU 'b0000001??????????011?????0110011 + `define DIV 'b0000001??????????100?????0110011 + `define DIVU 'b0000001??????????101?????0110011 + `define REM 'b0000001??????????110?????0110011 + `define REMU 'b0000001??????????111?????0110011 + `define MULW 'b0000001??????????000?????0111011 + `define DIVW 'b0000001??????????100?????0111011 + `define DIVUW 'b0000001??????????101?????0111011 + `define REMW 'b0000001??????????110?????0111011 + `define REMUW 'b0000001??????????111?????0111011 + `define AMOADD_W 'b00000????????????010?????0101111 + `define AMOXOR_W 'b00100????????????010?????0101111 + `define AMOOR_W 'b01000????????????010?????0101111 + `define AMOAND_W 'b01100????????????010?????0101111 + `define AMOMIN_W 'b10000????????????010?????0101111 + `define AMOMAX_W 'b10100????????????010?????0101111 + `define AMOMINU_W 'b11000????????????010?????0101111 + `define AMOMAXU_W 'b11100????????????010?????0101111 + `define AMOSWAP_W 'b00001????????????010?????0101111 + `define LR_W 'b00010??00000?????010?????0101111 + `define SC_W 'b00011????????????010?????0101111 + `define AMOADD_D 'b00000????????????011?????0101111 + `define AMOXOR_D 'b00100????????????011?????0101111 + `define AMOOR_D 'b01000????????????011?????0101111 + `define AMOAND_D 'b01100????????????011?????0101111 + `define AMOMIN_D 'b10000????????????011?????0101111 + `define AMOMAX_D 'b10100????????????011?????0101111 + `define AMOMINU_D 'b11000????????????011?????0101111 + `define AMOMAXU_D 'b11100????????????011?????0101111 + `define AMOSWAP_D 'b00001????????????011?????0101111 + `define LR_D 
'b00010??00000?????011?????0101111 + `define SC_D 'b00011????????????011?????0101111 + `define ECALL 'b00000000000000000000000001110011 + `define EBREAK 'b00000000000100000000000001110011 + `define URET 'b00000000001000000000000001110011 + `define SRET 'b00010000001000000000000001110011 + `define HRET 'b00100000001000000000000001110011 + `define MRET 'b00110000001000000000000001110011 + `define DRET 'b01111011001000000000000001110011 + `define SFENCE_VMA 'b0001001??????????000000001110011 + `define WFI 'b00010000010100000000000001110011 + `define CSRRW 'b?????????????????001?????1110011 + `define CSRRS 'b?????????????????010?????1110011 + `define CSRRC 'b?????????????????011?????1110011 + `define CSRRWI 'b?????????????????101?????1110011 + `define CSRRSI 'b?????????????????110?????1110011 + `define CSRRCI 'b?????????????????111?????1110011 + `define FADD_S 'b0000000??????????????????1010011 + `define FSUB_S 'b0000100??????????????????1010011 + `define FMUL_S 'b0001000??????????????????1010011 + `define FDIV_S 'b0001100??????????????????1010011 + `define FSGNJ_S 'b0010000??????????000?????1010011 + `define FSGNJN_S 'b0010000??????????001?????1010011 + `define FSGNJX_S 'b0010000??????????010?????1010011 + `define FMIN_S 'b0010100??????????000?????1010011 + `define FMAX_S 'b0010100??????????001?????1010011 + `define FSQRT_S 'b010110000000?????????????1010011 + `define FADD_D 'b0000001??????????????????1010011 + `define FSUB_D 'b0000101??????????????????1010011 + `define FMUL_D 'b0001001??????????????????1010011 + `define FDIV_D 'b0001101??????????????????1010011 + `define FSGNJ_D 'b0010001??????????000?????1010011 + `define FSGNJN_D 'b0010001??????????001?????1010011 + `define FSGNJX_D 'b0010001??????????010?????1010011 + `define FMIN_D 'b0010101??????????000?????1010011 + `define FMAX_D 'b0010101??????????001?????1010011 + `define FCVT_S_D 'b010000000001?????????????1010011 + `define FCVT_D_S 'b010000100000?????????????1010011 + `define FSQRT_D 
'b010110100000?????????????1010011 + `define FLE_S 'b1010000??????????000?????1010011 + `define FLT_S 'b1010000??????????001?????1010011 + `define FEQ_S 'b1010000??????????010?????1010011 + `define FLE_D 'b1010001??????????000?????1010011 + `define FLT_D 'b1010001??????????001?????1010011 + `define FEQ_D 'b1010001??????????010?????1010011 + `define FCVT_W_S 'b110000000000?????????????1010011 + `define FCVT_WU_S 'b110000000001?????????????1010011 + `define FCVT_L_S 'b110000000010?????????????1010011 + `define FCVT_LU_S 'b110000000011?????????????1010011 + `define FMV_X_S 'b111000000000?????000?????1010011 + `define FCLASS_S 'b111000000000?????001?????1010011 + `define FCVT_W_D 'b110000100000?????????????1010011 + `define FCVT_WU_D 'b110000100001?????????????1010011 + `define FCVT_L_D 'b110000100010?????????????1010011 + `define FCVT_LU_D 'b110000100011?????????????1010011 + `define FMV_X_D 'b111000100000?????000?????1010011 + `define FCLASS_D 'b111000100000?????001?????1010011 + `define FCVT_S_W 'b110100000000?????????????1010011 + `define FCVT_S_WU 'b110100000001?????????????1010011 + `define FCVT_S_L 'b110100000010?????????????1010011 + `define FCVT_S_LU 'b110100000011?????????????1010011 + `define FMV_S_X 'b111100000000?????000?????1010011 + `define FCVT_D_W 'b110100100000?????????????1010011 + `define FCVT_D_WU 'b110100100001?????????????1010011 + `define FCVT_D_L 'b110100100010?????????????1010011 + `define FCVT_D_LU 'b110100100011?????????????1010011 + `define FMV_D_X 'b111100100000?????000?????1010011 + `define FLW 'b?????????????????010?????0000111 + `define FLD 'b?????????????????011?????0000111 + `define FSW 'b?????????????????010?????0100111 + `define FSD 'b?????????????????011?????0100111 + `define FMADD_S 'b?????00??????????????????1000011 + `define FMSUB_S 'b?????00??????????????????1000111 + `define FNMSUB_S 'b?????00??????????????????1001011 + `define FNMADD_S 'b?????00??????????????????1001111 + `define FMADD_D 'b?????01??????????????????1000011 + 
`define FMSUB_D 'b?????01??????????????????1000111
+  `define FNMSUB_D 'b?????01??????????????????1001011
+  `define FNMADD_D 'b?????01??????????????????1001111
+  `define FRFLAGS 'b00000000000100000010?????1110011
+  `define FSFLAGS 'b000000000001?????001?????1110011
+  `define FSFLAGSI 'b000000000001?????101?????1110011
+  `define FRRM 'b00000000001000000010?????1110011
+  `define FSRM 'b000000000010?????001?????1110011
+  `define FSRMI 'b000000000010?????101?????1110011
+  `define FSCSR 'b000000000011?????001?????1110011
+  `define FRCSR 'b00000000001100000010?????1110011
+  `define RDCYCLE 'b11000000000000000010?????1110011
+  `define RDTIME 'b11000000000100000010?????1110011
+  `define RDINSTRET 'b11000000001000000010?????1110011
+  `define RDCYCLEH 'b11001000000000000010?????1110011
+  `define RDTIMEH 'b11001000000100000010?????1110011
+  `define RDINSTRETH 'b11001000001000000010?????1110011
+  `define SCALL 'b00000000000000000000000001110011
+  `define SBREAK 'b00000000000100000000000001110011
diff --git a/src/core/decode_opfetch.bsv b/src/core/decode_opfetch.bsv
new file mode 100644
index 0000000..39f50ec
--- /dev/null
+++ b/src/core/decode_opfetch.bsv
@@ -0,0 +1,190 @@
+/*
+Copyright (c) 2013, IIT Madras
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+* Neither the name of IIT Madras nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+*/
+package decode_opfetch;
+  /*============= package imports ========== */
+  import FIFOF::*;
+  import TxRx:: *;
+  import DReg::*;
+  /* ======================================= */
+
+  /* ============== project imports ======= */
+  import registerfile::*;
+  import decoder::*;
+  import defined_types::*;
+  `include "defined_parameters.bsv"
+  /* ======================================= */
+
+  interface Ifc_decode_opfetch; // decode/operand-fetch stage: IF_ID_type records in via rx_in, ID_IE_type records out via tx_out
+    method Action write_rd (Bit#(5)r, Bit#(`Reg_width) d, Operand_type rdtype);
+    /* ====================== pipe connections ========= */
+    interface RXe#(IF_ID_type) rx_in;
+    interface TXe#(ID_IE_type) tx_out;
+    /*================================================== */
+    `ifdef Debug
+    method Bit#(`Reg_width) read_debug_igpr (Bit#(5) r); // Read a General-Purpose Register
+    method Action write_debug_igpr (Bit#(5) r, Bit#(`Reg_width) d); // Write a General-Purpose Register
+    method Bit#(`Reg_width) read_debug_fgpr (Bit#(5) r); // Read a floating-point register (inferred from the fgpr name; confirm against registerfile)
+    method Action write_debug_fgpr (Bit#(5) r, Bit#(`Reg_width) d); // Write a floating-point register (inferred from the fgpr name; confirm against registerfile)
+    `endif
+    method Action flush();
+    method Action trap_from_csr(Tuple2#(Bit#(3),Trap_type) tt);
+    method Action misa(Bit#(`Reg_width) val);
+    method Action update_eEpoch;
+    method Action update_wEpoch;
+    method Action inferred_xlen(Bit#(2) mxl);
+  endinterface:Ifc_decode_opfetch
+
+  function Bool isNone(Trap_type trap); // True exactly when trap carries the None tag
+    if(trap matches tagged None)
+      return True;
+    else
+      return False;
+  endfunction
+
+  (*synthesize*)
+  module mkdecode_opfetch(Ifc_decode_opfetch);
+    Reg#(Bit#(`PERFMONITORS)) rg_decode_perfmon<-mkDReg(0); // DReg: reads 0 in any cycle that does not follow a write
+    Wire#(Tuple2#(Bit#(3),Trap_type)) wr_trap_type<-mkDWire(tuple2(0,tagged None)); // DWire: defaults to (0, None) when not written this cycle
+    Wire#(Bit#(`Reg_width)) wr_misa<-mkWire(); // plain Wire: a read is only ready in a cycle in which the wire is written
+    Reg#(Bit#(1)) eEpoch <-mkReg(0); // {eEpoch,wEpoch} is compared with the incoming instruction's epochs in rl_operand_fetch
+    Reg#(Bit#(1)) wEpoch <-mkReg(0);
+    // Set by rl_operand_fetch when it forwards an instruction that carries an exception or is a SYSTEM_INSTR; while set the rule cannot fire.
+    // Per the original note: nothing else fills the pipe until write-back generates a flush, which also keeps later stores out.
+    Reg#(Bool) rg_flush_ahead <-mkReg(False);
+
+
+    Ifc_registerfile registerfile <-mkregisterfile();
+    RX#(IF_ID_type) rx <-mkRX;
+    TX#(ID_IE_type) tx <-mkTX;
+    /*=================================== Decode and Operand Fetch ======================================================================*/
+    // This rule decodes the instruction and provides necessary info for the execution units.
+ rule rl_operand_fetch(rx.u.notEmpty && tx.u.notFull && !rg_flush_ahead); + `ifdef verbose + $display($time,"\t********** DECODE STAGE FIRING ************ PC: %h EPOCHS: %b Instr-EPOCHS: %b",rx.u.first.program_counter,{eEpoch,wEpoch}, rx.u.first.epochs) ; + `endif + if({eEpoch,wEpoch}!=rx.u.first.epochs)begin + `ifdef verbose $display($time,"\tDECODE: PC: %h Dropping Instruction since EPOCSH do not match",rx.u.first.program_counter); `endif + rx.u.deq(); + end + else begin + let x = fn_decode(rx.u.first().instruction,rx.u.first.exception, wr_misa, rx.u.first.perfmonitors); + let pc=rx.u.first.program_counter; + let dest=x.rd; + let {debugcause,csr_ex}=wr_trap_type; + Bit#(`PERFMONITORS) perfmonitor_incr=x.perf; + Trap_type exception=x.exception; + Bool trap_on_wfi = False; + if(exception matches tagged None) + exception = csr_ex; + Bool dnwfi=True; + if(x.immediate_value[2:0]=='b101 && x.immediate_value[5]==0 && x.funct3==0 && x.inst_type==SYSTEM_INSTR) begin + dnwfi=False; + trap_on_wfi = True; + end + if(exception matches tagged Interrupt .i) + dnwfi=True; + + if(trap_on_wfi && dnwfi) + pc=pc+4; + + Bit#(`VADDR) nextpc=rx.u.first.nextpc; + + if(x.inst_type==NOP)begin + `ifdef verbose $display($time,"DECODE: NOP Instruction"); `endif + rx.u.deq(); + end + else begin + Bool choose_rs3=`ifdef spfpu ( `ifdef dpfpu x.inst_type==DFLOATING || `endif x.inst_type==FLOATING) && (rx.u.first.instruction[6:4]=='b100) `else False `endif ; + let operands<- registerfile._inputs_from_decode_stage(x.rs1,x.rs1type,x.rs2,x.rs2type,pc,x.immediate_value `ifdef spfpu ,choose_rs3, x.rs3 `endif ); + if(dnwfi)begin + Bool e = isNone(exception); + if(!e || x.inst_type==SYSTEM_INSTR) + rg_flush_ahead<=True; + rx.u.deq(); + tx.u.enq(ID_IE_type{ + rs1:operands.rs1, + rs2:(x.inst_type==MEMORY && (x.mem_access!=Load))?x.immediate_value:operands.rs2, + rs3_imm:`ifdef spfpu (choose_rs3)?operands.rs3: `endif (x.inst_type==MEMORY && x.mem_access!=Load)?operands.rs2:x.immediate_value, + 
rdtype:x.rdtype, + inst_type:x.inst_type, + destination:x.rd, + program_counter:pc, + exception:exception, + fn:x.fn, + mem_access:x.mem_access, + word32:x.word32, + funct3:x.funct3, + nextpc:rx.u.first.nextpc, + debugcause:debugcause, + perfmonitors:perfmonitor_incr, + prediction:rx.u.first.prediction, + epochs:rx.u.first.epochs, + rs1_type:x.rs1type, + rs2_type:(x.inst_type==MEMORY && (x.mem_access!=Load))?Immediate:x.rs2type, + rs3_type:(x.inst_type==MEMORY && (x.mem_access!=Load))?x.rs2type:`ifdef spfpu choose_rs3?FloatingRF: `endif Immediate, + rs1addr:x.rs1, + rs2addr:x.rs2, + rs3addr:(x.inst_type==MEMORY && (x.mem_access!=Load))?x.rs2:choose_rs3?x.rs3:0 + `ifdef simulate ,instruction:rx.u.first.instruction `endif }); + end + else begin + `ifdef verbose $display($time,"\tWaiting for interrupt"); `endif + end + `ifdef verbose + $display($time,"\tDECODE: Instruction : %h",rx.u.first().instruction," ",fshow(x.inst_type)," FN: %b",x.fn," ",fshow(x.mem_access)); + $display($time,"\tRs1: %d",x.rs1," ",fshow(x.rs1type)); + $display($time,"\tRs2: %d",x.rs2," ",fshow(x.rs2type)); + `ifdef spfpu $display($time,"\tRs3: %d",x.rs3); `endif + $display($time,"\tRd: %d",x.rd," ",fshow(x.rdtype)); + $display($time,"\tImmediate Value: %h",x.immediate_value); + $display($time,"\tException: ",fshow(exception)); + $display($time,"\t*****************************************************"); + `endif + end + end + endrule + /* ============================== method and interface definitions ========================= */ + method tx_out=tx.e; + method rx_in=rx.e; + method Action write_rd (Bit#(5)r, Bit#(`Reg_width) d, Operand_type rdtype)=registerfile.write_rd(r,d,rdtype); + method Action flush(); + `ifdef verbose $display($time,"\tDECODE: Flushing"); `endif + rg_flush_ahead<=False; + endmethod + method Action trap_from_csr(Tuple2#(Bit#(3),Trap_type) tt); + wr_trap_type<=tt; + endmethod + `ifdef Debug + method read_debug_igpr (Bit#(5) r) = registerfile.read_debug_igpr(r); // Read a 
General-Purpose Register + method Action write_debug_igpr (Bit#(5) r, Bit#(`Reg_width) d)=registerfile.write_debug_igpr(r,d); // Write a General-Purpose Register + method read_debug_fgpr (Bit#(5) r)=registerfile.read_debug_fgpr(r); // Read a General-Purpose Register + method Action write_debug_fgpr (Bit#(5) r, Bit#(`Reg_width) d)=registerfile.write_debug_fgpr(r,d); // Write a General-Purpose Register + `endif + method Action misa(Bit#(`Reg_width) val); + wr_misa<=val; + endmethod + method Action update_eEpoch; + `ifdef verbose $display($time,"\tDECODE: updating eEpoch"); `endif + eEpoch<=~eEpoch; + endmethod + method Action update_wEpoch; + `ifdef verbose $display($time,"\tDECODE: updating wEpoch"); `endif + wEpoch<=~wEpoch; + endmethod + method Action inferred_xlen(Bit#(2) mxl) ; + registerfile.inferred_xlen(mxl); + endmethod +// method init_complete=registerfile.init_complete; + endmodule +endpackage:decode_opfetch diff --git a/src/core/decoder.bsv b/src/core/decoder.bsv new file mode 100644 index 0000000..902b409 --- /dev/null +++ b/src/core/decoder.bsv @@ -0,0 +1,215 @@ +/* +Copyright (c) 2013, IIT Madras +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +* Neither the name of IIT Madras nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +*/ +package decoder; + `include "defined_parameters.bsv" + import defined_types::*; + + typedef struct { /* everything fn_decode extracts from one instruction */ + Bit#(4) fn; + Bit#(5) rs1; + Bit#(5) rs2; + Bit#(5) rs3; + Bit#(5) rd; + Operand_type rs1type; + Operand_type rs2type; + Operand_type rdtype; + Instruction_type inst_type; + Bit#(`Reg_width) immediate_value; + Bool word32; + Access_type mem_access; + Trap_type exception; + Bit#(3) funct3; + Bit#(`PERFMONITORS) perf; + } Decoded_data deriving(Bits,Eq,FShow); + +(*noinline*) +function Decoded_data fn_decode(Bit#(32) instruction, Trap_type exception, Bit#(`Reg_width) misa, Bit#(`PERFMONITORS) perfmonitors); /* decodes one 32-bit instruction; an incoming exception other than None is passed through unchanged; perfmonitors is returned with decode-time event bits set */ + Bit#(5) rs1=instruction[19:15]; + Bit#(5) rs2=instruction[24:20]; + Bit#(5) rs3=instruction[31:27]; + Bit#(5) rd=instruction[11:7]; + Bit#(5) opcode = instruction[6:2]; /* instruction[1:0] is checked separately in the exception logic */ + Bit#(7) funct7 = instruction[31:25]; + Bit#(3) funct3 = instruction[14:12]; + Bool word32 =False; + + Access_type mem_access=Load; /* default; overridden purely from opcode bits 3 and 1, whatever the instruction class */ + `ifdef atomic + if(opcode[3]=='b1 && opcode[1]=='b1) + mem_access=Atomic; + else + `endif + if(opcode[3]=='b1 && opcode[1]==0) +
mem_access=Store; + + Operand_type rs1type=IntegerRF; + Operand_type rs2type=IntegerRF; + Operand_type rdtype=IntegerRF; + + Bit#(`Reg_width) immediate_value=signExtend(instruction[31:20]); /* default: sign-extended instruction[31:20] */ + if(opcode==`LUI_op|| opcode==`AUIPC_op) + immediate_value=signExtend({instruction[31:12],12'd0}); + else if(opcode==`JAL_op) + immediate_value=signExtend({instruction[31],instruction[19:12],instruction[20],instruction[30:21],1'b0}); + else if(opcode==`JALR_op) + immediate_value=signExtend({instruction[31:21],1'b0}); /* same field as the default, with bit 0 forced to 0 */ + else if (opcode==`BRANCH_op) // Branch instructions + immediate_value=signExtend({instruction[31],instruction[7],instruction[30:25],instruction[11:8],1'b0}); + else if (opcode==`STORE_op `ifdef spfpu || opcode==`FSTORE_op `endif ) // Store operations + immediate_value=signExtend({instruction[31:25],instruction[11:7]}); + else if(opcode==`CSR_op) + immediate_value[16:12]=instruction[19:15]; /* keeps the default immediate and overwrites bits 16:12 with instruction[19:15] (the rs1 field) */ + else if(opcode==`ATOMIC_op) + immediate_value=0; + + if(opcode==`LUI_op || opcode==`JAL_op || opcode==`AUIPC_op || (opcode==`CSR_op && funct3[2]==1)) + rs1=0; + if(opcode==`CSR_op || opcode[4:2]=='b000 // CSR or ( (F)Load or FENCE ) + || opcode==`LUI_op || opcode==`JAL_op || opcode[4:2]=='b001 // LUI or JAL or (AUIPC or IMMediate Arith) + || opcode==`JALR_op || (opcode[4:2]=='b101 && funct7[5]==1)) // JALR or Floating conversion operations.
+ rs2=0; + if(opcode==`BRANCH_op || opcode[4:1]=='b0100) + rd=0; /* BRANCH and opcodes 0100x (STORE_op / FSTORE_op) get no destination register */ + + if(opcode==`JAL_op || opcode==`AUIPC_op) + rs1type=PC; +`ifdef spfpu + else if(opcode[4:2]=='b100 || (opcode[4:2]=='b101 && // (F(N)MADD or F(N)SUB) + (funct7[6:3]!='b1101 && funct7[6:3]!='b1111))) // some of the conversion operations + rs1type=FloatingRF; +`endif + + if(opcode==`JAL_op || opcode==`JALR_op || opcode==`LUI_op|| opcode[4:2]=='b001 // JAL or JALR or (AUIPC or IMM Arith) + || opcode[4:1]==0) // (F)Load + rs2type=Immediate; +`ifdef spfpu + else if((opcode[4:2]=='b101 && funct7[5]!='b1) || opcode==`FSTORE_op || opcode[4:2]=='b100) // All convert + FSQRT operations do not need rs2 + rs2type=FloatingRF; + + if(opcode==`FLOAD_op || (opcode[4:2]=='b101 && + funct7[6:3]!='b1010 && funct7[6:3]!='b1100 && funct7[6:3]!='b1110 ) || opcode[4:2]=='b100) + rdtype=FloatingRF; +`endif + + if(opcode==`IMM_ARITHW_op || opcode==`MULDIVW_op || opcode==`ARITHW_op || (opcode[4:3]=='b10 && funct7[0]==0) + || (opcode[4:1]=='b0101 && funct3[0]==0)) + word32=True; + + Instruction_type inst_type=NOP; /* stays NOP for every opcode not matched below */ + `ifdef spfpu + if(opcode[4:3]=='b10)begin + inst_type=funct7[0]==0?FLOATING:DFLOATING; + end + else `endif + if(opcode[4:3]=='b11)begin + case (opcode[2:0]) + 'b011:inst_type=JAL; + 'b001:inst_type=JALR; + 'b000:inst_type=BRANCH; + 'b100:inst_type=SYSTEM_INSTR; + endcase + end + else if(opcode[4:3]=='b01)begin + case (opcode[2:0]) + 'b000,'b011,'b001:inst_type=MEMORY; // STORE or FSTORE or ATOMIC + 'b101:inst_type=ALU; // LUI + 'b100,'b110:inst_type=(funct7[0]==1)?(funct3[2]==0)?MUL:DIV:ALU; + endcase + end + else if(opcode[4:3]=='b00)begin + case(opcode[2:0]) + 'b000,'b001:inst_type=MEMORY; // LOAD or FLOAD + 'b101,'b100,'b110:inst_type=ALU; //AUIPC IMM WORD + 'b011:inst_type=(funct3[0]==0)?FENCE:FENCEI; + endcase + end + + Trap_type ex=tagged None; + if(exception matches tagged None)begin + if( `ifdef spfpu (inst_type==FLOATING && misa[5]==0) `ifdef dpfpu || (inst_type==DFLOATING && misa[3]==0) `endif || `endif +
(inst_type==MUL && misa[12]==0) || (inst_type==DIV && misa[12]==0) + `ifdef atomic || (inst_type==MEMORY && mem_access==Atomic && misa[0]==0) `endif ) + ex=tagged Exception Illegal_inst; /* instruction class whose misa extension bit is clear */ + `ifdef simulate + if(inst_type==JAL && immediate_value==0) + ex=tagged Exception Endsimulation; /* simulation builds only: a JAL with zero offset is reported as Endsimulation */ + `endif + if(instruction[1:0]!='b11) + ex=tagged Exception Illegal_inst; /* low two bits other than 11 are rejected */ + if(inst_type==NOP) + ex=tagged Exception Illegal_inst; /* opcode matched none of the cases above */ + end + else + ex=exception; + + Bit#(4) fn=0; /* 4-bit operation selector returned in Decoded_data.fn; its encoding depends on the opcode class */ + if(opcode==`ATOMIC_op)begin + if((instruction[27] | instruction[28]) == 1) + fn={instruction[29:27],1'b1}; + else + fn={instruction[31:29],instruction[27]}; + end + else if(opcode==`BRANCH_op)begin + if(funct3[2]==0) + fn={2'b0,1,funct3[0]}; + else + fn={1'b1,funct3}; + end + else if(opcode==`JAL_op || opcode==`JALR_op || opcode==`LOAD_op `ifdef spfpu || opcode==`FLOAD_op `endif + || opcode==`STORE_op `ifdef spfpu || opcode==`FSTORE_op `endif || opcode==`AUIPC_op || opcode==`LUI_op) + fn=0; + else if(opcode==`IMM_ARITHW_op || opcode==`IMM_ARITH_op)begin + fn=case(funct3) + 'b010: 'b1100; + 'b011: 'b1110; + 'b101: if(funct7[5]==1) 'b1011; else 'b0101; + default:{1'b0,funct3}; + endcase; + end + else if(opcode==`ARITHW_op || opcode==`ARITH_op)begin + fn=case(funct3) + 'b000:if(funct7[5]==1) 'b1010; else 'b0000; + 'b010:'b1100; + 'b011:'b1110; + 'b101:if (funct7[5]==1) 'b1011;else 'b0101; + default:{1'b0,funct3}; + endcase; + end + else if(opcode[4:3]=='b10) + fn=opcode[3:0]; + if(inst_type==BRANCH) + perfmonitors[`COND_BRANCH]=1; /* from here on: set one event bit per decoded instruction class */ + `ifdef spfpu + if(inst_type==FLOATING) + perfmonitors[`SPFPU_INST]=1; + `endif + `ifdef dpfpu + if(inst_type==DFLOATING) + perfmonitors[`DPFPU_INST]=1; + `endif + if(inst_type==JAL || inst_type==JALR) + perfmonitors[`UNCOND_JUMPS]=1; + if(inst_type==MEMORY) + perfmonitors[`MEMORY_INSTRUCTIONS]=1; + if(inst_type==MUL || inst_type==DIV) + perfmonitors[`MULDIV_INSTRUCTIONS]=1; + + return (Decoded_data{fn:fn,rs1:rs1,rs2:rs2,rs3:rs3,rd:rd, + rs1type:rs1type,rs2type:rs2type,rdtype:rdtype, 
+ inst_type:inst_type,immediate_value:immediate_value, + word32:word32,mem_access:mem_access,exception:ex, + funct3:funct3,perf:perfmonitors}); + +endfunction +endpackage diff --git a/src/core/defined_parameters.bsv b/src/core/defined_parameters.bsv new file mode 100644 index 0000000..c44bae3 --- /dev/null +++ b/src/core/defined_parameters.bsv @@ -0,0 +1,485 @@ +`define RegFileSize 32 // describes the size of the register file in the processor. +`ifdef spfpu + `define FLEN 32 +`endif +`ifdef dpfpu + `define FLEN 64 +`endif +//`define fpu_hierarchical //Define this if you want hierarchical modules in verilog + +//`define MMU +`define PRFDEPTH 6 +`define USERSPACE 0 +`ifdef RV64 + `define Burst_length_bits 8 + `define byte_offset 2 + `define Reg_width 64 // the register data width of the processor. + `define ADDR 64 // the address width + `define DCACHE_ADDR 64 + `define DCACHE_BLOCK_SIZE 4 + `define DCACHE_WORD_SIZE 8 +/////////////////////////////MMU parameters/////////////////////////////////// +`define VADDR 39 +`define PADDR 32 +`define OFFSET 12 +`define ASID 8 +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// + // TLM2 Request Response definitions for Processor to Bus connection + `define TLM_PRM_CPU_REQ 4, 64, 64, 5, Bit #(0) + `define TLM_PRM_CPU_RSP 4, 64, 64, 5, Bit #(0) + + // TLM2 Request Response definitions for Memory to Bus connection + `define TLM_PRM_MEM_REQ 4, 64, 64, 5, Bit #(0) + `define TLM_PRM_MEM_RSP 4, 64, 64, 5, Bit #(0) + + // Axi Request Response definitions for Processor as a Master + `define AXI_PRM_CPU 4, 64, 64, 5, Bit #(0) // Fabric Interface + `define AXI_XTR_CPU TLMRequest #(`TLM_PRM_CPU_REQ), TLMResponse #(`TLM_PRM_CPU_RSP), `AXI_PRM_CPU // Transactor Interface + + // Axi Request Response definitions for Memory as a Slave + `define AXI_PRM_MEM 4, 64, 64, 5, Bit #(0) // Fabric Interface + `define AXI_XTR_MEM TLMRequest
#(`TLM_PRM_MEM_REQ), TLMResponse #(`TLM_PRM_MEM_RSP), `AXI_PRM_MEM // Transactor Interface +/////////////////////////////////////////////////////////////////////////////// +`else + `define byte_offset 1 + `define Reg_width 32 // the register data width of the processor. + `define Addr_width 32 // the address width + `define DCACHE_ADDR 32 + `define DCACHE_BLOCK_SIZE 8 + `define DCACHE_WORD_SIZE 4 +/////////////////////////////////////////////////////////////////////////////// + // TLM2 Request Response definitions for Processor to Bus connection + `define TLM_PRM_CPU_REQ 4, 32, 32, 5, Bit #(0) + `define TLM_PRM_CPU_RSP 4, 32, 32, 5, Bit #(0) + + // TLM2 Request Response definitions for Memory to Bus connection + `define TLM_PRM_MEM_REQ 4, 32, 32, 5, Bit #(0) + `define TLM_PRM_MEM_RSP 4, 32, 32, 5, Bit #(0) + + // Axi Request Response definitions for Processor as a Master + `define AXI_PRM_CPU 4, 32, 32, 5, Bit #(0) // Fabric Interface + `define AXI_XTR_CPU TLMRequest #(`TLM_PRM_CPU_REQ), TLMResponse #(`TLM_PRM_CPU_RSP), `AXI_PRM_CPU // Transactor Interface + + // Axi Request Response definitions for Memory as a Slave + `define AXI_PRM_MEM 4, 32, 32, 5, Bit #(0) // Fabric Interface + `define AXI_XTR_MEM TLMRequest #(`TLM_PRM_MEM_REQ), TLMResponse #(`TLM_PRM_MEM_RSP), `AXI_PRM_MEM // Transactor Interface +/////////////////////////////////////////////////////////////////////////////// +`endif + +`define Loop 1 +`define BAUD_RATE 130 +`ifdef verilog + `define Addr_space 22 //since we are leaving off the lower 2 bits of address(byte addressable memory), we have to +`else + `define Addr_space 30 +`endif +`ifdef simulate + `define BAUD_RATE 5 //130 // +`endif +`define INTERRUPT_PINS 64 + +// Branch_predictor_paramters +/////////////////////////// CACHE RELATED PARAMETERS //////////////////////////////// +`define DCACHE_WAYS 4 +`define DCACHE_SETS 512 + +`define ICACHE_WAYS 4 // way_bits =2 +`define ICACHE_BLOCK_SIZE 8 // word_bits = 3 +`define ICACHE_SETS 512 // set_bits 
=9 +`define ICACHE_WORD_SIZE 4 // byte_bits=2 +`define ICACHE_TAG_BITS 20 // tag_bits = 20 +`define DCACHE_TAG_BITS 20 // tag_bits = 20 +`define BTB_DEPTH 256 +`define RAS_DEPTH 8 +///////////////////////////////////////////////////////////////////////////////////// +`ifdef RV64 + `define MISA_BITS 'h141129 //'h082C849//// 'h40101121 // A + D + F + I + M + S + U + `define MXL_BITS 'h2 +`else + `define MISA_BITS 'h082C849 // 'h40101121 // A + F + I + M + U -- NOTE(review): 'h082C849 sets bits 0,3,6,11,14,15,17,23, not the A/F/I/M/U bits (0,5,8,12,20); confirm + `define MXL_BITS 'h1 +`endif +`define MTVEC_DEFAULT 'h00000000 +`define STVEC_DEFAULT 'h00000000 +`define UTVEC_DEFAULT 'h00000000 +/////////////////////////// Register Mapping for Machine Mode Regs ///////////////// +`define MSTATUS 'h00 //'h300 // Machine Status register +`define MISA 'h01 //'h301 // ISA and extensions +`define MEDELEG 'h02 //'h302 // Machine exception delegation +`define MIDELEG 'h03 //'h303 // Machine interrupt delegation +`define MIE 'h04 //'h304 // Machine interrupt enable +`define MTVEC 'h05 //'h305 // Machine trap-handler base address +`define MCOUNTEREN 'h06 //'h306 // Machine counter setup register +`define MHPMEVENTSTART 'h23 //'h323 // start of event selectors +`define MHPMEVENTEND 'h26 //'h326 // end of event selectors +`define MSCRATCH 'h40 //'h340 // Scratch register for machine trap handlers +`define MEPC 'h41 //'h341 // Machine exception program counter +`define MCAUSE 'h42 //'h342 // Machine trap cause +`define MTVAL 'h43 //'h343 // Machine bad address +`define MIP 'h44 //'h344 // Machine interrupt pending +`define MPOWERCONTROL 'h45 //'h345 // 2 bits to control the power switches.
+`define PMPCFG0 'hA0 //'h3A0 // +`ifndef RV64 + `define PMPCFG1 'hA1 //'h3A1 // Physical Memory Protection Configuration Registers +`endif +`define PMPCFG2 'hA2 //'h3A2 // Physical Memory Protection Configuration Registers +`ifndef RV64 + `define PMPCFG3 'hA3 //'h3A3 // Physical Memory Protection Configuration Registers +`endif +`define PMPADDRSTART 'hB0 //PMP Address array start +`define PMPADDREND 'hB7 //PMP Address array end +`define MCYCLE 'h00 //'hB00 // Machine cycle counter +`define MTIME 'h01 //'hB01 // mtime register (Non-standard r/w) +`define MINSTRET 'h02 //'hB02 // Machine instructions retired. +`define MHPMCOUNTSTART 'h03 //'hB03 // start address for performance counters +`define MHPMCOUNTEND 'h05 //'hB05 // end address for performance counters +`define MTIMECMP 'h20 //'hB20 // time compare register (Non-standard r/w) +`define MCYCLEH 'h80 //'hB80 // Upper 32 bits of mcycle +`define MTIMEH 'h81 //'hB81 // mtime hi-register (Non-standard r/w) +`define MINSTRETH 'h82 //'hB82 // Upper 32 bits of minstret. 
+`define MHPMCOUNTHSTART 'h83 //'hB83 // start address for performance counters higher bits +`define MHPMCOUNTHEND 'h85 //'hB85 // end address for performance counters higher bits +`define MTIMECMPH 'hA0 //'hBA0 // time compare hi-register (Non-standard r/w) +`define MHPMTHRESSTART 'hA3 //'hBA3 // start of counter-thresholds +`define MHPMTHRESEND 'hA5 //'hBA5 // end of counter-thresholds +`define MVENDORID 'h11 //'hF11 // Vendor ID +`define MARCHID 'h12 //'hF12 // Architecture ID +`define MIMPID 'h13 //'hF13 // Implementation ID +`define MHARTID 'h14 //'hF14 // Hardware Thread ID +`define MBOOTSEQ 'h15 //'hF15 // Boot sequence register (non-standard) -- TODO confirm purpose + +`define MHPMTHRESHSTART 'hE0 //'h7E0 // start of counter-thresholds +`define MHPMTHRESHEND 'hE2 //'h7E2 // end of counter-thresholds +/////////////////////////// Register Mapping for Supervisor Mode Regs ///////////////// +`define SSTATUS 'h00 //'h100 // Supervisor Status register +`define SEDELEG 'h02 //'h102 // Supervisor exception delegation +`define SIDELEG 'h03 //'h103 // Supervisor interrupt delegation +`define SIE 'h04 //'h104 // Supervisor interrupt enable +`define STVEC 'h05 //'h105 // Supervisor trap-handler base address +`define SCOUNTEREN 'h06 //'h106 // Supervisor counter setup register +`define SSCRATCH 'h40 //'h140 // Scratch register for supervisor trap handlers +`define SEPC 'h41 //'h141 // Supervisor exception program counter +`define SCAUSE 'h42 //'h142 // Supervisor trap cause +`define STVAL 'h43 //'h143 // Supervisor bad address or illegal instruction +`define SIP 'h44 //'h144 // Supervisor interrupt pending +`define SATP 'h80 //'h180 // Supervisor address translation and protection + +/////////////////////////// Register Mapping for User Mode Regs ///////////////// +`define USTATUS 'h00 //'h000 // User status register +`define FFLAGS 'h01 //'h001 // FP Accrued exceptions +`define FRM 'h02 //'h002 // FP Dynamic rounding mode +`define FCSR 'h03 //'h003 // FP Control and status register +`define UIE 'h04 //'h004 // User interrupt
enable register +`define UTVEC 'h05 //'h005 // User trap handler base address +`define USCRATCH 'h40 //'h040 // Scratch register for user trap handlers +`define UEPC 'h41 //'h041 // User exception program counter +`define UCAUSE 'h42 //'h042 // User trap cause +`define UTVAL 'h43 //'h043 // User bad address or illegal instruction +`define UIP 'h44 //'h044 // User interrupt pending +`define UMEMSE 'h45 //'h045 // Machine Memory Structures enable +`define UCYCLE 'h00 //'hC00 // cycle counter for RDCYCLE instruction. +`define UTIME 'h01 //'hC01 // Timer for RDTIME instruction +`define UINSTRET 'h02 //'hC02 // Instruction retired counter for RDINSTRET +`define UCYCLEH 'h80 //'hC80 // Upper 32bits of UCYCLE +`define UTIMEH 'h81 //'hC81 // Upper 32bits of UTIME +`define UINSTRETH 'h82 //'hC82 // Upper 32bits of UINSTRET +`define HPMCOUNTSTART 'h03 //'hC03 // start address for performance counters +`define HPMCOUNTEND 'h05 //'hC05 // end address for performance counters +`define HPMCOUNTHSTART 'h83 //'hC83 // start address for performance counters higher bits +`define HPMCOUNTHEND 'h85 //'hC85 // end address for performance counters higher bits + +//////////////////////////////////////////////////////////////////////////////////// +/////////// Debug registers ////////////////////////// +`define DCSR 'hb0 //'h7b0 +`define DPC 'hb1 //'h7b1 +`define DSCRATCH0 'hb2 //'h7b2 +`define DSCRATCH1 'hb3 //'h7b3 +`define DENTRY 'hb4 //'h7b4 // holds the address of the debug entry for self loop +`define TSELECT 'ha0 // 'h7a0 // holds the tselect information +`define TDATA1 'ha1 // 'h7a1 // holds the first trigger data +`define TDATA2 'ha2 // 'h7a2 // holds the second trigger data +//////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////// funct3 definitions for ISA //////////////////// +`define JALR_f3 'b000 +`define BEQ_f3 'b000 +`define BNE_f3 'b001 +`define BLT_f3 'b100 +`define BGE_f3 'b101 +`define BLTU_f3 'b110
+`define BGEU_f3 'b111 +`define LB_f3 'b000 +`define Lh_f3 'b001 +`define LW_f3 'b010 +`define LBU_f3 'b100 +`define LHU_f3 'b101 +`define LWU_f3 'b110 +`define SB_f3 'b000 +`define SH_f3 'b001 +`define SW_f3 'b010 +`define ADD_SUB_f3 'b000 +`define SLT_SLTI_f3 'b010 +`define SLTU_SLTIU_f3 'b011 +`define XOR_XORI_f3 'b100 +`define OR_ORI_f3 'b110 +`define AND_ANDI_f3 'b111 +`define SLL_SLLI_f3 'b001 +`define SR_SRI_f3 'b101 +`define ECALL_f3 'b000 +`define EBREAK_f3 'b000 +`define CSRRW_f3 'b001 +`define CSRRS_f3 'b010 +`define CSRRC_f3 'b011 +`define CSRRWI_f3 'b101 +`define CSRRSI_f3 'b110 +`define CSRRCI_f3 'b111 +`define MUL_f3 'b000 +`define MULH_f3 'b001 +`define MULHSU_f3 'b010 +`define MULHU_f3 'b011 +`define DIV_f3 'b100 +`define DIVU_f3 'b101 +`define REM_f3 'b110 +`define REMU_f3 'b111 +`define ATOMIC_f3 'b010 +`define FENCE_f3 'b000 +`define FENCEI_f3 'b001 +///////////////////////////////////////////////////////////////////////// +////////////////////// opcode definitions of ISA //////////////////////// +`define LUI_op 'b01101 +`define AUIPC_op 'b00101 +`define JAL_op 'b11011 +`define JALR_op 'b11001 +`define BRANCH_op 'b11000 +`define LOAD_op 'b00000 +`define FLOAD_op 'b00001 +`define STORE_op 'b01000 +`define FSTORE_op 'b01001 +`define IMM_ARITH_op 'b00100 +`define ARITH_op 'b01100 +`ifdef RV64 + `define IMM_ARITHW_op 'b00110 + `define ARITHW_op 'b01110 + `define MULDIVW_op 'b01110 +`endif +`define CSR_op 'b11100 +`define MULDIV_op 'b01100 +`define ATOMIC_op 'b01011 +`define FMADD_op 'b10000 +`define FMSUB_op 'b10001 +`define FNMSUB_op 'b10010 +`define FNMADD_op 'b10011 +`define FLOAT_op 'b10100 +`define FENCE_op 'b00011 +////////////////////////////////////////////////////////////////////////// +/////////////// funct7 deifnition of ISA ///////////////////////////////// +`define SLLI_f7 'b0000000 +`define LOGIC_SHIFT_f7 'b0000000 +`define ARITH_SHIFT_f7 'b0100000 +`define ARITH_f7 'b0000000 +`define SUB_f7 'b0100000 +`define MULDIV_f7 'b0000001 
+`define SFENCE_VMA 'b0001001 +`define LR_f5 'b00010 +`define SC_f5 'b00011 +`define AMOSWAP_f5 'b00001 +`define AMOADD_f5 'b00000 +`define AMOXOR_f5 'b00100 +`define AMOAND_f5 'b01100 +`define AMOOR_f5 'b01000 +`define AMOMIN_f5 'b10000 +`define AMOMAX_f5 'b10100 +`define AMOMINU_f5 'b11000 +`define AMOMAXU_f5 'b11100 +`define AMOMIN_f5 'b10000 +`define AMOMAX_f5 'b10100 + + +`define FADD_f5 'b00000 +`define FSUB_f5 'b00001 +`define FMUL_f5 'b00010 +`define FDIV_f5 'b00011 +`define FSQRT_f5 'b01011 +`define FP_OPCODE 'b0100 +`define FCMP_f5 'b10100 +`define FMMAX_f5 'b00101 +`define FCVT_F_I_f5 'b11010 +`define FCVT_I_F_f5 'b11000 +`define FSGNJN_f5 'b00100 +`define FCLASS_f5 'b11100 +`define FCVT_S_D_f5 'b01000 +`define FMV_X_S_f7 'b1110000 +`define FMV_S_X_f7 'b1111000 +`define FMV_X_D_f7 'b1110001 +`define FMV_D_X_f7 'b1111001 + + + + +/////////////////////////////////////////////////////////////////////////// +///////////////// Event Values of Performance Counters //////////////////// +`define PERFMONITORS 64 +`define CYCLECOUNTERS 13 +`define ICACHE_MISS 0 +`define ICACHE_CACHEABLE `ICACHE_MISS+1 +`define ICACHE_LINEREPLACE `ICACHE_CACHEABLE+1 +`define ICACHE_TLBMISS `ICACHE_LINEREPLACE+1 +`define ICACHE_MISALIGNED `ICACHE_TLBMISS+1 +`define ICACHE_PREFETCHMISS `ICACHE_MISALIGNED+1 +`define COND_BRANCH `ICACHE_PREFETCHMISS+1 +`define COND_BRANCH_TAKEN `COND_BRANCH+1 +`define COND_BRANCH_MISPREDICTED `COND_BRANCH_TAKEN+1 +`define TAKEN_BRANCH_MISPREDICTED `COND_BRANCH_MISPREDICTED+1 +`define UNCOND_JUMPS `TAKEN_BRANCH_MISPREDICTED+1 +`define SPFPU_INST `UNCOND_JUMPS+1 +`define DPFPU_INST `SPFPU_INST+1 +`define DCACHE_TLBMISS `DPFPU_INST+1 +`define TOTAL_LOADS `DCACHE_TLBMISS+1 +`define TOTAL_STORES `TOTAL_LOADS+1 +`define TOTAL_ATOMIC `TOTAL_STORES+1 +`define DCACHE_LOAD_MISS `TOTAL_ATOMIC+1 +`define DCACHE_STORE_MISS `DCACHE_LOAD_MISS+1 +`define DCACHE_ATOMIC_MISS `DCACHE_STORE_MISS+1 +`define DCACHE_CACHEABLE_LOAD `DCACHE_ATOMIC_MISS+1 +`define 
DCACHE_CACHEABLE_STORE `DCACHE_CACHEABLE_LOAD+1 +`define DCACHE_CACHEABLE_ATOMIC `DCACHE_CACHEABLE_STORE+1 +`define DCACHE_WRITEBACKS `DCACHE_CACHEABLE_ATOMIC+1 +`define DCACHE_LINEREPLACE `DCACHE_WRITEBACKS+1 +`define DCACHE_MISALIGNED `DCACHE_LINEREPLACE+1 +`define EXCEPTIONS_TAKEN `DCACHE_MISALIGNED+1 +`define INTERRUPTS_TAKEN `EXCEPTIONS_TAKEN+1 +`define MULDIV_INSTRUCTIONS `INTERRUPTS_TAKEN+1 +`define MEMORY_INSTRUCTIONS `MULDIV_INSTRUCTIONS+1 +`define EXEC_FLUSHES `MEMORY_INSTRUCTIONS+1 +`define WB_FLUSHES `EXEC_FLUSHES+1 + +`define USERMODE_CYCLES 30 +`define SUPERVISORMODE_CYCLES 31 +`define MACHINEMODE_CYLES 32 +`define MISPREDICTION_STALLS 33 +`define INTERRUPT_STALLS 34 +`define DFENCE_CYCLES 35 +`define IFENCE_CYCLES 36 +`define DCACHE_MISS_CYCLES 37 +`define ICACHE_MISS_CYCLES 38 +`define FPBUSY_CYCLES 39 +`define DIVISIONBUSY_CYCLES 40 +`define TOTAL_STALL_CYCLES 41 +`define PAGEWALK_CYCLES 42 +`define COREBUS_CYCLES 43 + +/////////////////////////////////////////////////////////////////////////////////// +/*====== Define the slave number of each peripheral=== */ +//`ifdef simulate +// `define Sdram_slave_num 0 +// `define Sdram_cfg_slave_num `Sdram_slave_num +// `define BootRom_slave_num `Sdram_cfg_slave_num+1 +// `define Uart0_slave_num `BootRom_slave_num+1 +// `define Uart1_slave_num `Uart0_slave_num+1 +// `define Debug_slave_num `Uart1_slave_num+1 +// `define Plic_slave_num `Debug_slave_num+1 +// `define Qspi0_slave_num `Plic_slave_num +// `define Qspi1_slave_num `Qspi0_slave_num +// `define Num_Slaves `Qspi1_slave_num +//`else +// `define Uart0_slave_num 0 +// `define Uart1_slave_num 1 +// `define Qspi0_slave_num 2 +// `define Qspi1_slave_num 3 +// `define I2c0_slave_num 4 +// `define I2c1_slave_num 5 +// `define Sdram_slave_num 6 +// `define Sdram_cfg_slave_num 7 +// `define Dma_slave_num 8 +// `define Hyperflash_mem_slave_num 9 +// `define Hyperflash_reg_slave_num 10 +// `define Debug_slave_num 11 +// `define AxiExp1_slave_num 12 +// `define 
GPIO_slave_num 13 +// `define BootRom_slave_num 14 +// `define PLIC_slave_num 15 +// `define TCM_slave_num 16 +//`endif +/*=================================================== */ + +/*===== Define the base address of each peripheral === */ + `define DebugBase 'h00000000 + `define DebugEnd 'h000000FF + `define BootRomBase 'h00001000 + `define BootRomEnd 'h00010FFF + `define GPIOBase 'h00011100 + `define GPIOEnd 'h000111FF // 2 32-bit registers + `define UART0Base 'h00011200 + `define UART0End 'h000112FF // 8 32-bit registers + `define UART1Base 'h00011300 + `define UART1End 'h000113FF // 2 32-bit registers + `define I2C0Base 'h00011400 + `define I2C0End 'h000114FF // 8 32-bit registers + `define I2C1Base 'h00011500 + `define I2C1End 'h000115FF // 8 32-bit registers + `define DMABase 'h00011600 + `define DMAEnd 'h000116FF // TODO + `define SDRAMCfgBase 'h00011700 + `define SDRAMCfgEnd 'h000117FF // 12 32-bit registers + `define QSPI0CfgBase 'h00011800 + `define QSPI0CfgEnd 'h000118FF // 13 32-bit registers + `define QSPI1CfgBase 'h00011900 + `define QSPI1CfgEnd 'h000119FF // 13 32-bit registers + `define PWMBase 'h00011A00 + `define PWMEnd 'h00011A0C // 4 32-bit registers + `define TCMBase 'h00020000 // + `define TCMEnd 'h00040000 // 128KB + `define VMEBase 'h40000000 + `define VMEEnd 'h4FFFFFFF // 1GB + `ifdef FlexBus_verify + `define FlexBusBase 'h80000000 + `define FlexBusEnd 'h8FFFFFFF + `else + `define FlexBusBase 'h50000000 + `define FlexBusEnd 'h5FFFFFFF + `endif + `define ClintBase 'h02000000 + `define ClintEnd 'h020BFFFF + `ifdef FlexBus_verify + `define SDRAMMemBase 'h50000000 + `define SDRAMMemEnd 'h5FFFFFFF // 1GB + `else + `define SDRAMMemBase 'h80000000 + `define SDRAMMemEnd 'h8FFFFFFF // 1GB + `endif + `define QSPI0MemBase 'h90000000 + `define QSPI0MemEnd 'h9FFFFFFF // 256 MB + `define QSPI1MemBase 'hA0000000 + `define QSPI1MemEnd 'hAFFFFFFF // 256 MB + `define PLICBase 'h0c000000 + `define PLICEnd 'h10000000 + `define AxiExp1Base 'hC0000000 + `define 
AxiExp1End 'hFFFFFFFF +/*=================================================== */ +/*== Define the range of bytes per peripheral==== +`ifdef simulate + `define MemCRange 'h7FFFFFFF + `define ConfigMRange 'h3000 + `define DebugRange 'h44 +`else + `define BootRomRange 'hFFF //4KB for Now + `define GPIORange 'h8 //2 registers for GPIO + `define UART0Range 'h38 //8 Registers + `define UART1Range 'h38 //2 registers + `define I2C0Range 'h38 //6 Registers. Adding +2 to just have a backup, if at all it might be required + `define I2C1Range 'h38 //8 registers + `define DMARange 'hFF + `define SDRAMCfgRange 'h58 //12 registers + `define QSPI0CfgRange 'h60 //13 registers + `define QSPI1CfgRange 'h60 //13 registers + `define HyperCfgRange 'h70 //15 registers + `define SDRAMMemRange 'hFFFFFFF //512 MB + `define HyperMemRange 'hFFFFFFF //512 MB + `define QSPI0MemRange 'h7FFFFFFF //2 GB + `define QSPI1MemRange 'h7FFFFFFF //2 GB + `define AxiExpRange 'hFFFFFFFF //512 MB for now! + `define DebugRange 'h44 //16 Registers -- 32 bit +`endif + /*=================================================== */ +`define IONum 32 + diff --git a/src/core/defined_types.bsv b/src/core/defined_types.bsv new file mode 100644 index 0000000..d71d4f5 --- /dev/null +++ b/src/core/defined_types.bsv @@ -0,0 +1,562 @@ +/* +Copyright (c) 2013, IIT Madras +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
+* Neither the name of IIT Madras nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +Author Names : Neel Gala, Arjun Menon +Email ID : neelgala@gmail.com + +Description : + +This files contains all the types and structures that are used in any of the modules. 
+*/ +package defined_types; + +`include "defined_parameters.bsv" + + typedef enum {Load, Store, Atomic, Fence} Access_type deriving (Bits,Eq,FShow); + typedef enum {User=2'd0,Supervisor=2'd1,Machine=2'd3} Privilege_mode deriving (Bits,Eq,FShow); + typedef enum {Idle,Stall,Handling_Request,Handling_Memory_Read, Handling_Memory_Write, Handle_Fence} Cache_State deriving (Bits,Eq,FShow); + typedef enum {Check_vd,Update_vd} Fence_state deriving (Bits,Eq); + typedef enum {AccessFlush,Fence,None} Flush_type deriving (Bits,Eq,FShow); + + typedef union tagged{ + Bit#(width) Present; + Bit#(4) Absent; + } RFType#(numeric type width) deriving(Bits,Eq,FShow); + + typedef union tagged{ + void All; + void None; + Bit#(TLog#(`PRFDEPTH)) Specific; + }PRFFlush deriving(Bits, Eq, FShow); + + typedef union tagged{ + void All; + void None; + Tuple2#(Register_type, Bit#(5)) Specific; + }RFFlush deriving(Bits, Eq, FShow); + + + typedef struct{ + Bit#(addr_width) address; + }From_Cpu#(numeric type addr_width) deriving(Bits,Eq); + + typedef struct{ + Bit#(TMul#(word_size,8)) data_word; + Bit#(1) bus_error; + Bit#(1) misaligned_error; + Bit#(addr_width) address; + }To_Cpu#(numeric type addr_width,numeric type word_size) deriving(Bits,Eq); + + typedef struct{ + Bit#(addr_width) address; + Bit#(8) burst_length; + Access_type ld_st; + Bit#(3) transfer_size; + }To_Memory#(numeric type addr_width) deriving(Bits,Eq); + + typedef struct{ + Bit#(TMul#(word_size,8)) data_line; + Bit#(1) bus_error; + Bool last_word; + }From_Memory#(numeric type word_size) deriving(Bits,Eq); + + typedef struct{ + //Bit#(addr_width) address; + Access_type load_store; + Bit#(TMul#(word_size,8)) data; + Bit#(TLog#(TDiv#(addr_width, 8))) transfer_size; // 0 -8 bits, 1- 16 bits, 2 -32 bits 3 - 64-bits; + `ifdef atomic Bit#(5) atomic_op;`endif + Bool signextend; + }From_Cpu_D#(numeric type addr_width, numeric type word_size) deriving(Bits,Eq,FShow); + + + typedef struct{ + Bit#(TMul#(8,TMul#(word_size,block_size))) line; + 
Bit#(addr_width) address; + Bit#(TLog#(ways)) replace_block; + } Current_Store#(numeric type ways, numeric type addr_width, numeric type block_size, numeric type word_size) deriving(Bits,Eq); + + typedef struct{ + Bit#(TMul#(word_size,8)) data_word; + Bit#(1) bus_error; + Bit#(1) misaligned_error; + Bit#(addr_width) address; + Access_type load_store; + }To_Cpu_D#(numeric type addr_width,numeric type word_size) deriving(Bits,Eq); + + typedef struct{ + Bit#(`PADDR) address; + Bit#(8) burst_length; + Access_type ld_st; + Bit#(TLog#(TDiv#(`VADDR, 8))) transfer_size; // 0 - 8 bits, 1 - 16 bits, 2 - 32 bits, 3 - 64 bits + Bit#(TMul#(`DCACHE_BLOCK_SIZE,TMul#(`DCACHE_WORD_SIZE,8))) data_line; + } To_Memory_Write deriving(Bits,Eq); + + typedef struct{ + Bit#(TMul#(word_size,8)) data_line; + Bit#(1) bus_error; + Bit#(1) misaligned_error; + Bit#(addr_width) address; + }From_Memory_D#(numeric type addr_width,numeric type word_size, numeric type block_size) deriving(Bits,Eq); + + +typedef enum{ + Taken, Notaken +}Actual_jump deriving (Eq,Bits,FShow); // actual branch condition used in the branch execution unit. + + +// enum defining the prediction of the branch predictor for the current PC. +typedef enum{ + Predicted_taken,Predicted_notaken +}Prediction_type deriving (Eq,Bits,FShow); // output from the branch prediction unit. + +// A tagged union recording whether the branch predictor's prediction was correct or wrong; both tags carry a VADDR-wide value. +typedef union tagged{ + Bit#(`VADDR) Mispredicted; + Bit#(`VADDR) Correct_prediction; +}Prediction_result deriving (Eq,Bits,FShow); // result of the prediction, from the branch execution unit. + +typedef struct{ + Bit#(addr_width) prog_counter_; + Prediction_type prediction_; + Bool jump; +} Predictor_output#(numeric type addr_width) deriving(Bits, Eq); // output of the branch prediction unit: program counter, prediction and a jump flag. 
+ +typedef struct{ + Bit#(`Reg_width) data_forward; + Bit#(TLog#(`PRFDEPTH)) pid; +}Operand_forwading_type deriving (Bits,Eq); // the data structure for operand forwarding from any stage + +typedef union tagged{ + Bit#(`Reg_width) Data; + Bit#(TLog#(`PRFDEPTH)) Pid; +} FromRf deriving (Bits,Eq,FShow); + +typedef struct{ + Bit#(`Reg_width) rs1; + Bit#(`Reg_width) rs2; + `ifdef spfpu Bit#(`Reg_width) rs3;`endif +} Output_for_operand_fetch deriving (Bits,Eq); // output from the register file to the decode stage + +typedef enum { + ALU,MUL,DIV,MEMORY,BRANCH,JAL,JALR `ifdef spfpu ,DFLOATING,FLOATING `endif ,FENCE,FENCEI,SYSTEM_INSTR,NOP +}Instruction_type deriving(Bits, Eq,FShow); // the type of the decoded instruction. + +// to distinguish between the integer and floating-point register files +typedef enum {IntegerRF `ifdef spfpu ,FloatingRF `endif } Register_type deriving(Bits,Eq,FShow); +typedef enum {IntegerRF `ifdef spfpu ,FloatingRF `endif , Immediate, PC} Operand_type deriving(Bits,Eq,FShow); + + +typedef union tagged{ + void None; + Bit#(`Reg_width) Address; + Bit#(`Reg_width) Data;} TriggerType deriving(Bits,Eq,FShow); + +typedef struct{ + TriggerType ttype; + Bit#(4) matchscheme;} TriggerData deriving(Bits,Eq,FShow); + +// the data structure for the pipeline FIFO between fetch and decode. 
+typedef struct{ + Bit#(`VADDR) program_counter; + Bit#(32) instruction; + Trap_type exception; + Bit#(`VADDR) nextpc; + Bit#(2) prediction; + Bit#(`PERFMONITORS) perfmonitors; + Bit#(2) epochs; +}IF_ID_type deriving (Bits,Eq); + +typedef struct{ + Bit#(`Reg_width) rs1; + Bit#(`Reg_width) rs2; + Bit#(5) rs1_addr; + Bit#(12) csr_address; + Bit#(3) funct3; + }CSRInputs deriving(Bits,Eq,FShow); + +typedef struct{ + Instruction_type inst_type; + Operand_type rdtype; + Bit#(`Reg_width) rs1; + Bit#(`Reg_width) rs2; + Bit#(`Reg_width) rs3_imm; + Bit#(5) rs1addr; + Bit#(5) rs2addr; + Bit#(5) rs3addr; + Operand_type rs1_type; + Operand_type rs2_type; + Operand_type rs3_type; + Bit#(`VADDR) program_counter; + Bool word32; + Access_type mem_access; + Bit#(4) fn; // TODO: check whether this can suffice for memaccess as well + Trap_type exception; + Bit#(5) destination; + Bit#(`VADDR) nextpc; + Bit#(3) funct3; + `ifdef spfpu Bool fcsr_rm; `endif + `ifdef simulate Bit#(32) instruction ;`endif + Bit#(3) debugcause; + Bit#(2) prediction; + Bit#(`PERFMONITORS) perfmonitors; + Bit#(2) epochs; +}ID_IE_type deriving (Bits,Eq); + +typedef struct{ + Bit#(`Reg_width) address; + Bit#(`Reg_width) memory_data; // data to be written in the memory + Bit#(TLog#(TDiv#(`VADDR, 8))) transfer_size; // 0 - 8 bits, 1 - 16 bits, 2 - 32 bits, 3 - 64 bits + Bit#(1) signextend; // whether the loaded value has to be sign-extended + Access_type mem_type; // Load, Store, Atomic or Fence (the members of Access_type) + `ifdef atomic Bit#(5) atomic_op;`endif + Bit#(1) epochs; + }Memout deriving(Bits,Eq,FShow); + +typedef union tagged{ + Arithout RESULT; // 64+5 + Memout MEMORY; // 64+64+3+1+3+5 = 140 + CSRInputs SYSTEM; // 64+64+5+12+3 = 148 + void Busy; +} Execution_output deriving(Bits,Eq,FShow); + +//typedef struct{ +// Bit#(`Reg_width) result_addr_rs1; +// Bit#(`Reg_width) data_rs2; +// Bit#(20) csr_mem; +//} Execution_output deriving(Bits,Eq,FShow); + +typedef struct{ + Execution_output execresult; + Bit#(`VADDR) program_counter; + Trap_type 
exception; + Bit#(5) destination; + Operand_type rd_type; + Bit#(TLog#(`PRFDEPTH)) index; + Bit#(4) pid; + `ifdef simulate Bit#(32) instruction ;`endif + Bit#(3) debugcause; + Bit#(`PERFMONITORS) perfmonitors; + Bit#(2) epochs; +}IE_IMEM_type deriving (Bits,Eq); + +typedef struct{ + Bit#(`Reg_width) aluresult; + Bit#(5) fflags; + } Arithout deriving(Bits,Eq,FShow); // output struct from the alu. + +typedef struct{ + Bit#(`Reg_width) address; + Bit#(3) transfer_size; + Access_type load_store; + }MemoryResponse deriving(Bits,Eq,FShow); + +typedef union tagged{ + Arithout RESULT; + CSRInputs SYSTEM; +} WriteBackType deriving(Bits,Eq); + +typedef struct{ + WriteBackType commit_data; + Bit#(5) destination; + Operand_type rd_type; + Bit#(TLog#(`PRFDEPTH)) index; + Bit#(4) pid; + Bit#(`VADDR) program_counter; + Trap_type exception; + `ifdef simulate Bit#(32) instruction ;`endif + Bit#(3) debugcause; + Bit#(`PERFMONITORS) perfmonitors; + Bit#(2) epochs; +}IMEM_IWB_type deriving(Bits,Eq); + +typedef struct { + Bit#(paddr) address; + Bit#(TLog#(TDiv#(paddr,8))) transfer_size; + Bit#(1) u_signed; + Bit#(3) byte_offset; + Bit#(TMul#(8, word_size)) write_data; + Access_type ld_st; +} UncachedMemReq#(numeric type paddr, numeric type word_size) deriving(Bits, Eq); + +/************************** Interfaces in PLIC ******************************/ + +interface Ifc_global_interrupt; + method Action irq_frm_gateway(Bool ir); +endinterface + +interface Ifc_program_registers#(numeric type addr_width,numeric type word_size); + method ActionValue#(Bit#(TMul#(8,word_size))) prog_reg(UncachedMemReq#(addr_width, word_size) mem_req); +endinterface +/****************************************************************************/ + +typedef struct { + Bit#(addr_width) pc; + Bit#(addr_width) branch_address; + Bit#(2) state;} Training_data#(numeric type addr_width) deriving (Bits, Eq); + +typedef enum {SWAP,ADD,XOR,AND,OR,MINU,MAXU,MIN,MAX} Atomic_funct deriving(Bits,Eq,FShow); + +typedef struct{ + 
Bit#(width) final_result; // the final result for the operation + Bit#(5) fflags; // indicates if any exception is generated. + }Floating_output#(numeric type width) deriving(Bits,Eq); // data structure of the output FIFO. + +typedef enum { + Inst_addr_misaligned=0, + Inst_access_fault=1, + Illegal_inst=2, + Breakpoint=3, + Load_addr_misaligned=4, + Load_access_fault=5, + Store_addr_misaligned=6, + Store_access_fault=7, + Ecall_from_user=8, + Ecall_from_supervisor=9, + Ecall_from_machine=11, + Inst_pagefault=12, + Load_pagefault=13, + Store_pagefault=15 + `ifdef simulate ,Endsimulation =16 `endif +} Exception_cause deriving (Bits,Eq,FShow); + +typedef struct { + Bit#(TSub#(`VADDR,TAdd#(TLog#(`DCACHE_BLOCK_SIZE), TLog#(`DCACHE_WORD_SIZE)))) vtag; + Bit#(`DCACHE_TAG_BITS) ptag; + Bit#(TLog#(`DCACHE_WAYS)) writeblock; + Bit#(1) dirty; + Bit#(1) valid; +} Linebuff_tag deriving (Bits, Eq, FShow); +typedef enum{ + /*==== Standard =============== */ + User_soft_int=0, + Supervisor_soft_int=1, + Machine_soft_int=3, + User_timer_int=4, + Supervisor_timer_int=5, + Machine_timer_int=7, + User_external_int=8, + Supervisor_external_int=9, + Machine_external_int=11, + /*=============================*/ + /*===== Non Standard========= */ + DebugInterrupt =12, + DebugResume=13, + DebugReset=14 +// Icache_miss =12, +// Icache_cacheable =13, +// Icache_linereplace =14, +// Icache_tlbmiss =15, +// Icache_misaligned =16, +// Cond_branch =17, +// Cond_branch_taken =18, +// Cond_branch_mispredicted =19, +// Taken_branch_mispredicted =20, +// Uncond_jumps =21, +// Spfpu_inst =22, +// Dpfpu_inst =23, +// Dcache_tlbmiss =24, +// Total_loads =25, +// Total_stores =26, +// Total_atomic =27, +// Dcache_load_miss =28, +// Dcache_store_miss =29, +// Dcache_atomic_miss =30, +// Dcache_cacheable_load =31, +// Dcache_cacheable_store =32, +// Dcache_cacheable_atomic =33, +// Dcache_writebacks =34, +// Dcache_linereplace =35, +// Dcache_misaligned =36, +// Exceptions_taken =37, +// Interrupts_taken 
=38, +// Muldiv_instructions =39, +// System_instructions =40, +// Usermode_cycles =41, +// Supervisormode_cycles =42, +// Machinemode_cyles =43, +// Misprediction_stalls =44, +// Interrupt_stalls =45, +// Dfence_cycles =46, +// Ifence_cycles =47, +// Dcache_miss_cycles =48, +// Icache_miss_cycles =49, +// Fpbusy_cycles =50, +// Divisionbusy_cycles =51, +// Total_stall_cycles =52, +// Pagewalk_cycles =53, +// Corebus_cycles =54 +} Interrupt_cause deriving (Bits,Eq,FShow); + +typedef union tagged{ + Exception_cause Exception; + Interrupt_cause Interrupt; + void None; +} Trap_type deriving(Bits,Eq,FShow); + +function String event_name(Bit#(64) eventnum); + case (eventnum) + 'h0000000000000001: return "ICACHE_MISS "; + 'h0000000000000002: return "ICACHE_CACHEABLE "; + 'h0000000000000004: return "ICACHE_LINEREPLACE "; + 'h0000000000000008: return "ICACHE_TLBMISS "; + 'h0000000000000010: return "ICACHE_MISALIGNED "; + 'h0000000000000020: return "ICACHE_PREFETCHMISS "; + 'h0000000000000040: return "COND_BRANCH "; + 'h0000000000000080: return "COND_BRANCH_TAKEN "; + 'h0000000000000100: return "COND_BRANCH_MISPREDICTED "; + 'h0000000000000200: return "TAKEN_BRANCH_MISPREDICTED "; + 'h0000000000000400: return "UNCOND_JUMPS "; + 'h0000000000000800: return "SPFPU_INST "; + 'h0000000000001000: return "DPFPU_INST "; + 'h0000000000002000: return "DCACHE_TLBMISS "; + 'h0000000000004000: return "TOTAL_LOADS "; + 'h0000000000008000: return "TOTAL_STORES "; + 'h0000000000010000: return "TOTAL_ATOMIC "; + 'h0000000000020000: return "DCACHE_LOAD_MISS "; + 'h0000000000040000: return "DCACHE_STORE_MISS "; + 'h0000000000080000: return "DCACHE_ATOMIC_MISS "; + 'h0000000000100000: return "DCACHE_CACHEABLE_LOAD "; + 'h0000000000200000: return "DCACHE_CACHEABLE_STORE "; + 'h0000000000400000: return "DCACHE_CACHEABLE_ATOMIC "; + 'h0000000000800000: return "DCACHE_WRITEBACKS "; + 'h0000000001000000: return "DCACHE_LINEREPLACE "; + 'h0000000002000000: return "DCACHE_MISALIGNED "; + 
'h0000000004000000: return "EXCEPTIONS_TAKEN "; + 'h0000000008000000: return "INTERRUPTS_TAKEN "; + 'h0000000010000000: return "MULDIV_INSTRUCTIONS "; + 'h0000000020000000: return "MEMORY_INSTRUCTIONS "; + 'h0000000040000000: return "EXEC_FLUSHES "; + 'h0000000080000000: return "WB_FLUSHES "; + default: return "NO EVENT"; + endcase +endfunction + +/****************************** MMU TYPES *******************************/ + +typedef struct { + bit mprv; + bit sum; + bit mxr; + Privilege_mode mpp; + Privilege_mode prv; +} Chmod deriving(Bits, Eq); + +typedef struct { + bit v; //valid + bit r; //allow reads + bit w; //allow writes + bit x; //allow execute(instruction read) + bit u; //user-mode accessible page (PTE U bit) -- NOTE(review): confirm against the dTLB permission checks + bit g; //global page + bit a; //accessed already + bit d; //dirty +} TLB_permissions deriving(Bits, Eq, FShow); + +typedef struct { + Bit#(TSub#(paddr,page_size)) ppn; + TLB_permissions tlb_perm; + Bit#(asid_width) asid; + Bit#(2) levels; +} To_TLB#(numeric type paddr, numeric type page_size, numeric type asid_width) deriving(Bits,Eq); + +typedef struct { + Bit#(data_width) vaddr; + Access_type ld_st_atomic; +} DTLB_access#(numeric type data_width) deriving(Bits, Eq); + +typedef enum { + PTW_ready, Handling_PTW, Wait_for_memory, PTW_done, Send_to_memory} PTW_state deriving(Bits, Eq); + +typedef enum { + Load, Store, Execution} Translation_type deriving(Bits, Eq); + +typedef struct { + Translation_type page_type; + Bit#(TSub#(vaddr_width,page_offset)) vpn; +} Request_PPN_PTW#(numeric type vaddr_width, numeric type page_offset) deriving (Bits,Eq); + +typedef struct { + Translation_type page_type; + To_TLB#(paddr_width,page_offset,asid_width) tlb_packet; +} Response_PPN_TLB#(numeric type paddr_width, numeric type page_offset, numeric type asid_width) deriving (Bits,Eq); + +typedef struct { + Bool ptwdone; + Translation_type page_type; + Bit#(data_width) address; +} Request_PTE_memory#(numeric type data_width) deriving (Bits,Eq); + +typedef struct { + Trap_type exception; + 
Bit#(data_width) address; + Bool cacheable; +} From_TLB#(numeric type data_width) deriving (Bits, Eq); + +typedef struct { + Bit#(vaddr_width) rs1; + Bit#(vaddr_width) rs2; +} Fence_VMA_type#(numeric type vaddr_width) deriving (Bits, Eq); + +typedef enum { + Store_pf, Load_pf, Instruction_pf, None} Pf_exception_type deriving (Bits, Eq); +/*=============================================================================== */ + +/* =============================== Debug related types ========================== */ +typedef enum {CPU_CONTINUE,CPU_STOPPED} CPU_State deriving(Bits,Eq,FShow); +typedef enum { + GDB_INTERRUPT, + GDB_HUP, + GDB_INT, + GDB_QUIT, + GDB_ILL, + GDB_BREAK = 5, + CPU_BUSY + } GdbStopCondition +deriving (Bits ,Eq, FShow); + +/*======= AXI4 master/slave numbers ======*/ +typedef 0 Sdram_slave_num; +typedef TAdd#(Sdram_slave_num ,`ifdef SDRAM 1 `else 0 `endif ) Sdram_cfg_slave_num; +typedef TAdd#(Sdram_cfg_slave_num,`ifdef BOOTROM 1 `else 0 `endif ) BootRom_slave_num ; +typedef TAdd#(BootRom_slave_num ,`ifdef Debug 1 `else 0 `endif ) Debug_slave_num ; +typedef TAdd#(Debug_slave_num , `ifdef TCMemory 1 `else 0 `endif ) TCM_slave_num; +typedef TAdd#(TCM_slave_num ,`ifdef DMA 1 `else 0 `endif ) Dma_slave_num; +typedef TAdd#(Dma_slave_num ,1 ) SlowPeripheral_slave_num; +typedef TAdd#(SlowPeripheral_slave_num,`ifdef VME 1 `else 0 `endif ) VME_slave_num; +typedef TAdd#(VME_slave_num,`ifdef FlexBus 1 `else 0 `endif ) FlexBus_slave_num; +typedef TAdd#(FlexBus_slave_num,1) Num_Slaves; +typedef 0 Dmem_master_num; +typedef 1 Imem_master_num; +typedef TAdd#(Imem_master_num , `ifdef Debug 1 `else 0 `endif ) Debug_master_num; +typedef TAdd#(Debug_master_num, `ifdef DMA 1 `else 0 `endif ) DMA_master_num; +typedef TAdd#(DMA_master_num,1) Num_Masters; + +/*=============================================================================== */ +/*====== AXI4 Lite slave declarations =======*/ +typedef 0 SlowMaster; +typedef 0 Uart0_slave_num ; +typedef TAdd#(Uart0_slave_num 
,`ifdef UART1 1 `else 0 `endif ) Uart1_slave_num ; +typedef TAdd#(Uart1_slave_num ,`ifdef CLINT 1 `else 0 `endif ) CLINT_slave_num; +typedef TAdd#(CLINT_slave_num ,`ifdef PLIC 1 `else 0 `endif ) Plic_slave_num ; +typedef TAdd#(Plic_slave_num ,`ifdef PLIC 1 `else 0 `endif ) GPIO_slave_num ; /* NOTE(review): this increment is guarded by PLIC, the same macro as the Plic_slave_num entry; the other entries in this chain are each guarded by their own peripheral's macro -- confirm whether a GPIO-specific macro was intended */ +typedef TAdd#(GPIO_slave_num ,`ifdef I2C0 1 `else 0 `endif ) I2c0_slave_num ; +typedef TAdd#(I2c0_slave_num ,`ifdef I2C1 1 `else 0 `endif ) I2c1_slave_num ; +typedef TAdd#(I2c1_slave_num ,`ifdef QSPI0 1 `else 0 `endif ) Qspi0_slave_num ; +typedef TAdd#(Qspi0_slave_num ,`ifdef QSPI1 1 `else 0 `endif ) Qspi1_slave_num ; +typedef TAdd#(Qspi1_slave_num ,`ifdef AXIEXP 1 `else 0 `endif ) AxiExp1_slave_num; +typedef TAdd#(AxiExp1_slave_num ,`ifdef PWM_AXI4Lite 1 `else 0 `endif ) Pwm_slave_num; +typedef TAdd#(Pwm_slave_num,1) Num_Slow_Slaves ; +/*===========================================*/ + +endpackage diff --git a/src/core/dmem.bsv b/src/core/dmem.bsv new file mode 100644 index 0000000..4a5ca88 --- /dev/null +++ b/src/core/dmem.bsv @@ -0,0 +1,179 @@ +/* +Copyright (c) 2013, IIT Madras +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +* Neither the name of IIT Madras nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +*/ +package dmem; + +import defined_types :: *; +import dTLB :: *; +import dcache_asic :: *; +import GetPut :: *; +import ConfigReg :: *; +import FIFOF :: *; +import SpecialFIFOs :: *; +import MemoryMap :: *; +import DReg::*; +import ConfigReg::*; +`include "defined_parameters.bsv" + +interface Ifc_dmem; + /*======= Mandatory Interface to the core ================ */ + interface Put#(Tuple2#(Memout,Bit#(1))) request_from_cpu; + interface Get#(Maybe#(Tuple4#(Bit#(`Reg_width), Trap_type, Bit#(`PERFMONITORS),Bit#(1)))) response_to_cpu; + method Action flush( ); +// method Bool reset_complete; + method Bool stall_fetch; + /*=============================================== */ + /*======= Mandatory Interface to the external bus ================ */ + method ActionValue#(To_Memory#(`PADDR)) request_to_memory_read; + method ActionValue#(To_Memory_Write) request_to_memory_write; + method Action response_from_memory_read(From_Memory#(`DCACHE_WORD_SIZE) resp); + method Action response_from_memory_write(From_Memory#(`DCACHE_WORD_SIZE) resp); + 
/*=============================================== */ + /*======= Interface to the DTLB ================ */ + `ifdef MMU + method Action translation_protection_frm_csr(bit tlb_disable, Chmod per_bits, Bit#(TAdd#(4,`ASID)) asid); + interface Get#(Request_PPN_PTW#(`VADDR,`OFFSET)) to_PTW; + interface Put#(Tuple2#(Bool,To_TLB#(`PADDR,`OFFSET,`ASID))) refill_TLB; + method Action get_pte_pointer(Request_PTE_memory#(`Reg_width) pte); + method Action fence_dtlb(Fence_VMA_type#(`VADDR) rsdata); + method ActionValue#(Bit#(`Reg_width)) send_pte; + //method Action fence_TLB(Fence_VMA_type#(`ADDR) rsdata); + `endif + /*=============================================== */ +endinterface +//(*conflict_free="request_from_cpu_put, response_to_cpu_get"*) +(*synthesize*) +module mkdmem(Ifc_dmem); + Ifc_dcache dcache <- mkdcache; + `ifdef MMU + Ifc_dTLB dtlb <- mkdTLB; + Reg#(Maybe#(Tuple2#(Memout,Bit#(1)))) rg_dtlb_metadata[2] <- mkCReg(2,tagged Invalid); + `endif + ConfigReg#(Bool) rg_serve_ptw <- mkConfigReg(False); + Reg#(Maybe#(Request_PTE_memory#(`Reg_width))) rg_pte_pointer[2] <- mkCReg(2,tagged Invalid); + FIFOF#(Maybe#(Tuple2#(Bit#(`Reg_width), Maybe#(Exception_cause)))) ff_response_to_cpu <- mkSizedBypassFIFOF(1); + Wire#(From_Memory#(`DCACHE_WORD_SIZE)) wr_read_response_from_memory <- mkWire(); + Reg#(Maybe#(Tuple4#(Bit#(`Reg_width), Trap_type,Bit#(`PERFMONITORS),Bit#(1)))) wr_response_to_cpu[3] <-mkCReg(3,tagged Invalid); + Reg#(Bit#(1)) epochs[2] <-mkCReg(2,0); + Reg#(Bit#(1)) wb_epochs[2] <-mkCReg(2,0); + Reg#(Bool) request_taken <-mkDReg(False); + Reg#(Bool) drop_request[2] <-mkCReg(2,False); + rule display_stuff; + `ifdef verbose $display($time,"\tDMEM: request_taken: %b drop_request: %b",request_taken,drop_request[0]); `endif + endrule + `ifdef MMU + (*conflict_free="ptw_to_dcache, send_translated_address"*) + (*conflict_free="send_translated_address,request_from_cpu.put"*) + (*conflict_free="send_translated_address,send_cache_index"*) + rule 
send_translated_address(rg_dtlb_metadata[1] matches tagged Valid .z); /* fires while a request is pending in rg_dtlb_metadata: clears that entry, takes the result of dtlb.send_ppn and passes its address and exception to dcache.physical_address */ + rg_dtlb_metadata[1] <= tagged Invalid; + let x <- dtlb.send_ppn; + //From_Cpu_D#(`Reg_width,`DCACHE_WORD_SIZE) z = rg_dtlb_metadata; + let {y,epoch} = z; + y.address = x.address; + `ifdef verbose $display($time,"\tDMEM: physical address %h to DCACHE and is cacheable %b drop_request: %b", x.address, x.cacheable,drop_request[1]); `endif + dcache.physical_address(truncate(y.address), x.exception);//, y.mem_type, y.memory_data, y.transfer_size, y.atomic_op, unpack(y.signextend)); + endrule + + rule send_cache_index(rg_dtlb_metadata[1] matches tagged Valid .t); /* passes the value returned by dtlb.send_vaddress_for_cache_index, together with the pending request's fields and epoch, to dcache.virtual_address */ + let {z,epoch} = t; + let x <- dtlb.send_vaddress_for_cache_index; + dcache.virtual_address(x, z.mem_type, z.memory_data, z.transfer_size, `ifdef atomic z.atomic_op, `endif unpack(z.signextend),epoch); + endrule + + rule ptw_to_dcache(rg_pte_pointer[1] matches tagged Valid .ptw_request); /* issues the pending PTE request to the dcache as a Load with zero data and no exception, sets rg_serve_ptw and clears rg_pte_pointer */ + `ifdef verbose $display($time,"\tDMEM: ptw request to DCACHE for address %h epochs: %b", ptw_request.address,epochs[1]); `endif + rg_serve_ptw <= True; + Bit#(`VADDR) addr = truncate(ptw_request.address); + Access_type page_access_type = Load; /* NOTE(review): not referenced again in this rule; the literal Load is passed to the dcache below */ + Bit#(TMul#(`DCACHE_WORD_SIZE,8)) data = 0; + dcache.virtual_address(addr,Load, data, 'd3 `ifdef atomic , 5'b00100 `endif , True,epochs[1]); + dcache.physical_address(truncate(addr), tagged None); + rg_pte_pointer[1] <= tagged Invalid; + endrule + + `endif + + rule send_response_to_core; /* copies dcache.response_to_core into port 0 of wr_response_to_cpu each time the rule fires */ + wr_response_to_cpu[0] <= dcache.response_to_core; + endrule + + /*======= Mandatory Interface to the core ================ */ + interface request_from_cpu = interface Put + method Action put(Tuple2#(Memout,Bit#(1)) request) ; + let {req,epoch}=request; + `ifdef MMU + rg_dtlb_metadata[0] <= tagged Valid (tuple2(req,epoch)); + dtlb.get_vaddr(DTLB_access{vaddr : req.address, ld_st_atomic : req.mem_type} `ifdef atomic ,req.atomic_op `endif ); + `endif + dcache.virtual_address(truncate(req.address),req.mem_type, req.memory_data, 
req.transfer_size, `ifdef atomic req.atomic_op, `endif unpack(req.signextend),epoch); + `ifdef verbose $display($time,"\tDMEM: Taking request from CPU: ",fshow(request)); `endif + request_taken<=True; + endmethod + endinterface; + + interface response_to_cpu = interface Get + method ActionValue#(Maybe#(Tuple4#(Bit#(`Reg_width), Trap_type,Bit#(`PERFMONITORS),Bit#(1)))) get if(!rg_serve_ptw); /* not ready while rg_serve_ptw is set; returns Invalid when port 2 of wr_response_to_cpu holds no response */ + Maybe#(Tuple4#(Bit#(`Reg_width), Trap_type,Bit#(`PERFMONITORS),Bit#(1))) response=tagged Invalid; + if(wr_response_to_cpu[2] matches tagged Valid .resp)begin + let {x,trap,y,epoch}=resp; + response=tagged Valid tuple4(x,trap,y,epoch); + if(trap matches tagged None)begin /* no trap: epochs is left unchanged */ + end + else + epochs[0]<=~epochs[0]; /* the response carries a trap: toggle the epoch bit */ + end + return response; + endmethod + endinterface; + + method Action flush(); /* calls dcache.flush_from_wb and toggles the epoch bit through CReg port 1 */ + dcache.flush_from_wb; + epochs[1]<=~epochs[1]; + endmethod + + + method Bool stall_fetch =!dcache.init_complete; /* True until the dcache reports init_complete */ + /*=============================================== */ + + /*======= Mandatory Interface to the external bus ================ */ + method ActionValue#(To_Memory#(`PADDR)) request_to_memory_read=dcache.read_request_to_memory; + method ActionValue#(To_Memory_Write) request_to_memory_write=dcache.write_request_to_memory; + method Action response_from_memory_read(From_Memory#(`DCACHE_WORD_SIZE) resp)=dcache.read_response_from_memory(resp); + method Action response_from_memory_write(From_Memory#(`DCACHE_WORD_SIZE) resp)=dcache.write_response_from_memory(resp); + /*=============================================== */ + /*======= Interface to the DTLB ================ */ + `ifdef MMU + method Action translation_protection_frm_csr(bit tlb_disable, Chmod per_bits, Bit#(TAdd#(4,`ASID)) asid); + dtlb.translation_protection_frm_csr(tlb_disable, per_bits, asid); + endmethod + interface to_PTW = dtlb.to_PTW; + interface refill_TLB = dtlb.refill_TLB; + method Action get_pte_pointer(Request_PTE_memory#(`Reg_width) pte); + rg_pte_pointer[0] <= tagged Valid pte; + endmethod + method Action 
fence_dtlb(Fence_VMA_type#(`VADDR) rsdata) = dtlb.fence_TLB(rsdata); + method ActionValue#(Bit#(`Reg_width)) send_pte if(isValid(dcache.response_to_core) && rg_serve_ptw); + rg_serve_ptw <= False; + Bit#(`Reg_width) data = 0; + if(dcache.response_to_core matches tagged Valid .resp) begin + let {x,y,perf,epoch} = resp; + data = x; + end + return data; + endmethod + `endif + /*=============================================== */ +endmodule + +endpackage diff --git a/src/core/execute_stage.bsv b/src/core/execute_stage.bsv new file mode 100644 index 0000000..41ff7fa --- /dev/null +++ b/src/core/execute_stage.bsv @@ -0,0 +1,374 @@ +/* +Copyright (c) 2013, IIT Madras +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +* Neither the name of IIT Madras nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +*/ +package execute_stage; + /*===== Package Imports ==== */ + import TxRx::*; + import FIFOF::*; + import DReg::*; + import Clocks::*; + import GetPut::*; + import SpecialFIFOs::*; + /*========================== */ + /*===== Project Imports ======*/ + import alu::*; + `include "defined_parameters.bsv" + import defined_types::*; + `ifdef muldiv + `ifdef RV64 + import muldiv::*; + `endif + `endif + `ifdef spfpu + import fpu::*; + `endif + import prf::*; + /*===============================*/ + /* multiplication: combinational multiply of in1 and in2 on a double-width product. funct3 selects the variant: 0 returns the low Reg_width bits, any other value returns the high Reg_width bits. With funct3==1 both operands are treated as signed, with funct3==2 only in1 is treated as signed, otherwise both are used as-is (unsigned). Signed operands are first replaced by their magnitude and the product is negated afterwards when lv_take_complement is 1. Under RV64 the product is replaced by the sign-extension of its low 32 bits when word_double is False. NOTE(review): the call site in mkexecute_stage passes data.word32 as word_double -- confirm the polarity of this flag. */ + (*noinline*) + function Bit#(`Reg_width) multiplication (Bit#(`Reg_width) in1,Bit#(`Reg_width) in2,Bit#(2) funct3 `ifdef RV64 ,Bool word_double `endif ); + Bit#(TMul#(2,`Reg_width)) op1= ((funct3[0]^funct3[1])==1 && in1[`Reg_width-1]==1)?zeroExtend((~in1)+1):zeroExtend(in1); /* magnitude of in1 when funct3 is 1 or 2 and in1 has its top bit set, else in1 zero-extended */ + Bit#(TMul#(2,`Reg_width)) op2= (funct3[1:0]==1 && in2[`Reg_width-1]==1)?zeroExtend((~in2)+1):zeroExtend(in2); /* magnitude of in2 only when funct3 is 1 and in2 has its top bit set */ + Bit#(1) lv_take_complement = 0; /* 1 when the double-width product has to be negated */ + if(funct3[1:0]==1) + lv_take_complement=((in1[`Reg_width-1]^in2[`Reg_width-1])==1)?1:0; /* operand signs differ */ + else if(funct3[1:0]==2) + lv_take_complement=in1[`Reg_width-1]; /* only in1 carries a sign */ + /* multiply the (possibly magnitude-converted) operands, then restore the sign */ + let product=op1*op2; + product=(lv_take_complement==1)?(~product+1): product; + `ifdef RV64 + if(!word_double)// 32-bit + product=signExtend(product[31:0]); + `endif + if(funct3==0) + return product[`Reg_width-1:0]; + else 
+ return product[2*`Reg_width-1:`Reg_width]; + endfunction + /* Ifc_execute_stage: interface of the execute stage (module mkexecute_stage). */ + interface Ifc_execute_stage; + method Action flush_prf; /* calls prf.flush_all */ + /* ====================== pipe connections ========= */ + interface RXe#(ID_IE_type) rx_in; /* input side, fed by the decode stage */ + interface TXe#(IE_IMEM_type) tx_out; /* output side, towards the memory stage */ + /*================================================== */ + method Action roundingmode(Bit#(3) rm); /* writes rm to wr_rounding_mode, which is passed to fpu._start */ + method Tuple2#(Flush_type,Bit#(`VADDR)) generate_flush; /* returns (rg_flush_execute, rg_effective_address) */ + method Maybe#(Training_data#(`VADDR)) training_data; /* returns wr_training_data, written from the fn_alu output */ + method Bit#(`PERFMONITORS) execute_perfmon; // NOTE(review): the original comment here read "icache performance counters"; no definition of this method is visible in mkexecute_stage -- confirm. + method Maybe#(Bit#(`VADDR)) ras_push; /* returns wr_ras_push, written from the fn_alu output */ + interface Get#(Tuple2#(Memout,Bit#(1))) to_dmem; /* memory request plus bit 0 of the instruction epochs */ + method Action _forwarding_from_memory (Maybe#(Tuple3#(Bit#(`Reg_width), Bit#(TLog#(`PRFDEPTH)), Bit#(4))) fwd_data); /* (data, prf index, pid); a Valid value is passed to prf.fwd_from_memory */ + method Action update_wEpoch; /* toggles wEpoch and pulses wb_flush */ + method Action inferred_xlen(Bit#(2) mxl); /* writes mxl to wr_mxl; the value 1 selects 32-bit handling */ + endinterface:Ifc_execute_stage + /* mkexecute_stage: takes decoded instructions from rx, reads the operands through the PRF, then either runs the single-cycle fn_alu or starts a multi-cycle muldiv/FPU operation; results are enqueued on tx and memory operations are additionally queued for to_dmem. */ + `ifdef muldiv + `ifdef spfpu + `ifdef sequential + (*mutually_exclusive="read_output_from_fpu, read_outputs_from_muldiv"*) + `endif + `endif + `endif + (*synthesize*) + (*conflict_free="rl_receive_info_from_decode_stage, to_dmem_get"*) + module mkexecute_stage(Ifc_execute_stage); + Wire#(Maybe#(Bit#(`VADDR))) wr_ras_push<-mkDWire(tagged Invalid); /* raspush output of fn_alu, exposed by method ras_push */ + Ifc_prf_new prf <-mkprf_new(); /* operand read, destination allocation and forwarding all go through this instance */ + RX#(ID_IE_type) rx <-mkRX; // receive from the decode stage + TX#(IE_IMEM_type) tx <-mkTX; // send to the memory stage; + `ifdef muldiv + `ifdef sequential + Ifc_muldiv muldiv <-mkmuldiv; + `endif + `ifdef parallel + Reg#(Maybe#(Bit#(`Reg_width))) rg_mul_output<-mkReg(tagged Invalid); + `endif + `endif // instantiating the multiply/divide unit (sequential) or the result register of the combinational multiplier (parallel) + `ifdef spfpu Ifc_fpu fpu <- mkfpu(); `endif // instantiating the Floating point units. 
+ + //Wire#(Memout) wr_info_to_dmem <-mkWire;// holds the information to be given to dmem + FIFOF#(Tuple2#(Memout,Bit#(1))) wr_info_to_dmem <-mkBypassFIFOF;// holds the information to be given to dmem + Wire#(Maybe#(Tuple3#(Bit#(`Reg_width), Bit#(TLog#(`PRFDEPTH)), Bit#(4)))) wr_forward_from_EXE <-mkDWire(tagged Invalid);// holds the (data, prf index, pid) result produced by this stage; consumed by rule forward_data_from_exe + Wire#(Bit#(3)) wr_rounding_mode<-mkDWire(0); /* written by method roundingmode */ + Reg#(Bool) multicylce_output[2] <-mkCReg(2,False); // True while a multi-cycle (muldiv/FPU) operation has been issued and its result has not yet been read. + Wire#(Flush_type) rg_flush_execute <-mkDWire(None); /* a DWire despite the rg_ prefix: reads as None unless written in the same cycle */ + Wire#(Bit#(`VADDR)) rg_effective_address<-mkDWire(0); /* a DWire despite the rg_ prefix */ + Reg#(Maybe#(Training_data#(`VADDR))) wr_training_data <-mkDReg(tagged Invalid); /* a DReg despite the wr_ prefix */ + `ifdef perf + Reg#(Bit#(`PERFMONITORS)) rg_execute_perfmon<-mkDReg(0); + `endif + Reg#(Bit#(1)) eEpoch <-mkReg(0); /* {eEpoch,wEpoch} is compared with the epochs field of every incoming instruction */ + Reg#(Bit#(1)) wEpoch <-mkReg(0); + PulseWire wb_flush <-mkPulseWire(); /* sent by method update_wEpoch; suppresses rule update_eEpoch_reg */ + + Reg#(Bit#(4)) rg_pid <-mkReg(0); /* pid stored when an instruction is issued; used by the multi-cycle result rules */ + Reg#(Bit#(TLog#(`PRFDEPTH))) rg_prf_index<-mkReg(0); /* prf index stored together with rg_pid */ + + Wire#(Bit#(2)) wr_mxl <- mkWire(); /* written by method inferred_xlen. NOTE(review): this is mkWire, not mkDWire, so readers of wr_mxl presumably can only fire in cycles where inferred_xlen is called -- confirm the caller drives it every cycle */ + + /* Toggle eEpoch when this stage raises a flush, unless update_wEpoch is called in the same cycle. */ + rule update_eEpoch_reg(rg_flush_execute!=None && !wb_flush); + `ifdef verbose $display($time,"\tEXECUTION: Inverting eEPOCH"); `endif + eEpoch<=~eEpoch; + endrule + /* Hand a result produced in this cycle to the PRF forwarding port. */ + rule forward_data_from_exe(wr_forward_from_EXE matches tagged Valid .fwdata) ; + let {data,index,pid}=fwdata; + if(wr_mxl==1) begin// 32-bits + data=signExtend(data[31:0]); + end + prf.fwd_from_execution(data,index,pid); + endrule + /* Main issue rule: fires when an instruction is waiting, the output has room and no multi-cycle operation is outstanding. */ + rule rl_receive_info_from_decode_stage(rx.u.notEmpty && tx.u.notFull && !multicylce_output[1] ); + Bit#(`PERFMONITORS) perfmonitors=0; + let data=rx.u.first; + let pc=data.program_counter; + let dest=data.destination; + let rdtype=data.rdtype; + let exception=data.exception; + let inst_type=data.inst_type; + Bit#(`VADDR) nextpc=rx.u.first.nextpc; + `ifdef simulate let instruction=data.instruction; `endif + Execution_output result1=tagged Busy; + `ifdef verbose $display($time,"\t********** EXECUTION STAGE FIRING ************ PC: :%h EPOCHS: %b Instr-EPOCHS: 
%b",pc,{eEpoch,wEpoch}, data.epochs) ; `endif + if({eEpoch,wEpoch}!=data.epochs)begin /* epochs differ from the current ones: drop the instruction */ + rx.u.deq; + `ifdef verbose $display($time,"\tEXECUTION: PC: %h Dropping Instruction since Epochs do not match", rx.u.first.program_counter); `endif + end + else if(exception matches tagged None)begin + Bool start_execution=False; /* NOTE(review): never read anywhere in this rule */ + `ifdef verbose $display($time,"\tEXECUTION: rs1type: ",fshow(data.rs1_type)," rs2_type ",fshow(data.rs2_type)); `endif + RFType#(`Reg_width) op1<-prf.read_rs1(data.rs1addr,data.rs1_type,data.rs1); + RFType#(`Reg_width) op2<-prf.read_rs2(data.rs2addr,data.rs2_type,data.rs2); + RFType#(`Reg_width) op3<-prf.read_rs3(data.rs3addr,data.rs3_type,data.rs3_imm); + if(op1 matches tagged Present .x &&& op2 matches tagged Present .y &&& op3 matches tagged Present .z)begin /* proceed only when all three operands are Present; otherwise the instruction stays in rx */ + Bit#(`Reg_width) rs1=x; + Bit#(`Reg_width) rs2=y; + Bit#(`Reg_width) rs3=z; + if(wr_mxl==1) begin // 32-bit + rs1=signExtend(rs1[31:0]); + rs2=signExtend(rs2[31:0]); + rs3=signExtend(rs3[31:0]); + end + `ifdef muldiv + let {prf_index,pid}<-prf.get_index_pid(data.destination,data.rdtype); /* NOTE(review): prf_index/pid and the leading if of the if/else chain below sit inside the muldiv ifdef, so the later branches appear to rely on muldiv being defined -- confirm */ + `ifdef verbose $display($time,"\tEXECUTION: PRFINDEX: %d PID: %d",prf_index,pid); `endif + rg_prf_index<=prf_index; /* remembered for the multi-cycle result rules */ + rg_pid<=pid; + /*========== Multiplication =============== */ + if(inst_type==MUL || inst_type == DIV)begin + Bit#(1) is_mul=0; + if(inst_type==MUL) begin + is_mul=1; + `ifdef verbose $display($time,"\tEXECUTION: Multiplication Operation Op1: %h Op2: %h ",rs1,rs2); `endif + end + else begin + `ifdef verbose $display($time,"\tEXECUTION: Division Operation Op1: %h Op2: %h ",rs1,rs2); `endif + end + `ifdef sequential + muldiv.input_operands(rs1,rs2,data.funct3[1:0],pack(data.word32),is_mul); + prf.update_rd(prf_index,pid); + multicylce_output[1]<=True; /* result is collected by rule read_outputs_from_muldiv */ + `endif + `ifdef parallel + let x= multiplication(rs1,rs2,data.funct3[1:0] `ifdef RV64 ,data.word32 `endif ); + rg_mul_output<=tagged Valid x; /* NOTE(review): unlike the sequential path, prf.update_rd is not called here, and is_mul is not consulted -- confirm */ + multicylce_output[1]<=True; + `endif + end + `endif + /*========================================== 
*/ + /*============== FLOATING POINT ============================ */ + `ifdef spfpu + else if(inst_type == FLOATING || inst_type==DFLOATING)begin + `ifdef verbose $display($time,"\tEXECUTION: Floating Point Operation "); `endif + fpu._start(truncate(rs1),truncate(rs2),truncate(rs3),data.fn,rs3[11:5],data.funct3,rs3[1:0],wr_rounding_mode, data.word32); /* the funct7 and imm arguments are taken from bits [11:5] and [1:0] of rs3 */ + multicylce_output[1]<=True; /* result is collected by rule read_output_from_fpu */ + prf.update_rd(prf_index,pid); + end + `endif + /*========================================== */ + /*================ SINGLE CYCLE ALU ========================== */ + else begin + `ifdef verbose $display($time,"\tEXECUTION: Single Cycle Operation "); `endif + let {x,ea,flush,td,raspush,ex,pm} = fn_alu(data.fn,rs1,rs2,rs3,pc,data.inst_type,nextpc,data.funct3,data.mem_access,dest,data.prediction,rx.u.first.perfmonitors,data.word32); + result1=x; + `ifdef verbose $display($time,"\tEXE: rs1: %h rs2: %h rs3_imm: %h",rs1,rs2,rs3); `endif + `ifdef verbose $display($time,"\tEXECUTION: Result: ",fshow(result1)); `endif + `ifdef verbose $display($time,"\tEXECUTION: PC: %h",pc," Flush: ",fshow(flush)," EA: %h",ea," Instruction: ",fshow(data.inst_type)); `endif + if(result1 matches tagged RESULT .res)begin /* a value is available now: forward it, but only when dest is non-zero */ + if(dest!=0) + wr_forward_from_EXE <= tagged Valid tuple3(res.aluresult,prf_index,pid); + end + else begin + prf.update_rd(prf_index,pid); /* no value yet; presumably marks the destination as pending -- confirm against prf.bsv */ + end + if(result1 matches tagged MEMORY .meminfo)begin /* memory operation: ex, pm, td and raspush from fn_alu are not used on this path */ + rx.u.deq; + tx.u.enq(IE_IMEM_type{execresult:result1, + program_counter:pc, exception:exception, debugcause:rx.u.first.debugcause, + destination:dest, rd_type:rdtype , pid:pid, index:prf_index, perfmonitors:perfmonitors ,epochs:rx.u.first.epochs + `ifdef simulate , instruction:instruction `endif }); + wr_info_to_dmem.enq(tuple2(meminfo,rx.u.first.epochs[0])); /* request handed out through to_dmem.get */ + rg_flush_execute<=flush; + rg_effective_address<=ea; + end + else begin + rx.u.deq; + exception=ex; /* exception reported by fn_alu replaces the incoming (None) one */ + wr_ras_push<=raspush; + rg_flush_execute<=flush; + rg_effective_address<=ea; + wr_training_data<=td; + perfmonitors=pm; + 
tx.u.enq(IE_IMEM_type{execresult:result1, + program_counter:pc, exception:exception, debugcause:rx.u.first.debugcause, + destination:dest, rd_type:rdtype , pid:pid, index:prf_index, perfmonitors:perfmonitors ,epochs:rx.u.first.epochs + `ifdef simulate , instruction:instruction `endif }); + end + end + /*========================================== */ + end + else begin + `ifdef verbose $display($time,"\tEXECUTION: Waiting for operands.\nRS1: ",fshow(op1),"\nRS2: ",fshow(op2),"\nRS3: ",fshow(op3)); `endif + end + end + else begin /* instruction arrived with an exception: pass it on with a zero result. NOTE(review): index/pid come from rg_prf_index/rg_pid, i.e. values stored for an earlier instruction -- confirm that is intended */ + rx.u.deq; + `ifdef verbose $display($time,"\tEXECUTE: EXCEPTION"); `endif + tx.u.enq(IE_IMEM_type{execresult:tagged RESULT Arithout{aluresult:0,fflags:0}, + program_counter:pc, exception:exception, debugcause:rx.u.first.debugcause, + destination:dest, rd_type:rdtype , index:rg_prf_index, pid:rg_pid, perfmonitors:perfmonitors,epochs:rx.u.first.epochs + `ifdef simulate , instruction:instruction `endif }); + end + endrule + + `ifdef muldiv + `ifdef sequential + rule read_outputs_from_muldiv(rx.u.notEmpty && tx.u.notFull && multicylce_output[1] ); /* collects the muldiv result for the instruction still at the head of rx */ + `ifdef verbose $display($time,"\tEXECUTION: Multiplier sending output to Memory stage"); `endif + let res<-muldiv.muldiv_result; + rx.u.deq; /* the instruction is dequeued and the busy flag cleared even when the epochs mismatch; only the enqueue and the forwarding are skipped */ + let decodedata=rx.u.first; + let pc=decodedata.program_counter; + let dest=decodedata.destination; + let rdtype=decodedata.rdtype; + let exception=decodedata.exception; + `ifdef simulate let instr=decodedata.instruction; `endif + Execution_output result1= tagged RESULT(Arithout{aluresult:res,fflags:0}); + if({eEpoch,wEpoch}!=rx.u.first.epochs)begin + `ifdef verbose $display($time,"Epochs do not match"); `endif + end + else begin + tx.u.enq(IE_IMEM_type{execresult:result1,debugcause:rx.u.first.debugcause, + program_counter:pc, exception:exception, + destination:dest, rd_type:rdtype , index:rg_prf_index,pid:rg_pid, perfmonitors:rx.u.first.perfmonitors,epochs:rx.u.first.epochs + `ifdef simulate , instruction:instr `endif }); + if(dest!=0) + wr_forward_from_EXE 
<= tagged Valid tuple3(res,rg_prf_index,rg_pid); + end + multicylce_output[1]<=False; + endrule + `endif + `ifdef parallel + rule read_outputs_from_muldiv(rx.u.notEmpty &&& tx.u.notFull &&& multicylce_output[1] &&& rg_mul_output matches tagged Valid .x); /* parallel variant: the product was stored in rg_mul_output by the issue rule */ + rg_mul_output<=tagged Invalid; + rx.u.deq; + let decodedata=rx.u.first; + let pc=decodedata.program_counter; + let dest=decodedata.destination; + let rdtype=decodedata.rdtype; + let exception=decodedata.exception; + `ifdef simulate let instr=decodedata.instruction; `endif + Execution_output result1= tagged RESULT(Arithout{aluresult:x,fflags:0}); + if({eEpoch,wEpoch}!=rx.u.first.epochs)begin + `ifdef verbose $display($time,"Epochs do not match"); `endif + end + else begin + tx.u.enq(IE_IMEM_type{execresult:result1,debugcause:rx.u.first.debugcause, + program_counter:pc, exception:exception, + destination:dest, rd_type:rdtype , index:rg_prf_index,pid:rg_pid, perfmonitors:rx.u.first.perfmonitors,epochs:rx.u.first.epochs + `ifdef simulate , instruction:instr `endif }); + if(dest!=0) + wr_forward_from_EXE <= tagged Valid tuple3(x,rg_prf_index,rg_pid); + end + multicylce_output[1]<=False; + endrule + `endif + `endif + + `ifdef spfpu + rule read_output_from_fpu(rx.u.notEmpty && tx.u.notFull && multicylce_output[1] ); /* collects the FPU result for the instruction still at the head of rx */ + let res<-fpu.get_result; + rx.u.deq; + let decodedata=rx.u.first; + let pc=decodedata.program_counter; + let dest=decodedata.destination; + let rdtype=decodedata.rdtype; + let exception=decodedata.exception; + `ifdef simulate let instr=decodedata.instruction; `endif + Execution_output result1= tagged RESULT(Arithout{aluresult:res.final_result, fflags:res.fflags}); + if({eEpoch,wEpoch}!=rx.u.first.epochs)begin + `ifdef verbose $display($time,"Epochs do not match"); `endif + end + else begin + tx.u.enq(IE_IMEM_type{execresult:result1, debugcause:rx.u.first.debugcause, + program_counter:pc, exception:exception, + destination:dest, rd_type:rdtype, index:rg_prf_index,pid:rg_pid , 
perfmonitors:rx.u.first.perfmonitors,epochs:rx.u.first.epochs + `ifdef simulate , instruction:instr `endif }); + if((dest!=0 && rdtype==IntegerRF) || rdtype==FloatingRF) /* forward unless the destination is integer register 0 */ + wr_forward_from_EXE <= tagged Valid tuple3(res.final_result,rg_prf_index,rg_pid); + end + multicylce_output[1]<=False; + endrule + `endif + interface to_dmem = interface Get + method ActionValue#(Tuple2#(Memout,Bit#(1))) get ; /* pops the request queued by the issue rule */ + `ifdef verbose $display($time,"\tEXECUTION: DEQUEING MEM REQUEST",fshow(wr_info_to_dmem.first)); `endif + wr_info_to_dmem.deq; + return wr_info_to_dmem.first; + endmethod + endinterface; + method tx_out=tx.e; + method rx_in=rx.e; + method Action roundingmode(Bit#(3) rm); + wr_rounding_mode<=rm; + endmethod + method generate_flush=tuple2(rg_flush_execute,rg_effective_address); + method Maybe#(Training_data#(`VADDR)) training_data=wr_training_data; + method Maybe#(Bit#(`VADDR)) ras_push = wr_ras_push; + method Action _forwarding_from_memory (Maybe#(Tuple3#(Bit#(`Reg_width), Bit#(TLog#(`PRFDEPTH)), Bit#(4))) fwd_data); /* an Invalid argument is ignored */ + if(fwd_data matches tagged Valid .fwdata)begin + let {data,index,pid}=fwdata; + if(wr_mxl==1) begin// 32-bits + data=signExtend(data[31:0]); + end + prf.fwd_from_memory(data,index,pid); + end + endmethod + method Action update_wEpoch; + `ifdef verbose $display($time,"\tEXECUTION: Updating wEPOCH"); `endif + wEpoch<=~wEpoch; + wb_flush.send; /* keeps rule update_eEpoch_reg from firing in this cycle */ + endmethod + method Action flush_prf; + prf.flush_all; + endmethod + method Action inferred_xlen(Bit#(2) mxl); + wr_mxl <=mxl; + endmethod + endmodule +endpackage:execute_stage diff --git a/src/core/fetch_stage.bsv b/src/core/fetch_stage.bsv new file mode 100644 index 0000000..4f1b996 --- /dev/null +++ b/src/core/fetch_stage.bsv @@ -0,0 +1,175 @@ +/* +Copyright (c) 2013, IIT Madras +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +* Neither the name of IIT Madras nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +*/ +package fetch_stage; + /*========= package imports========== */ + import FIFOF::*; + import SpecialFIFOs::*; + import Connectable::*; + import GetPut::*; + import DReg::*; + /*==================================== */ + /* ======== project imports ========= */ + import TxRx ::*; + import defined_types::*; + `include "defined_parameters.bsv" + import Stack::*; + /* ================================== */ + /* Ifc_fetch: interface of the fetch stage (module mkfetch). */ + interface Ifc_fetch; + /*============================ Miscellaneous interface =========================== */ + method Action flush (Bit#(`VADDR) new_pc, Flush_type fl); /* redirects the program counter to new_pc and clears generate_pc; fl==Fence additionally sets rg_fence */ + method Action stall_fetch(Bool stall); /* a True value blocks request_to_imem.get for that cycle */ + interface Get#(Tuple5#(Bit#(2),Bit#(`VADDR),Bit#(`VADDR),Bool, Bit#(3))) request_to_imem; /* returned as (prediction, npc, pc, fence flag, {iEpoch,eEpoch,wEpoch}) */ + method Action instruction_response_from_imem(Maybe#(Tuple7#(Bit#(`VADDR),Bit#(2),Bit#(`VADDR),Bit#(32), Trap_type, Bit#(`PERFMONITORS),Bit#(3))) x); /* (pc, prediction, npc, instruction, trap, perfmonitors, epoch) */ + interface Put#(Tuple4#(Bit#(3),Bit#(`VADDR),Bit#(`VADDR),Bit#(2))) prediction_response; /* (epoch, pc, npc, prediction) */ + interface Get#(Tuple2#(Bit#(3),Bit#(`VADDR))) send_prediction_request; /* ({iEpoch,eEpoch,wEpoch}, pc to predict) */ + interface TXe#(IF_ID_type) tx_out; // pipe interface to the external FIFO; + method Action push_ras(Maybe#(Bit#(`VADDR)) addr); /* a Valid address is pushed on the return-address stack */ + method Action update_eEpoch; /* toggles eEpoch */ + method Action update_wEpoch; /* toggles wEpoch */ + /*============================================================================== */ + endinterface:Ifc_fetch + /* mkfetch: turns predictions into instruction-memory requests and filters the responses by epoch before sending them on tx. NOTE(review): the reset_vector argument is not referenced anywhere in this module; rg_programcounter is initialised with the literal 'h1000 -- confirm. */ + (*synthesize*) + (*preempts="flush,enque_new_pc"*) + (*conflict_free="enque_new_pc,prediction_response_put"*) + module mkfetch#(Bit#(`VADDR) reset_vector)(Ifc_fetch); + FIFOF#(Tuple3#(Bit#(2),Bit#(`VADDR), Bit#(`VADDR))) generate_pc<-mkLFIFOF(); /* (prediction, pc, npc) waiting to be sent to the instruction memory */ + Reg#(Bit#(`VADDR)) rg_programcounter[3]<-mkCReg(3,'h1000); /* port 0 written by prediction_response, port 1 by flush, port 2 read by send_prediction_request */ + Reg#(Bit#(1)) eEpoch <-mkReg(0); + Reg#(Bit#(1)) wEpoch <-mkReg(0); + Reg#(Bit#(1)) iEpoch[2] <-mkCReg(2,0); /* toggled by get_instruction_from_cache when it overrides a prediction */ + Wire#(Maybe#(Bit#(`VADDR))) wr_flush_prediction <-mkDWire(tagged 
Invalid); /* Valid carries the corrected next pc chosen by get_instruction_from_cache in this cycle */ + + Wire#(Bool) wr_stall_fetch <- mkDWire(False); + Reg#(Bool) rg_fence[2]<-mkCReg(2,False); /* set by flush when fl==Fence, cleared by the next request_to_imem.get */ + FIFOF#(Tuple7#(Bit#(`VADDR),Bit#(2),Bit#(`VADDR),Bit#(32),Trap_type, Bit#(`PERFMONITORS),Bit#(3))) ff_response_to_cpu <-mkSizedBypassFIFOF(1); + TX#(IF_ID_type) tx <-mkTX; + Ifc_Stack ras <-mkStack; /* return-address stack; its semantics are defined in Stack.bsv */ + /* Takes one instruction-memory response, drops it when its epoch is stale, otherwise corrects the prediction where needed and forwards the instruction on tx. */ + rule get_instruction_from_cache; + let {pc,prediction,npc,instruction,trap,perfmonitors,epoch}=ff_response_to_cpu.first; + ff_response_to_cpu.deq; + Bool rs1_link=case (instruction[19:15]) matches 'b00?01:True; default :False; endcase; /* bits [19:15] equal to 1 or 5 */ + Bool rd_link=case (instruction[11:7]) matches 'b00?01:True; default :False; endcase; /* bits [11:7] equal to 1 or 5 */ + `ifdef verbose $display($time,"\t************* FETCH STAGE FIRING ************ PC: %h Instr-EPOCHS: %b Current_Epochs: %b",pc, epoch,{iEpoch[0],eEpoch,wEpoch}); `endif + if(epoch!={iEpoch[0],eEpoch,wEpoch})begin + `ifdef verbose $display($time,"\tFETCH: Dropping Instruction Since Epochs do not match"); `endif + end + else begin + if(instruction[6:2] matches 'b110?1 &&& !rd_link &&& rs1_link)begin /* opcode bits match 110?1, rs1 is a link register and rd is not: use the return-address stack */ + if(!ras.empty)begin + let x<-ras.top; /* ActionValue; whether it also pops is defined in Stack.bsv */ + if(x!=npc || prediction[1]==0)begin /* stack value disagrees with the prediction: redirect to it */ + npc=x; + iEpoch[0]<=~iEpoch[0]; + wr_flush_prediction<=tagged Valid npc; + prediction='b10; + end + `ifdef verbose $display($time,"TAKING RAS as the NEXT PC: %h",x); `endif + end + end + else if((prediction[1]==1 && instruction[6:4]!='b110))begin /* prediction bit 1 set although opcode bits [6:4] are not 110: redirect to pc+4; note that nextpc below still carries the original npc */ + iEpoch[0]<=~iEpoch[0]; + wr_flush_prediction<=tagged Valid (pc+4); + prediction[1]=0; + end + tx.u.enq(IF_ID_type{program_counter:pc, + instruction:instruction[31:0], + nextpc:npc, + prediction:prediction, + perfmonitors:perfmonitors, + exception:trap, + epochs:{eEpoch,wEpoch} + }); /* only eEpoch and wEpoch travel with the instruction; iEpoch is local to this stage */ + `ifdef verbose $display($time,"\tInstruction Fetched: %h \t PC: %h PERF: %h Prediction: ",instruction,pc,perfmonitors,fshow(prediction)," next pc: %h",npc); `endif + end + endrule + /* When get_instruction_from_cache chose a corrected pc, queue it for the instruction memory with prediction 00 and next pc = newpc+4. */ + rule enque_new_pc(wr_flush_prediction matches tagged Valid .newpc); + `ifdef verbose $display($time,"\tFETCH: Enquiing new PC to ICACHE: %h",newpc); `endif + 
generate_pc.enq(tuple3('b00,newpc,newpc+4)); + endrule + +// /* ================================== Methods and interface definitions =======================*/ + interface request_to_imem = interface Get + method ActionValue#(Tuple5#(Bit#(2),Bit#(`VADDR),Bit#(`VADDR),Bool, Bit#(3))) get if(tx.u.notFull && !wr_stall_fetch); /* guarded by space on tx and no stall */ + let {prediction,pc,npc}=generate_pc.first; + `ifdef verbose $display($time,"\tFETCH: Address sent to IMEM: %h epochs: %b",pc,{iEpoch[0],eEpoch,wEpoch}); `endif + rg_fence[0]<=False; + if(!rg_fence[0]) /* while a fence is pending the head entry is kept, so the same entry is returned again by the next call */ + generate_pc.deq; + return tuple5(prediction,npc,pc,rg_fence[0],{iEpoch[0],eEpoch,wEpoch}); /* note the order: npc comes before pc */ + endmethod + endinterface; + method Action instruction_response_from_imem(Maybe#(Tuple7#(Bit#(`VADDR),Bit#(2),Bit#(`VADDR),Bit#(32), Trap_type, Bit#(`PERFMONITORS),Bit#(3))) x); /* an Invalid argument is ignored */ + if(x matches tagged Valid .instr1)begin + let {pc,prediction,npc,instruction,trap,perfmonitors,epoch}=instr1; + `ifdef verbose $display($time,"\tFETCH: GOT Instructions: ",fshow(instr1)); `endif + ff_response_to_cpu.enq(instr1); + end + endmethod + method Action flush (Bit#(`VADDR) new_pc, Flush_type fl); + `ifdef verbose $display($time,"\tFETCH: Flushing New PC: %h",new_pc); `endif + rg_programcounter[1]<=new_pc; + generate_pc.clear; /* discard requests queued before the flush */ + if(fl==Fence) + rg_fence[1]<=True; + endmethod + method Action stall_fetch(Bool stall); + wr_stall_fetch <= stall; + endmethod + interface tx_out = tx.e; + method Action push_ras(Maybe#(Bit#(`VADDR)) addr); + if(addr matches tagged Valid .x)begin + `ifdef verbose $display($time,"RAS: Pushing Addr: %h",x); `endif + ras.push(x); + end + endmethod + interface prediction_response=interface Put + method Action put (Tuple4#(Bit#(3),Bit#(`VADDR),Bit#(`VADDR),Bit#(2)) x); /* accepted only when the epoch is current and no redirect is being issued in this cycle */ + let {epoch,pc,npc,prediction}=x; + if(epoch=={iEpoch[1],eEpoch,wEpoch} &&& wr_flush_prediction matches tagged Invalid)begin + if(prediction[1]==0) /* prediction bit 1 clear: fall through to pc+4 */ + npc=pc+4; + rg_programcounter[0]<=npc; + `ifdef verbose $display($time,"\tFETCH: Got prediction from BPU: %b for PC: %h New PC: 
%h",prediction,pc,npc); `endif + generate_pc.enq(tuple3(prediction,pc,npc)); + end + `ifdef verbose + else + $display($time,"\tFETCH: Dropping response from BPU for PC: %h",pc); `endif + endmethod + endinterface; + interface send_prediction_request=interface Get + method ActionValue#(Tuple2#(Bit#(3),Bit#(`VADDR))) get; /* has no state update of its own: returns the current epochs and the pc to predict */ + if(wr_flush_prediction matches tagged Valid .newpc)begin /* a redirect happened this cycle: newpc itself is queued by enque_new_pc, so ask for newpc+4 */ + `ifdef verbose $display($time,"\tFETCH: Sending Program Counter to BPU: %h",newpc+4); `endif + return tuple2({iEpoch[1],eEpoch,wEpoch},newpc+4); + end + else begin + `ifdef verbose $display($time,"\tFETCH: Sending Program Counter to BPU: %h",rg_programcounter[2]); `endif + return tuple2({iEpoch[1],eEpoch,wEpoch},rg_programcounter[2]); + end + endmethod + endinterface; + method Action update_eEpoch; + eEpoch<=~eEpoch; + endmethod + method Action update_wEpoch; + wEpoch<=~wEpoch; + endmethod + /*================================================================================================= */ + endmodule:mkfetch +endpackage:fetch_stage diff --git a/src/core/fpu/fpu.bsv b/src/core/fpu/fpu.bsv new file mode 100644 index 0000000..a283fd1 --- /dev/null +++ b/src/core/fpu/fpu.bsv @@ -0,0 +1,581 @@ +/* +Authors : Vinod.G, Arjun Menon +Email : g.vinod1993@gmail.com, c.arjunmenon@gmail.com +Last update : 27th November 2017 +See LICENSE for more details +Description: +TODO +*/ +package fpu; +/*==== Project imports ==== */ +`include "defined_parameters.bsv" +import defined_types::*; +import fpu_compare_min_max::*; +import fpu_int_to_sp::*; +import fpu_int_to_dp::*; +import fpu_sign_injection::*; +import fpu_divider::*; +import fpu_sqrt::*; +import fpu_sp_to_int::*; +import fpu_dp_to_int::*; +import fpu_fm_add_sub::*; +import fpu_convert_sp_dp::*; +import fpu_fclass::*; +/*========================= */ +/*===== Package imports ==== */ +import FIFO::*; +import FIFOF::*; +import SpecialFIFOs::*; +import DReg::*; +import UniqueWrappers::*; +import SpecialFIFOs::*; +import Clocks::*; 
+/*========================= */ + +interface Ifc_fpu; //interface to module mk_fpu + method Action _start(Bit#(`FLEN) operand1, Bit#(`FLEN) operand2, Bit#(`FLEN) operand3, Bit#(4) opcode, Bit#(7) funct7, Bit#(3) funct3, Bit#(2) imm, Bit#(3) fsr, Bool issp); + method ActionValue#(Floating_output#(`FLEN)) get_result; + method Action flush; +endinterface + +typedef struct{ + Bit#(`FLEN) operand1; + Bit#(`FLEN) operand2; + Bit#(`FLEN) operand3; + Bit#(4) opcode; + Bit#(7) funct7; + Bit#(3) funct3; + Bit#(2) imm; + Bit#(3) fsr; + Bool issp; + }Input_Packet deriving (Bits,Eq); + +(*synthesize*) +module mkfpu(Ifc_fpu); + // ============================================ + // Decode and Maintenance Registers + // ============================================ + FIFO# (Floating_output#(`FLEN)) ff_result <- mkFIFO1; + FIFO# (Input_Packet) ff_input <- mkFIFO1; + Wire#(Bool) wr_flush<-mkDWire(False); + // ============================================= + + // ============================================== + // Module Instantiations + // ============================================== + + `ifdef fpu_hierarchical + Ifc_fpu_compare_min_max32 inst_fpu_compare_min_max <- mkfpu_compare_min_max32(); + Ifc_fpu_sign_injection32 inst_spfpu_sign_injection <- mkfpu_sign_injection32(); + Ifc_fpu_divider32 inst_spfpu_divider <- mkfpu_divider32(); + Ifc_fpu_sqrt32 inst_spfpu_sqrt <- mkfpu_sqrt32(); + Ifc_fpu_fm_add_sub32 inst_spfm_add_sub <- mkfpu_fm_add_sub32(); + Ifc_fpu_fclass32 inst_spfpu_fclass <- mkfpu_fclass32(); + + Ifc_fpu_compare_min_max64 inst_dpfpu_compare_min_max <- mkfpu_compare_min_max64(); + Ifc_fpu_sign_injection64 inst_dpfpu_sign_injection <- mkfpu_sign_injection64(); + Ifc_fpu_divider64 inst_dpfpu_divider <- mkfpu_divider64(); + Ifc_fpu_sqrt64 inst_dpfpu_sqrt <- mkfpu_sqrt64(); + Ifc_fpu_fm_add_sub64 inst_dpfm_add_sub <- mkfpu_fm_add_sub64(); + Ifc_fpu_fclass64 inst_dpfpu_fclass <- mkfpu_fclass64(); + + `else + Ifc_fpu_compare_min_max#(32,23,8) inst_fpu_compare_min_max <- 
mkfpu_compare_min_max(); // No Flush + Ifc_fpu_sign_injection#(32,23,8) inst_spfpu_sign_injection <- mkfpu_sign_injection(); // No Flush + Ifc_fpu_divider#(32,23,8) inst_spfpu_divider <- mkfpu_divider(); + Ifc_fpu_sqrt#(32,23,8) inst_spfpu_sqrt <- mkfpu_sqrt(); + Ifc_fpu_fm_add_sub#(32,23,8) inst_spfm_add_sub <- mkfpu_fm_add_sub(); + Ifc_fpu_fclass#(32,23,8) inst_spfpu_fclass <- mkfpu_fclass(); // No Flush + + Ifc_fpu_compare_min_max#(64,52,11) inst_dpfpu_compare_min_max <- mkfpu_compare_min_max(); // No Flush + Ifc_fpu_sign_injection#(64,52,11) inst_dpfpu_sign_injection <- mkfpu_sign_injection(); // No Flush + Ifc_fpu_divider#(64,52,11) inst_dpfpu_divider <- mkfpu_divider(); + Ifc_fpu_sqrt#(64,52,11) inst_dpfpu_sqrt <- mkfpu_sqrt(); + Ifc_fpu_fm_add_sub#(64,52,11) inst_dpfm_add_sub <- mkfpu_fm_add_sub(); + Ifc_fpu_fclass#(64,52,11) inst_dpfpu_fclass <- mkfpu_fclass(); // No Flush + `endif + + Ifc_fpu_sp_to_int inst_spfp_to_int <- mkfpu_sp_to_int(); // No Flush + Ifc_fpu_convert_sp_dp inst_spfpu_cnvt <- mkfpu_convert_sp_dp(); // No Flush + Ifc_fpu_int_to_sp inst_fpu_int_to_fp <- mkfpu_int_to_sp(); // No Flush + + Ifc_fpu_dp_to_int inst_dpfp_to_int <- mkfpu_dp_to_int(); // No Flush + Ifc_fpu_convert_dp_sp inst_dpfpu_cnvt <- mkfpu_convert_dp_sp(); // No Flush + Ifc_fpu_int_to_dp inst_dpfpu_int_to_fp <- mkfpu_int_to_dp(); // No Flush + + +// ============================================== + +// ============================================== +// Function definitions +// ============================================== + function Tuple3#(Bit#(5), Bit#(5), Bit#(5)) condFlags (Tuple2#(Bit#(m), Bit#(e)) x, Tuple2#(Bit#(m), Bit#(e)) y, Tuple2#(Bit#(m),Bit#(e)) z); + let s = valueOf(m); + let mantissa1 = tpl_1(x); + let exponent1 = tpl_2(x); + let mantissa2 = tpl_1(y); + let exponent2 = tpl_2(y); + let mantissa3 = tpl_1(z); + let exponent3 = tpl_2(z); + bit man10 = |mantissa1; bit man20 = |mantissa2; bit man30 = |mantissa3; + bit exp10 = |exponent1; bit exp20 = |exponent2; bit 
exp30 = |exponent3; + bit man11 = &mantissa1; bit man21 = &mantissa2; bit man31 = &mantissa3; + bit exp11 = &exponent1; bit exp21 = &exponent2; bit exp31 = &exponent3; + Bit#(5) flags1, flags2,flags3; + Bool expZ1 = (exp10 == 0); + Bool manZ1 = (man10 == 0); + Bool expO1 = (exp11 == '1); + Bool manO1 = (man11 == '1); + Bool topB1 = (mantissa1[s-1] == 1); + Bool expZ2 = (exp20 == 0); + Bool manZ2 = (man20 == 0); + Bool expO2 = (exp21 == '1); + Bool manO2 = (man21 == '1); + Bool topB2 = (mantissa2[s-1] == 1 && man20 ==1); + Bool expZ3 = (exp30 == 0); + Bool manZ3 = (man30 == 0); + Bool expO3 = (exp31 == 1); + Bool manO3 = (man31 == 1); + Bool topB3 = (mantissa3[s-1] == 1 && man30 ==1); + flags1 = {pack(expZ1 && !manZ1),pack(manZ1 && expZ1),pack(expO1 && topB1),pack(expO1 && manZ1),pack(expO1 && !topB1 && !manZ1)}; //Denormal, isZero, QNaN, Infinity, SNaN + flags2 = {pack(expZ2 && !manZ2),pack(manZ2 && expZ2),pack(expO2 && topB2),pack(expO2 && manZ2),pack(expO2 && !topB2 && !manZ2)}; //Denormal, isZero, QNaN, Infinity, SNaN + flags3 = {pack(expZ3 && !manZ3),pack(manZ3 && expZ3),pack(expO3 && topB3),pack(expO3 && manZ3),pack(expO3 && !topB3 && !manZ3)}; //Denormal, isZero, QNaN, Infinity, SNaN + return tuple3(flags1,flags2,flags3); + endfunction + + function Tuple3#(Bit#(m),Bit#(m), Bit#(m)) getMantissa (Bit#(n) op1, Bit#(n) op2, Bit#(n) op3) + provisos(Add#(TAdd#(m,1),e,n), + Add#(7,a__,e) + ); + let expo = valueOf(e); + let man = valueOf(m); + return tuple3(op1[man-1:0],op2[man-1:0],op3[man-1:0]); + endfunction + + function Tuple3#(Bit#(e), Bit#(e), Bit#(e)) getExp (Bit#(n) op1, Bit#(n) op2, Bit#(n) op3) + provisos(Add#(TAdd#(m,1),e,n), + Add#(7,a__,e) + ); + let inp = valueOf(n); + let man = valueOf(m); + return tuple3(op1[inp-2:man], op2[inp-2:man], op3[inp-2:man]); + endfunction + + function Bool isNaNBox(Bit#(64) op); + return (&(op[63:32])==1); + endfunction + + function Tuple3#(Bit#(32),Bit#(32),Bit#(32)) setCanNaN (Bit#(64) op1, Bit#(64) op2, Bit#(64) op3); + 
return tuple3(isNaNBox(op1)? truncate(op1) : 32'h7fc00000, isNaNBox(op2)? truncate(op2) : 32'h7fc00000, isNaNBox(op3)? truncate(op3) : 32'h7fc00000); + endfunction + +// ================================================ +// Function Wrappers +// ================================================ + Wrapper3#(Tuple2#(Bit#(23), Bit#(8)),Tuple2#(Bit#(23), Bit#(8)), Tuple2#(Bit#(23), Bit#(8)), Tuple3#(Bit#(5),Bit#(5),Bit#(5))) condFlags32 <- mkUniqueWrapper3(condFlags); + Wrapper3#(Tuple2#(Bit#(52), Bit#(11)),Tuple2#(Bit#(52), Bit#(11)),Tuple2#(Bit#(52), Bit#(11)), Tuple3#(Bit#(5),Bit#(5),Bit#(5))) condFlags64 <- mkUniqueWrapper3(condFlags); + Wrapper3#(Bit#(32),Bit#(32),Bit#(32),Tuple3#(Bit#(23),Bit#(23),Bit#(23))) getMant32 <- mkUniqueWrapper3(getMantissa); + Wrapper3#(Bit#(32),Bit#(32),Bit#(32),Tuple3#(Bit#(8),Bit#(8),Bit#(8))) getExp32 <- mkUniqueWrapper3(getExp); + Wrapper3#(Bit#(64),Bit#(64),Bit#(64),Tuple3#(Bit#(52),Bit#(52),Bit#(52))) getMant64 <- mkUniqueWrapper3(getMantissa); + Wrapper3#(Bit#(64),Bit#(64),Bit#(64),Tuple3#(Bit#(11),Bit#(11),Bit#(11))) getExp64 <- mkUniqueWrapper3(getExp); + Wrapper3#(Bit#(64),Bit#(64),Bit#(64),Tuple3#(Bit#(32),Bit#(32),Bit#(32))) setCanonicalNaN <- mkUniqueWrapper3(setCanNaN); + Reg#(Bool) rg_multicycle_op <-mkReg(False); + + + (*mutually_exclusive="rl_get_output_from_spfpu_divider, rl_get_output_from_dpfpu_divider,rl_get_output_from_spfpu_sqrt, rl_get_output_from_dpfpu_sqrt,rl_get_output_from_fm_add_sub,rl_get_output_from_dpfm_add_sub"*) + + + rule start_stage; +/// Bool issp = (funct7[0] == 0); + let input_packet = ff_input.first; + Bit#(`FLEN) operand1 = input_packet.operand1; + Bit#(`FLEN) operand2 = input_packet.operand2; + Bit#(`FLEN) operand3 = input_packet.operand3; + Bit#(4) opcode = input_packet.opcode; + Bit#(7) funct7 = input_packet.funct7; + Bit#(3) funct3 = input_packet.funct3; + Bit#(2) imm = input_packet.imm; + Bit#(3) fsr = input_packet.fsr; + Bool issp = input_packet.issp; + ff_input.deq; + funct3 = (funct3 == 
'b111) ? fsr : funct3; + if(((funct7[6:2]==`FCMP_f5) || funct7[6:2] == `FMMAX_f5) && opcode == `FP_OPCODE)begin // compare min max + if(issp) begin + let {op1,op2,op3} <- setCanonicalNaN.func(operand1,operand2,'1); + let {man1,man2,man3} <- getMant32.func(op1, op2,0); + let {exp1,exp2,exp3} <- getExp32.func(op1, op2,0); + let {flags1,flags2,flags3} <- condFlags32.func(tuple2(man1,exp1),tuple2(man2,exp2),tuple2(0,0)); + let sign1 = op1[31]; + let sign2 = op2[31]; + let x<-inst_fpu_compare_min_max._start(op1,op2,funct3,funct7[2],tuple2(flags1,flags2)); + Floating_output#(`Reg_width) y=?; + `ifdef dpfpu + y.final_result=funct7[2]==0?zeroExtend(x.final_result):{'1,x.final_result}; + `else + y.final_result=x.final_result; + `endif + y.fflags=x.fflags; + ff_result.enq(y); + end + else begin + let {man3,man4,man5} <- getMant64.func(operand1, operand2,0); + let {exp3,exp4,exp5} <- getExp64.func(operand1,operand2,0); + let {f1,f2,f3} <- condFlags64.func(tuple2(man3,exp3),tuple2(man4,exp4),tuple2(0,0)); + let sign3 = operand1[63]; + let sign4 = operand2[63]; + let x<-inst_dpfpu_compare_min_max._start(operand1,operand2,funct3,funct7[2],tuple2(f1,f2)); + ff_result.enq(x); + end + `ifdef verbose $display($time,"\tGiving inputs to fpu_compare_min_max %h operand2 %h funct7 : %h",operand1, operand2,funct7); `endif + end + else if((funct7[6:2]==`FCVT_F_I_f5) && opcode == `FP_OPCODE) begin + if(issp)begin + let x <-inst_fpu_int_to_fp._start(operand1, imm[0],imm[1], funct3); + Floating_output#(`Reg_width) y=?; + `ifdef dpfpu + y.final_result={'1,x.final_result}; + `else + y.final_result=x.final_result; + `endif + y.fflags=x.fflags; + ff_result.enq(y); + end + else begin + let x<-inst_dpfpu_int_to_fp._start(operand1,imm[0],imm[1],funct3); + ff_result.enq(x); + end + `ifdef verbose $display($time,"\tGiving inputs to fpu_int_to_fp %h operand2[0] %h operand2[1] : %h",operand1, operand2[0],operand2[1]); `endif + end + else if((funct7[6:2] == `FSGNJN_f5) && opcode == `FP_OPCODE)begin + 
if(issp)begin + let {op1,op2,op3} <- setCanonicalNaN.func(operand1,operand2,'1); + let x <-inst_spfpu_sign_injection._start(op1, op2, funct3); + Floating_output#(`Reg_width) y=?; + `ifdef dpfpu + y.final_result={'1,x.final_result}; + `else + y.final_result=x.final_result; + `endif + y.fflags=x.fflags; + ff_result.enq(y); + end + else begin + let x<-inst_dpfpu_sign_injection._start(operand1,operand2,funct3); + ff_result.enq(x); + end + `ifdef verbose $display($time,"\tGiving inputs to the fpu sign injection"); `endif + end + else if((funct7[6:2] == `FCVT_I_F_f5) && opcode == `FP_OPCODE) begin + if(issp) begin + let {op1,op2,op3} <- setCanonicalNaN.func(operand1,'0,'0); + let {man1,man2,man3} <- getMant32.func(op1, 0,0); + let {exp1,exp2,exp3} <- getExp32.func(op1, 0,0); + let {flags1,flags2,flags3} <- condFlags32.func(tuple2(man1,exp1),tuple2(0,0),tuple2(0,0)); + let x <- inst_spfp_to_int._start(op1[31],exp1,man1, imm[0],imm[1],funct3,flags1); + ff_result.enq(x); + end + else begin + let {man4,man5,man6} <- getMant64.func(operand1, 0,0); + let {exp4,exp5,exp6} <- getExp64.func(operand1, 0,0); + let {flags4,flags5,flags6} <- condFlags64.func(tuple2(man4,exp4),tuple2(0,0),tuple2(0,0)); + let x<-inst_dpfp_to_int._start(operand1[63],exp4,man4,imm[0],imm[1],funct3,flags4); + ff_result.enq(x); + end + `ifdef verbose $display($time,"\tGiving Inputs to fpu to int Conversion Module"); `endif + end + else if(((funct7[6:2] == `FCLASS_f5)&&(funct3=='b001))&&(opcode == `FP_OPCODE))begin + if(issp) begin + let {op1,op2,op3} <- setCanonicalNaN.func(operand1,'1,'1); + let {man1,man2,man3} <- getMant32.func(op1, 0,0); + let {exp1,exp2,exp3} <- getExp32.func(op1, 0,0); + let {x1,x2,x3} <- condFlags32.func(tuple2(man1,exp1),tuple2(0,0),tuple2(0,0)); + let x<-inst_spfpu_fclass._start(op1[31],man1,exp1,x1); + Floating_output#(`FLEN) y = Floating_output{final_result:zeroExtend(x.final_result),fflags:x.fflags}; + ff_result.enq(y); + end + else begin + let {man1,man2,man3} <- 
getMant64.func(operand1, 0,0); + let {exp1,exp2,exp3} <- getExp64.func(operand1, 0,0); + let {x1,x2,x3} <- condFlags64.func(tuple2(man1,exp1),tuple2(0,0),tuple2(0,0)); + let x<-inst_dpfpu_fclass._start(operand1[63],man1,exp1,x1); + ff_result.enq(x); + end + `ifdef verbose $display($time,"\tGiving inputs to floating classify module"); `endif + end + else if((funct7[6:2] == `FCVT_S_D_f5) && opcode == `FP_OPCODE)begin + if(!issp) begin + `ifdef verbose $display("Giving inputs to Convert SP to DP"); `endif + let {op1,op2,op3} <- setCanonicalNaN.func(operand1,'1,'1); + let {man1,man2,man3} <- getMant32.func(op1, 0,0); + let {exp1,exp2,exp3} <- getExp32.func(op1, 0,0); + let {x1,x2,x3} <- condFlags32.func(tuple2(man1,exp1),tuple2(0,0),tuple2(0,0)); + let x<-inst_spfpu_cnvt._start(op1[31],exp1,man1,funct3,x1); + ff_result.enq(x); + end + else begin + `ifdef verbose $display("Giving inputs to Convert DP to SP"); `endif + let {man1,man2,man3} <- getMant64.func(operand1, 0,0); + let {exp1,exp2,exp3} <- getExp64.func(operand1, 0,0); + let {x1,x2,x3} <- condFlags64.func(tuple2(man1,exp1),tuple2(0,0),tuple2(0,0)); + let x<-inst_dpfpu_cnvt._start(operand1[63],exp1,man1,funct3,x1); + Floating_output#(`Reg_width) y=?; + `ifdef dpfpu + y.final_result = {'hffffffff,x.final_result[31:0]}; + `else + y.final_result = x.final_result; + `endif + y.fflags=x.fflags; + ff_result.enq(y); + end + end + else if(((funct7 == `FMV_X_S_f7 || funct7 == `FMV_S_X_f7) && funct3 == 'b000) && opcode == `FP_OPCODE)begin + `ifdef verbose $display($time,"\tGiving inputs to FMV"); `endif + Bit#(`FLEN) final_result=0; + if(funct7==`FMV_X_S_f7) // sp to integer FMV.X.W + final_result = signExtend(operand1[31:0]); + else // integer to sp FMV.W.X + `ifdef dpfpu + final_result = {'1,operand1[31:0]}; + `else + final_result= operand1[31:0]; + `endif + ff_result.enq(Floating_output{final_result:final_result, fflags : 0}); + end + else if(((funct7 == `FMV_X_D_f7 || funct7 == `FMV_D_X_f7) && funct3 == 'b000) && 
opcode == `FP_OPCODE)begin // TODO merge with above condition + `ifdef verbose $display($time,"\tGiving inputs to FMV"); `endif + Bit#(`FLEN) final_result=0; + if(funct7==`FMV_X_D_f7) // dp to integer FMV.X.D + final_result = operand1; + else // integer to dp FMV.D.X + final_result= operand1; + ff_result.enq(Floating_output{final_result:final_result, fflags : 0}); + end + else if(((funct7[6:2] == `FADD_f5 || funct7[6:2] == `FSUB_f5) && opcode == `FP_OPCODE))begin // add sub + rg_multicycle_op<=True; + if(issp) begin + let {op1,op2,op3} <- setCanonicalNaN.func(operand1,operand2,'1); + let {man1,man2,man3} <- getMant32.func(32'h3f800000,op1, op2); + let {exp1,exp2,exp3} <- getExp32.func(32'h3f800000,op1, op2); + let x <- condFlags32.func(tuple2(man1,exp1),tuple2(man2,exp2),tuple2(man3,exp3)); + let sign1 = 0; + let sign2 = op1[31]; + let sign3 = op2[31]; + inst_spfm_add_sub._start(tuple3(sign1,exp1,man1),tuple3(sign2,exp2,man2),tuple3(sign3,exp3,man3),funct3,funct7[2],1'b0,1'b0,0,x); + end + else begin + let {man1,man2,man3} <- getMant64.func(64'h3ff0000000000000,operand1, operand2); + let {exp1,exp2,exp3} <- getExp64.func(64'h3ff0000000000000,operand1, operand2); + let x <- condFlags64.func(tuple2(man1,exp1),tuple2(man2,exp2),tuple2(man3,exp3)); + let sign1 = 0; + let sign2 = operand1[63]; + let sign3 = operand2[63]; + inst_dpfm_add_sub._start(tuple3(sign1,exp1,man1),tuple3(sign2,exp2,man2), tuple3(sign3,exp3,man3), funct3, funct7[2], 1'b0, 1'b0,0,x); + end + `ifdef verbose $display($time,"\tGiving inputs to the fpu add_sub"); `endif + end + else if((funct7[6:2] == `FDIV_f5) && opcode == `FP_OPCODE)begin// spfpu divider + rg_multicycle_op<=True; + if(issp) begin + let {op1,op2,op3} <- setCanonicalNaN.func(operand1,operand2,'1); + let {man1,man2,man3} <- getMant32.func(op1, op2,op3); + let {exp1,exp2,exp3} <- getExp32.func(op1, op2,op3); + let {f1,f2,f3} <- condFlags32.func(tuple2(man1,exp1),tuple2(man2,exp2),tuple2(man3,exp3)); + 
inst_spfpu_divider._start(op1[31]^op2[31],man1,exp1,man2,exp2,funct3,tuple2(f1,f2)); + end + else begin + let {man3,man4,man5} <- getMant64.func(operand1, operand2,0); + let {exp3,exp4,exp5} <- getExp64.func(operand1,operand2,0); + let {y1,y2,y3} <- condFlags64.func(tuple2(man3,exp3),tuple2(man4,exp4),tuple2(0,0)); + inst_dpfpu_divider._start(operand1[63]^operand2[63],man3,exp3,man4,exp4,funct3, tuple2(y1,y2)); + end + `ifdef verbose $display($time,"\tGiving inputs to the spfpu divider"); `endif + end + else if((funct7[6:2] == `FSQRT_f5) && opcode == `FP_OPCODE)begin// sqrt + rg_multicycle_op<=True; + if(issp) begin + let {op1,op2,op3} <- setCanonicalNaN.func(operand1,'1,'1); + let {man1,man2,man3} <- getMant32.func(op1, 0,0); + let {exp1,exp2,exp3} <- getExp32.func(op1, 0,0); + let x <- condFlags32.func(tuple2(man1,exp1),tuple2(0,0),tuple2(0,0)); + inst_spfpu_sqrt._start(operand1[31], man1, exp1, funct3, tpl_1(x)); + end + else begin + let {man3,man4,man5} <- getMant64.func(operand1, 0,0); + let {exp3,exp4,exp5} <- getExp64.func(operand1,0,0); + let y <- condFlags64.func(tuple2(man3,exp3),tuple2(0,0),tuple2(0,0)); + inst_dpfpu_sqrt._start(operand1[63], man3, exp3, funct3, tpl_1(y)); + `ifdef verbose $display($time,"\tGiving inputs to the spfpu sqrt"); `endif + end + end + else if((funct7[6:2] == `FMUL_f5) && opcode == `FP_OPCODE)begin + rg_multicycle_op<=True; + `ifdef verbose $display("funct3 : %h",funct3); `endif + if(issp) begin + let {op1,op2,op3} <- setCanonicalNaN.func(operand1,operand2,'1); + let {man1,man2,man3} <- getMant32.func(op1, op2,0); + let {exp1,exp2,exp3} <- getExp32.func(op1, op2,0); + let x <- condFlags32.func(tuple2(man1,exp1),tuple2(man2,exp2),tuple2(man3,exp3)); + let sign1 = op1[31]; + let sign2 = op2[31]; + let sign3 = 0; + inst_spfm_add_sub._start(tuple3(sign1,exp1,man1),tuple3(sign2,exp2,man2),tuple3(sign3,exp3,man3),funct3,1'b1,1'b0,1'b1,0,x); + end + else begin + let {man1,man2,man3} <- getMant64.func(operand1, operand2,0); + let 
{exp1,exp2,exp3} <- getExp64.func(operand1, operand2,0); + let x <- condFlags64.func(tuple2(man1,exp1),tuple2(man2,exp2),tuple2(man3,exp3)); + let sign1 = operand1[63]; + let sign2 = operand2[63]; + let sign3 = 0; + inst_dpfm_add_sub._start(tuple3(sign1,exp1,man1),tuple3(sign2,exp2,man2),tuple3(sign3,exp3,man3), funct3, 1'b1, 1'b0, 1'b1,0,x); + end + `ifdef verbose $display($time,"\tGiving inputs to the spfloating multiplier module"); `endif + end + else if((opcode == 'b0000) || (opcode == 'b0001) || (opcode == 'b0010) || opcode == 'b0011)begin + rg_multicycle_op<=True; + if(issp) begin + `ifdef verbose $display($time,"\tGiving Inputs to sp fused multiply add Conversion Module"); `endif + let {op1,op2,op3} <- setCanonicalNaN.func(operand1,operand2,operand3); + let {man1,man2,man3} <- getMant32.func(op1, op2,op3); + let {exp1,exp2,exp3} <- getExp32.func(op1, op2,op3); + let x <- condFlags32.func(tuple2(man1,exp1),tuple2(man2,exp2),tuple2(man3,exp3)); + let sign1 = op1[31]; + let sign2 = op2[31]; + let sign3 = op3[31]; + inst_spfm_add_sub._start(tuple3(sign1,exp1,man1),tuple3(sign2,exp2,man2),tuple3(sign3,exp3,man3),funct3,opcode[0]^opcode[1],opcode[1],1'b0,1,x); + end + else begin + `ifdef verbose $display($time,"\tGiving Inputs to dp fused multiply add Conversion Module"); `endif + let {man1,man2,man3} <- getMant64.func(operand1, operand2,operand3); + let {exp1,exp2,exp3} <- getExp64.func(operand1, operand2,operand3); + let x <- condFlags64.func(tuple2(man1,exp1),tuple2(man2,exp2),tuple2(man3,exp3)); + let sign1 = operand1[63]; + let sign2 = operand2[63]; + let sign3 = operand3[63]; + inst_dpfm_add_sub._start(tuple3(sign1,exp1,man1),tuple3(sign2,exp2,man2),tuple3(sign3,exp3,man3), funct3, opcode[0]^opcode[1],opcode[1], 1'b0,1,x); + end + `ifdef verbose $display($time,"\tOperand 1: %h Operand 2: %h Operand 3: %h",operand1, operand2, operand3); `endif + end + endrule + + //rule to get output from spfpu divider + rule rl_get_output_from_spfpu_divider(!wr_flush && 
rg_multicycle_op); + `ifdef verbose $display($time,"\tGot output from spfpu divider"); `endif + let x= inst_spfpu_divider.final_result_; + Floating_output#(`FLEN) y=?; + y.fflags=x.fflags; + `ifdef dpfpu + y.final_result={'1,x.final_result}; + `else + y.final_result=x.final_result; + `endif + ff_result.enq(y); + rg_multicycle_op<=False; + endrule + + //rule to get output from dpfpu divider + rule rl_get_output_from_dpfpu_divider(!wr_flush && rg_multicycle_op); + `ifdef verbose $display($time,"\tGot output from spfpu divider"); `endif + let x= inst_dpfpu_divider.final_result_; + Floating_output#(`FLEN) y=?; + y.fflags=x.fflags; + y.final_result=x.final_result; + ff_result.enq(y); + rg_multicycle_op<=False; + endrule + + //rule to give inputs to spfpu square root module + + //rule to get output from spfpu square root module + rule rl_get_output_from_spfpu_sqrt(inst_spfpu_sqrt.get_result matches tagged Valid .res &&& !wr_flush &&& rg_multicycle_op); // TODO check for inexact and underflow + `ifdef verbose $display($time,"\tGot output from spfpu sqrt"); `endif + let x = res; + Floating_output#(`FLEN) y=?; + y.fflags=x.fflags; + `ifdef dpfpu + y.final_result={'1,x.final_result}; + `else + y.final_result=x.final_result; + `endif + ff_result.enq(y); + rg_multicycle_op<=False; + endrule + + //rule to get output from dpfpu square root module + rule rl_get_output_from_dpfpu_sqrt(inst_dpfpu_sqrt.get_result matches tagged Valid .res &&& !wr_flush &&& rg_multicycle_op); // TODO check for inexact and underflow + `ifdef verbose $display($time,"\tGot output from spfpu sqrt"); `endif + let x = res; + Floating_output#(`FLEN) y=?; + y.fflags=x.fflags; + y.final_result=x.final_result; + ff_result.enq(y); + rg_multicycle_op<=False; + endrule + + + //rule to get output from fused multiply add sub + rule rl_get_output_from_fm_add_sub(!wr_flush && rg_multicycle_op); + `ifdef verbose $display($time,"\tGot output from sp fused multiple add conversion Module"); `endif + let x= 
inst_spfm_add_sub.get_result; + Floating_output#(`FLEN) y=?; + y.fflags=x.fflags; + `ifdef dpfpu + y.final_result={'hffffffff,x.final_result[31:0]}; + `else + y.final_result=x.final_result; + `endif + ff_result.enq(y); + `ifdef verbose $display($time,"\tFMA Result : %16h", y.final_result); `endif + rg_multicycle_op<=False; + endrule + + //rule to get output from fused multiply add sub + rule rl_get_output_from_dpfm_add_sub(!wr_flush && rg_multicycle_op); + `ifdef verbose $display($time,"\tGot output from sp fused multiple add conversion Module"); `endif + let x= inst_dpfm_add_sub.get_result; + Floating_output#(`FLEN) y=?; + y.fflags=x.fflags; + y.final_result=x.final_result; + ff_result.enq(y); + `ifdef verbose $display($time,"\tFMA Result : %16h", y.final_result); `endif + rg_multicycle_op<=False; + endrule + + rule flush_fifo(wr_flush); + ff_result.clear; + rg_multicycle_op<=False; + endrule + //rule to give inputs to spfloating multiplier + + // input method to start the floating point operation + + method Action _start(Bit#(`FLEN) operand1, Bit#(`FLEN) operand2, Bit#(`FLEN) operand3, Bit#(4) opcode, Bit#(7) funct7, Bit#(3) funct3, Bit#(2) imm, Bit#(3) fsr, Bool issp) if(!rg_multicycle_op); + ff_input.enq ( Input_Packet { + operand1 : operand1, + operand2 : operand2, + operand3 : operand3, + opcode : opcode, + funct7 : funct7, + funct3 : funct3, + imm : imm, + fsr : fsr, + issp : issp + }); + endmethod + + method ActionValue#(Floating_output#(`FLEN)) get_result; + ff_result.deq; + return ff_result.first; + endmethod + method Action flush; + wr_flush<=True; + inst_spfpu_divider.flush(); + inst_dpfpu_divider.flush(); + inst_spfpu_sqrt.flush(); + inst_dpfpu_sqrt.flush(); + inst_spfm_add_sub.flush(); + inst_dpfm_add_sub.flush(); + endmethod + +endmodule +endpackage diff --git a/src/core/fpu/fpu_compare_min_max.bsv b/src/core/fpu/fpu_compare_min_max.bsv new file mode 100644 index 0000000..3ce5fb7 --- /dev/null +++ b/src/core/fpu/fpu_compare_min_max.bsv @@ -0,0 +1,220 
@@ +/* +Authors : Vinod.G, Arjun Menon, Aditya Govardhan +Email : g.vinod1993@gmail.com, c.arjunmenon@gmail.com +Last Update : 27th November 2017 +See LICENSE for more details +Description: +TODO +*/ + + +package fpu_compare_min_max; +import defined_types::*; +`include "defined_parameters.bsv" + +interface Ifc_fpu_compare_min_max#(numeric type fpinp, numeric type fpman, numeric type fpexp); + method ActionValue#(Floating_output#(fpinp)) _start(Bit#(fpinp) operand1,Bit#(fpinp) operand2, Bit#(3) which_cmp_instr,bit cmp_or_min_max,Tuple2#(Bit#(5),Bit#(5)) condFlags); +endinterface + +`ifdef fpu_hierarchical + interface Ifc_fpu_compare_min_max32; + method ActionValue#(Floating_output#(32)) _start(Bit#(32) operand1,Bit#(32) operand2, Bit#(3) which_cmp_instr,bit cmp_or_min_max,Tuple2#(Bit#(5),Bit#(5)) condFlags); + endinterface + + interface Ifc_fpu_compare_min_max64; + method ActionValue#(Floating_output#(64)) _start(Bit#(64) operand1,Bit#(64) operand2, Bit#(3) which_cmp_instr,bit cmp_or_min_max,Tuple2#(Bit#(5),Bit#(5)) condFlags); + endinterface +`endif + + +//(*noinline*) +function Bit#(2) fn_comparator(bit sign1, Bit#(fpexp) exponent1, Bit#(fpman) mantissa1, bit sign2, Bit#(fpexp) exponent2, Bit#(fpman) mantissa2); + + Bit#(2) magnitude; //01 means inp2's magnitude is greater than inp1's magnitude + //10 means inp1's magnitude is greater than inp2's magnitude + //11 means inp2's magnitude is equal to inp1's magnitude + if(exponent1 'd1150) begin + exception[2] = 1; + exception[0] = 1; + `ifdef verbose $display("overflow"); `endif + if(rounding_mode == 3'b001) //Round to zero + return {exception,sign,7'd-1,1'b0,23'd-1}; //Highest positive number 7f7fffff + else if (rounding_mode == 3'b010) //Round down + if(sign == 0) + return {exception,1'b0,7'd-1,1'b0,23'd-1}; + else + return {exception,1'b1,8'd-1,23'd0}; + else if (rounding_mode == 3'b011 && sign == 1) + return {exception,1'b1,7'd-1,1'b0,23'd-1}; + else + return {exception,sign,8'd-1,23'd0}; + end + else begin + 
`ifdef verbose $display("sign : %b exponent %b mantissa %b rounding %b flags %b",sign,exponent,mantissa,rounding_mode,flags); `endif + Bit#(32) res = 0; + Bit#(49) man = 0; + Bit#(8) expo = 0; + bit underflow = 0; + bit lv_guard = 0; + bit lv_denormal_roundup = 0; + let lv_sticky = |mantissa[26:0]; + `ifdef verbose $display("exponent : %d",exponent); `endif + if (exponent <= 'd872) begin //1023-127-24 Underflow + if(rounding_mode == 3'b010 && sign == 1) //Round Down + res = {1'b1,30'b0,1'b1}; + else if(rounding_mode == 3'b011 && sign == 0) + res = 1; + else + res = {sign,'0}; + underflow = 1; + exception[1] = 1; + exception[0] = 1; + end + else if (exponent <= 'd896) begin //Denormal number //Set sticky bit!!! + `ifdef verbose $display("Denormal lv_sticky : %b", lv_sticky); `endif + let shiftDist = 'd896 - exponent; + man = {1'b1,mantissa[51:27],23'd0} >> shiftDist; + if(man[23:0] != 0) + lv_sticky = 1; + `ifdef verbose $display("lv_guard : %b shiftDist : %d",lv_guard, shiftDist); `endif + expo = '0; + denormal = 1; + end + else begin //Normal number + expo = truncate(exponent - 'd896); + man = zeroExtendLSB(mantissa[51:27]); + `ifdef verbose $display("expo : %b man : %b",expo,man); `endif + end + lv_guard = man[25]; + let lv_round = man[24]; + let lv_inexact = 0; + let lv_round_up = 0; + if(((lv_guard | lv_round | lv_sticky)) == 1) + lv_inexact = 1; + if(denormal == 1 && lv_inexact == 1) begin + exception[1] = 1; + exception[0] = 1; + end + exception[0] = exception[0] | lv_inexact; + if(flags[2]==0 && flags[0] == 0 && flags[1]==0 && flags[3] == 0) begin + if(rounding_mode == 'b000) + lv_round_up = lv_guard & (lv_round|lv_sticky|mantissa[29]); + else if(rounding_mode == 'b100) + lv_round_up = lv_guard; //& (lv_round|lv_sticky|sign); + else if(rounding_mode == 'b011) + lv_round_up = (lv_guard|lv_round|lv_sticky) & ~sign; + else if(rounding_mode == 'b010) + lv_round_up = (lv_guard|lv_round|lv_sticky) & sign; + `ifdef verbose $display("lv_roundup : %b",lv_round_up); 
`endif + Bit#(24) fman = zeroExtend(man[48:26]); + `ifdef verbose $display("fman: %b",fman); `endif + if(lv_round_up == 1) + fman = fman + 1; + if(fman[23] == 1) + expo = expo + 1; + if(underflow==0) + res = {sign,expo,fman[22:0]}; + end + return {exception,res}; + end +endactionvalue; + +`ifdef fpu_hierarchical +(*synthesize*) +`endif +module mkfpu_convert_sp_dp(Ifc_fpu_convert_sp_dp); + method ActionValue#(Floating_output#(64))_start(Bit#(1) sign, Bit#(8) exponent, Bit#(23) mantissa, Bit#(3) rounding_mode, Bit#(5) flags); + `ifdef verbose $display("sign : %b exponent %b mantissa %b rounding_mode %b flags %b",sign,exponent,mantissa,rounding_mode,flags); `endif + let x = floatDouble(sign,exponent,mantissa,rounding_mode,flags); + return Floating_output{ + final_result : x[63:0], + fflags : x[68:64] + }; + endmethod +endmodule + +`ifdef fpu_hierarchical +(*synthesize*) +`endif +module mkfpu_convert_dp_sp(Ifc_fpu_convert_dp_sp); + method ActionValue#(Floating_output#(32)) _start(Bit#(1) sign, Bit#(11) exponent, Bit#(52) mantissa, Bit#(3) rounding_mode, Bit#(5) flags); + `ifdef verbose $display("sign : %b exponent %b mantissa %b rounding_mode %b flags %b",sign,exponent,mantissa,rounding_mode,flags); `endif + let x<- doubleFloat(sign,exponent,mantissa,rounding_mode,flags); + return Floating_output{ + final_result : x[31:0], + fflags : x[36:32] //raise flags + }; + endmethod +endmodule + +module mkTb(Empty); + +function Tuple3#(Bit#(5), Bit#(5), Bit#(5)) condFlags (Tuple2#(Bit#(m), Bit#(e)) x, Tuple2#(Bit#(m), Bit#(e)) y, Tuple2#(Bit#(m),Bit#(e)) z); + let s = valueOf(m); + let man1 = tpl_1(x); + let expo1 = tpl_2(x); + let man2 = tpl_1(y); + let expo2 = tpl_2(y); + let man3 = tpl_1(z); + let expo3 = tpl_2(z); + Bit#(5) flags1, flags2,flags3; + Bool expZ1 = (expo1 == 0); + Bool manZ1 = (man1 == 0); + Bool expO1 = (expo1 == '1); + Bool manO1 = (man1 == '1); + Bool topB1 = (man1[s-1] == 1); + Bool expZ2 = (expo2 == 0); + Bool manZ2 = (man2 == 0); + Bool expO2 = (expo2 == 
'1); + Bool manO2 = (man2 == '1); + Bool topB2 = (man2[s-1] == 1 && man2 !=0); + Bool expZ3 = (expo3 == 0); + Bool manZ3 = (man3 == 0); + Bool expO3 = (expo3 == '1); + Bool manO3 = (man3 == '1); + Bool topB3 = (man3[s-1] == 1 && man3 !=0); + flags1 = {pack(expZ1 && !manZ1),pack(manZ1 && expZ1),pack(expO1 && topB1),pack(expO1 && manZ1),pack(expO1 && !topB1 && !manZ1)}; //Denormal, isZero, QNaN, Infinity, SNaN + flags2 = {pack(expZ2 && !manZ2),pack(manZ2 && expZ2),pack(expO2 && topB2),pack(expO2 && manZ2),pack(expO2 && !topB2 && !manZ2)}; //Denormal, isZero, QNaN, Infinity, SNaN + flags3 = {pack(expZ3 && !manZ3),pack(manZ3 && expZ3),pack(expO3 && topB3),pack(expO3 && manZ3),pack(expO3 && !topB3 && !manZ3)}; //Denormal, isZero, QNaN, Infinity, SNaN + return tuple3(flags1,flags2,flags3); + endfunction + + function Tuple3#(Bit#(m),Bit#(m), Bit#(m)) getMantissa (Bit#(n) op1, Bit#(n) op2, Bit#(n) op3) + provisos(Add#(TAdd#(m,1),e,n), + Add#(7,a__,e) + ); + let expo = valueOf(e); + let man = valueOf(m); + return tuple3(op1[man-1:0],op2[man-1:0],op3[man-1:0]); + endfunction + + function Tuple3#(Bit#(e), Bit#(e), Bit#(e)) getExp (Bit#(n) op1, Bit#(n) op2, Bit#(n) op3) + provisos(Add#(TAdd#(m,1),e,n), + Add#(7,a__,e) + ); + let inp = valueOf(n); + let man = valueOf(m); + return tuple3(op1[inp-2:man], op2[inp-2:man], op3[inp-2:man]); + endfunction + + function Bool isNaNBox(Bit#(64) op); + return (op[63:32]=='1); + endfunction + + function Tuple3#(Bit#(32),Bit#(32),Bit#(32)) setCanNaN (Bit#(64) op1, Bit#(64) op2, Bit#(64) op3); + return tuple3(isNaNBox(op1)? truncate(op1) : 32'h7fc00000, isNaNBox(op2)? truncate(op2) : 32'h7fc00000, isNaNBox(op3)? 
truncate(op3) : 32'h7fc00000); + endfunction +Wrapper3#(Tuple2#(Bit#(23), Bit#(8)),Tuple2#(Bit#(23), Bit#(8)), Tuple2#(Bit#(23), Bit#(8)), Tuple3#(Bit#(5),Bit#(5),Bit#(5))) condFlags32 <- mkUniqueWrapper3(condFlags); + Wrapper3#(Tuple2#(Bit#(52), Bit#(11)),Tuple2#(Bit#(52), Bit#(11)),Tuple2#(Bit#(52), Bit#(11)), Tuple3#(Bit#(5),Bit#(5),Bit#(5))) condFlags64 <- mkUniqueWrapper3(condFlags); + Wrapper3#(Bit#(32),Bit#(32),Bit#(32),Tuple3#(Bit#(23),Bit#(23),Bit#(23))) getMant32 <- mkUniqueWrapper3(getMantissa); + Wrapper3#(Bit#(32),Bit#(32),Bit#(32),Tuple3#(Bit#(8),Bit#(8),Bit#(8))) getExp32 <- mkUniqueWrapper3(getExp); + Wrapper3#(Bit#(64),Bit#(64),Bit#(64),Tuple3#(Bit#(52),Bit#(52),Bit#(52))) getMant64 <- mkUniqueWrapper3(getMantissa); + Wrapper3#(Bit#(64),Bit#(64),Bit#(64),Tuple3#(Bit#(11),Bit#(11),Bit#(11))) getExp64 <- mkUniqueWrapper3(getExp); + Wrapper3#(Bit#(64),Bit#(64),Bit#(64),Tuple3#(Bit#(32),Bit#(32),Bit#(32))) setCanonicalNaN <- mkUniqueWrapper3(setCanNaN); + + + Reg#(Bit#(32)) rg_clock <-mkReg(0); + //Reg#(Bit#(64)) wr_operand1 <- mkReg(64'h3721795900000000); + //Reg#(Bit#(64)) wr_operand1 <- mkReg(64'h372741b800000000); + Reg#(Bit#(64)) wr_operand1 <- mkReg(64'hffffffff7f814000); + //Reg#(Bit#(64)) rg__operand1ut1 <- mkReg(64'h019000000000000); + + Ifc_fpu_convert_dp_sp cvt <- mkfpu_convert_dp_sp; + + rule rl_clock; + rg_clock<=rg_clock+1; + if(rg_clock=='d60) begin + $finish(0); + end + endrule + + rule give__operand1ut(rg_clock==2); + let {man1,man2,man3} <- getMant64.func(wr_operand1, 0,0); + let {exp1,exp2,exp3} <- getExp64.func(wr_operand1, 0,0); + let {x1,x2,x3} <- condFlags64.func(tuple2(man1,exp1),tuple2(0,0),tuple2(0,0)); + `ifdef verbose $display("sign: %b exponent : %b mantissa : %b",wr_operand1[63],exp1,man1); `endif + `ifdef verbose $display("exponent: %d",exp1); `endif + let x <- cvt._start(wr_operand1[63],exp1,man1,3'b011,x1); + `ifdef verbose $display("Output= %h fflags %h" , x.final_result,x.fflags,$time); `endif + endrule + + +endmodule 
+ +endpackage diff --git a/src/core/fpu/fpu_divider.bsv b/src/core/fpu/fpu_divider.bsv new file mode 100644 index 0000000..d535838 --- /dev/null +++ b/src/core/fpu/fpu_divider.bsv @@ -0,0 +1,667 @@ +/* +Authors : Vinod.G, Arjun Menon, Aditya Govardhan +Email : g.vinod1993@gmail.com, c.arjunmenon@gmail.com +Last Update : 27th November 2017 +See LICENSE for more details +Description: +TODO +*/ +package fpu_divider; + import DReg::*; + import defined_types::*; //contains typedef of exceptions which are generated here + import integer_divider::*; //divider module + `include "defined_parameters.bsv" + import RegFile::*; + + typedef struct{ + Bit#(TAdd#(fpexp,2)) exponent; + Bit#(TAdd#(fpman,1)) dividend; + Bit#(TAdd#(fpman,1)) divisor; + bit sign; + bit invalid; + bit infinity; + bit dz; + bit zero; + Bit#(3) rounding_mode; + bit quiet_nan; + } Stage1_type#(numeric type fpman,numeric type fpexp) deriving (Bits,Eq); //Structure of 1st Stage of the pipeline + + typedef struct { + Bit#(fpexp2) exponent; + Bit#(1) sign; + Bit#(1) infinity; + Bit#(1) invalid; + Bit#(1) dz; + Bit#(1) zero; + Bit#(3) rounding_mode; + bit quiet_nan; + } Stage2_type#(numeric type fpexp2) deriving (Bits,Eq); + + typedef struct { + Bit#(TAdd#(fpman,4)) lv_quotient; + Bit#(TAdd#(fpman,5)) lv_remainder; + Bit#(fpexp2) lv_exponent; + Bit#(1) lv_sign; + Bit#(1) lv_infinity; + Bit#(1) lv_invalid; + Bit#(1) lv_dz; + bit lv_underflow; + bit lv_overflow; + bit lv_sticky; + Bit#(1) lv_zero; + Bit#(3) lv_rounding_mode; + bit lv_quotient_is_subnormal; + bit quiet_nan; + } Stage3_type#(numeric type fpexp2, numeric type fpman) deriving (Bits,Eq); + + typedef enum { + Begin, + Stage1, + Stage2, + Stage3 + }Div_states deriving (Bits,Eq); + + + interface Ifc_fpu_divider#(numeric type fpinp, numeric type fpman, numeric type fpexp); + method Action _start(Bit#(1) lv_sign, Bit#(fpman) lv_mantissa1, Bit#(fpexp) lv_exponent1, Bit#(fpman) lv_mantissa2, Bit#(fpexp) lv_exponent2, Bit#(3) rounding_mode, 
Tuple2#(Bit#(5),Bit#(5)) flags); + method Floating_output#(fpinp) final_result_(); // Output method + method Action flush; + endinterface + + `ifdef fpu_hierarchical + interface Ifc_fpu_divider32; + method Action _start(Bit#(1) lv_sign, Bit#(23) lv_mantissa1, Bit#(8) lv_exponent1, Bit#(23) lv_mantissa2, Bit#(8) lv_exponent2, Bit#(3) rounding_mode, Tuple2#(Bit#(5),Bit#(5)) flags); + method Floating_output#(32) final_result_(); // Output method + method Action flush; + endinterface + + interface Ifc_fpu_divider64; + method Action _start(Bit#(1) lv_sign, Bit#(52) lv_mantissa1, Bit#(11) lv_exponent1, Bit#(52) lv_mantissa2, Bit#(11) lv_exponent2, Bit#(3) rounding_mode, Tuple2#(Bit#(5),Bit#(5)) flags); + method Floating_output#(64) final_result_(); // Output method + method Action flush; + endinterface + `endif + +//(*synthesize*) +module mkfpu_divider(Ifc_fpu_divider#(fpinp,fpman,fpexp)) + provisos( + Add#(TAdd#(fpman,fpexp),1,fpinp), // fpman -23 fpexp=8 fpinp = 32 + Add#(fpman,2,fpman2), // fpman2 = 25 + Add#(fpman2,2,fpman4), // fpman4 = 27 + Add#(fpman4,1,fpman5), // fpman5 = 28 + Add#(fpexp,2,fpexp2), // fpexp2 = 10 + Add#(fpman5,1,fpman6), // fpman6 = 29 + Add#(fpman4,fpman6,acc_bits), // acc_bits= 56 + Add#(fpexp2,b__,fpman), + Add#(TSub#(fpexp,1),c__,fpman), + //per request of bsc + Add#(a__, 1, fpexp2), + Add#(d__, TLog#(TAdd#(1, TAdd#(fpexp, c__))), fpexp2), + Add#(e__, TLog#(fpman5), fpexp2), + Add#(1, f__, fpman2), + Add#(g__, 1, fpman4), + Add#(h__, TLog#(TAdd#(1, TAdd#(c__, fpexp))), fpexp2), + Add#(1, fpexp2, TAdd#(fpexp, i__)) + ); + + Ifc_integer_divider#(fpman4) int_div <- mkinteger_divider(); // instantiation of divider module + + //Wire#(Floating_output#(fpinp)) wr_final_out <- mkWire(); // instantiation of output FIFO whose structure definition is given in riscv_types.bsv + Wire#(Floating_output#(fpinp)) wr_final_out <- mkWire(); + Reg#(Stage1_type#(fpman,fpexp)) rg_stage1 <- mkRegU(); // instantiation of Stage 1 FIFO + Reg#((Stage2_type#(fpexp2))) 
rg_stage2 <- mkRegU(); + Reg#((Stage3_type#(fpexp2,fpman))) rg_stage3 <- mkRegU(); + Reg#(Div_states) rg_state_handler <- mkReg(Begin); + Wire#(Bool) wr_flush <- mkDWire(False); + let fPINP = valueOf(fpinp); + let fPMAN = valueOf(fpman); + let fPMAN5 = valueOf(fpman5); + let fPEXP = valueOf(fpexp); + let aCC = valueOf(acc_bits); + + (*mutually_exclusive = "rl_flush,rl_stage2,rl_stage3"*) + rule rl_flush(wr_flush); + rg_state_handler <= Begin; + //rg_stage1 <= tagged Invalid; + //rg_stage2 <= tagged Invalid; + int_div.flush(); + endrule + + //This is the second stage of the pipe. Here the division of the two mantissas take place. Rest of the data are enqueued in another FIFO. + rule rl_stage2 (rg_state_handler == Stage1 && !wr_flush); + int_div._inputs({rg_stage1.divisor,3'd0}, + {rg_stage1.dividend,3'd0} + ); + rg_state_handler <= Stage2; + //`ifdef verbose $display("Dividing Op1: %h (%d) Op2: %h (%d)",{stage1_data.dividend,3'd0},{stage1_data.dividend,3'd0},{stage1_data.divisor,3'd0},{stage1_data.divisor,3'd0}); `endif + //`ifdef verbose $display("dz : %b",stage1_data.dz); `endif + rg_stage2 <= Stage2_type { exponent : rg_stage1.exponent, + sign : rg_stage1.sign, + infinity : rg_stage1.infinity, + invalid : rg_stage1.invalid, + dz : rg_stage1.dz, + zero : rg_stage1.zero, + rounding_mode : rg_stage1.rounding_mode, + quiet_nan : rg_stage1.quiet_nan + }; + + + endrule + + rule rl_stage3(rg_state_handler == Stage2 && !wr_flush); + let int_out = int_div.result_(); + Bit#(TSub#(fpexp,1)) bias = '1; + `ifdef verbose $display("Int Data %h", int_out); `endif + Bit#(fpman4) lv_quotient = int_out[fPMAN+3:0]; //Quotient from the integer divider + Bit#(fpman5) lv_remainder = int_out[aCC-1:fPMAN5]; //Remainder from the integer divider + Bit#(fpexp2) lv_exponent = rg_stage2.exponent; + Bit#(1) lv_sign = rg_stage2.sign; + Bit#(1) lv_infinity = rg_stage2.infinity; + Bit#(1) lv_invalid = rg_stage2.invalid; + Bit#(1) lv_dz = rg_stage2.dz; + Bit#(1) lv_zero = rg_stage2.zero; + Bit#(3) 
lv_rounding_mode = rg_stage2.rounding_mode; + let quiet_nan = rg_stage2.quiet_nan; + + //rg_stage2 <= tagged Invalid; + + bit lv_underflow = 0; + bit lv_overflow = 0; + + Int#(fpexp2) lv_actual_exponent = unpack(lv_exponent - {3'b0,bias}); + //Change-1 Removing not_required variable + // Int#(fpman) lv_actual_exponent_temp = signExtend(lv_actual_exponent); + let msb_zeros = pack(countZerosMSB(lv_quotient)); + `ifdef verbose $display("MSB Zeros: %d",msb_zeros); `endif + let lsb_zeros = 0; + + // lv_quotient_is_subnormal construct is like a flag which can be used in difficult situations + bit lv_quotient_is_subnormal = 0; + bit lv_sticky = lv_quotient[0]; + //Bit#(fpman) bias_temp = zeroExtend(bias); + /* + if exponent is > 128 then obviously none of the numbers are subnormal + so the product is of the form 1x.xxxx or 01.xxxx + the overflow conditions are handled in the following if condition accordingly + */ + if(lv_actual_exponent > unpack({3'b0,bias} + 1)) begin //CHECK THIS CASE WITHOUT FAIL - OPTIMIZE IT + lv_overflow = 1; + `ifdef verbose $display("lv_overflow!!!"); `endif + end + /* + -bias -fPMAN + -150 = -126 -23 -1 + -1075 = -1022 -52 -1 //for DP? + -1 is for the implicit bit + i.e. if all the bits are shifted out then its an underflow + */ + else if(lv_actual_exponent < unpack(-zeroExtend(bias)-fromInteger(fPMAN)-1)) begin //TODO What here? TODO Check <-150 or <-151 + //else if(lv_actual_exponent_temp < unpack(-bias_temp-fromInteger(fPMAN)-1)) begin //TODO What here? 
TODO Check <-150 or <-151 + //`ifdef verbose $display("lv_actual_exponent : %d bias-fpman-1 : %d", lv_actual_exponent, -bias_temp-fromInteger(fPMAN-1)); `endif + lv_underflow = 1; + lv_quotient = 1; + lv_exponent = 0; + //When the exponent is < -151, sticky bit is automatically set to one + `ifdef verbose $display("lv_underflow!!!"); `endif + end + + else begin + + // possible shift is positive when exponent is lesser than -126 + //Change-x it's enough if possible shift is reduced from lv_exponent - reducing again from bias is actually redundant and incurs another adder + //Same Experiment here, do all the if-else parallely and just use the if and else for assignments + Int#(fpexp2) possible_shift = 1-unpack(lv_exponent); + `ifdef verbose $display("possible_shift = %0d", possible_shift); `endif + + lsb_zeros = pack(countZerosLSB(lv_quotient)); + + let lv_quotient_shiftR = lv_quotient >> pack(possible_shift); + //lv_quotient = {lv_quotient[fPMAN+3:1], lv_quotient[0] | lv_sticky}; + let lv_exponent_inc_shift = lv_exponent + pack(possible_shift); + let shift_neg = (~pack(possible_shift)+1); + let lv_quotient_shiftL_expo = lv_quotient << shift_neg; + let lv_exponent_sub_shift = lv_exponent - shift_neg; + let lv_quotient_shiftL_zerosMSB = lv_quotient << (msb_zeros); + let lv_exponent_sub_zerosMSB = lv_exponent - (zeroExtend(msb_zeros)); + + + + if(possible_shift > 0) begin + + //Setting sticky if all lsb zeros are removed out + + if(possible_shift > unpack(zeroExtend(lsb_zeros)) || lv_quotient[0] == 1) + lv_sticky = 1; + + //Handling sticky + //lv_sticky = lv_quotient[0]; + lv_quotient = {lv_quotient_shiftR[fPMAN+3:1],lv_quotient_shiftR[0]|lv_sticky}; + lv_sticky = lv_quotient[0]; + lv_exponent = lv_exponent_inc_shift; + + `ifdef verbose $display("lv_quotient = %h since exp < -126", lv_quotient); `endif + `ifdef verbose $display("and thus the sticky bit = %b", lv_sticky); `endif + + `ifdef verbose $display("lv_exponent : %b",lv_exponent); `endif + 
lv_quotient_is_subnormal = 1; + end + + /* + msb_zeros != 1 means product is of the form 00.xxxx, important case + */ + else if(msb_zeros != 0) begin + /* + if possible shift is < the number of leading zeros then the number can't be made normal + */ + if(shift_neg < zeroExtend(msb_zeros)) begin + lv_quotient = lv_quotient_shiftL_expo; + lv_exponent = lv_exponent_sub_shift; + lv_quotient_is_subnormal = 1; + end + /* + if exponent affords to give away enough such that shifting left leads to 01.xxxx and exponent >= -126 + */ + else begin + lv_quotient = lv_quotient_shiftL_zerosMSB; + lv_exponent = lv_exponent_sub_zerosMSB; + lv_quotient_is_subnormal = 0; + end + end + end + + if(lv_quotient_is_subnormal == 1) + lv_exponent = 0; + + rg_state_handler <= Stage3; + rg_stage3 <= Stage3_type{ + lv_quotient : lv_quotient, + lv_remainder : lv_remainder, + lv_exponent : lv_exponent, + lv_sign : lv_sign, + lv_infinity : lv_infinity, + lv_invalid : lv_invalid, + lv_underflow : lv_underflow, + lv_overflow : lv_overflow, + lv_dz : lv_dz, + lv_zero : lv_zero, + lv_sticky : lv_sticky, + lv_quotient_is_subnormal : lv_quotient_is_subnormal, + lv_rounding_mode : lv_rounding_mode, + quiet_nan : quiet_nan + }; + + endrule + //------------------------------------------------------------------Ex-1 Splitting Here ---------------------------------------------------------// + //Required - 1. lv_quotient 2. lv_remainder 3. lv_quotient_is_subnormal 4. lv_rounding_mode 5. 
all exception lv's + //Splitting the stage here: rl_stage4 fires in state Stage3 when no flush is pending; it rounds the quotient latched in rg_stage3, assembles the result word and fflags, writes wr_final_out and returns the state machine to Begin + + rule rl_stage4(rg_state_handler==Stage3 && !wr_flush); + + let lv_quotient = rg_stage3.lv_quotient; + let lv_remainder = rg_stage3.lv_remainder; + let lv_exponent = rg_stage3.lv_exponent; + let lv_sign = rg_stage3.lv_sign; + let lv_infinity = rg_stage3.lv_infinity; + let lv_invalid = rg_stage3.lv_invalid; + let lv_dz = rg_stage3.lv_dz; + let lv_zero = rg_stage3.lv_zero; + let lv_rounding_mode = rg_stage3.lv_rounding_mode; + let lv_overflow = rg_stage3.lv_overflow; + let lv_underflow = rg_stage3.lv_underflow; + let lv_sticky = rg_stage3.lv_sticky; + let lv_quotient_is_subnormal = rg_stage3.lv_quotient_is_subnormal; + let quiet_nan = rg_stage3.quiet_nan; + + `ifdef verbose $display("lv_quotient = %h, lv_remainder = %h, lv_exponent = %h", lv_quotient, lv_remainder, lv_exponent); `endif + + bit lv_guard = lv_quotient[2]; + bit lv_round = lv_quotient[1]; + bit lv_inexact = 0; + bit lv_round_up = 0; + + if(lv_remainder!=0 || lv_quotient[0] == 1) // if the remainder is non-zero or the quotient LSB is set, the sticky bit is set to 1. + lv_sticky = 1; + + if((lv_sticky | lv_guard | lv_round) == 1)// if any of the sticky, guard or round bits is set, the value is inexact. + lv_inexact = 1; + + if(lv_inexact == 1 && lv_quotient_is_subnormal == 1) //an inexact subnormal result raises underflow. NOTE(review): the original author was unsure this matches the spec - confirm + lv_underflow = 1; + + // The following if-else chain determines the value of lv_round_up. If set, the mantissa needs to be incremented, else the mantissa remains unchanged. 
+ if(lv_rounding_mode == 'b000) + lv_round_up = lv_guard & (lv_round|lv_sticky|lv_quotient[3]); //round to nearest, ties to even (lv_quotient[3] is the lowest bit that is kept) + else if(lv_rounding_mode == 'b100) + lv_round_up = lv_guard; //round to nearest, ties away from zero; previously: & (lv_round|lv_sticky|lv_sign); + else if(lv_rounding_mode == 'b011) + lv_round_up = (lv_guard|lv_round|lv_sticky) & ~lv_sign; //round up only for positive inexact results + else if(lv_rounding_mode == 'b010) + lv_round_up = (lv_guard|lv_round|lv_sticky) & lv_sign; //round up (in magnitude) only for negative inexact results + + // for any other rounding mode (e.g. round to zero) lv_round_up stays 0 and the mantissa is truncated + + Bit#(fpman2) lv_rounded_quotient = {1'b0,lv_quotient[fPMAN+3:3]}; //drop the three low bits (guard, round, sticky position) and prepend a zero to catch the rounding carry + + if( lv_round_up == 1) begin + lv_rounded_quotient = lv_rounded_quotient + 1; + end + + if(lv_rounded_quotient[fPMAN+1] == 1 ) begin //rounding carried into the extra MSB: shift right and bump the exponent + `ifdef verbose $display("Exponent Incremented 1"); `endif + lv_exponent = lv_exponent + 1; + lv_rounded_quotient = lv_rounded_quotient >> 1; + end + if(lv_quotient[fPMAN+3] == 0 && lv_rounded_quotient[fPMAN] == 1) begin //top quotient bit was 0 before rounding but the matching bit is 1 afterwards: bump the exponent + `ifdef verbose $display("Exponent Incremented 2"); `endif + lv_exponent = lv_exponent + 1; + end + Bit#(fpexp) out_exp = lv_exponent[fPEXP-1:0]; + Bit#(fpman) out_man = lv_rounded_quotient[fPMAN-1:0]; + Bit#(fpexp) exp_all_zeros = '0; + Bit#(fpexp) exp_all_ones = '1; + Bit#(TSub#(fpexp,1)) exp_all_ones_1 = '1; + Bit#(fpman) man_all_zeros = '0; + Bit#(fpman) man_all_ones = '1; + Bit#(TSub#(fpman,1)) man1_all_zeros = '0; + Bit#(TSub#(fpman,1)) man_all_ones_1 = '1; + Bit#(fpinp) lv_final_output= 0; + Bit#(5) exception = 0; + + // result is infinity (the divide-by-zero flag is raised as well when lv_dz is set) + if(lv_infinity == 1) begin + lv_final_output = {lv_sign, exp_all_ones, man_all_zeros}; + if(lv_dz==1) + exception[3] = 1; + end + + // the result is invalid: emit a NaN with sign 0 and only the top mantissa bit set; the invalid flag is raised unless quiet_nan is set + else if(lv_invalid == 1) begin + lv_final_output = {1'b0, exp_all_ones,1'b1, man1_all_zeros}; + exception[4] = ~quiet_nan;//Invalid; + end + // operation is divide by zero + else if(lv_dz==1) begin + lv_final_output= {lv_sign, exp_all_ones, man_all_zeros}; + exception[3] = 1;//Divide_by_Zero; + end + // result is zero + else if(lv_zero == 1) + lv_final_output={lv_sign,exp_all_zeros,man_all_zeros}; + // result is 
underflow + else if(lv_underflow == 1) begin + + lv_final_output= {lv_sign,exp_all_zeros,lv_rounded_quotient[fPMAN-1:0]}; //TODO to verify if it needs to be lv_rounded_quotient[22:1] and lv_inexact bit. + exception[1] = 1;//Underflow; + exception[0] = 1; //Inexact is always raised together with underflow here + end + // result is overflow + else if(lv_overflow == 1 || out_exp == '1) begin + exception[2] = 1;//Overflow; + exception[0] = 1; //inexact is raised together with overflow. NOTE(review): the original author was unsure about this - confirm + if(lv_rounding_mode == 'b001) + lv_final_output = {lv_sign,{exp_all_ones_1,1'b0},man_all_ones}; //largest finite magnitude: exponent all ones except its LSB, mantissa all ones + else if(lv_rounding_mode == 'b010 && lv_sign ==0) + lv_final_output = {lv_sign,{exp_all_ones_1,1'b0},man_all_ones}; + else if(lv_rounding_mode == 'b011 && lv_sign==1) + lv_final_output = {lv_sign,{exp_all_ones_1,1'b0},man_all_ones}; + else + lv_final_output ={lv_sign,exp_all_ones,man_all_zeros}; //every other mode/sign combination overflows to signed infinity + end + else begin + lv_final_output = {lv_sign,out_exp,out_man}; + if(lv_inexact==1) + exception[0] = 1;//Inexact; + end + rg_state_handler <= Begin; + // Forming the new Floating point Status Register + // Writing the final result and the exception flags to wr_final_out + wr_final_out <= Floating_output{ + final_result : lv_final_output,//NOTE(review): an earlier comment here described zero-padding a 32-bit single precision result to 64 bits, but lv_final_output is passed through unchanged. 
+ fflags : exception}; + + endrule + + method Action _start(Bit#(1) lv_sign, Bit#(fpman) lv_mantissa1, Bit#(fpexp) lv_exponent1, Bit#(fpman) lv_mantissa2, Bit#(fpexp) lv_exponent2, Bit#(3) rounding_mode, Tuple2#(Bit#(5),Bit#(5)) flags); + + Bit#(TSub#(fpexp,1)) bias = '1; + let condFlags1 = tpl_1(flags); + let condFlags2 = tpl_2(flags); + Int#(fpexp) actual_exponent1 = unpack(lv_exponent1 - {1'b0,bias}); + Int#(fpexp) actual_exponent2 = unpack(lv_exponent2 - {1'b0,bias}); + `ifdef verbose $display("Exp1: %h, Man1: %h, Exp2: %h Man2: %h",lv_exponent1,lv_mantissa1,lv_exponent2,lv_mantissa2); `endif + `ifdef verbose $display("condFlags1 : %b condFlags2: %b",condFlags1,condFlags2); `endif + Bit#(1) lv_inf = 0; + Bit#(1) lv_inv = 0; + Bit#(1) lv_zero = 0; + Bit#(1) lv_dz = 0; + bit quiet_nan = condFlags1[2] & ~condFlags2[0] | condFlags2[2] & ~condFlags1[0] ; + Bit#(1) lv_op1_is_zero = condFlags1[3]; //1 when operand1=0 + Bit#(1) lv_op2_is_zero = condFlags2[3]; //1 when operand2=0 + + Bit#(1) lv_op1_subnormal = condFlags1[4]; //1 when operand1 is subnormal + Bit#(1) lv_op2_subnormal = condFlags2[4]; //1 when operand2 is subnormal + + Bit#(1) lv_op1_is_infinity = condFlags1[1]; + Bit#(1) lv_op2_is_infinity = condFlags2[1]; + + `ifdef verbose $display("op1 is subnormal = %b , op2 is subnormal = %b", lv_op1_subnormal, lv_op2_subnormal); `endif + // `ifdef verbose $display("sign1 = %b exponent1 = %b actual_exponent1 = %0d mantissa1 = %b.%b", _operand1[31], _operand1[fPINP-2:fPMAN], actual_exponent1, ~lv_op1_subnormal, _operand1[fPMAN-1:0]); `endif +// `ifdef verbose $display("sign2 = %b exponent2 = %b actual_exponent2 = %0d mantissa2 = %b.%b", _operand2[31], _operand2[fPEXP-1:fPMAN], actual_exponent2, ~lv_op2_subnormal, _operand2[fPMAN-1:0]); `endif + + + if(((condFlags1[2] | condFlags1[0])==1) || ((condFlags2[2] | condFlags2[0])==1) || (lv_op1_is_infinity == 1 && lv_op2_is_infinity == 1) || (lv_op1_is_zero == 1 && lv_op2_is_zero == 1)) begin //op1 or op2 are NaN (or) both 
are infinity (or) both are zero + lv_inv = 1; //result is invalid + end + else if(lv_op1_is_infinity ==1) begin //op 2 is neither NaN nor infinity, and op1 is infinity + lv_inf=1; //result is infinity + end + else if(lv_op2_is_zero==1) begin //op 1 is neither NaN nor infinity, and op2 is zero + lv_inf=1; //result is infinity + lv_dz=1; //setting the divide by zero flag + end + else if(lv_op2_is_infinity == 1 || lv_op1_is_zero == 1) //{op1 and op2 are not NaN} (and) {op1 is zero and op2 is not zero (or) op2 is infinity and op1 is not infinity} + lv_zero=1; //result is zero + + + let man1 ={~lv_op1_subnormal,lv_mantissa1}; + let man2 ={~lv_op2_subnormal,lv_mantissa2}; + let zeros1 =countZerosMSB(man1); + let zeros2 =countZerosMSB(man2); + man1=man1<= 'd31) begin + `ifdef verbose $display("Overflow");`endif + //lv_overflow = 1; + lv_invalid = 1; + if(lv_sign == 0) + final_result = zeroExtend(all_ones); + else begin + if(lv_original_exponent == 'd31 && lv_manzero == 0) + lv_invalid = 0; + final_result = signExtend(32'h80000000); + end + end + end + else begin //FCVT.WU.D + Bit#(32) all_ones = '1; + if(lv_infinity == 1 || lv_invalid == 1) + final_result = (lv_sign==1) ? (lv_invalid==1? signExtend(all_ones) : '0) : signExtend(all_ones); + else if(lv_original_exponent < 'd32) begin + final_man = final_man << lv_original_exponent; + Bit#(32) y = final_man[83:52]; + `ifdef verbose $display("final_man = %d",final_man); `endif + final_result = signExtend(y); + lv_mantissa = final_man[51:0]; + to_round = True; + end + else if(lv_original_exponent >= 'd32) begin + `ifdef verbose $display("Overflow");`endif + //lv_overflow = 1; + lv_invalid = 1; + if(lv_sign == 0) + final_result = signExtend(all_ones); + else + final_result = '0; + end + end + end + else begin + if(convert_unsigned == 0) begin //FCVT.L.D + Bit#(63) all_ones = '1; + if(lv_infinity == 1 || lv_invalid == 1) + final_result = (lv_sign==1) ?(lv_invalid==1? 
zeroExtend(all_ones) : signExtend(64'h8000000000000000)) : zeroExtend(all_ones); + else if(lv_original_exponent < 'd63) begin + final_man = final_man << lv_original_exponent; + `ifdef verbose $display("final_man : %b",final_man);`endif + Bit#(64) y = zeroExtend(final_man[115:52]); + final_result = y; + lv_mantissa = final_man[51:0]; + to_round = True; + end + else if(lv_original_exponent >= 'd63) begin + `ifdef verbose $display("Overflow");`endif + //lv_overflow = 1; + lv_invalid = 1; + if(lv_sign == 0) + final_result = zeroExtend(all_ones); + else begin + if(lv_original_exponent == 'd63 && lv_manzero == 0) + lv_invalid = 0; + final_result = signExtend(64'h8000000000000000); + end + end + end + else begin //FCVT.LU.D + Bit#(64) all_ones = '1; + if(lv_infinity == 1 || lv_invalid == 1) + final_result = (lv_sign==1) ? (lv_invalid==1? signExtend(all_ones) : '0) : signExtend(all_ones); + else if(lv_original_exponent < 'd64) begin + final_man = final_man << lv_original_exponent; + `ifdef verbose $display("final_man = %d",final_man); `endif + Bit#(64) y = zeroExtend(final_man[115:52]); + final_result = y; + lv_mantissa = final_man[51:0]; + to_round = True; + end + else if(lv_original_exponent >= 'd64) begin + `ifdef verbose $display("Overflow");`endif + //lv_overflow = 1; + lv_invalid = 1; + if(lv_sign == 0) + final_result = signExtend(all_ones); + else + final_result = '0; + end + end + + end + + bit lv_guard = lv_mantissa[51]; //MSB of the already shifted mantissa is guard bit + bit lv_round = lv_mantissa[50]; //next bit is round bit + bit lv_sticky = |(lv_mantissa<<2); //remaining bits determine the sticky bit + bit lv_round_up = 0; + bit lv_inexact1 = lv_guard | lv_round | lv_sticky; + if(to_round) begin + if(rne) lv_round_up = lv_guard & (final_result[0] | lv_round | lv_sticky); //Round to nearest ties to even + else if(rmm) lv_round_up = lv_guard; //& (lv_round | lv_sticky | ~lv_sign); //Round to nearest ties to max magnitude + else if(rdn) lv_round_up = lv_inexact1 
& (lv_sign); //Round down to -infinity + else if(rup) lv_round_up = lv_inexact1 & (~lv_sign); //Round up to +infinity + lv_inexact = lv_inexact | lv_inexact1; + `ifdef verbose $display("lv_inexact: %b lv_guard: %b lv_round : %b lv_sticky: %b lv_sign %b",lv_inexact,lv_guard,lv_round,lv_sticky,lv_sign); `endif + if(lv_round_up == 1) begin + lv_invalid = 1; + if(convert_long == 0 && convert_unsigned == 0 && lv_original_exponent == 30 && final_result[30:0] == '1 && lv_sign == 0) //Overflow.. Beyond representable number after rounding + final_result = 64'h7fffffff; + else if(convert_long == 0 && convert_unsigned == 1 && lv_original_exponent == 31 && final_result[31:0] == '1 && lv_sign == 0) + final_result = 64'hffffffffffffffff; //Should verify again + else if(convert_long == 1 && convert_unsigned == 0 && lv_original_exponent == 62 && final_result[62:0] == '1 && lv_sign == 0) //Overflow.. Beyond representable number after rounding + final_result = 64'h7fffffffffffffff; + else if(convert_long == 1 && convert_unsigned == 1 && lv_original_exponent == 63 && final_result[63:0] == '1 && lv_sign == 0) + final_result = 64'hffffffffffffffff; + else begin + lv_invalid = 0; + final_result = final_result + 1; + if(convert_long == 0 && final_result[31]==1) + final_result = signExtend(final_result[31:0]); + end + end + `ifdef verbose $display("rounding_mode == %b",rounding_mode); `endif + `ifdef verbose $display("round_up = %b", lv_round_up); `endif + + if(convert_unsigned == 0 && lv_sign == 1)begin //Negating the output if floating point number is negative and converted to signed word/long + final_result = ~final_result + 1; + if(convert_long == 0 && final_result[31]==1) + final_result = signExtend(final_result[31:0]); + `ifdef verbose $display("Negating output final_result : %b", final_result); `endif + end + else if(convert_unsigned == 1 && lv_sign == 1) begin //TODO What happens when negative floating point is converted to unsigned int, right now rounded to zero + final_result = 
0; + lv_invalid = 1; + end + end + if((lv_invalid|lv_infinity) == 1) begin //invalid or infinite input: overflow and inexact are cleared. NOTE(review): the original author asked how a quiet NaN should be treated here - confirm against the spec + lv_overflow = 0; + lv_inexact = 0; + end +Bit#(5) fflags={lv_invalid|lv_infinity,1'b0,lv_overflow,1'b0,lv_inexact}; + return Floating_output{ + final_result: final_result, + fflags: fflags}; + + + endmethod +endmodule +module mkTb(Empty); //testbench: feeds one hard-coded 64-bit operand to mkfpu_dp_to_int and prints the result when verbose is defined + + function Tuple3#(Bit#(5), Bit#(5), Bit#(5)) condFlags (Tuple2#(Bit#(m), Bit#(e)) x, Tuple2#(Bit#(m), Bit#(e)) y, Tuple2#(Bit#(m),Bit#(e)) z); + let s = valueOf(m); //mantissa width; bit s-1 is the top mantissa bit used below to tell quiet from signalling NaN + let man1 = tpl_1(x); + let expo1 = tpl_2(x); + let man2 = tpl_1(y); + let expo2 = tpl_2(y); + let man3 = tpl_1(z); + let expo3 = tpl_2(z); + Bit#(5) flags1, flags2,flags3; + Bool expZ1 = (expo1 == 0); + Bool manZ1 = (man1 == 0); + Bool expO1 = (expo1 == '1); + Bool manO1 = (man1 == '1); + Bool topB1 = (man1[s-1] == 1); + Bool expZ2 = (expo2 == 0); + Bool manZ2 = (man2 == 0); + Bool expO2 = (expo2 == '1); + Bool manO2 = (man2 == '1); + Bool topB2 = (man2[s-1] == 1 && man2 !=0); + Bool expZ3 = (expo3 == 0); + Bool manZ3 = (man3 == 0); + Bool expO3 = (expo3 == '1); + Bool manO3 = (man3 == '1); + Bool topB3 = (man3[s-1] == 1 && man3 !=0); + flags1 = {pack(expZ1 && !manZ1),pack(manZ1 && expZ1),pack(expO1 && topB1),pack(expO1 && manZ1),pack(expO1 && !topB1 && !manZ1)}; //bit4..bit0 = Denormal, isZero, QNaN, Infinity, SNaN + flags2 = {pack(expZ2 && !manZ2),pack(manZ2 && expZ2),pack(expO2 && topB2),pack(expO2 && manZ2),pack(expO2 && !topB2 && !manZ2)}; //bit4..bit0 = Denormal, isZero, QNaN, Infinity, SNaN + flags3 = {pack(expZ3 && !manZ3),pack(manZ3 && expZ3),pack(expO3 && topB3),pack(expO3 && manZ3),pack(expO3 && !topB3 && !manZ3)}; //bit4..bit0 = Denormal, isZero, QNaN, Infinity, SNaN + return tuple3(flags1,flags2,flags3); + endfunction + + function Tuple3#(Bit#(m),Bit#(m), Bit#(m)) getMantissa (Bit#(n) op1, Bit#(n) op2, Bit#(n) op3) + provisos(Add#(TAdd#(m,1),e,n), + Add#(7,a__,e) + ); + let expo = valueOf(e); + let man = valueOf(m); + return 
tuple3(op1[man-1:0],op2[man-1:0],op3[man-1:0]); //the low m bits of each operand are its mantissa field + endfunction + + function Tuple3#(Bit#(e), Bit#(e), Bit#(e)) getExp (Bit#(n) op1, Bit#(n) op2, Bit#(n) op3) + provisos(Add#(TAdd#(m,1),e,n), + Add#(7,a__,e) + ); + let inp = valueOf(n); + let man = valueOf(m); + return tuple3(op1[inp-2:man], op2[inp-2:man], op3[inp-2:man]); //exponent field: everything between the sign bit (n-1) and the mantissa + endfunction + + function Bool isNaNBox(Bit#(64) op); + return (op[63:32]=='1); //true when the upper 32 bits are all ones + endfunction + + function Tuple3#(Bit#(32),Bit#(32),Bit#(32)) setCanNaN (Bit#(64) op1, Bit#(64) op2, Bit#(64) op3); + return tuple3(isNaNBox(op1)? truncate(op1) : 32'h7fc00000, isNaNBox(op2)? truncate(op2) : 32'h7fc00000, isNaNBox(op3)? truncate(op3) : 32'h7fc00000); //keep the low 32 bits of a NaN-boxed operand, otherwise substitute 32'h7fc00000 + endfunction +Wrapper3#(Tuple2#(Bit#(23), Bit#(8)),Tuple2#(Bit#(23), Bit#(8)), Tuple2#(Bit#(23), Bit#(8)), Tuple3#(Bit#(5),Bit#(5),Bit#(5))) condFlags32 <- mkUniqueWrapper3(condFlags); + Wrapper3#(Tuple2#(Bit#(52), Bit#(11)),Tuple2#(Bit#(52), Bit#(11)),Tuple2#(Bit#(52), Bit#(11)), Tuple3#(Bit#(5),Bit#(5),Bit#(5))) condFlags64 <- mkUniqueWrapper3(condFlags); + Wrapper3#(Bit#(32),Bit#(32),Bit#(32),Tuple3#(Bit#(23),Bit#(23),Bit#(23))) getMant32 <- mkUniqueWrapper3(getMantissa); + Wrapper3#(Bit#(32),Bit#(32),Bit#(32),Tuple3#(Bit#(8),Bit#(8),Bit#(8))) getExp32 <- mkUniqueWrapper3(getExp); + Wrapper3#(Bit#(64),Bit#(64),Bit#(64),Tuple3#(Bit#(52),Bit#(52),Bit#(52))) getMant64 <- mkUniqueWrapper3(getMantissa); + Wrapper3#(Bit#(64),Bit#(64),Bit#(64),Tuple3#(Bit#(11),Bit#(11),Bit#(11))) getExp64 <- mkUniqueWrapper3(getExp); + Wrapper3#(Bit#(64),Bit#(64),Bit#(64),Tuple3#(Bit#(32),Bit#(32),Bit#(32))) setCanonicalNaN <- mkUniqueWrapper3(setCanNaN); + + + Ifc_fpu_dp_to_int converter <- mkfpu_dp_to_int(); + Reg#(Bit#(32)) state_clock <- mkReg(0); + Reg#(Bit#(64)) wr_operand1 <- mkReg('h0200000000000000); + //Reg#(Bit#(32)) wr_operand1 <- mkReg('hbf214efa); + //Reg#(Bit#(32)) wr_operand1 <- mkReg('h91c82527); + + rule state_clock_count; + state_clock <= state_clock + 1; + if(state_clock == 'd5) $finish; //stop the simulation once the counter reads 5 + endrule + + 
rule give_input(state_clock == 'd1); + // let {op1,op2,op3} <- setCanonicalNaN.func(wr_operand1,'0,'0); + let {man1,man2,man3} <- getMant64.func(wr_operand1, 0,0); + let {exp1,exp2,exp3} <- getExp64.func(wr_operand1, 0,0); + let {flags1,flags2,flags3} <- condFlags64.func(tuple2(man1,exp1),tuple2(man2,exp2),tuple2(0,0)); + let sign1 = wr_operand1[63]; //sign is the MSB of the 64-bit operand +`ifdef verbose $display("input %b %b %b given at %0d", sign1, exp1, man1, state_clock); `endif + let x<- converter._start(sign1,exp1,man1, 0, 0, 3'b000,flags1); + `ifdef verbose $display("output %h fflags : %h at %h", x.final_result, x.fflags, state_clock); `endif + endrule + + + endmodule +endpackage diff --git a/src/core/fpu/fpu_fclass.bsv b/src/core/fpu/fpu_fclass.bsv new file mode 100755 index 0000000..4012ff8 --- /dev/null +++ b/src/core/fpu/fpu_fclass.bsv @@ -0,0 +1,197 @@ +/* +Authors : Vinod.G, Renuka Venkat +Email : g.vinod1993@gmail.com +Last Update : 27th November 2017 +See License for More Details +Description: +This module examines the floating point input and outputs the class of floating point number. 
The corresponding bit of the output register is set according to the class of the floating point number, as given by the table below, + bit Class + 0 Negative Infinity + 1 Negative Normal Number + 2 Negative Subnormal Number + 3 Negative Zero + 4 Positive Zero + 5 Positive Subnormal Number + 6 Positive Normal Number + 7 Positive Infinity + 8 Signaling NaN + 9 Quiet NaN + +The classes correspond to the following encodings (the module itself decides from the sign bit and the precomputed flags input; it does not inspect exponent or mantissa) +Sign Exponent (e) Fraction (f) Value +0 00⋯00 00⋯00 +0 + +0 00⋯00 00⋯01 Positive Denormalized Real + ⋮ 0.f × 2(−b+1) + 11⋯11 + +0 00⋯01 XX⋯XX Positive Normalized Real + ⋮ 1.f × 2(e−b) + 11⋯10 + +0 11⋯11 00⋯00 +∞ + +0 11⋯11 00⋯01 SNaN + ⋮ + 01⋯11 + +0 11⋯11 1X⋯XX QNaN + + +1 00⋯00 00⋯00 −0 + +1 00⋯00 00⋯01 Negative Denormalized Real + ⋮ −0.f × 2(−b+1) + 11⋯11 + +1 00⋯01 XX⋯XX Negative Normalized Real + ⋮ −1.f × 2(e−b) + 11⋯10 + +1 11⋯11 00⋯00 −∞ + +1 11⋯11 00⋯01 SNaN + ⋮ + 01⋯11 + +1 11⋯11 1X⋯XX QNaN + +*/ + + +package fpu_fclass; + +import defined_types::*; +`include "defined_parameters.bsv" + +interface Ifc_fpu_fclass#(numeric type fpinp, numeric type fpman, numeric type fpexp); + method ActionValue#(Floating_output#(fpinp)) _start(Bit#(1) sign1,Bit#(fpman)mantissa,Bit#(fpexp)exponent, Bit#(5) flags); +endinterface + +`ifdef fpu_hierarchical +interface Ifc_fpu_fclass32; + method ActionValue#(Floating_output#(32)) _start(Bit#(1) sign1,Bit#(23)mantissa,Bit#(8)exponent, Bit#(5) flags); +endinterface + +interface Ifc_fpu_fclass64; + method ActionValue#(Floating_output#(64)) _start(Bit#(1) sign1,Bit#(52)mantissa,Bit#(11)exponent, Bit#(5) flags); +endinterface +`endif + +module mkfpu_fclass(Ifc_fpu_fclass#(fpinp,fpman,fpexp)) + provisos ( + Add#(TAdd#(fpexp,fpman),1,fpinp), //Defining fpinp to be fpexp + fpman + 1 + Add#(fpexp,2,fpexp2), + Add#(a__, 10, fpinp) //fpinp must be at least 10 bits wide so the 10-bit class vector can be zero-extended into the result + ); + + let fPINP = valueOf(fpinp); + let fPMAN = valueOf(fpman); + let fPEXP = valueOf(fpexp); + + method ActionValue#(Floating_output#(fpinp)) _start(Bit#(1) sign1,Bit#(fpman)mantissa,Bit#(fpexp)exponent, 
Bit#(5) flags); + + Bit#(10) result_fclass; + Bool sbit = (sign1==1); + Bool inf = (flags[1]==1); + Bool normal = (flags == '0); //no special-case flag is set + Bool subnormal = (flags[4] == 1); + Bool zero = (flags[3] == 1); + if(sbit && inf) //negative infinity (bit 0) + begin + result_fclass = 'd1; + end + + else if(sbit && normal) //negative normal (bit 1) + begin + result_fclass = 'd2; + end + + else if(sbit && subnormal) //negative subnormal (bit 2) + begin + result_fclass = 'd4; + end + + else if(sbit && zero) //-0 (bit 3) + begin + result_fclass = 'd8; + end + + else if(!sbit && zero) // +0 (bit 4) + begin + result_fclass = 'd16; + end + + else if( !sbit && subnormal) //positive subnormal (bit 5) + begin + result_fclass = 'd32; + end + + else if(!sbit && normal) //positive normal (bit 6) + begin + result_fclass = 'd64; + end + + else if(!sbit && inf) //positive infinity (bit 7) + begin + result_fclass = 'd128; + end + + else if (flags[0]==1) //Signaling NaN (bit 8), either sign + begin + result_fclass = 'd256; + end + + else //quiet NaN (bit 9): the only case left once all of the above have been ruled out + begin + result_fclass = 'd512; + end + + + + return Floating_output { + final_result : zeroExtend(result_fclass), + fflags: 5'b0 }; + endmethod +endmodule + +`ifdef fpu_hierarchical +(*synthesize*) +module mkfpu_fclass32(Ifc_fpu_fclass32); + Ifc_fpu_fclass#(32,23,8) uut <- mkfpu_fclass(); + method ActionValue#(Floating_output#(32)) _start(Bit#(1) sign1,Bit#(23)mantissa,Bit#(8)exponent, Bit#(5) flags); + let x <- uut._start(sign1,mantissa,exponent,flags); + return x; + endmethod +endmodule + +(*synthesize*) +module mkfpu_fclass64(Ifc_fpu_fclass64); + Ifc_fpu_fclass#(64,52,11) uut <- mkfpu_fclass(); + method ActionValue#(Floating_output#(64)) _start(Bit#(1) sign1,Bit#(52)mantissa,Bit#(11)exponent, Bit#(5) flags); + let x <- uut._start(sign1,mantissa,exponent,flags); + return x; + endmethod +endmodule +`endif + +// (*synthesize*) +// module mkTb_fpu_fclass(); +// Ifc_fpu_fclass#(32,23,8) inst_fpu_fclass <- mkfpu_fclass(); +// Reg#(Bit#(32)) rg_clock <- mkReg(0); +// Reg#(Bit#(32)) rg_operand1<-mkReg('h7f8ff000); //NOTE(review): originally labelled "positive normal set 6", but 'h7f8ff000 has an all-ones exponent and a non-zero mantissa, i.e. it is a NaN + 
+// rule get_input(rg_clock == 0); +// inst_fpu_fclass._start(rg_operand1); +// rg_clock <= rg_clock + 1; +// endrule + +// rule get_output; +// let lv_result = inst_fpu_fclass.result_(); +// `ifdef verbose $display("Result is: %h",lv_result.final_result); `endif +// $finish(0); +// endrule + +// endmodule + +endpackage diff --git a/src/core/fpu/fpu_fm_add_sub.bsv b/src/core/fpu/fpu_fm_add_sub.bsv new file mode 100644 index 0000000..a57d66c --- /dev/null +++ b/src/core/fpu/fpu_fm_add_sub.bsv @@ -0,0 +1,1037 @@ +/* +Authors : Vinod.G, Aditya Govardhan +Email : g.vinod1993@gmail.com +Last Update : 27th November 2017 +See LICENSE for more details +Paper Reference: Floating Point Fused Multiply-Add Architectures (http://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=4487224) +Description: +TODO +*/ + +package fpu_fm_add_sub; +import DReg::*; +import defined_types::*; +import RegFile::*; +import UniqueWrappers::*; +`include "defined_parameters.bsv" +import ConfigReg::*; + +interface Ifc_fpu_fm_add_sub#(numeric type fpinp, numeric type fpman, numeric type fpexp); + method Action _start(Tuple3#(Bit#(1),Bit#(fpexp),Bit#(fpman)) _operand1, Tuple3#(Bit#(1),Bit#(fpexp),Bit#(fpman)) _operand2,Tuple3#(Bit#(1),Bit#(fpexp),Bit#(fpman)) _operand3, Bit#(3) rounding_mode, bit operation, bit _negate, bit mul, bit muladd, Tuple3#(Bit#(5),Bit#(5),Bit#(5)) flags); + method Floating_output#(fpinp) get_result(); + method Action flush; +endinterface + + +`ifdef fpu_hierarchical + interface Ifc_fpu_fm_add_sub32; + method Action _start(Tuple3#(Bit#(1),Bit#(8),Bit#(23)) _operand1, Tuple3#(Bit#(1),Bit#(8),Bit#(23)) _operand2,Tuple3#(Bit#(1),Bit#(8),Bit#(23)) _operand3, Bit#(3) rounding_mode, bit operation, bit _negate, bit mul, bit muladd, Tuple3#(Bit#(5),Bit#(5),Bit#(5)) flags); + method Floating_output#(32) get_result(); + method Action flush; + endinterface + + interface Ifc_fpu_fm_add_sub64; + method Action _start(Tuple3#(Bit#(1),Bit#(11),Bit#(52)) _operand1, 
Tuple3#(Bit#(1),Bit#(11),Bit#(52)) _operand2,Tuple3#(Bit#(1),Bit#(11),Bit#(52)) _operand3, Bit#(3) rounding_mode, bit operation, bit _negate, bit mul,bit muladd, Tuple3#(Bit#(5),Bit#(5),Bit#(5)) flags); + method Floating_output#(64) get_result(); + method Action flush; + endinterface +`endif + +typedef struct{ + Bit#(TMul#(2,TAdd#(fpman,1))) product_mantissa; + Bit#(TAdd#(fpexp,2)) lv_summed_exponent; // exponent of the resultant + bit sign; // sign bit of the result + Bit#(fpinp) _operand3; + Bit#(1) invalid; // indicating that the ff_output is NaN. + Bit#(1) infinity; // indicating that the ff_output is infinity. + Bit#(1) zero; // indicating that the ff_output is zero. + Bit#(5) add_flags; + Bit#(3) rounding_mode; // static rounding mode encoded in the instruction + bit _operation; // bit denoting the operation to be performed 0 - Add, 1 - Sub + bit _negate; // bit denoting whether the operands should be negated or not + bit mul; // bit denoting whether the operation is mul or not + bit muladd; + bit quiet_nan_two; + bit inp_denormal; +}Input_data_type #(numeric type fpinp, numeric type fpman, numeric type fpexp) deriving (Bits,Eq); + +typedef struct{ + Bit#(1) lv_product_sign; //The result of the integer multiplier stage + Bit#(1) lv_negate; + Bit#(TAdd#(fpexp,2)) lv_product_exponent; + Bit#(TAdd#(TMul#(2,TAdd#(1,fpman)),1)) lv_product_mantissa; + Bit#(fpinp) lv_operand3; + Bit#(5) add_flags; + bit operation; + bit mul; + bit muladd; + Bit#(3) rounding_mode; + bit lv_product_is_invalid; + bit lv_product_is_zero; + bit lv_product_is_infinity; + bit lv_product_overflow; + bit lv_product_underflow; + bit quiet_nan_two; + bit inp_denormal; +}Stage2_data_type #(numeric type fpinp, numeric type fpman, numeric type fpexp) deriving (Bits,Eq); + +typedef struct{ + bit actual_operation; + bit lv_resultant_sign; + bit man2_gt_man3; + Bit#(TAdd#(fpexp,2)) resultant_exponent; + Bit#(TAdd#(TMul#(fpman,3),4)) mantissa2; + Bit#(TAdd#(TMul#(fpman,3),4)) mantissa3; + Bit#(3) 
rounding_mode; + bit result_is_invalid; + Bit#(2) result_is_infinity; + Bit#(2) result_is_zero; + bit product_overflow; + bit product_underflow; + bit quiet_nan_two; + bit quiet_nan_three; + bit lv_product_is_zero; +}Stage4_data_type #(numeric type fpman, numeric type fpexp) deriving (Bits,Eq); + +typedef struct{ + Bit#(TAdd#(fpexp,2)) resultant_exponent; + Bit#(TAdd#(TMul#(fpman,3),4)) resultant_mantissa; + bit lv_resultant_sign; + Bit#(3) lv_rounding_mode; + Bit#(2) add_sub_is_zero; + bit lv_result_is_invalid; + Bit#(2) lv_result_is_infinity; + Bit#(2) lv_result_is_zero; + bit lv_product_overflow; + bit lv_product_underflow; + bit quiet_nan_two; + bit quiet_nan_three; + bit lv_product_is_zero; + Bit#(TLog#(TAdd#(TAdd#(TMul#(fpman,3),4),1))) lv_zeros_on_left; +}Stage5_data_type #(numeric type fpman, numeric type fpexp) deriving (Bits,Eq); + + typedef enum{ + Begin, + Stage1, + Stage2, + Stage3, + Stage4 + } FMA_states deriving (Bits,Eq); + +module mkfpu_fm_add_sub(Ifc_fpu_fm_add_sub#(fpinp,fpman,fpexp)) + provisos( + Add#(TAdd#(fpexp,fpman),1,fpinp), + Add#(fpexp,2,fpexp2), + Add#(TMul#(fpman,3),4,fmaman), + Add#(fpman,1,fpman1), + Add#(fpexp,1,fpexp1), + Add#(b__,fpinp,64), + Mul#(TAdd#(fpman,1),2,impfpman2), + Add#(fpinp,fpinp,fpinp2), + //per request of bsc + Add#(c__, TSub#(fpexp, 1), fpexp1), + Add#(d__,1,fpexp2), + Add#(e__, fpexp2, fpman), + Add#(f__, TSub#(fpexp, 1), fpman), + Add#(g__, TAdd#(fpman, 1), fpinp), + Add#(h__, TSub#(fpexp, 1), fpexp2), + Add#(j__, TLog#(TAdd#(1, fmaman)), fpexp2), + Add#(l__, 1, fmaman), + Add#(m__, TAdd#(2, fpman), fmaman), + Add#(n__,TAdd#(fpman,1),fpinp2), + Add#(a__, TMul#(2, TAdd#(1, fpman)), fmaman), + Mul#(2, TAdd#(1, fpman), impfpman2), + Add#(i__, TAdd#(TMul#(2, TAdd#(1, fpman)), 1), fmaman), + Add#(k__, TLog#(TAdd#(1, TAdd#(impfpman2, 1))), fpexp2), + Add#(o__, TLog#(TAdd#(1, fmaman)), TAdd#(fpexp, 2)), + Log#(TAdd#(TAdd#(TMul#(fpman, 3), 4), 1), TLog#(TAdd#(1, fmaman))), + Add#(p__, TLog#(TAdd#(TAdd#(TMul#(fpman, 
3), 4), 1)), TAdd#(fpexp, 2)), + Add#(s__, TAdd#(1, TAdd#(f__, fpexp)), fmaman), + Add#(t__, TAdd#(f__, fpexp), impfpman2), + Add#(q__, TAdd#(fpexp, f__), impfpman2), + Add#(r__, TAdd#(1, TAdd#(fpexp, f__)), fmaman) + ); + + + + Wire#(Floating_output#(fpinp)) ff_final_out <- mkWire(); + Reg#(Stage2_data_type#(fpinp,fpman,fpexp)) ff_stage2 <- mkConfigRegU(); + Reg#(Stage4_data_type#(fpman,fpexp)) ff_stage4 <- mkConfigRegU(); + Reg#(Stage5_data_type#(fpman,fpexp)) ff_stage5 <- mkConfigRegU(); + Reg#(Input_data_type#(fpinp,fpman,fpexp)) ff_input_register <- mkConfigRegU(); + Reg#(FMA_states) rg_state_handler <- mkReg(Begin); + Wire#(Bool) wr_flush <- mkDWire(False); + + function zeroExtendLSB(inp_man) = unpack(reverseBits(extend(reverseBits(pack(inp_man))))); + + let fPMAN = valueOf(fpman); + let fPINP = valueOf(fpinp); + let fPEXP = valueOf(fpexp); + let iMPFPMAN2 = valueOf(impfpman2); + let fMAMAN = valueOf(fmaman); + + rule rl_flush(wr_flush); + rg_state_handler <= Begin; + //ff_input_register <= tagged Invalid; + //ff_stage2 <= tagged Invalid; + //ff_stage4 <= tagged Invalid; + //ff_stage5 <= tagged Invalid; + endrule + + rule rl_stage1_after_input_stage(rg_state_handler == Stage1 && !wr_flush); + + Bit#(impfpman2) x = ff_input_register.product_mantissa; + Bit#(TAdd#(impfpman2,1)) lv_product_mantissa = {x[iMPFPMAN2-1:0],1'b0}; //extra zero for 10.xxxx case + Bit#(fpexp2) lv_product_exponent = ff_input_register.lv_summed_exponent; + Bit#(fpinp) lv_operand3 = ff_input_register._operand3; + Bit#(5) add_flags = ff_input_register.add_flags; + bit lv_product_underflow = 0; + bit lv_product_overflow = 0; + let lv_product_is_invalid = ff_input_register.invalid; + let lv_product_is_infinity = ff_input_register.infinity; + let lv_product_is_zero = ff_input_register.zero; + let rounding_mode = ff_input_register.rounding_mode; + let operation = ff_input_register._operation; + let lv_negate = ff_input_register._negate; + let lv_product_sign = ff_input_register.sign; + let mul 
= ff_input_register.mul; + let muladd = ff_input_register.muladd; + let quiet_nan_two = ff_input_register.quiet_nan_two; + bit inp_denormal = ff_input_register.inp_denormal; + Bit#(TSub#(fpexp,1)) bias = '1; + Int#(fpexp2) lv_actual_product_exponent = unpack(lv_product_exponent - {3'b0,bias}); + let msb_zeros = pack(countZerosMSB(lv_product_mantissa)); + let lsb_zeros = 0; + + //Change-2 Removing Redundant Variables + //Bit#(fpman) bias_temp = zeroExtend(bias); + //Int#(fpman) lv_actual_product_exponent_temp = signExtend(lv_actual_product_exponent); + //`ifdef verbose $display("lv_actual_product_exponent_temp : %d",lv_actual_product_exponent_temp); `endif + rg_state_handler <= Stage2; + + // lv_product_is_subnormal construct is like a flag which can be used in difficult situations + // bit lv_product_is_subnormal = 0; + + bit lv_sticky = lv_product_mantissa[0]; + `ifdef verbose $display("and thus the sticky bit = %b", lv_sticky); `endif + + /* + if exponent is > bias then obviously none of the numbers are subnormal + so the product is of the form 1x.xxxx or 01.xxxx + the overflow conditions are handled in the following if condition accordingly + */ + + `ifdef verbose $display("lv_actual_product_exponent = %d",lv_actual_product_exponent); `endif + bit exp_overflow_bit = pack(lv_actual_product_exponent)[fPEXP]; //Says if Exponent Overflows + bit exp_underflow_bit = pack(lv_actual_product_exponent)[fPEXP+1]; //Says if Exponent Underflows + Bit#(fpexp) expo_temp = pack(lv_actual_product_exponent)[fPEXP-1:0]; + bit exp_and = &(expo_temp); //Says if Exponent is equal to Bias + bit is_msb_zeros = |(msb_zeros); + + //Change-1 -- Reducing the size of the Muxes from EXP size to just a bunch of 1bits and a Or-tree + //Change-1 was wrong apparently, according to Paranoia!! Should see why! 
Rolling back + //if((exp_overflow_bit==1'b1 && exp_underflow_bit==1'b0) || (is_msb_zeros==1'b0 && exp_and==1'b1)) begin + if(lv_actual_product_exponent > zeroExtend(unpack(bias)) || (msb_zeros == 0 && lv_actual_product_exponent == zeroExtend(unpack(bias)))) begin + if(muladd == 0 ||(muladd==1 && ((lv_product_sign^lv_operand3[fPINP-1]^operation) == 0))) + lv_product_overflow = 1; + //When the product overflows, the FMA result is an overflow + `ifdef verbose $display("lv_product_overflow!!!"); `endif + end + + /* + -lowest_exp = -denormal_bias -mantissa_size -2 + -2 is for the implicit bit and the carry bit + i.e. if all the bits are shifted out then its an underflow + */ + + else begin + //Thought-1 -- Can something be done to reduce the countZerosMSB and countZerosLSB + //Cannot reduce this mux to 1-bit but can reduce size since it's unwanted + if(lv_actual_product_exponent < unpack(-zeroExtend(bias)-fromInteger(fPMAN)-1)) begin + //if(lv_actual_product_exponent_temp < unpack(-bias_temp-fromInteger(fPMAN)-1)) begin + if((muladd == 1'b0 || (muladd==1'b1 && (add_flags[3]==1'b1 || add_flags[4]==1'b1))) && lv_product_is_zero == 1'b0) + lv_product_underflow = 1; + `ifdef verbose $display("lv_product_underflow!!!"); `endif + end + /* + if msb of product is 1 then the case is 1x.xxxx + product is shifted right once to make it 01.xxxx + we don't care what is the exponent, just increase it by one + actual exponent is also increased by one since exponent is increased by one + this increasing of exponent leading to overflow is handled in the overflow case + msb_zeros is increased for further arising conditions + */ + //Change-4 Using the previously computed msb_zeros. Synthesis will detect this anyhow, but still. Fanout? 
+ if(is_msb_zeros==1'b0) begin + //if(msb_zeros == 0) begin + lv_product_mantissa = lv_product_mantissa >> 1; + lv_product_exponent = lv_product_exponent + 1; + lv_actual_product_exponent = lv_actual_product_exponent + 1; + msb_zeros = msb_zeros + 1; + end + // possible shift is positive when exponent is lesser than -126 + + //Change-5 Possible shift needn't use lv_actual_product_exponent -- It's enough if exponent is used I guess +// Int#(fpexp2) possible_shift = 1-zeroExtend(unpack(bias))-(lv_actual_product_exponent); + Int#(fpexp2) possible_shift = 1-unpack(lv_product_exponent); + + //Experiment-1 -- Do all the operations parallely and use the if-else for just assignments + lsb_zeros = pack(countZerosLSB(lv_product_mantissa)); + let lv_product_mantissa_shiftR = (lv_product_mantissa >> pack(possible_shift)); + //lv_product_mantissa_shiftR = {lv_product_mantissa_shiftR[iMPFPMAN2:1], lv_product_mantissa_shiftR[0] | lv_sticky}; + let lv_product_exponent_inc_shift = lv_product_exponent + pack(possible_shift); + + let shift_neg = ~pack(possible_shift)+1; + + let lv_product_mantissa_shiftL_expo = lv_product_mantissa << (shift_neg); + let lv_product_exponent_sub_shift = lv_product_exponent - (shift_neg); + + let lv_product_mantissa_shiftL_zerosMSB = lv_product_mantissa << (msb_zeros - 1); + let lv_product_exponent_sub_zerosMSB = lv_product_exponent - (zeroExtend(msb_zeros) - 1); + + /* + msb_zeros = 1 when + i) the product is 1x.xxxx and shifted right once + ii) the product is 01.xxxx already + if possible_shift is negative or zero, it means that exponent is -126 or greater + and thus the product is already normalized + but if possible_shift is positive, it means that exponent is < -126 + and thus product is shifted right to make exponent -126 and the result is subnormal + */ + if(possible_shift > 0) begin + //Setting sticky if all lsb zeros are removed out + + //Is there a better logic for this? 
Since, lsb_zeros is a big if-else logic + //lsb_zeros = pack(countZerosLSB(lv_product_mantissa)); + if(possible_shift > unpack(zeroExtend(lsb_zeros)) || lv_product_mantissa[0] == 1) + lv_sticky = 1; + + lv_product_mantissa = {lv_product_mantissa_shiftR[iMPFPMAN2:1], lv_product_mantissa_shiftR[0]|lv_sticky}; + lv_sticky = lv_product_mantissa[0]; + lv_product_exponent = lv_product_exponent_inc_shift; + + `ifdef verbose $display("possible_shift",possible_shift); `endif + /*if(mul==1 && lv_product_is_zero==0) + lv_product_underflow = 1;*/ + //Handling sticky + + `ifdef verbose $display("lv_product_exponent : %d bin : %b",lv_product_exponent,lv_product_exponent); `endif + `ifdef verbose $display("lv_product_mantissa = %b lv_product_exponent : %d since exp < -126", lv_product_mantissa,lv_product_exponent); `endif + `ifdef verbose $display("and thus the sticky bit = %b", lv_sticky); `endif + // lv_product_is_subnormal = 1; + end + + /* + msb_zeros != 1 means product is of the form 00.xxxx, important case + */ + else if(msb_zeros != 'b1) begin + /* + if possible shift is < the number of leading zeros then the number can't be made normal + */ + if((shift_neg) < zeroExtend(msb_zeros - 1)) begin + lv_product_mantissa = lv_product_mantissa_shiftL_expo; + lv_product_exponent = lv_product_exponent_sub_shift; + // lv_product_is_subnormal = 1; + end + /* + if exponent affords to give away enough such that shifting left leads to 01.xxxx and exponent >= -126 + */ + else begin + lv_product_mantissa = lv_product_mantissa_shiftL_zerosMSB; + lv_product_exponent = lv_product_exponent_sub_zerosMSB; + // lv_product_is_subnormal = 0; + end + end + end + ff_stage2 <= Stage2_data_type{ + lv_product_sign : lv_product_sign, + lv_negate : lv_negate, + lv_product_exponent : lv_product_exponent, + lv_product_mantissa : lv_product_mantissa, + lv_operand3 : lv_operand3, + add_flags : add_flags, + operation : operation, + mul : mul, + muladd : muladd, + rounding_mode : rounding_mode, + 
lv_product_is_invalid : lv_product_is_invalid, + lv_product_is_zero : lv_product_is_zero, + lv_product_is_infinity : lv_product_is_infinity, + lv_product_overflow : lv_product_overflow, + lv_product_underflow : lv_product_underflow, + quiet_nan_two : quiet_nan_two, inp_denormal : inp_denormal /* forward the denormal-input flag taken from ff_input_register: rl_stage_3 reads ff_stage2.inp_denormal, which was otherwise never assigned and therefore undefined */ + }; + + + endrule + + rule rl_stage_3(rg_state_handler == Stage2 && !wr_flush); + + rg_state_handler <= Stage3; + let lv_negate = ff_stage2.lv_negate; + let lv_product_exponent = ff_stage2.lv_product_exponent; + let lv_product_mantissa = ff_stage2.lv_product_mantissa; + let lv_operand3 = ff_stage2.lv_operand3; + let add_flags = ff_stage2.add_flags; + let operation = ff_stage2.operation; + let mul = ff_stage2.mul; + let muladd = ff_stage2.muladd; + let lv_product_sign = ff_stage2.lv_product_sign; + let lv_product_is_invalid = ff_stage2.lv_product_is_invalid; + let lv_product_is_zero = ff_stage2.lv_product_is_zero; + let lv_rounding_mode = ff_stage2.rounding_mode; + let lv_product_is_infinity = ff_stage2.lv_product_is_infinity; + let lv_product_overflow = ff_stage2.lv_product_overflow; + let lv_product_underflow = ff_stage2.lv_product_underflow; + let quiet_nan_two = ff_stage2.quiet_nan_two; + let inp_denormal = ff_stage2.inp_denormal; + //ff_stage2 <= tagged Invalid; + + Bit#(1) sign2 = lv_product_sign ^ lv_negate; + Bit#(fpexp2) exponent2 = lv_product_exponent; + Bit#(fmaman) mantissa2 = zeroExtendLSB(lv_product_mantissa); + Bit#(1) sign3 = lv_operand3[fPINP-1] ^ lv_negate; + Bit#(fpexp2) exponent3 = {2'b0, lv_operand3[fPINP-2:fPMAN]}; + Bit#(fmaman) mantissa3 = 0; + Bit#(fpman) lv_man3 = lv_operand3[fPMAN-1:0]; + Bit#(fpexp) lv_exp_max = '1; + bit lv_op3_is_invalid = add_flags[2] | add_flags[0]; + bit lv_op3_is_infinity = add_flags[1]; + bit lv_op3_is_zero = add_flags[3]; + bit op3_is_subnormal = add_flags[4]; + bit quiet_nan_three = add_flags[2]; + bit expo3_zero = |exponent3; + //Change-7 Replaced all instances of operation^sign3 with op_xor_sign3 + bit op_xor_sign3 = operation ^ sign3; + + //Change-6 
Avoiding exponent3==0 mux, but does that help? + if(lv_op3_is_infinity==0 && lv_op3_is_invalid ==0 && lv_op3_is_zero==0) begin + mantissa3 = zeroExtendLSB({1'b0,expo3_zero,lv_man3}); + /* if(exponent3 == '0) + mantissa3 = zeroExtendLSB({2'b0,lv_man3}); + else + mantissa3 = zeroExtendLSB({2'b01,lv_man3});*/ + end + + exponent3 = exponent3 + zeroExtend(op3_is_subnormal); + + Bit#(1) lv_result_is_invalid = 0; + Bit#(2) lv_result_is_infinity = 0; + Bit#(2) lv_result_is_zero = 0; + + if(quiet_nan_two == 0 && lv_product_is_invalid == 1) + quiet_nan_three = 0; //0*inf case + + //Result is invalid cases + if(lv_op3_is_invalid == 1 || lv_product_is_invalid == 1) + lv_result_is_invalid = 1; + + //Result is zero cases + else if(lv_op3_is_zero == 1 && lv_product_is_zero == 1) begin + if(mul==0) begin + if((lv_rounding_mode == 'b010) && (sign2 | (op_xor_sign3)) == 1) + lv_result_is_zero = 2'b11; + else if((lv_rounding_mode != 'b010) && (sign2 & (op_xor_sign3)) == 1) + lv_result_is_zero = 2'b11; + else begin + if(sign2 == 0) + lv_result_is_zero = 2'b01; + else + lv_result_is_zero = {op_xor_sign3,1'b1}; + end + end + else + lv_result_is_zero = {sign2,1}; + end + + //Result is infinity cases + else if(lv_product_is_infinity == 1 && lv_op3_is_infinity == 1) begin + lv_result_is_infinity = {sign2, ~(sign2 ^ (op_xor_sign3))}; + lv_result_is_invalid = ~lv_result_is_infinity[0]; + quiet_nan_two = 0; //inf * qNaN + inf case + end + else if(lv_product_is_infinity == 1 || lv_op3_is_infinity == 1) begin + lv_result_is_infinity = {((lv_product_is_infinity & ~lv_op3_is_infinity) & sign2) | ((~lv_product_is_infinity & lv_op3_is_infinity) & (op_xor_sign3)), 1}; + end + if(lv_product_is_zero == 1) begin + exponent2 = '0; + mantissa2 = '0; + end + + Bit#(fpexp2) lv_minuend, lv_subtrahend; + Bit#(fpexp2) exponent_difference = '0; + Bit#(fpexp2) resultant_exponent = '0; + bit op2_gt_op3 = 0; + + Bit#(fmaman) mantissa_to_shift; + let lv_zeros_on_right; + bit lv_sticky = 0; + + if(exponent2 > 
exponent3) begin + lv_minuend = exponent2; + lv_subtrahend = exponent3; + mantissa_to_shift = mantissa3; + op2_gt_op3 = 1; + end + else begin + lv_minuend = exponent3; + lv_subtrahend = exponent2; + mantissa_to_shift = mantissa2; + op2_gt_op3 = 0; + end + + resultant_exponent = lv_minuend; + exponent_difference = lv_minuend - lv_subtrahend; + lv_zeros_on_right = zeroExtend(pack(countZerosLSB(mantissa_to_shift))); + Bit#(1) shifted_operand_zero = (mantissa_to_shift == '0) ? 1:0; + mantissa_to_shift = mantissa_to_shift >> exponent_difference; + + //Handling sticky + if(((lv_zeros_on_right < exponent_difference) || (mantissa_to_shift[0] == 1)) && shifted_operand_zero != 1) + lv_sticky = 1; + + mantissa_to_shift = {mantissa_to_shift[fMAMAN-1:1], lv_sticky}; + + if(op2_gt_op3 == 1) begin + mantissa3 = mantissa_to_shift; + end + else begin + mantissa2 = mantissa_to_shift; + end + quiet_nan_two = quiet_nan_two & ~add_flags[0]; + `ifdef verbose $display("sign2 = %b exponent2 = %b mantissa2 = %b", sign2, resultant_exponent, mantissa2); `endif + `ifdef verbose $display("sign3 = %b exponent3 = %b mantissa3 = %b", sign3, resultant_exponent, mantissa3); `endif + `ifdef verbose $display(); `endif + bit man2_gt_man3 = 0; + if(mantissa2 > mantissa3) man2_gt_man3 = 1; //Can this be optimized? + bit lv_resultant_sign = (man2_gt_man3 & sign2) | (~man2_gt_man3 & (operation ^ sign3)); // Using Karnaugh maps + bit actual_operation = sign2 ^ (operation ^ sign3); // 0 for addition 1 for subtraction //Can this be pushed back to the prev. 
stage, saving 1 bit + + ff_stage4<= Stage4_data_type{ + lv_resultant_sign : lv_resultant_sign, + actual_operation : actual_operation, + mantissa2 : mantissa2, + mantissa3 : mantissa3, + man2_gt_man3 : man2_gt_man3, + resultant_exponent : resultant_exponent, + rounding_mode : lv_rounding_mode, + result_is_invalid : lv_result_is_invalid, + result_is_infinity : lv_result_is_infinity, + result_is_zero : lv_result_is_zero, + product_overflow : lv_product_overflow, + product_underflow : lv_product_underflow, + quiet_nan_two : quiet_nan_two, + quiet_nan_three : quiet_nan_three, + lv_product_is_zero : lv_product_is_zero + }; + endrule:rl_stage_3 + + rule rl_stage4(rg_state_handler == Stage3 && !wr_flush); + rg_state_handler <= Stage4; + let lv_resultant_sign = ff_stage4.lv_resultant_sign; + let man2_gt_man3 = ff_stage4.man2_gt_man3; + let mantissa2 = ff_stage4.mantissa2; + let mantissa3 = ff_stage4.mantissa3; + let actual_operation = ff_stage4.actual_operation; + Bit#(fpexp2) resultant_exponent = ff_stage4.resultant_exponent; + Bit#(3) lv_rounding_mode = ff_stage4.rounding_mode; + bit lv_result_is_invalid = ff_stage4.result_is_invalid; + Bit#(2) lv_result_is_infinity = ff_stage4.result_is_infinity; + Bit#(2) lv_result_is_zero = ff_stage4.result_is_zero; + bit lv_product_overflow = ff_stage4.product_overflow; + let quiet_nan_two = ff_stage4.quiet_nan_two; + let quiet_nan_three = ff_stage4.quiet_nan_three; + let lv_product_underflow = ff_stage4.product_underflow; + let lv_product_is_zero = ff_stage4.lv_product_is_zero; + //ff_stage4 <= tagged Invalid; + + Bit#(fmaman) resultant_mantissa = 0; + Bit#(fmaman) add_mantissa = mantissa2 + mantissa3; + + //Serial Path? + Bit#(fmaman) sub_mantissa1 = (man2_gt_man3==1)? mantissa2 : mantissa3; + Bit#(fmaman) sub_mantissa2 = (man2_gt_man3==1)? 
mantissa3 : mantissa2; + Bit#(fmaman) sub_mantissa = sub_mantissa1 - sub_mantissa2; + + + if(actual_operation == 0) + resultant_mantissa = add_mantissa; + else + resultant_mantissa = sub_mantissa; + + //Case when Mantissa2 = Mantissa3 and hence the result is zero + Bit#(2) add_sub_is_zero = 0; + + if(resultant_mantissa == '0) begin + if(lv_rounding_mode == 3'b010) begin + add_sub_is_zero = 2'b11; + end + else begin + add_sub_is_zero = 2'b01; // checks the resultant mantissa for zero + end + end + + let lv_zeros_on_left = pack(countZerosMSB(resultant_mantissa)); + ff_stage5 <= Stage5_data_type{ + resultant_mantissa : resultant_mantissa, + add_sub_is_zero : add_sub_is_zero, + lv_resultant_sign : lv_resultant_sign, + resultant_exponent : resultant_exponent, + lv_rounding_mode : lv_rounding_mode, + lv_result_is_invalid : lv_result_is_invalid, + lv_result_is_infinity : lv_result_is_infinity, + lv_result_is_zero : lv_result_is_zero, + lv_product_overflow : lv_product_overflow, + quiet_nan_two : quiet_nan_two, + quiet_nan_three : quiet_nan_three, + lv_product_underflow : lv_product_underflow, + lv_product_is_zero : lv_product_is_zero, + lv_zeros_on_left : lv_zeros_on_left + }; + + endrule + + + + rule rl_stage_5_final_stage(rg_state_handler == Stage4 && !wr_flush); + Bit#(fmaman) resultant_mantissa = ff_stage5.resultant_mantissa; + let add_sub_is_zero = ff_stage5.add_sub_is_zero; + let resultant_exponent = ff_stage5.resultant_exponent; + let lv_resultant_sign = ff_stage5.lv_resultant_sign; + let lv_rounding_mode = ff_stage5.lv_rounding_mode; + let lv_result_is_invalid = ff_stage5.lv_result_is_invalid; + let lv_result_is_infinity = ff_stage5.lv_result_is_infinity; + let lv_result_is_zero = ff_stage5.lv_result_is_zero; + let lv_product_overflow = ff_stage5.lv_product_overflow; + let quiet_nan_two = ff_stage5.quiet_nan_two; + let quiet_nan_three = ff_stage5.quiet_nan_three; + let lv_product_underflow = ff_stage5.lv_product_underflow; + let lv_product_is_zero = 
ff_stage5.lv_product_is_zero; + let lv_zeros_on_left = ff_stage5.lv_zeros_on_left; + bit add_sub_subnormal = 0; + //ff_stage5 <= tagged Invalid; + rg_state_handler <= Begin; + bit lv_sticky = resultant_mantissa[0]; + + //change-x+1 + let resultant_exponent_sub = resultant_exponent -1; + let resultant_mantissa_unnormalized = resultant_mantissa >> 1; + let resultant_exponent_inc = resultant_exponent + 1; + let resultant_mantissa_norm_expo = resultant_mantissa << resultant_exponent_sub; + let resultant_mantissa_norm_zerosMSB = resultant_mantissa << (lv_zeros_on_left - 1); + let resultant_exponent_sub_zerosMSB = resultant_exponent - ((zeroExtend(lv_zeros_on_left)) - 1); + + + if(resultant_mantissa[fMAMAN-1] == 1'b1) begin + //resultant_mantissa = resultant_mantissa >> 1; + resultant_mantissa = {resultant_mantissa_unnormalized[fMAMAN-1:1], lv_sticky | resultant_mantissa_unnormalized[0]}; + resultant_exponent = resultant_exponent_inc; + //resultant_exponent = resultant_exponent + 1; + end + + else if(resultant_mantissa[fMAMAN-2] != 1'b1) begin + if((zeroExtend(lv_zeros_on_left) - 1) > resultant_exponent_sub) begin + //if((zeroExtend(lv_zeros_on_left) - 1) > (resultant_exponent - 1)) begin + `ifdef verbose $display("resultant_exponent : %d",resultant_exponent); `endif + //resultant_mantissa = resultant_mantissa << (resultant_exponent - 1); + resultant_mantissa = resultant_mantissa_norm_expo; + resultant_exponent = 0; + `ifdef verbose $display("add_sub subnormal!!!"); `endif + add_sub_subnormal = 1; + end + else begin + //resultant_mantissa = resultant_mantissa << (lv_zeros_on_left - 1); + //resultant_exponent = resultant_exponent - (zeroExtend(lv_zeros_on_left) - 1); + resultant_mantissa = resultant_mantissa_norm_zerosMSB; + resultant_exponent = resultant_exponent_sub_zerosMSB; + end + end + + + `ifdef verbose $display("resultant_exponent : %b",resultant_exponent); `endif + Bit#(TSub#(fpexp,1)) bias = '1; + bit ex_overflow = 0; + Int#(fpexp2) res_exp_int = 
unpack(resultant_exponent) - zeroExtend(unpack(bias)); + `ifdef verbose $display("resultant_exponent : %d res_exp_int : %d",resultant_exponent, res_exp_int); `endif + + if(res_exp_int > zeroExtend(unpack(bias))) begin + lv_product_overflow = 1; + ex_overflow = 1; + end + /* else if(res_exp_int == zeroExtend(unpack(bias))) + ex_overflow = 1;*/ + else if(resultant_exponent[fPEXP+1] == 1 && lv_product_is_zero == 0) begin + lv_product_underflow = 1; + `ifdef verbose $display("Underflow"); `endif + end + /*`ifdef verbose $display("resultant_sign = %b resultant_exponent = %b resultant_mantissa = %b", resultant_sign, resultant_exponent, resultant_mantissa); `endif + `ifdef verbose $display(); `endif + */ + Bit#(TAdd#(fpman,2)) lv_rounded_mantissa = resultant_mantissa[fMAMAN-1:iMPFPMAN2]; + Bit#(2) lv_res_man = resultant_mantissa[fMAMAN-1:fMAMAN-2]; + Bit#(TSub#(impfpman2,2)) lv_res1 = resultant_mantissa[iMPFPMAN2-3:0]; + bit lv_guard = resultant_mantissa[iMPFPMAN2-1]; + bit lv_round = resultant_mantissa[iMPFPMAN2-2]; + lv_sticky = |lv_res1; + bit lv_round_up = 0; + bit lv_inexact = lv_guard | lv_round | lv_sticky; + + if(lv_rounding_mode == 'b000) + lv_round_up = lv_guard & (resultant_mantissa[iMPFPMAN2] | lv_round | lv_sticky); + else if(lv_rounding_mode == 'b100) + lv_round_up = lv_guard ;//& (lv_round | lv_sticky | ~lv_resultant_sign); + else if(lv_rounding_mode == 'b010) + lv_round_up = lv_inexact & (lv_resultant_sign); + else if(lv_rounding_mode == 'b011) + lv_round_up = lv_inexact & (~lv_resultant_sign); + + if(add_sub_subnormal == 1 && lv_inexact == 1) + lv_product_underflow = 1; + + `ifdef verbose $display("lv_guard = %b lv_round = %b lv_sticky = %b", lv_guard, lv_round, lv_sticky); `endif + `ifdef verbose $display("lv_round_up = %b", lv_round_up); `endif + `ifdef verbose $display("lv_rounded_mantissa = %b", lv_rounded_mantissa); `endif + + if(lv_round_up == 1) + lv_rounded_mantissa = lv_rounded_mantissa + 1; + + `ifdef verbose $display("lv_rounded_mantissa = %b 
after roundup", lv_rounded_mantissa); `endif + + if(lv_rounded_mantissa[fPMAN+1] == 1) begin + resultant_exponent = resultant_exponent + 1; + lv_rounded_mantissa = lv_rounded_mantissa >> 1; + end + else if(lv_res_man == 'b0 && lv_rounded_mantissa[fPMAN] == 1) begin + resultant_exponent = resultant_exponent + 1; + end + + Bit#(fpexp) lv_res_exp_temp = resultant_exponent[fPEXP-1:0]; + Bit#(fpman) man_all_zeros = '0; + Bit#(TSub#(fpman,1)) man1_all_zeros = '0; + Bit#(fpman) man_all_ones = '1; + Bit#(fpexp) exp_all_zeros = '0; + Bit#(TSub#(fpexp,1)) exp_all_ones_1 = '1; + Bit#(fpinp) lv_final_output = 0; + Bit#(fpexp) exp_all_ones = '1; + Bit#(fpexp) out_exp = resultant_exponent[fPEXP-1:0]; + Bit#(fpman) out_man = lv_rounded_mantissa[fPMAN-1:0]; + + + //Can I put these invalid, infinity, zero, cases in the first stage which will clear some of the paths???? + if(lv_result_is_invalid == 1) begin + lv_final_output = {1'b0, exp_all_ones,1'b1, man1_all_zeros}; + end + else if(lv_result_is_infinity[0] == 1) begin + lv_final_output = {lv_result_is_infinity[1], exp_all_ones, man_all_zeros}; + ex_overflow = 0; lv_product_underflow = 0; lv_inexact = 0; + end + else if(lv_result_is_zero[0] == 1) begin + lv_final_output = {lv_result_is_zero[1],exp_all_zeros, man_all_zeros}; + end + else if(add_sub_is_zero[0] == 1) begin + lv_final_output = {add_sub_is_zero[1], exp_all_zeros , man_all_zeros}; + end + else if(lv_product_overflow == 1 || lv_res_exp_temp == '1) begin + lv_inexact = 1; + ex_overflow = 1; + if(lv_rounding_mode == 'b001) + lv_final_output={lv_resultant_sign,{exp_all_ones_1,1'b0},man_all_ones}; //?? + else if(lv_rounding_mode == 'b010 && lv_resultant_sign == 0) + lv_final_output={lv_resultant_sign,{exp_all_ones_1,1'b0},man_all_ones}; //?? + else if(lv_rounding_mode == 'b011 && lv_resultant_sign == 1) + lv_final_output={lv_resultant_sign,{exp_all_ones_1,1'b0},man_all_ones}; //?? 
+ else begin + lv_final_output={lv_resultant_sign,exp_all_ones,man_all_zeros}; + end + end + else begin + lv_final_output = {lv_resultant_sign, out_exp, out_man}; + end + + if(lv_product_underflow == 1'b1 && lv_rounded_mantissa[fPMAN]==1'b1 && lv_rounding_mode!=3'b011) //Tininess vanishing after rounding + lv_product_underflow = 0; + + if(lv_result_is_invalid == 1) begin //For effectively handling the flag cases between add,sub,mul and fused mul add + ex_overflow = 0; + lv_inexact = 0; + lv_product_underflow = 0; + if(quiet_nan_two == 1 || quiet_nan_three == 1) + lv_result_is_invalid = 0; + end + + Bit#(5) fflags={lv_result_is_invalid,1'b0,ex_overflow,lv_product_underflow,lv_inexact}; + `ifdef verbose $display("lv_inv : %b ex_overflow: %b lv_inexact : %b",lv_result_is_invalid,ex_overflow,lv_inexact); `endif + ff_final_out <= Floating_output{ + final_result : lv_final_output, + fflags : fflags + }; + + `ifdef verbose $display("FMA: Result: %h fflags: %8h",lv_final_output, {24'b0,fflags}); `endif + endrule + + method Action _start(Tuple3#(Bit#(1),Bit#(fpexp),Bit#(fpman)) _operand1, Tuple3#(Bit#(1),Bit#(fpexp),Bit#(fpman)) _operand2,Tuple3#(Bit#(1),Bit#(fpexp),Bit#(fpman)) _operand3, Bit#(3) rounding_mode, bit operation, bit _negate, bit mul, bit muladd, Tuple3#(Bit#(5),Bit#(5),Bit#(5)) flags); + + + Bit#(TSub#(fpexp,1)) bias = '1; //Bias for the exponent: 127 for SP and 1023 for DP + Bit#(1) sign1 = tpl_1(_operand1); + Bit#(1) sign2 = tpl_1(_operand2); + Bit#(1) sign3 = tpl_1(_operand3); + Bit#(fpexp) lv_exponent1 = tpl_2(_operand1); + Bit#(fpexp) lv_exponent2 = tpl_2(_operand2); + Bit#(fpexp) lv_exponent3 = tpl_2(_operand3); + Bit#(fpman) lv_mantissa1 = tpl_3(_operand1); + Bit#(fpman) lv_mantissa2 = tpl_3(_operand2); + Bit#(fpman) lv_mantissa3 = tpl_3(_operand3); + Bit#(5) flags1 = tpl_1(flags); + Bit#(5) flags2 = tpl_2(flags); + Bit#(5) flags3 = tpl_3(flags); + Bit#(1) lv_op1_is_zero = flags1[3]; //1 when operand1=0 + Bit#(1) lv_op2_is_zero = flags2[3]; //1 when 
operand2=0 + Bit#(1) lv_op1_infinity = flags1[1]; //1 when operand1=inf + Bit#(1) lv_op2_infinity = flags2[1]; //1 when operand2=inf + Bit#(1) lv_op1_subnormal = flags1[4] | flags1[3]; //1 when operand1 is subnormal + Bit#(1) lv_op2_subnormal = flags2[4] | flags2[3]; //1 when operand2 is subnormal + Bit#(1) lv_inf = 0; //Bit indicating infinity + Bit#(1) lv_inv = 0; //Invalid Bit + Bit#(1) lv_zero = 0; //Zero bit + bit quiet_nan_two = (flags1[2] & ~flags2[0]) | (flags2[2] & ~flags1[0]); + + if((((flags1[0] | flags1[2])==1) || (flags2[0] | flags2[2])==1)) //If either of the operands are NaN's (Quiet or Signalling - Not distinguishing between them here) + lv_inv = 1; + else if(lv_op1_infinity==1 || lv_op2_infinity==1) begin //If either of the operands are Infinity + if(lv_op1_is_zero == 1 || lv_op2_is_zero ==1) begin //Provided atleast one of the operands are infinity, if either of them are zero, then res is NaN (0*inf) + lv_inv = 1; + end + else begin + lv_inf = 1; //Else result is infinity - inf +/- op2 = inf + quiet_nan_two = 0; + end + end + else if(lv_op1_is_zero == 1 || lv_op2_is_zero == 1) + lv_zero = 1; //If they are not infinity - Checked for Zero, if it is then product is zero (0*x = 0) + + + `ifdef verbose $display("lv_inv : %h lv_inf : %h lv_zero : %h",lv_inv,lv_inf,lv_zero); `endif + `ifdef verbose $display("flags1 : %b flags2 : %b flags3 : %b",flags1,flags2,flags3); `endif + + /* + When normal and denormal number is multiplied, exponent is + (biased_exponent - bias) + (1 - bias) + bias = biased_exponent - bias + 1; + either _operand1[30:23] == 0 or _operand2[30:23] == 0 for the above if condition so no harm in adding both + */ + + Bit#(fpexp2) exp1_temp = {2'b0,lv_exponent1}; + Bit#(fpexp2) exp2_temp = {2'b0,lv_exponent2}; + Bit#(fpexp2) lv_summed_exponent = exp1_temp + exp2_temp - zeroExtend(bias) + zeroExtend(lv_op1_subnormal) + zeroExtend(lv_op2_subnormal); + Bit#(1) lv_sign = sign1 ^ sign2; + + `ifdef verbose $display("lv_summed_exponent = %b", 
lv_summed_exponent/*, lv_actual_exponent*/); `endif + + Bit#(impfpman2) x = zeroExtend({~lv_op1_subnormal, lv_mantissa1})*zeroExtend({~lv_op2_subnormal, lv_mantissa2}); //Single Cycle Int Mul + rg_state_handler <= Stage1; + ff_input_register<= Input_data_type{ + product_mantissa : x, + lv_summed_exponent : lv_summed_exponent, + sign : lv_sign, + _operand3 : {sign3,lv_exponent3,lv_mantissa3}, + rounding_mode : rounding_mode, + infinity : lv_inf, + add_flags : flags3, + invalid : lv_inv, + zero : lv_zero, + _operation : operation, + _negate : _negate, + mul : mul, + muladd : muladd, + quiet_nan_two : quiet_nan_two, + inp_denormal : lv_op1_subnormal | lv_op2_subnormal + }; + endmethod + + + method Floating_output#(fpinp) get_result(); + return ff_final_out; + endmethod + method Action flush; + wr_flush <= True; + endmethod +endmodule + + +module mkTb_fpu_fm_add_sub(Empty); + + Ifc_fpu_fm_add_sub#(32,23,8) uut <- mkfpu_fm_add_sub(); + + function Tuple3#(Bit#(5), Bit#(5), Bit#(5)) condFlags (Tuple2#(Bit#(m), Bit#(e)) x, Tuple2#(Bit#(m), Bit#(e)) y, Tuple2#(Bit#(m),Bit#(e)) z); + let s = valueOf(m); + let man1 = tpl_1(x); + let expo1 = tpl_2(x); + let man2 = tpl_1(y); + let expo2 = tpl_2(y); + let man3 = tpl_1(z); + let expo3 = tpl_2(z); + Bit#(5) flags1, flags2,flags3; + Bool expZ1 = (expo1 == 0); + Bool manZ1 = (man1 == 0); + Bool expO1 = (expo1 == '1); + Bool manO1 = (man1 == '1); + Bool topB1 = (man1[s-1] == 1); + Bool expZ2 = (expo2 == 0); + Bool manZ2 = (man2 == 0); + Bool expO2 = (expo2 == '1); + Bool manO2 = (man2 == '1); + Bool topB2 = (man2[s-1] == 1 && man2 !=0); + Bool expZ3 = (expo3 == 0); + Bool manZ3 = (man3 == 0); + Bool expO3 = (expo3 == '1); + Bool manO3 = (man3 == '1); + Bool topB3 = (man3[s-1] == 1 && man3 !=0); + flags1 = {pack(expZ1 && !manZ1),pack(manZ1 && expZ1),pack(expO1 && topB1),pack(expO1 && manZ1),pack(expO1 && !topB1 && !manZ1)}; //Denormal, isZero, QNaN, Infinity, SNaN + flags2 = {pack(expZ2 && !manZ2),pack(manZ2 && expZ2),pack(expO2 && 
topB2),pack(expO2 && manZ2),pack(expO2 && !topB2 && !manZ2)}; //Denormal, isZero, QNaN, Infinity, SNaN + flags3 = {pack(expZ3 && !manZ3),pack(manZ3 && expZ3),pack(expO3 && topB3),pack(expO3 && manZ3),pack(expO3 && !topB3 && !manZ3)}; //Denormal, isZero, QNaN, Infinity, SNaN + return tuple3(flags1,flags2,flags3); + endfunction + + function Tuple3#(Bit#(m),Bit#(m), Bit#(m)) getMantissa (Bit#(n) op1, Bit#(n) op2, Bit#(n) op3) + provisos(Add#(TAdd#(m,1),e,n), + Add#(7,a__,e) + ); + let expo = valueOf(e); + let man = valueOf(m); + return tuple3(op1[man-1:0],op2[man-1:0],op3[man-1:0]); + endfunction + + function Tuple3#(Bit#(e), Bit#(e), Bit#(e)) getExp (Bit#(n) op1, Bit#(n) op2, Bit#(n) op3) + provisos(Add#(TAdd#(m,1),e,n), + Add#(7,a__,e) + ); + let inp = valueOf(n); + let man = valueOf(m); + return tuple3(op1[inp-2:man], op2[inp-2:man], op3[inp-2:man]); + endfunction + + function Bool isNaNBox(Bit#(64) op); + return (op[63:32]=='1); + endfunction + + function Tuple3#(Bit#(32),Bit#(32),Bit#(32)) setCanNaN (Bit#(64) op1, Bit#(64) op2, Bit#(64) op3); + return tuple3(isNaNBox(op1)? truncate(op1) : 32'h7fc00000, isNaNBox(op2)? truncate(op2) : 32'h7fc00000, isNaNBox(op3)? 
truncate(op3) : 32'h7fc00000); + endfunction + + Wrapper3#(Tuple2#(Bit#(23), Bit#(8)),Tuple2#(Bit#(23), Bit#(8)), Tuple2#(Bit#(23), Bit#(8)), Tuple3#(Bit#(5),Bit#(5),Bit#(5))) condFlags32 <- mkUniqueWrapper3(condFlags); + Wrapper3#(Tuple2#(Bit#(52), Bit#(11)),Tuple2#(Bit#(52), Bit#(11)),Tuple2#(Bit#(52), Bit#(11)), Tuple3#(Bit#(5),Bit#(5),Bit#(5))) condFlags64 <- mkUniqueWrapper3(condFlags); + Wrapper3#(Bit#(32),Bit#(32),Bit#(32),Tuple3#(Bit#(23),Bit#(23),Bit#(23))) getMant32 <- mkUniqueWrapper3(getMantissa); + Wrapper3#(Bit#(32),Bit#(32),Bit#(32),Tuple3#(Bit#(8),Bit#(8),Bit#(8))) getExp32 <- mkUniqueWrapper3(getExp); + Wrapper3#(Bit#(64),Bit#(64),Bit#(64),Tuple3#(Bit#(52),Bit#(52),Bit#(52))) getMant64 <- mkUniqueWrapper3(getMantissa); + Wrapper3#(Bit#(64),Bit#(64),Bit#(64),Tuple3#(Bit#(11),Bit#(11),Bit#(11))) getExp64 <- mkUniqueWrapper3(getExp); + Wrapper3#(Bit#(64),Bit#(64),Bit#(64),Tuple3#(Bit#(32),Bit#(32),Bit#(32))) setCanonicalNaN <- mkUniqueWrapper3(setCanNaN); + + Reg#(Bit#(32)) rg_clock <-mkReg(0); + //Reg#(Bit#(64)) operand1 <- mkReg(64'h17fffffffffff860); + //Reg#(Bit#(64)) operand2 <- mkReg(64'h0000000000000200); + //Reg#(Bit#(64)) operand3 <- mkReg(64'h000000000000005f); + Reg#(Bit#(32)) operand1 <- mkReg(32'h31f36ab4); + Reg#(Bit#(32)) operand2 <- mkReg(32'h08835f4d); + Reg#(Bit#(32)) operand3 <- mkReg(32'h0); + + rule rl_count_clock ; + rg_clock<=rg_clock+1; + if(rg_clock=='d20) $finish(0); + endrule + + rule rl_input1(rg_clock==1); + let {man1,man2,man3} <- getMant32.func(operand1,operand2, operand3); + let {exp1,exp2,exp3} <- getExp32.func(operand1,operand2, operand3); + let x <- condFlags32.func(tuple2(man1,exp1),tuple2(man2,exp2),tuple2(man3,exp3)); + let sign1 = operand1[31]; + let sign2 = operand2[31]; + let sign3 = operand3[31]; + uut._start(tuple3(sign1,exp1,man1),tuple3(sign2,exp2,man2),tuple3(sign3,exp3,man3),3'b0,1'b0,1'b0,1'b0,1'b1,x); +`ifdef verbose $display("giving inputs at %0d", rg_clock); `endif + + endrule + + rule rl_finish; + 
let res = uut.get_result(); + `ifdef verbose $display("Output = %h at %0d",res.final_result[31:0], rg_clock); `endif + endrule + +endmodule + +`ifdef fpu_hierarchical +(*synthesize*) +module mkfpu_fm_add_sub32(Ifc_fpu_fm_add_sub32); + Ifc_fpu_fm_add_sub#(32,23,8) uut <- mkfpu_fm_add_sub(); + + method Action _start(Tuple3#(Bit#(1),Bit#(8),Bit#(23)) _operand1, Tuple3#(Bit#(1),Bit#(8),Bit#(23)) _operand2,Tuple3#(Bit#(1),Bit#(8),Bit#(23)) _operand3, Bit#(3) rounding_mode, bit operation, bit _negate, bit mul, bit muladd, Tuple3#(Bit#(5),Bit#(5),Bit#(5)) flags); + uut._start(_operand1,_operand2,_operand3,rounding_mode,operation,_negate,mul,muladd,flags); + endmethod + method Floating_output#(32) get_result(); + return uut.get_result(); + endmethod + method Action flush; + uut.flush(); + endmethod +endmodule + +(*synthesize*) +module mkfpu_fm_add_sub64(Ifc_fpu_fm_add_sub64); + Ifc_fpu_fm_add_sub#(64,52,11) uut <- mkfpu_fm_add_sub(); + method Action _start(Tuple3#(Bit#(1),Bit#(11),Bit#(52)) _operand1, Tuple3#(Bit#(1),Bit#(11),Bit#(52)) _operand2,Tuple3#(Bit#(1),Bit#(11),Bit#(52)) _operand3, Bit#(3) rounding_mode, bit operation, bit _negate, bit mul,bit muladd, Tuple3#(Bit#(5),Bit#(5),Bit#(5)) flags); + uut._start(_operand1,_operand2,_operand3,rounding_mode,operation,_negate,mul,muladd, flags); + endmethod + method Floating_output#(64) get_result(); + return uut.get_result(); + endmethod + method Action flush; + uut.flush(); + endmethod +endmodule +`endif + +//module mkTb_fpu_fm_add_sub_2 (Empty); +// +//// RegFile #(Bit #(16), Bit #(100)) input_data <- mkRegFileFullLoad("./testcases/fma_inp_nor.txt"); +//// RegFile #(Bit #(16), Bit #(68)) input_data <- mkRegFileFullLoad("./testcases/mul_denormal_testcases.txt"); +// RegFile #(Bit #(16), Bit #(68)) input_data <- mkRegFileFullLoad("./testcases/Add_normal_testcases.hex"); +// Reg #(Bit #(16)) index <- mkReg(0); +// +// Ifc_fpu_fm_add_sub#(32,23,8,16) multiplier <- mkfpu_fm_add_sub(); +// Reg #(Bit #(32)) state_clock <- 
mkReg(1); +// Reg #(Bit #(1)) rg_state <- mkReg(0); +// +// Reg#(int) cnt <- mkReg(0); //File Variable +// let fh <- mkReg(InvalidFile) ; //File handler +// +// //rule for file creation +// rule open (cnt == 0 ) ; +// File tb_mul_output <- $fopen("tb_madd_output.hex", "w+"); +// fh <= tb_mul_output; +// cnt <= 1 ; +// endrule +// +// rule state_clock_count; +// state_clock <= state_clock + 1; +// endrule +// +// rule take_input_in (rg_state == 0); +// // multiplier._start(input_data.sub(index)[99:68],input_data.sub(index)[67:36],input_data.sub(index)[35:4],0,input_data.sub(index)[2:0],0,0); +// // multiplier._start(input_data.sub(index)[67:36],input_data.sub(index)[35:4],32'b0,0,input_data.sub(index)[2:0],0,0); +// multiplier._start(32'h3f800000, input_data.sub(index)[67:36],input_data.sub(index)[35:4],0,input_data.sub(index)[2:0],0,0); +// index <= index + 1; +// rg_state <= 1; +// endrule +// +// rule display_output (rg_state == 1); +// let abc = multiplier.get_result(); +// $fwrite(fh, "%h\n", abc.final_result[31:0]); +// rg_state <= 0; +// endrule +// +// rule end_testing (index == 16562); +// $finish(0); +// endrule : end_testing +// +//endmodule + +endpackage diff --git a/src/core/fpu/fpu_int_to_dp.bsv b/src/core/fpu/fpu_int_to_dp.bsv new file mode 100644 index 0000000..af10fc2 --- /dev/null +++ b/src/core/fpu/fpu_int_to_dp.bsv @@ -0,0 +1,140 @@ +/* +Authors : Vinod.G +Email : g.vinod1993@gmail.com +Last Update : 27th November 2017 +See LICENSE for more details +Description: +TODO +*/ +package fpu_int_to_dp; +import defined_types ::*; +import UniqueWrappers::*; +`include "defined_parameters.bsv" +//TODO Rework and optimize +function Bit#(m) zeroExtendLSB(Bit#(n) value) + provisos(Add#(a__, n, m)); + + Bit#(m) resp = 0; + resp[valueOf(m)-1:valueOf(m)-valueOf(n)] = value; + return resp; +endfunction + +function Bit#(m) truncateLSB(Bit#(n) value); + return value[valueOf(n)-1:valueOf(n)-valueOf(m)]; +endfunction + +interface Ifc_fpu_int_to_dp; + method 
ActionValue#(Floating_output#(64)) _start(Bit#(64) inp_int, Bit#(1) unsigned_bit, Bit#(1) long, Bit#(3) rounding_mode); +endinterface + + + function Bit#(69) roundFunc(Bit#(64) unrounded, Bit#(11) expo, Bit#(3) rounding_mode); + bit guard = unrounded[10]; + bit round = unrounded[9]; + bit sticky = 0; + bit sign = unrounded[63]; + Bit#(11) local_expo = expo; + Bit#(9) sticky_check = unrounded[8:0]; + if(sticky_check != '0) + sticky = 1; + bit inexact = (guard | round | sticky); + bit lv_roundup = 0; + Bit#(54) lv_man = {2'b0,unrounded[62:11]}; + if(rounding_mode == 'b000) + lv_roundup = guard & (unrounded[11] | round | sticky); + else if (rounding_mode == 'b100) + lv_roundup = guard; //& (round | sticky | ~sign); + else if (rounding_mode == 'b011) + lv_roundup = (guard | round | sticky) & (~sign); + else if (rounding_mode == 'b010) + lv_roundup = (guard | round | sticky) & (sign); + if(lv_roundup == 1) + lv_man = lv_man + 1; + if(lv_man[52] == 1) begin + local_expo = local_expo + 1; + end + let fflags = {1'b0,1'b0,1'b0,1'b0,inexact}; + return {fflags,sign,local_expo,lv_man[51:0]}; + endfunction + + `ifdef fpu_hierarchical + (*synthesize*) + `endif + module mkfpu_int_to_dp(Ifc_fpu_int_to_dp); + + //Wrapper3#(Bit#(32), Bit#(1), Bit#(3),Bit#(69)) fcvt_d_wwu <- mkUniqueWrapper3(fcvt_s_w_l); + //Wrapper3#(Bit#(64), Bit#(1), Bit#(3),Bit#(69)) fcvt_d_llu <- mkUniqueWrapper3(fcvt_s_w_l); + + method ActionValue#(Floating_output#(64)) _start(Bit#(64) inp_int, Bit#(1) unsigned_bit, Bit#(1) long, Bit#(3) rounding_mode); + Floating_output#(64) wr_final_out=?; + `ifdef verbose $display($time,"Giving inputs: %h unsigned %b long %b rounding %b", inp_int, unsigned_bit, long, rounding_mode); `endif + if((inp_int == 0 && long==1) || (inp_int[31:0] == 0 && long == 0)) + wr_final_out = Floating_output{ final_result : 64'b0, + fflags : 5'b0 + } ; + else if(long == 0) begin + Bit#(32) inp32 = inp_int[31:0]; + `ifdef verbose $display("inp_int : %b",inp32); `endif + Bool ubit = 
(unsigned_bit == 1); + Bit#(1) lv_sign = ubit? 0 : inp32[31]; + Bool sbit = (lv_sign==1); + Bit#(10) bias = '1; + Bit#(11) expo = zeroExtend(bias) + 31; + if(sbit) + inp32 = ~inp32+1; + Bit#(5) lv_zeros = truncate(pack(countZerosMSB(inp32))); + inp32 = inp32 << lv_zeros; + expo = expo - zeroExtend(lv_zeros); + Bit#(52) mantissa = zeroExtendLSB(inp32[30:0]); + Bit#(64) res = {lv_sign,expo,mantissa}; + wr_final_out = Floating_output { + final_result : res, + fflags : 0 + }; + end + else begin + `ifdef verbose $display("inp_int : %b",inp_int); `endif + Bool ubit = (unsigned_bit == 1); + Bit#(1) lv_sign = ubit? 0 : inp_int[63]; + Bool sbit = (lv_sign==1); + Bit#(10) bias = '1; + Bit#(11) expo = zeroExtend(bias) + 63; + if(sbit) + inp_int = ~inp_int + 1; + Bit#(6) lv_zeros = truncate(pack(countZerosMSB(inp_int))); + inp_int = inp_int << lv_zeros; + expo = expo - zeroExtend(lv_zeros); + Bit#(69) res = roundFunc({lv_sign,inp_int[62:0]},expo,rounding_mode); + wr_final_out = Floating_output { + final_result : res[63:0], + fflags : res[68:64] + }; + + end + return wr_final_out; + endmethod +endmodule + +//module mkTb(Empty); +// Reg#(Bit#(64)) rg_operand1<-mkReg(64'hffffffffe945c730); +// //Reg#(Bit#(64)) rg_operand1<-mkReg(~(64'hfffffffffffff812)+1); +// Reg#(Bit#(32)) rg_clock<-mkReg(0); +// Ifc_fpu_int_to_dp itod <- mkfpu_int_to_dp(); +// Reg#(Bit#(32)) rg_arbit <-mkReg(0); +// +// rule rl_clk_count; +// rg_clock<=rg_clock+1; +// endrule +// +// rule rl_start_1(rg_clock=='d0); +// `ifdef verbose $display("Giving inputs rg_operand 1 : %h through testbench",rg_operand1,$time); `endif +// itod._start(zeroExtend(rg_operand1),1'b0,1'b0,3'b000); +// endrule +// +// rule rl_display_result; +// let abc = itod.result_(); +// `ifdef verbose $display("Final result= %h", abc.final_result,$time); `endif +// $finish(0); +// endrule +//endmodule +endpackage diff --git a/src/core/fpu/fpu_int_to_sp.bsv b/src/core/fpu/fpu_int_to_sp.bsv new file mode 100644 index 0000000..d3783af --- 
/dev/null +++ b/src/core/fpu/fpu_int_to_sp.bsv @@ -0,0 +1,152 @@ +/* +Authors : Vinod.G +Email : g.vinod1993@gmail.com +Last Update : 27th November 2017 +See LICENSE for more details +Description: +TODO +*/ +package fpu_int_to_sp; + +import defined_types ::*; +import UniqueWrappers::*; +`include "defined_parameters.bsv" +interface Ifc_fpu_int_to_sp; + method ActionValue#(Floating_output#(32)) _start(Bit#(64) inp_int, Bit#(1) unsigned_bit, Bit#(1) long, Bit#(3) rounding_mode); +endinterface + + + function Bit#(37) roundFunc(Bit#(n) unrounded, Bit#(8) expo, Bit#(3) rounding_mode) + provisos( + Add#(a__,32,n) + ); + let nInd = valueOf(n); + bit guard = unrounded[nInd-25]; + bit round = unrounded[nInd-26]; + bit sticky = 0; + bit sign = unrounded[nInd-1]; + Bit#(8) local_expo = expo; + Bit#(TSub#(n,26)) sticky_check = unrounded[nInd-27:0]; + if(sticky_check != '0) + sticky = 1; + bit inexact = (guard | round | sticky); + bit lv_roundup = 0; + Bit#(25) lv_man = {2'b0,unrounded[nInd-2:nInd-24]}; + if(rounding_mode == 'b000) + lv_roundup = guard & (unrounded[nInd-24] | round | sticky); + else if (rounding_mode == 'b100) + lv_roundup = guard; //& (round | sticky | ~sign); + else if (rounding_mode == 'b011) + lv_roundup = (guard | round | sticky) & (~sign); + else if (rounding_mode == 'b010) + lv_roundup = (guard | round | sticky) & (sign); + if(lv_roundup == 1) + lv_man = lv_man + 1; + if(lv_man[23] == 1) begin + local_expo = local_expo + 1; + end + let fflags = {1'b0,1'b0,1'b0,1'b0,inexact}; + return {fflags,sign,local_expo,lv_man[22:0]}; + endfunction + + function Bit#(37) fcvt_s_w_l (Bit#(n) inp, Bit#(1) unsigned_bit, Bit#(3) rounding_mode) + provisos( + Add#(a__,32,n), + Log#(n,logN), + Add#(b__,logN,8), + Add#(c__, logN, TLog#(TAdd#(1, n))) + ); + let nInd = valueOf(n); + Bool ubit = (unsigned_bit == 1); + Bit#(1) lv_sign = ubit? 
0 : inp[nInd-1]; + Bool sbit = (lv_sign == 1); + Bit#(7) bias = '1; + Bit#(8) expo = zeroExtend(bias) + fromInteger(nInd-1); + if(sbit) + inp = ~inp + 1; + Bit#(logN) lv_zeros = truncate(pack(countZerosMSB(inp))); + inp = inp << lv_zeros; + expo = expo - zeroExtend(pack(lv_zeros)); + Bit#(TSub#(n,1)) inpS = inp[nInd-2:0]; + Bit#(n) inp_temp = {lv_sign,inpS}; + Bit#(37) res = roundFunc(inp_temp, expo, rounding_mode); + return res; + endfunction + + +`ifdef fpu_hierarchical +(*synthesize*) +`endif +module mkfpu_int_to_sp(Ifc_fpu_int_to_sp); + + + Wrapper3#(Bit#(32), Bit#(1), Bit#(3),Bit#(37)) fcvt_s_wwu <- mkUniqueWrapper3(fcvt_s_w_l); + Wrapper3#(Bit#(64), Bit#(1), Bit#(3),Bit#(37)) fcvt_s_llu <- mkUniqueWrapper3(fcvt_s_w_l); + + method ActionValue#(Floating_output#(32)) _start(Bit#(64) inp_int, Bit#(1) unsigned_bit, Bit#(1) long, Bit#(3) rounding_mode); + `ifdef verbose $display($time,"\tGiving inputs: %h unsigned %b long %b rounding %b", inp_int, unsigned_bit, long, rounding_mode); `endif + Floating_output#(32) wr_final_out=?; + if((inp_int == 0 && long==1) || (inp_int[31:0] == 0 && long == 0)) + wr_final_out = Floating_output{ final_result : 32'b0, + fflags : 5'b0 + } ; + else if(long == 0) begin + Bit#(32) inp32 = truncate(inp_int); + `ifdef verbose $display("inp_int : %b",inp32); `endif + Bit#(1) lv_sign = inp32[31]; + if(unsigned_bit == 0) begin + if((inp32 & 'h7fffffff) == 0) begin + Bit#(32) res = lv_sign==1? 
{1'b1,8'h9e,'0} : '0; + wr_final_out = Floating_output{ + final_result : res, + fflags : 0 + }; + end + else begin + Bit#(37) ressw <- fcvt_s_wwu.func(inp32,unsigned_bit,rounding_mode); + wr_final_out = Floating_output{ + final_result : (ressw[31:0]), + fflags : ressw[36:32] + }; + end + end + else begin + Bit#(37) res <- fcvt_s_wwu.func(inp32,unsigned_bit,rounding_mode); + wr_final_out = Floating_output{ + final_result : (res[31:0]), + fflags : res[36:32] + }; + end + end + else begin + Bit#(37) res <- fcvt_s_llu.func(inp_int,unsigned_bit,rounding_mode); + wr_final_out = Floating_output { + final_result : res[31:0], + fflags : res[36:32] + }; + + end + return wr_final_out; + endmethod +endmodule + +module mkTb(Empty); + Reg#(Bit#(64)) rg_operand1<-mkReg(64'h039e781bab642be4); + //Reg#(Bit#(64)) rg_operand1<-mkReg(~(64'hfffffffffffff812)+1); + Reg#(Bit#(32)) rg_clock<-mkReg(0); + Ifc_fpu_int_to_sp itof <- mkfpu_int_to_sp(); + Reg#(Bit#(32)) rg_arbit <-mkReg(0); + + rule rl_clk_count; + rg_clock<=rg_clock+1; + endrule + + rule rl_start_1(rg_clock=='d0); + `ifdef verbose $display("Giving inputs rg_operand 1 : %h through testbench",rg_operand1,$time); `endif + let abc<-itof._start(zeroExtend(rg_operand1),1'b1,1'b0,3'b000); + `ifdef verbose $display("Final result= %h fflags= %h", abc.final_result, abc.fflags, $time); `endif + $finish(0); + endrule + +endmodule +endpackage diff --git a/src/core/fpu/fpu_sign_injection.bsv b/src/core/fpu/fpu_sign_injection.bsv new file mode 100644 index 0000000..0b949df --- /dev/null +++ b/src/core/fpu/fpu_sign_injection.bsv @@ -0,0 +1,102 @@ +/* +Authors : Vinod.G, Aditya Govardhan +Email : g.vinod1993@gmail.com +Last Update : 27th November 2017 +See LICENSE for more details + +Description: +This module performs the sign injection on the floating point value taken from the rs1 register. 
+The different instructions work as follows +FSGNJ : Operation bit - 000, The final result is same as that of operand1 but has the sign of operand 2's sign. +FSGNJN : Operation bit - 001, The final result is same as that of operand 1 but has the opposite sign of operand 2's sign +FSGNJX : Operation bit - 010, The final result is same as that of operand 1 but the sign bit is the exclusive-or of operand 1 and operand 2 +*/ + +package fpu_sign_injection; + +import defined_types::*; +`include "defined_parameters.bsv" + +interface Ifc_fpu_sign_injection#(numeric type fpinp, numeric type fpman, numeric type fpexp); + method ActionValue#(Floating_output#(fpinp)) _start(Bit#(fpinp) operand1, Bit#(fpinp) operand2, Bit#(3) operation); +endinterface + +`ifdef fpu_hierarchical +interface Ifc_fpu_sign_injection32; + method ActionValue#(Floating_output#(32)) _start(Bit#(32) operand1, Bit#(32) operand2, Bit#(3) operation); +endinterface + +interface Ifc_fpu_sign_injection64; + method ActionValue#(Floating_output#(64)) _start(Bit#(64) operand1, Bit#(64) operand2, Bit#(3) operation); +endinterface +`endif + + +module mkfpu_sign_injection(Ifc_fpu_sign_injection#(fpinp,fpman,fpexp)) + provisos ( + Add#(TAdd#(fpexp,fpman),1,fpinp), //Defining fpinp to be fpexp + fpman + 1 + Add#(fpexp,2,fpexp2) + ); + + let fPINP = valueOf(fpinp); + + method ActionValue#(Floating_output#(fpinp)) _start(Bit#(fpinp) operand1, Bit#(fpinp) operand2, Bit#(3) operation); + + if(operation == 3'b000) //FSGNJ + operand1[fPINP-1] = operand2[fPINP-1]; + else if(operation == 3'b001) //FSGNJN + operand1[fPINP-1] = ~operand2[fPINP-1]; + else //FSGNJX + operand1[fPINP-1] = operand1[fPINP-1]^operand2[fPINP-1]; + + Bit#(5) lv_exception = 0; + + return Floating_output { + final_result : zeroExtend(operand1), + fflags : lv_exception}; + endmethod +endmodule + + +`ifdef fpu_hierarchical + (*synthesize*) + module mkfpu_sign_injection32(Ifc_fpu_sign_injection32); + Ifc_fpu_sign_injection#(32,23,8) uut <- 
mkfpu_sign_injection(); + method ActionValue#(Floating_output#(32)) _start(Bit#(32) operand1, Bit#(32) operand2, Bit#(3) operation); + let x <- uut._start(operand1,operand2,operation); + return x; + endmethod + endmodule + + (*synthesize*) + module mkfpu_sign_injection64(Ifc_fpu_sign_injection64); + Ifc_fpu_sign_injection#(64,52,11) uut <- mkfpu_sign_injection(); + method ActionValue#(Floating_output#(64)) _start(Bit#(64) operand1, Bit#(64) operand2, Bit#(3) operation); + let x <- uut._start(operand1,operand2,operation); + return x; + endmethod + endmodule +`endif + +// module mkTb_fpu_sign_injection(); +// Ifc_fpu_sign_injection#(32,23,8) inst_fpu_sign_injection <- mkfpu_sign_injection(); +// Reg#(Bit#(32)) rg_clock <- mkReg(0); +// Reg#(Bit#(32)) rg_operand1<-mkReg('hff800000); +// Reg#(Bit#(32)) rg_operand2<-mkReg('hff800000); + +// rule get_input(rg_clock == 0); +// inst_fpu_sign_injection._start(rg_operand1,rg_operand2,'b000); +// rg_clock <= rg_clock + 1; +// endrule + +// rule get_output; +// let lv_result = inst_fpu_sign_injection.result_(); +// `ifdef verbose $display("Result is: %h",lv_result.final_result); `endif +// `ifdef verbose $display("Sign=%b Exponent=%b Mantissa=%b",lv_result.final_result[31],lv_result.final_result[30:23],lv_result.final_result[22:0]); `endif +// $finish(0); +// endrule + + + +// endmodule +endpackage diff --git a/src/core/fpu/fpu_sp_to_int.bsv b/src/core/fpu/fpu_sp_to_int.bsv new file mode 100644 index 0000000..2d20c2a --- /dev/null +++ b/src/core/fpu/fpu_sp_to_int.bsv @@ -0,0 +1,305 @@ +/* +Authors : Vinod.G +Email : g.vinod1993@gmail.com +Last Update : 27th November 2017 +Description : +TODO +*/ + +package fpu_sp_to_int; +import defined_types::*; +import UniqueWrappers::*; +`include "defined_parameters.bsv" +interface Ifc_fpu_sp_to_int; + method ActionValue#(Floating_output#(`Reg_width)) _start(Bit#(1) sign,Bit#(8) exponent, Bit#(23) mantissa, bit convert_unsigned, bit convert_long, Bit#(3) rounding_mode, Bit#(5) flags); 
+endinterface + +`ifdef fpu_hierarchical +(*synthesize*) +`endif +module mkfpu_sp_to_int(Ifc_fpu_sp_to_int); + method ActionValue#(Floating_output#(`Reg_width)) _start(Bit#(1) lv_sign,Bit#(8) lv_exponent, Bit#(23) lv_mantissa, bit convert_unsigned, bit convert_long, Bit#(3) rounding_mode, Bit#(5) flags); + bit lv_overflow = 0; + bit lv_zero = flags[3]; + bit lv_infinity = flags[1]; + bit lv_invalid = flags[0] | flags[2]; + bit lv_denormal = flags[4]; + bit lv_manzero = |lv_mantissa; + bit lv_inexact = 0; + Bool to_round = False; + Bool rne = (rounding_mode == 3'b000); + Bool rtz = (rounding_mode == 3'b001); + Bool rdn = (rounding_mode == 3'b010); + Bool rup = (rounding_mode == 3'b011); + Bool rmm = (rounding_mode == 3'b100); + Bit#(8) lv_exp = lv_exponent; + `ifdef verbose $display("sign = %b exponent = %h mantissa = %h zero_flag = %b invalid_flag = %b inifnity: %b denormal %b", lv_sign, lv_exponent, lv_mantissa, lv_zero, lv_invalid, lv_infinity,lv_denormal); `endif + Int#(8) lv_original_exponent = unpack(lv_exp - 127); // removing the bias + `ifdef verbose $display("lv_original_exponent : %d flags: %b",lv_original_exponent,flags);`endif + Bit#(`Reg_width) final_result = 0; + Bit#(TAdd#(23, `Reg_width)) final_man = {'0,1'b1,lv_mantissa}; + if(lv_zero == 1) + final_result = 0; + else if(lv_denormal == 1 || (lv_original_exponent <= -1 && (lv_infinity|lv_invalid) == 0)) begin + if(lv_sign==1 && convert_unsigned==1 && ((lv_original_exponent==-1 && (rmm||(rne && lv_manzero==1))) || (lv_original_exponent<=-1 &&rdn))) + lv_invalid = 1; + else + lv_inexact = 1; + if(lv_sign == 0 && rup) + final_result = 1; + else if(rdn && lv_sign == 1 && convert_unsigned == 0) + final_result = '1; + else if(lv_original_exponent == -1 && (rmm||(rne && lv_manzero == 1)))begin //NOTE: lv_manzero = |lv_mantissa, i.e. it is 1 when the mantissa is non-zero (despite the name); with exponent -1 that means |value| > 0.5 + if(lv_sign == 0) + final_result = 1; + else if(convert_unsigned == 0) + final_result = '1; + else + final_result = 0; + end + else + final_result = 0; + end + else if(convert_long == 0) begin //FCVT.W.S FCVT.WU.S + 
if(convert_unsigned == 0) begin //FCVT.W.S + Bit#(31) all_ones = '1; + if(lv_infinity == 1 || lv_invalid == 1) begin + final_result = (lv_sign==1) ?(lv_invalid==1? zeroExtend(all_ones) : signExtend(32'h80000000)) : zeroExtend(all_ones); + end + else if(lv_original_exponent < 'd31) begin + final_man = final_man << lv_original_exponent; + Bit#(32) y = final_man[54:23]; + final_result = signExtend(y); + lv_mantissa = final_man[22:0]; + to_round = True; + end + else if(lv_original_exponent >= 'd31) begin + `ifdef verbose $display("Overflow");`endif + // lv_overflow = 1; + lv_invalid = 1; + if(lv_sign == 0) + final_result = zeroExtend(all_ones); + else begin + if(lv_original_exponent == 'd31 && lv_manzero == 0) + lv_invalid = 0 ; //Since we are exactly representing the number? + final_result = signExtend(32'h80000000); + end + end + end + else begin //FCVT.WU.S + Bit#(32) all_ones = '1; + if(lv_infinity == 1 || lv_invalid == 1) + final_result = (lv_sign==1) ? (lv_invalid==1? signExtend(all_ones) : '0) : signExtend(all_ones); + else if(lv_original_exponent < 'd32) begin + final_man = final_man << lv_original_exponent; + Bit#(32) y = final_man[54:23]; + final_result = signExtend(y); + lv_mantissa = final_man[22:0]; + to_round = True; + end + else if(lv_original_exponent >= 'd32) begin + `ifdef verbose $display("Overflow");`endif + //lv_overflow = 1; + lv_invalid = 1; + if(lv_sign == 0) + final_result = signExtend(all_ones); + else + final_result = '0; + end + end + end + else begin + if(convert_unsigned == 0) begin //FCVT.L.S + Bit#(63) all_ones = '1; + if(lv_infinity == 1 || lv_invalid == 1) + final_result = (lv_sign==1) ?(lv_invalid==1? 
zeroExtend(all_ones) : signExtend(64'h8000000000000000)) : zeroExtend(all_ones); + else if(lv_original_exponent < 'd63) begin + final_man = final_man << lv_original_exponent; + `ifdef verbose $display("final_man : %b",final_man);`endif + Bit#(64) y = zeroExtend(final_man[86:23]); + final_result = y; + lv_mantissa = final_man[22:0]; + to_round = True; + end + else if(lv_original_exponent >= 'd63) begin + `ifdef verbose $display("Overflow");`endif + //lv_overflow = 1; + lv_invalid = 1; + if(lv_sign == 0) + final_result = zeroExtend(all_ones); + else begin + if(lv_original_exponent == 'd63 && lv_manzero == 0 ) + lv_invalid = 0; //Since we are exactly representing the input number + final_result = signExtend(64'h8000000000000000); + end + end + end + else begin //FCVT.LU.S + Bit#(64) all_ones = '1; + if(lv_infinity == 1 || lv_invalid == 1) + final_result = (lv_sign==1) ? (lv_invalid==1? signExtend(all_ones) : '0) : signExtend(all_ones); + else if(lv_original_exponent < 'd64) begin + final_man = final_man << lv_original_exponent; + Bit#(64) y = zeroExtend(final_man[86:23]); + final_result = y; + lv_mantissa = final_man[22:0]; + to_round = True; + end + else if(lv_original_exponent >= 'd64) begin + `ifdef verbose $display("Overflow");`endif + //lv_overflow = 1; + lv_invalid = 1; + if(lv_sign == 0) + final_result = signExtend(all_ones); + else + final_result = '0; + end + end + + end + + bit lv_guard = lv_mantissa[22]; //MSB of the already shifted mantissa is guard bit + bit lv_round = lv_mantissa[21]; //next bit is round bit + bit lv_sticky = |(lv_mantissa<<2); //remaining bits determine the sticky bit + bit lv_round_up = 0; + bit lv_inexact1 = lv_guard | lv_round | lv_sticky; + if(to_round) begin + if(rounding_mode == 'b000) lv_round_up = lv_guard & (final_result[0] | lv_round | lv_sticky); //Round to nearest ties to even + else if(rmm) lv_round_up = lv_guard; //& (lv_round | lv_sticky | ~lv_sign); //Round to nearest ties to max magnitude + else if(rdn) lv_round_up = 
lv_inexact1 & (lv_sign); //Round down to -infinity + else if(rup) lv_round_up = lv_inexact1 & (~lv_sign); //Round up to +infinity + lv_inexact = lv_inexact | lv_inexact1; + if(lv_round_up == 1) begin //Should set the overflow flag here right? + lv_invalid = 1; + if(convert_long == 0 && convert_unsigned == 0 && lv_original_exponent == 30 && final_result[30:0] == '1 && lv_sign == 0) //Overflow.. Beyond representable number after rounding + final_result = 64'h7fffffff; + else if(convert_long == 0 && convert_unsigned == 1 && lv_original_exponent == 31 && final_result[31:0] == '1 && lv_sign == 0) + final_result = 64'hffffffffffffffff; + else if(convert_long == 1 && convert_unsigned == 0 && lv_original_exponent == 62 && final_result[62:0] == '1 && lv_sign == 0) //Overflow.. Beyond representable number after rounding + final_result = 64'h7fffffffffffffff; + else if(convert_long == 1 && convert_unsigned == 1 && lv_original_exponent == 63 && final_result[63:0] == '1 && lv_sign == 0) + final_result = 64'hffffffffffffffff; + else begin + lv_invalid = 0; + final_result = final_result + 1; + if(convert_long == 0 && final_result[31]==1) + final_result = signExtend(final_result[31:0]); + end + end + `ifdef verbose $display("rounding_mode == %b",rounding_mode);`endif + `ifdef verbose $display("round_up = %b", lv_round_up);`endif + + if(convert_unsigned == 0 && lv_sign == 1)begin //Negating the output if floating point number is negative and converted to signed word/long + final_result = ~final_result + 1; + if(convert_long == 0 && final_result[31] == 1) + final_result = signExtend(final_result[31:0]); + `ifdef verbose $display("Negating output final_result : %b", final_result);`endif + end + else if(convert_unsigned == 1 && lv_sign == 1) begin + final_result = 0; + lv_invalid = 1; + end + end + if((lv_invalid|lv_infinity) == 1) begin //What about Quiet NaN?? What does the Spec Say? 
+ lv_overflow = 0; + lv_inexact = 0; + end + Bit#(5) fflags={lv_invalid|lv_infinity,1'b0,lv_overflow,1'b0,lv_inexact}; + return Floating_output{ + final_result: final_result, + fflags: fflags}; + + + endmethod +endmodule +module mkTb(Empty); + + function Tuple3#(Bit#(5), Bit#(5), Bit#(5)) condFlags (Tuple2#(Bit#(m), Bit#(e)) x, Tuple2#(Bit#(m), Bit#(e)) y, Tuple2#(Bit#(m),Bit#(e)) z); + let s = valueOf(m); + let man1 = tpl_1(x); + let expo1 = tpl_2(x); + let man2 = tpl_1(y); + let expo2 = tpl_2(y); + let man3 = tpl_1(z); + let expo3 = tpl_2(z); + Bit#(5) flags1, flags2,flags3; + Bool expZ1 = (expo1 == 0); + Bool manZ1 = (man1 == 0); + Bool expO1 = (expo1 == '1); + Bool manO1 = (man1 == '1); + Bool topB1 = (man1[s-1] == 1); + Bool expZ2 = (expo2 == 0); + Bool manZ2 = (man2 == 0); + Bool expO2 = (expo2 == '1); + Bool manO2 = (man2 == '1); + Bool topB2 = (man2[s-1] == 1 && man2 !=0); + Bool expZ3 = (expo3 == 0); + Bool manZ3 = (man3 == 0); + Bool expO3 = (expo3 == '1); + Bool manO3 = (man3 == '1); + Bool topB3 = (man3[s-1] == 1 && man3 !=0); + flags1 = {pack(expZ1 && !manZ1),pack(manZ1 && expZ1),pack(expO1 && topB1),pack(expO1 && manZ1),pack(expO1 && !topB1 && !manZ1)}; //Denormal, isZero, QNaN, Infinity, SNaN + flags2 = {pack(expZ2 && !manZ2),pack(manZ2 && expZ2),pack(expO2 && topB2),pack(expO2 && manZ2),pack(expO2 && !topB2 && !manZ2)}; //Denormal, isZero, QNaN, Infinity, SNaN + flags3 = {pack(expZ3 && !manZ3),pack(manZ3 && expZ3),pack(expO3 && topB3),pack(expO3 && manZ3),pack(expO3 && !topB3 && !manZ3)}; //Denormal, isZero, QNaN, Infinity, SNaN + return tuple3(flags1,flags2,flags3); + endfunction + + function Tuple3#(Bit#(m),Bit#(m), Bit#(m)) getMantissa (Bit#(n) op1, Bit#(n) op2, Bit#(n) op3) + provisos(Add#(TAdd#(m,1),e,n), + Add#(7,a__,e) + ); + let expo = valueOf(e); + let man = valueOf(m); + return tuple3(op1[man-1:0],op2[man-1:0],op3[man-1:0]); + endfunction + + function Tuple3#(Bit#(e), Bit#(e), Bit#(e)) getExp (Bit#(n) op1, Bit#(n) op2, Bit#(n) op3) + 
provisos(Add#(TAdd#(m,1),e,n), + Add#(7,a__,e) + ); + let inp = valueOf(n); + let man = valueOf(m); + return tuple3(op1[inp-2:man], op2[inp-2:man], op3[inp-2:man]); + endfunction + + function Bool isNaNBox(Bit#(64) op); + return (op[63:32]=='1); + endfunction + + function Tuple3#(Bit#(32),Bit#(32),Bit#(32)) setCanNaN (Bit#(64) op1, Bit#(64) op2, Bit#(64) op3); + return tuple3(isNaNBox(op1)? truncate(op1) : 32'h7fc00000, isNaNBox(op2)? truncate(op2) : 32'h7fc00000, isNaNBox(op3)? truncate(op3) : 32'h7fc00000); + endfunction +Wrapper3#(Tuple2#(Bit#(23), Bit#(8)),Tuple2#(Bit#(23), Bit#(8)), Tuple2#(Bit#(23), Bit#(8)), Tuple3#(Bit#(5),Bit#(5),Bit#(5))) condFlags32 <- mkUniqueWrapper3(condFlags); + Wrapper3#(Tuple2#(Bit#(52), Bit#(11)),Tuple2#(Bit#(52), Bit#(11)),Tuple2#(Bit#(52), Bit#(11)), Tuple3#(Bit#(5),Bit#(5),Bit#(5))) condFlags64 <- mkUniqueWrapper3(condFlags); + Wrapper3#(Bit#(32),Bit#(32),Bit#(32),Tuple3#(Bit#(23),Bit#(23),Bit#(23))) getMant32 <- mkUniqueWrapper3(getMantissa); + Wrapper3#(Bit#(32),Bit#(32),Bit#(32),Tuple3#(Bit#(8),Bit#(8),Bit#(8))) getExp32 <- mkUniqueWrapper3(getExp); + Wrapper3#(Bit#(64),Bit#(64),Bit#(64),Tuple3#(Bit#(52),Bit#(52),Bit#(52))) getMant64 <- mkUniqueWrapper3(getMantissa); + Wrapper3#(Bit#(64),Bit#(64),Bit#(64),Tuple3#(Bit#(11),Bit#(11),Bit#(11))) getExp64 <- mkUniqueWrapper3(getExp); + Wrapper3#(Bit#(64),Bit#(64),Bit#(64),Tuple3#(Bit#(32),Bit#(32),Bit#(32))) setCanonicalNaN <- mkUniqueWrapper3(setCanNaN); + + + Ifc_fpu_sp_to_int converter <- mkfpu_sp_to_int(); + Reg#(Bit#(32)) state_clock <- mkReg(0); + Reg#(Bit#(32)) wr_operand1 <- mkReg('hbf7f0000); + //Reg#(Bit#(32)) wr_operand1 <- mkReg('hbf214efa); + //Reg#(Bit#(32)) wr_operand1 <- mkReg('h91c82527); + + rule state_clock_count; + state_clock <= state_clock + 1; + if(state_clock == 'd5) $finish; + endrule + + rule give_input(state_clock == 'd1); + // let {op1,op2,op3} <- setCanonicalNaN.func(wr_operand1,'0,'0); + let {man1,man2,man3} <- getMant32.func(wr_operand1, 0,0); + let 
{exp1,exp2,exp3} <- getExp32.func(wr_operand1, 0,0); + let {flags1,flags2,flags3} <- condFlags32.func(tuple2(man1,exp1),tuple2(man2,exp2),tuple2(0,0)); + let sign1 = wr_operand1[31]; +`ifdef verbose $display("input %b %b %b given at %0d", sign1, exp1, man1, state_clock);`endif + let x <- converter._start(sign1,exp1,man1, 1, 1, 3'b010,flags1); + `ifdef verbose $display("output : %h fflags : %h",x.final_result,x.fflags); `endif + endrule + + + endmodule +endpackage diff --git a/src/core/fpu/fpu_sqrt.bsv b/src/core/fpu/fpu_sqrt.bsv new file mode 100644 index 0000000..773f9f1 --- /dev/null +++ b/src/core/fpu/fpu_sqrt.bsv @@ -0,0 +1,511 @@ +/* +Authors : Vinod.G, Rishi Naidu, Aditya Govardhan +Email : g.vinod1993@gmail.com +Last Update : 27th November 2017 +See LICENSE for more details + +Implementation is based on an IEEE paper titled: +"Implementation of Single Precision Floating Point Square Root on FPGAs" +Description: +TODO +*/ + +package fpu_sqrt; +`include "defined_parameters.bsv" +import defined_types::*; +import RegFile::*; +import FIFO::*; +import SpecialFIFOs::*; +import ConfigReg::*; +typedef struct{ + Bit#(TMul#(TAdd#(fpman,3),2)) mantissa; //Holds the extended mantissa + Bit#(TAdd#(fpman,3)) result_mantissa; //Holds the Output mantissa + Bit#(TAdd#(fpexp,1)) exponent; + bit sign; //Final sign bit + Bit#(TAdd#(fpman,6)) remainder; //Remainder after each iteration + Bit#(TAdd#(fpman,3)) root; //Root after each iteration + Bit#(3) rounding_mode; +}Stage_data#(numeric type fpman, numeric type fpexp) deriving(Bits,Eq); //Data structure of interstage FIFO and register + +import DReg::*; + + + +interface Ifc_fpu_sqrt#(numeric type fpinp, numeric type fpman, numeric type fpexp); + //Input Methods + method Action _start(Bit#(1) sign, Bit#(fpman) lv_mantissa, Bit#(fpexp) lv_exponent, Bit#(3) rounding_mode, Bit#(5) condFlags); + + //Output Methods +// method Action deque_buffer(); + method Maybe#(Floating_output#(fpinp)) get_result(); + method Action flush; + 
+endinterface
+
+`ifdef fpu_hierarchical
+// Non-polymorphic single-precision view of Ifc_fpu_sqrt (23-bit mantissa, 8-bit exponent),
+// used by the separately synthesized wrapper mkfpu_sqrt32.
+interface Ifc_fpu_sqrt32;
+    //Input Methods
+    method Action _start(Bit#(1) sign, Bit#(23) lv_mantissa, Bit#(8) lv_exponent, Bit#(3) rounding_mode, Bit#(5) condFlags);
+
+    //Output Methods
+//  method Action deque_buffer();
+    method Maybe#(Floating_output#(32)) get_result();
+    method Action flush;
+endinterface
+
+// Non-polymorphic double-precision view of Ifc_fpu_sqrt (52-bit mantissa, 11-bit exponent),
+// used by the separately synthesized wrapper mkfpu_sqrt64.
+interface Ifc_fpu_sqrt64;
+    //Input Methods
+    method Action _start(Bit#(1) sign, Bit#(52) lv_mantissa, Bit#(11) lv_exponent, Bit#(3) rounding_mode, Bit#(5) condFlags);
+    //Output Methods
+//  method Action deque_buffer();
+    method Maybe#(Floating_output#(64)) get_result();
+    method Action flush;
+endinterface
+`endif
+
+
+
+// Polymorphic iterative square-root unit.
+// rg_state is 0 when idle. _start performs iteration 1 and moves to state 1; rl_stage2,
+// rl_inter_stage and rl_final_stage then perform one iteration per cycle until state
+// fpman+2, where the result is rounded, packed and written to ff_final_out.
+//(*synthesize*)
+module mkfpu_sqrt(Ifc_fpu_sqrt#(fpinp,fpman,fpexp))
+    provisos(
+        Add#(TAdd#(fpman,fpexp),1,fpinp),     // fpinp = sign + exponent + mantissa
+        Add#(fpman,3,fpman3),                 // hidden bit + mantissa + guard + round
+        Add#(fpman3,2,fpman5),
+        Add#(fpman,5,fpman5),
+        Add#(fpman5,1,fpman6),                // remainder width
+        Mul#(fpman3,2,ext_fpman),             // two operand bits are consumed per root bit
+        Add#(fpexp,1,fpexp1),
+        Log#(TAdd#(1,ext_fpman),ext_fplog),   // width of the leading-zero count
+        //per request of bsc
+        Add#(1, a__, fpexp),
+        Add#(2, b__, fpman6),
+        Add#(c__, 1, fpman3),
+        Add#(d__, ext_fplog, fpexp1),
+        Add#(e__, 1, b__)
+        // Add#(d__, 1, b__)
+        //Add#(a__, TAdd#(1, TAdd#(fpexp, TAdd#(1, TSub#(fpman, 1)))), 64),
+        //Add#(b__, ext_fplog, fpexp1),
+        //Add#(1,c__,fpexp),
+        //Add#(d__,TAdd#(fpman3,1),e__),
+        //Add#(1,e__,f__),
+        //Add#(1,f__,ext_fpman),
+        //Add#(3,g__,fpman6),
+        //Add#(h__,1,g__),
+        //Add#(i__,1,fpman3),
+        //Add#(j__,2,fpman6),
+        //Add#(k__,1,j__)
+    );
+
+    // Integer copies of the numeric types, used as bit indices below.
+    let fPMAN  = valueOf(fpman);
+    let fPMAN3 = valueOf(fpman3);
+    let fPMAN5 = valueOf(fpman5);
+    let fPMAN6 = valueOf(fpman6);
+    let fPEXP  = valueOf(fpexp);
+    let fPINP  = valueOf(fpinp);
+    let eXT    = valueOf(ext_fpman);
+
+    Reg#(Maybe#(Floating_output#(fpinp))) ff_final_out <- mkDReg(tagged Invalid);    //Final output register (a DReg, not a FIFO: it falls back to Invalid when not written)
+
+    ConfigReg#(Stage_data#(fpman,fpexp)) rg_inter_stage <- mkConfigReg(?);    //Inter Stage register
+    ConfigReg#(Bit#(6)) rg_state <-mkConfigReg(0);    //State counter of the module (0 = idle, counts up to fpman+2)
+    Wire#(Bool) wr_flush <- mkDWire(False);           //True only in a cycle where flush is called
+
+    // A flush abandons the operation in flight by returning the state counter to idle.
+    // Every iteration rule is additionally guarded by !wr_flush.
+    (*mutually_exclusive = "rl_flush,rl_stage2,rl_inter_stage,rl_final_stage"*)
+    rule rl_flush(wr_flush);
+        rg_state <= 0;
+    endrule
+    //***********ITERATION :2********************//
+    // Same step as rl_inter_stage, fired once in state 1 directly after _start.
+    rule rl_stage2 (rg_state==1 && !wr_flush);
+        let lv_remainder = rg_inter_stage.remainder;    //Get remainder data from stage1
+        Bit#(fpman3) lv_root = rg_inter_stage.root;     //Get root value from stage1
+        let mantissa = rg_inter_stage.mantissa;         //Updated mantissa
+        let rounding_mode = rg_inter_stage.rounding_mode;
+        Bit#(fpman3) result_mantissa = rg_inter_stage.result_mantissa;//Get result value
+        // Remainder shifted left by two with the next two operand bits appended.
+        Bit#(fpman6) lv_remainder_temp = {lv_remainder[fPMAN3:0],mantissa[eXT-1],mantissa[eXT-2]};
+        Bit#(fpman6) lv_root_temp_1 = {1'b0,lv_root[fPMAN3-1:0],1'b1,1'b1};    // {root,2'b11}: added when the remainder is negative
+        Bit#(fpman6) lv_root_temp_2 = {1'b0,lv_root[fPMAN3-1:0],1'b0,1'b1};    // {root,2'b01}: subtracted otherwise
+        //Determining remainder
+        if (lv_remainder[fPMAN5]==1) begin    //When r <0
+            lv_remainder = lv_remainder_temp + lv_root_temp_1;
+        end
+        else begin
+            lv_remainder = lv_remainder_temp - lv_root_temp_2;
+        end
+
+        //Determining quotient
+        // The new root bit is 0 when the updated remainder is negative, 1 otherwise.
+        if(lv_remainder[fPMAN5]==1'b1) begin    //When r <0
+            lv_root = {lv_root[fPMAN3-2:0],1'b0};
+        end
+        else begin
+            lv_root = {lv_root[fPMAN3-2:0],1'b1};
+        end
+
+        result_mantissa[0]= lv_root[0];    //Storing the next bit in result_mantissa
+        mantissa = mantissa <<2;    //Shifting mantissa to get next 2 bits
+        result_mantissa = result_mantissa <<1;    //Shifting result_mantissa to make space to store the next bit
+        rg_state <= rg_state +1;    //Incrementing state counter
+
+        `ifdef verbose $display("****************************************State = %d", rg_state); `endif
+        `ifdef verbose $display("Remainder =%h", lv_remainder);`endif
+        `ifdef verbose $display("Mantissa = %h",result_mantissa);`endif
+
+        //Storing the required values in register
+
+        rg_inter_stage <= Stage_data{mantissa : mantissa,
+                                     result_mantissa : result_mantissa,
+                                     root: lv_root ,
+                                     remainder:lv_remainder,
+                                     sign : rg_inter_stage.sign,
+                                     exponent : rg_inter_stage.exponent,
+                                     rounding_mode : rounding_mode
+                                    };
+    endrule
+
+    //********************ITERATION : 3 TO fpman+2 (3 TO 25 for single precision)**************
+    //RECURSIVE STAGE (saves hardware)
+    // Fires in states 2 .. fpman+1 and re-uses the same logic every cycle.
+    rule rl_inter_stage (rg_state>1 && rg_state < fromInteger(fPMAN3-1) && !wr_flush );
+        //Here register is used instead of FIFO as we have to read and write in the same cycle
+
+        let lv_remainder = rg_inter_stage.remainder;    //Getting remainder
+        Bit#(fpman3) lv_root = rg_inter_stage.root;     //Getting root value
+        let mantissa = rg_inter_stage.mantissa;         //Getting updated mantissa value
+        let rounding_mode = rg_inter_stage.rounding_mode;
+        Bit#(fpman3) result_mantissa = rg_inter_stage.result_mantissa;    //Getting the result bit of the square root
+        // Remainder shifted left by two with the next two operand bits appended.
+        Bit#(fpman6) lv_remainder_temp = {lv_remainder[fPMAN3:0],mantissa[eXT-1],mantissa[eXT-2]};
+        Bit#(fpman6) lv_root_temp_1 = {1'b0,lv_root[fPMAN3-1:0],1'b1,1'b1};    // {root,2'b11}: added when the remainder is negative
+        Bit#(fpman6) lv_root_temp_2 = {1'b0,lv_root[fPMAN3-1:0],1'b0,1'b1};    // {root,2'b01}: subtracted otherwise
+
+        //Determining the remainder
+        if (lv_remainder[fPMAN5]==1'b1) begin    //When r <0
+            lv_remainder = lv_remainder_temp + lv_root_temp_1;
+        end
+        else begin
+            lv_remainder = lv_remainder_temp - lv_root_temp_2;
+        end
+
+        //Determining quotient
+        if (lv_remainder[fPMAN5]==1'b1) begin    //When r <0
+            lv_root = {lv_root[fPMAN3-2:0],1'b0};
+        end
+        else begin
+            lv_root = {lv_root[fPMAN3-2:0],1'b1};
+        end
+        result_mantissa[0] = lv_root[0];    //Storing the result bit from root
+        mantissa = mantissa <<2;    //Shifting mantissa to get the next 2 bits
+        result_mantissa = result_mantissa <<1;    //Making space for the next bit
+        rg_state <= rg_state +1;    //Incrementing state counter
+
+        `ifdef verbose $display("****************************************State = %d", rg_state);`endif
+        `ifdef verbose $display("Remainder =%h", lv_remainder);`endif
+        `ifdef verbose $display("Mantissa = %h",result_mantissa);`endif
+
+        //Storing required values in register for next iteration
+        rg_inter_stage <= Stage_data { mantissa:mantissa ,
+                                       result_mantissa : result_mantissa,
+                                       root:lv_root ,
+                                       remainder:lv_remainder,
+                                       sign : rg_inter_stage.sign,
+                                       exponent: rg_inter_stage.exponent,
+                                       rounding_mode : rounding_mode};
+    endrule
+
+    //*****************ITERATION : fpman+3 (26 for single precision) ***********************//
+    // Last iteration: produces the final root bit, restores a negative remainder, rounds,
+    // packs the result and returns the state counter to idle.
+    rule rl_final_stage (rg_state==fromInteger(fPMAN3-1) && !wr_flush);
+        let lv_remainder = rg_inter_stage.remainder;    //Getting remainder value for iteration
+        Bit#(fpman3) lv_root = rg_inter_stage.root;     //Getting root value for iteration
+        let mantissa = rg_inter_stage.mantissa;         //Getting shifted mantissa value
+        Bit#(fpman3) result_mantissa = rg_inter_stage.result_mantissa;    //Getting the result bits
+        let result_exponent = rg_inter_stage.exponent;    //Getting the final result exponent value
+        Bit#(fpman6) lv_remainder_temp = {lv_remainder[fPMAN3:0],mantissa[eXT-1],mantissa[eXT-2]};
+        Bit#(fpman6) lv_root_temp_1 = {1'b0,lv_root[fPMAN3-1:0],1'b1,1'b1};
+        Bit#(fpman6) lv_root_temp_2 = {1'b0,lv_root[fPMAN3-1:0],1'b0,1'b1};
+        //Determining the remainder
+        if (lv_remainder[fPMAN5]==1'b1) begin    //When r <0
+            lv_remainder = lv_remainder_temp + lv_root_temp_1;
+        end
+        else begin
+            lv_remainder = lv_remainder_temp - lv_root_temp_2;
+        end
+        //Determining quotient
+        if(lv_remainder[fPMAN5]==1) begin    //When r <0
+            lv_root = {lv_root[fPMAN3-2:0],1'b0};
+        end
+        else begin
+            lv_root = {lv_root[fPMAN3-2:0],1'b1};
+        end
+
+        // Unlike the earlier iterations, result_mantissa is not shifted after the last bit is stored.
+        result_mantissa[0]= lv_root[0];
+        Bit#(fpman6) lv_root_rem = {2'b0,lv_root[fPMAN3-1:0],1'b1};
+        //**********Restoring the remainder if the remainder<0***********//
+        //mantissa = mantissa <<2;
+        if (lv_remainder[fPMAN5] == 1'b1) begin
+            //lv_remainder = lv_remainder + {3'b0,lv_root[24:0],1'b1};
+            lv_remainder = lv_remainder + lv_root_rem;
+        end
+
+        //********Carrying out the rounding operation**************//
+        Bit#(3) rounding_mode = rg_inter_stage.rounding_mode;
+
+        bit lv_roundup =0;    //Declaring roundup bit
+        bit lv_guard = result_mantissa[1];    //Setting the guard bit
+        bit lv_round = result_mantissa[0];    //Setting the round bit
+        bit lv_sticky = |(lv_remainder);    //Setting the sticky bit
+        bit lv_sign = rg_inter_stage.sign;    //Getting sign bit
+        bit lv_inexact = lv_guard | lv_round | lv_sticky;
+        // Any other rounding_mode value (e.g. 'b001) leaves lv_roundup at 0, i.e. the result is truncated.
+        if(rounding_mode== 'b000)    // round to nearest, ties to even
+            lv_roundup = lv_guard & (result_mantissa[2] | lv_round | lv_sticky);
+        else if(rounding_mode == 'b100)    // round to nearest, ties to max magnitude
+            lv_roundup = lv_guard;    //& (lv_round | lv_sticky | ~lv_sign);
+        else if(rounding_mode == 'b011 )    // round up
+            lv_roundup = lv_inexact & (~lv_sign);
+        else if(rounding_mode == 'b010)    // round down
+            lv_roundup = lv_inexact & (lv_sign);
+
+        // One extra MSB so that a carry out of the mantissa can be detected.
+        Bit#(TAdd#(fpman3,1)) lv_extended_mantissa = {1'b0,result_mantissa};
+        if (lv_roundup==1) begin
+            lv_extended_mantissa = lv_extended_mantissa + 'd4;    //If roundup then add 4 as the LSB for final mantissa is 3rd bit
+            if (lv_extended_mantissa[fPMAN3]==1)    //When mantissa overflows
+                result_exponent = result_exponent +1;    //Increment exponent by 1
+        end
+
+        //Here most exceptions are taken care of in first stage, so module doesn't perform all iterations
+
+        `ifdef verbose $display("****************************************State = %d", rg_state);`endif
+        `ifdef verbose $display("Remainder =%h", lv_remainder);`endif
+        `ifdef verbose $display("Mantissa = %h",lv_extended_mantissa);`endif
+        Bit#(fpexp) exp_out = result_exponent[fPEXP-1:0];
+        Bit#(fpman) man_out = lv_extended_mantissa[fPMAN+1:2];    // drops the guard and round bits and the hidden bit
+        Bit#(fpinp) final_result = {lv_sign, exp_out, man_out};    //Setting the final result
+        rg_state<=0;
+        // Only the least significant flag bit (inexact) can be set on this path.
+        ff_final_out <= tagged Valid Floating_output{
+                                       final_result:final_result,
+                                       fflags : {4'b0,lv_inexact}
+                                       };
+
+    endrule
+
+    //START METHOD
+    //*******************ITERATION :1 *********************************//
+    // Accepts a new operand only while the unit is idle (rg_state == 0).
+    method Action _start(Bit#(1) sign, Bit#(fpman) lv_mantissa, Bit#(fpexp) lv_exponent, Bit#(3) rounding_mode, Bit#(5) condFlags) if(rg_state==0);
+
+        bit lv_is_invalid =0;    //Invalid Flag
+        bit signalling_nan = condFlags[0];
+        Bit#(fpexp1) exponent = {1'b0, lv_exponent};    //Input exponent
+        Bit#(ext_fpman) mantissa = '0;
+        Bit#(TAdd#(fpman3,1)) man4_zeros = '0;
+
+        if(condFlags[4]==1) begin    //Subnormal input
+            exponent = exponent + 1;// a tweak to make exponent -126 since 8'b00000000 represents -127 which is not the real exponent of subnormal numbers
+            mantissa = {1'b0,1'b0,lv_mantissa,man4_zeros};
+        end
+        else
+            mantissa = {1'b0,1'b1,lv_mantissa,man4_zeros};    //Extend the mantissa to twice the result width (52 bits for the 26-bit single-precision result, i.e. hidden bit + 23 mantissa + guard + round), because each iteration uses 2 bits of the operand
+
+        // `ifdef verbose $display("sign = %b exponent = %b mantissa = %b.%b", sign, exponent, mantissa[eXT-1], _operand1[fPMAN-1:0]);`endif
+        // Int#(9) actual_exponent = unpack(exponent - 'b001111111);
+        // `ifdef verbose $display("actual_exponent = %0d", actual_exponent);`endif
+
+        /******************subnormal support*********************/
+        // Normalise: a normal operand has exactly one leading zero, so nothing moves; a subnormal
+        // operand is shifted up and its exponent is reduced by the same amount.
+        Bit#(ext_fplog) lv_leading_zeros = pack(countZerosMSB(mantissa));
+        mantissa = mantissa << (lv_leading_zeros - 1);
+        exponent = exponent - (zeroExtend(lv_leading_zeros) - 1);    //possibility for a proviso problem
+
+        if (exponent[0]==0)    //If the exponent is even
+            mantissa = mantissa <<1;    //Mantissa is left shifted so that Exponent-127 is even
+
+        Bit#(fpman6) lv_remainder = '0;    //Declaring local remainder variable
+        Bit#(fpexp) bias = {1'b0,'1};      // exponent bias (127 for single precision)
+        Bit#(fpman3) lv_root = '0;    //Declaring local root/quotient variable
+        Bit#(fpman3) result_mantissa = 0;    //Will store the square root answer
+
+        // Bit#(8) result_exponent = (exponent >>1) +'d63 + zeroExtend(exponent[0]);    //Calculating the result exponent
+        Bit#(fpexp1) result_exponent = (exponent >> 1) + (zeroExtend((bias-1)>>1)) + zeroExtend(exponent[0]);    //Calculating the result exponent
+        `ifdef verbose $display("Flags %h lv_mantissa : %h lv_exponent :%h lv_sign : %b",condFlags,lv_mantissa,lv_exponent,sign); `endif
+        `ifdef verbose $display("Result_exponent %h bias %d exponent >> 1 %h exponent[0] %h",result_exponent,(bias-1) >> 1,exponent >> 1, exponent[0]);`endif
+        //Determining remainder
+        // lv_remainder and lv_root are still zero here, so only the else branch can be taken;
+        // the test is kept in the same shape as in the iteration rules.
+        if (lv_remainder[fPMAN5]==1) begin    //When r <0
+            lv_remainder = {lv_remainder[fPMAN3:0],mantissa[eXT-1],mantissa[eXT-2]} + {1'b0,lv_root[fPMAN3-1:0],1'b1,1'b1};
+        end
+        else begin
+            lv_remainder = {lv_remainder[fPMAN3:0],mantissa[eXT-1],mantissa[eXT-2]} - {1'b0,lv_root[fPMAN3-1:0],1'b0,1'b1};
+        end
+        `ifdef verbose $display("lv_remainder: %h",lv_remainder);`endif
+
+        //Determining quotient
+        if (lv_remainder[fPMAN5]==1) begin    //When r <0
+            lv_root = {lv_root[fPMAN+1:0],1'b0};
+        end
+        else begin
+            lv_root = {lv_root[fPMAN+1:0],1'b1};
+        end
+
+        result_mantissa[0] = lv_root [0];    //Setting the LSB of the result
+        mantissa = mantissa << 2;    //Shifting the mantissa to get the next 2 bits of next iteration
+        result_mantissa = result_mantissa << 1;    //Shifting the result mantissa to make space for the next bit
+
+        Bit#(1) lv_inf=0;
+        Bit#(1) lv_inv=0;
+        Bit#(1) lv_zero=0;
+        Bit#(fpexp) exp_all_ones = '1;
+        Bit#(fpexp) exp_all_zeros = '0;
+        Bit#(fpman) man_all_zeros = '0;
+        Bit#(fpman) man_all_ones = '1;
+        Bit#(TSub#(fpman,1)) man1_all_zeros = '0;
+
+        // Classify the operand from the pre-computed condition flags.
+        if((condFlags[2] | condFlags[0]) == 1)    //operand is NaN
+            lv_inv=1;
+        else if(condFlags[1] == 1)    // check if operand is infinite
+        begin
+            if(sign == 1)    // if -inf then result is NaN
+                lv_inv=1;
+            else    // if +inf then result is +inf
+                lv_inf=1;
+        end
+        else if(condFlags[3] == 1)
+            lv_zero=1;
+
+
+        // Special operands produce their result immediately and leave rg_state at 0.
+        if (lv_inv == 1 || (sign == 1 && lv_zero == 0)) begin    // when the input is NAN or Negative => Invalid flag is raised
+            // The top flag bit is set for a signalling NaN or for any negative operand that is not a quiet NaN.
+            ff_final_out <= tagged Valid Floating_output{ final_result:{1'b0, exp_all_ones , {1'b1,man1_all_zeros}},    //Quiet NaN
+                                                          fflags :{signalling_nan | (sign&~condFlags[2]),4'b0}};
+        end
+        else if(lv_inf == 1) begin
+            ff_final_out <= tagged Valid Floating_output{ final_result:{1'b0, exp_all_ones , man_all_zeros},    //Infinity
+                                                          fflags : 'd0};
+        end
+        else if (lv_zero == 1) begin
+            ff_final_out <= tagged Valid Floating_output{ final_result:{sign, exp_all_zeros,man_all_zeros},    //Zero (the sign of the operand is kept)
+                                                          fflags : 'd0};
+        end
+        else begin
+            //State counter incremented only when it does not meet any above exceptional cases
+            rg_state <= rg_state+1;    //Increment the State_counter for next iteration
+        end
+
+        `ifdef verbose $display("****************************************State = %0d", rg_state);`endif
+        `ifdef verbose $display("Remainder = %b", lv_remainder);`endif
+        `ifdef verbose $display("Mantissa = %b",result_mantissa);`endif
+
+        //Storing required data in the inter-stage register for the next iteration
+        // (written on the special-operand paths too, where it is simply never consumed)
+        rg_inter_stage <= Stage_data{ mantissa : mantissa,
+                                      result_mantissa : result_mantissa,
+                                      root : lv_root,
+                                      remainder : lv_remainder,
+                                      sign : sign,
+                                      exponent : result_exponent,
+                                      rounding_mode : rounding_mode };
+    endmethod
+
+    // Valid only in the cycle after ff_final_out was written.
+    method Maybe#(Floating_output#(fpinp)) get_result();
+        return ff_final_out;
+    endmethod
+
+    // Raises wr_flush for this cycle; rl_flush then resets the state counter.
+    method Action flush;
+        wr_flush <= True;
+    endmethod
+
+endmodule
+
+`ifdef fpu_hierarchical
+// Single-precision instance with its own synthesis boundary; every method forwards to mkfpu_sqrt.
+(*synthesize*)
+module mkfpu_sqrt32(Ifc_fpu_sqrt32);
+    Ifc_fpu_sqrt#(32,23,8) uut <- mkfpu_sqrt();
+    method Action _start(Bit#(1) sign, Bit#(23) lv_mantissa, Bit#(8) lv_exponent, Bit#(3) rounding_mode, Bit#(5) condFlags);
+        uut._start(sign,lv_mantissa,lv_exponent,rounding_mode,condFlags);
+    endmethod
+    //Output Methods
+//  method Action deque_buffer();
+    method Maybe#(Floating_output#(32)) get_result();
+        return uut.get_result();
+    endmethod
+    method Action flush;
+        uut.flush();
+    endmethod
+endmodule
+
+// Double-precision instance with its own synthesis boundary; every method forwards to mkfpu_sqrt.
+(*synthesize*)
+module mkfpu_sqrt64(Ifc_fpu_sqrt64);
+    Ifc_fpu_sqrt#(64,52,11) uut <- mkfpu_sqrt();
+    method Action _start(Bit#(1) sign, Bit#(52) lv_mantissa, Bit#(11) lv_exponent, Bit#(3) rounding_mode, Bit#(5) condFlags);
+        uut._start(sign,lv_mantissa,lv_exponent,rounding_mode,condFlags);
+    endmethod
+    //Output Methods
+//  method Action deque_buffer();
+    method Maybe#(Floating_output#(64)) get_result();
+        return uut.get_result();
+    endmethod
+    method Action flush;
+        uut.flush();
+    endmethod
+endmodule
+`endif
+
+// //*************Test bench******************//
+//(*synthesize*)
+/*module mkTb_fpu_sqrt(Empty);
+
+
Reg#(Bit#(32)) rg_clock <-mkReg(0); + Reg#(Bit#(32)) rg__operand1ut1 <- mkReg(32'h76af0cb2); + //Reg#(Bit#(64)) rg__operand1ut1 <- mkReg(64'h019000000000000); + + Ifc_fpu_sqrt#(32,23,8) square_root <- mkfpu_sqrt; + + rule rl_clock; + rg_clock<=rg_clock+1; + if(rg_clock=='d60) begin + $finish(0); + end + endrule + + rule give__operand1ut(rg_clock==2); + `ifdef verbose $display("Giving input %h at %0d", rg__operand1ut1, rg_clock,$time);`endif + square_root._start(rg__operand1ut1, 3'b011); + endrule + + rule get_output(square_root.get_result matches tagged Valid .lv_output); + `ifdef verbose $display("taking output at %0d", rg_clock);`endif + `ifdef verbose $display("Output= %h" , lv_output.final_result,$time);`endif + square_root.deque_buffer(); + endrule + +endmodule + */ +/* +module mkTb_fpu_sqrt_2 (Empty); + + RegFile #(Bit #(10), Bit #(36)) input_data <- mkRegFileFullLoad("./testcases/fpgen_testcases/Sqrt_testcases.hex"); + Reg #(Bit #(10)) index <- mkReg(0); + Reg #(Bit #(32)) state_clock <- mkReg(1); + Reg #(Bit #(32)) rg_state <- mkReg(0); + /*****************Module Instantiation******************************/ +// Ifc_fpu_sqrt#(32,23,8) sqrt <- mkfpu_sqrt; + /******************File Creation************************************/ +/* Reg#(int) cnt <- mkReg(0); //File Creation counter + let fh <- mkReg(InvalidFile) ; //File Handler + rule open (cnt == 0 ) ; + File tb_sqrt_output <- $fopen("tb_sqrt_output.hex", "w+"); + fh <= tb_sqrt_output; + cnt <= 1 ; + endrule */ + /*******************input******************************************/ +/* rule take_input_in (rg_state == 0); + sqrt._start(input_data.sub(index)[35:4], input_data.sub(index)[2:0]); + index <= index + 1; + rg_state <= 1; + endrule +*/ + /*******************output*****************************************/ +/* rule display_output (rg_state == 1 &&& sqrt.get_result matches tagged Valid .abc); + $fwrite(fh, "%h\n", abc.final_result[31:0]); + rg_state <= 0; + sqrt.deque_buffer(); + endrule +*/ + 
/******************end testing*************************************/
+/*  rule end_testing (index == 65);
+        $finish();
+    endrule
+
+
+endmodule*/
+
+endpackage
diff --git a/src/core/fpu/integer_divider.bsv b/src/core/fpu/integer_divider.bsv
new file mode 100644
index 0000000..d6957af
--- /dev/null
+++ b/src/core/fpu/integer_divider.bsv
@@ -0,0 +1,186 @@
+/*
+Authors : Vinod.G, Arjun Menon, Aditya Govardhan
+Email : g.vinod1993@gmail.com, c.arjunmenon@gmail.com
+Last Update : 27th November 2017
+See LICENSE for more details
+Description:
+Multi-cycle integer divider. Each call of fn_divide_step performs up to two add/subtract
+steps on a packed {divisor, remainder, quotient} word; the module calls it once per clock
+until fpman4 quotient bits have been produced.
+*/
+
+package integer_divider;
+import UniqueWrappers::*;
+
+// fpman4 is the width of both operands and of the quotient.
+interface Ifc_integer_divider#(numeric type fpman4);
+    // Starts a division; accepted only while the divider is idle.
+    method Action _inputs(Bit#(fpman4) _denominator, Bit#(fpman4) _numerator);
+    // Packed {divisor, remainder, quotient}; readable only in the cycle the final step completes.
+    method Bit#(TAdd#(TMul#(fpman4,3),2)) result_();
+    // Abandons the division in flight.
+    method Action flush;
+endinterface
+
+//(* synthesize *)
+module mkinteger_divider(Ifc_integer_divider#(fpman4))
+    provisos(
+        Add#(TMul#(fpman4,3),2,op_fpman),    // width of the packed working word
+        Add#(fpman4,2,fpman6),               // remainder width
+        Add#(fpman4,fpman6,acc_bits)         // remainder + quotient
+        //per request of bsc
+
+    );
+    let fPMAN4 = valueOf(fpman4);
+
+    Reg#(Bit#(op_fpman)) rg_inter_stage <- mkRegU();    // packed {divisor, remainder, quotient} between steps
+    Reg#(Bit#(6)) rg_state <- mkReg(0);                 // 0 = idle, otherwise the number of calls made so far
+    Wire#((Bit#(op_fpman))) wr_final_out <- mkWire;     // carries the result for one cycle
+    Wire#(Bool) wr_flush <- mkDWire(False);             // True only in a cycle where flush is called
+    // One call performs two add/subtract steps (final_stage == 0), or the last step(s) followed by
+    // quotient conversion and remainder restoration (final_stage == 1).
+    // NOTE(review): end_stage passes the LSB of fpman4 as is_even, so is_even == 0 actually means
+    // that fpman4 is even - the parameter name reads inverted; confirm before renaming.
+    function Bit#(op_fpman) fn_divide_step (Bit#(op_fpman) packed_div, Bit#(1) final_stage, Bit#(1) is_even)
+        provisos(
+
+            // Widths for the single-precision instantiation:
+            // fpman4 = 27
+            // fpman6 = 29
+            // op_fpman = 83
+            // acc_bits = 56
+
+        );
+        let fPMAN4 = valueOf(fpman4);
+        let aCC = valueOf(acc_bits);
+        let oP_FPMAN = valueOf(op_fpman);
+
+        Bit#(fpman4) all_zeros = '0;
+        // Unpack the working word.
+        Bit#(fpman4) _divisor = packed_div[oP_FPMAN-1:aCC];
+        Bit#(fpman6) _remainder = packed_div[aCC-1:fPMAN4];
+        Bit#(fpman4) _dividend = packed_div[fPMAN4-1:0];    // _inputs initialises this field to zero; quotient bits are shifted in at bit 0
+        Bit#(acc_bits) accumulator = 0;
+
+        for(Integer i = 0 ; i <=1 ; i=i+1) begin
+            if(final_stage == 0 || (final_stage == 1 && i == 0)) begin
+                // Remainder not negative: shift, subtract the divisor and shift in a 1.
+                if(_remainder[fPMAN4+1]==1'b0) begin
+                    accumulator = ({_remainder,_dividend}<<1) - {1'b0,_divisor,1'b0,all_zeros} ;
+                    accumulator[0] = 1'b1;
+                end
+                // Remainder negative: shift, add the divisor back and shift in a 0.
+                else begin
+                    accumulator = ({_remainder,_dividend}<<1) + {1'b0,_divisor,1'b0,all_zeros} ;
+                    accumulator[0] = 1'b0;
+                end
+                _remainder = accumulator[aCC-1:fPMAN4];
+                _dividend = accumulator[fPMAN4-1:0];
+            end
+            else begin
+                // Second half of the final call: one more step only when is_even == 0.
+                if(is_even == 0) begin
+                    if(_remainder[fPMAN4+1]==1'b0) begin
+                        accumulator = ({_remainder,_dividend}<<1) - {1'b0,_divisor,1'b0,all_zeros} ;
+                        accumulator[0] = 1'b1;
+                    end
+                    else begin
+                        accumulator = ({_remainder,_dividend}<<1) + {1'b0,_divisor,1'b0,all_zeros} ;
+                        accumulator[0] = 1'b0;
+                    end
+                    _remainder = accumulator[aCC-1:fPMAN4];
+                    _dividend = accumulator[fPMAN4-1:0];
+                end
+                // Quotient conversion: subtract the bitwise complement of the collected bits.
+                _dividend = _dividend - (_dividend ^ ('1));
+                // A negative final remainder is restored and the quotient reduced by one.
+                if(_remainder[fPMAN4+1] == 1'b1) begin
+                    _remainder = _remainder + {1'b0,_divisor,1'b0};
+                    _dividend = _dividend - 1;
+                end
+            end
+        end
+        return {_divisor, _remainder, _dividend};
+endfunction
+
+    // Wrapped so that the method and all rules share one instance of the step logic.
+    Wrapper3#(Bit#(op_fpman),Bit#(1),Bit#(1),Bit#(op_fpman)) wfn_divide_step <- mkUniqueWrapper3(fn_divide_step);
+
+
+    (*mutually_exclusive = "rl_flush,stage_1,recursive_stage,end_stage"*)
+
+    // A flush returns the state counter to idle; the step rules are also guarded by !wr_flush.
+    rule rl_flush(wr_flush);
+        rg_state <= 0;
+    endrule
+
+    // NOTE(review): the verbose displays below slice [55:0], which is remainder+quotient only for
+    // fpman4 = 27 (acc_bits = 56); other instantiations would print a different field split.
+    rule stage_1(rg_state == 1 && !wr_flush);
+        rg_state <= rg_state + 1;
+        `ifdef verbose $display("Int Data %h rg_state %d",rg_inter_stage[55:0], rg_state); `endif
+        let x <- wfn_divide_step.func(rg_inter_stage,0,0);
+        rg_inter_stage <= x;
+    endrule
+
+    // Two steps per cycle while rg_state is in 2 .. ((fpman4-5)>>1)+1.
+    rule recursive_stage(rg_state > 1 && rg_state <= ((fromInteger(fPMAN4-5)>>1) +1) && !wr_flush );
+        rg_state <= rg_state + 1;
+        `ifdef verbose $display($time,"\t Int Data %h rg_state %d", rg_inter_stage[55:0], rg_state);`endif
+        let x<- wfn_divide_step.func(rg_inter_stage,0,0);
+        rg_inter_stage <= x;
+    endrule
+
+    // Final call: publishes the result on wr_final_out and returns to idle.
+    rule end_stage(rg_state == ((fromInteger(fPMAN4-5)>>1)+2) && !wr_flush);
+        rg_state <= 0;
+        `ifdef verbose $display($time,"\t End stage Int Data %h rg_state %d fpman4[0] %d", rg_inter_stage[55:0], rg_state, fromInteger(fPMAN4)[0]);`endif
+        let x <- wfn_divide_step.func(rg_inter_stage,1,fromInteger(fPMAN4)[0]);
+        wr_final_out <= x;
+    endrule
+
+    // The numerator is loaded into the remainder field and the low field starts at zero; the
+    // first two steps are performed in the same cycle.
+    method Action _inputs(Bit#(fpman4) _denominator, Bit#(fpman4) _numerator)if(rg_state==0);
+        rg_state <= rg_state + 1;
+        Bit#(fpman4) man_all_zeros = '0;
+        Bit#(op_fpman) packed_div = {_denominator,{2'b0,_numerator},man_all_zeros};
+        `ifdef verbose $display("Numerator: %h Denominator: %h",_numerator,_denominator);`endif
+        let x <- wfn_divide_step.func(packed_div,0,0);
+        rg_inter_stage <= x;
+    endmethod
+
+    // Implicitly guarded by wr_final_out (an mkWire), so it is ready only when end_stage fires.
+    method Bit#(op_fpman) result_();
+        return wr_final_out;
+    endmethod
+
+    method Action flush;
+        wr_flush <= True;
+    endmethod
+
+endmodule
+
+    // Testbench: issues one division at clock 1 and stops the simulation at clock 35.
+    module mkTb (Empty);
+
+    Reg#(Bit#(32)) rg_clock <-mkReg(0);
+    Reg#(Bit#(55)) rg_remainder <-mkReg(0);    // not referenced by any rule below
+//  Ifc_integer_divider#(27) instance_divider <-mkinteger_divider();
+
+    Ifc_integer_divider#(27) instance_divider <-mkinteger_divider();
+
+    rule rl_input1(rg_clock==1);
+        instance_divider._inputs(27'h6d74e20,27'h68a4e18);    // divisor, dividend
+    endrule
+
+    // NOTE(review): the remainder field of the result is 29 bits wide (bits [55:27] for fpman4 = 27),
+    // but only temp[53:27] is printed - confirm the dropped upper bits are never significant.
+    rule rl_finish;
+        let temp = instance_divider.result_();
+        `ifdef verbose $display("Quotient=%h remainder=%h at %0d",temp[26:0],temp[53:27], rg_clock);`endif
+    endrule
+
+    rule rl_count_clock ;
+        rg_clock<=rg_clock+1;
+        if(rg_clock=='d35)
+            $finish(0);
+    endrule
+    endmodule
+// (*synthesize*)
+// module mkTb (Empty);
+//
+// Reg#(Bit#(32)) rg_clock <-mkReg(0);
+//
+// Ifc_integer_divider#(27) instance_divider <-mkinteger_divider();
+//
+// rule rl_count_clock ;
+// rg_clock<=rg_clock+1;
+// if(rg_clock=='d60) $finish(0);
+// endrule
+//
+// rule rl_input1(rg_clock==1);
+// `ifdef verbose $display("giving inputat %0d", rg_clock);`endif
+// instance_divider._inputs(27'd40,27'd800); // divisor, dividend
+// `ifdef verbose $display("Expected Quotient: %d",27'd800/27'd40);`endif
+// endrule
+//
+// rule rl_finish;
+// let temp = instance_divider.result_();
+// `ifdef verbose $display("Quotient=%h remainder=%h at %0d",temp[26:0],temp[53:27], rg_clock);`endif
+// endrule
+//
+// 
endmodule + +endpackage diff --git a/src/core/fpu/integermultiplier.bsv b/src/core/fpu/integermultiplier.bsv new file mode 100644 index 0000000..b88c2f7 --- /dev/null +++ b/src/core/fpu/integermultiplier.bsv @@ -0,0 +1,83 @@ +/* +Copyright (c) 2013-2016, IIT Madras +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +* Neither the name of IIT Madras nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+Module Name : Sequential Integer Multiplier Unit
+Author's Name : Neel Gala, Vinod.G
+e-mail id : neelgala@gmail.com, g.vinod1993@gmail.com
+Last updated on : 5th August 2016
+
+*/
+
+package integermultiplier;
+    import DReg::*;
+    // Sequential unsigned multiplier.
+    //   regwidth : width of each operand; the product is 2*regwidth bits wide
+    //   loop     : number of multiplier bits retired per call of _start
+    interface Ifc_integermultiplier#(numeric type regwidth, numeric type loop);
+        // Must be called on consecutive cycles with the same operands. The first call loads the
+        // multiplier, each later call retires `loop` multiplier bits, and the call number
+        // regwidth/loop after the load returns Valid with the full product. Every other call
+        // returns Invalid.
+        method ActionValue#(Maybe#(Bit#(TMul#(2,regwidth)))) _start(Bit#(regwidth) inp1, Bit#(regwidth) inp2);
+    endinterface
+    //(*synthesize*)
+    module mkintegermultiplier(Ifc_integermultiplier#(regwidth,loop))
+        provisos ( Add#(regwidth,1,regwidth1),
+                   Add#(regwidth,regwidth,regwidth_twice),
+                   Add#(1,TMul#(2,regwidth),regwidth_twice1),
+                   Add#(1,TLog#(regwidth),regwidth_log1),
+                   //per request of bsc
+                   Add#(regwidth1,regwidth,regwidth_twice1)
+                 );
+
+        // {running sum (regwidth+1 bits), multiplier bits not yet consumed (regwidth bits)}
+        Reg#(Bit#(regwidth_twice1)) partial_prod <-mkReg(0);
+        // A DReg: the counter falls back to 0 in any cycle in which _start is not called, which
+        // abandons a multiplication that is not driven on consecutive cycles.
+        Reg#(Bit#(regwidth_log1)) rg_state_counter <-mkDReg(0);//Register for state machine counter
+        let rEGWIDTH = valueOf(regwidth);
+        let lOOP = valueOf(loop);
+
+        method ActionValue#(Maybe#(Bit#(regwidth_twice))) _start(Bit#(regwidth) inp1, Bit#(regwidth) inp2);
+            `ifdef verbose $display("Taken inputs in multiplier. rs1: %h rs2: %h",inp1,inp2); `endif
+            `ifdef verbose $display("Register State Counter %h", rg_state_counter);`endif
+            `ifdef verbose $display("partial_prod %h", partial_prod);`endif
+            if(rg_state_counter==0)begin
+                // Load step: the multiplier goes into the low half, the running sum starts at 0.
+                partial_prod<=zeroExtend(inp2);
+                rg_state_counter<=rg_state_counter+1;
+                return tagged Invalid;
+            end
+            else begin
+                // chunk*inp1 needs up to regwidth+loop bits. The previous code formed it in a
+                // Bit#(regwidth) value, which silently truncated the product whenever loop > 1
+                // (e.g. 8'hFF * 8'hFF with loop = 4). All operands are therefore widened to
+                // 2*regwidth+1+loop bits before the add and the right shift; the shifted value
+                // always fits in 2*regwidth+1 bits again. The non-literal bit selections below
+                // take their width from the declared type and are zero-extended.
+                Bit#(TAdd#(regwidth_twice1,loop)) multiplier_chunk = partial_prod[lOOP-1:0];
+                Bit#(TAdd#(regwidth_twice1,loop)) multiplicand = inp1[rEGWIDTH-1:0];
+                Bit#(TAdd#(regwidth_twice1,loop)) running = partial_prod[2*rEGWIDTH:0];
+                // Add the chunk product at the position of the running sum, then retire `loop` bits.
+                Bit#(TAdd#(regwidth_twice1,loop)) shifted = (running + ((multiplier_chunk*multiplicand) << rEGWIDTH)) >> lOOP;
+                Bit#(regwidth_twice1) temp1 = shifted[2*rEGWIDTH:0];
+                `ifdef verbose $display("multiplication. Partial :%h Counter: %d",temp1,rg_state_counter);`endif
+                if(rg_state_counter==(fromInteger(rEGWIDTH)/fromInteger(lOOP)))begin
+                    // Last chunk retired: temp1 now holds the complete product.
+                    rg_state_counter<=0;
+                    return tagged Valid temp1[2*rEGWIDTH-1:0];
+                end
+                else begin
+                    partial_prod<=temp1;
+                    rg_state_counter<=rg_state_counter+1;
+                    return tagged Invalid;
+                end
+            end
+        endmethod
+    endmodule
+
+    // Testbench: multiplies 12 by 10 with 4 multiplier bits per call and stops on the first Valid
+    // result (expected product 120).
+    module mkTb(Empty);
+        Ifc_integermultiplier#(8,4) mul <- mkintegermultiplier();
+        Reg#(Bit#(8)) inp1 <- mkReg(8'b1100);
+        Reg#(Bit#(8)) inp2 <- mkReg(8'b1010);
+
+        rule give_inputs;
+            let x <- mul._start(inp1,inp2);
+            if(x matches tagged Valid .res) begin
+                `ifdef verbose $display("Output is %b",res);`endif
+                $finish(0);
+            end
+        endrule
+
+    endmodule
+endpackage
diff --git a/src/core/iTLB.bsv b/src/core/iTLB.bsv
new file mode 100644
index 0000000..5dee7c3
--- /dev/null
+++ b/src/core/iTLB.bsv
@@ -0,0 +1,307 @@
+/*
+Copyright (c) 2013, IIT Madras
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+* Neither the name of IIT Madras nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+*/
+package iTLB;
+import defined_types::*;
+import FIFO::*;
+import SpecialFIFOs::*;
+import GetPut::*;
+import ConfigReg::*;
+import MemoryMap:: *;
+
+`include "defined_parameters.bsv"
+
+// Number of entries in the translation look-aside buffer.
+`define TLB_entries 16
+
+// Instruction TLB interface.
+//   data_width : width of the addresses exchanged with the core
+//   vaddr      : virtual address width
+//   paddr      : physical address width
+//   page_size  : number of page-offset bits
+//   asid_width : address-space identifier width
+interface Ifc_TLB#(numeric type data_width, numeric type vaddr, numeric type paddr, numeric type page_size, numeric type asid_width);
+    // Queues a virtual address for translation; mkTLB splits it into page number and page offset.
+    method Action get_vaddr(Bit#(data_width) addr);
+    // Returns the resulting address together with the exception status and the cacheable flag,
+    // and retires the queued request.
+    method ActionValue#(From_TLB#(data_width)) send_ppn;
+    //method Bit#(vaddr) send_vaddress_for_cache_index;
+    // Latches the translation controls. mkTLB takes the ASID from the low asid_width bits of
+    // `asid` and the 4-bit translation mode from the bits above it.
+    method Action translation_protection_frm_csr(bit tlb_disable, Chmod per_bits, Bit#(TAdd#(4,asid_width)) asid);
+    // Request to the page-table walker, issued on a TLB miss.
+    interface Get#(Request_PPN_PTW#(vaddr,page_size)) to_PTW;
+    // Response from the page-table walker; the Bool is stored by mkTLB as the page-fault flag
+    // and the entry is written into the TLB only when it is False.
+    interface Put#(Tuple2#(Bool,To_TLB#(paddr,page_size,asid_width))) refill_TLB;
+    // Flush request; presumably driven by SFENCE.VMA, the full behaviour is defined further
+    // down in mkTLB - TODO confirm.
+    method Action fence_TLB(Fence_VMA_type#(vaddr) rsdata);
+    //method ActionValue#(Bool) page_fault;
+    //method Action page_fault_frm_PTW;
+endinterface
+
+module mkTLB(Ifc_TLB#(data_width,vaddr,paddr,page_size,asid_width))
+provisos( Add#(vpn, page_size, vaddr),
+          Mul#(8, num_bytes, data_width),
+          Log#(num_bytes, byte_addressable_bits),
+          Add#(vpn_split,byte_addressable_bits, page_size),
+          Mul#(2,vpn_split,intermediate1),
+          Mul#(3,vpn_split,intermediate2),
+          Add#(a_, paddr, data_width),
+          Add#(b_, vaddr, data_width),
+          Add#(c_,
vpn_split, vpn), + Add#(d_, intermediate1, vpn), + Add#(e_, intermediate2, vpn), + Add#(ppn, page_size, paddr)); + + let v_vaddr = valueOf(vaddr); + let v_vpn = valueOf(vpn); + let v_ppn = valueOf(ppn); + let v_page_offset = valueOf(page_size); + let v_asid_width = valueOf(asid_width); + let v_vpn_split = valueOf(vpn_split); + let v_intermediate1 = valueOf(intermediate1); + + Reg#(Bit#(vpn)) tlb_vpn[`TLB_entries]; + Reg#(Bit#(ppn)) tlb_ppn[`TLB_entries]; + Reg#(TLB_permissions) tlb_permissions[`TLB_entries]; + Reg#(Bit#(asid_width)) tlb_asid[`TLB_entries]; + Reg#(Bool) tlb_cacheable[`TLB_entries]; + Reg#(Bit#(2)) tlb_levels[`TLB_entries]; + for(Integer i = 0; i < `TLB_entries; i=i+1) begin + tlb_vpn[i] <- mkReg(0); + tlb_ppn[i] <- mkReg(0); + tlb_permissions[i] <- mkReg(TLB_permissions{v:0,r:0,w:0,x:0,u:0,g:0,a:0,d:0}); + tlb_levels[i] <- mkReg(0); + tlb_cacheable[i] <- mkReg(True); + end + FIFO#(Bit#(vpn)) ff_vpn <- mkBypassFIFO(); + FIFO#(Bit#(page_size)) ff_page_offset <- mkBypassFIFO(); + Reg#(Chmod) rg_chmod[2] <- mkCReg(2,Chmod { mprv : 0, mxr : 0, sum : 0, mpp : unpack(0), prv : unpack(0)}); + Reg#(Bool) rg_page_fault[2] <- mkCReg(2,False); + Reg#(Bool) rg_hit[2] <- mkCReg(2,False); + Reg#(Bit#(2)) rg_levels[2] <- mkCReg(2,0); + Reg#(Bool) rg_handling_PTW[2] <- mkCReg(2,False); + Reg#(Bool) rg_tlb_disable <- mkConfigReg(False); + //Reg#(Bool) rg_frm_ptw[2] <- mkCReg(2,False); + Reg#(Bit#(ppn)) rg_ppn[2] <- mkCReg(2,0); + Reg#(Bool) rg_cacheable[2] <- mkCReg(2,True); + Reg#(Bit#(asid_width)) rg_asid[2] <- mkCReg(2,0); + Reg#(Bit#(4)) rg_translation_mode[2] <- mkCReg(2,0); + Reg#(Bit#(TLog#(`TLB_entries))) rg_slot_to_replace <- mkReg(0); + + + rule rl_translation(!rg_handling_PTW[0] && !rg_tlb_disable && !(rg_chmod[1].prv==Machine) + && (rg_translation_mode[1]!=0)); + Bit#(ppn) ppn = 0; + TLB_permissions perm_bits = TLB_permissions{v:0,r:0,w:0,x:0,u:0,g:0,a:0,d:0}; + Bool hit = False; + Bool page_fault = False; + Bool cacheable = False; + Bit#(vpn) vpn_bits = 
ff_vpn.first; + Bit#(vpn_split) lv_vpn_split= 0; + Bit#(intermediate1) lv_intermediate1 = 0; + Bit#(intermediate2) lv_intermediate2= 0; + Bit#(vpn) mask1 = {'1,lv_vpn_split}; + Bit#(vpn) vpnmask1 = vpn_bits & mask1; + Bit#(vpn) mask2 = {'1,lv_intermediate1}; + Bit#(vpn) vpnmask2 = vpn_bits & mask2; + Bit#(2) pg_levels = 0; + Integer slot = 0; + `ifdef verbose $display($time, "\tThe acquired VPN in iTLB %h", ff_vpn.first); `endif + for(Integer i = 0; i < `TLB_entries; i = i + 1) begin + if((vpn_bits==tlb_vpn[i] && tlb_levels[i]==0 + || ((vpnmask1==(tlb_vpn[i] & mask1)) && tlb_levels[i]==1) + || ((vpnmask2==(tlb_vpn[i] & mask2)) && tlb_levels[i]==2)) + && (rg_asid[1]==tlb_asid[i] || tlb_permissions[i].g==1) && tlb_permissions[i].v==1) begin + ppn = tlb_ppn[i]; + perm_bits = tlb_permissions[i]; + pg_levels = tlb_levels[i]; + hit = True; + slot = i; + cacheable = tlb_cacheable[i]; + end + end + rg_levels[0] <= pg_levels; + if(hit) begin + if(rg_chmod[1].sum==0) begin + if(rg_chmod[1].mprv==1) begin + if(rg_chmod[1].mpp==unpack(1) && perm_bits.u==1) begin + page_fault=True; + end + end + else if(rg_chmod[1].prv==unpack(1) && perm_bits.u==1) begin + page_fault=True; + end + end + else begin + if(perm_bits.x!=1) + page_fault=True; + end + rg_ppn[0] <= ppn; + rg_cacheable[0] <= cacheable; + `ifdef verbose $display($time, "\t hit in iTLB"); `endif + end + else begin + rg_handling_PTW[0] <= True; + `ifdef verbose $display($time, "\t iTLB: miss"); `endif + end + rg_page_fault[0]<=page_fault; + if(!page_fault) + rg_hit[0]<=hit; + else begin + perm_bits.v = 0; + tlb_permissions[slot] <= perm_bits; + `ifdef verbose $display($time, "\t page fault in iTLB"); `endif + end + endrule + + method Action get_vaddr(Bit#(data_width) vaddr); + `ifdef verbose $display($time, "\t vpn obtained in TLB"); `endif + ff_vpn.enq(vaddr[v_vaddr-1: v_page_offset]); + ff_page_offset.enq(vaddr[v_page_offset-1:0]); + endmethod + + method ActionValue#(From_TLB#(data_width)) send_ppn if(rg_hit[1] || 
rg_tlb_disable + || (rg_chmod[1].prv==Machine) || rg_page_fault[1] || (rg_translation_mode[1]==0)); + Trap_type e = tagged None; + Bit#(data_width) final_address; + Bit#(ppn) p_ppn = 0; + if(rg_levels[1]==0) begin + p_ppn = rg_ppn[1]; + end + else if(rg_levels[1]==1) begin + Bit#(TSub#(ppn,vpn_split)) lv_ppn_split = rg_ppn[1][v_ppn-1:v_vpn_split]; + Bit#(vpn_split) lv_vpn_split = ff_vpn.first[v_vpn_split-1:0]; + p_ppn = {lv_ppn_split,lv_vpn_split}; + end + else if(rg_levels[1]==2) begin + Bit#(TSub#(ppn,intermediate1)) lv_ppn_split = rg_ppn[1][v_ppn-1:v_intermediate1]; + Bit#(intermediate1) lv_vpn_split = ff_vpn.first[v_intermediate1-1:0]; + p_ppn = {lv_ppn_split,lv_vpn_split}; + end + if(rg_hit[1]) begin + rg_hit[1] <= False; + Bit#(paddr) paddress = {p_ppn,ff_page_offset.first()}; + final_address = zeroExtend(paddress); + //rg_frm_ptw[1] <= False; + end + else if(rg_page_fault[1]) begin + `ifdef verbose $display($time, "\t Instruction Page Fault"); `endif + e = tagged Exception Inst_pagefault; + Bit#(vaddr) paddress = {ff_vpn.first(),ff_page_offset.first()}; + final_address = zeroExtend(paddress); + rg_page_fault[1] <= False; + //rg_frm_ptw[1] <= False; + end + else begin + `ifdef verbose $display($time, "\t Bypass iTLB"); `endif + Bit#(vaddr) paddress = {ff_vpn.first(),ff_page_offset.first()}; + final_address = zeroExtend(paddress); + end + ff_page_offset.deq; + ff_vpn.deq; + return From_TLB{exception : e, address : final_address, cacheable : rg_cacheable[1]}; + endmethod + + //method Bit#(vaddr) send_vaddress_for_cache_index if(rg_frm_ptw[0]); + // return {ff_vpn.first, ff_page_offset.first}; + //endmethod + + + method Action translation_protection_frm_csr(bit tlb_disable, Chmod per_bits, Bit#(TAdd#(4,asid_width)) asid); + rg_tlb_disable <= unpack(tlb_disable); + rg_asid[0] <= asid[v_asid_width-1:0]; + rg_translation_mode[0] <= asid[v_asid_width+3:v_asid_width]; + rg_chmod[0] <= per_bits; + `ifdef verbose $display($time, "\t ITLB: mprv %b mxr %b sum %b mpp %b 
prv %b", per_bits.mprv, per_bits.mxr, per_bits.sum, pack(per_bits.mpp), pack(per_bits.prv)); `endif + endmethod + + interface to_PTW = interface Get + method ActionValue#(Request_PPN_PTW#(vaddr,page_size)) get if(rg_handling_PTW[1] && !rg_page_fault[1]); + return Request_PPN_PTW{ vpn : ff_vpn.first(), page_type : Execution}; + endmethod + endinterface; + + interface refill_TLB = interface Put + method Action put(Tuple2#(Bool, To_TLB#(paddr,page_size,asid_width)) tlb_fill) if(rg_handling_PTW[0]); + let {x,tlb_structure} = tlb_fill; + rg_page_fault[0] <= x; + Bit#(paddr) paddress= {tlb_structure.ppn,ff_page_offset.first}; + Bit#(data_width) new_address = zeroExtend(paddress); + Bool cacheable = True; //!is_IO_Addr(new_address); + if(!x) begin + rg_slot_to_replace <= rg_slot_to_replace + 1; + tlb_vpn[rg_slot_to_replace] <= ff_vpn.first(); + tlb_ppn[rg_slot_to_replace] <= tlb_structure.ppn; + tlb_permissions[rg_slot_to_replace] <= tlb_structure.tlb_perm; + tlb_levels[rg_slot_to_replace] <= tlb_structure.levels; + tlb_asid[rg_slot_to_replace] <= tlb_structure.asid; + tlb_cacheable[rg_slot_to_replace] <= cacheable; + end + rg_handling_PTW[0] <= False; + //rg_frm_ptw[0] <= True; + `ifdef verbose $display($time, "\t Filling TLB in slot %d with vpn %h with page levels i %d", rg_slot_to_replace, ff_vpn.first(), tlb_structure.levels); `endif + endmethod + endinterface; + + method Action fence_TLB(Fence_VMA_type#(vaddr) rsdata); + Bool flush_address = False; + Bool flush_address_space = False; + Bit#(vpn_split) lv_vpn_split= 0; + Bit#(intermediate1) lv_intermediate1 = 0; + Bit#(intermediate2) lv_intermediate2= 0; + Bit#(vpn) mask1 = {'1,lv_vpn_split}; + Bit#(vpn) vpnmask1 = rsdata.rs1[v_vaddr-1:v_page_offset] & mask1; + Bit#(vpn) mask2 = {'1,lv_intermediate1}; + Bit#(vpn) vpnmask2 = rsdata.rs1[v_vaddr-1:v_page_offset] & mask2; + if(rsdata.rs1!=0) begin + flush_address = True; + `ifdef verbose $display($time, "\t iTLB address flush %h", rsdata.rs1); `endif + end + 
if(rsdata.rs2!=0) begin + flush_address_space = True; + `ifdef verbose $display($time, "\t iTLB address space flush %h", rsdata.rs2); `endif + end + for(Integer i = 0; i < `TLB_entries; i = i+1) begin + if(((flush_address && ((rsdata.rs1[v_vaddr-1:v_page_offset] == tlb_vpn[i] && tlb_levels[i]==0) + || (vpnmask1 == (tlb_vpn[i] & mask1) && tlb_levels[i]==1) + || (vpnmask2 == (tlb_vpn[i] & mask2) && tlb_levels[i]==2))) + || (flush_address_space && rsdata.rs2[v_asid_width-1:0] == tlb_asid[i])) + || (!flush_address && !flush_address_space)) begin + `ifdef verbose $display($time, "\t iTLB entry %d with vpn %h removed",i, tlb_vpn[i]); `endif + tlb_permissions[i] <= TLB_permissions{v : 0, r : 0, w : 0, x : 0, u : 0, g : 0, a : 0, d : 0}; + end + end + endmethod + +endmodule + +interface Ifc_iTLB; + method Action get_vaddr(Bit#(`ADDR) addr); + method ActionValue#(From_TLB#(`ADDR)) send_ppn; + //method Bit#(`VADDR) send_vaddress_for_cache_index; + method Action translation_protection_frm_csr(bit tlb_disable, Chmod per_bits, Bit#(TAdd#(4,`ASID)) asid); + interface Get#(Request_PPN_PTW#(`VADDR,`OFFSET)) to_PTW; + interface Put#(Tuple2#(Bool,To_TLB#(`PADDR,`OFFSET,`ASID))) refill_TLB; + method Action fence_TLB(Fence_VMA_type#(`VADDR) rsdata); + //method ActionValue#(Bool) page_fault; + //method Action page_fault_frm_PTW; +endinterface + +(*synthesize*) +module mkiTLB(Ifc_iTLB); + +Ifc_TLB#(`ADDR,`VADDR,`PADDR,`OFFSET,`ASID) itlb <- mkTLB(); + method Action get_vaddr(Bit#(`ADDR) addr); + itlb.get_vaddr(addr); + endmethod + method ActionValue#(From_TLB#(`ADDR)) send_ppn = itlb.send_ppn; + //method Bit#(`VADDR) send_vaddress_for_cache_index = itlb.send_vaddress_for_cache_index; + method Action translation_protection_frm_csr(bit tlb_disable, Chmod per_bits, Bit#(TAdd#(4,`ASID)) asid); + itlb.translation_protection_frm_csr(tlb_disable,per_bits,asid); + endmethod + interface to_PTW = itlb.to_PTW; + interface refill_TLB = itlb.refill_TLB; + method Action 
fence_TLB(Fence_VMA_type#(`VADDR) rsdata); + itlb.fence_TLB(rsdata); + endmethod + //method ActionValue#(Bool) page_fault = itlb.page_fault; + //method Action page_fault_frm_PTW = itlb.page_fault_frm_PTW; +endmodule +endpackage diff --git a/src/core/icache.bsv b/src/core/icache.bsv new file mode 100644 index 0000000..edb512e --- /dev/null +++ b/src/core/icache.bsv @@ -0,0 +1,484 @@ +/* +Copyright (c) 2013, IIT Madras +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +* Neither the name of IIT Madras nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +*/ +package icache; + /*===== Pacakge imports ===== */ + import BRAMCore::*; + import FIFO::*; + import FIFOF::*; + import SpecialFIFOs::*; + import LFSR::*; + import ConfigReg::*; + import DReg::*; + import BUtils::*; + import MemoryMap::*; + import mem_config1::*; + /*===== project imports==== */ + import QuadMem::*; + import Assert::*; + import defined_types::*; + `include "defined_parameters.bsv" + /*========================= */ + + + interface Ifc_icache; + method Action virtual_address(Bit#(`VADDR) vaddress,Bool fence); + method Maybe#(Tuple3#(Bit#(32), Trap_type, Bit#(`PERFMONITORS))) response_to_core; + method Action response_from_memory(From_Memory#(`DCACHE_WORD_SIZE) resp); + method ActionValue#(To_Memory#(`PADDR)) request_to_memory; + method Action stall_fetch(Bool stall); + + `ifdef MMU + method Action physical_address(Bit#(`PADDR) paddr, Trap_type ex); + `endif + + method Bit#(`PERFMONITORS) icache_perfmon; + `ifdef prefetch + method ActionValue#(Bit#(`VADDR)) prefetch(); + `endif + endinterface + + typedef enum {Idle,KeepPolling,Stall,ReadingCache,Fence,IOReadResp} IcacheState deriving (Bits,Eq,FShow); + + (*synthesize*) + (*preempts="read_from_lbdata_into_hold_reg,keep_polling_on_stall"*) + (*preempts="virtual_address,read_from_lbdata_into_hold_reg"*) + (*preempts="read_data_fromcache,read_from_lbdata_into_hold_reg"*) + module mkicache(Ifc_icache); + /* VAddr = [tag_bits|set_bits|word_bits|byte_bits] */ + let byte_bits=valueOf(TLog#(`ICACHE_WORD_SIZE)); // number of bits to select a byte within a word. = 2 + let word_bits=valueOf(TLog#(`ICACHE_BLOCK_SIZE)); // number of bits to select a word within a block. = 4 + let set_bits=valueOf(TLog#(`ICACHE_SETS)); // number of bits to select a set from the cache. 
= + + Ifc_dcache_data data [`ICACHE_WAYS]; + Ifc_dcache_tag tag [`ICACHE_WAYS]; + for(Integer i=0;i<`ICACHE_WAYS;i=i+1)begin + tag[i] <- mkdcache_tag; + data[i] <-mkdcache_data; + end + + LFSR#(Bit#(2)) random_line<-mkRCounter(3); // for random line replacement + Reg#(Bit#(`VADDR)) rg_vaddress<-mkReg(0); + Reg#(Bit#(`PADDR)) rg_paddress<-mkReg(0); + Reg#(Trap_type) rg_tlb_exception[2]<-mkCReg(2,tagged None); + Reg#(Bool) rg_trnslte_done[2] <- mkCReg(2, `ifdef MMU False `else True `endif ); + Reg#(Bool) rg_stall_fetch <- mkReg(False); + + Reg#(Bit#(`PERFMONITORS)) rg_perf_monitor<-mkReg(0); + Reg#(IcacheState) rg_state[3]<-mkCReg(3,Fence); // this needs to be a CReg so that request can fire in the same cycle as response + Reg#(Bit#(TAdd#(1,TLog#(`ICACHE_SETS)))) rg_index <-mkReg(0); + Reg#(Bit#(TMul#(`ICACHE_WORD_SIZE,`ICACHE_BLOCK_SIZE))) rg_we<-mkReg(0); + Reg#(Bit#(TMul#(`ICACHE_WORD_SIZE,`ICACHE_BLOCK_SIZE))) line_bytes_written<-mkReg(0); + Reg#(Bool) increment_counters <-mkReg(True); + Reg#(Bool) capture_counters <-mkDReg(False); + + Wire#(Maybe#(Bit#(`VADDR))) wr_memoperation_address <-mkDWire(tagged Invalid); + + Reg#(Bool) ignore_memory_response<-mkReg(False); + `ifdef prefetch + Reg#(Bool) prefetchmode<-mkReg(False); + Reg#(Maybe#(Bit#(`VADDR))) rg_prefetchpc<-mkReg(tagged Invalid); + `endif + + Ifc_QuadMem lbdata <-mkQuadMem; + Wire#(Maybe#(Tuple3#(Bit#(32), Trap_type,Bit#(`PERFMONITORS)))) wr_response_to_cpu<-mkDWire(tagged Invalid); + FIFOF#(To_Memory#(`PADDR)) ff_request_to_memory <-mkSizedBypassFIFOF(1); + FIFOF#(From_Memory#(`DCACHE_WORD_SIZE)) ff_response_from_memory <-mkSizedBypassFIFOF(1); + FIFOF#(Tuple4#(Bit#(`PADDR),Bit#(`VADDR),Bit#(TLog#(`ICACHE_WAYS)),Bit#(TMul#(`ICACHE_WORD_SIZE,`ICACHE_BLOCK_SIZE)))) memoperation <-mkUGSizedFIFOF(2); + + Wire#(Maybe#(Bit#(TLog#(`ICACHE_SETS)))) wr_tag_read_index <- mkDWire(tagged Invalid); + Reg#(Maybe#(Bit#(TLog#(`ICACHE_SETS)))) wr_tag_write_index <- mkDReg(tagged Invalid); + + 
Wire#(Maybe#(Bit#(TLog#(`ICACHE_SETS)))) wr_data_read_index <- mkDWire(tagged Invalid); + Reg#(Maybe#(Bit#(TLog#(`ICACHE_SETS)))) wr_data_write_index <- mkDReg(tagged Invalid); + + + rule display_state; + `ifdef verbose $display($time,"\tICACHE: State: ",fshow(rg_state[2])); `endif + `ifdef verbose $display($time,"\tICACHE: translation done %h tlbexception: ", rg_trnslte_done[1], fshow(rg_tlb_exception[1])); `endif + endrule + + /*====== Invalidate all the entries in the cache on startup or during Fence ==== */ + rule fencing_the_cache(rg_state[0]==Fence && !memoperation.notEmpty); + rg_we<=0; + `ifdef verbose $display($time,"\tFencing icache of index %d", rg_index); `endif + if(rg_index==fromInteger(`ICACHE_SETS)) begin + if(!rg_stall_fetch) begin + `ifdef verbose $display($time,"\tFencing icache of is over"); `endif + rg_state[0]<=Idle; + rg_index<=0; + random_line.seed('d3); + end + end + else begin + for(Integer i=0;i<`ICACHE_WAYS;i=i+1)begin + tag[i].write_request(True,truncate(rg_index),0); + end + rg_index<=rg_index+1; + end + endrule + /*=============================================================================== */ + rule read_data_fromcache(rg_state[0]==ReadingCache && memoperation.notFull); + /*========== Check for hit or miss =================== */ + Bit#(TLog#(`ICACHE_WAYS)) linenum=0; + Bit#(`PERFMONITORS) perf_monitor=rg_perf_monitor; + Bit#(TMul#(TMul#(`ICACHE_BLOCK_SIZE,`ICACHE_WORD_SIZE),8)) dataline=0; + Bit#(TMul#(TMul#(`ICACHE_BLOCK_SIZE,`ICACHE_WORD_SIZE),8)) dataline_lb=0; + increment_counters<=True; + Bool hit=False; + Bool lbhit=False; + Bit#(`ICACHE_WAYS) valid_values=0; // hold the valid and dirty bits + Bit#(TLog#(`ICACHE_BLOCK_SIZE)) byteoffset=rg_vaddress[word_bits+byte_bits-1:byte_bits]; + Bit#(TLog#(`ICACHE_SETS)) setindex=rg_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits]; + `ifdef MMU + Bit#(`ICACHE_TAG_BITS) cpu_tag=rg_paddress[`PADDR-1:`PADDR-`ICACHE_TAG_BITS]; + `else + Bit#(`ICACHE_TAG_BITS) 
cpu_tag=rg_vaddress[`PADDR-1:`PADDR-`ICACHE_TAG_BITS]; + `endif + if(rg_trnslte_done[0]) begin + `ifdef MMU + if(rg_tlb_exception[0] matches tagged None)begin + if(!is_IO_Addr(rg_paddress))begin + `else + if(!is_IO_Addr(truncate(rg_vaddress)))begin + `endif + `ifdef prefetch + if(!prefetchmode && increment_counters) + perf_monitor[`ICACHE_CACHEABLE]=1; // cacheable access increment + `else + perf_monitor[`ICACHE_CACHEABLE]=1; // cacheable access increment + `endif + for(Integer i=0;i<`ICACHE_WAYS;i=i+1)begin + let stored_tag=tag[i].read_response[19:0]; + let stored_valid=tag[i].read_response[20]; + valid_values[i]=tag[i].read_response[20]; + + if(stored_valid==1 && stored_tag==cpu_tag)begin // if a tag matches capture the tag and data + hit=True; + linenum=fromInteger(i); + dataline=data[i].read_response; + `ifdef verbose $display($time,"ICACHE: DATALINE: %h",dataline); `endif + end + end + //wr_tag_read_index <= tagged Valid setindex; + //wr_data_read_index <= tagged Valid setindex; + Bit#(32) data_value=(dataline>>{5'd0,byteoffset}*32)[31:0]; + + let linebuffer=lbdata.response_portA; + let {lb_paddress,lb_vaddress,lbreplaceblock,lbwriteenable}=memoperation.first; + Bit#(`ICACHE_TAG_BITS) lbtag=lb_paddress[`PADDR-1:`PADDR-`ICACHE_TAG_BITS]; + Bit#(TLog#(`ICACHE_SETS)) lbset=lb_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits]; + if(memoperation.notEmpty && lbset==setindex && lbtag==cpu_tag)begin + dataline_lb=linebuffer; + `ifdef verbose $display($time,"\tICACHE: LB BUFFER HIT: data %h",dataline_lb); `endif + lbhit=True; + hit=False; + end + Bit#(32) data_value_lb=(dataline_lb>>{5'd0,byteoffset}*32)[31:0]; + + Bit#(TMul#(`ICACHE_WORD_SIZE,`ICACHE_BLOCK_SIZE)) requested_word=('hF<<({2'd0,byteoffset}*4)); + Bool polling_required= (line_bytes_written & requested_word) != requested_word; + `ifdef verbose $display($time,"\tICACHE: DATAVALUE: %h DATAVALUELB: %h requested_word: %h line_bytes_written: %h",data_value,data_value_lb,requested_word, 
line_bytes_written); `endif + /*====================================================== */ + /*=========== Respond to Core ============================ */ + if(rg_vaddress[1:0]!=0)begin // miss-aligned error. + perf_monitor[`ICACHE_MISALIGNED]=1; // cache mis-aligned error. + wr_response_to_cpu<=tagged Valid (tuple3(0,tagged Exception Inst_addr_misaligned,perf_monitor)); + rg_perf_monitor<=0; + rg_state[0]<=Idle; + `ifdef prefetch prefetchmode<=False; `endif + `ifdef MMU rg_trnslte_done[0] <= False; `endif + end + else if(hit || (lbhit&&!polling_required))begin // if there has been a hit. + if(lbhit ) + data_value = data_value_lb; + `ifdef verbose $display($time,"\tICACHE: Hit for address : %h data: %h offset: %h line: %d hit: %b lbhit: %b, polling_required: %b",rg_vaddress,data_value,byteoffset,linenum,hit,lbhit, polling_required); `endif + `ifdef prefetch + rg_prefetchpc<=tagged Invalid; + if(!prefetchmode)begin + wr_response_to_cpu<=tagged Valid (tuple3(data_value,tagged None,perf_monitor)); + rg_perf_monitor<=0; + `ifdef MMU rg_trnslte_done[0] <= False; `endif + end + else + prefetchmode<=False; + `else + wr_response_to_cpu<=tagged Valid (tuple3(data_value,tagged None,perf_monitor)); + rg_perf_monitor<=0; + `ifdef MMU rg_trnslte_done[0] <= False; `endif + `endif + rg_state[0]<=Idle; + end + else if(lbhit && polling_required)begin + rg_state[0]<=KeepPolling; + end + /*====================================================== */ + /*==== Request to memory =============================== */ + else begin // miss + `ifdef prefetch + if(!prefetchmode) begin + if(rg_vaddress[11:5]!='1)begin // check that prefetch does not cross physical page boundary + Bit#(`VADDR) mask='1<<(byte_bits+word_bits); + rg_prefetchpc<=tagged Valid ((rg_vaddress&mask)+('d1<<(word_bits+byte_bits))); + perf_monitor[`ICACHE_MISS]=1; // cache miss increment. 
+ end + rg_state[0]<=KeepPolling; + end + else // in prefetch mode send memory request and leave + rg_state[0]<=Idle; + `else + perf_monitor[`ICACHE_MISS]=1; // cache miss increment. + rg_state[0]<=KeepPolling; + `endif + Bit#(TLog#(`ICACHE_WAYS)) replaceblock; + if(valid_values=='1)begin // if all the lines are valid and no match then replace line + perf_monitor[`ICACHE_LINEREPLACE]=1; // cache line replacement increment. + replaceblock=truncate(random_line.value); + random_line.next; + `ifdef prefetch + if(prefetchmode)begin + `ifdef verbose $display($time,"\tICACHE: Prefetch Miss of address: %h Replacing line: %d valid: %b",rg_vaddress,random_line.value[1:0],valid_values); `endif + perf_monitor[`ICACHE_PREFETCHMISS]=1; + end + `endif + `ifdef verbose else + $display($time,"\tICACHE: Miss of address: %h Replacing line: %d valid: %b",rg_vaddress,random_line.value[1:0],valid_values); `endif + end + else begin // find the line which is not valid and fill it + let x=countZerosLSB(valid_values)-1; + replaceblock=pack(truncate(x)); + `ifdef prefetch + if(prefetchmode)begin + `ifdef verbose $display($time,"\tICACHE: Prefetch Miss of address: %h Filling line: %d",rg_vaddress,x); `endif + perf_monitor[`ICACHE_PREFETCHMISS]=1; + end + `endif + `ifdef verbose else + $display($time,"\tICACHE: Miss of address: %h Filling line: %d",rg_vaddress,x); `endif + end + `ifdef MMU + ff_request_to_memory.enq(To_Memory {address:truncate(rg_paddress&'hfffffff8),burst_length:fromInteger(`ICACHE_BLOCK_SIZE/2),ld_st:Load, transfer_size:3}); + `else + ff_request_to_memory.enq(To_Memory {address:truncate(rg_vaddress&'hfffffff8),burst_length:fromInteger(`ICACHE_BLOCK_SIZE/2),ld_st:Load, transfer_size:3}); + `endif + Bit#(TLog#(`ICACHE_BLOCK_SIZE)) val1=(rg_vaddress&'hfffffff8)[word_bits+byte_bits-1:byte_bits]; + Bit#(TMul#(`ICACHE_WORD_SIZE,`ICACHE_BLOCK_SIZE)) writeenable='hFF; + writeenable=writeenable<<{3'b0,val1}*4; + 
memoperation.enq(tuple4(rg_paddress,rg_vaddress,replaceblock,writeenable)); + `ifdef verbose $display($time,"\tICACHE: mask: %h byteoffset: %h perfmonitors: %h",writeenable,val1,perf_monitor); `endif + rg_perf_monitor<=perf_monitor; + end + end + else begin + `ifdef prefetch + if(prefetchmode)begin + `ifdef verbose $display($time,"\tICACHE: Do not prefetch in IO space"); `endif + rg_state[0]<=Idle; + prefetchmode<=False; + end + else `endif + begin + + `ifdef MMU + ff_request_to_memory.enq(To_Memory {address:truncate(rg_paddress),burst_length:1,ld_st:Load, transfer_size:2}); + `else + ff_request_to_memory.enq(To_Memory {address:truncate(rg_vaddress),burst_length:1,ld_st:Load, transfer_size:2}); + `endif + rg_state[0]<=IOReadResp; + `ifdef verbose $display($time,"\tICACHE: Sending Address for IO ACCESS: %h",rg_paddress); `endif + end + end + end + else begin + `ifdef prefetch + if(prefetchmode)begin + `ifdef verbose $display($time,"\tICACHE: do not respond if Prefetch generated a exception"); `endif + prefetchmode<=False; + end + else `endif + begin + wr_response_to_cpu<=tagged Valid tuple3(0,rg_tlb_exception[0],perf_monitor); + `ifdef verbose $display($time,"\tICACHE: TLB Exception "); `endif + end + rg_state[0]<=Idle; + `ifdef MMU rg_trnslte_done[0] <= False; `endif + rg_tlb_exception[0]<=tagged None; + end + /*===================================================================*/ + end + else begin + `ifdef verbose $display($time,"\tICACHE: Translated Address not Available"); `endif + rg_state[0] <= KeepPolling; + end + endrule + /*======= filling up the cache from the data recieved from the external memory ======= */ + rule read_IO_response(rg_state[0]==IOReadResp && !memoperation.notEmpty); + let memresp=ff_response_from_memory.first; + ff_response_from_memory.deq; + `ifdef verbose $display($time,"\tICACHE: Got response from IO ADDRESS: %h",memresp.data_line); `endif + wr_response_to_cpu<=tagged Valid 
(tuple3(truncate(memresp.data_line),memresp.bus_error==1?tagged Exception Inst_access_fault:tagged None,1)); + rg_state[0]<=Idle; + endrule + rule read_from_lbdata_into_hold_reg(line_bytes_written=='1 && memoperation.notEmpty); + let lb_hold_reg=lbdata.response_portB; + let {paddress,vaddress,replaceblock,writeenable}=memoperation.first; + Bit#(`ICACHE_TAG_BITS) cpu_tag=paddress[`PADDR-1:`PADDR-`ICACHE_TAG_BITS]; + Bit#(TLog#(`ICACHE_SETS)) setindex=vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits]; + Bit#(4) lbreplaceblock=0; + case (replaceblock) + 'd0:lbreplaceblock='b0001; + 'd1:lbreplaceblock='b0010; + 'd2:lbreplaceblock='b0100; + 'd3:lbreplaceblock='b1000; + endcase + for(Integer i=0;i<`ICACHE_WAYS;i=i+1)begin + tag[i].write_request((unpack(lbreplaceblock[i])&&True),setindex,{2'b1,cpu_tag}); + data[i].write_request(duplicate(lbreplaceblock[i]),setindex,lb_hold_reg); + end + line_bytes_written<=0; + memoperation.deq; + `ifdef verbose $display($time,"\tICACHE: capturing lbdata cpu_tag: %h setindex: %d linenum: %b data: %h",cpu_tag, setindex,lbreplaceblock,lb_hold_reg); `endif + if(rg_state[1]==KeepPolling) + rg_state[1]<=Stall; + endrule + + rule fillcache(memoperation.notEmpty && line_bytes_written!='1); + let memresp=ff_response_from_memory.first; + ff_response_from_memory.deq; + let {paddress,vaddress,replaceblock,writeenable}=memoperation.first; + let cpu_tag=paddress[`PADDR-1:`PADDR-`ICACHE_TAG_BITS]; + Bit#(TLog#(`ICACHE_SETS)) setindex=vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits]; + `ifdef verbose $display($time,"\tICACHE: Response from Memory: %h writeenable: %h",memresp.data_line, writeenable); `endif + let we=writeenable; + if(|line_bytes_written!=0)begin + we=rg_we; + end + Bit#(TMul#(2,TMul#(`ICACHE_WORD_SIZE,`ICACHE_BLOCK_SIZE))) extended_mask=zeroExtend(we)<<8; + lbdata.write_portB(we,duplicate(memresp.data_line)); + `ifdef verbose $display($time,"\tICACHE: linebytes: %h currently writing into: 
%h",line_bytes_written,we); `endif + if(memresp.last_word)begin // if all the data words have been fetched exit + `ifdef verbose $display($time,"\tICACHE: Received Last response from Memory"); `endif + end + `ifdef prefetch + prefetchmode<=False; + `endif + rg_we<=(extended_mask[2*`ICACHE_BLOCK_SIZE*`ICACHE_WORD_SIZE-1:`ICACHE_BLOCK_SIZE*`ICACHE_WORD_SIZE]|extended_mask[`ICACHE_BLOCK_SIZE*`ICACHE_WORD_SIZE-1:0]); // extended_mask is we shifted left by 8 (set earlier in this rule); OR-ing its two halves rotates we left by 8 bits + line_bytes_written<=line_bytes_written|we; // accumulate the byte lanes written so far + endrule + /*===================================================================================== */ + rule stall_the_next_request_by_one_cycle(rg_state[0]==Stall); // re-issue the tag/data reads for rg_vaddress, then go back to ReadingCache + Bit#(TLog#(`ICACHE_SETS)) setindex=rg_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits]; + for(Integer i=0;i<`ICACHE_WAYS;i=i+1)begin // send address to the Block_rams + tag[i].read_request(setindex); + data[i].read_request(setindex); + end + rg_state[0]<=ReadingCache; + endrule + /*===================================================================================== */ + rule keep_polling_on_stall(rg_state[1]==KeepPolling); // waits in KeepPolling; moves to ReadingCache once translation is done and the pending line-buffer fill does not block the request + Bit#(TLog#(`ICACHE_BLOCK_SIZE)) byteoffset=rg_vaddress[word_bits+byte_bits-1:byte_bits]; + Bit#(TMul#(`ICACHE_WORD_SIZE,`ICACHE_BLOCK_SIZE)) requested_word=('hF<<({2'd0,byteoffset}*4)); + Bit#(`PERFMONITORS) perf_monitor=0; + if(capture_counters)begin + perf_monitor[`ICACHE_CACHEABLE]=1; + perf_monitor[`ICACHE_MISS]=1; + rg_perf_monitor<=perf_monitor; + end + let {lb_paddress,lb_vaddress,replaceblock,writeenable}=memoperation.first; + Bit#(TLog#(`ICACHE_SETS)) setindex=rg_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits]; + Bit#(`ICACHE_TAG_BITS) cpu_tag=rg_paddress[`PADDR-1:`PADDR-`ICACHE_TAG_BITS]; // same tag slice as lbtag below (was hard-coded to 20 bits) so the comparison follows ICACHE_TAG_BITS + Bit#(`ICACHE_TAG_BITS) lbtag=lb_paddress[`PADDR-1:`PADDR-`ICACHE_TAG_BITS]; + Bit#(TLog#(`ICACHE_SETS)) lbset=lb_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits]; + Bool generate_request=True; + `ifdef verbose $display($time,"\tICACHE: line_bytes_written: %h requested_word: %h memoperation: %b 
",line_bytes_written,requested_word,memoperation.notEmpty); `endif + if(lbset==setindex && lbtag==cpu_tag && memoperation.notEmpty) + if((line_bytes_written & requested_word) != requested_word) + generate_request=False; + if(rg_trnslte_done[1] && generate_request)begin + if(rg_tlb_exception[1] matches tagged None)begin + begin + `ifdef verbose $display($time,"\tICACHE: Accessing LB"); `endif + rg_state[1]<=ReadingCache; + for(Integer i=0;i<`ICACHE_WAYS;i=i+1)begin // send address to the Block_rams + tag[i].read_request(setindex); + data[i].read_request(setindex); + end + end + end + else begin + rg_state[1]<=ReadingCache; + end + end + endrule + + /*============= Prediction in burst mode ================================ */ + method Action virtual_address(Bit#(`VADDR) vaddress,Bool fence)if(rg_state[1]==Idle); + if(fence)begin + rg_state[1]<=Fence; + end + else begin + Bit#(TLog#(`ICACHE_SETS)) setindex=vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits]; + `ifdef verbose $display($time,"\tICACHE: Request of VAddr: %h set: %d",vaddress, setindex); `endif + rg_vaddress<=vaddress; + for(Integer i=0;i<`ICACHE_WAYS;i=i+1)begin // send address to the Block_rams + tag[i].read_request(truncate(setindex)); + data[i].read_request(vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits]); + end + rg_state[1]<=ReadingCache; + end + endmethod + method Maybe#(Tuple3#(Bit#(32), Trap_type, Bit#(`PERFMONITORS))) response_to_core; + return wr_response_to_cpu; + endmethod + `ifdef MMU + method Action physical_address(Bit#(`PADDR) paddr, Trap_type ex); + `ifdef verbose $display($time,"\tICACHE: Sending physical address %h to icache ",paddr); `endif + rg_paddress<=paddr; + rg_tlb_exception[1]<=ex; + //rg_state[1]<=ReadingCache; + rg_trnslte_done[1] <= True; + endmethod + `endif + method ActionValue#(To_Memory#(`PADDR)) request_to_memory; + ff_request_to_memory.deq; + return ff_request_to_memory.first; + endmethod + method Action 
response_from_memory(From_Memory#(`DCACHE_WORD_SIZE) resp); + if(!ignore_memory_response) + ff_response_from_memory.enq(resp); + else if(resp.last_word) + ignore_memory_response<=False; + endmethod + method Bit#(`PERFMONITORS) icache_perfmon; + return rg_perf_monitor; + endmethod + method Action stall_fetch(Bool stall); + rg_stall_fetch <= stall; + endmethod + `ifdef prefetch + method ActionValue#(Bit#(`VADDR)) prefetch() if(rg_state[1]==Idle &&& rg_prefetchpc matches tagged Valid .vaddress &&& !memoperation.notEmpty); + Bit#(TLog#(`ICACHE_SETS)) setindex=vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits]; + `ifdef verbose $display($time,"\tICACHE: Prefetch Request of VAddr: %h set: %d",vaddress, setindex); `endif + for(Integer i=0;i<`ICACHE_WAYS;i=i+1)begin // send address to the Block_rams + tag[i].read_request(truncate(setindex)); + data[i].read_request(vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits]); + end + rg_state[1]<=ReadingCache; + rg_vaddress<=vaddress; + prefetchmode<=True; + rg_prefetchpc<=tagged Invalid; + return vaddress; + endmethod + `endif + endmodule +endpackage diff --git a/src/core/icache_asic.bsv b/src/core/icache_asic.bsv new file mode 100644 index 0000000..0134d52 --- /dev/null +++ b/src/core/icache_asic.bsv @@ -0,0 +1,468 @@ +/* +Copyright (c) 2013, IIT Madras +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
+* Neither the name of IIT Madras nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +*/ +package icache_asic; + /*===== Pacakge imports ===== */ + import BRAMCore::*; + import FIFO::*; + import FIFOF::*; + import SpecialFIFOs::*; + import LFSR::*; + import ConfigReg::*; + import DReg::*; + import BUtils::*; + import MemoryMap::*; + /*===== project imports==== */ + import defined_types::*; + `ifdef bpu + import branchpredictor::*; + `endif + `include "defined_parameters.bsv" + /*========================= */ + + + interface Ifc_icache; + method Action virtual_address(Bit#(`VADDR) vaddress,Bool fence); + method Maybe#(Tuple3#(Bit#(32), Trap_type, Bit#(`PERFMONITORS))) response_to_core; + method Action response_from_memory(From_Memory#(`DCACHE_WORD_SIZE) resp); + method ActionValue#(To_Memory#(`PADDR)) request_to_memory; + method Action stall_fetch(Bool stall); + +// method Bool init_complete; + `ifdef bpu + method Maybe#(Tuple2#(Bit#(`VADDR),Bit#(2))) 
prediction_response; + method Action training (Maybe#(Training_data#(`VADDR)) training_data); + `endif + `ifdef MMU + method Action physical_address(Bit#(`PADDR) paddr, Trap_type ex); + `endif + + method Bit#(`PERFMONITORS) icache_perfmon; + `ifdef prefetch + method ActionValue#(Bit#(`VADDR)) prefetch(); + `endif + endinterface + + typedef enum {Idle,Stall,ReadingCache,Fence,IOReadResp} IcacheState deriving (Bits,Eq,FShow); + + (*synthesize*) + (*preempts="fillcache,fencing_the_cache"*) + module mkicache(Ifc_icache); + /* VAddr = [tag_bits|set_bits|word_bits|byte_bits] */ + let byte_bits=valueOf(TLog#(`ICACHE_WORD_SIZE)); // number of bits to select a byte within a word. = 2 + let word_bits=valueOf(TLog#(`ICACHE_BLOCK_SIZE)); // number of bits to select a word within a block. = 4 + let set_bits=valueOf(TLog#(`ICACHE_SETS)); // number of bits to select a set from the cache. = + `ifdef bpu + Ifc_branchpredictor bpu <-mkbranchpredictor; + Wire#(Maybe#(Tuple2#(Bit#(`VADDR),Bit#(2)))) wr_prediction_to_cpu <-mkDWire(tagged Invalid); + `endif + + BRAM_DUAL_PORT#(Bit#(TLog#(`ICACHE_SETS)),Bit#(TAdd#(`ICACHE_TAG_BITS,2))) tag [`ICACHE_WAYS]; + BRAM_DUAL_PORT_BE#(Bit#(TLog#(`ICACHE_SETS)),Bit#(TMul#(TMul#(8,`ICACHE_WORD_SIZE),`ICACHE_BLOCK_SIZE)),64) data [`ICACHE_WAYS]; + for(Integer i=0;i<`ICACHE_WAYS;i=i+1)begin + tag[i] <- mkBRAMCore2(`ICACHE_SETS,False); + data[i] <-mkBRAMCore2BE(`ICACHE_SETS,False); + end + + LFSR#(Bit#(2)) random_line<-mkRCounter(3); // for random line replacement + Reg#(Bit#(`VADDR)) rg_vaddress<-mkReg(0); + Reg#(Bit#(`PADDR)) rg_paddress<-mkReg(0); + Reg#(Trap_type) rg_tlb_exception[2]<-mkCReg(2,tagged None); + Reg#(Bool) rg_trnslte_done[2] <- mkCReg(2, `ifdef MMU False `else True `endif ); + Reg#(Bool) rg_stall_fetch <- mkReg(False); + + Reg#(Bit#(`PERFMONITORS)) rg_perf_monitor<-mkReg(0); + Reg#(IcacheState) rg_state[3]<-mkCReg(3,Fence); // this needs to be a CReg so that request can fire in the same cycle as response + 
Reg#(Bit#(TAdd#(1,TLog#(`ICACHE_SETS)))) rg_index <-mkReg(0); // fence sweep counter; one bit wider than a set index so it can reach ICACHE_SETS + Reg#(Maybe#(Bit#(TMul#(`ICACHE_WORD_SIZE,`ICACHE_BLOCK_SIZE)))) rg_we<-mkReg(tagged Invalid); + Reg#(Bit#(TMul#(`ICACHE_WORD_SIZE,`ICACHE_BLOCK_SIZE))) line_bytes_written<-mkReg(0); + Reg#(Bool) increment_counters <-mkReg(True); + Reg#(Bool) capture_counters <-mkDReg(False); + + Wire#(Maybe#(Bit#(`VADDR))) wr_memoperation_address <-mkDWire(tagged Invalid); + + Reg#(Bool) ignore_memory_response<-mkReg(False); + `ifdef prefetch + Reg#(Bool) prefetchmode<-mkReg(False); + Reg#(Maybe#(Bit#(`VADDR))) rg_prefetchpc<-mkReg(tagged Invalid); + `endif + + Wire#(Maybe#(Tuple3#(Bit#(32), Trap_type,Bit#(`PERFMONITORS)))) wr_response_to_cpu<-mkDWire(tagged Invalid); + FIFOF#(To_Memory#(`PADDR)) ff_request_to_memory <-mkSizedBypassFIFOF(1); + FIFOF#(From_Memory#(`DCACHE_WORD_SIZE)) ff_response_from_memory <-mkSizedBypassFIFOF(1); + FIFOF#(Tuple4#(Bit#(`PADDR),Bit#(`VADDR),Bit#(TLog#(`ICACHE_WAYS)),Bit#(TMul#(`ICACHE_WORD_SIZE,`ICACHE_BLOCK_SIZE)))) memoperation <-mkUGSizedFIFOF(2); + rule display_state; // debug trace only: both statements are compiled in only when verbose is defined; prints only state declared in this module + `ifdef verbose $display($time,"\tICACHE: State: ",fshow(rg_state[2])); `endif + `ifdef verbose $display($time,"\tICACHE: translation done %h tlbexception: ", rg_trnslte_done[1], fshow(rg_tlb_exception[1])); `endif + endrule + + /*====== Invalidate all the entries in the cache on startup or during Fence ==== */ + rule fencing_the_cache(rg_state[0]==Fence); // writes 0 into the tag entry of every way at set rg_index, advancing one set per firing + rg_we<=tagged Invalid; + memoperation.clear; + `ifdef verbose $display($time,"\tFencing icache of index %d", rg_index); `endif + for(Integer i=0;i<`ICACHE_WAYS;i=i+1)begin + tag[i].b.put(True,truncate(rg_index),0); + end + if(rg_index==fromInteger(`ICACHE_SETS)) begin // sweep finished; the rule keeps firing in Fence until rg_stall_fetch is low + if(!rg_stall_fetch) begin + `ifdef verbose $display($time,"\tFencing icache of is over"); `endif + rg_state[0]<=Idle; + rg_index<=0; + random_line.seed('d3); + end + end + else + rg_index<=rg_index+1; + endrule + rule
discard_memory_responses_during_fence; + ff_response_from_memory.deq; + endrule + /*=============================================================================== */ + rule read_data_fromcache(rg_state[0]==ReadingCache && memoperation.notFull); + /*========== Check for hit or miss =================== */ + Bit#(TLog#(`ICACHE_WAYS)) linenum=0; + Bit#(`PERFMONITORS) perf_monitor=rg_perf_monitor; + Bit#(TMul#(TMul#(`ICACHE_BLOCK_SIZE,`ICACHE_WORD_SIZE),8)) dataline=0; + increment_counters<=True; + Bool hit=False; + Bit#(`ICACHE_WAYS) valid_values; // hold the valid and dirty bits + Bit#(TLog#(`ICACHE_BLOCK_SIZE)) byteoffset=rg_vaddress[word_bits+byte_bits-1:byte_bits]; + Bit#(TLog#(`DCACHE_SETS)) setindex=rg_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits]; + `ifdef MMU + Bit#(`ICACHE_TAG_BITS) cpu_tag=rg_paddress[`PADDR-1:`PADDR-`ICACHE_TAG_BITS]; + `else + Bit#(`ICACHE_TAG_BITS) cpu_tag=rg_vaddress[`PADDR-1:`PADDR-`ICACHE_TAG_BITS]; + `endif + if(rg_trnslte_done[0]) begin + `ifdef MMU + if(rg_tlb_exception[0] matches tagged None)begin + if(!is_IO_Addr(rg_paddress))begin + `else + if(!is_IO_Addr(truncate(rg_vaddress)))begin + `endif + `ifdef prefetch + if(!prefetchmode && increment_counters) + perf_monitor[`ICACHE_CACHEABLE]=1; // cacheable access increment + `else + perf_monitor[`ICACHE_CACHEABLE]=1; // cacheable access increment + `endif + for(Integer i=0;i<`ICACHE_WAYS;i=i+1)begin + let stored_tag=tag[i].a.read[19:0]; + let stored_valid=tag[i].a.read[20]; + valid_values[i]=tag[i].a.read[20]; + if(stored_valid==1 && stored_tag==cpu_tag)begin // if a tag matches capture the tag and data + hit=True; + linenum=fromInteger(i); + dataline=data[i].a.read; + `ifdef verbose $display($time,"ICACHE: DATALINE: %h",dataline); `endif + end + end + Bit#(32) data_value=(dataline>>{5'd0,byteoffset}*32)[31:0]; + + /*====================================================== */ + /*=========== Respond to Core ============================ */ + if(rg_vaddress[1:0]!=0)begin // 
miss-aligned error. + perf_monitor[`ICACHE_MISALIGNED]=1; // cache mis-aligned error. + wr_response_to_cpu<=tagged Valid (tuple3(0,tagged Exception Inst_addr_misaligned,perf_monitor)); + rg_perf_monitor<=0; + rg_state[0]<=Idle; + `ifdef prefetch prefetchmode<=False; `endif + let x<-bpu.prediction_response; + wr_prediction_to_cpu<=tagged Valid x; + `ifdef MMU rg_trnslte_done[0] <= False; `endif + end + else if(hit)begin // if there has been a hit. + `ifdef verbose $display($time,"\tICACHE: Hit for address : %h data: %h offset: %h line: %d",rg_vaddress,data_value,byteoffset,linenum); `endif + rg_prefetchpc<=tagged Invalid; + `ifdef prefetch + if(!prefetchmode)begin + wr_response_to_cpu<=tagged Valid (tuple3(data_value,tagged None,perf_monitor)); + rg_perf_monitor<=0; + `ifdef MMU rg_trnslte_done[0] <= False; `endif + end + else + prefetchmode<=False; + `else + wr_response_to_cpu<=tagged Valid (tuple3(data_value,tagged None,perf_monitor)); + rg_perf_monitor<=0; + `ifdef MMU rg_trnslte_done[0] <= False; `endif + `endif + rg_state[0]<=Idle; + let x<-bpu.prediction_response; + wr_prediction_to_cpu<=tagged Valid x; + end + /*====================================================== */ + /*==== Request to memory =============================== */ + else begin // miss + `ifdef prefetch + if(!prefetchmode) begin + if(rg_vaddress[11:5]!='1)begin // check that prefetch does not cross physical page boundary + Bit#(`VADDR) mask='1<<(byte_bits+word_bits); + rg_prefetchpc<=tagged Valid ((rg_vaddress&mask)+('d1<<(word_bits+byte_bits))); + perf_monitor[`ICACHE_MISS]=1; // cache miss increment. + end + rg_state[0]<=Stall; + end + else // in prefetch mode send memory request and leave + rg_state[0]<=Idle; + `else + perf_monitor[`ICACHE_MISS]=1; // cache miss increment. 
+ rg_state[0]<=Stall; + `endif + Bit#(TLog#(`ICACHE_WAYS)) replaceblock; + if(valid_values=='1)begin // if all the lines are valid and no match then replace line + perf_monitor[`ICACHE_LINEREPLACE]=1; // cache line replacement increment. + replaceblock=truncate(random_line.value); + random_line.next; + if(prefetchmode)begin + `ifdef verbose $display($time,"\tICACHE: Prefetch Miss of address: %h Replacing line: %d valid: %b",rg_vaddress,random_line.value[1:0],valid_values); `endif + perf_monitor[`ICACHE_PREFETCHMISS]=1; + end + `ifdef verbose else + $display($time,"\tICACHE: Miss of address: %h Replacing line: %d valid: %b",rg_vaddress,random_line.value[1:0],valid_values); `endif + end + else begin // find the line which is not valid and fill it + let x=countZerosLSB(valid_values)-1; + replaceblock=pack(truncate(x)); + if(prefetchmode)begin + `ifdef verbose $display($time,"\tICACHE: Prefetch Miss of address: %h Filling line: %d",rg_vaddress,x); `endif + perf_monitor[`ICACHE_PREFETCHMISS]=1; + end + `ifdef verbose else + $display($time,"\tICACHE: Miss of address: %h Filling line: %d",rg_vaddress,x); `endif + end + `ifdef MMU + ff_request_to_memory.enq(To_Memory {address:truncate(rg_paddress&'hfffffff8),burst_length:fromInteger(`ICACHE_BLOCK_SIZE/2),ld_st:Load, transfer_size:3}); + `else + ff_request_to_memory.enq(To_Memory {address:truncate(rg_vaddress&'hfffffff8),burst_length:fromInteger(`ICACHE_BLOCK_SIZE/2),ld_st:Load, transfer_size:3}); + `endif + Bit#(TLog#(`ICACHE_BLOCK_SIZE)) val1=(rg_vaddress&'hfffffff8)[word_bits+byte_bits-1:byte_bits]; + Bit#(TMul#(`ICACHE_WORD_SIZE,`ICACHE_BLOCK_SIZE)) writeenable='hFF; + writeenable=writeenable<<{3'b0,val1}*4; + memoperation.enq(tuple4(rg_paddress,rg_vaddress,replaceblock,writeenable)); + `ifdef verbose $display($time,"\tICACHE: mask: %h byteoffset: %h perfmonitors: %h",writeenable,val1,perf_monitor); `endif + rg_perf_monitor<=perf_monitor; + end + end + else begin + if(!prefetchmode)begin + `ifdef MMU + 
ff_request_to_memory.enq(To_Memory {address:truncate(rg_paddress),burst_length:1,ld_st:Load, transfer_size:2}); + `else + ff_request_to_memory.enq(To_Memory {address:truncate(rg_vaddress),burst_length:1,ld_st:Load, transfer_size:2}); + `endif + rg_state[0]<=IOReadResp; + `ifdef verbose $display($time,"\tICACHE: Sending Address for IO ACCESS: %h",rg_paddress); `endif + end + else begin + $display($time,"\tICACHE: Do not prefetch in IO space"); + rg_state[0]<=Idle; + prefetchmode<=False; + end + end + end + else begin + if(!prefetchmode)begin + wr_response_to_cpu<=tagged Valid tuple3(0,rg_tlb_exception[0],perf_monitor); + let x<-bpu.prediction_response; + wr_prediction_to_cpu<=tagged Valid x; + end + else begin + $display($time,"\tICACHE: do not respond if Prefetch generated a exception"); + prefetchmode<=False; + end + rg_state[0]<=Idle; + `ifdef MMU rg_trnslte_done[0] <= False; `endif + rg_tlb_exception[0]<=tagged None; + end + /*===================================================================*/ + end + else begin + `ifdef verbose $display($time,"\tICACHE: Translated Address not Available"); `endif + rg_state[0] <= Stall; + end + endrule + /*======= filling up the cache from the data recieved from the external memory ======= */ + rule read_IO_response(rg_state[0]==IOReadResp && !memoperation.notEmpty); + let memresp=ff_response_from_memory.first; + `ifdef verbose $display($time,"\tICACHE: Got response from IO ADDRESS: %h",memresp.data_line); `endif + wr_response_to_cpu<=tagged Valid (tuple3(truncate(memresp.data_line),memresp.bus_error==1?tagged Exception Inst_access_fault:tagged None,1)); + wr_prediction_to_cpu<=tagged Valid (tuple2(0,0)); + rg_state[0]<=Idle; + endrule + rule temp(memoperation.notEmpty); + let {paddress,vaddress,replaceblock,writeenable}=memoperation.first; + wr_memoperation_address<=tagged Valid vaddress; + endrule + rule fillcache(memoperation.notEmpty); + let memresp=ff_response_from_memory.first; + let 
{paddress,vaddress,replaceblock,writeenable}=memoperation.first; + `ifdef MMU + let cpu_tag=paddress[`PADDR-1:`PADDR-`ICACHE_TAG_BITS]; + `else + let cpu_tag=vaddress[`PADDR-1:`PADDR-`ICACHE_TAG_BITS]; + `endif + let setindex=vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits]; + `ifdef verbose $display($time,"\tICACHE: Response from Memory: %h ",memresp.data_line); `endif + let we=writeenable; + if(rg_we matches tagged Valid .x)begin + we=x; + end + Bit#(TMul#(2,TMul#(`ICACHE_WORD_SIZE,`ICACHE_BLOCK_SIZE))) extended_mask=zeroExtend(we)<<8; + data[replaceblock].b.put(we,setindex,duplicate(memresp.data_line)); + `ifdef verbose $display($time,"\tICACHE ExtendedMASK: %h RgMASK: %h line_bytes_written: %h",extended_mask,we,line_bytes_written); `endif + tag[replaceblock].b.put(True,setindex,{2'b1,cpu_tag}); // update the tag value + if(memresp.last_word)begin // if all the data words have been fetched exit + `ifdef verbose $display($time,"\tICACHE: Received Last response from Memory"); `endif + `ifdef prefetch + prefetchmode<=False; + `endif + rg_we<=tagged Invalid; + memoperation.deq; + line_bytes_written<=0; + end + else begin + rg_we<=tagged Valid (extended_mask[2*`ICACHE_BLOCK_SIZE*`ICACHE_WORD_SIZE-1:`ICACHE_BLOCK_SIZE*`ICACHE_WORD_SIZE]|extended_mask[`ICACHE_BLOCK_SIZE*`ICACHE_WORD_SIZE-1:0]); + line_bytes_written<=line_bytes_written|we; + end + endrule + /*===================================================================================== */ + /*===================================================================================== */ + rule keep_polling_on_stall(rg_state[0]==Stall); + Bit#(TLog#(`ICACHE_BLOCK_SIZE)) byteoffset=rg_vaddress[word_bits+byte_bits-1:byte_bits]; + Bit#(TMul#(`ICACHE_WORD_SIZE,`ICACHE_BLOCK_SIZE)) requested_word=('hF<<({2'd0,byteoffset}*4)); + Bool generate_request=True; + Bit#(`PERFMONITORS) perf_monitor=0; + if(capture_counters)begin + perf_monitor[`ICACHE_CACHEABLE]=1; + perf_monitor[`ICACHE_MISS]=1; + 
rg_perf_monitor<=perf_monitor; + end + + if(!memoperation.notFull) + generate_request=False; + else if(wr_memoperation_address matches tagged Valid .x)begin // some line is being filled from memory + Bit#(TSub#(`VADDR,TAdd#(TLog#(`ICACHE_WORD_SIZE),TLog#(`ICACHE_BLOCK_SIZE)))) memline=x[`VADDR-1:word_bits+byte_bits]; + Bit#(TSub#(`VADDR,TAdd#(TLog#(`ICACHE_WORD_SIZE),TLog#(`ICACHE_BLOCK_SIZE)))) reqline=rg_vaddress[`VADDR-1:word_bits+byte_bits]; + if(memline==reqline) // they are accessing the same line + if((line_bytes_written & requested_word) != requested_word) + generate_request=False; + end + if(generate_request && rg_trnslte_done[1])begin + Bit#(TLog#(`ICACHE_SETS)) setindex=rg_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits]; + `ifdef verbose $display($time,"\tICACHE: POLLING ON VAddr: %h set: %d rg_trnslte_done: %b",rg_vaddress, setindex,rg_trnslte_done[1]); `endif + increment_counters<=False; + for(Integer i=0;i<`ICACHE_WAYS;i=i+1)begin // send address to the Block_rams + tag[i].a.put(False,rg_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits], ?); + data[i].a.put('d0,rg_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits], ?); + end + `ifdef bpu + bpu.send_prediction_request(rg_vaddress); + `endif + rg_state[0]<=ReadingCache; + end + endrule + + /*============= Prediction in burst mode ================================ */ + method Action virtual_address(Bit#(`VADDR) vaddress,Bool fence)if(rg_state[1]==Idle); + if(fence)begin + rg_state[1]<=Fence; + end + else begin + Bit#(TLog#(`ICACHE_SETS)) setindex=vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits]; + `ifdef verbose $display($time,"\tICACHE: Request of VAddr: %h set: %d",vaddress, setindex); `endif + Bit#(TLog#(`ICACHE_BLOCK_SIZE)) byteoffset=vaddress[word_bits+byte_bits-1:byte_bits]; + Bit#(TMul#(`ICACHE_WORD_SIZE,`ICACHE_BLOCK_SIZE)) requested_word=('hF<<({2'd0,byteoffset}*4)); + Bool generate_request=True; + if(!memoperation.notFull) + generate_request=False; + 
else if(wr_memoperation_address matches tagged Valid .x)begin // some line is being filled from memory + Bit#(TSub#(`VADDR,TAdd#(TLog#(`ICACHE_WORD_SIZE),TLog#(`ICACHE_BLOCK_SIZE)))) memline=x[`VADDR-1:word_bits+byte_bits]; + Bit#(TSub#(`VADDR,TAdd#(TLog#(`ICACHE_WORD_SIZE),TLog#(`ICACHE_BLOCK_SIZE)))) reqline=vaddress[`VADDR-1:word_bits+byte_bits]; + if(memline==reqline) // they are accessing the same line + if((line_bytes_written & requested_word) != requested_word) + generate_request=False; + end + rg_vaddress<=vaddress; + if(generate_request)begin + for(Integer i=0;i<`ICACHE_WAYS;i=i+1)begin // send address to the Block_rams + tag[i].a.put(False,vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits], ?); + data[i].a.put('d0,vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits], ?); + end + `ifdef bpu + bpu.send_prediction_request(vaddress); + `endif + rg_state[1]<=ReadingCache; + end + else begin + `ifdef verbose $display($time,"\tICACHE: Miss during Cache Fill"); `endif + capture_counters<=True; + rg_state[1]<=Stall; + end + end + endmethod + method Maybe#(Tuple3#(Bit#(32), Trap_type, Bit#(`PERFMONITORS))) response_to_core; + return wr_response_to_cpu; + endmethod + `ifdef MMU + method Action physical_address(Bit#(`PADDR) paddr, Trap_type ex); + `ifdef verbose $display($time,"\tICACHE: Sending physical address %h to icache ",paddr); `endif + rg_paddress<=paddr; + rg_tlb_exception[1]<=ex; + //rg_state[1]<=ReadingCache; + rg_trnslte_done[1] <= True; + endmethod + `endif + method ActionValue#(To_Memory#(`PADDR)) request_to_memory; + ff_request_to_memory.deq; + return ff_request_to_memory.first; + endmethod + method Action response_from_memory(From_Memory#(`DCACHE_WORD_SIZE) resp); + if(!ignore_memory_response) + ff_response_from_memory.enq(resp); + else if(resp.last_word) + ignore_memory_response<=False; + endmethod + method Bit#(`PERFMONITORS) icache_perfmon; + return rg_perf_monitor; + endmethod +// method Bool init_complete; +// return 
(rg_state[1]!=Fence `ifdef bpu && bpu.init_complete `endif ); +// endmethod + method Action stall_fetch(Bool stall); + rg_stall_fetch <= stall; + endmethod + `ifdef prefetch + method ActionValue#(Bit#(`VADDR)) prefetch() if(rg_state[1]==Idle &&& rg_prefetchpc matches tagged Valid .vaddress &&& !memoperation.notEmpty); + Bit#(TLog#(`ICACHE_SETS)) setindex=vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits]; + `ifdef verbose $display($time,"\tICACHE: Prefetch Request of VAddr: %h set: %d",vaddress, setindex); `endif + for(Integer i=0;i<`ICACHE_WAYS;i=i+1)begin // send address to the Block_rams + tag[i].a.put(False,vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits], ?); + data[i].a.put('d0,vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits], ?); + end + rg_state[1]<=ReadingCache; + rg_vaddress<=vaddress; + prefetchmode<=True; + rg_prefetchpc<=tagged Invalid; + return vaddress; + endmethod + `endif + `ifdef bpu + method Maybe#(Tuple2#(Bit#(`VADDR),Bit#(2))) prediction_response; + return wr_prediction_to_cpu; + endmethod + method Action training (Maybe#(Training_data#(`VADDR)) training_data)=bpu.training(training_data); + `endif + endmodule +endpackage diff --git a/src/core/imem.bsv b/src/core/imem.bsv new file mode 100644 index 0000000..d095989 --- /dev/null +++ b/src/core/imem.bsv @@ -0,0 +1,180 @@ +/* +Copyright (c) 2013, IIT Madras +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
+* Neither the name of IIT Madras nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +*/ +package imem; + + /*========= package imports========== */ + import FIFOF::*; + import SpecialFIFOs::*; + import Connectable::*; + import GetPut::*; + /*==================================== */ + /* ======== project imports ========= */ + import TxRx ::*; + import icache ::*; + import defined_types::*; + import MemoryMap::*; + `ifdef bpu + import branchpredictor::*; + `endif + `include "defined_parameters.bsv" + `ifdef MMU import iTLB::*; `endif + /* ================================== */ + +interface Ifc_imem; + /*======= Mandatory Interface to the core ================ */ + interface Put#(Tuple5#(Bit#(2),Bit#(`VADDR),Bit#(`VADDR),Bool,Bit#(3))) request_from_core; + method Maybe#(Tuple7#(Bit#(`VADDR),Bit#(2),Bit#(`VADDR),Bit#(32), Trap_type, Bit#(`PERFMONITORS), Bit#(3))) instruction_response_to_core; + method Action flush(Flush_type _flush); + method 
Bit#(`PERFMONITORS) imem_perfmon; +// method Bool init_complete; + method Action stall_fetch(Bool stall); + /*=============================================== */ + /*====== Madatory Interface to the external Bus ======= */ + method Action response_from_memory(From_Memory#(`DCACHE_WORD_SIZE) mem_data); + method ActionValue#(To_Memory#(`PADDR)) request_to_memory; + /*============================================ */ + `ifdef bpu + interface Get#(Tuple4#(Bit#(3),Bit#(`VADDR),Bit#(`VADDR),Bit#(2))) prediction_response; + method Action training (Maybe#(Training_data#(`VADDR)) training_data); + interface Put#(Tuple2#(Bit#(3),Bit#(`VADDR))) send_prediction_request; + `endif + `ifdef MMU + method Action translation_protection_frm_csr(bit tlb_disable, Chmod per_bits, Bit#(TAdd#(4,`ASID)) asid); + interface Get#(Request_PPN_PTW#(`VADDR,`OFFSET)) to_PTW; + interface Put#(Tuple2#(Bool,To_TLB#(`PADDR,`OFFSET,`ASID))) refill_TLB; + method Action fence_itlb(Fence_VMA_type#(`VADDR) rsdata); + `endif + `ifdef prefetch + method Action prefetch(); + `endif +endinterface +(*synthesize*) +(*mutually_exclusive="request_from_core.put,send_cache_request_to_memory"*) +//(*mutually_exclusive="request_from_core.put,send_translated_vaddress"*) +module mkimem(Ifc_imem); + Ifc_icache icache <- mkicache; + Reg#(Bool) io_access_started<-mkReg(False); + + Wire#(Bit#(`VADDR)) wr_address_from_core <-mkWire(); + Wire#(Bool) wr_flush <-mkDWire(False); + + `ifdef bpu + Ifc_branchpredictor bpu<-mkbranchpredictor(); + `endif + + + Wire#(Maybe#(Tuple7#(Bit#(`VADDR),Bit#(2),Bit#(`VADDR), Bit#(32), Trap_type, Bit#(`PERFMONITORS),Bit#(3)))) wr_response_to_cpu<-mkDWire(tagged Invalid); + FIFOF#(To_Memory#(`PADDR)) ff_request_to_memory <-mkSizedBypassFIFOF(1); + Reg#(Trap_type) rg_exception <- mkReg(tagged None); + Reg#(Bit#(3)) epochs <-mkReg(0); + Reg#(Bit#(`VADDR)) pc<-mkReg(0); + Reg#(Bit#(`VADDR)) npc<-mkReg(0); + Reg#(Bit#(2)) prediction <-mkReg(0); + + `ifdef MMU + Ifc_iTLB itlb <- mkiTLB; + rule 
send_translated_paddress(rg_exception matches tagged None); + let x <- itlb.send_ppn; + //if(x.exception matches tagged None) begin + icache.physical_address(truncate(x.address),x.exception); //TODO vaddr + //end + endrule + `endif + rule send_cache_request_to_memory; + let x<-icache.request_to_memory; + ff_request_to_memory.enq(x); + endrule + /*======= collect responses from the cache and store them in the FIFOs============ */ + + // This rule collects the prediction information from the branchpredictor and stores in them + // FIFO. The request to the predictor is provided as part of the request to the cache itself. + // This the branchpredictor will not work for io accesses. + // This rule collects the instructions provided by the cache. The cache can provide a one or two instructions. If there is a hit + // in the cache then a single instruction is provided. On a miss in the cache the instruction are supplied as part of the cache-line-fill + // which is 2 instructions at a time. Whethere to enque one or two or no instructions into the core pipe is indicated by the variable + // singledual. A value of 00 indicates that none of the instructions provided by the memory (as part of the line) need to be enqued + // into the cache. A value of 01 means the lower instruction should be enqued, 'b10 means the upper instruction only needs to be enqued + // and 'b11 means both the instructions need to be enqueud. 
+ rule collect_first_instruction_from_icache; // tags a valid icache response with the pc/prediction/npc/epochs registers and forwards it to the core + let x=icache.response_to_core; + if(x matches tagged Valid .resp)begin + let {instr,trap,perf}=resp; + wr_response_to_cpu<=tagged Valid tuple7(pc,prediction,npc,instr,trap,perf,epochs); + end + endrule + /*======================================================================================= */ + + + /*==== prefetch should start as soon as the cache is idle and a previous line was a miss=== */ + `ifdef prefetch + rule perform_prefetch; // takes the prefetch address chosen by the icache and, with MMU, also sends it to the iTLB + let x<-icache.prefetch; + `ifdef MMU itlb.get_vaddr(signExtend(x)); `endif + endrule + `endif + /*===================================================================== */ + + + /*===================== Interface to the Core ======== */ + interface request_from_core = interface Put + method Action put(Tuple5#(Bit#(2),Bit#(`VADDR),Bit#(`VADDR),Bool,Bit#(3)) request); // latches the request metadata and forwards the fetch address to the iTLB (MMU builds) and the icache + let {pred,nextpc,instr_addr,fence,epoch_req}=request; + epochs<=epoch_req; + pc<=instr_addr; + prediction<=pred; // register write: a read of prediction inside this method still returns the previous request's value, so the trace prints pred + npc<=nextpc; + `ifdef verbose $display($time,"\tIMEM: Sending request to cache for address: %h prediction: %b",instr_addr,pred); `endif + `ifdef MMU + itlb.get_vaddr(signExtend(instr_addr)); + `endif + icache.virtual_address(instr_addr,fence); + endmethod + endinterface; + method Maybe#(Tuple7#(Bit#(`VADDR), Bit#(2),Bit#(`VADDR),Bit#(32), Trap_type, Bit#(`PERFMONITORS),Bit#(3))) instruction_response_to_core=wr_response_to_cpu; + /*==================================================== */ + + `ifdef bpu + method Action training (Maybe#(Training_data#(`VADDR)) training_data)=bpu.training(training_data); + interface send_prediction_request=bpu.send_prediction_request; + interface prediction_response=bpu.prediction_response; + `endif + + method Action flush(Flush_type _flush); // currently a no-op: the body is empty with or without MMU + `ifdef MMU + `endif + endmethod + method Bit#(`PERFMONITORS) imem_perfmon=icache.icache_perfmon; + method Action stall_fetch(Bool stall) = icache.stall_fetch(stall); + /*===================================================== */ + +
/*======= Interface to the external Memory =========== */ + method Action response_from_memory(From_Memory#(`DCACHE_WORD_SIZE) mem_data)=icache.response_from_memory(mem_data); + method ActionValue#(To_Memory#(`PADDR)) request_to_memory; + ff_request_to_memory.deq; + return ff_request_to_memory.first; + endmethod + /*===================================================== */ + `ifdef MMU + method Action translation_protection_frm_csr(bit tlb_disable, Chmod per_bits, Bit#(TAdd#(4,`ASID)) asid); + itlb.translation_protection_frm_csr(tlb_disable, per_bits, asid); + endmethod + interface to_PTW = itlb.to_PTW; + interface refill_TLB = itlb.refill_TLB; + method Action fence_itlb(Fence_VMA_type#(`VADDR) rsdata); + itlb.fence_TLB(rsdata); + endmethod + `endif +endmodule + +endpackage diff --git a/src/core/mem_config1.bsv b/src/core/mem_config1.bsv new file mode 100644 index 0000000..5ad0e3a --- /dev/null +++ b/src/core/mem_config1.bsv @@ -0,0 +1,92 @@ +package mem_config1; + import BRAMCore::*; + import defined_types::*; + `include "defined_parameters.bsv" + import Assert::*; + import DReg::*; + interface Ifc_dcache_data; + method Action read_request(Bit#(TLog#(`DCACHE_SETS)) address); + method Bit#(TMul#(TMul#(8,`DCACHE_WORD_SIZE),`DCACHE_BLOCK_SIZE)) read_response; + method Action write_request(Bit#(TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE))we, Bit#(TLog#(`DCACHE_SETS)) address, Bit#(TMul#(TMul#(8,`DCACHE_WORD_SIZE),`DCACHE_BLOCK_SIZE)) data); + endinterface + + + module mkdcache_data(Ifc_dcache_data); + BRAM_DUAL_PORT_BE#(Bit#(TLog#(`DCACHE_SETS)),Bit#(128),16) dataa <-mkBRAMCore2BE(`DCACHE_SETS,False); + BRAM_DUAL_PORT_BE#(Bit#(TLog#(`DCACHE_SETS)),Bit#(128),16) datab <-mkBRAMCore2BE(`DCACHE_SETS,False); + + Wire#(Bit#(TLog#(`DCACHE_SETS))) read_address <-mkWire(); + Wire#(Bit#(TLog#(`DCACHE_SETS))) write_address<-mkWire(); + let set_bits=valueOf(TLog#(`DCACHE_SETS)); // number of bits to select a set from the cache. 
= + + + rule print_address; + `ifdef verbose $display("\tASSERT: DATA read_address: %d write_address: %d",read_address,write_address); `endif + dynamicAssert(read_address!=write_address,"ASSERT: DATA read and write address are the same"); + endrule + + + method Action read_request(Bit#(TLog#(`DCACHE_SETS)) address); + dataa.a.put(0,address,?); + datab.a.put(0,address,?); + read_address<=address[set_bits-1:0]; + endmethod + method Bit#(TMul#(TMul#(8,`DCACHE_WORD_SIZE),`DCACHE_BLOCK_SIZE)) read_response; + return {dataa.a.read,datab.a.read}; + endmethod + method Action write_request(Bit#(TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE))we, Bit#(TLog#(`DCACHE_SETS)) address, Bit#(TMul#(TMul#(8,`DCACHE_WORD_SIZE),`DCACHE_BLOCK_SIZE)) data); + dataa.b.put(we[31:16],address,data[255:128]); + datab.b.put(we[15:0],address,data[127:0]); + if(we!=0) + write_address<=address[set_bits-1:0]; + endmethod + endmodule + + interface Ifc_dcache_tag; + method Action read_request(Bit#(TLog#(`DCACHE_SETS)) address); + method Bit#(TAdd#(2,`DCACHE_TAG_BITS)) read_response; + method Action write_request(Bool we, Bit#(TLog#(`DCACHE_SETS)) address, Bit#(TAdd#(2,`DCACHE_TAG_BITS)) data); + endinterface + + module mkdcache_tag(Ifc_dcache_tag); + let byte_bits=valueOf(TLog#(`DCACHE_WORD_SIZE)); // number of bits to select a byte within a word. = 2 + let word_bits=valueOf(TLog#(`DCACHE_BLOCK_SIZE)); // number of bits to select a word within a block. = 4 + let set_bits=valueOf(TLog#(`DCACHE_SETS)); // number of bits to select a set from the cache. 
= + + BRAM_DUAL_PORT#(Bit#(TLog#(TDiv#(`DCACHE_SETS,2))),Bit#(TAdd#(`DCACHE_TAG_BITS,2))) taga<-mkBRAMCore2(`DCACHE_SETS/2,False) ; + BRAM_DUAL_PORT#(Bit#(TLog#(TDiv#(`DCACHE_SETS,2))),Bit#(TAdd#(`DCACHE_TAG_BITS,2))) tagb<-mkBRAMCore2(`DCACHE_SETS/2,False) ; + Reg#(Bit#(TLog#(`DCACHE_SETS))) rg_addr <-mkReg(0); + + Wire#(Bit#(TLog#(`DCACHE_SETS))) read_address <-mkWire(); + Wire#(Bit#(TLog#(`DCACHE_SETS))) write_address<-mkWire(); + + rule print_address; + `ifdef verbose $display("\tASSERT: read_address: %d write_address: %d",read_address,write_address); `endif + dynamicAssert(read_address!=write_address,"ASSERT: Tag read and write address are the same"); + endrule + + + method Action read_request(Bit#(TLog#(`DCACHE_SETS)) address); + if(address[set_bits-1]==1) + tagb.a.put(False,address[set_bits-2:0],?); + else + taga.a.put(False,address[set_bits-2:0],?); + rg_addr<=address; + read_address<=address[set_bits-1:0]; + endmethod + method Bit#(TAdd#(2,`DCACHE_TAG_BITS)) read_response; + if(rg_addr[set_bits-1]==1) + return tagb.a.read; + else + return taga.a.read; + endmethod + method Action write_request(Bool we, Bit#(TLog#(`DCACHE_SETS)) address, Bit#(TAdd#(2,`DCACHE_TAG_BITS)) data); + if(address[set_bits-1]==1) + tagb.b.put(we,address[set_bits-2:0],data); + else + taga.b.put(we,address[set_bits-2:0],data); + if(we) + write_address<=address[set_bits-1:0]; + endmethod + endmodule +endpackage diff --git a/src/core/memory_stage.bsv b/src/core/memory_stage.bsv new file mode 100644 index 0000000..33b6312 --- /dev/null +++ b/src/core/memory_stage.bsv @@ -0,0 +1,179 @@ +/* +Copyright (c) 2013, IIT Madras +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
+* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +* Neither the name of IIT Madras nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+*/
+package memory_stage;
+  // Memory stage of the pipeline.
+  //
+  // Packets arrive from the execute stage on `rx_in` and leave towards the
+  // write-back stage on `tx_out`. A packet whose `execresult` is tagged MEMORY
+  // is held at the head of `rx` until a Valid response is presented on
+  // `response_from_dmem`; every other packet is passed straight through.
+  // A one-bit epoch register (`wEpoch`) is used to drop packets/responses that
+  // belong to a flushed instruction stream.
+  /*===== Package Imports ==== */
+  import TxRx::*;
+  import FIFOF::*;
+  import DReg::*;
+  import Vector::*;
+  import GetPut::*;
+  /*========================== */
+  /*===== Project Imports ======*/
+  `include "defined_parameters.bsv"
+  import defined_types::*;
+  //import dcache::*;
+  /*============================ */
+
+  interface Ifc_memory_stage;
+    /* ====================== pipe connections ========= */
+    interface RXe#(IE_IMEM_type) rx_in;    // input side: packets from the execute stage
+    interface TXe#(IMEM_IWB_type) tx_out;  // output side: packets to the write-back stage
+    /*================================================== */
+    // Toggles the local write-back epoch bit.
+    method Action update_wEpoch;
+    // Response presented by the data memory for the current cycle:
+    // (data, trap, perf-monitor bits, epoch bit of the request).
+    interface Put#(Maybe#(Tuple4#(Bit#(`Reg_width), Trap_type, Bit#(`PERFMONITORS),Bit#(1)))) response_from_dmem;
+    // Trigger configuration for loads / stores (latched into wires for one cycle).
+    method Action loadtrigger_info(TriggerData tdata);
+    method Action storetrigger_info(TriggerData tdata);
+    // Forwarded load result for the current cycle: (data, PRF index, pid).
+    method Maybe#(Tuple3#(Bit#(`Reg_width), Bit#(TLog#(`PRFDEPTH)), Bit#(4))) forwarding_data;
+  endinterface
+
+  (*synthesize*)
+  module mkmemory_stage(Ifc_memory_stage);
+    RX#(IE_IMEM_type) rx <-mkRX; // receive from the execution stage
+    TX#(IMEM_IWB_type) tx <-mkTX; // send to the writeback stage;
+    // NOTE(review): wr_info_to_dmem is never read or written in this module as shown.
+    Wire#(Memout) wr_info_to_dmem <-mkWire;// holds the information to be given to dmem
+    // Trigger settings; default to "no trigger" in cycles where the methods are not called.
+    Wire#(TriggerData) wr_loadtrigger<-mkDWire(TriggerData{ttype:tagged None,matchscheme:0});
+    Wire#(TriggerData) wr_storetrigger<-mkDWire(TriggerData{ttype:tagged None,matchscheme:0});
+    // D-memory response for this cycle; Invalid when response_from_dmem.put is not called.
+    Wire#(Maybe#(Tuple4#(Bit#(`Reg_width), Trap_type,Bit#(`PERFMONITORS),Bit#(1)))) wr_response_to_cpu <- mkDWire(tagged Invalid);
+    //Wire#(Maybe#(Operand_forwading_type)) wr_forward_from_MEM <-mkDWire(tagged Invalid);// holds the forwarded data from the memory stage
+    //Reg#(Maybe#(Tuple3#(Bit#(`Reg_width), Bit#(TLog#(`PRFDEPTH)), Bit#(4)))) wr_forward_from_MEM <-mkDReg(tagged Invalid);// holds the forwarded data from the memory stage
+    Wire#(Maybe#(Tuple3#(Bit#(`Reg_width), Bit#(TLog#(`PRFDEPTH)), Bit#(4)))) wr_forward_from_MEM <-mkDWire(tagged Invalid);// holds the forwarded data from the memory stage
+    //Ifc_dcache dcache<-mkdcache;
+    Reg#(Bit#(1)) wEpoch <-mkReg(0); // current epoch; toggled by update_wEpoch
+
+    // Returns True when the given trigger matches the load/store address or data.
+    //   tdata   : trigger type (Address / Data / other) and match scheme
+    //   address : effective address of the access
+    //   lsdata  : data of the access
+    // Match schemes handled: 0 (equal), 2, 3 (ordered compares), 4, 5 (masked compares).
+    // NOTE(review): this function is not called anywhere in the module as shown.
+    // Before wiring it in, confirm the compare direction of schemes 2/3 and the
+    // mask operand used by the Address variant of scheme 4 against the debug
+    // specification; they differ from the Data variant.
+    function Bool checktrigger(TriggerData tdata, Bit#(`Reg_width) address, Bit#(`Reg_width) lsdata);
+      if(tdata.ttype matches tagged Address .addr)
+        if(tdata.matchscheme==0 && addr==truncate(address))
+          return True;
+        else if(tdata.matchscheme==2 && addr>=truncate(address))
+          return True;
+        else if(tdata.matchscheme==3 && addr<=truncate(address))
+          return True;
+        else if(tdata.matchscheme==4 && addr[31:0]==(addr[31:0]&address[31:0]))
+          return True;
+        else if(tdata.matchscheme==5 && addr[31:0]==(addr[`VADDR-1:32]&address[`VADDR-1:32]))
+          return True;
+        else
+          return False;
+      else if(tdata.ttype matches tagged Data .data)
+        if(tdata.matchscheme==0 && data==lsdata)
+          return True;
+        else if(tdata.matchscheme==2 && data>=lsdata)
+          return True;
+        else if(tdata.matchscheme==3 && data<=lsdata)
+          return True;
+        else if(tdata.matchscheme==4 && data[31:0]==(data[63:32]&lsdata[31:0]))
+          return True;
+        else if(tdata.matchscheme==5 && data[31:0]==(data[63:32]&lsdata[63:32]))
+          return True;
+        else
+          return False;
+      else
+        return False;
+    endfunction
+
+    // Main rule: fires when there is a packet from execute and room towards write-back.
+    //(*conflict_free="receive_info_from_execution_stage,response_to_core"*)
+    rule receive_info_from_execution_stage(rx.u.notEmpty && tx.u.notFull);
+      // pm accumulates the perf-monitor bits of the packet and of the D-memory response.
+      Bit#(`PERFMONITORS) pm=rx.u.first.perfmonitors;
+      let info=rx.u.first();
+      let pc=rx.u.first.program_counter;
+      let dest=rx.u.first.destination;
+      let rdtype=rx.u.first.rd_type;
+      `ifdef simulate let instr=rx.u.first.instruction; `endif
+      WriteBackType result1=?;
+      `ifdef verbose $display($time,"\t*****************MEMORY STAGE*************************\t PC: %h PRFINDEX: %d PID: %d EPOCHS: %b wEpoch: %b",pc,rx.u.first.index,rx.u.first.pid, rx.u.first.epochs, wEpoch); `endif
+
+      /* Memory operation: the packet stays at the head of rx until the data
+         memory presents a Valid response through response_from_dmem. */
+      if(info.execresult matches tagged MEMORY .meminfo)begin
+        `ifdef verbose $display($time,"\tMEMORY: load_trigger: ", fshow(wr_loadtrigger)); `endif
+        let exception=rx.u.first.exception;
+        if(wr_response_to_cpu matches tagged Valid .d)begin
+          let {data,e,perfmonitors,epochs}=d;
+          // An exception already attached to the packet takes priority over the memory trap.
+          exception=exception matches tagged None?e:exception;
+          Bit#(`Reg_width) fwd_data=0;
+          if(exception matches tagged Exception .exc)begin
+            // On an exception the access address is carried as the result value.
+            result1 = tagged RESULT Arithout{aluresult:meminfo.address,fflags:0};
+          end
+          `ifdef spfpu
+          else if(meminfo.transfer_size==2 && rx.u.first.rd_type==FloatingRF)begin
+            // 32-bit value going to the floating register file: upper bits set to all ones.
+            result1 = tagged RESULT Arithout{aluresult:{'1,data[31:0]},fflags:0};
+            fwd_data={'1,data[31:0]};
+          end
+          `endif
+          else begin
+            result1 = tagged RESULT Arithout{aluresult:data,fflags:0};
+            fwd_data=data;
+          end
+          pm=pm|perfmonitors;
+          rx.u.deq;
+          // Only responses of the current epoch are committed; others are dropped.
+          if(epochs==wEpoch)begin
+            `ifdef verbose $display($time,"\tMEMORY: Response from DCACHE: Data: %h Address: %h transfersize: %d epochs: %b wEpochs: %b",data,meminfo.address,meminfo.transfer_size,epochs,wEpoch); `endif
+            // FIX: forward the merged perf-monitor bits (pm) instead of only the
+            // bits that arrived with the packet, otherwise the D-memory bits are lost.
+            tx.u.enq(IMEM_IWB_type{commit_data:result1, index:rx.u.first.index, pid:rx.u.first.pid, debugcause:rx.u.first.debugcause,
+              program_counter:pc, destination:dest, epochs:{rx.u.first.epochs[1],epochs},
+              rd_type:rdtype, exception:exception, perfmonitors:pm
+              `ifdef simulate , instruction:instr `endif });
+            // Forward the loaded value unless this is a store, the destination is x0,
+            // or an exception was raised.
+            if(meminfo.mem_type!=Store &&& ((rdtype==IntegerRF && dest!=0) `ifdef spfpu || rdtype==FloatingRF `endif ) &&& exception matches tagged None)
+              wr_forward_from_MEM <= tagged Valid tuple3(fwd_data, rx.u.first.index, rx.u.first.pid);
+          end
+          else begin
+            `ifdef verbose $display($time,"\tMEMORY: Dropping the received response from DCACHE"); `endif
+          end
+        end
+      end
+      /* Not a memory operation: dequeue and pass SYSTEM / RESULT payloads on to
+         the write-back stage unchanged, unless the packet belongs to an old epoch,
+         in which case it is dropped. */
+      else begin
+        rx.u.deq;
+        if(rx.u.first.epochs[0]!=wEpoch)begin
+          `ifdef verbose $display($time,"\tMEMORY: PC: %h Dropping instructions",rx.u.first.program_counter); `endif
+        end
+        else begin
+          `ifdef verbose $display($time,"\tMEMORY: Bypassing Memory Stage"); `endif
+          if(info.execresult matches tagged SYSTEM .res1)begin
+            result1=tagged SYSTEM res1 ;
+          end
+          else if(info.execresult matches tagged RESULT .res1)begin
+            result1=tagged RESULT res1;
+          end
+          tx.u.enq(IMEM_IWB_type{commit_data:result1, index:rx.u.first.index, pid:rx.u.first.pid, debugcause:rx.u.first.debugcause,
+            program_counter:pc, destination:dest, epochs:rx.u.first.epochs,
+            rd_type:rdtype, exception:info.exception, perfmonitors:rx.u.first.perfmonitors
+            `ifdef simulate , instruction:instr `endif });
+        end
+      end
+    endrule
+    method tx_out=tx.e;
+    method rx_in=rx.e;
+    interface response_from_dmem = interface Put
+      method Action put(Maybe#(Tuple4#(Bit#(`Reg_width), Trap_type, Bit#(`PERFMONITORS),Bit#(1))) resp);
+        wr_response_to_cpu <= resp;
+      endmethod
+    endinterface;
+    method Action loadtrigger_info(TriggerData tdata);
+      wr_loadtrigger<=tdata;
+    endmethod
+    method Action storetrigger_info(TriggerData tdata);
+      wr_storetrigger<=tdata;
+    endmethod
+    method Maybe#(Tuple3#(Bit#(`Reg_width), Bit#(TLog#(`PRFDEPTH)), Bit#(4))) forwarding_data=wr_forward_from_MEM;
+    method Action update_wEpoch;
+      wEpoch<=~wEpoch;
+    endmethod
+  endmodule
+endpackage:memory_stage
diff --git a/src/core/muldiv.bsv b/src/core/muldiv.bsv
new file mode 100644
index 0000000..577a431
--- /dev/null
+++ b/src/core/muldiv.bsv
@@ -0,0 +1,319 @@
+/*
+Copyright (c) 2013, IIT Madras
+All rights reserved.
+ +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +* Neither the name of IIT Madras nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+*/
+package muldiv;
+  // Iterative integer multiplier / divider.
+  //
+  // Multiplication consumes 8 bits of the multiplier per cycle (at most 8
+  // cycles) and can terminate early when all remaining multiplier bits are
+  // zero. Division performs `UnrollDiv restoring steps per cycle.
+  // A single 73-bit adder, the 9x65 multiplier and the zero detector are shared
+  // between rules through UniqueWrappers.
+  /*====== Package import ==== */
+  import FIFOF::*;
+  import SpecialFIFOs::*;
+  import UniqueWrappers::*;
+  /*==== Project Import=== */
+  import defined_types::*;
+  `include "defined_parameters.bsv"
+  /*====================== */
+
+  `define UnrollMul 8 // this means the number of bits being analysed simultaneously
+  `define UnrollDiv 1
+
+  interface Ifc_muldiv;
+    // Submit an operation. funct3 carries the two low bits of the instruction's
+    // funct3 field, word_flag selects the 32-bit variant, is_mul selects
+    // multiplication (1) or division/remainder (0).
+    method Action input_operands(Bit#(`Reg_width) in1, Bit#(`Reg_width) in2, Bit#(2) funct3, Bit#(1) word_flag, Bit#(1) is_mul);
+    // Dequeue the result of the submitted operation.
+    method ActionValue#(Bit#(`Reg_width)) muldiv_result;
+    // Abort the operation in flight and return the unit to idle.
+    method Action flush;
+  endinterface
+
+  // Signed 9-bit x 65-bit multiply; both operands are sign-extended to 73 bits
+  // and the low 73 bits of the product are returned.
+  function Bit#(73) func_mult(Bit#(9) op1, Bit#(65) op2);
+    Bit#(73) lv_result= signExtend(op1)*signExtend(op2);
+    return lv_result;
+  endfunction
+
+  // Early-out detector for the multiplier.
+  //   op    : the (up to 56) multiplier bits presented by the caller
+  //   count : current value of the iteration counter (7 down to 0)
+  // Returns True when every multiplier bit that is still relevant for the given
+  // count is zero, i.e. the remaining iterations would only add zero:
+  //   count 6/7 -> op[55:0], 5 -> op[47:0], 4 -> op[39:0], 3 -> op[31:0],
+  //   2 -> op[23:0], otherwise -> op[15:0].
+  //
+  // FIX: each branch is now wrapped in begin/end. Without the blocks every
+  // `else if(count==k)` was bound to the nearest inner `if(acc_...)` instead of
+  // the count chain, so the early-out could never trigger for count <= 5.
+  function Bool is_op_zero(Bit#(56) op, Bit#(4) count);
+    Bool acc_7to0_is_zero  = op[7:0]==0;
+    Bool acc_15to8_is_zero = op[15:8]==0;
+    Bool acc_23to16_is_zero= op[23:16]==0;
+    Bool acc_31to24_is_zero= op[31:24]==0;
+    Bool acc_39to32_is_zero= op[39:32]==0;
+    Bool acc_47to40_is_zero= op[47:40]==0;
+    Bool acc_55to48_is_zero= op[55:48]==0;
+
+    Bool acc_47to32_is_zero= acc_47to40_is_zero && acc_39to32_is_zero;
+    Bool acc_31to16_is_zero= acc_31to24_is_zero && acc_23to16_is_zero;
+    Bool acc_15to0_is_zero = acc_15to8_is_zero && acc_7to0_is_zero;
+    Bool acc_31to0_is_zero = acc_31to16_is_zero && acc_15to0_is_zero;
+
+    Bool earlyout= False;
+    if(count[2:1]=='b11) begin //==6 or ==7
+      if(acc_55to48_is_zero && acc_47to32_is_zero && acc_31to0_is_zero)
+        earlyout= True;
+    end
+    else if(count==5) begin
+      if(acc_47to32_is_zero && acc_31to0_is_zero)
+        earlyout= True;
+    end
+    else if(count==4) begin
+      if(acc_39to32_is_zero && acc_31to0_is_zero)
+        earlyout= True;
+    end
+    else if(count==3) begin
+      if(acc_31to0_is_zero)
+        earlyout= True;
+    end
+    else if(count==2) begin
+      if(acc_23to16_is_zero && acc_15to0_is_zero)
+        earlyout= True;
+    end
+    else begin
+      if(acc_15to0_is_zero)
+        earlyout= True;
+    end
+    return earlyout;
+  endfunction
+
+  (*synthesize*)
+  (*descending_urgency = "input_operands, perform_n_restoring_steps"*)
+  module mkmuldiv(Ifc_muldiv);
+
+    // Shared combinational resources (one use per cycle each).
+    Wrapper2#(Bit#(73), Bit#(73), Bit#(73)) wrapper_add_1 <- mkUniqueWrapper2( \+ );
+    Wrapper2#(Bit#(9), Bit#(65), Bit#(73)) wrapper_mul_1 <- mkUniqueWrapper2( func_mult );
+    Wrapper2#(Bit#(56), Bit#(4), Bool) wrapper_is_op_zero <- mkUniqueWrapper2( is_op_zero );
+
+    Reg#(Bit#(65)) multiplicand_divisor <-mkReg(0); // operand2
+    Reg#(Bit#(137)) accumulator <-mkReg(0); // holds the accumulated results over the iterations
+    FIFOF#(Bit#(64)) ff_muldiv_result <-mkBypassFIFOF(); // to hold the final result
+    FIFOF#(Tuple5#(Bit#(`Reg_width), Bit#(`Reg_width),Bit#(2), Bit#(1), Bit#(1))) ff_input <-mkLFIFOF(); // holds the submitted operands until first_stage picks them up
+    // rg_count==8 means idle. Multiplication counts 7 down to 0; division only
+    // needs a value different from 8 while it is running.
+    Reg#(Bit#(4)) rg_count[2]<-mkCReg(2,8);
+    Reg#(Bool) rg_signed<-mkReg(False);
+    Reg#(Bool) upper_bits<-mkReg(False);            // MUL only: return the upper half of the product
+    Reg#(Bit#(1)) temp_multiplier_sign<-mkReg(0);   // sign bit prepended to the multiplier byte
+    Reg#(Bit#(1)) rg_word_flag<-mkReg(0);
+    Reg#(Bit#(1)) rg_result_sign<-mkReg(0);
+    Reg#(Bool) rg_is_mul <-mkReg(False);
+
+    //Only DIV
+    Reg#(Bit#(7)) rg_state_counter[2]<-mkCReg(2,0); // to count the number of iterations
+    Reg#(Bit#(2)) rg_funct3 <-mkReg(0);
+
+    // One multiplication step: multiply the low multiplier byte with the
+    // multiplicand, add it into the upper part of the accumulator and shift
+    // the whole accumulator right (arithmetically) by 8.
+    rule unroll_multiplication(rg_is_mul && rg_count[1]!=8);
+
+      //Bit#(137) x=partial_prod_generator(multiplier_sign,multiplicand,accumulator[1]);
+      Bit#(73) product<- wrapper_mul_1.func({temp_multiplier_sign,accumulator[7:0]}, multiplicand_divisor);
+      Bit#(73) new_accum<- wrapper_add_1.func(accumulator[136:64],product);
+      Bit#(137) x = {new_accum,accumulator[63:0]};
+      Int#(137) y = unpack(x);
+      y=y>>8;
+      x=pack(y);
+      Bool earlyout=False;
+      // In the first iteration (count==7) the low byte is being consumed now, so
+      // only the 56 bits above it are examined.
+      if(rg_count[1]==7)
+        earlyout<- wrapper_is_op_zero.func(accumulator[63:8],rg_count[1]);
+      else
+        earlyout<- wrapper_is_op_zero.func(accumulator[55:0],rg_count[1]);
+      `ifdef verbose $display($time,"\tAccumulator: %h Multiplicand: %h count: %d isHi: %b word: %b compl: %b sign: %b",x,multiplicand_divisor,rg_count[1],upper_bits, rg_word_flag, rg_signed,temp_multiplier_sign); `endif
+      `ifdef verbose $display($time,"\tx: %h y: %h",x,y); `endif
+      if(rg_count[1]==0 || earlyout)begin
+        `ifdef verbose $display($time,"\tMUL/DIV: Ending Mul/Div operation"); `endif
+        // Apply the shifts of the iterations that were skipped (count*8 bits).
+        y = unpack(x);
+        x=pack(y>>({2'b0,rg_count[1]}*8));
+        `ifdef verbose $display($time,"\tx: %h y: %h",x,y); `endif
+        if(rg_word_flag==1)
+          x=signExtend(x[31:0]);
+        if(upper_bits)
+          ff_muldiv_result.enq(x[2*`Reg_width-1:`Reg_width]);
+        else
+          ff_muldiv_result.enq(x[`Reg_width-1:0]);
+        rg_count[1]<=8;
+      end
+      else begin
+        rg_count[1]<=rg_count[1]-1;
+        accumulator<=x;
+      end
+      // The last iteration (count==0) uses the multiplier's sign for signed operations.
+      if(rg_count[1]==1 && rg_signed)
+        temp_multiplier_sign<=rg_result_sign;
+    endrule
+
+    // `UnrollDiv restoring-division steps per cycle. The quotient is built in the
+    // low half of `remainder`, the partial remainder in the upper half.
+    rule perform_n_restoring_steps(!rg_is_mul && rg_count[1]!='d8);
+      Bit#(`Reg_width) divisor= multiplicand_divisor[`Reg_width-1:0];
+      Bit#(TAdd#(1,TMul#(2,`Reg_width))) remainder= truncate(accumulator);
+      Bit#(TAdd#(1,`Reg_width)) sub;
+      for (Integer i=0;i<`UnrollDiv;i=i+1)begin
+        remainder=remainder<<1;
+        Bit#(73) lv_add_op1= {8'd0,remainder[2*`Reg_width:`Reg_width]};
+        Bit#(73) lv_add_op2= signExtend(~divisor+1);
+        let lv_added_inter_res <- wrapper_add_1.func(lv_add_op1, lv_add_op2);
+        sub= truncate(lv_added_inter_res);
+        if(remainder[2*`Reg_width-1:`Reg_width]>=divisor)begin // if subtraction is positive
+          remainder[0]=1;
+          remainder[2*`Reg_width:`Reg_width]=sub; // keep the subtracted value
+        end
+      end
+      //Bit#(TAdd#(1,`Reg_width)) lv_to_add= signExtend(~multiplicand_divisor[63:0]+1);
+      // NOTE(review): `sub` is not read after this assignment.
+      sub=accumulator[128:64]+signExtend(~multiplicand_divisor[63:0]+1);
+      if((rg_state_counter[1]==(64/`UnrollDiv)))begin // end of computation;
+        rg_state_counter[1]<=0;
+        rg_count[1]<='d8;
+        if(rg_funct3[1]==1) // REM/REMU
+          if(rg_word_flag==1)
+            remainder=signExtend(remainder[95:64]);
+          else
+            remainder=signExtend(remainder[127:64]);
+        else // DIV/DIVU
+          if(rg_word_flag==1)
+            remainder=signExtend(remainder[31:0]);
+          else
+            remainder=signExtend(remainder[63:0]);
+
+        // rg_signed is only set for division when funct3==0 (see first_stage),
+        // so this branch negates the quotient of a signed DIV.
+        if(rg_funct3[1]==0 && rg_signed) begin
+          remainder=~remainder+1;
+        end
+        // funct3[1:0]==2'b10 is the signed REM: give the remainder the recorded sign.
+        else if(rg_funct3[1:0]=='b10 && remainder[`Reg_width-1]!=rg_result_sign) begin
+          remainder=~remainder+1;
+        end
+        if(rg_word_flag==1)
+          ff_muldiv_result.enq(signExtend(remainder[31:0]));
+        else
+          ff_muldiv_result.enq(remainder[`Reg_width-1:0]);
+      end
+      else begin
+        accumulator[128:0]<=remainder;
+        rg_state_counter[1]<=rg_state_counter[1]+1;
+      end
+    endrule
+
+    // Accepts a new operation while idle: prepares the operands, answers the
+    // trivial cases (division by zero, zero first operand) immediately and
+    // otherwise starts the iterative multiply or divide.
+    rule first_stage(rg_count[1]==8);
+      ff_input.deq;
+      let {in1,in2,funct3,word_flag,is_mul}=ff_input.first;
+      `ifdef verbose $display($time,"\tMUL/DIV: in1: %h in2: %h funct3: %h word_flag: %h is_mul: %b",in1,in2,funct3,word_flag, is_mul); `endif
+      Bit#(1) in2_sign=funct3[1:0]==1?word_flag==1?in2[31]:in2[63]:0;
+      Bit#(1) in1_sign=(funct3[1]^funct3[0]) & ((word_flag==1)?in1[31]:in1[63]);
+
+      Bit#(TAdd#(`Reg_width,1)) op1;
+      Bit#(TAdd#(`Reg_width,1)) op2;
+      if(is_mul==1) begin
+        op1= word_flag==1? zeroExtend(in1[31:0]):{1'b0,in1};
+        op2= word_flag==1? zeroExtend(in2[31:0]):{1'b0,in2};
+      end
+      else begin
+        // Division: extend according to signedness (funct3[0]==0 is signed) ...
+        op1= word_flag==1? (funct3[0]==0?signExtend(in1[31:0]):zeroExtend(in1[31:0])): ({in1[63],in1[63:0]});
+        op2= word_flag==1?(funct3[0]==0?signExtend(in2[31:0]):zeroExtend(in2[31:0])):({in2[63],in2[63:0]});
+
+        // ... and take the two's complement of the low bits of negative signed operands.
+        op1=(funct3[0]==0 && op1[`Reg_width]==1)?~op1[`Reg_width-1:0]+1:op1[`Reg_width-1:0];
+        op2=(funct3[0]==0 && op2[`Reg_width]==1)?~op2[`Reg_width-1:0]+1:op2[`Reg_width-1:0];
+      end
+
+      rg_word_flag<=word_flag;
+      rg_is_mul<= unpack(is_mul);
+      Bool op1_31_to_0_is_zero= (op1[31:0]==0);
+      Bool op2_31_to_0_is_zero= (op2[31:0]==0);
+      Bool op1_is_zero= word_flag==1? op1_31_to_0_is_zero:(op1[63:0]==0 && op1_31_to_0_is_zero);
+      Bool op2_is_zero= word_flag==1? op2_31_to_0_is_zero:(op2[63:0]==0 && op2_31_to_0_is_zero);
+
+      if(is_mul==0 && op2_is_zero) begin
+        // Division by zero: the remainder is the dividend, the quotient is all ones.
+        if(funct3[1]==1) begin //REM/REMU operation
+          if(word_flag==1)
+            ff_muldiv_result.enq(signExtend(in1[31:0]));
+          else
+            ff_muldiv_result.enq(in1);
+        end
+        else begin //DIV/DIVU operation
+          ff_muldiv_result.enq('1);
+        end
+      end
+      else if(op1_is_zero) begin
+        ff_muldiv_result.enq(0);
+        rg_signed<=False;
+      end
+      else begin
+        if(funct3==0) begin
+          upper_bits<=False; //used only for MUL
+          if(is_mul==1)
+            rg_signed<=op1[`Reg_width-1]!=op2[`Reg_width-1];
+          else
+            rg_signed<=op1[`Reg_width]!=op2[`Reg_width];
+        end
+        else begin
+          upper_bits<=True; //used only for MUL
+          if(is_mul==1)
+            rg_signed<=unpack(in1_sign);
+          else
+            rg_signed<= False;
+        end
+
+        if(is_mul==1) begin
+          //Bit#(73) product<- wrapper_mul_1.func({1'b0,op1[7:0]}, {in2_sign,op2[`Reg_width-1:0]});
+          ////Bit#(73) new_accum<- wrapper_add_1.func(accumulator[1][136:64],product);
+          //Bit#(137) x = {product,op1[`Reg_width-1:0]};
+          //Int#(137) y = unpack(x);
+          //y=y>>8;
+          //x=pack(y);
+          //`ifdef verbose $display("--- in1: %h in2: %h out: %h", {in2_sign,op2[7:0]}, op1, x); `endif
+          //Bool earlyout<- wrapper_is_op_zero.func(op1[63:8],rg_count[1]);
+          //if(earlyout) begin
+          //  y=unpack(x);
+          //  x=pack(y>>7*8);
+          //  if(word_flag==1)
+          //    x=signExtend(x[31:0]);
+          //  if(funct3!=0) //Upper bits
+          //    ff_muldiv_result.enq(x[2*`Reg_width-1:`Reg_width]);
+          //  else
+          //    ff_muldiv_result.enq(x[`Reg_width-1:0]);
+          //end
+          //else begin
+          rg_result_sign<=op1[`Reg_width-1];
+          temp_multiplier_sign<=0;
+          multiplicand_divisor<={in2_sign,op2[63:0]};
+          accumulator<=zeroExtend(op1[63:0]);
+          rg_count[1]<=7;
+          //end
+          //`ifdef verbose $display($time,"\tAccumulator: %h Multiplicand: %h rg_count: %d",x,{in2_sign,op2[63:0]},rg_count[1]); `endif
+        end
+        else begin
+          accumulator<= zeroExtend(op1[63:0]);
+          rg_state_counter[1]<=1;
+          rg_count[1]<= 4; // any value other than 8 marks the divider as busy
+          multiplicand_divisor<= op2;
+          rg_result_sign<= op1[`Reg_width];
+          rg_funct3<= funct3;
+        end
+      end
+    endrule
+
+    // Operands are only accepted while the unit is idle (rg_count==8).
+    method Action input_operands(Bit#(`Reg_width) in1, Bit#(`Reg_width) in2, Bit#(2) funct3, Bit#(1) word_flag, Bit#(1) is_mul) if(rg_count[1]==8);
+      ff_input.enq(tuple5(in1,in2,funct3,word_flag,is_mul));
+    endmethod
+    method ActionValue#(Bit#(`Reg_width)) muldiv_result;
+      ff_muldiv_result.deq;
+      return ff_muldiv_result.first();
+    endmethod
+    // Returns the counters to their idle values.
+    // NOTE(review): ff_input and ff_muldiv_result are not cleared here.
+    method Action flush;
+      rg_count[0]<=8;
+      rg_state_counter[0]<= 0;
+    endmethod
+  endmodule
+
+  // Minimal test bench: submits one fixed operation, prints the result (when
+  // `verbose` is defined) and finishes; also finishes after 100 cycles.
+  module mkTb(Empty);
+    Ifc_muldiv muldiv <-mkmuldiv();
+    UInt#(128) op1 = 'hfffffffffffffadb;//'h868c3b620d9a5d2c;//fffffffffffffbde;//'h8000000000000000; //'hfffffffffffffaef;//'h0000000000040000;//000000000fffff8a;//01d9a0aea837b4df;
+    UInt#(128) op2 = 'h000000000000004e;//'h868c3b620d9a5d2c;//fffffffffffffbde;//'h8000000000000000; //'hfffffffffffffaef;//'h000000003b48acdb;//fffffffffffff8d5;//04b8d2eb05855afa;
+    Bit#(2) funct=0;
+    UInt#(128) y = op1*op2;
+    Reg#(Bit#(32)) rg_clk <- mkReg(0);
+    rule give_ip;
+      `ifdef verbose $display($time," Giving Inputs : Op1: %d Op2: %d",op1,op2); `endif
+      muldiv.input_operands(pack(truncate(op1)),pack(truncate(op2)),funct,1,0);
+    endrule
+
+    rule get_out;
+      let x <-muldiv.muldiv_result;
+      `ifdef verbose $display($time," Output: %h Expected: %h Match: %b",x,y,(zeroExtend(x)==pack(y))); `endif
+      $finish(0);
+    endrule
+
+    rule rl_clk;
+      rg_clk<= rg_clk+1;
+      if(rg_clk=='d100)
+        $finish(0);
+    endrule
+  endmodule
+endpackage
diff --git a/src/core/prf.bsv b/src/core/prf.bsv
new file mode 100644
index 0000000..8a0a7be
--- /dev/null
+++ b/src/core/prf.bsv
@@ -0,0 +1,131 @@
+/*
+Copyright (c) 2013, IIT Madras
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +* Neither the name of IIT Madras nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+*/
+package prf;
+  // Small "physical register file" used for operand forwarding.
+  //
+  // Each of the `PRFDEPTH slots records which architectural register
+  // (address + register-file type) an in-flight instruction will write, and
+  // either the forwarded value (Present) or a marker that the value is not yet
+  // available (Absent).
+  import defined_types::*;
+  `include "defined_parameters.bsv"
+  import Vector::*;
+  interface Ifc_prf_new;
+    // Operand look-up. `data` is the value read from the architectural register
+    // file; it is returned (tagged Present) when no slot is mapped to `addr`.
+    method ActionValue#(RFType#(`Reg_width)) read_rs1 (Bit#(5) addr, Operand_type rs1type, Bit#(`Reg_width) data);
+    method ActionValue#(RFType#(`Reg_width)) read_rs2 (Bit#(5) addr, Operand_type rs2type, Bit#(`Reg_width) data);
+    method ActionValue#(RFType#(`Reg_width)) read_rs3 (Bit#(5) addr, Operand_type rs3type, Bit#(`Reg_width) data);
+    // Allocate the next slot for destination (addr, rdtype); returns (slot index, pid).
+    method ActionValue#(Tuple2#(Bit#(TLog#(`PRFDEPTH)),Bit#(4))) get_index_pid(Bit#(5) addr, Operand_type rdtype);
+    // Mark slot `index` as pending (Absent) for the instruction identified by `pid`.
+    method Action update_rd (Bit#(TLog#(`PRFDEPTH)) index, Bit#(4) pid);
+    // Forwarded results from the execute / memory stages for slot `index`.
+    method Action fwd_from_execution (Bit#(`Reg_width) data, Bit#(TLog#(`PRFDEPTH)) index, Bit#(4) pid);
+    method Action fwd_from_memory (Bit#(`Reg_width) data, Bit#(TLog#(`PRFDEPTH)) index, Bit#(4) pid);
+    // Clear every mapping (takes effect through rule flush_all_mapping).
+    method Action flush_all;
+  endinterface
+  (*conflict_free="fwd_from_execution,fwd_from_memory"*)
+  (*conflict_free="fwd_from_execution,update_rd"*)
+  (*conflict_free="update_rd,fwd_from_memory"*)
+  (*synthesize*)
+  module mkprf_new(Ifc_prf_new);
+    Reg#(RFType#(`Reg_width)) physical_rf [`PRFDEPTH];  // value state of each slot
+    Reg#(Bit#(4)) pid_rf [`PRFDEPTH];                   // pid last recorded for each slot
+    // Architectural destination mapped to each slot; (0, IntegerRF) means "unmapped".
+    Vector#(`PRFDEPTH,Reg#(Tuple2#(Bit#(5),Operand_type))) rd_rf<-replicateM(mkReg(tuple2(0,IntegerRF)));
+    Reg#(Bit#(4)) rg_pid_counter<-mkReg(0);             // pid handed out by the next allocation
+    Reg#(Bit#(TLog#(`PRFDEPTH))) rg_prf_index<-mkReg(0); // slot handed out by the next allocation
+    Wire#(Bool) wr_flush<-mkDWire(False);
+    for(Integer i=0;i<`PRFDEPTH;i=i+1)begin
+      physical_rf[i]<-mkReg(tagged Absent 0);
+      pid_rf[i]<-mkReg(0);
+    end
+    // Resets every slot to its initial state in the cycle flush_all is called.
+    rule flush_all_mapping(wr_flush);
+      for(Integer i=0;i<`PRFDEPTH;i=i+1)begin
+        physical_rf[i]<=tagged Absent 0;
+        pid_rf[i]<=0;
+        rd_rf[i]<=tuple2(0,IntegerRF);
+      end
+    endrule
+    method ActionValue#(RFType#(`Reg_width)) read_rs1 (Bit#(5) addr, Operand_type rs1type, Bit#(`Reg_width) data);
+      // Integer register 0 always reads as zero.
+      if(rs1type==IntegerRF && addr==0)
+        return tagged Present 0;
+      else if(rs1type==IntegerRF `ifdef spfpu || rs1type==FloatingRF `endif ) begin
+        let array_rd=readVReg(rd_rf); // snapshot of the (address, type) mapping of all slots
+        let index=findElem(tuple2(addr,rs1type),array_rd); // find the index of match
+        if(index matches tagged Valid .idx)begin // if match exists
+          `ifdef verbose $display($time,"\tPRF: READ_RS1: valid index: %d",idx); `endif
+          return physical_rf[idx];
+        end
+        else // there is no instruction in pipe updating this reg.
+          return tagged Present data;
+      end
+      else // operand does not come from a register file tracked here; pass `data` through.
+        return tagged Present data;
+    endmethod
+    method ActionValue#(RFType#(`Reg_width)) read_rs2 (Bit#(5) addr, Operand_type rs2type, Bit#(`Reg_width) data);
+      // Integer register 0 always reads as zero.
+      if(rs2type==IntegerRF && addr==0)
+        return tagged Present 0;
+      else if(rs2type==IntegerRF `ifdef spfpu || rs2type==FloatingRF `endif ) begin
+        let array_rd=readVReg(rd_rf); // snapshot of the (address, type) mapping of all slots
+        let index=findElem(tuple2(addr,rs2type),array_rd); // find the index of match
+        if(index matches tagged Valid .idx)begin // if match exists
+          `ifdef verbose $display($time,"\tPRF: READ_RS2: valid index: %d",idx); `endif
+          return physical_rf[idx];
+        end
+        else // there is no instruction in pipe updating this reg.
+          return tagged Present data;
+      end
+      else // operand does not come from a register file tracked here; pass `data` through.
+        return tagged Present data;
+    endmethod
+    method ActionValue#(RFType#(`Reg_width)) read_rs3 (Bit#(5) addr, Operand_type rs3type, Bit#(`Reg_width) data);
+      // Integer register 0 always reads as zero.
+      if(rs3type==IntegerRF && addr==0)
+        return tagged Present 0;
+      else if(rs3type==IntegerRF `ifdef spfpu || rs3type==FloatingRF `endif ) begin
+        let array_rd=readVReg(rd_rf); // snapshot of the (address, type) mapping of all slots
+        let index=findElem(tuple2(addr,rs3type),array_rd); // find the index of match
+        if(index matches tagged Valid .idx)begin // if match exists
+          return physical_rf[idx];
+        end
+        else // there is no instruction in pipe updating this reg.
+          return tagged Present data;
+      end
+      else // operand does not come from a register file tracked here; pass `data` through.
+        return tagged Present data;
+    endmethod
+    method ActionValue#(Tuple2#(Bit#(TLog#(`PRFDEPTH)),Bit#(4))) get_index_pid(Bit#(5) addr, Operand_type rdtype);
+      let array_rd=readVReg(rd_rf);
+      let index=findElem(tuple2(addr,rdtype),array_rd); // find the index of match
+      rg_pid_counter<=rg_pid_counter+1;
+      // Slots are handed out round-robin.
+      if(rg_prf_index==`PRFDEPTH-1)
+        rg_prf_index<=0;
+      else
+        rg_prf_index<=rg_prf_index+1;
+      rd_rf[rg_prf_index]<=tuple2(addr,rdtype);
+      // If an older slot was mapped to the same destination, unmap it so that
+      // look-ups only find the newest producer.
+      if(index matches tagged Valid .idx)begin
+        if(pack(idx)!=rg_prf_index)
+          rd_rf[idx]<=tuple2(0,IntegerRF);
+        `ifdef verbose $display($time,"\tPRF: Removing previously alotted index: %d",idx); `endif
+      end
+      `ifdef verbose $display($time,"\tPRF: Giving Index: %d PID: %d",rg_prf_index, rg_pid_counter); `endif
+      return tuple2(rg_prf_index,rg_pid_counter);
+    endmethod
+    method Action update_rd (Bit#(TLog#(`PRFDEPTH)) index, Bit#(4) pid);
+      physical_rf[index]<=tagged Absent pid;
+      pid_rf[index]<=pid;
+    endmethod
+    // NOTE(review): in both forwarding methods `pid` is only used for the verbose
+    // print; it is not compared against pid_rf[index] before the slot is written.
+    method Action fwd_from_execution (Bit#(`Reg_width) data, Bit#(TLog#(`PRFDEPTH)) index, Bit#(4) pid);
+      `ifdef verbose $display($time,"\tPRF: FWD from EXE Data: %h index: %d pid: %d",data,index,pid); `endif
+      physical_rf[index]<=tagged Present data;
+    endmethod
+    method Action fwd_from_memory (Bit#(`Reg_width) data, Bit#(TLog#(`PRFDEPTH)) index, Bit#(4) pid);
+      `ifdef verbose $display($time,"\tPRF: FWD from MEM Data: %h index: %d pid: %d",data,index,pid); `endif
+      physical_rf[index]<=tagged Present data;
+    endmethod
+    method Action flush_all;
+      wr_flush<=True;
+    endmethod
+
+  endmodule
+endpackage
diff --git a/src/core/registerfile.bsv b/src/core/registerfile.bsv
new file mode 100644
index 0000000..b325309
--- /dev/null
+++ b/src/core/registerfile.bsv
@@ -0,0 +1,136 @@
+/*
+Copyright (c) 2013, IIT Madras
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+* Neither the name of IIT Madras nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+*/
+package registerfile;
+  // Architectural register files (integer and, with `spfpu`, floating point).
+  // After reset every entry is written with zero, one per cycle, before the
+  // read/write methods become ready.
+  /*==== Project Imports === */
+  import defined_types::*;
+  `include "defined_parameters.bsv"
+  /*======================== */
+  /*===== Package Imports ==== */
+  import RegFile::*;
+  import ConfigReg::*;
+  /*===========================*/
+
+  interface Ifc_registerfile;
+    // Operand fetch: returns the values selected for rs1/rs2 (and rs3 with spfpu).
+    // rs1 may be the program counter, rs2 may be the immediate.
+    method ActionValue#(Output_for_operand_fetch) _inputs_from_decode_stage(Bit#(5) rs1_addr, Operand_type rs1_type, Bit#(5) rs2_addr, Operand_type rs2_type, Bit#(`VADDR) pc, Bit#(`Reg_width) imm `ifdef spfpu , Bool rs3_valid,Bit#(5) rs3_addr `endif );
+    // NOTE(review): the fgpr debug methods are declared whenever Debug is defined,
+    // but the module only implements them when spfpu is defined as well.
+    `ifdef Debug
+      method Bit#(`Reg_width) read_debug_igpr (Bit#(5) r); // Read an integer register
+      method Action write_debug_igpr (Bit#(5) r, Bit#(`Reg_width) d); // Write an integer register
+      method Bit#(`Reg_width) read_debug_fgpr (Bit#(5) r); // Read a floating-point register
+      method Action write_debug_fgpr (Bit#(5) r, Bit#(`Reg_width) d); // Write a floating-point register
+    `endif
+    // Commit a result to register `r` of the file selected by `rdtype`.
+    method Action write_rd(Bit#(5) r, Bit#(`Reg_width) d, Operand_type rdtype);
+    // Current XLEN encoding; a value of 1 makes reads and writes 32-bit sign-extended.
+    method Action inferred_xlen(Bit#(2) mxl);
+  endinterface
+
+  (*synthesize*)
+  module mkregisterfile(Ifc_registerfile);
+    RegFile#(Bit#(5),Bit#(`Reg_width)) integer_rf <-mkRegFileWCF(0,31);
+    `ifdef spfpu
+      RegFile#(Bit#(5),Bit#(`Reg_width)) floating_rf <-mkRegFileWCF(0,31);
+    `endif
+    Reg#(Bool) initialize<-mkReg(True);  // True while the files are being zeroed
+    Reg#(Bit#(5)) rg_index<-mkReg(0);    // entry zeroed in the current cycle
+    // Plain wire: the methods that read it are only ready in cycles in which
+    // inferred_xlen is called.
+    Wire#(Bit#(2)) wr_mxl <- mkWire();
+    // Writes zero to one entry of each register file per cycle; stops after entry 31.
+    rule initialize_regfile(initialize);
+      `ifdef spfpu
+        floating_rf.upd(rg_index,0);
+      `endif
+      integer_rf.upd(rg_index,0);
+      rg_index<=rg_index+1;
+      if(rg_index=='d31)
+        initialize<=False;
+    endrule
+
+    method ActionValue#(Output_for_operand_fetch) _inputs_from_decode_stage(Bit#(5) rs1_addr, Operand_type rs1_type, Bit#(5) rs2_addr, Operand_type rs2_type, Bit#(`VADDR) pc, Bit#(`Reg_width) imm `ifdef spfpu , Bool rs3_valid,Bit#(5) rs3_addr `endif ) if(!initialize); // receives the input from the decode stage.
+
+      Bit#(`Reg_width) rs1=0;
+      Bit#(`Reg_width) rs2=0;
+      Bit#(`Reg_width) rs3=0;
+
+      // rs1: sign-extended PC, constant zero for integer register 0, or a register read.
+      if(rs1_type==PC)
+        rs1=signExtend(pc);
+      else if(rs1_addr==0 && rs1_type==IntegerRF)
+        rs1=0;
+      else if(rs1_type==IntegerRF)
+        rs1=integer_rf.sub(rs1_addr);
+      `ifdef spfpu
+        else
+          rs1=floating_rf.sub(rs1_addr);
+      `endif
+
+      // rs2: immediate, constant zero for integer register 0, or a register read.
+      if(rs2_type==Immediate)
+        rs2=imm;
+      else if(rs2_addr==0 && rs2_type==IntegerRF)
+        rs2=0;
+      else if(rs2_type==IntegerRF)
+        rs2=integer_rf.sub(rs2_addr);
+      `ifdef spfpu
+        else
+          rs2=floating_rf.sub(rs2_addr);
+      `endif
+
+      // rs3 is always read from the floating-point file when valid.
+      `ifdef spfpu
+        if(rs3_valid) begin
+          rs3= floating_rf.sub(rs3_addr);
+        end
+        else
+          rs3 = 0;
+      `endif
+
+      if(wr_mxl==1)begin // 32-bit
+        rs1=signExtend(rs1[31:0]);
+        rs2=signExtend(rs2[31:0]);
+        `ifdef spfpu rs3=signExtend(rs3[31:0]); `endif
+      end
+
+      `ifdef verbose $display($time,"\nReg1 :%d : ",rs1_addr,fshow(rs1),"\nReg2 : %d : ",rs2_addr,fshow(rs2) `ifdef spfpu ,"\nReg3: %d ; ",rs3_addr,fshow(rs3) `endif ); `endif
+      return Output_for_operand_fetch{rs1:rs1,rs2:rs2`ifdef spfpu ,rs3: rs3 `endif };
+    endmethod
+    method Action write_rd(Bit#(5) r, Bit#(`Reg_width) d, Operand_type rdtype) if(!initialize); // TODO if not in critical path shift the CReg ports.
+      `ifdef verbose $display($time,"\tRF: Writing into reg: :%d data: %h ",r,d,fshow(rdtype)); `endif
+      if(wr_mxl==1)begin // 32-bit
+        d=signExtend(d[31:0]);
+      end
+      // Writes to integer register 0 are discarded.
+      if(rdtype==IntegerRF)begin
+        if(r!=0)begin
+          integer_rf.upd(r,d);
+        end
+      end
+      `ifdef spfpu
+        else if(rdtype==FloatingRF)begin
+          floating_rf.upd(r,d);
+        end
+      `endif
+    endmethod
+    `ifdef Debug
+      method Bit#(`Reg_width) read_debug_igpr (Bit#(5) r); // Read an integer register
+        return integer_rf.sub(r);
+      endmethod
+      method Action write_debug_igpr (Bit#(5) r, Bit#(`Reg_width) d)if(!initialize); // Write an integer register (register 0 is not special-cased here)
+        integer_rf.upd(r,d);
+      endmethod
+      `ifdef spfpu
+        method Bit#(`Reg_width) read_debug_fgpr (Bit#(5) r); // Read a floating-point register
+          return floating_rf.sub(r);
+        endmethod
+        method Action write_debug_fgpr (Bit#(5) r, Bit#(`Reg_width) d)if(!initialize); // Write a floating-point register
+          floating_rf.upd(r,d);
+        endmethod
+      `endif
+    `endif
+    method Action inferred_xlen(Bit#(2) mxl);
+      wr_mxl <=mxl;
+    endmethod
+  endmodule
+endpackage
diff --git a/src/core/riscv.bsv b/src/core/riscv.bsv
new file mode 100644
index 0000000..0265ff6
--- /dev/null
+++ b/src/core/riscv.bsv
@@ -0,0 +1,357 @@
+/*
+Copyright (c) 2013, IIT Madras
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+* Neither the name of IIT Madras nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+Author Names : Neel Gala
+Email ID : neelgala@gmail.com
+
+Description :
+This is the 64-bit core of the c_class processor. It contains rules for each stage. The description of each stage
+is given in the respective rules.
+*/
+package riscv;
+
+/*===== Package imports === */
+import SpecialFIFOs::*;
+import FIFO::*;
+import FIFOF::*;
+import DReg::*;
+import Vector ::*;
+import TxRx::*;
+import Connectable::*;
+import GetPut::*;
+/*========================= */
+
+/*=== Project imports -===== */
+import fetch_stage::*;
+import decode_opfetch::*;
+import execute_stage::*;
+import memory_stage::*;
+import csr::*;
+`include "defined_parameters.bsv"
+import defined_types::*;
+/*========================= */
+
+  interface Ifc_riscv;
+    interface Get#(Tuple5#(Bit#(2),Bit#(`VADDR),Bit#(`VADDR),Bool,Bit#(3))) request_to_imem; // instruction-memory request, exported from the fetch stage
+    method Action instruction_response_from_imem(Maybe#(Tuple7#(Bit#(`VADDR),Bit#(2),Bit#(`VADDR),Bit#(32), Trap_type, Bit#(`PERFMONITORS),Bit#(3))) x); // instruction-memory response, handed to the fetch stage
+    interface Put#(Tuple4#(Bit#(3),Bit#(`VADDR),Bit#(`VADDR),Bit#(2))) prediction_response; // exported from the fetch stage
+    interface Get#(Tuple2#(Bit#(3),Bit#(`VADDR))) send_prediction_request; // exported from the fetch stage
+    method Maybe#(Training_data#(`VADDR)) training_data; // exported from the execute stage
+    interface Get#(Tuple2#(Memout,Bit#(1))) request_to_dmem; // data-memory request, exported from the execute stage (to_dmem)
+    interface Put#(Maybe#(Tuple4#(Bit#(`Reg_width), Trap_type, Bit#(`PERFMONITORS),Bit#(1)))) response_from_dmem; // data-memory response, consumed by the memory stage
+    method Bool flush_dmem; // value of dmem_flush, which the write-back rule drives
+    method Action set_external_interrupt(Tuple2#(Bool,Bool) i); // forwarded to the CSR unit
+    `ifdef MMU
+    method Bit#(`Reg_width) send_satp; // forwarded from the CSR unit
+    method Chmod perm_to_TLB; // forwarded from the CSR unit
+    method Bit#(`Reg_width) mmu_cache_disable; // forwarded from the CSR unit
+    method Fence_VMA_type#(`VADDR) fence_tlbs; // rs1/rs2 captured by write-back into wr_sfence
+    `endif
+    (*always_ready,always_enabled*)
+    method Action boot_sequence(Bit#(1) bootseq); // forwarded to the CSR unit
+    /* =========================== Debug Interface ===================== */
+    `ifdef Debug
+    method Bit#(`Reg_width) read_debug_igpr (Bit#(5) r); // Read an integer register through the decode stage
+    method Action write_debug_igpr (Bit#(5) r, Bit#(`Reg_width) d); // Write an integer register through the decode stage
+    method Bit#(`Reg_width) read_debug_fgpr (Bit#(5) r); // Read a floating-point register through the decode stage
+    method Action write_debug_fgpr (Bit#(5) r, Bit#(`Reg_width) d); // Write a floating-point register through the decode stage
+    method Action reset; // Post a reset request (sets rg_reset_requested)
+    method Action run_continue; // Post a resume request (sets rg_resume_requested)
+    method Bool reset_complete; // True when csr.reset_mode is not asserted
+    method Action stop; // Post a stop request (sets rg_stop_requested)
+    method Bool halted (); // Mirrors csr.halted
+    method ActionValue#(Bit#(`Reg_width)) rw_csr (Bit#(12) r, Bool write, Bit#(`Reg_width) data); // Debug CSR access, forwarded to csr.rw_debug_csr
+    `endif
+    `ifdef CLINT
+    method Action clint_msip(Bit#(1) intrpt); // forwarded to the CSR unit
+    method Action clint_mtip(Bit#(1) intrpt); // forwarded to the CSR unit
+    method Action clint_mtime(Bit#(`Reg_width) c_mtime); // forwarded to the CSR unit
+    `endif
+    /*-========================================================================== */
+  endinterface
+
+  (*synthesize*)
+  (*conflict_free="get_trap_data_from_csr,rl_write_back"*)
+  module mkriscv#(Bit#(`VADDR) reset_vector)(Ifc_riscv);
+
+    function Bool checkloadtrigger(TriggerData tdata, Bit#(`Reg_width) lsdata); // True when lsdata satisfies the Data trigger in tdata. NOTE(review): not called anywhere in this module; schemes 2/3 test data>=lsdata / data<=lsdata - confirm the operand order against the debug spec; the [63:32] slices assume a 64-bit `Reg_width.
+      if(tdata.ttype matches tagged Data .data)
+        if(tdata.matchscheme==0 && data==lsdata) // scheme 0: exact equality
+          return True;
+        else if(tdata.matchscheme==2 && data>=lsdata)
+          return True;
+        else if(tdata.matchscheme==3 && data<=lsdata)
+          return True;
+        else if(tdata.matchscheme==4 && data[31:0]==(data[63:32]&lsdata[31:0])) // scheme 4: low half of lsdata masked by the high half of data
+          return True;
+        else if(tdata.matchscheme==5 && data[31:0]==(data[63:32]&lsdata[63:32])) // scheme 5: high half of lsdata masked by the high half of data
+          return True;
+        else
+          return False;
+      else // trigger is not of the Data kind
+        return False;
+    endfunction
+
+    /* =================== PIPELINE FIFOS ======================================================================== */
+    FIFOF#(IF_ID_type) ff_if_id <-mkSizedFIFOF(2); // 2-entry buffer between fetch and decode
+    FIFOF#(ID_IE_type) ff_id_ie <-mkSizedFIFOF(2); // 2-entry buffer between decode and execute
+    FIFOF#(IE_IMEM_type) ff_ie_imem <-mkSizedFIFOF(2); // 2-entry buffer between execute and memory
+    FIFOF#(IMEM_IWB_type) ff_imem_iwb <-mkLFIFOF(); // buffer between memory and write-back
+    Wire#(Maybe#(Operand_forwading_type)) wr_forward_from_WBS <-mkDWire(tagged Invalid);// NOTE(review): declared but not referenced elsewhere in this module
+    Wire#(Maybe#(Tuple3#(Bit#(`Reg_width), Trap_type,Bit#(`PERFMONITORS)))) wr_response_to_cpu <- mkDWire(tagged Invalid); // NOTE(review): declared but not referenced elsewhere in this module
+    /*============================================================================================================*/
+
+    /* ============================================ Flushing related regs/wires=================================== */
+    Reg#(Bool) wb_stage_flush <-mkDWire(False); // flush request from write-back (defaults to False); enables rl_flush_fetch and rl_flush_decode_stage
+    Reg#(Bool) dmem_flush <-mkDWire(False); // driven by write-back (defaults to False) and exported through flush_dmem
+    Reg#(Bit#(`VADDR)) wr_effective_address1 <-mkDWire(0); // address supplied by the CSR unit at write-back; passed to fetch.flush when wb_stage_flush is set
+    Wire#(Bool) wr_change_wEpoch<-mkDWire(False); // raised whenever write-back toggles wEpoch; enables rl_change_wEpoch
+    Wire#(Fence_VMA_type#(`VADDR)) wr_sfence <- mkWire(); // written by write-back under MMU, read by fence_tlbs
+    Wire#(Bit#(`PERFMONITORS)) wr_dcache_perfmon <- mkDWire(0); // NOTE(review): declared but not referenced elsewhere in this module
+    Reg#(Bit#(1)) wEpoch <-mkReg(0); // write-back epoch bit; entries whose epochs[0] differs are dropped in rl_write_back
+    /*============================================================================================================*/
+
+    /*========================================== Debug related registers/wires ================================== */
+    Reg #(Bool) rg_stop_requested[2] <- mkCReg(2, False); // True while a stop request is pending (port 0 set by stop, port 1 cleared by disable_debug_requests)
+    Reg #(Bool) rg_resume_requested[2] <- mkCReg(2, False); // True while a resume request is pending (port 0 set by run_continue, port 1 cleared by disable_debug_requests)
+    Reg #(Bool) rg_reset_requested[2] <- mkCReg(2, False); // True while a reset request is pending (port 0 set by reset, port 1 cleared by disable_debug_requests)
+    /*============================================================================================================*/
+
+    /* ================= Instantiating all the modules required in the pipe =================================== */
+    Ifc_fetch fetch <-mkfetch(reset_vector);
+    Ifc_decode_opfetch decode <-mkdecode_opfetch;
+    Ifc_execute_stage execute_stage <-mkexecute_stage;
+    Ifc_memory_stage memory_stage <-mkmemory_stage;
+    Ifc_csr csr <-mkcsr();
+    /*============================================================================================================*/
+
+    /* ================== flushing the core partially or completely =================================================== */
+    let {flush_from_execute,effective_address}=execute_stage.generate_flush; // flush kind and target address reported by the execute stage
+
+    /* This rule flushes the instruction fetch stage when the execute stage reports a flush and the write-back stage
+    is not flushing in the same cycle. The kind of flush (the original note cites branch misprediction and Fence.I)
+    is carried in flush_from_execute and handed to fetch.flush together with the target address.*/
+    rule rl_flush_first_two_stages(flush_from_execute!=None && !wb_stage_flush);
+      `ifdef verbose $display($time,"\tFlushing the fetch and Decode stage alone"); `endif
+      fetch.flush(effective_address,flush_from_execute);
+    endrule
+
+    /*=== These rules fire when the write-back stage requests a flush (wb_stage_flush / wr_change_wEpoch) ===*/
+    rule rl_flush_fetch(wb_stage_flush);
+      `ifdef verbose $display($time,"\tFLUSH FIRING TO ALL STAGES"); `endif
+      fetch.flush(wr_effective_address1,AccessFlush); // restart fetch at the address chosen during write-back
+      execute_stage.flush_prf;
+    endrule
+    rule rl_flush_decode_stage(wb_stage_flush||flush_from_execute!=None); // decode is flushed for either flush source
+      decode.flush();
+    endrule
+    rule rl_change_eEpoch(flush_from_execute!=None && !wr_change_wEpoch); // execute-side flush: advance eEpoch unless write-back changes wEpoch this cycle
+      fetch.update_eEpoch;
+      decode.update_eEpoch;
+    endrule
+    rule ras_push_connect; // hands execute_stage.ras_push to the fetch stage
+      fetch.push_ras(execute_stage.ras_push);
+    endrule
+    rule rl_change_wEpoch(wr_change_wEpoch); // write-back flush: every stage updates its copy of wEpoch
+      fetch.update_wEpoch;
+      decode.update_wEpoch;
+      memory_stage.update_wEpoch;
+      execute_stage.update_wEpoch;
+    endrule
+    /*=============================================================================================================*/
+
+    /*============= Make PIPE Connections =========== */
+    mkConnection(fetch.tx_out,ff_if_id);
+    mkConnection(ff_if_id,decode.rx_in);
+
+    mkConnection(decode.tx_out,ff_id_ie);
+    mkConnection(ff_id_ie,execute_stage.rx_in);
+
+    mkConnection(execute_stage.tx_out,ff_ie_imem);
+    mkConnection(ff_ie_imem,memory_stage.rx_in);
+
+    RX#(IMEM_IWB_type) rx <-mkRX(); // receive endpoint used by the write-back rule
+    mkConnection(memory_stage.tx_out, ff_imem_iwb);
+    mkConnection(ff_imem_iwb,rx.e);
+    /*===================================================== */
+    /* This rule passes the forwarding data produced by the memory stage
+    to the execute stage */
+    rule connect_forwarding_data1;
+      execute_stage._forwarding_from_memory(memory_stage.forwarding_data); // forwarding from memory unit.
+    endrule
+    rule send_misa_to_decode; // decode always sees the current csr.misa
+      decode.misa(csr.misa);
+    endrule
+    rule get_trap_data_from_csr(ff_if_id.notEmpty); // asks the CSR unit about a trap for the entry at the head of ff_if_id and passes the answer to decode
+      let {y,x}<-csr.check_for_trap(`ifdef Debug rg_stop_requested[1],rg_resume_requested[1],rg_reset_requested[1], `endif ff_if_id.first.program_counter,ff_if_id.first.instruction);
+      decode.trap_from_csr(tuple2(y,x));
+    endrule
+    `ifdef Debug
+    rule disable_debug_requests; // clears each pending debug request once the CSR unit reports the matching state
+      if(csr.halted && rg_stop_requested[1])
+        rg_stop_requested[1]<=False;
+      if(!csr.halted && rg_resume_requested[1])
+        rg_resume_requested[1]<=False;
+      if(csr.reset_mode && rg_reset_requested[1])
+        rg_reset_requested[1]<=False;
+    endrule
+    `endif
+    /* This rule forwards the rounding mode held by the CSR unit
+    (csr.roundingmode) to the execute stage */
+    rule get_data_from_csr;
+      execute_stage.roundingmode(csr.roundingmode);
+    endrule
+    `ifdef Debug
+    rule connect_trigger_info_memorystage; // store and load trigger data from the CSR unit go to the memory stage
+      memory_stage.storetrigger_info(csr.store_triggerdata);
+      memory_stage.loadtrigger_info(csr.load_triggerdata);
+    endrule
+    `endif
+
+    rule connect_mxl; // execute and decode both receive csr.inferred_xlen
+      execute_stage.inferred_xlen(csr.inferred_xlen);
+      decode.inferred_xlen(csr.inferred_xlen);
+    endrule
+
+    /* ==================================================================================================================================*/
+
+
+    /* Write-back stage: consumes the head entry of rx and either drops it, lets the CSR unit process it, or takes its trap. Kept inline because modularizing
+    it would encapsulate the debug registers and the CSR, which communicate with the rest of the pipe in ad hoc fashion. */
+    rule rl_write_back;
+      WriteBackType info=rx.u.first().commit_data;
+      Bool start_write=True; // NOTE(review): never read in this rule
+      Bit#(`VADDR) memaddress=0;
+      if(info matches tagged RESULT .arith_data)
+        memaddress=truncate(arith_data.aluresult); // for RESULT entries the ALU result is the address later given to csr.take_trap
+      let exception=rx.u.first.exception;
+      Bit#(`PERFMONITORS) pm=rx.u.first.perfmonitors;
+      /*========================================================= */
+      `ifdef verbose $display($time,"\t*****************WRITE BACK STAGE*************************\t PC: %h PID: %d PERF: %h Instr-Epochs: %b Epochs: %b",rx.u.first.program_counter,rx.u.first.pid,pm,rx.u.first.epochs,wEpoch); `endif
+      rx.u.deq(); // consume the head entry
+      Bit#(3) debugcause=rx.u.first.debugcause;
+      `ifdef Debug
+      if(csr.step_now)begin // the CSR unit asks for a step: replace the entry's exception with a debug interrupt
+        exception=tagged Interrupt DebugInterrupt;
+        debugcause=4;
+      end
+      `endif
+      if(rx.u.first.epochs[0]!=wEpoch)begin // stale write-back epoch: the entry is discarded with no further action
+        `ifdef verbose $display($time,"\tWRITEBACK: Dropping instruction"); `endif
+      end
+      else if(exception matches tagged None)begin // no trap pending: the CSR unit processes the instruction
+        `ifdef MMU
+        if(info matches tagged SYSTEM .priv) begin
+          if(priv.csr_address[11:5]=='b0001001) // NOTE(review): presumably the SFENCE.VMA encoding - confirm
+            wr_sfence <= Fence_VMA_type{rs1:truncate(priv.rs1),rs2:truncate(priv.rs2)};
+        end
+        `endif
+        let {flush,ea,destination_value,commit}<-csr.system_instruction(info,rx.u.first.program_counter,pm `ifdef simulate ,rx.u.first.instruction,rx.u.first.rd_type,rx.u.first.destination `endif );
+        if(commit)begin // the CSR unit reports that the destination register must be written
+          `ifdef verbose $display($time,"\tWRITEBACK: Writing into register: %d with value: %h",rx.u.first.destination,destination_value); `endif
+          decode.write_rd(rx.u.first.destination,destination_value,rx.u.first.rd_type);
+        end
+        wb_stage_flush<=flush;
+        if(info matches tagged SYSTEM .x) // every SYSTEM entry also raises dmem_flush
+          dmem_flush<=True;
+
+        if(flush) begin // toggle the epoch and announce the change to the other stages
+          wEpoch<=~wEpoch;
+          wr_change_wEpoch<=True;
+        end
+        wr_effective_address1<=ea;
+        `ifdef verbose $display($time,"\tWRITEBACK: Flush: ",fshow(flush)," EA: %h",ea); `endif
+      end
+      else begin // trap path: the CSR unit returns the target address and whether to flush
+        let {ea,flush}<-csr.take_trap(exception,debugcause,rx.u.first.program_counter,truncate(memaddress));
+        `ifdef verbose $display($time,"\tWRITEBACK: Taking trap ea: %h",ea); `endif
+        if(flush) begin
+          wEpoch<=~wEpoch;
+          wr_change_wEpoch<=True;
+        end
+        if(exception matches tagged Interrupt .in &&& flush) // interrupts raise dmem_flush only when the trap flushes
+          dmem_flush<=True;
+        if(exception matches tagged Exception .cause)begin
+          let ex=pack(cause);
+          if(ex!=4 && ex!=5 && ex!=6 && ex!=7 && ex!=13 && ex!=15) // NOTE(review): presumably the load/store misaligned, access-fault and page-fault codes - confirm against the Exception encoding
+            dmem_flush<=True;
+        end
+        wb_stage_flush<=flush;
+        wr_effective_address1<=ea;
+      end
+    endrule
+    /*====================================== END OF PIPE STAGES ============================================= */
+    //////////////////////////// definition of methods /////////////////////////////////////////////////////////////////////////////////
+    interface request_to_imem=fetch.request_to_imem;
+    method Action instruction_response_from_imem(Maybe#(Tuple7#(Bit#(`VADDR),Bit#(2),Bit#(`VADDR),Bit#(32), Trap_type, Bit#(`PERFMONITORS),Bit#(3))) x)=fetch.instruction_response_from_imem(x);
+    interface prediction_response =fetch.prediction_response;
+    interface send_prediction_request=fetch.send_prediction_request;
+    method training_data=execute_stage.training_data;
+    interface request_to_dmem = execute_stage.to_dmem; // requests leave from the execute stage
+    interface response_from_dmem = memory_stage.response_from_dmem; // responses are consumed by the memory stage
+    method Bool flush_dmem; // False unless write-back drives dmem_flush in this cycle
+      return dmem_flush;
+    endmethod
+    method Action set_external_interrupt(Tuple2#(Bool,Bool) i) = csr.set_external_interrupt(i);
+
+    `ifdef MMU
+    method Bit#(`Reg_width) send_satp = csr.send_satp;
+    method Chmod perm_to_TLB = csr.perm_to_TLB;
+    method Bit#(`Reg_width) mmu_cache_disable = csr.mmu_cache_disable;
+    method Fence_VMA_type#(`VADDR) fence_tlbs; // value written to wr_sfence by write-back
+      return wr_sfence;
+    endmethod
+
+    `endif
+
+    //interface ifc_riscv_interrupt_pins = memory_stage.plic_ifc_external_irq;
+    //method Bit#(TLog#(`INTERRUPT_PINS)) intrpt_completion;
+    // return memory_stage.plic_intrpt_completion;
+    //endmethod
+    /* ================================== Debug related methods ======================= */
+    `ifdef Debug
+    method Action reset; // posts a reset request on port 0 of rg_reset_requested
+      `ifdef verbose $display($time,"\tsetting the reset request"); `endif
+      rg_reset_requested[0]<=True;
+    endmethod
+    method Action run_continue(); // posts a resume request on port 0 of rg_resume_requested
+      `ifdef verbose $display($time,"\tsetting the resume reqiest to True"); `endif
+      rg_resume_requested[0]<=True;
+    endmethod
+    method Bool reset_complete; // True when the CSR unit is not in reset mode
+      return !csr.reset_mode ;
+    endmethod
+    method Action stop; // posts a stop request on port 0 of rg_stop_requested
+      `ifdef verbose $display($time,"RISCV: REQUESTING HALT"); `endif
+      rg_stop_requested[0]<=True;
+    endmethod
+    method Bool halted (); // mirrors csr.halted
+      return csr.halted;
+    endmethod
+    method read_debug_igpr (Bit#(5) r) = decode.read_debug_igpr(r); // Read an integer register through the decode stage
+    method Action write_debug_igpr (Bit#(5) r, Bit#(`Reg_width) d)=decode.write_debug_igpr(r,d); // Write an integer register through the decode stage
+    method read_debug_fgpr (Bit#(5) r)=decode.read_debug_fgpr(r); // Read a floating-point register through the decode stage
+    method Action write_debug_fgpr (Bit#(5) r, Bit#(`Reg_width) d)=decode.write_debug_fgpr(r,d); // Write a floating-point register through the decode stage
+    method ActionValue#(Bit#(`Reg_width)) rw_csr (Bit#(12) r, Bool write, Bit#(`Reg_width) data) =csr.rw_debug_csr(r,write,data); // Debug CSR access through the CSR unit. TODO
+    `endif
+    `ifdef CLINT
+    method Action clint_msip(Bit#(1) intrpt)=csr.clint_msip(intrpt);
+    method Action clint_mtip(Bit#(1) intrpt)=csr.clint_mtip(intrpt);
+    method Action clint_mtime(Bit#(`Reg_width) c_mtime)=csr.clint_mtime(c_mtime);
+    `endif
+    /* ==================================================================================== */
+    method Action boot_sequence(Bit#(1) bootseq)=csr.boot_sequence(bootseq);
+  endmodule
+endpackage
+
+
-- 
2.30.2