2 Copyright (c) 2013, IIT Madras
5 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
7 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
9 * Neither the name of IIT Madras nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
11 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
12 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
14 package execute_stage;
15 /*===== Package Imports ==== */
21 import SpecialFIFOs::*;
22 /*========================== */
23 /*===== Project Imports ======*/
25 `include "defined_parameters.bsv"
26 import defined_types::*;
36 /*===============================*/
// multiplication: combinational multiply helper. It multiplies operand magnitudes at
// double width and then corrects the sign of the product.
//   in1, in2    : Reg_width-bit operands.
//   funct3      : 2-bit selector. It controls which operand is treated as signed
//                 (see op1/op2 below) and how the sign-fix flag is derived.
//   word_double : only present when RV64 is defined. When it is False the product is
//                 replaced by the sign-extension of its low 32 bits.
// Returns a Reg_width-bit slice of the product (low half or high half).
// NOTE(review): the embedded line numbers skip 43, 47-48, 50, 53-54, 56 and 58-59, so the
// declaration of `product`, the guards that select each branch/return, and the closing
// endfunction are not visible in this listing. Comments below describe only visible code.
39 function Bit#(`Reg_width) multiplication (Bit#(`Reg_width) in1,Bit#(`Reg_width) in2,Bit#(2) funct3 `ifdef RV64 ,Bool word_double `endif );
// op1: two's-complement negation of in1 when exactly one of funct3[1:0] is set and in1's
// MSB is 1; otherwise in1 unchanged. Either way the value is zero-extended to double width.
40 Bit#(TMul#(2,`Reg_width)) op1= ((funct3[0]^funct3[1])==1 && in1[`Reg_width-1]==1)?zeroExtend((~in1)+1):zeroExtend(in1);
// op2: negated only when funct3 == 1 and in2's MSB is 1; otherwise in2 unchanged.
41 Bit#(TMul#(2,`Reg_width)) op2= (funct3[1:0]==1 && in2[`Reg_width-1]==1)?zeroExtend((~in2)+1):zeroExtend(in2);
// Flag: 1 when the product must be negated after the multiply.
42 Bit#(1) lv_take_complement = 0;
// Operand sign bits differ -> negate. (The guard for this branch is on a line not shown;
// presumably funct3 == 1, matching the op2 condition above -- TODO confirm.)
44 lv_take_complement=((in1[`Reg_width-1]^in2[`Reg_width-1])==1)?1:0;
// funct3 == 2: only in1 is treated as signed, so the result sign follows in1's MSB.
45 else if(funct3[1:0]==2)
46 lv_take_complement=in1[`Reg_width-1];
// Apply the sign fix by two's-complementing the full-width product.
49 product=(lv_take_complement==1)?(~product+1): product;
// Word-sized operation: keep the low 32 bits and sign-extend them.
51 if(!word_double)// 32-bit
52 product=signExtend(product[31:0]);
// Lower Reg_width bits of the product (guard not visible).
55 return product[`Reg_width-1:0];
// Upper Reg_width bits of the product (guard not visible).
57 return product[2*`Reg_width-1:`Reg_width];
// Ifc_execute_stage: external interface of the execute-stage module (mkexecute_stage).
// Where an implementation is visible in this listing, the comment describes it; otherwise
// the member is described only by its signature.
60 interface Ifc_execute_stage;
// Action with no arguments; its implementation body is not visible in this listing.
61 method Action flush_prf;
62 /* ====================== pipe connections ========= */
// Receive side: carries ID_IE_type packets (the module reads them through `rx`).
63 interface RXe#(ID_IE_type) rx_in;
// Transmit side: carries IE_IMEM_type packets (the module enqueues them through `tx`).
64 interface TXe#(IE_IMEM_type) tx_out;
65 /*================================================== */
// Drives the 3-bit rounding-mode wire that is passed to the FPU when an operation starts.
66 method Action roundingmode(Bit#(3) rm);
// Returns the pair (flush request, effective address) produced in the current cycle.
67 method Tuple2#(Flush_type,Bit#(`VADDR)) generate_flush;
// Returns the training data captured from the ALU result, if any.
68 method Maybe#(Training_data#(`VADDR)) training_data;
69 method Bit#(`PERFMONITORS) execute_perfmon; // performance-monitor bits (NOTE(review): original comment said "icache performance counters" -- confirm which counters these are)
// Returns the optional `ras_push` value produced by the ALU in the current cycle.
70 method Maybe#(Bit#(`VADDR)) ras_push;
// Get interface handing out (memory request, 1-bit epoch) pairs queued by the execute rule.
71 interface Get#(Tuple2#(Memout,Bit#(1))) to_dmem;
// Accepts an optional (data, PRF index, pid) tuple and passes it to prf.fwd_from_memory.
72 method Action _forwarding_from_memory (Maybe#(Tuple3#(Bit#(`Reg_width), Bit#(TLog#(`PRFDEPTH)), Bit#(4))) fwd_data);
// Action with no arguments; only a debug print is visible in its implementation.
73 method Action update_wEpoch;
// Takes a 2-bit mxl value; implementation body not visible (presumably drives wr_mxl -- TODO confirm).
74 method Action inferred_xlen(Bit#(2) mxl);
75 endinterface:Ifc_execute_stage
// mkexecute_stage: execute-stage module. This span holds the scheduling attributes, the
// module header and the state/sub-module instantiations used by the rules that follow.
// NOTE(review): embedded line numbers skip several lines here (e.g. 81-84, 91-92, 94-95,
// 97), so the ifdef structure around the multiplier declarations is only partly visible.
// Scheduling assertion: the FPU and mul/div result rules never fire in the same cycle.
80 (*mutually_exclusive="read_output_from_fpu, read_outputs_from_muldiv"*)
// Scheduling assertion: the decode-receive rule and to_dmem.get do not conflict.
85 (*conflict_free="rl_receive_info_from_decode_stage, to_dmem_get"*)
86 module mkexecute_stage(Ifc_execute_stage);
// Optional `ras_push` value for the current cycle; Invalid when not written.
87 Wire#(Maybe#(Bit#(`VADDR))) wr_ras_push<-mkDWire(tagged Invalid);
// Register-file sub-module: operand reads, destination index/pid allocation, forwarding.
88 Ifc_prf_new prf <-mkprf_new();
89 RX#(ID_IE_type) rx <-mkRX; // receive from the decode stage
90 TX#(IE_IMEM_type) tx <-mkTX; // send to the memory stage;
// Multi-cycle multiply/divide unit.
93 Ifc_muldiv muldiv <-mkmuldiv;
// Holds the result of the in-module `multiplication` function until it is read out.
96 Reg#(Maybe#(Bit#(`Reg_width))) rg_mul_output<-mkReg(tagged Invalid);
98 `endif // instantiating
99 `ifdef spfpu Ifc_fpu fpu <- mkfpu(); `endif // instantiating the Floating point units.
101 //Wire#(Memout) wr_info_to_dmem <-mkWire;// holds the information to be given to dmem
102 FIFOF#(Tuple2#(Memout,Bit#(1))) wr_info_to_dmem <-mkBypassFIFOF;// holds the information to be given to dmem
// NOTE(review): this wire is written by the execute-stage rules and consumed by
// forward_data_from_exe; the trailing comment's "from the memory stage" looks stale.
103 Wire#(Maybe#(Tuple3#(Bit#(`Reg_width), Bit#(TLog#(`PRFDEPTH)), Bit#(4)))) wr_forward_from_EXE <-mkDWire(tagged Invalid);// holds the forwarded data from the memory stage
// Rounding mode supplied through the roundingmode method; 0 when not driven.
104 Wire#(Bit#(3)) wr_rounding_mode<-mkDWire(0);
// Two-port CReg; the rules in this module read and write port [1].
105 Reg#(Bool) multicylce_output[2] <-mkCReg(2,False); // is true if the operation is multicycle.
// Despite the rg_ prefix these two are DWires: they hold a value only in the cycle written.
106 Wire#(Flush_type) rg_flush_execute <-mkDWire(None);
107 Wire#(Bit#(`VADDR)) rg_effective_address<-mkDWire(0);
// Despite the wr_ prefix this is a DReg (register with a default of Invalid).
108 Reg#(Maybe#(Training_data#(`VADDR))) wr_training_data <-mkDReg(tagged Invalid);
// No use of this register is visible in this listing.
110 Reg#(Bit#(`PERFMONITORS)) rg_execute_perfmon<-mkDReg(0);
// Epoch bits; {eEpoch,wEpoch} is compared against the epochs carried by each instruction.
112 Reg#(Bit#(1)) eEpoch <-mkReg(0);
113 Reg#(Bit#(1)) wEpoch <-mkReg(0);
// When asserted, blocks the update_eEpoch_reg rule (its guard requires !wb_flush).
114 PulseWire wb_flush <-mkPulseWire();
// PRF pid/index of the instruction in flight; reused by the multi-cycle result rules.
116 Reg#(Bit#(4)) rg_pid <-mkReg(0);
117 Reg#(Bit#(TLog#(`PRFDEPTH))) rg_prf_index<-mkReg(0);
// XLEN selector: a value of 1 makes operands and forwarded data get sign-extended from 32 bits.
119 Wire#(Bit#(2)) wr_mxl <- mkWire();
// Fires when the execute stage requests a flush (rg_flush_execute != None) and wb_flush
// is not asserted in the same cycle.
// NOTE(review): only the debug print is visible; per that message the rule inverts
// eEpoch, but the assignment and the endrule are on lines not shown in this listing.
122 rule update_eEpoch_reg(rg_flush_execute!=None && !wb_flush);
123 `ifdef verbose $display($time,"\tEXECUTION: Inverting eEPOCH"); `endif
// Fires whenever wr_forward_from_EXE carries a Valid (data, index, pid) tuple and
// forwards that result to the register file through prf.fwd_from_execution.
// (Closing end/endrule lines are not visible in this listing.)
127 rule forward_data_from_exe(wr_forward_from_EXE matches tagged Valid .fwdata) ;
128 let {data,index,pid}=fwdata;
// When wr_mxl == 1, keep only the low 32 bits, sign-extended to the full register width.
129 if(wr_mxl==1) begin// 32-bits
130 data=signExtend(data[31:0]);
132 prf.fwd_from_execution(data,index,pid);
// Main execute rule. Fires when a decoded instruction is available on rx, tx can accept a
// packet, and no multi-cycle operation is outstanding (multicylce_output[1] is False).
// Visible flow:
//   1. Epoch mismatch -> the instruction is dropped.
//   2. No exception   -> operands are read from the PRF; once all three are Present the
//      instruction goes to the mul/div unit, the FPU, or the single-cycle ALU (fn_alu).
//      Multi-cycle units set multicylce_output[1]; their results are sent by other rules.
//   3. Exception      -> a zero-valued RESULT packet carrying the exception is enqueued.
// NOTE(review): many lines are missing from this listing (embedded numbers skip e.g. 137,
// 148, 150, 165-166, 170, 173, 175, 177-178, 180-181, 185-186, 190-192, 208, 213,
// 215-216, 218, 220, 228-231, 236, 241-242, 244-245, 247-250, 256-260). In particular the
// definition of `data`, the rx dequeues, the assignment to result1, the write of rg_pid,
// `is_mul`, all ifdef/else/end structure and the endrule are not shown.
135 rule rl_receive_info_from_decode_stage(rx.u.notEmpty && tx.u.notFull && !multicylce_output[1] );
136 Bit#(`PERFMONITORS) perfmonitors=0;
// `data` is defined on a line not shown (presumably rx.u.first -- TODO confirm).
138 let pc=data.program_counter;
139 let dest=data.destination;
140 let rdtype=data.rdtype;
141 let exception=data.exception;
142 let inst_type=data.inst_type;
143 Bit#(`VADDR) nextpc=rx.u.first.nextpc;
144 `ifdef simulate let instruction=data.instruction; `endif
145 Execution_output result1=tagged Busy;
146 `ifdef verbose $display($time,"\t********** EXECUTION STAGE FIRING ************ PC: :%h EPOCHS: %b Instr-EPOCHS: %b",pc,{eEpoch,wEpoch}, data.epochs) ; `endif
// Instructions whose epochs differ from the current {eEpoch,wEpoch} are discarded.
147 if({eEpoch,wEpoch}!=data.epochs)begin
149 `ifdef verbose $display($time,"\tEXECUTION: PC: %h Dropping Instruction since Epochs do not match", rx.u.first.program_counter); `endif
151 else if(exception matches tagged None)begin
// No visible line ever assigns start_execution.
152 Bool start_execution=False;
153 `ifdef verbose $display($time,"\tEXECUTION: rs1type: ",fshow(data.rs1_type)," rs2_type ",fshow(data.rs2_type)); `endif
// Operand fetch: each read yields an RFType value; execution proceeds only when all
// three are tagged Present.
154 RFType#(`Reg_width) op1<-prf.read_rs1(data.rs1addr,data.rs1_type,data.rs1);
155 RFType#(`Reg_width) op2<-prf.read_rs2(data.rs2addr,data.rs2_type,data.rs2);
156 RFType#(`Reg_width) op3<-prf.read_rs3(data.rs3addr,data.rs3_type,data.rs3_imm);
157 if(op1 matches tagged Present .x &&& op2 matches tagged Present .y &&& op3 matches tagged Present .z)begin
158 Bit#(`Reg_width) rs1=x;
159 Bit#(`Reg_width) rs2=y;
160 Bit#(`Reg_width) rs3=z;
// When wr_mxl == 1, operands are narrowed to 32 bits and sign-extended.
161 if(wr_mxl==1) begin // 32-bit
162 rs1=signExtend(rs1[31:0]);
163 rs2=signExtend(rs2[31:0]);
164 rs3=signExtend(rs3[31:0]);
// Obtain the PRF index and pid for the destination; the index is also saved in
// rg_prf_index so the multi-cycle result rules and the exception path can reuse it.
167 let {prf_index,pid}<-prf.get_index_pid(data.destination,data.rdtype);
168 `ifdef verbose $display($time,"\tEXECUTION: PRFINDEX: %d PID: %d",prf_index,pid); `endif
169 rg_prf_index<=prf_index;
171 /*========== Multiplication =============== */
172 if(inst_type==MUL || inst_type == DIV)begin
174 if(inst_type==MUL) begin
176 `ifdef verbose $display($time,"\tEXECUTION: Multiplication Operation Op1: %h Op2: %h ",rs1,rs2); `endif
179 `ifdef verbose $display($time,"\tEXECUTION: Division Operation Op1: %h Op2: %h ",rs1,rs2); `endif
// Hand the operands to the mul/div unit and mark the stage busy with a multi-cycle op.
182 muldiv.input_operands(rs1,rs2,data.funct3[1:0],pack(data.word32),is_mul);
183 prf.update_rd(prf_index,pid);
184 multicylce_output[1]<=True;
// Alternative path (its guard is not visible): compute the product with the in-module
// `multiplication` function and park it in rg_mul_output.
// NOTE(review): data.word32 is passed as the `word_double` argument, and the function
// truncates to 32 bits when that argument is False -- confirm the polarity is intended.
187 let x= multiplication(rs1,rs2,data.funct3[1:0] `ifdef RV64 ,data.word32 `endif );
188 rg_mul_output<=tagged Valid x;
189 multicylce_output[1]<=True;
193 /*========================================== */
194 /*============== FLOATING POINT ============================ */
196 else if(inst_type == FLOATING || inst_type==DFLOATING)begin
197 `ifdef verbose $display($time,"\tEXECUTION: Floating Point Operation "); `endif
// Start the FPU; rs3[11:5] and rs3[1:0] are passed as separate arguments alongside the
// truncated operands, funct3, the rounding mode and the word32 flag.
198 fpu._start(truncate(rs1),truncate(rs2),truncate(rs3),data.fn,rs3[11:5],data.funct3,rs3[1:0],wr_rounding_mode, data.word32);
199 multicylce_output[1]<=True;
200 prf.update_rd(prf_index,pid);
203 /*========================================== */
204 /*================ SINGLE CYCLE ALU ========================== */
206 `ifdef verbose $display($time,"\tEXECUTION: Single Cycle Operation "); `endif
// fn_alu returns, in order: result, effective address, flush request, training data,
// ras_push value, and two further fields (ex, pm) whose uses are not visible here.
207 let {x,ea,flush,td,raspush,ex,pm} = fn_alu(data.fn,rs1,rs2,rs3,pc,data.inst_type,nextpc,data.funct3,data.mem_access,dest,data.prediction,rx.u.first.perfmonitors,data.word32);
209 `ifdef verbose $display($time,"\tEXE: rs1: %h rs2: %h rs3_imm: %h",rs1,rs2,rs3); `endif
210 `ifdef verbose $display($time,"\tEXECUTION: Result: ",fshow(result1)); `endif
211 `ifdef verbose $display($time,"\tEXECUTION: PC: %h",pc," Flush: ",fshow(flush)," EA: %h",ea," Instruction: ",fshow(data.inst_type)); `endif
// Arithmetic result: forward it to the register file in this cycle.
212 if(result1 matches tagged RESULT .res)begin
214 wr_forward_from_EXE <= tagged Valid tuple3(res.aluresult,prf_index,pid);
217 prf.update_rd(prf_index,pid);
// Memory operation: send the packet down the pipe and queue the request, tagged with
// epochs[0], for the data-memory interface.
219 if(result1 matches tagged MEMORY .meminfo)begin
221 tx.u.enq(IE_IMEM_type{execresult:result1,
222 program_counter:pc, exception:exception, debugcause:rx.u.first.debugcause,
223 destination:dest, rd_type:rdtype , pid:pid, index:prf_index, perfmonitors:perfmonitors ,epochs:rx.u.first.epochs
224 `ifdef simulate , instruction:instruction `endif });
225 wr_info_to_dmem.enq(tuple2(meminfo,rx.u.first.epochs[0]));
226 rg_flush_execute<=flush;
227 rg_effective_address<=ea;
// Non-memory result (presumably the else branch; the guard is not visible): publish the
// ras_push value, flush request, effective address and training data, then send the packet.
232 wr_ras_push<=raspush;
233 rg_flush_execute<=flush;
234 rg_effective_address<=ea;
235 wr_training_data<=td;
237 tx.u.enq(IE_IMEM_type{execresult:result1,
238 program_counter:pc, exception:exception, debugcause:rx.u.first.debugcause,
239 destination:dest, rd_type:rdtype , pid:pid, index:prf_index, perfmonitors:perfmonitors ,epochs:rx.u.first.epochs
240 `ifdef simulate , instruction:instruction `endif });
243 /*========================================== */
// At least one operand is not Present yet; the only visible action is this debug print.
246 `ifdef verbose $display($time,"\tEXECUTION: Waiting for operands.\nRS1: ",fshow(op1),"\nRS2: ",fshow(op2),"\nRS3: ",fshow(op3)); `endif
// Exception path: pass the exception on with a zero result. Note that index/pid come
// from the rg_prf_index/rg_pid registers, not from a fresh prf.get_index_pid call.
251 `ifdef verbose $display($time,"\tEXECUTE: EXCEPTION"); `endif
252 tx.u.enq(IE_IMEM_type{execresult:tagged RESULT Arithout{aluresult:0,fflags:0},
253 program_counter:pc, exception:exception, debugcause:rx.u.first.debugcause,
254 destination:dest, rd_type:rdtype , index:rg_prf_index, pid:rg_pid, perfmonitors:perfmonitors,epochs:rx.u.first.epochs
255 `ifdef simulate , instruction:instruction `endif });
// Result rule for the muldiv unit. Fires while a multi-cycle operation is outstanding
// (multicylce_output[1] is True), rx still holds the instruction and tx has room.
// It takes the result from muldiv, wraps it as a RESULT with fflags 0, compares epochs,
// enqueues the packet to the next stage, forwards the value to the register file using
// the saved rg_prf_index/rg_pid, and clears the multi-cycle flag.
// NOTE(review): embedded numbers skip 264, 274-275, 280, 282 and 284-286, so the
// else/end structure around the epoch check, any condition on the forward, the rx dequeue
// and the endrule are not visible. A second rule with this same name follows, so the two
// are presumably selected by an ifdef on lines not shown.
261 rule read_outputs_from_muldiv(rx.u.notEmpty && tx.u.notFull && multicylce_output[1] );
262 `ifdef verbose $display($time,"\tEXECUTION: Multiplier sending output to Memory stage"); `endif
263 let res<-muldiv.muldiv_result;
265 let decodedata=rx.u.first;
266 let pc=decodedata.program_counter;
267 let dest=decodedata.destination;
268 let rdtype=decodedata.rdtype;
269 let exception=decodedata.exception;
270 `ifdef simulate let instr=decodedata.instruction; `endif
271 Execution_output result1= tagged RESULT(Arithout{aluresult:res,fflags:0});
// Epoch mismatch; only the debug print is visible for this case.
272 if({eEpoch,wEpoch}!=rx.u.first.epochs)begin
273 `ifdef verbose $display($time,"Epochs do not match"); `endif
276 tx.u.enq(IE_IMEM_type{execresult:result1,debugcause:rx.u.first.debugcause,
277 program_counter:pc, exception:exception,
278 destination:dest, rd_type:rdtype , index:rg_prf_index,pid:rg_pid, perfmonitors:rx.u.first.perfmonitors,epochs:rx.u.first.epochs
279 `ifdef simulate , instruction:instr `endif });
281 wr_forward_from_EXE <= tagged Valid tuple3(res,rg_prf_index,rg_pid);
// The multi-cycle operation is finished; this re-enables the main execute rule.
283 multicylce_output[1]<=False;
// Result rule for the in-module multiplier path. Same guard as the muldiv-unit variant
// plus the requirement that rg_mul_output holds a Valid product `x`.
// It invalidates rg_mul_output, wraps x as a RESULT with fflags 0, compares epochs,
// enqueues the packet, forwards x using the saved rg_prf_index/rg_pid, and clears the
// multi-cycle flag.
// NOTE(review): embedded numbers skip 289, 299-300, 305, 307 and 309-313, so the else/end
// structure, any condition on the forward, the rx dequeue and the endrule are not visible.
287 rule read_outputs_from_muldiv(rx.u.notEmpty &&& tx.u.notFull &&& multicylce_output[1] &&& rg_mul_output matches tagged Valid .x);
// Consume the stored product so that it is used only once.
288 rg_mul_output<=tagged Invalid;
290 let decodedata=rx.u.first;
291 let pc=decodedata.program_counter;
292 let dest=decodedata.destination;
293 let rdtype=decodedata.rdtype;
294 let exception=decodedata.exception;
295 `ifdef simulate let instr=decodedata.instruction; `endif
296 Execution_output result1= tagged RESULT(Arithout{aluresult:x,fflags:0});
// Epoch mismatch; only the debug print is visible for this case.
297 if({eEpoch,wEpoch}!=rx.u.first.epochs)begin
298 `ifdef verbose $display($time,"Epochs do not match"); `endif
301 tx.u.enq(IE_IMEM_type{execresult:result1,debugcause:rx.u.first.debugcause,
302 program_counter:pc, exception:exception,
303 destination:dest, rd_type:rdtype , index:rg_prf_index,pid:rg_pid, perfmonitors:rx.u.first.perfmonitors,epochs:rx.u.first.epochs
304 `ifdef simulate , instruction:instr `endif });
306 wr_forward_from_EXE <= tagged Valid tuple3(x,rg_prf_index,rg_pid);
308 multicylce_output[1]<=False;
// Result rule for the floating-point unit. Fires while a multi-cycle operation is
// outstanding, rx still holds the instruction and tx has room.
// It takes the FPU result (final_result plus fflags), compares epochs, enqueues the
// packet to the next stage, conditionally forwards the value to the register file, and
// clears the multi-cycle flag.
// NOTE(review): embedded numbers skip 316, 326-327, 334 and 336-337, so the else/end
// structure around the epoch check, the rx dequeue and the endrule are not visible.
314 rule read_output_from_fpu(rx.u.notEmpty && tx.u.notFull && multicylce_output[1] );
315 let res<-fpu.get_result;
317 let decodedata=rx.u.first;
318 let pc=decodedata.program_counter;
319 let dest=decodedata.destination;
320 let rdtype=decodedata.rdtype;
321 let exception=decodedata.exception;
322 `ifdef simulate let instr=decodedata.instruction; `endif
323 Execution_output result1= tagged RESULT(Arithout{aluresult:res.final_result, fflags:res.fflags});
// Epoch mismatch; only the debug print is visible for this case.
324 if({eEpoch,wEpoch}!=rx.u.first.epochs)begin
325 `ifdef verbose $display($time,"Epochs do not match"); `endif
328 tx.u.enq(IE_IMEM_type{execresult:result1, debugcause:rx.u.first.debugcause,
329 program_counter:pc, exception:exception,
330 destination:dest, rd_type:rdtype, index:rg_prf_index,pid:rg_pid , perfmonitors:rx.u.first.perfmonitors,epochs:rx.u.first.epochs
331 `ifdef simulate , instruction:instr `endif });
// Forward unless the destination is integer register 0; floating-point destinations
// are always forwarded.
332 if((dest!=0 && rdtype==IntegerRF) || rdtype==FloatingRF)
333 wr_forward_from_EXE <= tagged Valid tuple3(res.final_result,rg_prf_index,rg_pid);
335 multicylce_output[1]<=False;
// to_dmem: Get interface over the wr_info_to_dmem bypass FIFO. `get` returns the head
// entry, a (memory request, 1-bit epoch) pair queued by the execute rule.
// NOTE(review): embedded line 341 is not shown (presumably the FIFO deq -- TODO confirm);
// the endmethod/endinterface lines are also not visible.
338 interface to_dmem = interface Get
339 method ActionValue#(Tuple2#(Memout,Bit#(1))) get ;
340 `ifdef verbose $display($time,"\tEXECUTION: DEQUEING MEM REQUEST",fshow(wr_info_to_dmem.first)); `endif
342 return wr_info_to_dmem.first;
// roundingmode: latch the caller's 3-bit rounding mode onto wr_rounding_mode for this
// cycle; the execute rule passes that wire to fpu._start. (endmethod line not visible.)
347 method Action roundingmode(Bit#(3) rm);
348 wr_rounding_mode<=rm;
// generate_flush: current-cycle flush request paired with the effective address. Both
// sources are DWires, so they read as None / 0 in cycles where no rule writes them.
350 method generate_flush=tuple2(rg_flush_execute,rg_effective_address);
// training_data: value of the wr_training_data DReg (default Invalid).
351 method Maybe#(Training_data#(`VADDR)) training_data=wr_training_data;
// ras_push: value of the wr_ras_push DWire (Invalid unless written this cycle).
352 method Maybe#(Bit#(`VADDR)) ras_push = wr_ras_push;
// _forwarding_from_memory: accept an optional (data, PRF index, pid) tuple and, when it
// is Valid, pass it to the register file through prf.fwd_from_memory. An Invalid input
// has no visible effect. (Closing end/endmethod lines are not visible in this listing.)
353 method Action _forwarding_from_memory (Maybe#(Tuple3#(Bit#(`Reg_width), Bit#(TLog#(`PRFDEPTH)), Bit#(4))) fwd_data);
354 if(fwd_data matches tagged Valid .fwdata)begin
355 let {data,index,pid}=fwdata;
// When wr_mxl == 1, keep only the low 32 bits, sign-extended to the full register width.
356 if(wr_mxl==1) begin// 32-bits
357 data=signExtend(data[31:0]);
359 prf.fwd_from_memory(data,index,pid);
// update_wEpoch: only the debug print is visible; the statements on embedded lines
// 364-366 are not shown (presumably they update wEpoch, possibly also wb_flush -- TODO confirm).
362 method Action update_wEpoch;
363 `ifdef verbose $display($time,"\tEXECUTION: Updating wEPOCH"); `endif
// flush_prf: body (embedded lines 368-369) is not visible in this listing.
367 method Action flush_prf;
// inferred_xlen: body (embedded lines 371-372) is not visible; presumably it writes mxl
// to wr_mxl, which the rest of the module compares against 1 -- TODO confirm.
370 method Action inferred_xlen(Bit#(2) mxl);
374 endpackage:execute_stage