add core
[shakti-core.git] / src / core / execute_stage.bsv
1 /*
2 Copyright (c) 2013, IIT Madras
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
6
7 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
9 * Neither the name of IIT Madras nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
10
11 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
12 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
13 */
14 package execute_stage;
15 /*===== Package Imports ==== */
16 import TxRx::*;
17 import FIFOF::*;
18 import DReg::*;
19 import Clocks::*;
20 import GetPut::*;
21 import SpecialFIFOs::*;
22 /*========================== */
23 /*===== Project Imports ======*/
24 import alu::*;
25 `include "defined_parameters.bsv"
26 import defined_types::*;
27 `ifdef muldiv
28 `ifdef RV64
29 import muldiv::*;
30 `endif
31 `endif
32 `ifdef spfpu
33 import fpu::*;
34 `endif
35 import prf::*;
36 /*===============================*/
37
/*
  Single-cycle combinational multiplier.

  in1, in2    : source operands.
  funct3      : low two bits of the instruction's funct3 field (this matches the
                RISC-V M-extension encoding):
                  0 -> return the low  `Reg_width bits of in1*in2
                  1 -> return the high `Reg_width bits, in1 signed,   in2 signed
                  2 -> return the high `Reg_width bits, in1 signed,   in2 unsigned
                  3 -> return the high `Reg_width bits, in1 unsigned, in2 unsigned
  word_double : (RV64 only) True selects the 32-bit "word" variant: the product is
                replaced by the sign-extension of its low 32 bits. The execute stage
                drives this argument with the decoder's word32 flag, so the narrowing
                must happen when the flag is True. (The previous code narrowed when
                the flag was False, which truncated every full-width multiply.)

  The multiply is done on magnitudes: a signed negative operand is two's-complement
  negated first, and the product is negated afterwards when exactly one of the
  operands was negated.
*/
(*noinline*)
function Bit#(`Reg_width) multiplication (Bit#(`Reg_width) in1,Bit#(`Reg_width) in2,Bit#(2) funct3 `ifdef RV64 ,Bool word_double `endif );
  // in1 is treated as signed when funct3 is 1 or 2, in2 only when funct3 is 1.
  Bool lv_in1_negated = ((funct3[0]^funct3[1])==1 && in1[`Reg_width-1]==1);
  Bool lv_in2_negated = (funct3==1 && in2[`Reg_width-1]==1);

  // Magnitudes, widened to twice the register width so the full product is kept.
  Bit#(TMul#(2,`Reg_width)) op1 = lv_in1_negated ? zeroExtend((~in1)+1) : zeroExtend(in1);
  Bit#(TMul#(2,`Reg_width)) op2 = lv_in2_negated ? zeroExtend((~in2)+1) : zeroExtend(in2);

  // The result is negative exactly when one (and only one) operand was negated.
  Bool lv_take_complement = (lv_in1_negated != lv_in2_negated);

  let product = op1*op2;
  if(lv_take_complement)
    product = ~product+1;

  `ifdef RV64
    if(word_double) // 32-bit word operation: keep the low word, sign-extended
      product=signExtend(product[31:0]);
  `endif

  if(funct3==0)
    return product[`Reg_width-1:0];
  else
    return product[2*`Reg_width-1:`Reg_width];
endfunction
59
/*
  Interface of the execute stage.

  The stage receives decoded instructions on rx_in, sends execution results on
  tx_out, hands memory requests to the data-memory side through to_dmem, and
  exposes flush / branch-training / RAS information as value methods.
*/
interface Ifc_execute_stage;
  // Flush every entry of the stage's physical register file bookkeeping.
  method Action flush_prf;

  /* ====================== pipe connections ========= */
  // Input side: instructions arriving from the decode stage.
  interface RXe#(ID_IE_type) rx_in;
  // Output side: results leaving towards the memory stage.
  interface TXe#(IE_IMEM_type) tx_out;
  /*================================================== */

  // Rounding mode to be used by floating point operations started in this cycle.
  method Action roundingmode(Bit#(3) rm);

  // Flush request produced by the instruction executed in this cycle, together
  // with the address associated with it.
  method Tuple2#(Flush_type,Bit#(`VADDR)) generate_flush;

  // Branch predictor training information, Valid only when produced.
  method Maybe#(Training_data#(`VADDR)) training_data;

  // Performance monitor bits of the execute stage.
  method Bit#(`PERFMONITORS) execute_perfmon;

  // Address to be pushed on the return address stack, Valid only when produced.
  method Maybe#(Bit#(`VADDR)) ras_push;

  // Memory request (plus one epoch bit) for the data memory subsystem.
  interface Get#(Tuple2#(Memout,Bit#(1))) to_dmem;

  // Operand forwarding from the memory stage: (value, prf index, pid).
  method Action _forwarding_from_memory (Maybe#(Tuple3#(Bit#(`Reg_width), Bit#(TLog#(`PRFDEPTH)), Bit#(4))) fwd_data);

  // Toggle the write-back epoch bit held in this stage.
  method Action update_wEpoch;

  // Current XLEN encoding (mxl); the value 1 selects 32-bit operation.
  method Action inferred_xlen(Bit#(2) mxl);
endinterface:Ifc_execute_stage
76
/*
  Execute stage.

  One instruction at a time is taken from the decode stage (rx). Instructions whose
  epochs do not match {eEpoch,wEpoch} are dropped. Otherwise the operands are read
  from the prf module and, once all three are present, the instruction is
  dispatched to
    - the multiplier/divider   (only when `muldiv is defined),
    - the floating point unit  (only when `spfpu  is defined), or
    - the single cycle ALU function fn_alu.
  Multi-cycle units set multicylce_output; the instruction then stays at the head of
  rx until the matching read_output* rule collects the result and dequeues it.
*/
`ifdef muldiv
  `ifdef spfpu
    `ifdef sequential
      (*mutually_exclusive="read_output_from_fpu, read_outputs_from_muldiv"*)
    `endif
  `endif
`endif
(*synthesize*)
(*conflict_free="rl_receive_info_from_decode_stage, to_dmem_get"*)
module mkexecute_stage(Ifc_execute_stage);
  Wire#(Maybe#(Bit#(`VADDR))) wr_ras_push<-mkDWire(tagged Invalid);
  Ifc_prf_new prf <-mkprf_new();
  RX#(ID_IE_type) rx <-mkRX; // receive from the decode stage
  TX#(IE_IMEM_type) tx <-mkTX; // send to the memory stage
  `ifdef muldiv
    `ifdef sequential
      Ifc_muldiv muldiv <-mkmuldiv; // sequential multiplier/divider
    `endif
    `ifdef parallel
      // result of the combinational multiplier, registered for one cycle
      Reg#(Maybe#(Bit#(`Reg_width))) rg_mul_output<-mkReg(tagged Invalid);
    `endif
  `endif
  `ifdef spfpu Ifc_fpu fpu <- mkfpu(); `endif // instantiating the Floating point units.

  //Wire#(Memout) wr_info_to_dmem <-mkWire;// holds the information to be given to dmem
  FIFOF#(Tuple2#(Memout,Bit#(1))) wr_info_to_dmem <-mkBypassFIFOF;// holds the information to be given to dmem
  // result produced by this stage in the current cycle; consumed by rule
  // forward_data_from_exe which hands it to prf.fwd_from_execution
  Wire#(Maybe#(Tuple3#(Bit#(`Reg_width), Bit#(TLog#(`PRFDEPTH)), Bit#(4)))) wr_forward_from_EXE <-mkDWire(tagged Invalid);
  Wire#(Bit#(3)) wr_rounding_mode<-mkDWire(0);
  // True while a multi-cycle unit is working on the instruction at the head of rx.
  // Only port [1] of the CReg is used in this module.
  Reg#(Bool) multicylce_output[2] <-mkCReg(2,False);
  // NOTE: despite the rg_ prefix the next two are DWires, valid for one cycle only.
  Wire#(Flush_type) rg_flush_execute <-mkDWire(None);
  Wire#(Bit#(`VADDR)) rg_effective_address<-mkDWire(0);
  Reg#(Maybe#(Training_data#(`VADDR))) wr_training_data <-mkDReg(tagged Invalid);
  `ifdef perf
    // NOTE(review): nothing in this module writes this register, so it reads as 0.
    Reg#(Bit#(`PERFMONITORS)) rg_execute_perfmon<-mkDReg(0);
  `endif
  Reg#(Bit#(1)) eEpoch <-mkReg(0);
  Reg#(Bit#(1)) wEpoch <-mkReg(0);
  PulseWire wb_flush <-mkPulseWire();

  // prf index / pid of the most recently dispatched instruction; used by the
  // multi-cycle result rules and by the exception path.
  Reg#(Bit#(4)) rg_pid <-mkReg(0);
  Reg#(Bit#(TLog#(`PRFDEPTH))) rg_prf_index<-mkReg(0);

  // mkWire (not mkDWire): every rule/method reading wr_mxl can only fire in a
  // cycle in which inferred_xlen is being called.
  Wire#(Bit#(2)) wr_mxl <- mkWire();


  // A flush raised by this stage toggles eEpoch, unless update_wEpoch is called in
  // the same cycle.
  rule update_eEpoch_reg(rg_flush_execute!=None && !wb_flush);
    `ifdef verbose $display($time,"\tEXECUTION: Inverting eEPOCH"); `endif
    eEpoch<=~eEpoch;
  endrule

  // Hand the result produced in this cycle to the prf module.
  rule forward_data_from_exe(wr_forward_from_EXE matches tagged Valid .fwdata) ;
    let {data,index,pid}=fwdata;
    if(wr_mxl==1) begin// 32-bits
      data=signExtend(data[31:0]);
    end
    prf.fwd_from_execution(data,index,pid);
  endrule

  rule rl_receive_info_from_decode_stage(rx.u.notEmpty && tx.u.notFull && !multicylce_output[1] );
    Bit#(`PERFMONITORS) perfmonitors=0;
    let data=rx.u.first;
    let pc=data.program_counter;
    let dest=data.destination;
    let rdtype=data.rdtype;
    let exception=data.exception;
    let inst_type=data.inst_type;
    Bit#(`VADDR) nextpc=rx.u.first.nextpc;
    `ifdef simulate let instruction=data.instruction; `endif
    Execution_output result1=tagged Busy;
    `ifdef verbose $display($time,"\t********** EXECUTION STAGE FIRING ************ PC: :%h EPOCHS: %b Instr-EPOCHS: %b",pc,{eEpoch,wEpoch}, data.epochs) ; `endif
    if({eEpoch,wEpoch}!=data.epochs)begin
      // instruction belongs to a flushed path: drop it
      rx.u.deq;
      `ifdef verbose $display($time,"\tEXECUTION: PC: %h Dropping Instruction since Epochs do not match", rx.u.first.program_counter); `endif
    end
    else if(exception matches tagged None)begin
      `ifdef verbose $display($time,"\tEXECUTION: rs1type: ",fshow(data.rs1_type)," rs2_type ",fshow(data.rs2_type)); `endif
      RFType#(`Reg_width) op1<-prf.read_rs1(data.rs1addr,data.rs1_type,data.rs1);
      RFType#(`Reg_width) op2<-prf.read_rs2(data.rs2addr,data.rs2_type,data.rs2);
      RFType#(`Reg_width) op3<-prf.read_rs3(data.rs3addr,data.rs3_type,data.rs3_imm);
      if(op1 matches tagged Present .v1 &&& op2 matches tagged Present .v2 &&& op3 matches tagged Present .v3)begin
        Bit#(`Reg_width) rs1=v1;
        Bit#(`Reg_width) rs2=v2;
        Bit#(`Reg_width) rs3=v3;
        if(wr_mxl==1) begin // 32-bit
          rs1=signExtend(rs1[31:0]);
          rs2=signExtend(rs2[31:0]);
          rs3=signExtend(rs3[31:0]);
        end
        // The prf index / pid of the destination are needed by every execution
        // path (including the single cycle ALU), so they are obtained outside of
        // any `ifdef. Previously this was guarded by `ifdef muldiv, which left
        // prf_index and pid undeclared when muldiv was not defined.
        let {prf_index,pid}<-prf.get_index_pid(data.destination,data.rdtype);
        `ifdef verbose $display($time,"\tEXECUTION: PRFINDEX: %d PID: %d",prf_index,pid); `endif
        rg_prf_index<=prf_index;
        rg_pid<=pid;

        // Dispatch selectors. A unit that is not compiled in is never selected,
        // so its instructions fall through to the ALU path exactly as they did
        // with the original preprocessor-built if/else chain.
        Bool is_muldiv_op=False;
        Bool is_float_op=False;
        `ifdef muldiv
          is_muldiv_op=(inst_type==MUL || inst_type == DIV);
        `endif
        `ifdef spfpu
          is_float_op=(inst_type == FLOATING || inst_type==DFLOATING);
        `endif

        /*========== Multiplication =============== */
        if(is_muldiv_op)begin
          `ifdef muldiv
            Bit#(1) is_mul=0;
            if(inst_type==MUL) begin
              is_mul=1;
              `ifdef verbose $display($time,"\tEXECUTION: Multiplication Operation Op1: %h Op2: %h ",rs1,rs2); `endif
            end
            else begin
              `ifdef verbose $display($time,"\tEXECUTION: Division Operation Op1: %h Op2: %h ",rs1,rs2); `endif
            end
            `ifdef sequential
              muldiv.input_operands(rs1,rs2,data.funct3[1:0],pack(data.word32),is_mul);
              prf.update_rd(prf_index,pid);
              multicylce_output[1]<=True;
            `endif
            `ifdef parallel
              // NOTE(review): unlike the sequential path this one does not call
              // prf.update_rd, and DIV is also routed to the multiplier - confirm.
              let mul_result= multiplication(rs1,rs2,data.funct3[1:0] `ifdef RV64 ,data.word32 `endif );
              rg_mul_output<=tagged Valid mul_result;
              multicylce_output[1]<=True;
            `endif
          `endif
        end
        /*========================================== */
        /*============== FLOATING POINT ============================ */
        else if(is_float_op)begin
          `ifdef spfpu
            `ifdef verbose $display($time,"\tEXECUTION: Floating Point Operation "); `endif
            fpu._start(truncate(rs1),truncate(rs2),truncate(rs3),data.fn,rs3[11:5],data.funct3,rs3[1:0],wr_rounding_mode, data.word32);
            multicylce_output[1]<=True;
            prf.update_rd(prf_index,pid);
          `endif
        end
        /*========================================== */
        /*================ SINGLE CYCLE ALU ========================== */
        else begin
          `ifdef verbose $display($time,"\tEXECUTION: Single Cycle Operation "); `endif
          let {alu_out,ea,flush,td,raspush,ex,pm} = fn_alu(data.fn,rs1,rs2,rs3,pc,data.inst_type,nextpc,data.funct3,data.mem_access,dest,data.prediction,rx.u.first.perfmonitors,data.word32);
          result1=alu_out;
          `ifdef verbose $display($time,"\tEXE: rs1: %h rs2: %h rs3_imm: %h",rs1,rs2,rs3); `endif
          `ifdef verbose $display($time,"\tEXECUTION: Result: ",fshow(result1)); `endif
          `ifdef verbose $display($time,"\tEXECUTION: PC: %h",pc," Flush: ",fshow(flush)," EA: %h",ea," Instruction: ",fshow(data.inst_type)); `endif
          if(result1 matches tagged RESULT .res)begin
            // value is available now: forward it (never for destination 0)
            if(dest!=0)
              wr_forward_from_EXE <= tagged Valid tuple3(res.aluresult,prf_index,pid);
          end
          else begin
            // value will only be produced later
            prf.update_rd(prf_index,pid);
          end
          if(result1 matches tagged MEMORY .meminfo)begin
            rx.u.deq;
            tx.u.enq(IE_IMEM_type{execresult:result1,
              program_counter:pc, exception:exception, debugcause:rx.u.first.debugcause,
              destination:dest, rd_type:rdtype , pid:pid, index:prf_index, perfmonitors:perfmonitors ,epochs:rx.u.first.epochs
              `ifdef simulate , instruction:instruction `endif });
            wr_info_to_dmem.enq(tuple2(meminfo,rx.u.first.epochs[0]));
            rg_flush_execute<=flush;
            rg_effective_address<=ea;
          end
          else begin
            rx.u.deq;
            exception=ex;
            wr_ras_push<=raspush;
            rg_flush_execute<=flush;
            rg_effective_address<=ea;
            wr_training_data<=td;
            perfmonitors=pm;
            tx.u.enq(IE_IMEM_type{execresult:result1,
              program_counter:pc, exception:exception, debugcause:rx.u.first.debugcause,
              destination:dest, rd_type:rdtype , pid:pid, index:prf_index, perfmonitors:perfmonitors ,epochs:rx.u.first.epochs
              `ifdef simulate , instruction:instruction `endif });
          end
        end
        /*========================================== */
      end
      else begin
        `ifdef verbose $display($time,"\tEXECUTION: Waiting for operands.\nRS1: ",fshow(op1),"\nRS2: ",fshow(op2),"\nRS3: ",fshow(op3)); `endif
      end
    end
    else begin
      // Instruction already carries an exception: pass it on without executing.
      rx.u.deq;
      `ifdef verbose $display($time,"\tEXECUTE: EXCEPTION"); `endif
      tx.u.enq(IE_IMEM_type{execresult:tagged RESULT Arithout{aluresult:0,fflags:0},
        program_counter:pc, exception:exception, debugcause:rx.u.first.debugcause,
        destination:dest, rd_type:rdtype , index:rg_prf_index, pid:rg_pid, perfmonitors:perfmonitors,epochs:rx.u.first.epochs
        `ifdef simulate , instruction:instruction `endif });
    end
  endrule

  `ifdef muldiv
    `ifdef sequential
      // Collect the result of the sequential multiplier/divider.
      rule read_outputs_from_muldiv(rx.u.notEmpty && tx.u.notFull && multicylce_output[1] );
        `ifdef verbose $display($time,"\tEXECUTION: Multiplier sending output to Memory stage"); `endif
        let res<-muldiv.muldiv_result;
        rx.u.deq;
        let decodedata=rx.u.first;
        let pc=decodedata.program_counter;
        let dest=decodedata.destination;
        let rdtype=decodedata.rdtype;
        let exception=decodedata.exception;
        `ifdef simulate let instr=decodedata.instruction; `endif
        Execution_output result1= tagged RESULT(Arithout{aluresult:res,fflags:0});
        if({eEpoch,wEpoch}!=rx.u.first.epochs)begin
          `ifdef verbose $display($time,"Epochs do not match"); `endif
        end
        else begin
          tx.u.enq(IE_IMEM_type{execresult:result1,debugcause:rx.u.first.debugcause,
            program_counter:pc, exception:exception,
            destination:dest, rd_type:rdtype , index:rg_prf_index,pid:rg_pid, perfmonitors:rx.u.first.perfmonitors,epochs:rx.u.first.epochs
            `ifdef simulate , instruction:instr `endif });
          if(dest!=0)
            wr_forward_from_EXE <= tagged Valid tuple3(res,rg_prf_index,rg_pid);
        end
        multicylce_output[1]<=False;
      endrule
    `endif
    `ifdef parallel
      // Collect the registered result of the combinational multiplier.
      rule read_outputs_from_muldiv(rx.u.notEmpty &&& tx.u.notFull &&& multicylce_output[1] &&& rg_mul_output matches tagged Valid .x);
        rg_mul_output<=tagged Invalid;
        rx.u.deq;
        let decodedata=rx.u.first;
        let pc=decodedata.program_counter;
        let dest=decodedata.destination;
        let rdtype=decodedata.rdtype;
        let exception=decodedata.exception;
        `ifdef simulate let instr=decodedata.instruction; `endif
        Execution_output result1= tagged RESULT(Arithout{aluresult:x,fflags:0});
        if({eEpoch,wEpoch}!=rx.u.first.epochs)begin
          `ifdef verbose $display($time,"Epochs do not match"); `endif
        end
        else begin
          tx.u.enq(IE_IMEM_type{execresult:result1,debugcause:rx.u.first.debugcause,
            program_counter:pc, exception:exception,
            destination:dest, rd_type:rdtype , index:rg_prf_index,pid:rg_pid, perfmonitors:rx.u.first.perfmonitors,epochs:rx.u.first.epochs
            `ifdef simulate , instruction:instr `endif });
          if(dest!=0)
            wr_forward_from_EXE <= tagged Valid tuple3(x,rg_prf_index,rg_pid);
        end
        multicylce_output[1]<=False;
      endrule
    `endif
  `endif

  `ifdef spfpu
    // Collect the result of the floating point unit.
    rule read_output_from_fpu(rx.u.notEmpty && tx.u.notFull && multicylce_output[1] );
      let res<-fpu.get_result;
      rx.u.deq;
      let decodedata=rx.u.first;
      let pc=decodedata.program_counter;
      let dest=decodedata.destination;
      let rdtype=decodedata.rdtype;
      let exception=decodedata.exception;
      `ifdef simulate let instr=decodedata.instruction; `endif
      Execution_output result1= tagged RESULT(Arithout{aluresult:res.final_result, fflags:res.fflags});
      if({eEpoch,wEpoch}!=rx.u.first.epochs)begin
        `ifdef verbose $display($time,"Epochs do not match"); `endif
      end
      else begin
        tx.u.enq(IE_IMEM_type{execresult:result1, debugcause:rx.u.first.debugcause,
          program_counter:pc, exception:exception,
          destination:dest, rd_type:rdtype, index:rg_prf_index,pid:rg_pid , perfmonitors:rx.u.first.perfmonitors,epochs:rx.u.first.epochs
          `ifdef simulate , instruction:instr `endif });
        // integer destination 0 is never forwarded; floating destinations always are
        if((dest!=0 && rdtype==IntegerRF) || rdtype==FloatingRF)
          wr_forward_from_EXE <= tagged Valid tuple3(res.final_result,rg_prf_index,rg_pid);
      end
      multicylce_output[1]<=False;
    endrule
  `endif

  interface to_dmem = interface Get
    method ActionValue#(Tuple2#(Memout,Bit#(1))) get ;
      `ifdef verbose $display($time,"\tEXECUTION: DEQUEING MEM REQUEST",fshow(wr_info_to_dmem.first)); `endif
      wr_info_to_dmem.deq;
      return wr_info_to_dmem.first;
    endmethod
  endinterface;
  method tx_out=tx.e;
  method rx_in=rx.e;
  method Action roundingmode(Bit#(3) rm);
    wr_rounding_mode<=rm;
  endmethod
  method generate_flush=tuple2(rg_flush_execute,rg_effective_address);
  method Maybe#(Training_data#(`VADDR)) training_data=wr_training_data;
  // This method is declared in Ifc_execute_stage but previously had no definition
  // here, leaving its value undefined. It now returns the perf register when
  // `perf is defined and 0 otherwise.
  method Bit#(`PERFMONITORS) execute_perfmon;
    `ifdef perf
      return rg_execute_perfmon;
    `else
      return 0;
    `endif
  endmethod
  method Maybe#(Bit#(`VADDR)) ras_push = wr_ras_push;
  method Action _forwarding_from_memory (Maybe#(Tuple3#(Bit#(`Reg_width), Bit#(TLog#(`PRFDEPTH)), Bit#(4))) fwd_data);
    if(fwd_data matches tagged Valid .fwdata)begin
      let {data,index,pid}=fwdata;
      if(wr_mxl==1) begin// 32-bits
        data=signExtend(data[31:0]);
      end
      prf.fwd_from_memory(data,index,pid);
    end
  endmethod
  method Action update_wEpoch;
    `ifdef verbose $display($time,"\tEXECUTION: Updating wEPOCH"); `endif
    wEpoch<=~wEpoch;
    wb_flush.send;
  endmethod
  method Action flush_prf;
    prf.flush_all;
  endmethod
  method Action inferred_xlen(Bit#(2) mxl);
    wr_mxl <=mxl;
  endmethod
endmodule
374 endpackage:execute_stage