a57d66c9c2b1af00a747e4e8f47b8b31f50d5ea7
[shakti-core.git] / src / core / fpu / fpu_fm_add_sub.bsv
1 /*
2 Authors : Vinod.G, Aditya Govardhan
3 Email : g.vinod1993@gmail.com
4 Last Update : 27th November 2017
5 See LICENSE for more details
6 Paper Reference: Floating Point Fused Multiply-Add Architectures (http://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=4487224)
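7 Description:
8 Multi-cycle floating-point fused multiply-add/subtract unit, parameterised on operand width (fpinp), mantissa width (fpman) and exponent width (fpexp). One operation is stepped through the FMA_states sequence Begin -> Stage1 -> Stage2 -> Stage3 -> Stage4; the final stage publishes a Floating_output (final_result + fflags).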
9 */
10
11 package fpu_fm_add_sub;
12 import DReg::*;
13 import defined_types::*;
14 import RegFile::*;
15 import UniqueWrappers::*;
16 `include "defined_parameters.bsv"
17 import ConfigReg::*;
18
19 interface Ifc_fpu_fm_add_sub#(numeric type fpinp, numeric type fpman, numeric type fpexp); // Interface of the parameterised FMA unit: fpinp = full operand width, fpman = mantissa bits, fpexp = exponent bits (the module proviso requires fpexp + fpman + 1 = fpinp)
20 method Action _start(Tuple3#(Bit#(1),Bit#(fpexp),Bit#(fpman)) _operand1, Tuple3#(Bit#(1),Bit#(fpexp),Bit#(fpman)) _operand2,Tuple3#(Bit#(1),Bit#(fpexp),Bit#(fpman)) _operand3, Bit#(3) rounding_mode, bit operation, bit _negate, bit mul, bit muladd, Tuple3#(Bit#(5),Bit#(5),Bit#(5)) flags); // Submit one operation. Each operand is a (sign, exponent, mantissa) tuple; operation: 0 = add, 1 = sub; flags holds one 5-bit classification word per operand (bit 4 subnormal, bit 3 zero, bit 1 infinity as decoded by the module; bits 2 and 0 mark invalid/NaN operands)
21 method Floating_output#(fpinp) get_result(); // Returns the result record (final_result, fflags); the method body lies outside the visible part of the file
22 method Action flush; // Flush request; the method body lies outside the visible part of the file (presumably drives wr_flush -- confirm)
23 endinterface
24
25
26 `ifdef fpu_hierarchical
27 interface Ifc_fpu_fm_add_sub32; // Fixed-width variant of Ifc_fpu_fm_add_sub for 32-bit operands (1 sign + 8 exponent + 23 mantissa bits); only declared when fpu_hierarchical is defined
28 method Action _start(Tuple3#(Bit#(1),Bit#(8),Bit#(23)) _operand1, Tuple3#(Bit#(1),Bit#(8),Bit#(23)) _operand2,Tuple3#(Bit#(1),Bit#(8),Bit#(23)) _operand3, Bit#(3) rounding_mode, bit operation, bit _negate, bit mul, bit muladd, Tuple3#(Bit#(5),Bit#(5),Bit#(5)) flags); // Same argument list as the parameterised _start, with fpexp = 8 and fpman = 23
29 method Floating_output#(32) get_result(); // 32-bit result record
30 method Action flush; // Flush request
31 endinterface
32
33 interface Ifc_fpu_fm_add_sub64; // Fixed-width variant of Ifc_fpu_fm_add_sub for 64-bit operands (1 sign + 11 exponent + 52 mantissa bits); only declared when fpu_hierarchical is defined
34 method Action _start(Tuple3#(Bit#(1),Bit#(11),Bit#(52)) _operand1, Tuple3#(Bit#(1),Bit#(11),Bit#(52)) _operand2,Tuple3#(Bit#(1),Bit#(11),Bit#(52)) _operand3, Bit#(3) rounding_mode, bit operation, bit _negate, bit mul,bit muladd, Tuple3#(Bit#(5),Bit#(5),Bit#(5)) flags); // Same argument list as the parameterised _start, with fpexp = 11 and fpman = 52
35 method Floating_output#(64) get_result(); // 64-bit result record
36 method Action flush; // Flush request
37 endinterface
38 `endif
39
40 typedef struct{ // Operation record held in ff_input_register and consumed by rl_stage1_after_input_stage. Field order defines the packed layout (deriving Bits).
41 Bit#(TMul#(2,TAdd#(fpman,1))) product_mantissa; // 2*(fpman+1)-bit mantissa product; stage 1 appends one zero bit below it
42 Bit#(TAdd#(fpexp,2)) lv_summed_exponent; // exponent of the resultant
43 bit sign; // sign bit of the result
44 Bit#(fpinp) _operand3; // third operand in packed form; stage 3 slices sign, exponent and mantissa out of it
45 Bit#(1) invalid; // indicating that the ff_output is NaN.
46 Bit#(1) infinity; // indicating that the ff_output is infinity.
47 Bit#(1) zero; // indicating that the ff_output is zero.
48 Bit#(5) add_flags; // classification flags of operand 3 as decoded in stage 3: [4] subnormal, [3] zero, [2] quiet NaN, [1] infinity, [2]|[0] invalid
49 Bit#(3) rounding_mode; // static rounding mode encoded in the instruction
50 bit _operation; // bit denoting the operation to be performed 0 - Add, 1 - Sub
51 bit _negate; // bit denoting whether the operands should be negated or not
52 bit mul; // bit denoting whether the operation is mul or not
53 bit muladd; // stage 1 uses muladd == 0 to flag product overflow/underflow unconditionally (presumably 1 = fused multiply-add -- confirm)
54 bit quiet_nan_two; // forwarded through the pipeline; in the final stage a set quiet_nan_two/quiet_nan_three suppresses the invalid flag
55 bit inp_denormal; // forwarded to stage 2 (presumably: an input operand was denormal -- confirm against _start)
56 }Input_data_type #(numeric type fpinp, numeric type fpman, numeric type fpexp) deriving (Bits,Eq);
57
58 typedef struct{ // Record written to ff_stage2 by rl_stage1_after_input_stage and read by rl_stage_3. Field order defines the packed layout (deriving Bits).
59 Bit#(1) lv_product_sign; // sign of the product, forwarded from Input_data_type.sign
60 Bit#(1) lv_negate; // forwarded _negate bit; XORed into both signs in rl_stage_3
61 Bit#(TAdd#(fpexp,2)) lv_product_exponent; // product exponent after stage-1 normalisation
62 Bit#(TAdd#(TMul#(2,TAdd#(1,fpman)),1)) lv_product_mantissa; // product mantissa after stage-1 normalisation (one bit wider than the raw product)
63 Bit#(fpinp) lv_operand3; // third operand in packed form
64 Bit#(5) add_flags; // classification flags of operand 3: [4] subnormal, [3] zero, [2] quiet NaN, [1] infinity, [2]|[0] invalid
65 bit operation; // 0 - Add, 1 - Sub
66 bit mul; // multiply-only indicator; selects the sign of a zero result in rl_stage_3
67 bit muladd; // forwarded muladd bit
68 Bit#(3) rounding_mode; // rounding mode of the operation
69 bit lv_product_is_invalid; // product is invalid (NaN)
70 bit lv_product_is_zero; // product is zero
71 bit lv_product_is_infinity; // product is infinity
72 bit lv_product_overflow; // set by stage 1 when the product exponent overflows
73 bit lv_product_underflow; // set by stage 1 when the product exponent underflows
74 bit quiet_nan_two; // forwarded quiet-NaN indicator
75 bit inp_denormal; // forwarded inp_denormal bit; read but not otherwise used by rl_stage_3
76 }Stage2_data_type #(numeric type fpinp, numeric type fpman, numeric type fpexp) deriving (Bits,Eq);
77
78 typedef struct{ // Record written to ff_stage4 by rl_stage_3 and read by rl_stage4. Field order defines the packed layout (deriving Bits).
79 bit actual_operation; // effective mantissa operation: 0 = add, 1 = subtract
80 bit lv_resultant_sign; // sign of the add/sub result
81 bit man2_gt_man3; // 1 when the aligned product mantissa is greater than the aligned operand-3 mantissa
82 Bit#(TAdd#(fpexp,2)) resultant_exponent; // larger of the two exponents
83 Bit#(TAdd#(TMul#(fpman,3),4)) mantissa2; // aligned product mantissa
84 Bit#(TAdd#(TMul#(fpman,3),4)) mantissa3; // aligned operand-3 mantissa
85 Bit#(3) rounding_mode; // rounding mode of the operation
86 bit result_is_invalid; // result is NaN
87 Bit#(2) result_is_infinity; // {sign, valid}: bit 0 set means the result is infinity with sign bit 1
88 Bit#(2) result_is_zero; // {sign, valid}: bit 0 set means the result is zero with sign bit 1
89 bit product_overflow; // forwarded product overflow flag
90 bit product_underflow; // forwarded product underflow flag
91 bit quiet_nan_two; // quiet-NaN indicator (cleared in rl_stage_3 when add_flags[0] is set)
92 bit quiet_nan_three; // quiet-NaN indicator derived from add_flags[2]
93 bit lv_product_is_zero; // product is zero
94 }Stage4_data_type #(numeric type fpman, numeric type fpexp) deriving (Bits,Eq);
95
96 typedef struct{ // Record written to ff_stage5 by rl_stage4 and read by rl_stage_5_final_stage. Field order defines the packed layout (deriving Bits).
97 Bit#(TAdd#(fpexp,2)) resultant_exponent; // exponent before final normalisation
98 Bit#(TAdd#(TMul#(fpman,3),4)) resultant_mantissa; // sum or difference of the aligned mantissas
99 bit lv_resultant_sign; // sign of the add/sub result
100 Bit#(3) lv_rounding_mode; // rounding mode of the operation
101 Bit#(2) add_sub_is_zero; // {sign, valid}: bit 0 set when resultant_mantissa is zero; bit 1 is the sign of that zero
102 bit lv_result_is_invalid; // result is NaN
103 Bit#(2) lv_result_is_infinity; // {sign, valid}
104 Bit#(2) lv_result_is_zero; // {sign, valid}
105 bit lv_product_overflow; // forwarded product overflow flag
106 bit lv_product_underflow; // forwarded product underflow flag
107 bit quiet_nan_two; // forwarded quiet-NaN indicator
108 bit quiet_nan_three; // forwarded quiet-NaN indicator
109 bit lv_product_is_zero; // product is zero
110 Bit#(TLog#(TAdd#(TAdd#(TMul#(fpman,3),4),1))) lv_zeros_on_left; // countZerosMSB(resultant_mantissa), precomputed in rl_stage4
111 }Stage5_data_type #(numeric type fpman, numeric type fpexp) deriving (Bits,Eq);
112
113 typedef enum{ // Sequencer state held in rg_state_handler; each rule fires in exactly one state. Note that the rule names are offset from the state names.
114 Begin, // reset value; also entered by rl_flush and after rl_stage_5_final_stage
115 Stage1, // rl_stage1_after_input_stage fires
116 Stage2, // rl_stage_3 fires
117 Stage3, // rl_stage4 fires
118 Stage4 // rl_stage_5_final_stage fires
119 } FMA_states deriving (Bits,Eq);
120
121 module mkfpu_fm_add_sub(Ifc_fpu_fm_add_sub#(fpinp,fpman,fpexp))
122 provisos(
123 Add#(TAdd#(fpexp,fpman),1,fpinp),
124 Add#(fpexp,2,fpexp2),
125 Add#(TMul#(fpman,3),4,fmaman),
126 Add#(fpman,1,fpman1),
127 Add#(fpexp,1,fpexp1),
128 Add#(b__,fpinp,64),
129 Mul#(TAdd#(fpman,1),2,impfpman2),
130 Add#(fpinp,fpinp,fpinp2),
131 //per request of bsc
132 Add#(c__, TSub#(fpexp, 1), fpexp1),
133 Add#(d__,1,fpexp2),
134 Add#(e__, fpexp2, fpman),
135 Add#(f__, TSub#(fpexp, 1), fpman),
136 Add#(g__, TAdd#(fpman, 1), fpinp),
137 Add#(h__, TSub#(fpexp, 1), fpexp2),
138 Add#(j__, TLog#(TAdd#(1, fmaman)), fpexp2),
139 Add#(l__, 1, fmaman),
140 Add#(m__, TAdd#(2, fpman), fmaman),
141 Add#(n__,TAdd#(fpman,1),fpinp2),
142 Add#(a__, TMul#(2, TAdd#(1, fpman)), fmaman),
143 Mul#(2, TAdd#(1, fpman), impfpman2),
144 Add#(i__, TAdd#(TMul#(2, TAdd#(1, fpman)), 1), fmaman),
145 Add#(k__, TLog#(TAdd#(1, TAdd#(impfpman2, 1))), fpexp2),
146 Add#(o__, TLog#(TAdd#(1, fmaman)), TAdd#(fpexp, 2)),
147 Log#(TAdd#(TAdd#(TMul#(fpman, 3), 4), 1), TLog#(TAdd#(1, fmaman))),
148 Add#(p__, TLog#(TAdd#(TAdd#(TMul#(fpman, 3), 4), 1)), TAdd#(fpexp, 2)),
149 Add#(s__, TAdd#(1, TAdd#(f__, fpexp)), fmaman),
150 Add#(t__, TAdd#(f__, fpexp), impfpman2),
151 Add#(q__, TAdd#(fpexp, f__), impfpman2),
152 Add#(r__, TAdd#(1, TAdd#(fpexp, f__)), fmaman)
153 );
154
155
156
157 Wire#(Floating_output#(fpinp)) ff_final_out <- mkWire(); // result wire, written by rl_stage_5_final_stage
158 Reg#(Stage2_data_type#(fpinp,fpman,fpexp)) ff_stage2 <- mkConfigRegU(); // stage 1 -> stage 3 hand-off (uninitialised at reset)
159 Reg#(Stage4_data_type#(fpman,fpexp)) ff_stage4 <- mkConfigRegU(); // stage 3 -> stage 4 hand-off (uninitialised at reset)
160 Reg#(Stage5_data_type#(fpman,fpexp)) ff_stage5 <- mkConfigRegU(); // stage 4 -> final stage hand-off (uninitialised at reset)
161 Reg#(Input_data_type#(fpinp,fpman,fpexp)) ff_input_register <- mkConfigRegU(); // operation record read by stage 1 (uninitialised at reset)
162 Reg#(FMA_states) rg_state_handler <- mkReg(Begin); // sequencer state; selects which rule may fire
163 Wire#(Bool) wr_flush <- mkDWire(False); // flush request; defaults to False in cycles where it is not written
164
165 function zeroExtendLSB(inp_man) = unpack(reverseBits(extend(reverseBits(pack(inp_man))))); // widen a value by padding on the LSB side: reverse the bits, extend, reverse back, so the input ends up left-aligned in the wider result
166
167 let fPMAN = valueOf(fpman); // Integer copies of the numeric type parameters, used for bit indexing
168 let fPINP = valueOf(fpinp);
169 let fPEXP = valueOf(fpexp);
170 let iMPFPMAN2 = valueOf(impfpman2); // 2*(fpman+1), per the module provisos
171 let fMAMAN = valueOf(fmaman); // 3*fpman+4, per the module provisos
172
173 rule rl_flush(wr_flush); // On a flush request, return the sequencer to Begin. The stage rules are all guarded by !wr_flush, so none of them fires in the same cycle.
174 rg_state_handler <= Begin;
175 //ff_input_register <= tagged Invalid; // (disabled) the stage registers are plain records, not Maybe values, so they are left as they are
176 //ff_stage2 <= tagged Invalid;
177 //ff_stage4 <= tagged Invalid;
178 //ff_stage5 <= tagged Invalid;
179 endrule
180
181 rule rl_stage1_after_input_stage(rg_state_handler == Stage1 && !wr_flush); // Stage 1: take the operation from ff_input_register, flag product overflow/underflow, normalise the product mantissa/exponent, write ff_stage2 and advance the sequencer to Stage2
182
183 Bit#(impfpman2) x = ff_input_register.product_mantissa;
184 Bit#(TAdd#(impfpman2,1)) lv_product_mantissa = {x[iMPFPMAN2-1:0],1'b0}; //extra zero for 10.xxxx case
185 Bit#(fpexp2) lv_product_exponent = ff_input_register.lv_summed_exponent;
186 Bit#(fpinp) lv_operand3 = ff_input_register._operand3;
187 Bit#(5) add_flags = ff_input_register.add_flags;
188 bit lv_product_underflow = 0;
189 bit lv_product_overflow = 0;
190 let lv_product_is_invalid = ff_input_register.invalid;
191 let lv_product_is_infinity = ff_input_register.infinity;
192 let lv_product_is_zero = ff_input_register.zero;
193 let rounding_mode = ff_input_register.rounding_mode;
194 let operation = ff_input_register._operation;
195 let lv_negate = ff_input_register._negate;
196 let lv_product_sign = ff_input_register.sign;
197 let mul = ff_input_register.mul;
198 let muladd = ff_input_register.muladd;
199 let quiet_nan_two = ff_input_register.quiet_nan_two;
200 bit inp_denormal = ff_input_register.inp_denormal;
201 Bit#(TSub#(fpexp,1)) bias = '1; // exponent bias: all ones in fpexp-1 bits
202 Int#(fpexp2) lv_actual_product_exponent = unpack(lv_product_exponent - {3'b0,bias}); // unbiased (signed) product exponent
203 let msb_zeros = pack(countZerosMSB(lv_product_mantissa)); // leading zeros of the widened product
204 let lsb_zeros = 0;
205
206 //Change-2 Removing Redundant Variables
207 //Bit#(fpman) bias_temp = zeroExtend(bias);
208 //Int#(fpman) lv_actual_product_exponent_temp = signExtend(lv_actual_product_exponent);
209 //`ifdef verbose $display("lv_actual_product_exponent_temp : %d",lv_actual_product_exponent_temp); `endif
210 rg_state_handler <= Stage2;
211
212 // lv_product_is_subnormal construct is like a flag which can be used in difficult situations
213 // bit lv_product_is_subnormal = 0;
214
215 bit lv_sticky = lv_product_mantissa[0];
216 `ifdef verbose $display("and thus the sticky bit = %b", lv_sticky); `endif
217
218 /*
219 if exponent is > bias then obviously none of the numbers are subnormal
220 so the product is of the form 1x.xxxx or 01.xxxx
221 the overflow conditions are handled in the following if condition accordingly
222 */
223
224 `ifdef verbose $display("lv_actual_product_exponent = %d",lv_actual_product_exponent); `endif
225 bit exp_overflow_bit = pack(lv_actual_product_exponent)[fPEXP]; //Says if Exponent Overflows
226 bit exp_underflow_bit = pack(lv_actual_product_exponent)[fPEXP+1]; //Says if Exponent Underflows
227 Bit#(fpexp) expo_temp = pack(lv_actual_product_exponent)[fPEXP-1:0];
228 bit exp_and = &(expo_temp); //Says if Exponent is equal to Bias
229 bit is_msb_zeros = |(msb_zeros); // 1 when the product has at least one leading zero
230
231 //Change-1 -- Reducing the size of the Muxes from EXP size to just a bunch of 1bits and a Or-tree
232 //Change-1 was wrong apparently, according to Paranoia!! Should see why! Rolling back
233 //if((exp_overflow_bit==1'b1 && exp_underflow_bit==1'b0) || (is_msb_zeros==1'b0 && exp_and==1'b1)) begin
234 if(lv_actual_product_exponent > zeroExtend(unpack(bias)) || (msb_zeros == 0 && lv_actual_product_exponent == zeroExtend(unpack(bias)))) begin
235 if(muladd == 0 ||(muladd==1 && ((lv_product_sign^lv_operand3[fPINP-1]^operation) == 0))) // flag overflow only when muladd is 0, or when product sign, operand-3 sign and operation XOR to 0
236 lv_product_overflow = 1;
237 //When the product overflows, the FMA result is an overflow
238 `ifdef verbose $display("lv_product_overflow!!!"); `endif
239 end
240
241 /*
242 -lowest_exp = -denormal_bias -mantissa_size -2
243 -2 is for the implicit bit and the carry bit
244 i.e. if all the bits are shifted out then its an underflow
245 */
246
247 else begin
248 //Thought-1 -- Can something be done to reduce the countZerosMSB and countZerosLSB
249 //Cannot reduce this mux to 1-bit but can reduce size since it's unwanted
250 if(lv_actual_product_exponent < unpack(-zeroExtend(bias)-fromInteger(fPMAN)-1)) begin
251 //if(lv_actual_product_exponent_temp < unpack(-bias_temp-fromInteger(fPMAN)-1)) begin
252 if((muladd == 1'b0 || (muladd==1'b1 && (add_flags[3]==1'b1 || add_flags[4]==1'b1))) && lv_product_is_zero == 1'b0) // flag underflow for a non-zero product when muladd is 0, or when operand 3 is zero or subnormal
253 lv_product_underflow = 1;
254 `ifdef verbose $display("lv_product_underflow!!!"); `endif
255 end
256 /*
257 if msb of product is 1 then the case is 1x.xxxx
258 product is shifted right once to make it 01.xxxx
259 we don't care what is the exponent, just increase it by one
260 actual exponent is also increased by one since exponent is increased by one
261 this increasing of exponent leading to overflow is handled in the overflow case
262 msb_zeros is increased for further arising conditions
263 */
264 //Change-4 Using the previously computed msb_zeros. Synthesis will detect this anyhow, but still. Fanout?
265 if(is_msb_zeros==1'b0) begin
266 //if(msb_zeros == 0) begin
267 lv_product_mantissa = lv_product_mantissa >> 1;
268 lv_product_exponent = lv_product_exponent + 1;
269 lv_actual_product_exponent = lv_actual_product_exponent + 1;
270 msb_zeros = msb_zeros + 1;
271 end
272 // possible shift is positive when exponent is lesser than -126
273
274 //Change-5 Possible shift needn't use lv_actual_product_exponent -- It's enough if exponent is used I guess
275 // Int#(fpexp2) possible_shift = 1-zeroExtend(unpack(bias))-(lv_actual_product_exponent);
276 Int#(fpexp2) possible_shift = 1-unpack(lv_product_exponent); // right shift needed to bring the biased exponent up to 1
277
278 //Experiment-1 -- Do all the operations in parallel and use the if-else for just assignments
279 lsb_zeros = pack(countZerosLSB(lv_product_mantissa));
280 let lv_product_mantissa_shiftR = (lv_product_mantissa >> pack(possible_shift));
281 //lv_product_mantissa_shiftR = {lv_product_mantissa_shiftR[iMPFPMAN2:1], lv_product_mantissa_shiftR[0] | lv_sticky};
282 let lv_product_exponent_inc_shift = lv_product_exponent + pack(possible_shift);
283
284 let shift_neg = ~pack(possible_shift)+1; // two's-complement negation of possible_shift
285
286 let lv_product_mantissa_shiftL_expo = lv_product_mantissa << (shift_neg);
287 let lv_product_exponent_sub_shift = lv_product_exponent - (shift_neg);
288
289 let lv_product_mantissa_shiftL_zerosMSB = lv_product_mantissa << (msb_zeros - 1);
290 let lv_product_exponent_sub_zerosMSB = lv_product_exponent - (zeroExtend(msb_zeros) - 1);
291
292 /*
293 msb_zeros = 1 when
294 i) the product is 1x.xxxx and shifted right once
295 ii) the product is 01.xxxx already
296 if possible_shift is negative or zero, it means that exponent is -126 or greater
297 and thus the product is already normalized
298 but if possible_shift is positive, it means that exponent is < -126
299 and thus product is shifted right to make exponent -126 and the result is subnormal
300 */
301 if(possible_shift > 0) begin
302 //Setting sticky if all lsb zeros are removed out
303
304 //Is there a better logic for this? Since, lsb_zeros is a big if-else logic
305 //lsb_zeros = pack(countZerosLSB(lv_product_mantissa));
306 if(possible_shift > unpack(zeroExtend(lsb_zeros)) || lv_product_mantissa[0] == 1)
307 lv_sticky = 1;
308
309 lv_product_mantissa = {lv_product_mantissa_shiftR[iMPFPMAN2:1], lv_product_mantissa_shiftR[0]|lv_sticky}; // fold the sticky bit into the LSB of the right-shifted mantissa
310 lv_sticky = lv_product_mantissa[0];
311 lv_product_exponent = lv_product_exponent_inc_shift;
312
313 `ifdef verbose $display("possible_shift",possible_shift); `endif
314 /*if(mul==1 && lv_product_is_zero==0)
315 lv_product_underflow = 1;*/
316 //Handling sticky
317
318 `ifdef verbose $display("lv_product_exponent : %d bin : %b",lv_product_exponent,lv_product_exponent); `endif
319 `ifdef verbose $display("lv_product_mantissa = %b lv_product_exponent : %d since exp < -126", lv_product_mantissa,lv_product_exponent); `endif
320 `ifdef verbose $display("and thus the sticky bit = %b", lv_sticky); `endif
321 // lv_product_is_subnormal = 1;
322 end
323
324 /*
325 msb_zeros != 1 means product is of the form 00.xxxx, important case
326 */
327 else if(msb_zeros != 'b1) begin
328 /*
329 if possible shift is < the number of leading zeros then the number can't be made normal
330 */
331 if((shift_neg) < zeroExtend(msb_zeros - 1)) begin
332 lv_product_mantissa = lv_product_mantissa_shiftL_expo;
333 lv_product_exponent = lv_product_exponent_sub_shift;
334 // lv_product_is_subnormal = 1;
335 end
336 /*
337 if exponent affords to give away enough such that shifting left leads to 01.xxxx and exponent >= -126
338 */
339 else begin
340 lv_product_mantissa = lv_product_mantissa_shiftL_zerosMSB;
341 lv_product_exponent = lv_product_exponent_sub_zerosMSB;
342 // lv_product_is_subnormal = 0;
343 end
344 end
345 end
346 ff_stage2 <= Stage2_data_type{ // hand-off to rl_stage_3; every field of Stage2_data_type is assigned so none is left undefined
347 lv_product_sign : lv_product_sign,
348 lv_negate : lv_negate,
349 lv_product_exponent : lv_product_exponent,
350 lv_product_mantissa : lv_product_mantissa,
351 lv_operand3 : lv_operand3,
352 add_flags : add_flags,
353 operation : operation,
354 mul : mul,
355 muladd : muladd,
356 rounding_mode : rounding_mode,
357 lv_product_is_invalid : lv_product_is_invalid,
358 lv_product_is_zero : lv_product_is_zero,
359 lv_product_is_infinity : lv_product_is_infinity,
360 lv_product_overflow : lv_product_overflow,
361 lv_product_underflow : lv_product_underflow,
362 quiet_nan_two : quiet_nan_two, inp_denormal : inp_denormal // inp_denormal is read back by rl_stage_3, so it has to be forwarded here
363 };
364
365
366 endrule
367
368 rule rl_stage_3(rg_state_handler == Stage2 && !wr_flush); // Fires in state Stage2: unpack operand 3, classify special results (NaN / zero / infinity), align the two mantissas to the larger exponent, decide result sign and effective add/sub, write ff_stage4 and advance to Stage3
369
370 rg_state_handler <= Stage3;
371 let lv_negate = ff_stage2.lv_negate;
372 let lv_product_exponent = ff_stage2.lv_product_exponent;
373 let lv_product_mantissa = ff_stage2.lv_product_mantissa;
374 let lv_operand3 = ff_stage2.lv_operand3;
375 let add_flags = ff_stage2.add_flags;
376 let operation = ff_stage2.operation;
377 let mul = ff_stage2.mul;
378 let muladd = ff_stage2.muladd;
379 let lv_product_sign = ff_stage2.lv_product_sign;
380 let lv_product_is_invalid = ff_stage2.lv_product_is_invalid;
381 let lv_product_is_zero = ff_stage2.lv_product_is_zero;
382 let lv_rounding_mode = ff_stage2.rounding_mode;
383 let lv_product_is_infinity = ff_stage2.lv_product_is_infinity;
384 let lv_product_overflow = ff_stage2.lv_product_overflow;
385 let lv_product_underflow = ff_stage2.lv_product_underflow;
386 let quiet_nan_two = ff_stage2.quiet_nan_two;
387 let inp_denormal = ff_stage2.inp_denormal; // read here but not used anywhere else in this rule
388 //ff_stage2 <= tagged Invalid;
389
390 Bit#(1) sign2 = lv_product_sign ^ lv_negate; // product sign after optional negation
391 Bit#(fpexp2) exponent2 = lv_product_exponent;
392 Bit#(fmaman) mantissa2 = zeroExtendLSB(lv_product_mantissa); // product mantissa left-aligned in the fmaman-bit datapath
393 Bit#(1) sign3 = lv_operand3[fPINP-1] ^ lv_negate; // operand-3 sign after optional negation
394 Bit#(fpexp2) exponent3 = {2'b0, lv_operand3[fPINP-2:fPMAN]}; // exponent field of operand 3, widened by two bits
395 Bit#(fmaman) mantissa3 = 0;
396 Bit#(fpman) lv_man3 = lv_operand3[fPMAN-1:0]; // fraction field of operand 3
397 Bit#(fpexp) lv_exp_max = '1;
398 bit lv_op3_is_invalid = add_flags[2] | add_flags[0];
399 bit lv_op3_is_infinity = add_flags[1];
400 bit lv_op3_is_zero = add_flags[3];
401 bit op3_is_subnormal = add_flags[4];
402 bit quiet_nan_three = add_flags[2];
403 bit expo3_zero = |exponent3; // NOTE: despite the name this is 1 when exponent3 is NON-zero; it is used below as the implicit leading mantissa bit
404 //Change-7 Replaced all instances of operation^sign3 with op_xor_sign3
405 bit op_xor_sign3 = operation ^ sign3; // effective sign of the addend once the add/sub selection is applied
406
407 //Change-6 Avoiding exponent3==0 mux, but does that help?
408 if(lv_op3_is_infinity==0 && lv_op3_is_invalid ==0 && lv_op3_is_zero==0) begin
409 mantissa3 = zeroExtendLSB({1'b0,expo3_zero,lv_man3}); // {carry position, implicit bit, fraction}, left-aligned like mantissa2
410 /* if(exponent3 == '0)
411 mantissa3 = zeroExtendLSB({2'b0,lv_man3});
412 else
413 mantissa3 = zeroExtendLSB({2'b01,lv_man3});*/
414 end
415
416 exponent3 = exponent3 + zeroExtend(op3_is_subnormal); // a subnormal operand 3 gets its exponent incremented by one
417
418 Bit#(1) lv_result_is_invalid = 0;
419 Bit#(2) lv_result_is_infinity = 0; // {sign, valid}
420 Bit#(2) lv_result_is_zero = 0; // {sign, valid}
421
422 if(quiet_nan_two == 0 && lv_product_is_invalid == 1)
423 quiet_nan_three = 0; //0*inf case
424
425 //Result is invalid cases
426 if(lv_op3_is_invalid == 1 || lv_product_is_invalid == 1)
427 lv_result_is_invalid = 1;
428
429 //Result is zero cases
430 else if(lv_op3_is_zero == 1 && lv_product_is_zero == 1) begin
431 if(mul==0) begin
432 if((lv_rounding_mode == 'b010) && (sign2 | (op_xor_sign3)) == 1) // rounding mode 010: negative zero if either term is negative
433 lv_result_is_zero = 2'b11;
434 else if((lv_rounding_mode != 'b010) && (sign2 & (op_xor_sign3)) == 1) // other modes: negative zero only if both terms are negative
435 lv_result_is_zero = 2'b11;
436 else begin
437 if(sign2 == 0)
438 lv_result_is_zero = 2'b01;
439 else
440 lv_result_is_zero = {op_xor_sign3,1'b1};
441 end
442 end
443 else
444 lv_result_is_zero = {sign2,1}; // mul set: the zero takes the product sign
445 end
446
447 //Result is infinity cases
448 else if(lv_product_is_infinity == 1 && lv_op3_is_infinity == 1) begin
449 lv_result_is_infinity = {sign2, ~(sign2 ^ (op_xor_sign3))}; // valid only when both infinities have the same effective sign
450 lv_result_is_invalid = ~lv_result_is_infinity[0]; // infinities of opposite effective sign give an invalid result
451 quiet_nan_two = 0; //inf * qNaN + inf case
452 end
453 else if(lv_product_is_infinity == 1 || lv_op3_is_infinity == 1) begin
454 lv_result_is_infinity = {((lv_product_is_infinity & ~lv_op3_is_infinity) & sign2) | ((~lv_product_is_infinity & lv_op3_is_infinity) & (op_xor_sign3)), 1}; // the sign comes from whichever term is the infinity
455 end
456 if(lv_product_is_zero == 1) begin // a zero product contributes nothing to the alignment below
457 exponent2 = '0;
458 mantissa2 = '0;
459 end
460
461 Bit#(fpexp2) lv_minuend, lv_subtrahend;
462 Bit#(fpexp2) exponent_difference = '0;
463 Bit#(fpexp2) resultant_exponent = '0;
464 bit op2_gt_op3 = 0;
465
466 Bit#(fmaman) mantissa_to_shift;
467 let lv_zeros_on_right;
468 bit lv_sticky = 0;
469
470 if(exponent2 > exponent3) begin // the mantissa belonging to the smaller exponent is the one shifted right
471 lv_minuend = exponent2;
472 lv_subtrahend = exponent3;
473 mantissa_to_shift = mantissa3;
474 op2_gt_op3 = 1;
475 end
476 else begin
477 lv_minuend = exponent3;
478 lv_subtrahend = exponent2;
479 mantissa_to_shift = mantissa2;
480 op2_gt_op3 = 0;
481 end
482
483 resultant_exponent = lv_minuend;
484 exponent_difference = lv_minuend - lv_subtrahend;
485 lv_zeros_on_right = zeroExtend(pack(countZerosLSB(mantissa_to_shift))); // taken before the shift so lost non-zero bits can be detected
486 Bit#(1) shifted_operand_zero = (mantissa_to_shift == '0) ? 1:0;
487 mantissa_to_shift = mantissa_to_shift >> exponent_difference;
488
489 //Handling sticky
490 if(((lv_zeros_on_right < exponent_difference) || (mantissa_to_shift[0] == 1)) && shifted_operand_zero != 1) // sticky is set when a 1 was shifted out or sits in the LSB, and never for an all-zero operand
491 lv_sticky = 1;
492
493 mantissa_to_shift = {mantissa_to_shift[fMAMAN-1:1], lv_sticky}; // the LSB of the aligned mantissa becomes the sticky bit
494
495 if(op2_gt_op3 == 1) begin
496 mantissa3 = mantissa_to_shift;
497 end
498 else begin
499 mantissa2 = mantissa_to_shift;
500 end
501 quiet_nan_two = quiet_nan_two & ~add_flags[0]; // cleared when add_flags[0] is set for operand 3
502 `ifdef verbose $display("sign2 = %b exponent2 = %b mantissa2 = %b", sign2, resultant_exponent, mantissa2); `endif
503 `ifdef verbose $display("sign3 = %b exponent3 = %b mantissa3 = %b", sign3, resultant_exponent, mantissa3); `endif
504 `ifdef verbose $display(); `endif
505 bit man2_gt_man3 = 0;
506 if(mantissa2 > mantissa3) man2_gt_man3 = 1; //Can this be optimized?
507 bit lv_resultant_sign = (man2_gt_man3 & sign2) | (~man2_gt_man3 & (operation ^ sign3)); // Using Karnaugh maps: the result takes the sign of the term with the larger aligned mantissa
508 bit actual_operation = sign2 ^ (operation ^ sign3); // 0 for addition 1 for subtraction //Can this be pushed back to the prev. stage, saving 1 bit
509
510 ff_stage4<= Stage4_data_type{
511 lv_resultant_sign : lv_resultant_sign,
512 actual_operation : actual_operation,
513 mantissa2 : mantissa2,
514 mantissa3 : mantissa3,
515 man2_gt_man3 : man2_gt_man3,
516 resultant_exponent : resultant_exponent,
517 rounding_mode : lv_rounding_mode,
518 result_is_invalid : lv_result_is_invalid,
519 result_is_infinity : lv_result_is_infinity,
520 result_is_zero : lv_result_is_zero,
521 product_overflow : lv_product_overflow,
522 product_underflow : lv_product_underflow,
523 quiet_nan_two : quiet_nan_two,
524 quiet_nan_three : quiet_nan_three,
525 lv_product_is_zero : lv_product_is_zero
526 };
527 endrule:rl_stage_3
528
529 rule rl_stage4(rg_state_handler == Stage3 && !wr_flush); // Fires in state Stage3: add or subtract the aligned mantissas, detect an exactly-zero result, count leading zeros for the normaliser, write ff_stage5 and advance to Stage4
530 rg_state_handler <= Stage4;
531 let lv_resultant_sign = ff_stage4.lv_resultant_sign;
532 let man2_gt_man3 = ff_stage4.man2_gt_man3;
533 let mantissa2 = ff_stage4.mantissa2;
534 let mantissa3 = ff_stage4.mantissa3;
535 let actual_operation = ff_stage4.actual_operation;
536 Bit#(fpexp2) resultant_exponent = ff_stage4.resultant_exponent;
537 Bit#(3) lv_rounding_mode = ff_stage4.rounding_mode;
538 bit lv_result_is_invalid = ff_stage4.result_is_invalid;
539 Bit#(2) lv_result_is_infinity = ff_stage4.result_is_infinity;
540 Bit#(2) lv_result_is_zero = ff_stage4.result_is_zero;
541 bit lv_product_overflow = ff_stage4.product_overflow;
542 let quiet_nan_two = ff_stage4.quiet_nan_two;
543 let quiet_nan_three = ff_stage4.quiet_nan_three;
544 let lv_product_underflow = ff_stage4.product_underflow;
545 let lv_product_is_zero = ff_stage4.lv_product_is_zero;
546 //ff_stage4 <= tagged Invalid;
547
548 Bit#(fmaman) resultant_mantissa = 0;
549 Bit#(fmaman) add_mantissa = mantissa2 + mantissa3; // sum, used when actual_operation == 0
550
551 //Serial Path?
552 Bit#(fmaman) sub_mantissa1 = (man2_gt_man3==1)? mantissa2 : mantissa3; // larger mantissa (minuend)
553 Bit#(fmaman) sub_mantissa2 = (man2_gt_man3==1)? mantissa3 : mantissa2; // smaller mantissa (subtrahend)
554 Bit#(fmaman) sub_mantissa = sub_mantissa1 - sub_mantissa2; // larger minus smaller, so the difference is never negative
555
556
557 if(actual_operation == 0)
558 resultant_mantissa = add_mantissa;
559 else
560 resultant_mantissa = sub_mantissa;
561
562 //Case when Mantissa2 = Mantissa3 and hence the result is zero
563 Bit#(2) add_sub_is_zero = 0; // {sign, valid}
564
565 if(resultant_mantissa == '0) begin
566 if(lv_rounding_mode == 3'b010) begin // rounding mode 010 yields a negative zero
567 add_sub_is_zero = 2'b11;
568 end
569 else begin
570 add_sub_is_zero = 2'b01; // checks the resultant mantissa for zero
571 end
572 end
573
574 let lv_zeros_on_left = pack(countZerosMSB(resultant_mantissa)); // precomputed here so the final stage can normalise without another count
575 ff_stage5 <= Stage5_data_type{
576 resultant_mantissa : resultant_mantissa,
577 add_sub_is_zero : add_sub_is_zero,
578 lv_resultant_sign : lv_resultant_sign,
579 resultant_exponent : resultant_exponent,
580 lv_rounding_mode : lv_rounding_mode,
581 lv_result_is_invalid : lv_result_is_invalid,
582 lv_result_is_infinity : lv_result_is_infinity,
583 lv_result_is_zero : lv_result_is_zero,
584 lv_product_overflow : lv_product_overflow,
585 quiet_nan_two : quiet_nan_two,
586 quiet_nan_three : quiet_nan_three,
587 lv_product_underflow : lv_product_underflow,
588 lv_product_is_zero : lv_product_is_zero,
589 lv_zeros_on_left : lv_zeros_on_left
590 };
591
592 endrule
593
594
595
596 rule rl_stage_5_final_stage(rg_state_handler == Stage4 && !wr_flush); // Fires in state Stage4: normalise the add/sub result, round it, select the final encoding (NaN / infinity / zero / overflow / normal), build fflags, write ff_final_out and return the sequencer to Begin
597 Bit#(fmaman) resultant_mantissa = ff_stage5.resultant_mantissa;
598 let add_sub_is_zero = ff_stage5.add_sub_is_zero;
599 let resultant_exponent = ff_stage5.resultant_exponent;
600 let lv_resultant_sign = ff_stage5.lv_resultant_sign;
601 let lv_rounding_mode = ff_stage5.lv_rounding_mode;
602 let lv_result_is_invalid = ff_stage5.lv_result_is_invalid;
603 let lv_result_is_infinity = ff_stage5.lv_result_is_infinity;
604 let lv_result_is_zero = ff_stage5.lv_result_is_zero;
605 let lv_product_overflow = ff_stage5.lv_product_overflow;
606 let quiet_nan_two = ff_stage5.quiet_nan_two;
607 let quiet_nan_three = ff_stage5.quiet_nan_three;
608 let lv_product_underflow = ff_stage5.lv_product_underflow;
609 let lv_product_is_zero = ff_stage5.lv_product_is_zero;
610 let lv_zeros_on_left = ff_stage5.lv_zeros_on_left;
611 bit add_sub_subnormal = 0;
612 //ff_stage5 <= tagged Invalid;
613 rg_state_handler <= Begin;
614 bit lv_sticky = resultant_mantissa[0];
615
616 //change-x+1
617 let resultant_exponent_sub = resultant_exponent -1; // All normalisation candidates are computed up front; the if/else chain below only selects among them
618 let resultant_mantissa_unnormalized = resultant_mantissa >> 1;
619 let resultant_exponent_inc = resultant_exponent + 1;
620 let resultant_mantissa_norm_expo = resultant_mantissa << resultant_exponent_sub;
621 let resultant_mantissa_norm_zerosMSB = resultant_mantissa << (lv_zeros_on_left - 1);
622 let resultant_exponent_sub_zerosMSB = resultant_exponent - ((zeroExtend(lv_zeros_on_left)) - 1);
623
624
625 if(resultant_mantissa[fMAMAN-1] == 1'b1) begin // carry out of the addition: shift right once, keep the lost bit as sticky, bump the exponent
626 //resultant_mantissa = resultant_mantissa >> 1;
627 resultant_mantissa = {resultant_mantissa_unnormalized[fMAMAN-1:1], lv_sticky | resultant_mantissa_unnormalized[0]};
628 resultant_exponent = resultant_exponent_inc;
629 //resultant_exponent = resultant_exponent + 1;
630 end
631
632 else if(resultant_mantissa[fMAMAN-2] != 1'b1) begin // leading one is below the implicit-bit position: a left shift is needed
633 if((zeroExtend(lv_zeros_on_left) - 1) > resultant_exponent_sub) begin // the exponent cannot absorb the full shift: the result is subnormal
634 //if((zeroExtend(lv_zeros_on_left) - 1) > (resultant_exponent - 1)) begin
635 `ifdef verbose $display("resultant_exponent : %d",resultant_exponent); `endif
636 //resultant_mantissa = resultant_mantissa << (resultant_exponent - 1);
637 resultant_mantissa = resultant_mantissa_norm_expo;
638 resultant_exponent = 0;
639 `ifdef verbose $display("add_sub subnormal!!!"); `endif
640 add_sub_subnormal = 1;
641 end
642 else begin
643 //resultant_mantissa = resultant_mantissa << (lv_zeros_on_left - 1);
644 //resultant_exponent = resultant_exponent - (zeroExtend(lv_zeros_on_left) - 1);
645 resultant_mantissa = resultant_mantissa_norm_zerosMSB;
646 resultant_exponent = resultant_exponent_sub_zerosMSB;
647 end
648 end
649
650
651 `ifdef verbose $display("resultant_exponent : %b",resultant_exponent); `endif
652 Bit#(TSub#(fpexp,1)) bias = '1; // exponent bias: all ones in fpexp-1 bits
653 bit ex_overflow = 0;
654 Int#(fpexp2) res_exp_int = unpack(resultant_exponent) - zeroExtend(unpack(bias)); // unbiased (signed) result exponent
655 `ifdef verbose $display("resultant_exponent : %d res_exp_int : %d",resultant_exponent, res_exp_int); `endif
656
657 if(res_exp_int > zeroExtend(unpack(bias))) begin
658 lv_product_overflow = 1;
659 ex_overflow = 1;
660 end
661 /* else if(res_exp_int == zeroExtend(unpack(bias)))
662 ex_overflow = 1;*/
663 else if(resultant_exponent[fPEXP+1] == 1 && lv_product_is_zero == 0) begin // top bit of the widened exponent set: treated as underflow
664 lv_product_underflow = 1;
665 `ifdef verbose $display("Underflow"); `endif
666 end
667 /*`ifdef verbose $display("resultant_sign = %b resultant_exponent = %b resultant_mantissa = %b", resultant_sign, resultant_exponent, resultant_mantissa); `endif
668 `ifdef verbose $display(); `endif
669 */
670 Bit#(TAdd#(fpman,2)) lv_rounded_mantissa = resultant_mantissa[fMAMAN-1:iMPFPMAN2]; // top fpman+2 bits: carry position, implicit bit, fraction
671 Bit#(2) lv_res_man = resultant_mantissa[fMAMAN-1:fMAMAN-2];
672 Bit#(TSub#(impfpman2,2)) lv_res1 = resultant_mantissa[iMPFPMAN2-3:0];
673 bit lv_guard = resultant_mantissa[iMPFPMAN2-1]; // first bit below the kept mantissa
674 bit lv_round = resultant_mantissa[iMPFPMAN2-2]; // second bit below the kept mantissa
675 lv_sticky = |lv_res1; // OR of all remaining lower bits
676 bit lv_round_up = 0;
677 bit lv_inexact = lv_guard | lv_round | lv_sticky;
678
679 if(lv_rounding_mode == 'b000) // nearest, ties resolved by the kept LSB (round-to-nearest-even behaviour)
680 lv_round_up = lv_guard & (resultant_mantissa[iMPFPMAN2] | lv_round | lv_sticky);
681 else if(lv_rounding_mode == 'b100) // nearest, ties rounded away from zero
682 lv_round_up = lv_guard ;//& (lv_round | lv_sticky | ~lv_resultant_sign);
683 else if(lv_rounding_mode == 'b010) // magnitude grows only for negative results (rounds toward minus infinity)
684 lv_round_up = lv_inexact & (lv_resultant_sign);
685 else if(lv_rounding_mode == 'b011) // magnitude grows only for positive results (rounds toward plus infinity)
686 lv_round_up = lv_inexact & (~lv_resultant_sign);
687
688 if(add_sub_subnormal == 1 && lv_inexact == 1)
689 lv_product_underflow = 1;
690
691 `ifdef verbose $display("lv_guard = %b lv_round = %b lv_sticky = %b", lv_guard, lv_round, lv_sticky); `endif
692 `ifdef verbose $display("lv_round_up = %b", lv_round_up); `endif
693 `ifdef verbose $display("lv_rounded_mantissa = %b", lv_rounded_mantissa); `endif
694
695 if(lv_round_up == 1)
696 lv_rounded_mantissa = lv_rounded_mantissa + 1;
697
698 `ifdef verbose $display("lv_rounded_mantissa = %b after roundup", lv_rounded_mantissa); `endif
699
700 if(lv_rounded_mantissa[fPMAN+1] == 1) begin // rounding carried into the top bit: renormalise
701 resultant_exponent = resultant_exponent + 1;
702 lv_rounded_mantissa = lv_rounded_mantissa >> 1;
703 end
704 else if(lv_res_man == 'b0 && lv_rounded_mantissa[fPMAN] == 1) begin // both top bits were 0 before rounding and rounding set the implicit bit: bump the exponent
705 resultant_exponent = resultant_exponent + 1;
706 end
707
708 Bit#(fpexp) lv_res_exp_temp = resultant_exponent[fPEXP-1:0];
709 Bit#(fpman) man_all_zeros = '0;
710 Bit#(TSub#(fpman,1)) man1_all_zeros = '0;
711 Bit#(fpman) man_all_ones = '1;
712 Bit#(fpexp) exp_all_zeros = '0;
713 Bit#(TSub#(fpexp,1)) exp_all_ones_1 = '1;
714 Bit#(fpinp) lv_final_output = 0;
715 Bit#(fpexp) exp_all_ones = '1;
716 Bit#(fpexp) out_exp = resultant_exponent[fPEXP-1:0];
717 Bit#(fpman) out_man = lv_rounded_mantissa[fPMAN-1:0];
718
719
720 //Can I put these invalid, infinity, zero, cases in the first stage which will clear some of the paths????
721 if(lv_result_is_invalid == 1) begin // NaN: positive sign, all-ones exponent, only the top fraction bit set
722 lv_final_output = {1'b0, exp_all_ones,1'b1, man1_all_zeros};
723 end
724 else if(lv_result_is_infinity[0] == 1) begin // exact infinity: no overflow, underflow or inexact flag
725 lv_final_output = {lv_result_is_infinity[1], exp_all_ones, man_all_zeros};
726 ex_overflow = 0; lv_product_underflow = 0; lv_inexact = 0;
727 end
728 else if(lv_result_is_zero[0] == 1) begin
729 lv_final_output = {lv_result_is_zero[1],exp_all_zeros, man_all_zeros};
730 end
731 else if(add_sub_is_zero[0] == 1) begin
732 lv_final_output = {add_sub_is_zero[1], exp_all_zeros , man_all_zeros};
733 end
734 else if(lv_product_overflow == 1 || lv_res_exp_temp == '1) begin // overflow: either the largest finite value or infinity, depending on rounding mode and sign
735 lv_inexact = 1;
736 ex_overflow = 1;
737 if(lv_rounding_mode == 'b001)
738 lv_final_output={lv_resultant_sign,{exp_all_ones_1,1'b0},man_all_ones}; //?? largest finite magnitude
739 else if(lv_rounding_mode == 'b010 && lv_resultant_sign == 0)
740 lv_final_output={lv_resultant_sign,{exp_all_ones_1,1'b0},man_all_ones}; //?? largest finite magnitude
741 else if(lv_rounding_mode == 'b011 && lv_resultant_sign == 1)
742 lv_final_output={lv_resultant_sign,{exp_all_ones_1,1'b0},man_all_ones}; //?? largest finite magnitude
743 else begin
744 lv_final_output={lv_resultant_sign,exp_all_ones,man_all_zeros};
745 end
746 end
747 else begin
748 lv_final_output = {lv_resultant_sign, out_exp, out_man};
749 end
750
751 if(lv_product_underflow == 1'b1 && lv_rounded_mantissa[fPMAN]==1'b1 && lv_rounding_mode!=3'b011) //Tininess vanishing after rounding
752 lv_product_underflow = 0;
753
754 if(lv_result_is_invalid == 1) begin //For effectively handling the flag cases between add,sub,mul and fused mul add
755 ex_overflow = 0;
756 lv_inexact = 0;
757 lv_product_underflow = 0;
758 if(quiet_nan_two == 1 || quiet_nan_three == 1) // a quiet-NaN input still produces the NaN output above but does not raise the invalid flag
759 lv_result_is_invalid = 0;
760 end
761
762 Bit#(5) fflags={lv_result_is_invalid,1'b0,ex_overflow,lv_product_underflow,lv_inexact}; // {invalid, constant 0, overflow, underflow, inexact}; the constant-0 bit is presumably divide-by-zero -- confirm
763 `ifdef verbose $display("lv_inv : %b ex_overflow: %b lv_inexact : %b",lv_result_is_invalid,ex_overflow,lv_inexact); `endif
764 ff_final_out <= Floating_output{
765 final_result : lv_final_output,
766 fflags : fflags
767 };
768
769 `ifdef verbose $display("FMA: Result: %h fflags: %8h",lv_final_output, {24'b0,fflags}); `endif
770 endrule
771
// Accepts one multiply / fused multiply-add request and latches the first pipeline stage.
//   _operand1, _operand2 : multiplicands as (sign, exponent, mantissa) tuples.
//   _operand3            : addend, forwarded unmodified (re-packed as {sign, exponent, mantissa}).
//   rounding_mode, operation, _negate, mul, muladd : control bits, forwarded unmodified.
//   flags                : per-operand 5-bit class flags; this method reads
//                          bit 4 = denormal, bit 3 = zero, bit 1 = infinity and
//                          bits 2 / 0 = NaN (quiet / signalling) for operands 1 and 2.
// The method classifies the product (invalid / infinity / zero), computes the biased
// exponent of the product and the full-width significand product, and writes everything
// into ff_input_register while moving rg_state_handler to Stage1.
method Action _start(Tuple3#(Bit#(1),Bit#(fpexp),Bit#(fpman)) _operand1, Tuple3#(Bit#(1),Bit#(fpexp),Bit#(fpman)) _operand2,Tuple3#(Bit#(1),Bit#(fpexp),Bit#(fpman)) _operand3, Bit#(3) rounding_mode, bit operation, bit _negate, bit mul, bit muladd, Tuple3#(Bit#(5),Bit#(5),Bit#(5)) flags);

   // Split every operand into its sign / exponent / mantissa fields.
   match {.sgn_a, .exp_a, .man_a} = _operand1;
   match {.sgn_b, .exp_b, .man_b} = _operand2;
   match {.sgn_c, .exp_c, .man_c} = _operand3;

   // Per-operand class flags.
   match {.cls_a, .cls_b, .cls_c} = flags;

   // Hidden bit is absent when the operand is denormal OR zero (flag bits 4 and 3).
   Bit#(1) a_no_hidden = cls_a[4] | cls_a[3];
   Bit#(1) b_no_hidden = cls_b[4] | cls_b[3];

   // Coarse classification of the two multiplicands.
   Bool any_nan  = ((cls_a[0] | cls_a[2] | cls_b[0] | cls_b[2]) == 1); // quiet or signalling, not distinguished here
   Bool any_inf  = ((cls_a[1] | cls_b[1]) == 1);
   Bool any_zero = ((cls_a[3] | cls_b[3]) == 1);

   // Mutually exclusive product classes, in priority order:
   //   NaN input or 0*inf -> invalid; otherwise inf*x -> infinity; otherwise 0*x -> zero.
   Bit#(1) product_invalid  = pack(any_nan || (any_inf && any_zero));
   Bit#(1) product_infinity = pack(!any_nan && any_inf && !any_zero);
   Bit#(1) product_zero     = pack(!any_nan && !any_inf && any_zero);

   // One multiplicand is a quiet NaN while the other is not a signalling NaN.
   // Forced to 0 when the product is a genuine infinity.
   bit qnan_in_product = (product_infinity == 1) ? 1'b0
                                                 : ((cls_a[2] & ~cls_b[0]) | (cls_b[2] & ~cls_a[0]));

   `ifdef verbose $display("lv_inv : %h lv_inf : %h lv_zero : %h",product_invalid,product_infinity,product_zero); `endif
   `ifdef verbose $display("flags1 : %b flags2 : %b flags3 : %b",cls_a,cls_b,cls_c); `endif

   /*
      Exponent of the product, still biased:
         exp1 + exp2 - bias
      An operand without a hidden bit has an exponent field of 0 but an effective
      exponent of 1, so one is added back for each such operand.
   */
   Bit#(TSub#(fpexp,1)) exp_bias     = '1; // 127 for SP, 1023 for DP
   Bit#(fpexp2)         wide_exp_a   = {2'b0, exp_a};
   Bit#(fpexp2)         wide_exp_b   = {2'b0, exp_b};
   Bit#(fpexp2)         product_exp  = wide_exp_a + wide_exp_b - zeroExtend(exp_bias) + zeroExtend(a_no_hidden) + zeroExtend(b_no_hidden);
   Bit#(1)              product_sign = sgn_a ^ sgn_b;

   `ifdef verbose $display("lv_summed_exponent = %b", product_exp/*, lv_actual_exponent*/); `endif

   // Single-cycle integer multiply of the two significands (hidden bit prepended).
   Bit#(impfpman2) significand_product = zeroExtend({~a_no_hidden, man_a})*zeroExtend({~b_no_hidden, man_b});

   ff_input_register <= Input_data_type{
                           product_mantissa   : significand_product,
                           lv_summed_exponent : product_exp,
                           sign               : product_sign,
                           _operand3          : {sgn_c,exp_c,man_c},
                           rounding_mode      : rounding_mode,
                           infinity           : product_infinity,
                           add_flags          : cls_c,
                           invalid            : product_invalid,
                           zero               : product_zero,
                           _operation         : operation,
                           _negate            : _negate,
                           mul                : mul,
                           muladd             : muladd,
                           quiet_nan_two      : qnan_in_product,
                           inp_denormal       : a_no_hidden | b_no_hidden
                        };
   rg_state_handler <= Stage1;
endmethod
850
851
// Returns the current contents of ff_final_out (result value and exception flags).
method Floating_output#(fpinp) get_result();
   Floating_output#(fpinp) latest_result = ff_final_out;
   return latest_result;
endmethod
// Requests a flush by writing True to wr_flush.
method Action flush();
   wr_flush._write(True);
endmethod
858 endmodule
859
860
// Smoke-test bench for the single-precision (32,23,8) instance of mkfpu_fm_add_sub.
// At cycle 1 it issues one request built from operand1/operand2/operand3 with
// rounding_mode = 0, operation = 0, _negate = 0, mul = 0, muladd = 1, prints the
// result register every cycle (only when `verbose is defined) and stops at cycle 20.
module mkTb_fpu_fm_add_sub(Empty);

   Ifc_fpu_fm_add_sub#(32,23,8) uut <- mkfpu_fm_add_sub();

   // Classifies one operand given as (mantissa, exponent).
   // Result bits, MSB first: {Denormal, isZero, QNaN, Infinity, SNaN}.
   function Bit#(5) classifyOperand (Tuple2#(Bit#(m), Bit#(e)) op);
      let  msb      = valueOf(m) - 1;
      let  man      = tpl_1(op);
      let  expo     = tpl_2(op);
      Bool expZero  = (expo == 0);
      Bool expOnes  = (expo == '1);
      Bool manZero  = (man == 0);
      Bool quietBit = (man[msb] == 1);   // top mantissa bit set (implies mantissa != 0)
      return {pack(expZero && !manZero),               // Denormal
              pack(expZero && manZero),                // isZero
              pack(expOnes && quietBit),               // QNaN
              pack(expOnes && manZero),                // Infinity
              pack(expOnes && !quietBit && !manZero)}; // SNaN
   endfunction

   // Class flags for all three operands; each argument is a (mantissa, exponent) pair.
   function Tuple3#(Bit#(5), Bit#(5), Bit#(5)) condFlags (Tuple2#(Bit#(m), Bit#(e)) x, Tuple2#(Bit#(m), Bit#(e)) y, Tuple2#(Bit#(m),Bit#(e)) z);
      return tuple3(classifyOperand(x), classifyOperand(y), classifyOperand(z));
   endfunction

   // Extracts the low m bits (mantissa field) of each packed operand.
   function Tuple3#(Bit#(m),Bit#(m), Bit#(m)) getMantissa (Bit#(n) op1, Bit#(n) op2, Bit#(n) op3)
      provisos(Add#(TAdd#(m,1),e,n),
               Add#(7,a__,e)
              );
      let man = valueOf(m);
      return tuple3(op1[man-1:0],op2[man-1:0],op3[man-1:0]);
   endfunction

   // Extracts the exponent field (bits n-2 down to m) of each packed operand.
   function Tuple3#(Bit#(e), Bit#(e), Bit#(e)) getExp (Bit#(n) op1, Bit#(n) op2, Bit#(n) op3)
      provisos(Add#(TAdd#(m,1),e,n),
               Add#(7,a__,e)
              );
      let inp = valueOf(n);
      let man = valueOf(m);
      return tuple3(op1[inp-2:man], op2[inp-2:man], op3[inp-2:man]);
   endfunction

   // True when the upper 32 bits of a 64-bit value are all ones.
   function Bool isNaNBox(Bit#(64) op);
      return (op[63:32]=='1);
   endfunction

   // For each 64-bit input: the low 32 bits when the value passes isNaNBox,
   // otherwise the constant 32'h7fc00000.
   function Tuple3#(Bit#(32),Bit#(32),Bit#(32)) setCanNaN (Bit#(64) op1, Bit#(64) op2, Bit#(64) op3);
      return tuple3(isNaNBox(op1)? truncate(op1) : 32'h7fc00000, isNaNBox(op2)? truncate(op2) : 32'h7fc00000, isNaNBox(op3)? truncate(op3) : 32'h7fc00000);
   endfunction

   // Wrapped helper instances; only the 32-bit ones are used by the rules below.
   Wrapper3#(Tuple2#(Bit#(23), Bit#(8)),Tuple2#(Bit#(23), Bit#(8)), Tuple2#(Bit#(23), Bit#(8)), Tuple3#(Bit#(5),Bit#(5),Bit#(5))) condFlags32 <- mkUniqueWrapper3(condFlags);
   Wrapper3#(Tuple2#(Bit#(52), Bit#(11)),Tuple2#(Bit#(52), Bit#(11)),Tuple2#(Bit#(52), Bit#(11)), Tuple3#(Bit#(5),Bit#(5),Bit#(5))) condFlags64 <- mkUniqueWrapper3(condFlags);
   Wrapper3#(Bit#(32),Bit#(32),Bit#(32),Tuple3#(Bit#(23),Bit#(23),Bit#(23))) getMant32 <- mkUniqueWrapper3(getMantissa);
   Wrapper3#(Bit#(32),Bit#(32),Bit#(32),Tuple3#(Bit#(8),Bit#(8),Bit#(8))) getExp32 <- mkUniqueWrapper3(getExp);
   Wrapper3#(Bit#(64),Bit#(64),Bit#(64),Tuple3#(Bit#(52),Bit#(52),Bit#(52))) getMant64 <- mkUniqueWrapper3(getMantissa);
   Wrapper3#(Bit#(64),Bit#(64),Bit#(64),Tuple3#(Bit#(11),Bit#(11),Bit#(11))) getExp64 <- mkUniqueWrapper3(getExp);
   Wrapper3#(Bit#(64),Bit#(64),Bit#(64),Tuple3#(Bit#(32),Bit#(32),Bit#(32))) setCanonicalNaN <- mkUniqueWrapper3(setCanNaN);

   Reg#(Bit#(32)) rg_clock <-mkReg(0);
   //Reg#(Bit#(64)) operand1 <- mkReg(64'h17fffffffffff860);
   //Reg#(Bit#(64)) operand2 <- mkReg(64'h0000000000000200);
   //Reg#(Bit#(64)) operand3 <- mkReg(64'h000000000000005f);
   Reg#(Bit#(32)) operand1 <- mkReg(32'h31f36ab4);
   Reg#(Bit#(32)) operand2 <- mkReg(32'h08835f4d);
   Reg#(Bit#(32)) operand3 <- mkReg(32'h0);

   // Free-running cycle counter; ends the simulation when it reads 20.
   rule rl_count_clock ;
      rg_clock<=rg_clock+1;
      if(rg_clock=='d20) $finish(0);
   endrule

   // Issues the single test request at cycle 1.
   rule rl_input1(rg_clock==1);
      let {man1,man2,man3} <- getMant32.func(operand1,operand2, operand3);
      let {exp1,exp2,exp3} <- getExp32.func(operand1,operand2, operand3);
      let x <- condFlags32.func(tuple2(man1,exp1),tuple2(man2,exp2),tuple2(man3,exp3));
      let sign1 = operand1[31];
      let sign2 = operand2[31];
      let sign3 = operand3[31];
      // Arguments after the operands: rounding_mode, operation, _negate, mul, muladd, flags.
      uut._start(tuple3(sign1,exp1,man1),tuple3(sign2,exp2,man2),tuple3(sign3,exp3,man3),3'b0,1'b0,1'b0,1'b0,1'b1,x);
      `ifdef verbose $display("giving inputs at %0d", rg_clock); `endif

   endrule

   // Samples the result register every cycle; prints it only in verbose builds.
   rule rl_finish;
      let res = uut.get_result();
      `ifdef verbose $display("Output = %h at %0d",res.final_result[31:0], rg_clock); `endif
   endrule

endmodule
960
961 `ifdef fpu_hierarchical
// Separately synthesized single-precision wrapper: instantiates the polymorphic
// mkfpu_fm_add_sub with (fpinp, fpman, fpexp) = (32, 23, 8) and forwards every
// method to it unchanged.
(*synthesize*)
module mkfpu_fm_add_sub32(Ifc_fpu_fm_add_sub32);

   Ifc_fpu_fm_add_sub#(32,23,8) fma_sp <- mkfpu_fm_add_sub();

   // Forward a new request to the inner unit.
   method Action _start(Tuple3#(Bit#(1),Bit#(8),Bit#(23)) _operand1,
                        Tuple3#(Bit#(1),Bit#(8),Bit#(23)) _operand2,
                        Tuple3#(Bit#(1),Bit#(8),Bit#(23)) _operand3,
                        Bit#(3) rounding_mode,
                        bit operation,
                        bit _negate,
                        bit mul,
                        bit muladd,
                        Tuple3#(Bit#(5),Bit#(5),Bit#(5)) flags);
      fma_sp._start(_operand1, _operand2, _operand3,
                    rounding_mode, operation, _negate, mul, muladd, flags);
   endmethod

   // Result value and flags as reported by the inner unit.
   method Floating_output#(32) get_result();
      return fma_sp.get_result();
   endmethod

   // Forward a flush request to the inner unit.
   method Action flush;
      fma_sp.flush();
   endmethod

endmodule
976
// Separately synthesized double-precision wrapper: instantiates the polymorphic
// mkfpu_fm_add_sub with (fpinp, fpman, fpexp) = (64, 52, 11) and forwards every
// method to it unchanged.
(*synthesize*)
module mkfpu_fm_add_sub64(Ifc_fpu_fm_add_sub64);

   Ifc_fpu_fm_add_sub#(64,52,11) fma_dp <- mkfpu_fm_add_sub();

   // Forward a new request to the inner unit.
   method Action _start(Tuple3#(Bit#(1),Bit#(11),Bit#(52)) _operand1,
                        Tuple3#(Bit#(1),Bit#(11),Bit#(52)) _operand2,
                        Tuple3#(Bit#(1),Bit#(11),Bit#(52)) _operand3,
                        Bit#(3) rounding_mode,
                        bit operation,
                        bit _negate,
                        bit mul,
                        bit muladd,
                        Tuple3#(Bit#(5),Bit#(5),Bit#(5)) flags);
      fma_dp._start(_operand1, _operand2, _operand3,
                    rounding_mode, operation, _negate, mul, muladd, flags);
   endmethod

   // Result value and flags as reported by the inner unit.
   method Floating_output#(64) get_result();
      return fma_dp.get_result();
   endmethod

   // Forward a flush request to the inner unit.
   method Action flush;
      fma_dp.flush();
   endmethod

endmodule
990 `endif
991
992 //module mkTb_fpu_fm_add_sub_2 (Empty);
993 //
994 //// RegFile #(Bit #(16), Bit #(100)) input_data <- mkRegFileFullLoad("./testcases/fma_inp_nor.txt");
995 //// RegFile #(Bit #(16), Bit #(68)) input_data <- mkRegFileFullLoad("./testcases/mul_denormal_testcases.txt");
996 // RegFile #(Bit #(16), Bit #(68)) input_data <- mkRegFileFullLoad("./testcases/Add_normal_testcases.hex");
997 // Reg #(Bit #(16)) index <- mkReg(0);
998 //
999 // Ifc_fpu_fm_add_sub#(32,23,8,16) multiplier <- mkfpu_fm_add_sub();
1000 // Reg #(Bit #(32)) state_clock <- mkReg(1);
1001 // Reg #(Bit #(1)) rg_state <- mkReg(0);
1002 //
1003 // Reg#(int) cnt <- mkReg(0); //File Variable
1004 // let fh <- mkReg(InvalidFile) ; //File handler
1005 //
1006 // //rule for file creation
1007 // rule open (cnt == 0 ) ;
1008 // File tb_mul_output <- $fopen("tb_madd_output.hex", "w+");
1009 // fh <= tb_mul_output;
1010 // cnt <= 1 ;
1011 // endrule
1012 //
1013 // rule state_clock_count;
1014 // state_clock <= state_clock + 1;
1015 // endrule
1016 //
1017 // rule take_input_in (rg_state == 0);
1018 // // multiplier._start(input_data.sub(index)[99:68],input_data.sub(index)[67:36],input_data.sub(index)[35:4],0,input_data.sub(index)[2:0],0,0);
1019 // // multiplier._start(input_data.sub(index)[67:36],input_data.sub(index)[35:4],32'b0,0,input_data.sub(index)[2:0],0,0);
1020 // multiplier._start(32'h3f800000, input_data.sub(index)[67:36],input_data.sub(index)[35:4],0,input_data.sub(index)[2:0],0,0);
1021 // index <= index + 1;
1022 // rg_state <= 1;
1023 // endrule
1024 //
1025 // rule display_output (rg_state == 1);
1026 // let abc = multiplier.get_result();
1027 // $fwrite(fh, "%h\n", abc.final_result[31:0]);
1028 // rg_state <= 0;
1029 // endrule
1030 //
1031 // rule end_testing (index == 16562);
1032 // $finish(0);
1033 // endrule : end_testing
1034 //
1035 //endmodule
1036
1037 endpackage