2 Authors : Vinod.G, Aditya Govardhan
3 Email : g.vinod1993@gmail.com
4 Last Update : 27th November 2017
5 See LICENSE for more details
6 Paper Reference: Floating Point Fused Multiply-Add Architectures (http://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=4487224)
11 package fpu_fm_add_sub;
13 import defined_types::*;
15 import UniqueWrappers::*;
16 `include "defined_parameters.bsv"
// Parameterised interface of the fused multiply-add/sub unit: fpinp is the packed result width, fpman the mantissa width and fpexp the exponent width.
// _start accepts three operands as (sign, exponent, mantissa) tuples, a 3-bit rounding mode, four 1-bit controls (operation, _negate, mul, muladd) and a tuple of three 5-bit flag vectors.
19 interface Ifc_fpu_fm_add_sub#(numeric type fpinp, numeric type fpman, numeric type fpexp);
20 method Action _start(Tuple3#(Bit#(1),Bit#(fpexp),Bit#(fpman)) _operand1, Tuple3#(Bit#(1),Bit#(fpexp),Bit#(fpman)) _operand2,Tuple3#(Bit#(1),Bit#(fpexp),Bit#(fpman)) _operand3, Bit#(3) rounding_mode, bit operation, bit _negate, bit mul, bit muladd, Tuple3#(Bit#(5),Bit#(5),Bit#(5)) flags);
// get_result returns the computed value as a Floating_output#(fpinp).
21 method Floating_output#(fpinp) get_result();
// Fixed-width interface variants, declared only when fpu_hierarchical is defined.
// They mirror the parameterised interface's method signatures with concrete widths.
26 `ifdef fpu_hierarchical
// 32-bit variant: 1-bit sign, 8-bit exponent, 23-bit mantissa per operand; 32-bit result.
27 interface Ifc_fpu_fm_add_sub32;
28 method Action _start(Tuple3#(Bit#(1),Bit#(8),Bit#(23)) _operand1, Tuple3#(Bit#(1),Bit#(8),Bit#(23)) _operand2,Tuple3#(Bit#(1),Bit#(8),Bit#(23)) _operand3, Bit#(3) rounding_mode, bit operation, bit _negate, bit mul, bit muladd, Tuple3#(Bit#(5),Bit#(5),Bit#(5)) flags);
29 method Floating_output#(32) get_result();
// 64-bit variant: 1-bit sign, 11-bit exponent, 52-bit mantissa per operand; 64-bit result.
33 interface Ifc_fpu_fm_add_sub64;
34 method Action _start(Tuple3#(Bit#(1),Bit#(11),Bit#(52)) _operand1, Tuple3#(Bit#(1),Bit#(11),Bit#(52)) _operand2,Tuple3#(Bit#(1),Bit#(11),Bit#(52)) _operand3, Bit#(3) rounding_mode, bit operation, bit _negate, bit mul,bit muladd, Tuple3#(Bit#(5),Bit#(5),Bit#(5)) flags);
35 method Floating_output#(64) get_result();
// Fields of Input_data_type: the values latched by _start into ff_input_register and read back by rl_stage1_after_input_stage.
41 Bit#(TMul#(2,TAdd#(fpman,1))) product_mantissa; // raw integer product of the two (fpman+1)-bit significands, computed in _start
42 Bit#(TAdd#(fpexp,2)) lv_summed_exponent; // exponent of the resultant
43 bit sign; // sign bit of the result
44 Bit#(fpinp) _operand3; // third operand repacked as {sign, exponent, mantissa}
45 Bit#(1) invalid; // indicating that the ff_output is NaN.
46 Bit#(1) infinity; // indicating that the ff_output is infinity.
47 Bit#(1) zero; // indicating that the ff_output is zero.
49 Bit#(3) rounding_mode; // static rounding mode encoded in the instruction
50 bit _operation; // bit denoting the operation to be performed 0 - Add, 1 - Sub
51 bit _negate; // bit denoting whether the operands should be negated or not
52 bit mul; // bit denoting whether the operation is mul or not
56 }Input_data_type #(numeric type fpinp, numeric type fpman, numeric type fpexp) deriving (Bits,Eq);
// Fields of Stage2_data_type: written into ff_stage2 by rl_stage1_after_input_stage and read by rl_stage_3.
59 Bit#(1) lv_product_sign; //The result of the integer multiplier stage
61 Bit#(TAdd#(fpexp,2)) lv_product_exponent;
62 Bit#(TAdd#(TMul#(2,TAdd#(1,fpman)),1)) lv_product_mantissa; // product mantissa, one bit wider than the raw product (stage 1 appends a low-order zero)
63 Bit#(fpinp) lv_operand3; // third operand, passed through unchanged from the input register
68 Bit#(3) rounding_mode;
69 bit lv_product_is_invalid;
70 bit lv_product_is_zero;
71 bit lv_product_is_infinity;
72 bit lv_product_overflow; // set in stage 1 when the product exponent exceeds the bias
73 bit lv_product_underflow; // set in stage 1 when the product exponent falls below the representable range
76 }Stage2_data_type #(numeric type fpinp, numeric type fpman, numeric type fpexp) deriving (Bits,Eq);
// Fields of Stage4_data_type: written into ff_stage4 by rl_stage_3 and read by rl_stage4.
80 bit lv_resultant_sign;
82 Bit#(TAdd#(fpexp,2)) resultant_exponent; // the larger of the product and operand-3 exponents
83 Bit#(TAdd#(TMul#(fpman,3),4)) mantissa2; // product mantissa after alignment
84 Bit#(TAdd#(TMul#(fpman,3),4)) mantissa3; // operand-3 mantissa after alignment
85 Bit#(3) rounding_mode;
86 bit result_is_invalid;
87 Bit#(2) result_is_infinity; // bit 1 = sign, bit 0 = result-is-infinity flag (as packed in rl_stage_3)
88 Bit#(2) result_is_zero; // bit 1 = sign, bit 0 = result-is-zero flag (as packed in rl_stage_3)
90 bit product_underflow;
93 bit lv_product_is_zero;
94 }Stage4_data_type #(numeric type fpman, numeric type fpexp) deriving (Bits,Eq);
// Fields of Stage5_data_type: written into ff_stage5 by rl_stage4 and read by rl_stage_5_final_stage.
97 Bit#(TAdd#(fpexp,2)) resultant_exponent;
98 Bit#(TAdd#(TMul#(fpman,3),4)) resultant_mantissa; // sum or difference of the two aligned mantissas
99 bit lv_resultant_sign;
100 Bit#(3) lv_rounding_mode;
101 Bit#(2) add_sub_is_zero; // bit 0 = mantissa result is zero; bit 1 = sign to give that zero (set for rounding mode 3'b010)
102 bit lv_result_is_invalid;
103 Bit#(2) lv_result_is_infinity;
104 Bit#(2) lv_result_is_zero;
105 bit lv_product_overflow;
106 bit lv_product_underflow;
109 bit lv_product_is_zero;
110 Bit#(TLog#(TAdd#(TAdd#(TMul#(fpman,3),4),1))) lv_zeros_on_left; // leading-zero count of resultant_mantissa, computed in rl_stage4
111 }Stage5_data_type #(numeric type fpman, numeric type fpexp) deriving (Bits,Eq);
119 } FMA_states deriving (Bits,Eq);
// Parameterised fused multiply-add/sub module implementing Ifc_fpu_fm_add_sub.
// The first provisos name the derived widths used throughout the body:
//   fpinp = fpexp + fpman + 1, fpexp2 = fpexp + 2, fmaman = 3*fpman + 4,
//   fpman1 = fpman + 1, fpexp1 = fpexp + 1, impfpman2 = 2*(fpman + 1), fpinp2 = 2*fpinp.
// The remaining provisos (a__ .. t__) are width relations between these sizes; presumably the ones the compiler requested for the extends and slices in the body.
121 module mkfpu_fm_add_sub(Ifc_fpu_fm_add_sub#(fpinp,fpman,fpexp))
123 Add#(TAdd#(fpexp,fpman),1,fpinp),
124 Add#(fpexp,2,fpexp2),
125 Add#(TMul#(fpman,3),4,fmaman),
126 Add#(fpman,1,fpman1),
127 Add#(fpexp,1,fpexp1),
129 Mul#(TAdd#(fpman,1),2,impfpman2),
130 Add#(fpinp,fpinp,fpinp2),
132 Add#(c__, TSub#(fpexp, 1), fpexp1),
134 Add#(e__, fpexp2, fpman),
135 Add#(f__, TSub#(fpexp, 1), fpman),
136 Add#(g__, TAdd#(fpman, 1), fpinp),
137 Add#(h__, TSub#(fpexp, 1), fpexp2),
138 Add#(j__, TLog#(TAdd#(1, fmaman)), fpexp2),
139 Add#(l__, 1, fmaman),
140 Add#(m__, TAdd#(2, fpman), fmaman),
141 Add#(n__,TAdd#(fpman,1),fpinp2),
142 Add#(a__, TMul#(2, TAdd#(1, fpman)), fmaman),
143 Mul#(2, TAdd#(1, fpman), impfpman2),
144 Add#(i__, TAdd#(TMul#(2, TAdd#(1, fpman)), 1), fmaman),
145 Add#(k__, TLog#(TAdd#(1, TAdd#(impfpman2, 1))), fpexp2),
146 Add#(o__, TLog#(TAdd#(1, fmaman)), TAdd#(fpexp, 2)),
147 Log#(TAdd#(TAdd#(TMul#(fpman, 3), 4), 1), TLog#(TAdd#(1, fmaman))),
148 Add#(p__, TLog#(TAdd#(TAdd#(TMul#(fpman, 3), 4), 1)), TAdd#(fpexp, 2)),
149 Add#(s__, TAdd#(1, TAdd#(f__, fpexp)), fmaman),
150 Add#(t__, TAdd#(f__, fpexp), impfpman2),
151 Add#(q__, TAdd#(fpexp, f__), impfpman2),
152 Add#(r__, TAdd#(1, TAdd#(fpexp, f__)), fmaman)
// Module-level state: a result wire, one register per stage hand-off, the state-machine register and a flush wire.
157 Wire#(Floating_output#(fpinp)) ff_final_out <- mkWire(); // written by rl_stage_5_final_stage
158 Reg#(Stage2_data_type#(fpinp,fpman,fpexp)) ff_stage2 <- mkConfigRegU(); // rl_stage1_after_input_stage -> rl_stage_3
159 Reg#(Stage4_data_type#(fpman,fpexp)) ff_stage4 <- mkConfigRegU(); // rl_stage_3 -> rl_stage4
160 Reg#(Stage5_data_type#(fpman,fpexp)) ff_stage5 <- mkConfigRegU(); // rl_stage4 -> rl_stage_5_final_stage
161 Reg#(Input_data_type#(fpinp,fpman,fpexp)) ff_input_register <- mkConfigRegU(); // _start -> rl_stage1_after_input_stage
162 Reg#(FMA_states) rg_state_handler <- mkReg(Begin); // selects which stage rule may fire; reset value is Begin
163 Wire#(Bool) wr_flush <- mkDWire(False); // DWire with default False; when True it blocks every stage rule and enables rl_flush
// zeroExtendLSB pads on the least-significant side: reverse the bits, extend, then reverse back.
165 function zeroExtendLSB(inp_man) = unpack(reverseBits(extend(reverseBits(pack(inp_man)))));
// Integer values of the numeric type parameters, used below for bit-slice indices.
167 let fPMAN = valueOf(fpman);
168 let fPINP = valueOf(fpinp);
169 let fPEXP = valueOf(fpexp);
170 let iMPFPMAN2 = valueOf(impfpman2);
171 let fMAMAN = valueOf(fmaman);
// On a flush request (wr_flush True), return the state machine to Begin.
// The stage registers themselves are left untouched: the clears below are commented out.
173 rule rl_flush(wr_flush);
174 rg_state_handler <= Begin;
175 //ff_input_register <= tagged Invalid;
176 //ff_stage2 <= tagged Invalid;
177 //ff_stage4 <= tagged Invalid;
178 //ff_stage5 <= tagged Invalid;
// Stage 1: fires when the state is Stage1 and no flush is pending.
// Reads ff_input_register, derives the product overflow/underflow flags from the unbiased exponent,
// normalises the product mantissa and exponent (right shift into the subnormal range with sticky
// accumulation, or left shift to remove leading zeros), writes ff_stage2 and moves the state to Stage2.
// NOTE(review): several begin/end/else lines and some declarations (e.g. lsb_zeros) are not visible in this listing.
181 rule rl_stage1_after_input_stage(rg_state_handler == Stage1 && !wr_flush);
183 Bit#(impfpman2) x = ff_input_register.product_mantissa;
184 Bit#(TAdd#(impfpman2,1)) lv_product_mantissa = {x[iMPFPMAN2-1:0],1'b0}; //extra zero for 10.xxxx case
185 Bit#(fpexp2) lv_product_exponent = ff_input_register.lv_summed_exponent;
186 Bit#(fpinp) lv_operand3 = ff_input_register._operand3;
187 Bit#(5) add_flags = ff_input_register.add_flags;
188 bit lv_product_underflow = 0;
189 bit lv_product_overflow = 0;
190 let lv_product_is_invalid = ff_input_register.invalid;
191 let lv_product_is_infinity = ff_input_register.infinity;
192 let lv_product_is_zero = ff_input_register.zero;
193 let rounding_mode = ff_input_register.rounding_mode;
194 let operation = ff_input_register._operation;
195 let lv_negate = ff_input_register._negate;
196 let lv_product_sign = ff_input_register.sign;
197 let mul = ff_input_register.mul;
198 let muladd = ff_input_register.muladd;
199 let quiet_nan_two = ff_input_register.quiet_nan_two;
200 bit inp_denormal = ff_input_register.inp_denormal;
201 Bit#(TSub#(fpexp,1)) bias = '1; // all ones over fpexp-1 bits
202 Int#(fpexp2) lv_actual_product_exponent = unpack(lv_product_exponent - {3'b0,bias});
203 let msb_zeros = pack(countZerosMSB(lv_product_mantissa));
206 //Change-2 Removing Redundant Variables
207 //Bit#(fpman) bias_temp = zeroExtend(bias);
208 //Int#(fpman) lv_actual_product_exponent_temp = signExtend(lv_actual_product_exponent);
209 //`ifdef verbose $display("lv_actual_product_exponent_temp : %d",lv_actual_product_exponent_temp); `endif
210 rg_state_handler <= Stage2; // advance the state machine so rl_stage_3 fires next
212 // lv_product_is_subnormal construct is like a flag which can be used in difficult situations
213 // bit lv_product_is_subnormal = 0;
215 bit lv_sticky = lv_product_mantissa[0];
216 `ifdef verbose $display("and thus the sticky bit = %b", lv_sticky); `endif
219 if exponent is > bias then obviously none of the numbers are subnormal
220 so the product is of the form 1x.xxxx or 01.xxxx
221 the overflow conditions are handled in the following if condition accordingly
224 `ifdef verbose $display("lv_actual_product_exponent = %d",lv_actual_product_exponent); `endif
225 bit exp_overflow_bit = pack(lv_actual_product_exponent)[fPEXP]; //Says if Exponent Overflows
226 bit exp_underflow_bit = pack(lv_actual_product_exponent)[fPEXP+1]; //Says if Exponent Underflows
227 Bit#(fpexp) expo_temp = pack(lv_actual_product_exponent)[fPEXP-1:0];
228 bit exp_and = &(expo_temp); //Says if Exponent is equal to Bias
229 bit is_msb_zeros = |(msb_zeros);
231 //Change-1 -- Reducing the size of the Muxes from EXP size to just a bunch of 1bits and a Or-tree
232 //Change-1 was wrong apparently, according to Paranoia!! Should see why! Rolling back
233 //if((exp_overflow_bit==1'b1 && exp_underflow_bit==1'b0) || (is_msb_zeros==1'b0 && exp_and==1'b1)) begin
// Overflow: unbiased exponent above the bias, or equal to it with no leading zero in the product.
// For muladd the flag is raised only when the product and operand 3 have the same effective sign.
234 if(lv_actual_product_exponent > zeroExtend(unpack(bias)) || (msb_zeros == 0 && lv_actual_product_exponent == zeroExtend(unpack(bias)))) begin
235 if(muladd == 0 ||(muladd==1 && ((lv_product_sign^lv_operand3[fPINP-1]^operation) == 0)))
236 lv_product_overflow = 1;
237 //When the product overflows, the FMA result is an overflow
238 `ifdef verbose $display("lv_product_overflow!!!"); `endif
242 -lowest_exp = -denormal_bias -mantissa_size -2
243 -2 is for the implicit bit and the carry bit
244 i.e. if all the bits are shifted out then its an underflow
248 //Thought-1 -- Can something be done to reduce the countZerosMSB and countZerosLSB
249 //Cannot reduce this mux to 1-bit but can reduce size since it's unwanted
250 if(lv_actual_product_exponent < unpack(-zeroExtend(bias)-fromInteger(fPMAN)-1)) begin
251 //if(lv_actual_product_exponent_temp < unpack(-bias_temp-fromInteger(fPMAN)-1)) begin
252 if((muladd == 1'b0 || (muladd==1'b1 && (add_flags[3]==1'b1 || add_flags[4]==1'b1))) && lv_product_is_zero == 1'b0)
253 lv_product_underflow = 1;
254 `ifdef verbose $display("lv_product_underflow!!!"); `endif
257 if msb of product is 1 then the case is 1x.xxxx
258 product is shifted right once to make it 01.xxxx
259 we don't care what is the exponent, just increase it by one
260 actual exponent is also increased by one since exponent is increased by one
261 this increasing of exponent leading to overflow is handled in the overflow case
262 msb_zeros is increased for further arising conditions
264 //Change-4 Using the previously computed msb_zeros. Synthesis will detect this anyhow, but still. Fanout?
265 if(is_msb_zeros==1'b0) begin
266 //if(msb_zeros == 0) begin
267 lv_product_mantissa = lv_product_mantissa >> 1;
268 lv_product_exponent = lv_product_exponent + 1;
269 lv_actual_product_exponent = lv_actual_product_exponent + 1;
270 msb_zeros = msb_zeros + 1;
272 // possible shift is positive when exponent is lesser than -126
274 //Change-5 Possible shift needn't use lv_actual_product_exponent -- It's enough if exponent is used I guess
275 // Int#(fpexp2) possible_shift = 1-zeroExtend(unpack(bias))-(lv_actual_product_exponent);
276 Int#(fpexp2) possible_shift = 1-unpack(lv_product_exponent);
278 //Experiment-1 -- Do all the operations in parallel and use the if-else for just assignments
279 lsb_zeros = pack(countZerosLSB(lv_product_mantissa));
280 let lv_product_mantissa_shiftR = (lv_product_mantissa >> pack(possible_shift));
281 //lv_product_mantissa_shiftR = {lv_product_mantissa_shiftR[iMPFPMAN2:1], lv_product_mantissa_shiftR[0] | lv_sticky};
282 let lv_product_exponent_inc_shift = lv_product_exponent + pack(possible_shift);
284 let shift_neg = ~pack(possible_shift)+1;
286 let lv_product_mantissa_shiftL_expo = lv_product_mantissa << (shift_neg);
287 let lv_product_exponent_sub_shift = lv_product_exponent - (shift_neg);
289 let lv_product_mantissa_shiftL_zerosMSB = lv_product_mantissa << (msb_zeros - 1);
290 let lv_product_exponent_sub_zerosMSB = lv_product_exponent - (zeroExtend(msb_zeros) - 1);
294 i) the product is 1x.xxxx and shifted right once
295 ii) the product is 01.xxxx already
296 if possible_shift is negative or zero, it means that exponent is -126 or greater
297 and thus the product is already normalized
298 but if possible_shift is positive, it means that exponent is < -126
299 and thus product is shifted right to make exponent -126 and the result is subnormal
301 if(possible_shift > 0) begin
302 //Setting sticky if all lsb zeros are removed out
304 //Is there a better logic for this? Since, lsb_zeros is a big if-else logic
305 //lsb_zeros = pack(countZerosLSB(lv_product_mantissa));
306 if(possible_shift > unpack(zeroExtend(lsb_zeros)) || lv_product_mantissa[0] == 1)
309 lv_product_mantissa = {lv_product_mantissa_shiftR[iMPFPMAN2:1], lv_product_mantissa_shiftR[0]|lv_sticky};
310 lv_sticky = lv_product_mantissa[0];
311 lv_product_exponent = lv_product_exponent_inc_shift;
313 `ifdef verbose $display("possible_shift",possible_shift); `endif
314 /*if(mul==1 && lv_product_is_zero==0)
315 lv_product_underflow = 1;*/
318 `ifdef verbose $display("lv_product_exponent : %d bin : %b",lv_product_exponent,lv_product_exponent); `endif
319 `ifdef verbose $display("lv_product_mantissa = %b lv_product_exponent : %d since exp < -126", lv_product_mantissa,lv_product_exponent); `endif
320 `ifdef verbose $display("and thus the sticky bit = %b", lv_sticky); `endif
321 // lv_product_is_subnormal = 1;
325 msb_zeros != 1 means product is of the form 00.xxxx, important case
327 else if(msb_zeros != 'b1) begin
329 if possible shift is < the number of leading zeros then the number can't be made normal
331 if((shift_neg) < zeroExtend(msb_zeros - 1)) begin
332 lv_product_mantissa = lv_product_mantissa_shiftL_expo;
333 lv_product_exponent = lv_product_exponent_sub_shift;
334 // lv_product_is_subnormal = 1;
337 if exponent affords to give away enough such that shifting left leads to 01.xxxx and exponent >= -126
340 lv_product_mantissa = lv_product_mantissa_shiftL_zerosMSB;
341 lv_product_exponent = lv_product_exponent_sub_zerosMSB;
342 // lv_product_is_subnormal = 0;
// Hand the normalised product and the pass-through fields to rl_stage_3.
346 ff_stage2 <= Stage2_data_type{
347 lv_product_sign : lv_product_sign,
348 lv_negate : lv_negate,
349 lv_product_exponent : lv_product_exponent,
350 lv_product_mantissa : lv_product_mantissa,
351 lv_operand3 : lv_operand3,
352 add_flags : add_flags,
353 operation : operation,
356 rounding_mode : rounding_mode,
357 lv_product_is_invalid : lv_product_is_invalid,
358 lv_product_is_zero : lv_product_is_zero,
359 lv_product_is_infinity : lv_product_is_infinity,
360 lv_product_overflow : lv_product_overflow,
361 lv_product_underflow : lv_product_underflow,
362 quiet_nan_two : quiet_nan_two
// Alignment stage: fires when the state is Stage2 and no flush is pending.
// Reads ff_stage2, widens the product and operand-3 mantissas to the fmaman-wide datapath,
// classifies the invalid / zero / infinity special cases, aligns the operand with the smaller
// exponent by a right shift (keeping a sticky bit), decides the result sign and the effective
// add/subtract operation, writes ff_stage4 and moves the state to Stage3.
// NOTE(review): several branch lines and declarations (e.g. op2_gt_op3, lv_sticky) are not visible in this listing.
368 rule rl_stage_3(rg_state_handler == Stage2 && !wr_flush);
370 rg_state_handler <= Stage3; // lets rl_stage4 fire next
371 let lv_negate = ff_stage2.lv_negate;
372 let lv_product_exponent = ff_stage2.lv_product_exponent;
373 let lv_product_mantissa = ff_stage2.lv_product_mantissa;
374 let lv_operand3 = ff_stage2.lv_operand3;
375 let add_flags = ff_stage2.add_flags;
376 let operation = ff_stage2.operation;
377 let mul = ff_stage2.mul;
378 let muladd = ff_stage2.muladd;
379 let lv_product_sign = ff_stage2.lv_product_sign;
380 let lv_product_is_invalid = ff_stage2.lv_product_is_invalid;
381 let lv_product_is_zero = ff_stage2.lv_product_is_zero;
382 let lv_rounding_mode = ff_stage2.rounding_mode;
383 let lv_product_is_infinity = ff_stage2.lv_product_is_infinity;
384 let lv_product_overflow = ff_stage2.lv_product_overflow;
385 let lv_product_underflow = ff_stage2.lv_product_underflow;
386 let quiet_nan_two = ff_stage2.quiet_nan_two;
387 let inp_denormal = ff_stage2.inp_denormal;
388 //ff_stage2 <= tagged Invalid;
// Operand "2" is the product, operand "3" is the addend; _negate flips the sign of both.
390 Bit#(1) sign2 = lv_product_sign ^ lv_negate;
391 Bit#(fpexp2) exponent2 = lv_product_exponent;
392 Bit#(fmaman) mantissa2 = zeroExtendLSB(lv_product_mantissa); // product mantissa kept in the top bits, zero-padded below
393 Bit#(1) sign3 = lv_operand3[fPINP-1] ^ lv_negate;
394 Bit#(fpexp2) exponent3 = {2'b0, lv_operand3[fPINP-2:fPMAN]};
395 Bit#(fmaman) mantissa3 = 0;
396 Bit#(fpman) lv_man3 = lv_operand3[fPMAN-1:0];
397 Bit#(fpexp) lv_exp_max = '1;
398 bit lv_op3_is_invalid = add_flags[2] | add_flags[0];
399 bit lv_op3_is_infinity = add_flags[1];
400 bit lv_op3_is_zero = add_flags[3];
401 bit op3_is_subnormal = add_flags[4];
402 bit quiet_nan_three = add_flags[2];
403 bit expo3_zero = |exponent3; // OR-reduction: 1 when exponent3 is NON-zero (despite the name); used below as the implicit bit
404 //Change-7 Replaced all instances of operation^sign3 with op_xor_sign3
405 bit op_xor_sign3 = operation ^ sign3;
407 //Change-6 Avoiding exponent3==0 mux, but does that help?
408 if(lv_op3_is_infinity==0 && lv_op3_is_invalid ==0 && lv_op3_is_zero==0) begin
409 mantissa3 = zeroExtendLSB({1'b0,expo3_zero,lv_man3});
410 /* if(exponent3 == '0)
411 mantissa3 = zeroExtendLSB({2'b0,lv_man3});
413 mantissa3 = zeroExtendLSB({2'b01,lv_man3});*/
416 exponent3 = exponent3 + zeroExtend(op3_is_subnormal); // bump the exponent by one when the subnormal flag (add_flags[4]) is set
418 Bit#(1) lv_result_is_invalid = 0;
419 Bit#(2) lv_result_is_infinity = 0;
420 Bit#(2) lv_result_is_zero = 0;
422 if(quiet_nan_two == 0 && lv_product_is_invalid == 1)
423 quiet_nan_three = 0; //0*inf case
425 //Result is invalid cases
426 if(lv_op3_is_invalid == 1 || lv_product_is_invalid == 1)
427 lv_result_is_invalid = 1;
429 //Result is zero cases
430 else if(lv_op3_is_zero == 1 && lv_product_is_zero == 1) begin
432 if((lv_rounding_mode == 'b010) && (sign2 | (op_xor_sign3)) == 1)
433 lv_result_is_zero = 2'b11;
434 else if((lv_rounding_mode != 'b010) && (sign2 & (op_xor_sign3)) == 1)
435 lv_result_is_zero = 2'b11;
438 lv_result_is_zero = 2'b01;
440 lv_result_is_zero = {op_xor_sign3,1'b1};
444 lv_result_is_zero = {sign2,1};
447 //Result is infinity cases
448 else if(lv_product_is_infinity == 1 && lv_op3_is_infinity == 1) begin
449 lv_result_is_infinity = {sign2, ~(sign2 ^ (op_xor_sign3))};
450 lv_result_is_invalid = ~lv_result_is_infinity[0];
451 quiet_nan_two = 0; //inf * qNaN + inf case
453 else if(lv_product_is_infinity == 1 || lv_op3_is_infinity == 1) begin
454 lv_result_is_infinity = {((lv_product_is_infinity & ~lv_op3_is_infinity) & sign2) | ((~lv_product_is_infinity & lv_op3_is_infinity) & (op_xor_sign3)), 1};
456 if(lv_product_is_zero == 1) begin
// Exponent alignment: the larger exponent becomes the result exponent and the other operand's
// mantissa is shifted right by the exponent difference.
461 Bit#(fpexp2) lv_minuend, lv_subtrahend;
462 Bit#(fpexp2) exponent_difference = '0;
463 Bit#(fpexp2) resultant_exponent = '0;
466 Bit#(fmaman) mantissa_to_shift;
467 let lv_zeros_on_right;
470 if(exponent2 > exponent3) begin
471 lv_minuend = exponent2;
472 lv_subtrahend = exponent3;
473 mantissa_to_shift = mantissa3;
477 lv_minuend = exponent3;
478 lv_subtrahend = exponent2;
479 mantissa_to_shift = mantissa2;
483 resultant_exponent = lv_minuend;
484 exponent_difference = lv_minuend - lv_subtrahend;
485 lv_zeros_on_right = zeroExtend(pack(countZerosLSB(mantissa_to_shift)));
486 Bit#(1) shifted_operand_zero = (mantissa_to_shift == '0) ? 1:0;
487 mantissa_to_shift = mantissa_to_shift >> exponent_difference;
// Sticky condition: set bits were shifted out (fewer trailing zeros than the shift amount) or the
// shifted LSB is already 1, and the operand was not zero to begin with.
490 if(((lv_zeros_on_right < exponent_difference) || (mantissa_to_shift[0] == 1)) && shifted_operand_zero != 1)
493 mantissa_to_shift = {mantissa_to_shift[fMAMAN-1:1], lv_sticky};
495 if(op2_gt_op3 == 1) begin
496 mantissa3 = mantissa_to_shift;
499 mantissa2 = mantissa_to_shift;
501 quiet_nan_two = quiet_nan_two & ~add_flags[0];
502 `ifdef verbose $display("sign2 = %b exponent2 = %b mantissa2 = %b", sign2, resultant_exponent, mantissa2); `endif
503 `ifdef verbose $display("sign3 = %b exponent3 = %b mantissa3 = %b", sign3, resultant_exponent, mantissa3); `endif
504 `ifdef verbose $display(); `endif
505 bit man2_gt_man3 = 0;
506 if(mantissa2 > mantissa3) man2_gt_man3 = 1; //Can this be optimized?
507 bit lv_resultant_sign = (man2_gt_man3 & sign2) | (~man2_gt_man3 & (operation ^ sign3)); // Using Karnaugh maps
508 bit actual_operation = sign2 ^ (operation ^ sign3); // 0 for addition 1 for subtraction //Can this be pushed back to the prev. stage, saving 1 bit
// Hand the aligned mantissas, sign/operation decision and special-case flags to rl_stage4.
510 ff_stage4<= Stage4_data_type{
511 lv_resultant_sign : lv_resultant_sign,
512 actual_operation : actual_operation,
513 mantissa2 : mantissa2,
514 mantissa3 : mantissa3,
515 man2_gt_man3 : man2_gt_man3,
516 resultant_exponent : resultant_exponent,
517 rounding_mode : lv_rounding_mode,
518 result_is_invalid : lv_result_is_invalid,
519 result_is_infinity : lv_result_is_infinity,
520 result_is_zero : lv_result_is_zero,
521 product_overflow : lv_product_overflow,
522 product_underflow : lv_product_underflow,
523 quiet_nan_two : quiet_nan_two,
524 quiet_nan_three : quiet_nan_three,
525 lv_product_is_zero : lv_product_is_zero
// Add/subtract stage: fires when the state is Stage3 and no flush is pending.
// Reads ff_stage4, adds the aligned mantissas or subtracts the smaller from the larger according to
// actual_operation, detects an all-zero result, counts the leading zeros of the result, writes
// ff_stage5 and moves the state to Stage4.
529 rule rl_stage4(rg_state_handler == Stage3 && !wr_flush);
530 rg_state_handler <= Stage4; // lets rl_stage_5_final_stage fire next
531 let lv_resultant_sign = ff_stage4.lv_resultant_sign;
532 let man2_gt_man3 = ff_stage4.man2_gt_man3;
533 let mantissa2 = ff_stage4.mantissa2;
534 let mantissa3 = ff_stage4.mantissa3;
535 let actual_operation = ff_stage4.actual_operation;
536 Bit#(fpexp2) resultant_exponent = ff_stage4.resultant_exponent;
537 Bit#(3) lv_rounding_mode = ff_stage4.rounding_mode;
538 bit lv_result_is_invalid = ff_stage4.result_is_invalid;
539 Bit#(2) lv_result_is_infinity = ff_stage4.result_is_infinity;
540 Bit#(2) lv_result_is_zero = ff_stage4.result_is_zero;
541 bit lv_product_overflow = ff_stage4.product_overflow;
542 let quiet_nan_two = ff_stage4.quiet_nan_two;
543 let quiet_nan_three = ff_stage4.quiet_nan_three;
544 let lv_product_underflow = ff_stage4.product_underflow;
545 let lv_product_is_zero = ff_stage4.lv_product_is_zero;
546 //ff_stage4 <= tagged Invalid;
548 Bit#(fmaman) resultant_mantissa = 0;
549 Bit#(fmaman) add_mantissa = mantissa2 + mantissa3;
// For subtraction, order the operands so the larger mantissa is the minuend (no negative result).
552 Bit#(fmaman) sub_mantissa1 = (man2_gt_man3==1)? mantissa2 : mantissa3;
553 Bit#(fmaman) sub_mantissa2 = (man2_gt_man3==1)? mantissa3 : mantissa2;
554 Bit#(fmaman) sub_mantissa = sub_mantissa1 - sub_mantissa2;
// actual_operation: 0 selects the sum, otherwise the difference.
557 if(actual_operation == 0)
558 resultant_mantissa = add_mantissa;
560 resultant_mantissa = sub_mantissa;
562 //Case when Mantissa2 = Mantissa3 and hence the result is zero
563 Bit#(2) add_sub_is_zero = 0;
// Bit 0 flags the zero result; bit 1 (the sign later given to that zero) is set only for rounding mode 3'b010.
565 if(resultant_mantissa == '0) begin
566 if(lv_rounding_mode == 3'b010) begin
567 add_sub_is_zero = 2'b11;
570 add_sub_is_zero = 2'b01; // checks the resultant mantissa for zero
574 let lv_zeros_on_left = pack(countZerosMSB(resultant_mantissa)); // leading-zero count, used for normalisation in the final stage
575 ff_stage5 <= Stage5_data_type{
576 resultant_mantissa : resultant_mantissa,
577 add_sub_is_zero : add_sub_is_zero,
578 lv_resultant_sign : lv_resultant_sign,
579 resultant_exponent : resultant_exponent,
580 lv_rounding_mode : lv_rounding_mode,
581 lv_result_is_invalid : lv_result_is_invalid,
582 lv_result_is_infinity : lv_result_is_infinity,
583 lv_result_is_zero : lv_result_is_zero,
584 lv_product_overflow : lv_product_overflow,
585 quiet_nan_two : quiet_nan_two,
586 quiet_nan_three : quiet_nan_three,
587 lv_product_underflow : lv_product_underflow,
588 lv_product_is_zero : lv_product_is_zero,
589 lv_zeros_on_left : lv_zeros_on_left
// Final stage: fires when the state is Stage4 and no flush is pending.
// Reads ff_stage5, normalises the mantissa (right shift by one on carry-out, or left shift limited by
// the exponent), rounds according to lv_rounding_mode, selects the packed output among the special
// cases and the normal result, assembles the 5-bit flag vector, writes ff_final_out and returns the
// state to Begin.
// NOTE(review): several branch lines and declarations (e.g. lv_round_up, ex_overflow) are not visible in this listing.
596 rule rl_stage_5_final_stage(rg_state_handler == Stage4 && !wr_flush);
597 Bit#(fmaman) resultant_mantissa = ff_stage5.resultant_mantissa;
598 let add_sub_is_zero = ff_stage5.add_sub_is_zero;
599 let resultant_exponent = ff_stage5.resultant_exponent;
600 let lv_resultant_sign = ff_stage5.lv_resultant_sign;
601 let lv_rounding_mode = ff_stage5.lv_rounding_mode;
602 let lv_result_is_invalid = ff_stage5.lv_result_is_invalid;
603 let lv_result_is_infinity = ff_stage5.lv_result_is_infinity;
604 let lv_result_is_zero = ff_stage5.lv_result_is_zero;
605 let lv_product_overflow = ff_stage5.lv_product_overflow;
606 let quiet_nan_two = ff_stage5.quiet_nan_two;
607 let quiet_nan_three = ff_stage5.quiet_nan_three;
608 let lv_product_underflow = ff_stage5.lv_product_underflow;
609 let lv_product_is_zero = ff_stage5.lv_product_is_zero;
610 let lv_zeros_on_left = ff_stage5.lv_zeros_on_left;
611 bit add_sub_subnormal = 0;
612 //ff_stage5 <= tagged Invalid;
613 rg_state_handler <= Begin; // the state machine returns to Begin once the result is produced
614 bit lv_sticky = resultant_mantissa[0];
// All candidate normalisation results are computed up front; the branches below only select among them.
617 let resultant_exponent_sub = resultant_exponent -1;
618 let resultant_mantissa_unnormalized = resultant_mantissa >> 1;
619 let resultant_exponent_inc = resultant_exponent + 1;
620 let resultant_mantissa_norm_expo = resultant_mantissa << resultant_exponent_sub;
621 let resultant_mantissa_norm_zerosMSB = resultant_mantissa << (lv_zeros_on_left - 1);
622 let resultant_exponent_sub_zerosMSB = resultant_exponent - ((zeroExtend(lv_zeros_on_left)) - 1);
// Top bit set: shift right by one, fold the shifted-out bit into the LSB, and increment the exponent.
625 if(resultant_mantissa[fMAMAN-1] == 1'b1) begin
626 //resultant_mantissa = resultant_mantissa >> 1;
627 resultant_mantissa = {resultant_mantissa_unnormalized[fMAMAN-1:1], lv_sticky | resultant_mantissa_unnormalized[0]};
628 resultant_exponent = resultant_exponent_inc;
629 //resultant_exponent = resultant_exponent + 1;
// Leading zeros present: shift left, but by no more than exponent-1; if limited, the exponent becomes 0 and the result is marked subnormal.
632 else if(resultant_mantissa[fMAMAN-2] != 1'b1) begin
633 if((zeroExtend(lv_zeros_on_left) - 1) > resultant_exponent_sub) begin
634 //if((zeroExtend(lv_zeros_on_left) - 1) > (resultant_exponent - 1)) begin
635 `ifdef verbose $display("resultant_exponent : %d",resultant_exponent); `endif
636 //resultant_mantissa = resultant_mantissa << (resultant_exponent - 1);
637 resultant_mantissa = resultant_mantissa_norm_expo;
638 resultant_exponent = 0;
639 `ifdef verbose $display("add_sub subnormal!!!"); `endif
640 add_sub_subnormal = 1;
643 //resultant_mantissa = resultant_mantissa << (lv_zeros_on_left - 1);
644 //resultant_exponent = resultant_exponent - (zeroExtend(lv_zeros_on_left) - 1);
645 resultant_mantissa = resultant_mantissa_norm_zerosMSB;
646 resultant_exponent = resultant_exponent_sub_zerosMSB;
651 `ifdef verbose $display("resultant_exponent : %b",resultant_exponent); `endif
652 Bit#(TSub#(fpexp,1)) bias = '1;
654 Int#(fpexp2) res_exp_int = unpack(resultant_exponent) - zeroExtend(unpack(bias));
655 `ifdef verbose $display("resultant_exponent : %d res_exp_int : %d",resultant_exponent, res_exp_int); `endif
657 if(res_exp_int > zeroExtend(unpack(bias))) begin
658 lv_product_overflow = 1;
661 /* else if(res_exp_int == zeroExtend(unpack(bias)))
663 else if(resultant_exponent[fPEXP+1] == 1 && lv_product_is_zero == 0) begin
664 lv_product_underflow = 1;
665 `ifdef verbose $display("Underflow"); `endif
667 /*`ifdef verbose $display("resultant_sign = %b resultant_exponent = %b resultant_mantissa = %b", resultant_sign, resultant_exponent, resultant_mantissa); `endif
668 `ifdef verbose $display(); `endif
670 Bit#(TAdd#(fpman,2)) lv_rounded_mantissa = resultant_mantissa[fMAMAN-1:iMPFPMAN2];
671 Bit#(2) lv_res_man = resultant_mantissa[fMAMAN-1:fMAMAN-2];
672 Bit#(TSub#(impfpman2,2)) lv_res1 = resultant_mantissa[iMPFPMAN2-3:0];
673 bit lv_guard = resultant_mantissa[iMPFPMAN2-1];
674 bit lv_round = resultant_mantissa[iMPFPMAN2-2];
675 lv_sticky = |lv_res1;
677 bit lv_inexact = lv_guard | lv_round | lv_sticky;
// Round-up decision per rounding-mode encoding: 000 -> guard and (kept LSB or round or sticky);
// 100 -> guard alone; 010 -> any discarded bit when the result is negative; 011 -> any discarded bit when positive.
679 if(lv_rounding_mode == 'b000)
680 lv_round_up = lv_guard & (resultant_mantissa[iMPFPMAN2] | lv_round | lv_sticky);
681 else if(lv_rounding_mode == 'b100)
682 lv_round_up = lv_guard ;//& (lv_round | lv_sticky | ~lv_resultant_sign);
683 else if(lv_rounding_mode == 'b010)
684 lv_round_up = lv_inexact & (lv_resultant_sign);
685 else if(lv_rounding_mode == 'b011)
686 lv_round_up = lv_inexact & (~lv_resultant_sign);
688 if(add_sub_subnormal == 1 && lv_inexact == 1)
689 lv_product_underflow = 1;
691 `ifdef verbose $display("lv_guard = %b lv_round = %b lv_sticky = %b", lv_guard, lv_round, lv_sticky); `endif
692 `ifdef verbose $display("lv_round_up = %b", lv_round_up); `endif
693 `ifdef verbose $display("lv_rounded_mantissa = %b", lv_rounded_mantissa); `endif
696 lv_rounded_mantissa = lv_rounded_mantissa + 1;
698 `ifdef verbose $display("lv_rounded_mantissa = %b after roundup", lv_rounded_mantissa); `endif
// Rounding carried out of the mantissa: renormalise by one position and bump the exponent.
700 if(lv_rounded_mantissa[fPMAN+1] == 1) begin
701 resultant_exponent = resultant_exponent + 1;
702 lv_rounded_mantissa = lv_rounded_mantissa >> 1;
704 else if(lv_res_man == 'b0 && lv_rounded_mantissa[fPMAN] == 1) begin
705 resultant_exponent = resultant_exponent + 1;
708 Bit#(fpexp) lv_res_exp_temp = resultant_exponent[fPEXP-1:0];
709 Bit#(fpman) man_all_zeros = '0;
710 Bit#(TSub#(fpman,1)) man1_all_zeros = '0;
711 Bit#(fpman) man_all_ones = '1;
712 Bit#(fpexp) exp_all_zeros = '0;
713 Bit#(TSub#(fpexp,1)) exp_all_ones_1 = '1;
714 Bit#(fpinp) lv_final_output = 0;
715 Bit#(fpexp) exp_all_ones = '1;
716 Bit#(fpexp) out_exp = resultant_exponent[fPEXP-1:0];
717 Bit#(fpman) out_man = lv_rounded_mantissa[fPMAN-1:0];
720 //Can I put these invalid, infinity, zero, cases in the first stage which will clear some of the paths????
// Output selection in priority order: invalid (all-ones exponent, top mantissa bit set), infinity,
// zero from the special-case logic, zero from the add/sub, overflow, then the normal rounded result.
721 if(lv_result_is_invalid == 1) begin
722 lv_final_output = {1'b0, exp_all_ones,1'b1, man1_all_zeros};
724 else if(lv_result_is_infinity[0] == 1) begin
725 lv_final_output = {lv_result_is_infinity[1], exp_all_ones, man_all_zeros};
726 ex_overflow = 0; lv_product_underflow = 0; lv_inexact = 0;
728 else if(lv_result_is_zero[0] == 1) begin
729 lv_final_output = {lv_result_is_zero[1],exp_all_zeros, man_all_zeros};
731 else if(add_sub_is_zero[0] == 1) begin
732 lv_final_output = {add_sub_is_zero[1], exp_all_zeros , man_all_zeros};
// Overflow: modes 001, 010 with a positive result and 011 with a negative result produce the largest
// finite pattern (exponent all ones except the LSB, mantissa all ones); the remaining case produces infinity.
734 else if(lv_product_overflow == 1 || lv_res_exp_temp == '1) begin
737 if(lv_rounding_mode == 'b001)
738 lv_final_output={lv_resultant_sign,{exp_all_ones_1,1'b0},man_all_ones}; //??
739 else if(lv_rounding_mode == 'b010 && lv_resultant_sign == 0)
740 lv_final_output={lv_resultant_sign,{exp_all_ones_1,1'b0},man_all_ones}; //??
741 else if(lv_rounding_mode == 'b011 && lv_resultant_sign == 1)
742 lv_final_output={lv_resultant_sign,{exp_all_ones_1,1'b0},man_all_ones}; //??
744 lv_final_output={lv_resultant_sign,exp_all_ones,man_all_zeros};
748 lv_final_output = {lv_resultant_sign, out_exp, out_man};
751 if(lv_product_underflow == 1'b1 && lv_rounded_mantissa[fPMAN]==1'b1 && lv_rounding_mode!=3'b011) //Tininess vanishing after rounding
752 lv_product_underflow = 0;
754 if(lv_result_is_invalid == 1) begin //For effectively handling the flag cases between add,sub,mul and fused mul add
757 lv_product_underflow = 0;
758 if(quiet_nan_two == 1 || quiet_nan_three == 1)
759 lv_result_is_invalid = 0;
// Flag vector layout, MSB first: {invalid, constant 0, overflow, underflow, inexact}.
762 Bit#(5) fflags={lv_result_is_invalid,1'b0,ex_overflow,lv_product_underflow,lv_inexact};
763 `ifdef verbose $display("lv_inv : %b ex_overflow: %b lv_inexact : %b",lv_result_is_invalid,ex_overflow,lv_inexact); `endif
764 ff_final_out <= Floating_output{
765 final_result : lv_final_output,
769 `ifdef verbose $display("FMA: Result: %h fflags: %8h",lv_final_output, {24'b0,fflags}); `endif
// Input method: unpacks the three operands and their flag vectors, classifies the product as
// invalid / infinity / zero from the operand-1 and operand-2 flags, computes the summed (re-biased)
// exponent, the product sign and the integer product of the two significands, latches everything
// into ff_input_register and moves the state to Stage1.
// Flag-bit usage visible here: [3] zero, [1] infinity, [4] subnormal, [0] and [2] NaN kinds.
// NOTE(review): the lines assigning lv_inv / lv_inf inside the branches are not visible in this listing.
772 method Action _start(Tuple3#(Bit#(1),Bit#(fpexp),Bit#(fpman)) _operand1, Tuple3#(Bit#(1),Bit#(fpexp),Bit#(fpman)) _operand2,Tuple3#(Bit#(1),Bit#(fpexp),Bit#(fpman)) _operand3, Bit#(3) rounding_mode, bit operation, bit _negate, bit mul, bit muladd, Tuple3#(Bit#(5),Bit#(5),Bit#(5)) flags);
775 Bit#(TSub#(fpexp,1)) bias = '1; //Bias for the exponent: 127 for SP and 1023 for DP
776 Bit#(1) sign1 = tpl_1(_operand1);
777 Bit#(1) sign2 = tpl_1(_operand2);
778 Bit#(1) sign3 = tpl_1(_operand3);
779 Bit#(fpexp) lv_exponent1 = tpl_2(_operand1);
780 Bit#(fpexp) lv_exponent2 = tpl_2(_operand2);
781 Bit#(fpexp) lv_exponent3 = tpl_2(_operand3);
782 Bit#(fpman) lv_mantissa1 = tpl_3(_operand1);
783 Bit#(fpman) lv_mantissa2 = tpl_3(_operand2);
784 Bit#(fpman) lv_mantissa3 = tpl_3(_operand3);
785 Bit#(5) flags1 = tpl_1(flags);
786 Bit#(5) flags2 = tpl_2(flags);
787 Bit#(5) flags3 = tpl_3(flags);
788 Bit#(1) lv_op1_is_zero = flags1[3]; //1 when operand1=0
789 Bit#(1) lv_op2_is_zero = flags2[3]; //1 when operand2=0
790 Bit#(1) lv_op1_infinity = flags1[1]; //1 when operand1=inf
791 Bit#(1) lv_op2_infinity = flags2[1]; //1 when operand2=inf
792 Bit#(1) lv_op1_subnormal = flags1[4] | flags1[3]; //1 when operand1 is subnormal or zero (both have no implicit leading one)
793 Bit#(1) lv_op2_subnormal = flags2[4] | flags2[3]; //1 when operand2 is subnormal or zero (both have no implicit leading one)
794 Bit#(1) lv_inf = 0; //Bit indicating infinity
795 Bit#(1) lv_inv = 0; //Invalid Bit
796 Bit#(1) lv_zero = 0; //Zero bit
797 bit quiet_nan_two = (flags1[2] & ~flags2[0]) | (flags2[2] & ~flags1[0]);
799 if((((flags1[0] | flags1[2])==1) || (flags2[0] | flags2[2])==1)) //If either of the operands are NaN's (Quiet or Signalling - Not distinguishing between them here)
801 else if(lv_op1_infinity==1 || lv_op2_infinity==1) begin //If either of the operands are Infinity
802 if(lv_op1_is_zero == 1 || lv_op2_is_zero ==1) begin //Provided at least one of the operands are infinity, if either of them are zero, then res is NaN (0*inf)
806 lv_inf = 1; //Else result is infinity - inf +/- op2 = inf
810 else if(lv_op1_is_zero == 1 || lv_op2_is_zero == 1)
811 lv_zero = 1; //If they are not infinity - Checked for Zero, if it is then product is zero (0*x = 0)
814 `ifdef verbose $display("lv_inv : %h lv_inf : %h lv_zero : %h",lv_inv,lv_inf,lv_zero); `endif
815 `ifdef verbose $display("flags1 : %b flags2 : %b flags3 : %b",flags1,flags2,flags3); `endif
818 When normal and denormal number is multiplied, exponent is
819 (biased_exponent - bias) + (1 - bias) + bias = biased_exponent - bias + 1;
820 either _operand1[30:23] == 0 or _operand2[30:23] == 0 for the above if condition so no harm in adding both
823 Bit#(fpexp2) exp1_temp = {2'b0,lv_exponent1};
824 Bit#(fpexp2) exp2_temp = {2'b0,lv_exponent2};
825 Bit#(fpexp2) lv_summed_exponent = exp1_temp + exp2_temp - zeroExtend(bias) + zeroExtend(lv_op1_subnormal) + zeroExtend(lv_op2_subnormal);
826 Bit#(1) lv_sign = sign1 ^ sign2;
828 `ifdef verbose $display("lv_summed_exponent = %b", lv_summed_exponent/*, lv_actual_exponent*/); `endif
// The implicit leading bit of each significand is 1 unless the operand is flagged subnormal or zero.
830 Bit#(impfpman2) x = zeroExtend({~lv_op1_subnormal, lv_mantissa1})*zeroExtend({~lv_op2_subnormal, lv_mantissa2}); //Single Cycle Int Mul
831 rg_state_handler <= Stage1; // lets rl_stage1_after_input_stage fire next
832 ff_input_register<= Input_data_type{
833 product_mantissa : x,
834 lv_summed_exponent : lv_summed_exponent,
836 _operand3 : {sign3,lv_exponent3,lv_mantissa3},
837 rounding_mode : rounding_mode,
842 _operation : operation,
846 quiet_nan_two : quiet_nan_two,
847 inp_denormal : lv_op1_subnormal | lv_op2_subnormal
852 method Floating_output#(fpinp) get_result();
// Testbench for the fused multiply-add unit. It has no external interface (Empty).
861 module mkTb_fpu_fm_add_sub(Empty);
// Unit under test, instantiated with fpinp=32, fpman=23, fpexp=8.
863 Ifc_fpu_fm_add_sub#(32,23,8) uut <- mkfpu_fm_add_sub();
// condFlags: derives a 5-bit class-flag vector for each of three operands.
// Each operand is supplied as a (mantissa, exponent) tuple.
// Every returned vector is {denormal, zero, quiet NaN, infinity, signalling NaN}, MSB first.
// NOTE(review): man1/man2/man3 and `s` are defined on lines not shown here; presumably
// manN = tpl_1 of the corresponding tuple and s = valueOf(m) - confirm.
865 function Tuple3#(Bit#(5), Bit#(5), Bit#(5)) condFlags (Tuple2#(Bit#(m), Bit#(e)) x, Tuple2#(Bit#(m), Bit#(e)) y, Tuple2#(Bit#(m),Bit#(e)) z);
868 let expo1 = tpl_2(x);
870 let expo2 = tpl_2(y);
872 let expo3 = tpl_2(z);
873 Bit#(5) flags1, flags2,flags3;
// Predicates for operand 1: exponent all-zero / all-one, mantissa all-zero / all-one, mantissa top bit set.
874 Bool expZ1 = (expo1 == 0);
875 Bool manZ1 = (man1 == 0);
876 Bool expO1 = (expo1 == '1);
877 Bool manO1 = (man1 == '1);
// Unlike topB2/topB3 this omits the "!= 0" term; the results agree because a set top bit already implies a non-zero mantissa.
878 Bool topB1 = (man1[s-1] == 1);
// Same predicates for operand 2.
879 Bool expZ2 = (expo2 == 0);
880 Bool manZ2 = (man2 == 0);
881 Bool expO2 = (expo2 == '1);
882 Bool manO2 = (man2 == '1);
883 Bool topB2 = (man2[s-1] == 1 && man2 !=0);
// Same predicates for operand 3.
884 Bool expZ3 = (expo3 == 0);
885 Bool manZ3 = (man3 == 0);
886 Bool expO3 = (expo3 == '1);
887 Bool manO3 = (man3 == '1);
888 Bool topB3 = (man3[s-1] == 1 && man3 !=0);
// Denormal: exp==0 and man!=0. Zero: exp==0 and man==0. QNaN: exp all ones and mantissa top bit set.
// Infinity: exp all ones and man==0. SNaN: exp all ones, mantissa top bit clear, man!=0.
889 flags1 = {pack(expZ1 && !manZ1),pack(manZ1 && expZ1),pack(expO1 && topB1),pack(expO1 && manZ1),pack(expO1 && !topB1 && !manZ1)}; //Denormal, isZero, QNaN, Infinity, SNaN
890 flags2 = {pack(expZ2 && !manZ2),pack(manZ2 && expZ2),pack(expO2 && topB2),pack(expO2 && manZ2),pack(expO2 && !topB2 && !manZ2)}; //Denormal, isZero, QNaN, Infinity, SNaN
891 flags3 = {pack(expZ3 && !manZ3),pack(manZ3 && expZ3),pack(expO3 && topB3),pack(expO3 && manZ3),pack(expO3 && !topB3 && !manZ3)}; //Denormal, isZero, QNaN, Infinity, SNaN
892 return tuple3(flags1,flags2,flags3);
// getMantissa: returns the low m bits (the mantissa field) of each of three n-bit operands.
// The proviso ties the widths together as (m + 1) + e = n.
895 function Tuple3#(Bit#(m),Bit#(m), Bit#(m)) getMantissa (Bit#(n) op1, Bit#(n) op2, Bit#(n) op3)
896 provisos(Add#(TAdd#(m,1),e,n),
// expo is not used by the return expression below.
899 let expo = valueOf(e);
900 let man = valueOf(m);
901 return tuple3(op1[man-1:0],op2[man-1:0],op3[man-1:0]);
// getExp: returns the exponent field of each of three n-bit operands.
// The field is bits [n-2 : m], i.e. everything between the top bit and the m-bit mantissa.
// The proviso ties the widths together as (m + 1) + e = n.
904 function Tuple3#(Bit#(e), Bit#(e), Bit#(e)) getExp (Bit#(n) op1, Bit#(n) op2, Bit#(n) op3)
905 provisos(Add#(TAdd#(m,1),e,n),
908 let inp = valueOf(n);
909 let man = valueOf(m);
910 return tuple3(op1[inp-2:man], op2[inp-2:man], op3[inp-2:man]);
// isNaNBox: True when the upper 32 bits of the 64-bit value are all ones.
913 function Bool isNaNBox(Bit#(64) op);
914 return (op[63:32]=='1);
// setCanNaN: converts three 64-bit values to 32-bit values.
// For each operand: if isNaNBox holds (upper 32 bits all ones) the low 32 bits are returned,
// otherwise the constant 32'h7fc00000 is substituted (sign 0, exponent all ones, mantissa top bit set).
917 function Tuple3#(Bit#(32),Bit#(32),Bit#(32)) setCanNaN (Bit#(64) op1, Bit#(64) op2, Bit#(64) op3);
918 return tuple3(isNaNBox(op1)? truncate(op1) : 32'h7fc00000, isNaNBox(op2)? truncate(op2) : 32'h7fc00000, isNaNBox(op3)? truncate(op3) : 32'h7fc00000);
// Wrapper instances around the helper functions above, created with mkUniqueWrapper3 and
// invoked through their .func method. Both 32-bit and 64-bit field widths are instantiated;
// the visible rule rl_input1 uses only condFlags32, getMant32 and getExp32.
921 Wrapper3#(Tuple2#(Bit#(23), Bit#(8)),Tuple2#(Bit#(23), Bit#(8)), Tuple2#(Bit#(23), Bit#(8)), Tuple3#(Bit#(5),Bit#(5),Bit#(5))) condFlags32 <- mkUniqueWrapper3(condFlags);
922 Wrapper3#(Tuple2#(Bit#(52), Bit#(11)),Tuple2#(Bit#(52), Bit#(11)),Tuple2#(Bit#(52), Bit#(11)), Tuple3#(Bit#(5),Bit#(5),Bit#(5))) condFlags64 <- mkUniqueWrapper3(condFlags);
923 Wrapper3#(Bit#(32),Bit#(32),Bit#(32),Tuple3#(Bit#(23),Bit#(23),Bit#(23))) getMant32 <- mkUniqueWrapper3(getMantissa);
924 Wrapper3#(Bit#(32),Bit#(32),Bit#(32),Tuple3#(Bit#(8),Bit#(8),Bit#(8))) getExp32 <- mkUniqueWrapper3(getExp);
925 Wrapper3#(Bit#(64),Bit#(64),Bit#(64),Tuple3#(Bit#(52),Bit#(52),Bit#(52))) getMant64 <- mkUniqueWrapper3(getMantissa);
926 Wrapper3#(Bit#(64),Bit#(64),Bit#(64),Tuple3#(Bit#(11),Bit#(11),Bit#(11))) getExp64 <- mkUniqueWrapper3(getExp);
927 Wrapper3#(Bit#(64),Bit#(64),Bit#(64),Tuple3#(Bit#(32),Bit#(32),Bit#(32))) setCanonicalNaN <- mkUniqueWrapper3(setCanNaN);
// Counter incremented by rl_count_clock; the rules below use it to sequence the test.
929 Reg#(Bit#(32)) rg_clock <-mkReg(0);
// Alternative 64-bit test vectors, currently disabled.
930 //Reg#(Bit#(64)) operand1 <- mkReg(64'h17fffffffffff860);
931 //Reg#(Bit#(64)) operand2 <- mkReg(64'h0000000000000200);
932 //Reg#(Bit#(64)) operand3 <- mkReg(64'h000000000000005f);
// Active 32-bit test vector; operand3 is all zeros.
933 Reg#(Bit#(32)) operand1 <- mkReg(32'h31f36ab4);
934 Reg#(Bit#(32)) operand2 <- mkReg(32'h08835f4d);
935 Reg#(Bit#(32)) operand3 <- mkReg(32'h0);
// rl_count_clock: has no guard; increments rg_clock each time it fires and ends the
// simulation with $finish(0) when rg_clock reads 20.
937 rule rl_count_clock ;
938 rg_clock<=rg_clock+1;
939 if(rg_clock=='d20) $finish(0);
// rl_input1: fires when rg_clock == 1 and drives the single test vector into the unit under test.
942 rule rl_input1(rg_clock==1);
// Split each 32-bit operand into its 23-bit mantissa and 8-bit exponent, then derive the class flags.
943 let {man1,man2,man3} <- getMant32.func(operand1,operand2, operand3);
944 let {exp1,exp2,exp3} <- getExp32.func(operand1,operand2, operand3);
945 let x <- condFlags32.func(tuple2(man1,exp1),tuple2(man2,exp2),tuple2(man3,exp3));
// Bit 31 of each operand is its sign.
946 let sign1 = operand1[31];
947 let sign2 = operand2[31];
948 let sign3 = operand3[31];
// Arguments after the operand tuples, in order: rounding_mode=3'b0, operation=0, _negate=0, mul=0, muladd=1, flags=x.
949 uut._start(tuple3(sign1,exp1,man1),tuple3(sign2,exp2,man2),tuple3(sign3,exp3,man3),3'b0,1'b0,1'b0,1'b0,1'b1,x);
950 `ifdef verbose $display("giving inputs at %0d", rg_clock); `endif
955 let res = uut.get_result();
956 `ifdef verbose $display("Output = %h at %0d",res.final_result[31:0], rg_clock); `endif
961 `ifdef fpu_hierarchical
// mkfpu_fm_add_sub32: fixed-width wrapper exposing Ifc_fpu_fm_add_sub32.
// It instantiates mkfpu_fm_add_sub with fpinp=32, fpman=23, fpexp=8 and forwards both methods unchanged.
963 module mkfpu_fm_add_sub32(Ifc_fpu_fm_add_sub32);
964 Ifc_fpu_fm_add_sub#(32,23,8) uut <- mkfpu_fm_add_sub();
// Passes every argument straight through to the wrapped instance.
966 method Action _start(Tuple3#(Bit#(1),Bit#(8),Bit#(23)) _operand1, Tuple3#(Bit#(1),Bit#(8),Bit#(23)) _operand2,Tuple3#(Bit#(1),Bit#(8),Bit#(23)) _operand3, Bit#(3) rounding_mode, bit operation, bit _negate, bit mul, bit muladd, Tuple3#(Bit#(5),Bit#(5),Bit#(5)) flags);
967 uut._start(_operand1,_operand2,_operand3,rounding_mode,operation,_negate,mul,muladd,flags);
// Returns the wrapped instance's result as-is.
969 method Floating_output#(32) get_result();
970 return uut.get_result();
// mkfpu_fm_add_sub64: fixed-width wrapper exposing Ifc_fpu_fm_add_sub64.
// It instantiates mkfpu_fm_add_sub with fpinp=64, fpman=52, fpexp=11 and forwards both methods unchanged.
978 module mkfpu_fm_add_sub64(Ifc_fpu_fm_add_sub64);
979 Ifc_fpu_fm_add_sub#(64,52,11) uut <- mkfpu_fm_add_sub();
// Passes every argument straight through to the wrapped instance.
980 method Action _start(Tuple3#(Bit#(1),Bit#(11),Bit#(52)) _operand1, Tuple3#(Bit#(1),Bit#(11),Bit#(52)) _operand2,Tuple3#(Bit#(1),Bit#(11),Bit#(52)) _operand3, Bit#(3) rounding_mode, bit operation, bit _negate, bit mul,bit muladd, Tuple3#(Bit#(5),Bit#(5),Bit#(5)) flags);
981 uut._start(_operand1,_operand2,_operand3,rounding_mode,operation,_negate,mul,muladd, flags);
// Returns the wrapped instance's result as-is.
983 method Floating_output#(64) get_result();
984 return uut.get_result();
992 //module mkTb_fpu_fm_add_sub_2 (Empty);
994 //// RegFile #(Bit #(16), Bit #(100)) input_data <- mkRegFileFullLoad("./testcases/fma_inp_nor.txt");
995 //// RegFile #(Bit #(16), Bit #(68)) input_data <- mkRegFileFullLoad("./testcases/mul_denormal_testcases.txt");
996 // RegFile #(Bit #(16), Bit #(68)) input_data <- mkRegFileFullLoad("./testcases/Add_normal_testcases.hex");
997 // Reg #(Bit #(16)) index <- mkReg(0);
999 // Ifc_fpu_fm_add_sub#(32,23,8,16) multiplier <- mkfpu_fm_add_sub();
1000 // Reg #(Bit #(32)) state_clock <- mkReg(1);
1001 // Reg #(Bit #(1)) rg_state <- mkReg(0);
1003 // Reg#(int) cnt <- mkReg(0); //File Variable
1004 // let fh <- mkReg(InvalidFile) ; //File handler
1006 // //rule for file creation
1007 // rule open (cnt == 0 ) ;
1008 // File tb_mul_output <- $fopen("tb_madd_output.hex", "w+");
1009 // fh <= tb_mul_output;
1013 // rule state_clock_count;
1014 // state_clock <= state_clock + 1;
1017 // rule take_input_in (rg_state == 0);
1018 // // multiplier._start(input_data.sub(index)[99:68],input_data.sub(index)[67:36],input_data.sub(index)[35:4],0,input_data.sub(index)[2:0],0,0);
1019 // // multiplier._start(input_data.sub(index)[67:36],input_data.sub(index)[35:4],32'b0,0,input_data.sub(index)[2:0],0,0);
1020 // multiplier._start(32'h3f800000, input_data.sub(index)[67:36],input_data.sub(index)[35:4],0,input_data.sub(index)[2:0],0,0);
1021 // index <= index + 1;
1025 // rule display_output (rg_state == 1);
1026 // let abc = multiplier.get_result();
1027 // $fwrite(fh, "%h\n", abc.final_result[31:0]);
1031 // rule end_testing (index == 16562);
1033 // endrule : end_testing