src/core/fpu/fpu_fm_add_sub.bsv

   1 /*
   2 Authors     : Vinod.G, Aditya Govardhan
   3 Email       : g.vinod1993@gmail.com
   4 Last Update : 27th November 2017
   5 See LICENSE for more details
   6 Paper Reference: Floating Point Fused Multiply-Add Architectures (http://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=4487224)
   7 Description:
   8 TODO
   9 */
  10
  11 package fpu_fm_add_sub;
  12 import DReg::*;
  13 import defined_types::*;
  14 import RegFile::*;
  15 import UniqueWrappers::*;
  16 `include "defined_parameters.bsv"
  17 import ConfigReg::*;
  18
  19 interface Ifc_fpu_fm_add_sub#(numeric type fpinp, numeric type fpman, numeric type fpexp);
  20     method Action _start(Tuple3#(Bit#(1),Bit#(fpexp),Bit#(fpman)) _operand1, Tuple3#(Bit#(1),Bit#(fpexp),Bit#(fpman)) _operand2,Tuple3#(Bit#(1),Bit#(fpexp),Bit#(fpman)) _operand3, Bit#(3) rounding_mode, bit operation, bit _negate, bit mul, bit muladd, Tuple3#(Bit#(5),Bit#(5),Bit#(5)) flags);
  21         method Floating_output#(fpinp) get_result();
  22     method Action flush;
  23 endinterface
  24
  25
  26 `ifdef fpu_hierarchical
  27     interface Ifc_fpu_fm_add_sub32;
  28         method Action _start(Tuple3#(Bit#(1),Bit#(8),Bit#(23)) _operand1, Tuple3#(Bit#(1),Bit#(8),Bit#(23)) _operand2,Tuple3#(Bit#(1),Bit#(8),Bit#(23)) _operand3, Bit#(3) rounding_mode, bit operation, bit _negate, bit mul, bit muladd, Tuple3#(Bit#(5),Bit#(5),Bit#(5)) flags);
  29         method Floating_output#(32) get_result();
  30         method Action flush;
  31     endinterface
  32
  33     interface Ifc_fpu_fm_add_sub64;
  34         method Action _start(Tuple3#(Bit#(1),Bit#(11),Bit#(52)) _operand1, Tuple3#(Bit#(1),Bit#(11),Bit#(52)) _operand2,Tuple3#(Bit#(1),Bit#(11),Bit#(52)) _operand3, Bit#(3) rounding_mode, bit operation, bit _negate, bit mul,bit muladd, Tuple3#(Bit#(5),Bit#(5),Bit#(5)) flags);
  35         method Floating_output#(64) get_result();
  36         method Action flush;
  37     endinterface
  38 `endif
  39
  40 typedef struct{
  41     Bit#(TMul#(2,TAdd#(fpman,1))) product_mantissa;
  42     Bit#(TAdd#(fpexp,2)) lv_summed_exponent;           // exponent of the resultant
  43     bit sign;                                          // sign bit of the result
  44     Bit#(fpinp) _operand3;
  45     Bit#(1) invalid;                                   // indicating that the ff_output is NaN.
  46     Bit#(1) infinity;                                  // indicating that the ff_output is infinity.
  47     Bit#(1) zero;                                      // indicating that the ff_output is zero.
  48     Bit#(5) add_flags;
  49     Bit#(3) rounding_mode;                             // static rounding mode encoded in the instruction
  50     bit _operation;                                    // bit denoting the operation to be performed 0 - Add, 1 - Sub
  51     bit _negate;                                       // bit denoting whether the operands should be negated or not
  52     bit mul;                                           // bit denoting whether the operation is mul or not
  53     bit muladd;
  54     bit quiet_nan_two;
  55     bit inp_denormal;
  56 }Input_data_type #(numeric type fpinp, numeric type fpman, numeric type fpexp) deriving (Bits,Eq);
  57
  58 typedef struct{
  59     Bit#(1) lv_product_sign;                 //The result of the integer multiplier stage
  60     Bit#(1) lv_negate;
  61     Bit#(TAdd#(fpexp,2)) lv_product_exponent;
  62     Bit#(TAdd#(TMul#(2,TAdd#(1,fpman)),1)) lv_product_mantissa;
  63     Bit#(fpinp) lv_operand3;
  64     Bit#(5) add_flags;
  65     bit operation;
  66     bit mul;
  67     bit muladd;
  68     Bit#(3) rounding_mode;
  69     bit lv_product_is_invalid;
  70     bit lv_product_is_zero;
  71     bit lv_product_is_infinity;
  72     bit lv_product_overflow;
  73     bit lv_product_underflow;
  74     bit quiet_nan_two;
  75     bit inp_denormal;
  76 }Stage2_data_type #(numeric type fpinp, numeric type fpman, numeric type fpexp) deriving (Bits,Eq);
  77
  78 typedef struct{
  79     bit actual_operation;
  80     bit lv_resultant_sign;
  81     bit man2_gt_man3;
  82     Bit#(TAdd#(fpexp,2)) resultant_exponent;
  83     Bit#(TAdd#(TMul#(fpman,3),4)) mantissa2;
  84     Bit#(TAdd#(TMul#(fpman,3),4)) mantissa3;
  85     Bit#(3) rounding_mode;
  86     bit result_is_invalid;
  87     Bit#(2) result_is_infinity;
  88     Bit#(2) result_is_zero;
  89     bit product_overflow;
  90     bit product_underflow;
  91     bit quiet_nan_two;
  92     bit quiet_nan_three;
  93     bit lv_product_is_zero;
  94 }Stage4_data_type #(numeric type fpman, numeric type fpexp) deriving (Bits,Eq);
  95
  96 typedef struct{
  97     Bit#(TAdd#(fpexp,2)) resultant_exponent;
  98     Bit#(TAdd#(TMul#(fpman,3),4)) resultant_mantissa;
  99     bit lv_resultant_sign;
 100     Bit#(3) lv_rounding_mode;
 101     Bit#(2) add_sub_is_zero;
 102     bit lv_result_is_invalid;
 103     Bit#(2) lv_result_is_infinity;
 104     Bit#(2) lv_result_is_zero;
 105     bit lv_product_overflow;
 106     bit lv_product_underflow;
 107     bit quiet_nan_two;
 108     bit quiet_nan_three;
 109     bit lv_product_is_zero;
 110     Bit#(TLog#(TAdd#(TAdd#(TMul#(fpman,3),4),1))) lv_zeros_on_left;
 111 }Stage5_data_type #(numeric type fpman, numeric type fpexp) deriving (Bits,Eq);
 112
 113     typedef enum{
 114                  Begin,
 115                  Stage1,
 116                  Stage2,
 117                  Stage3,
 118                  Stage4
 119                 } FMA_states deriving (Bits,Eq);
 120
 121 module mkfpu_fm_add_sub(Ifc_fpu_fm_add_sub#(fpinp,fpman,fpexp))
 122     provisos(
 123              Add#(TAdd#(fpexp,fpman),1,fpinp),
 124              Add#(fpexp,2,fpexp2),
 125              Add#(TMul#(fpman,3),4,fmaman),
 126              Add#(fpman,1,fpman1),
 127              Add#(fpexp,1,fpexp1),
 128              Add#(b__,fpinp,64),
 129              Mul#(TAdd#(fpman,1),2,impfpman2),
 130              Add#(fpinp,fpinp,fpinp2),
 131              //per request of bsc
 132              Add#(c__, TSub#(fpexp, 1), fpexp1),
 133              Add#(d__,1,fpexp2),
 134              Add#(e__, fpexp2, fpman),
 135              Add#(f__, TSub#(fpexp, 1), fpman),
 136              Add#(g__, TAdd#(fpman, 1), fpinp),
 137              Add#(h__, TSub#(fpexp, 1), fpexp2),
 138              Add#(j__, TLog#(TAdd#(1, fmaman)), fpexp2),
 139              Add#(l__, 1, fmaman),
 140              Add#(m__, TAdd#(2, fpman), fmaman),
 141              Add#(n__,TAdd#(fpman,1),fpinp2),
 142              Add#(a__, TMul#(2, TAdd#(1, fpman)), fmaman),
 143              Mul#(2, TAdd#(1, fpman), impfpman2),
 144              Add#(i__, TAdd#(TMul#(2, TAdd#(1, fpman)), 1), fmaman),
 145              Add#(k__, TLog#(TAdd#(1, TAdd#(impfpman2, 1))), fpexp2),
 146              Add#(o__, TLog#(TAdd#(1, fmaman)), TAdd#(fpexp, 2)),
 147              Log#(TAdd#(TAdd#(TMul#(fpman, 3), 4), 1), TLog#(TAdd#(1, fmaman))),
 148              Add#(p__, TLog#(TAdd#(TAdd#(TMul#(fpman, 3), 4), 1)), TAdd#(fpexp, 2)),
 149                                                  Add#(s__, TAdd#(1, TAdd#(f__, fpexp)), fmaman),
 150                                                  Add#(t__, TAdd#(f__, fpexp), impfpman2),
 151                                                  Add#(q__, TAdd#(fpexp, f__), impfpman2),
 152                                                  Add#(r__, TAdd#(1, TAdd#(fpexp, f__)), fmaman)
 153              );
 154
 155
 156
 157     Wire#(Floating_output#(fpinp))                            ff_final_out        <-   mkWire();
 158     Reg#(Stage2_data_type#(fpinp,fpman,fpexp))                ff_stage2           <-   mkConfigRegU();
 159     Reg#(Stage4_data_type#(fpman,fpexp))                      ff_stage4           <-   mkConfigRegU();
 160     Reg#(Stage5_data_type#(fpman,fpexp))                      ff_stage5           <-   mkConfigRegU();
 161     Reg#(Input_data_type#(fpinp,fpman,fpexp))                 ff_input_register   <-   mkConfigRegU();
 162     Reg#(FMA_states)                                          rg_state_handler    <-   mkReg(Begin);
 163     Wire#(Bool)                                               wr_flush            <-   mkDWire(False);
 164
 165     function zeroExtendLSB(inp_man) = unpack(reverseBits(extend(reverseBits(pack(inp_man)))));
 166
 167     let fPMAN              =  valueOf(fpman);
 168     let fPINP              =  valueOf(fpinp);
 169     let fPEXP              =  valueOf(fpexp);
 170     let iMPFPMAN2          =  valueOf(impfpman2);
 171     let fMAMAN             =  valueOf(fmaman);
 172
 173     rule rl_flush(wr_flush);
 174          rg_state_handler <= Begin;
 175          //ff_input_register <= tagged Invalid;
 176          //ff_stage2         <= tagged Invalid;
 177          //ff_stage4         <= tagged Invalid;
 178          //ff_stage5         <= tagged Invalid;
 179     endrule
 180
 181     rule rl_stage1_after_input_stage(rg_state_handler == Stage1 && !wr_flush);
 182
 183          Bit#(impfpman2)  x = ff_input_register.product_mantissa;
 184          Bit#(TAdd#(impfpman2,1)) lv_product_mantissa              =  {x[iMPFPMAN2-1:0],1'b0};          //extra zero for 10.xxxx case
 185          Bit#(fpexp2)             lv_product_exponent              =  ff_input_register.lv_summed_exponent;
 186          Bit#(fpinp)              lv_operand3                      =  ff_input_register._operand3;
 187          Bit#(5)                  add_flags                        =  ff_input_register.add_flags;
 188          bit                      lv_product_underflow             =  0;
 189          bit                      lv_product_overflow              =  0;
 190          let                      lv_product_is_invalid            =  ff_input_register.invalid;
 191          let                      lv_product_is_infinity           =  ff_input_register.infinity;
 192          let                      lv_product_is_zero               =  ff_input_register.zero;
 193          let                      rounding_mode                    =  ff_input_register.rounding_mode;
 194          let                      operation                        =  ff_input_register._operation;
 195          let                      lv_negate                        =  ff_input_register._negate;
 196          let                      lv_product_sign                  =  ff_input_register.sign;
 197          let                      mul                              =  ff_input_register.mul;
 198          let                      muladd                           =  ff_input_register.muladd;
 199          let                      quiet_nan_two                    =  ff_input_register.quiet_nan_two;
 200          bit                      inp_denormal                     =  ff_input_register.inp_denormal;
 201          Bit#(TSub#(fpexp,1))     bias                             =  '1;
 202          Int#(fpexp2)             lv_actual_product_exponent       =  unpack(lv_product_exponent - {3'b0,bias});
 203          let                      msb_zeros                        =  pack(countZerosMSB(lv_product_mantissa));
 204          let                      lsb_zeros                        =  0;
 205
 206          //Change-2 Removing Redundant Variables
 207          //Bit#(fpman)              bias_temp                        =  zeroExtend(bias);
 208          //Int#(fpman)              lv_actual_product_exponent_temp  =  signExtend(lv_actual_product_exponent);
 209          //`ifdef verbose $display("lv_actual_product_exponent_temp : %d",lv_actual_product_exponent_temp); `endif
 210          rg_state_handler <= Stage2;
 211
 212          // lv_product_is_subnormal construct is like a flag which can be used in difficult situations
 213          // bit lv_product_is_subnormal = 0;
 214
 215          bit lv_sticky = lv_product_mantissa[0];
 216          `ifdef verbose $display("and thus the sticky bit = %b", lv_sticky); `endif
 217
 218          /*
 219           if exponent is > bias then obviously none of the numbers are subnormal
 220           so the product is of the form 1x.xxxx or 01.xxxx
 221           the overflow conditions are handled in the following if condition accordingly
 222          */
 223
 224          `ifdef verbose $display("lv_actual_product_exponent = %d",lv_actual_product_exponent); `endif
 225          bit exp_overflow_bit  = pack(lv_actual_product_exponent)[fPEXP]; //Says if Exponent Overflows
 226          bit exp_underflow_bit = pack(lv_actual_product_exponent)[fPEXP+1];  //Says if Exponent Underflows
 227          Bit#(fpexp) expo_temp = pack(lv_actual_product_exponent)[fPEXP-1:0];
 228          bit exp_and           = &(expo_temp);  //Says if Exponent is equal to Bias
 229          bit is_msb_zeros       = |(msb_zeros);
 230
 231          //Change-1 -- Reducing the size of the Muxes from EXP size to just a bunch of 1bits and a Or-tree
 232          //Change-1 was wrong apparently, according to Paranoia!! Should see why! Rolling back
 233          //if((exp_overflow_bit==1'b1 && exp_underflow_bit==1'b0) || (is_msb_zeros==1'b0 && exp_and==1'b1)) begin
 234              if(lv_actual_product_exponent > zeroExtend(unpack(bias)) || (msb_zeros == 0 && lv_actual_product_exponent == zeroExtend(unpack(bias)))) begin
 235                     if(muladd == 0 ||(muladd==1 && ((lv_product_sign^lv_operand3[fPINP-1]^operation) == 0)))
 236                            lv_product_overflow = 1;
 237             //When the product overflows, the FMA result is an overflow
 238             `ifdef verbose $display("lv_product_overflow!!!"); `endif
 239          end
 240
 241          /*
 242           -lowest_exp = -denormal_bias -mantissa_size -2
 243           -2 is for the implicit bit and the carry bit
 244           i.e. if all the bits are shifted out then its an underflow
 245          */
 246
 247          else begin
 248              //Thought-1 -- Can something be done to reduce the countZerosMSB and countZerosLSB
 249              //Cannot reduce this mux to 1-bit but can reduce size since it's unwanted
 250             if(lv_actual_product_exponent < unpack(-zeroExtend(bias)-fromInteger(fPMAN)-1)) begin
 251             //if(lv_actual_product_exponent_temp < unpack(-bias_temp-fromInteger(fPMAN)-1)) begin
 252                if((muladd == 1'b0  || (muladd==1'b1 && (add_flags[3]==1'b1 || add_flags[4]==1'b1))) && lv_product_is_zero == 1'b0)
 253                 lv_product_underflow = 1;
 254                `ifdef verbose $display("lv_product_underflow!!!"); `endif
 255             end
 256             /*
 257              if msb of product is 1 then the case is 1x.xxxx
 258              product is shifted right once to make it 01.xxxx
 259              we don't care what is the exponent, just increase it by one
 260              actual exponent is also increased by one since exponent is increased by one
 261              this increasing of exponent leading to overflow is handled in the overflow case
 262              msb_zeros is increased for further arising conditions
 263             */
 264              //Change-4 Using the previously computed msb_zeros. Synthesis will detect this anyhow, but still. Fanout?
 265             if(is_msb_zeros==1'b0) begin
 266             //if(msb_zeros == 0) begin
 267                lv_product_mantissa        =  lv_product_mantissa >> 1;
 268                lv_product_exponent        =  lv_product_exponent + 1;
 269                lv_actual_product_exponent =  lv_actual_product_exponent + 1;
 270                msb_zeros                  =  msb_zeros + 1;
 271             end
 272             // possible shift is positive when exponent is lesser than -126
 273
 274             //Change-5 Possible shift needn't use lv_actual_product_exponent -- It's enough if exponent is used I guess
 275 //            Int#(fpexp2) possible_shift   =  1-zeroExtend(unpack(bias))-(lv_actual_product_exponent);
 276             Int#(fpexp2) possible_shift   =  1-unpack(lv_product_exponent);
 277
 278             //Experiment-1 -- Do all the operations parallely and use the if-else for just assignments
 279               lsb_zeros = pack(countZerosLSB(lv_product_mantissa));
 280               let lv_product_mantissa_shiftR     = (lv_product_mantissa >> pack(possible_shift));
 281               //lv_product_mantissa_shiftR         = {lv_product_mantissa_shiftR[iMPFPMAN2:1], lv_product_mantissa_shiftR[0] | lv_sticky};
 282               let lv_product_exponent_inc_shift     = lv_product_exponent + pack(possible_shift);
 283
 284               let shift_neg = ~pack(possible_shift)+1;
 285
 286               let lv_product_mantissa_shiftL_expo = lv_product_mantissa << (shift_neg);
 287               let lv_product_exponent_sub_shift = lv_product_exponent - (shift_neg);
 288
 289               let lv_product_mantissa_shiftL_zerosMSB = lv_product_mantissa << (msb_zeros - 1);
 290               let lv_product_exponent_sub_zerosMSB = lv_product_exponent - (zeroExtend(msb_zeros) - 1);
 291
 292             /*
 293             msb_zeros = 1 when
 294             i)  the product is 1x.xxxx and shifted right once
 295             ii) the product is 01.xxxx already
 296             if possible_shift is negative or zero, it means that exponent is -126 or greater
 297             and thus the product is already normalized
 298             but if possible_shift is positive, it means that exponent is < -126
 299             and thus product is shifted right to make exponent -126 and the result is subnormal
 300             */
 301             if(possible_shift > 0) begin
 302                //Setting sticky if all lsb zeros are removed out
 303
 304                //Is there a better logic for this? Since, lsb_zeros is a big if-else logic
 305                //lsb_zeros = pack(countZerosLSB(lv_product_mantissa));
 306                if(possible_shift > unpack(zeroExtend(lsb_zeros)) || lv_product_mantissa[0] == 1)
 307                   lv_sticky = 1;
 308
 309                 lv_product_mantissa = {lv_product_mantissa_shiftR[iMPFPMAN2:1], lv_product_mantissa_shiftR[0]|lv_sticky};
 310                 lv_sticky = lv_product_mantissa[0];
 311                 lv_product_exponent = lv_product_exponent_inc_shift;
 312
 313                 `ifdef verbose $display("possible_shift",possible_shift); `endif
 314                /*if(mul==1 && lv_product_is_zero==0)
 315                    lv_product_underflow = 1;*/
 316                //Handling sticky
 317
 318                `ifdef verbose $display("lv_product_exponent : %d bin : %b",lv_product_exponent,lv_product_exponent); `endif
 319                `ifdef verbose $display("lv_product_mantissa = %b lv_product_exponent : %d since exp < -126", lv_product_mantissa,lv_product_exponent); `endif
 320                `ifdef verbose $display("and thus the sticky bit = %b", lv_sticky); `endif
 321                // lv_product_is_subnormal = 1;
 322             end
 323
 324             /*
 325             msb_zeros != 1 means product is of the form 00.xxxx, important case
 326             */
 327             else if(msb_zeros != 'b1) begin
 328                /*
 329                if possible shift is < the number of leading zeros then the number can't be made normal
 330                */
 331                if((shift_neg) < zeroExtend(msb_zeros - 1)) begin
 332                    lv_product_mantissa = lv_product_mantissa_shiftL_expo;
 333                    lv_product_exponent = lv_product_exponent_sub_shift;
 334                               // lv_product_is_subnormal = 1;
 335                end
 336                /*
 337                if exponent affords to give away enough such that shifting left leads to 01.xxxx and exponent >= -126
 338                */
 339                else begin
 340                     lv_product_mantissa = lv_product_mantissa_shiftL_zerosMSB;
 341                     lv_product_exponent = lv_product_exponent_sub_zerosMSB;
 342                // lv_product_is_subnormal = 0;
 343                end
 344             end
 345          end
 346          ff_stage2 <= Stage2_data_type{
 347                                                     lv_product_sign        :  lv_product_sign,
 348                                                     lv_negate              :  lv_negate,
 349                                                     lv_product_exponent    :  lv_product_exponent,
 350                                                     lv_product_mantissa    :  lv_product_mantissa,
 351                                                     lv_operand3            :  lv_operand3,
 352                                                     add_flags              :  add_flags,
 353                                                     operation              :  operation,
 354                                                     mul                    :  mul,
 355                                                     muladd                 :  muladd,
 356                                                     rounding_mode          :  rounding_mode,
 357                                                     lv_product_is_invalid  :  lv_product_is_invalid,
 358                                                     lv_product_is_zero     :  lv_product_is_zero,
 359                                                     lv_product_is_infinity :  lv_product_is_infinity,
 360                                                     lv_product_overflow    :  lv_product_overflow,
 361                                                     lv_product_underflow   :  lv_product_underflow,
 362                                                     quiet_nan_two          :  quiet_nan_two
 363                                                    };
 364
 365
 366     endrule
 367
 368     rule rl_stage_3(rg_state_handler == Stage2 && !wr_flush);
 369
 370          rg_state_handler <= Stage3;
 371          let lv_negate              = ff_stage2.lv_negate;
 372          let lv_product_exponent    = ff_stage2.lv_product_exponent;
 373          let lv_product_mantissa    = ff_stage2.lv_product_mantissa;
 374          let lv_operand3            = ff_stage2.lv_operand3;
 375          let add_flags              = ff_stage2.add_flags;
 376          let operation              = ff_stage2.operation;
 377          let mul                    = ff_stage2.mul;
 378          let muladd                 = ff_stage2.muladd;
 379          let lv_product_sign        = ff_stage2.lv_product_sign;
 380          let lv_product_is_invalid  = ff_stage2.lv_product_is_invalid;
 381          let lv_product_is_zero     = ff_stage2.lv_product_is_zero;
 382          let lv_rounding_mode       = ff_stage2.rounding_mode;
 383          let lv_product_is_infinity = ff_stage2.lv_product_is_infinity;
 384          let lv_product_overflow    = ff_stage2.lv_product_overflow;
 385          let lv_product_underflow   = ff_stage2.lv_product_underflow;
 386          let quiet_nan_two          = ff_stage2.quiet_nan_two;
 387          let inp_denormal           = ff_stage2.inp_denormal;
 388          //ff_stage2 <= tagged Invalid;
 389
 390          Bit#(1) sign2              = lv_product_sign ^ lv_negate;
 391          Bit#(fpexp2) exponent2     = lv_product_exponent;
 392          Bit#(fmaman) mantissa2     = zeroExtendLSB(lv_product_mantissa);
 393          Bit#(1) sign3              = lv_operand3[fPINP-1] ^ lv_negate;
 394          Bit#(fpexp2) exponent3     = {2'b0, lv_operand3[fPINP-2:fPMAN]};
 395          Bit#(fmaman) mantissa3     = 0;
 396          Bit#(fpman) lv_man3        = lv_operand3[fPMAN-1:0];
 397          Bit#(fpexp) lv_exp_max     = '1;
 398          bit lv_op3_is_invalid      = add_flags[2] | add_flags[0];
 399          bit lv_op3_is_infinity     = add_flags[1];
 400          bit lv_op3_is_zero         = add_flags[3];
 401          bit op3_is_subnormal       = add_flags[4];
 402          bit quiet_nan_three        = add_flags[2];
 403          bit expo3_zero = |exponent3;
 404          //Change-7 Replaced all instances of operation^sign3 with op_xor_sign3
 405          bit op_xor_sign3 = operation ^ sign3;
 406
 407          //Change-6 Avoiding exponent3==0 mux, but does that help?
 408          if(lv_op3_is_infinity==0 && lv_op3_is_invalid ==0 && lv_op3_is_zero==0) begin
 409              mantissa3 =  zeroExtendLSB({1'b0,expo3_zero,lv_man3});
 410          /*   if(exponent3 == '0)
 411                 mantissa3 = zeroExtendLSB({2'b0,lv_man3});
 412             else
 413                 mantissa3 = zeroExtendLSB({2'b01,lv_man3});*/
 414          end
 415
 416          exponent3 = exponent3 + zeroExtend(op3_is_subnormal);
 417
 418          Bit#(1) lv_result_is_invalid  = 0;
 419          Bit#(2) lv_result_is_infinity = 0;
 420          Bit#(2) lv_result_is_zero     = 0;
 421
 422          if(quiet_nan_two == 0 && lv_product_is_invalid == 1)
 423              quiet_nan_three = 0;           //0*inf case
 424
 425          //Result is invalid cases
 426          if(lv_op3_is_invalid == 1 || lv_product_is_invalid == 1)
 427             lv_result_is_invalid = 1;
 428
 429          //Result is zero cases
 430          else if(lv_op3_is_zero == 1 && lv_product_is_zero == 1) begin
 431             if(mul==0) begin
 432                if((lv_rounding_mode == 'b010) && (sign2 | (op_xor_sign3)) == 1)
 433                   lv_result_is_zero = 2'b11;
 434                else if((lv_rounding_mode != 'b010) && (sign2 & (op_xor_sign3)) == 1)
 435                   lv_result_is_zero = 2'b11;
 436                else begin
 437                   if(sign2 == 0)
 438                      lv_result_is_zero = 2'b01;
 439                   else
 440                      lv_result_is_zero = {op_xor_sign3,1'b1};
 441                end
 442             end
 443             else
 444             lv_result_is_zero = {sign2,1};
 445          end
 446
 447          //Result is infinity cases
 448          else if(lv_product_is_infinity == 1 && lv_op3_is_infinity == 1) begin
 449             lv_result_is_infinity   = {sign2, ~(sign2 ^ (op_xor_sign3))};
 450             lv_result_is_invalid    = ~lv_result_is_infinity[0];
 451             quiet_nan_two = 0; //inf * qNaN + inf case
 452          end
 453          else if(lv_product_is_infinity == 1 || lv_op3_is_infinity == 1) begin
 454             lv_result_is_infinity   = {((lv_product_is_infinity & ~lv_op3_is_infinity) & sign2) | ((~lv_product_is_infinity & lv_op3_is_infinity) & (op_xor_sign3)), 1};
 455          end
 456          if(lv_product_is_zero == 1) begin
 457             exponent2 = '0;
 458             mantissa2 = '0;
 459          end
 460
 461          Bit#(fpexp2) lv_minuend, lv_subtrahend;
 462          Bit#(fpexp2) exponent_difference = '0;
 463          Bit#(fpexp2) resultant_exponent = '0;
 464          bit op2_gt_op3 = 0;
 465
 466          Bit#(fmaman) mantissa_to_shift;
 467          let lv_zeros_on_right;
 468          bit lv_sticky = 0;
 469
 470          if(exponent2 > exponent3) begin
 471             lv_minuend        = exponent2;
 472             lv_subtrahend     = exponent3;
 473             mantissa_to_shift = mantissa3;
 474             op2_gt_op3        = 1;
 475          end
 476          else begin
 477             lv_minuend        = exponent3;
 478             lv_subtrahend     = exponent2;
 479             mantissa_to_shift = mantissa2;
 480             op2_gt_op3        = 0;
 481          end
 482
 483          resultant_exponent           = lv_minuend;
 484          exponent_difference          = lv_minuend - lv_subtrahend;
 485          lv_zeros_on_right            = zeroExtend(pack(countZerosLSB(mantissa_to_shift)));
 486          Bit#(1) shifted_operand_zero = (mantissa_to_shift == '0) ? 1:0;
 487          mantissa_to_shift            = mantissa_to_shift >> exponent_difference;
 488
 489          //Handling sticky
 490          if(((lv_zeros_on_right < exponent_difference) || (mantissa_to_shift[0] == 1)) && shifted_operand_zero != 1)
 491             lv_sticky = 1;
 492
 493          mantissa_to_shift = {mantissa_to_shift[fMAMAN-1:1], lv_sticky};
 494
 495          if(op2_gt_op3 == 1) begin
 496          mantissa3 = mantissa_to_shift;
 497          end
 498          else begin
 499          mantissa2 = mantissa_to_shift;
 500          end
 501          quiet_nan_two = quiet_nan_two & ~add_flags[0];
 502          `ifdef verbose $display("sign2 = %b exponent2 = %b mantissa2 = %b", sign2, resultant_exponent, mantissa2); `endif
 503          `ifdef verbose $display("sign3 = %b exponent3 = %b mantissa3 = %b", sign3, resultant_exponent, mantissa3); `endif
 504          `ifdef verbose $display(); `endif
 505          bit man2_gt_man3 = 0;
 506          if(mantissa2 > mantissa3) man2_gt_man3 = 1;   //Can this be optimized?
 507          bit lv_resultant_sign = (man2_gt_man3 & sign2) | (~man2_gt_man3 & (operation ^ sign3));  // Using Karnaugh maps
 508          bit actual_operation  = sign2 ^ (operation ^ sign3);                          // 0 for addition 1 for subtraction  //Can this be pushed back to the prev. stage, saving 1 bit
 509
 510       ff_stage4<= Stage4_data_type{
 511                                                 lv_resultant_sign    :           lv_resultant_sign,
 512                                                 actual_operation     :           actual_operation,
 513                                                 mantissa2            :           mantissa2,
 514                                                 mantissa3            :           mantissa3,
 515                                                 man2_gt_man3         :           man2_gt_man3,
 516                                                 resultant_exponent   :           resultant_exponent,
 517                                                 rounding_mode        :           lv_rounding_mode,
 518                                                 result_is_invalid    :           lv_result_is_invalid,
 519                                                 result_is_infinity   :           lv_result_is_infinity,
 520                                                 result_is_zero       :           lv_result_is_zero,
 521                                                 product_overflow     :           lv_product_overflow,
 522                                                 product_underflow    :           lv_product_underflow,
 523                                                 quiet_nan_two        :           quiet_nan_two,
 524                                                 quiet_nan_three      :           quiet_nan_three,
 525                                                 lv_product_is_zero   :           lv_product_is_zero
 526                                                 };
 527     endrule:rl_stage_3
 528
 529     rule rl_stage4(rg_state_handler == Stage3 && !wr_flush);
              // Mantissa add/subtract stage. Fires while the state register reads Stage3 and no
              // flush is asserted. It reads the fields of ff_stage4, adds or subtracts the two
              // mantissas, detects an all-zero result, counts the leading zeros of the result and
              // writes everything into ff_stage5. The state register is advanced to Stage4.
 530          rg_state_handler <= Stage4;
 531          let          lv_resultant_sign     =  ff_stage4.lv_resultant_sign;
 532          let          man2_gt_man3          =  ff_stage4.man2_gt_man3;
 533          let          mantissa2             =  ff_stage4.mantissa2;
 534          let          mantissa3             =  ff_stage4.mantissa3;
 535          let          actual_operation      =  ff_stage4.actual_operation;
 536          Bit#(fpexp2) resultant_exponent    =  ff_stage4.resultant_exponent;
 537          Bit#(3)      lv_rounding_mode      =  ff_stage4.rounding_mode;
 538          bit          lv_result_is_invalid  =  ff_stage4.result_is_invalid;
 539          Bit#(2)      lv_result_is_infinity =  ff_stage4.result_is_infinity;
 540          Bit#(2)      lv_result_is_zero     =  ff_stage4.result_is_zero;
 541          bit          lv_product_overflow   =  ff_stage4.product_overflow;
 542          let          quiet_nan_two         =  ff_stage4.quiet_nan_two;
 543          let          quiet_nan_three       =  ff_stage4.quiet_nan_three;
 544          let          lv_product_underflow  =  ff_stage4.product_underflow;
 545          let          lv_product_is_zero    =  ff_stage4.lv_product_is_zero;
 546          //ff_stage4 <= tagged Invalid;
 547
              // Both the sum and the difference are formed; actual_operation picks one below.
 548          Bit#(fmaman) resultant_mantissa = 0;
 549          Bit#(fmaman) add_mantissa = mantissa2 + mantissa3;
 550
 551          //Serial Path?
              // man2_gt_man3 == 1 makes mantissa2 the minuend, otherwise mantissa3 is the minuend
              // (presumably so that the smaller value is always subtracted from the larger one).
 552          Bit#(fmaman) sub_mantissa1 = (man2_gt_man3==1)? mantissa2 : mantissa3;
 553          Bit#(fmaman) sub_mantissa2 = (man2_gt_man3==1)? mantissa3 : mantissa2;
 554          Bit#(fmaman) sub_mantissa  = sub_mantissa1 - sub_mantissa2;
 555
 556
              // actual_operation == 0 selects the sum, any other value selects the difference.
 557          if(actual_operation == 0)
 558              resultant_mantissa = add_mantissa;
 559          else
 560              resultant_mantissa = sub_mantissa;
 561
 562          //Case when Mantissa2 = Mantissa3 and hence the result is zero
              // add_sub_is_zero[0] marks a zero result. add_sub_is_zero[1] is set only for rounding
              // mode 3'b010 and is later used as the sign bit of that zero by the final stage
              // (3'b010 is round-towards-minus-infinity in the RISC-V encoding -- presumably the
              // encoding used here).
 563          Bit#(2) add_sub_is_zero = 0;
 564
 565          if(resultant_mantissa == '0) begin
 566             if(lv_rounding_mode == 3'b010) begin
 567                add_sub_is_zero = 2'b11;
 568             end
 569             else begin
 570                add_sub_is_zero = 2'b01;       // checks the resultant mantissa for zero
 571             end
 572          end
 573
              // Leading-zero count of the result, registered here and consumed by the normalisation
              // shift in the final stage.
 574          let lv_zeros_on_left = pack(countZerosMSB(resultant_mantissa));
 575         ff_stage5 <= Stage5_data_type{
 576                                                     resultant_mantissa    : resultant_mantissa,
 577                                                     add_sub_is_zero       : add_sub_is_zero,
 578                                                     lv_resultant_sign     : lv_resultant_sign,
 579                                                     resultant_exponent    : resultant_exponent,
 580                                                     lv_rounding_mode      : lv_rounding_mode,
 581                                                     lv_result_is_invalid  : lv_result_is_invalid,
 582                                                     lv_result_is_infinity : lv_result_is_infinity,
 583                                                     lv_result_is_zero     : lv_result_is_zero,
 584                                                     lv_product_overflow   : lv_product_overflow,
 585                                                     quiet_nan_two         : quiet_nan_two,
 586                                                     quiet_nan_three       : quiet_nan_three,
 587                                                     lv_product_underflow  : lv_product_underflow,
 588                                                     lv_product_is_zero    : lv_product_is_zero,
 589                                                     lv_zeros_on_left      : lv_zeros_on_left
 590                                                 };
 591
 592     endrule
 593
 594
 595
 596     rule rl_stage_5_final_stage(rg_state_handler == Stage4 && !wr_flush);
             // Final stage. Fires while the state register reads Stage4 and no flush is asserted.
             // It normalises the add/sub result held in ff_stage5, rounds it, selects the output
             // word (NaN / infinity / zero / overflow / regular result), assembles the 5-bit flag
             // vector, writes ff_final_out and returns the state register to Begin.
 597         Bit#(fmaman) resultant_mantissa    = ff_stage5.resultant_mantissa;
 598         let add_sub_is_zero       = ff_stage5.add_sub_is_zero;
 599         let resultant_exponent    = ff_stage5.resultant_exponent;
 600         let lv_resultant_sign     = ff_stage5.lv_resultant_sign;
 601         let lv_rounding_mode      = ff_stage5.lv_rounding_mode;
 602         let lv_result_is_invalid  = ff_stage5.lv_result_is_invalid;
 603         let lv_result_is_infinity = ff_stage5.lv_result_is_infinity;
 604         let lv_result_is_zero     = ff_stage5.lv_result_is_zero;
 605         let lv_product_overflow   = ff_stage5.lv_product_overflow;
 606         let quiet_nan_two         = ff_stage5.quiet_nan_two;
 607         let quiet_nan_three       = ff_stage5.quiet_nan_three;
 608         let lv_product_underflow  = ff_stage5.lv_product_underflow;
 609         let lv_product_is_zero    = ff_stage5.lv_product_is_zero;
 610         let lv_zeros_on_left      = ff_stage5.lv_zeros_on_left;
 611         bit add_sub_subnormal = 0;
 612         //ff_stage5 <= tagged Invalid;
 613         rg_state_handler <= Begin;
              // LSB of the un-normalised result; OR-ed back in below so the 1-bit right shift
              // does not lose it.
 614          bit lv_sticky = resultant_mantissa[0];
 615
 616          //change-x+1
              // Every candidate exponent / shifted mantissa is computed up front; the if/else
              // chain below only selects among them.
 617          let resultant_exponent_sub = resultant_exponent -1;
 618          let resultant_mantissa_unnormalized = resultant_mantissa >> 1;
 619          let resultant_exponent_inc = resultant_exponent + 1;
 620          let resultant_mantissa_norm_expo = resultant_mantissa << resultant_exponent_sub;
 621          let resultant_mantissa_norm_zerosMSB = resultant_mantissa << (lv_zeros_on_left - 1);
 622          let resultant_exponent_sub_zerosMSB = resultant_exponent - ((zeroExtend(lv_zeros_on_left)) - 1);
 623
 624
              // Top bit set: shift right by one, fold the dropped bit into the new LSB and
              // increment the exponent.
 625          if(resultant_mantissa[fMAMAN-1] == 1'b1) begin
 626             //resultant_mantissa = resultant_mantissa >> 1;
 627             resultant_mantissa = {resultant_mantissa_unnormalized[fMAMAN-1:1], lv_sticky | resultant_mantissa_unnormalized[0]};
 628             resultant_exponent = resultant_exponent_inc;
 629             //resultant_exponent = resultant_exponent + 1;
 630          end
 631
              // Top two bits both clear: a left shift is required.
 632          else if(resultant_mantissa[fMAMAN-2] != 1'b1) begin
                 // The required shift (leading zeros - 1) exceeds (exponent - 1): shift only by
                 // (exponent - 1), force the exponent to 0 and mark the result as subnormal.
 633             if((zeroExtend(lv_zeros_on_left) - 1) > resultant_exponent_sub) begin
 634             //if((zeroExtend(lv_zeros_on_left) - 1) > (resultant_exponent - 1)) begin
 635                `ifdef verbose $display("resultant_exponent : %d",resultant_exponent); `endif
 636                //resultant_mantissa = resultant_mantissa << (resultant_exponent - 1);
 637                resultant_mantissa = resultant_mantissa_norm_expo;
 638                resultant_exponent = 0;
 639                `ifdef verbose $display("add_sub subnormal!!!"); `endif
 640                add_sub_subnormal = 1;
 641             end
                 // Otherwise shift left by (leading zeros - 1) and lower the exponent by the
                 // same amount.
 642             else begin
 643                //resultant_mantissa = resultant_mantissa << (lv_zeros_on_left - 1);
 644                //resultant_exponent = resultant_exponent - (zeroExtend(lv_zeros_on_left) - 1);
 645                resultant_mantissa = resultant_mantissa_norm_zerosMSB;
 646                resultant_exponent = resultant_exponent_sub_zerosMSB;
 647             end
 648          end
 649
 650
 651          `ifdef verbose $display("resultant_exponent : %b",resultant_exponent); `endif
              // bias is all ones in (fpexp - 1) bits, i.e. 2^(fpexp-1) - 1.
 652          Bit#(TSub#(fpexp,1)) bias = '1;
 653          bit ex_overflow = 0;
              // Exponent with the bias removed, interpreted as a signed integer.
 654          Int#(fpexp2) res_exp_int = unpack(resultant_exponent) - zeroExtend(unpack(bias));
 655          `ifdef verbose $display("resultant_exponent : %d res_exp_int : %d",resultant_exponent, res_exp_int); `endif
 656
              // Overflow when the unbiased exponent exceeds the bias.
 657          if(res_exp_int > zeroExtend(unpack(bias))) begin
 658              lv_product_overflow = 1;
 659              ex_overflow = 1;
 660          end
 661       /*   else if(res_exp_int == zeroExtend(unpack(bias)))
 662              ex_overflow = 1;*/
              // Bit fPEXP+1 of the exponent set while the product is non-zero raises underflow
              // (presumably this is the sign bit of the fpexp2-wide exponent -- TODO confirm
              // against the declaration of fpexp2).
 663          else if(resultant_exponent[fPEXP+1] == 1 && lv_product_is_zero == 0) begin
 664              lv_product_underflow = 1;
 665              `ifdef verbose $display("Underflow"); `endif
 666          end
 667          /*`ifdef verbose $display("resultant_sign = %b resultant_exponent = %b resultant_mantissa = %b", resultant_sign, resultant_exponent, resultant_mantissa); `endif
 668          `ifdef verbose $display(); `endif
 669     */
              // Split the normalised mantissa: the upper (fpman + 2) bits are kept for the result,
              // the bit just below them is the guard bit, the next one the round bit, and the OR
              // of all remaining lower bits is the sticky bit.
 670          Bit#(TAdd#(fpman,2)) lv_rounded_mantissa =   resultant_mantissa[fMAMAN-1:iMPFPMAN2];
 671          Bit#(2) lv_res_man                       =   resultant_mantissa[fMAMAN-1:fMAMAN-2];
 672          Bit#(TSub#(impfpman2,2)) lv_res1         =   resultant_mantissa[iMPFPMAN2-3:0];
 673          bit lv_guard                             =   resultant_mantissa[iMPFPMAN2-1];
 674          bit lv_round                             =   resultant_mantissa[iMPFPMAN2-2];
 675          lv_sticky                                =   |lv_res1;
 676          bit lv_round_up                          =   0;
 677          bit lv_inexact                           =   lv_guard | lv_round | lv_sticky;
 678
              // Rounding decision per mode:
              //   000 : round up on guard when the kept LSB, round or sticky bit is set (ties to even)
              //   100 : round up whenever guard is set (ties away from zero)
              //   010 : round up any inexact negative result (towards minus infinity)
              //   011 : round up any inexact positive result (towards plus infinity)
              //   any other value (including 001) leaves lv_round_up at 0, i.e. truncation.
              // The numeric values coincide with the RISC-V rounding-mode encoding -- presumably
              // intentional.
 679          if(lv_rounding_mode == 'b000)
 680             lv_round_up = lv_guard & (resultant_mantissa[iMPFPMAN2] | lv_round | lv_sticky);
 681          else if(lv_rounding_mode == 'b100)
 682             lv_round_up = lv_guard ;//& (lv_round | lv_sticky | ~lv_resultant_sign);
 683          else if(lv_rounding_mode == 'b010)
 684             lv_round_up = lv_inexact & (lv_resultant_sign);
 685          else if(lv_rounding_mode == 'b011)
 686             lv_round_up = lv_inexact & (~lv_resultant_sign);
 687
             // A result normalised as subnormal above that is also inexact raises underflow.
 688         if(add_sub_subnormal == 1 && lv_inexact == 1)
 689             lv_product_underflow = 1;
 690
 691          `ifdef verbose $display("lv_guard = %b lv_round = %b lv_sticky = %b", lv_guard, lv_round, lv_sticky); `endif
 692          `ifdef verbose $display("lv_round_up = %b", lv_round_up); `endif
 693          `ifdef verbose $display("lv_rounded_mantissa = %b", lv_rounded_mantissa); `endif
 694
 695           if(lv_round_up == 1)
 696              lv_rounded_mantissa = lv_rounded_mantissa + 1;
 697
 698          `ifdef verbose $display("lv_rounded_mantissa = %b after roundup", lv_rounded_mantissa); `endif
 699
              // Rounding carried into bit fPMAN+1: renormalise with a 1-bit right shift and bump
              // the exponent.
 700          if(lv_rounded_mantissa[fPMAN+1] == 1) begin
 701             resultant_exponent = resultant_exponent + 1;
 702             lv_rounded_mantissa   = lv_rounded_mantissa >> 1;
 703          end
              // The top two pre-rounding bits were zero but bit fPMAN is set after rounding: only
              // the exponent is incremented (presumably a subnormal that rounded up into the
              // normal range -- TODO confirm).
 704          else if(lv_res_man == 'b0 && lv_rounded_mantissa[fPMAN] == 1) begin
 705             resultant_exponent = resultant_exponent + 1;
 706          end
 707
 708          Bit#(fpexp) lv_res_exp_temp         = resultant_exponent[fPEXP-1:0];
 709          Bit#(fpman) man_all_zeros           = '0;
 710          Bit#(TSub#(fpman,1)) man1_all_zeros = '0;
 711          Bit#(fpman) man_all_ones            = '1;
 712          Bit#(fpexp) exp_all_zeros           = '0;
 713          Bit#(TSub#(fpexp,1)) exp_all_ones_1 = '1;
 714          Bit#(fpinp) lv_final_output         =  0;
 715          Bit#(fpexp) exp_all_ones            = '1;
 716          Bit#(fpexp) out_exp                 = resultant_exponent[fPEXP-1:0];
 717          Bit#(fpman) out_man                 = lv_rounded_mantissa[fPMAN-1:0];
 718
 719
 720          //Can I put these invalid, infinity, zero, cases in the first stage which will clear some of the paths????
              // Output selection, highest priority first.
              // Invalid: sign 0, exponent all ones, only the mantissa MSB set.
 721          if(lv_result_is_invalid == 1) begin
 722            lv_final_output = {1'b0, exp_all_ones,1'b1, man1_all_zeros};
 723          end
              // Infinity: bit 1 of lv_result_is_infinity supplies the sign; overflow, underflow and
              // inexact are cleared.
 724          else if(lv_result_is_infinity[0] == 1) begin
 725             lv_final_output = {lv_result_is_infinity[1], exp_all_ones, man_all_zeros};
 726             ex_overflow = 0; lv_product_underflow = 0; lv_inexact = 0;
 727          end
              // Zero flagged by an earlier stage: bit 1 of lv_result_is_zero supplies the sign.
 728          else if(lv_result_is_zero[0] == 1) begin
 729              lv_final_output = {lv_result_is_zero[1],exp_all_zeros, man_all_zeros};
 730          end
              // Zero produced by the add/sub itself: bit 1 of add_sub_is_zero supplies the sign.
 731          else if(add_sub_is_zero[0] == 1) begin
 732             lv_final_output = {add_sub_is_zero[1], exp_all_zeros , man_all_zeros};
 733          end
              // Overflow (flagged, or the exponent field is all ones after rounding): modes 001,
              // 010 with a positive result and 011 with a negative result return the largest
              // finite value; every other case returns infinity. Overflow and inexact are set.
 734          else if(lv_product_overflow == 1 || lv_res_exp_temp == '1) begin
 735             lv_inexact = 1;
 736             ex_overflow = 1;
 737            if(lv_rounding_mode == 'b001)
 738              lv_final_output={lv_resultant_sign,{exp_all_ones_1,1'b0},man_all_ones}; //??
 739            else if(lv_rounding_mode == 'b010 && lv_resultant_sign == 0)
 740              lv_final_output={lv_resultant_sign,{exp_all_ones_1,1'b0},man_all_ones}; //??
 741            else if(lv_rounding_mode == 'b011 && lv_resultant_sign == 1)
 742              lv_final_output={lv_resultant_sign,{exp_all_ones_1,1'b0},man_all_ones}; //??
 743            else begin
 744              lv_final_output={lv_resultant_sign,exp_all_ones,man_all_zeros};
 745            end
 746          end
              // Regular result: sign, low fpexp bits of the exponent, low fpman bits of the
              // rounded mantissa.
 747          else begin
 748              lv_final_output = {lv_resultant_sign, out_exp, out_man};
 749          end
 750
 751          if(lv_product_underflow == 1'b1 && lv_rounded_mantissa[fPMAN]==1'b1 && lv_rounding_mode!=3'b011) //Tininess vanishing after rounding
 752              lv_product_underflow = 0;
 753
              // For an invalid result the overflow, inexact and underflow flags are suppressed;
              // the invalid flag itself is also dropped when either quiet-NaN indicator is set.
 754          if(lv_result_is_invalid == 1) begin   //For effectively handling the flag cases between add,sub,mul and fused mul add
 755              ex_overflow  = 0;
 756              lv_inexact   = 0;
 757              lv_product_underflow = 0;
 758             if(quiet_nan_two == 1 || quiet_nan_three == 1)
 759                 lv_result_is_invalid = 0;
 760          end
 761
              // Flag vector, MSB first: invalid, constant 0, overflow, underflow, inexact
              // (same order as the RISC-V fflags NV/DZ/OF/UF/NX bits -- presumably intentional).
 762          Bit#(5) fflags={lv_result_is_invalid,1'b0,ex_overflow,lv_product_underflow,lv_inexact};
 763          `ifdef verbose $display("lv_inv : %b ex_overflow: %b lv_inexact : %b",lv_result_is_invalid,ex_overflow,lv_inexact); `endif
 764          ff_final_out <= Floating_output{
 765                                  final_result   :        lv_final_output,
 766                                  fflags         :        fflags
 767                                         };
 768
 769          `ifdef verbose $display("FMA: Result: %h fflags: %8h",lv_final_output, {24'b0,fflags}); `endif
 770     endrule
 771
 772     method Action _start(Tuple3#(Bit#(1),Bit#(fpexp),Bit#(fpman)) _operand1, Tuple3#(Bit#(1),Bit#(fpexp),Bit#(fpman)) _operand2,Tuple3#(Bit#(1),Bit#(fpexp),Bit#(fpman)) _operand3, Bit#(3) rounding_mode, bit operation, bit _negate, bit mul, bit muladd, Tuple3#(Bit#(5),Bit#(5),Bit#(5)) flags);
              // Accepts a new operation. Multiplies the mantissas of _operand1 and _operand2,
              // computes the product's exponent and sign, classifies the product as
              // invalid / infinity / zero, stores everything in ff_input_register and sets the
              // state register to Stage1.
              //
              // _operand1.._operand3 : (sign, exponent, mantissa) tuples; _operand3 is stored
              //                        re-packed and is not otherwise examined here.
              // rounding_mode, operation, _negate, mul, muladd :
              //                        stored unchanged for later stages; not interpreted here.
              // flags                : one 5-bit condition vector per operand. Bits used here:
              //                        [4] subnormal, [3] zero, [2] quiet NaN, [1] infinity,
              //                        [0] signalling NaN. The third vector is stored as add_flags.
 773
 774
 775          Bit#(TSub#(fpexp,1)) bias       =  '1;                                   //Bias for the exponent: 127 for SP and 1023 for DP
 776          Bit#(1) sign1                   =  tpl_1(_operand1);
 777          Bit#(1) sign2                   =  tpl_1(_operand2);
 778          Bit#(1) sign3                   =  tpl_1(_operand3);
 779          Bit#(fpexp) lv_exponent1        =  tpl_2(_operand1);
 780          Bit#(fpexp) lv_exponent2        =  tpl_2(_operand2);
 781          Bit#(fpexp) lv_exponent3        =  tpl_2(_operand3);
 782          Bit#(fpman) lv_mantissa1        =  tpl_3(_operand1);
 783          Bit#(fpman) lv_mantissa2        =  tpl_3(_operand2);
 784          Bit#(fpman) lv_mantissa3        =  tpl_3(_operand3);
 785          Bit#(5) flags1                  =  tpl_1(flags);
 786          Bit#(5) flags2                  =  tpl_2(flags);
 787          Bit#(5) flags3                  =  tpl_3(flags);
 788          Bit#(1) lv_op1_is_zero          =  flags1[3];                             //1 when operand1=0
 789          Bit#(1) lv_op2_is_zero          =  flags2[3];                             //1 when operand2=0
 790          Bit#(1) lv_op1_infinity         =  flags1[1];                             //1 when operand1=inf
 791          Bit#(1) lv_op2_infinity         =  flags2[1];                             //1 when operand2=inf
 792          Bit#(1) lv_op1_subnormal        =  flags1[4] | flags1[3];                 //1 when operand1 is subnormal or zero
 793          Bit#(1) lv_op2_subnormal        =  flags2[4] | flags2[3];                 //1 when operand2 is subnormal or zero
 794          Bit#(1) lv_inf                  =  0;                                     //Bit indicating infinity
 795          Bit#(1) lv_inv                  =  0;                                     //Invalid Bit
 796          Bit#(1) lv_zero                 =  0;                                     //Zero bit
              // Set when one multiplicand is a quiet NaN and the other is not a signalling NaN.
 797          bit quiet_nan_two               = (flags1[2] & ~flags2[0]) | (flags2[2] & ~flags1[0]);
 798
 799          if((((flags1[0] | flags1[2])==1) || (flags2[0] | flags2[2])==1))  //If either operand is a NaN (quiet or signalling - not distinguished here)
 800              lv_inv = 1;
 801          else if(lv_op1_infinity==1 || lv_op2_infinity==1) begin           //If either of the operands is infinity
 802              if(lv_op1_is_zero == 1 || lv_op2_is_zero ==1) begin                 //At least one operand is infinity: if the other is zero the product is NaN (0*inf)
 803                  lv_inv = 1;
 804              end
 805              else begin
 806                  lv_inf = 1;                                                //Else the product is infinity (inf * non-zero = inf)
 807                  quiet_nan_two = 0;
 808              end
 809          end
 810          else if(lv_op1_is_zero == 1 || lv_op2_is_zero == 1)
 811              lv_zero = 1;                                                  //Neither operand is infinity: if either is zero the product is zero (0*x = 0)
 812
 813
 814          `ifdef verbose $display("lv_inv : %h lv_inf : %h lv_zero : %h",lv_inv,lv_inf,lv_zero);  `endif
 815          `ifdef verbose $display("flags1 : %b flags2 : %b flags3 : %b",flags1,flags2,flags3); `endif
 816
 817          /*
 818             When normal and denormal number is multiplied, exponent is
 819             (biased_exponent - bias) + (1 - bias) + bias = biased_exponent - bias + 1;
 820             either _operand1[30:23] == 0 or _operand2[30:23] == 0 for the above if condition so no harm in adding both
 821          */
 822
              // Exponents are widened by two bits before being summed. The bias is subtracted once
              // and 1 is added for each multiplicand whose subnormal-or-zero bit is set.
 823          Bit#(fpexp2) exp1_temp          =  {2'b0,lv_exponent1};
 824          Bit#(fpexp2) exp2_temp          =  {2'b0,lv_exponent2};
 825          Bit#(fpexp2) lv_summed_exponent =  exp1_temp + exp2_temp - zeroExtend(bias) + zeroExtend(lv_op1_subnormal) + zeroExtend(lv_op2_subnormal);
 826          Bit#(1) lv_sign                 =  sign1 ^ sign2;
 827
 828          `ifdef verbose $display("lv_summed_exponent = %b", lv_summed_exponent/*, lv_actual_exponent*/); `endif
 829
              // The implicit leading bit is 1 unless the operand is subnormal or zero.
 830          Bit#(impfpman2) x = zeroExtend({~lv_op1_subnormal, lv_mantissa1})*zeroExtend({~lv_op2_subnormal, lv_mantissa2});    //Single Cycle Int Mul
 831          rg_state_handler <= Stage1;
 832          ff_input_register<= Input_data_type{
 833                                                            product_mantissa    :          x,
 834                                                            lv_summed_exponent  :          lv_summed_exponent,
 835                                                            sign                :          lv_sign,
 836                                                            _operand3           :          {sign3,lv_exponent3,lv_mantissa3},
 837                                                            rounding_mode       :          rounding_mode,
 838                                                            infinity            :          lv_inf,
 839                                                            add_flags           :          flags3,
 840                                                            invalid             :          lv_inv,
 841                                                            zero                :          lv_zero,
 842                                                            _operation          :          operation,
 843                                                            _negate             :          _negate,
 844                                                            mul                 :          mul,
 845                                                            muladd              :          muladd,
 846                                                            quiet_nan_two       :          quiet_nan_two,
 847                                                            inp_denormal        :          lv_op1_subnormal | lv_op2_subnormal
 848                                                          };
 849     endmethod
 850
 851
 852    method Floating_output#(fpinp) get_result();
            // Returns the current contents of ff_final_out (result word plus flag vector), which
            // is written by the final-stage rule. The method has no guard of its own here, so the
            // caller must know when the value is meaningful.
 853        return ff_final_out;
 854    endmethod
 855     method Action flush;
             // Asserts wr_flush. The stage-4 and final-stage rules are guarded by !wr_flush, so
             // they do not fire while it is asserted.
 856         wr_flush <= True;
 857     endmethod
 858 endmodule
 859
 860
 861 module mkTb_fpu_fm_add_sub(Empty);
         // Minimal single-precision testbench. It instantiates the unit with
         // (fpinp, fpman, fpexp) = (32, 23, 8), applies one fixed operand triple when the cycle
         // counter equals 1, reads get_result every cycle (printed only when `verbose` is
         // defined) and calls $finish when the counter reaches 20.
 862
 863         Ifc_fpu_fm_add_sub#(32,23,8) uut <- mkfpu_fm_add_sub();
 864
         // Builds one 5-bit condition vector per operand from its (mantissa, exponent) pair.
         // Bit order, MSB first: Denormal, isZero, QNaN, Infinity, SNaN.
         // topB1 omits the "!= 0" term that topB2/topB3 carry; the term is redundant because a
         // set top bit already implies a non-zero mantissa. manO1..manO3 are computed but unused.
 865     function Tuple3#(Bit#(5), Bit#(5), Bit#(5)) condFlags (Tuple2#(Bit#(m), Bit#(e)) x, Tuple2#(Bit#(m), Bit#(e)) y, Tuple2#(Bit#(m),Bit#(e)) z);
 866         let s = valueOf(m);
 867         let man1  = tpl_1(x);
 868         let expo1 = tpl_2(x);
 869         let man2  = tpl_1(y);
 870         let expo2 = tpl_2(y);
 871         let man3  = tpl_1(z);
 872         let expo3 = tpl_2(z);
 873         Bit#(5) flags1, flags2,flags3;
 874         Bool expZ1 = (expo1 == 0);
 875         Bool manZ1 = (man1  == 0);
 876         Bool expO1 = (expo1 == '1);
 877         Bool manO1 = (man1  == '1);
 878         Bool topB1 = (man1[s-1] == 1);
 879         Bool expZ2 = (expo2 == 0);
 880         Bool manZ2 = (man2  == 0);
 881         Bool expO2 = (expo2 == '1);
 882         Bool manO2 = (man2  == '1);
 883         Bool topB2 = (man2[s-1] == 1 && man2 !=0);
 884         Bool expZ3 = (expo3 == 0);
 885         Bool manZ3 = (man3  == 0);
 886         Bool expO3 = (expo3 == '1);
 887         Bool manO3 = (man3  == '1);
 888         Bool topB3 = (man3[s-1] == 1 && man3 !=0);
 889         flags1 = {pack(expZ1 && !manZ1),pack(manZ1 && expZ1),pack(expO1 && topB1),pack(expO1 && manZ1),pack(expO1 && !topB1 && !manZ1)}; //Denormal, isZero, QNaN, Infinity, SNaN
 890         flags2 = {pack(expZ2 && !manZ2),pack(manZ2 && expZ2),pack(expO2 && topB2),pack(expO2 && manZ2),pack(expO2 && !topB2 && !manZ2)}; //Denormal, isZero, QNaN, Infinity, SNaN
 891         flags3 = {pack(expZ3 && !manZ3),pack(manZ3 && expZ3),pack(expO3 && topB3),pack(expO3 && manZ3),pack(expO3 && !topB3 && !manZ3)}; //Denormal, isZero, QNaN, Infinity, SNaN
 892         return tuple3(flags1,flags2,flags3);
 893     endfunction
 894
         // Returns the low m bits (the mantissa field) of each of the three operands.
 895     function Tuple3#(Bit#(m),Bit#(m), Bit#(m)) getMantissa (Bit#(n) op1, Bit#(n) op2, Bit#(n) op3)
 896         provisos(Add#(TAdd#(m,1),e,n),
 897                  Add#(7,a__,e)
 898                 );
 899         let expo = valueOf(e);
 900         let man  = valueOf(m);
 901         return tuple3(op1[man-1:0],op2[man-1:0],op3[man-1:0]);
 902     endfunction
 903
         // Returns bits [n-2 : m] (the exponent field, i.e. everything between the sign bit and
         // the mantissa) of each of the three operands.
 904     function Tuple3#(Bit#(e), Bit#(e), Bit#(e)) getExp (Bit#(n) op1, Bit#(n) op2, Bit#(n) op3)
 905         provisos(Add#(TAdd#(m,1),e,n),
 906                  Add#(7,a__,e)
 907                 );
 908         let inp = valueOf(n);
 909         let man  = valueOf(m);
 910         return tuple3(op1[inp-2:man], op2[inp-2:man], op3[inp-2:man]);
 911     endfunction
 912
         // True when the upper 32 bits of a 64-bit word are all ones.
 913     function Bool isNaNBox(Bit#(64) op);
 914         return (op[63:32]=='1);
 915     endfunction
 916
         // For each 64-bit operand: its low 32 bits when isNaNBox holds, otherwise 32'h7fc00000.
 917     function Tuple3#(Bit#(32),Bit#(32),Bit#(32)) setCanNaN (Bit#(64) op1, Bit#(64) op2, Bit#(64) op3);
 918         return tuple3(isNaNBox(op1)? truncate(op1) : 32'h7fc00000, isNaNBox(op2)? truncate(op2) : 32'h7fc00000, isNaNBox(op3)? truncate(op3) : 32'h7fc00000);
 919     endfunction
 920
         // Unique wrappers around the helper functions. Only condFlags32, getMant32 and getExp32
         // are called by the rules of this module; condFlags64, getMant64, getExp64 and
         // setCanonicalNaN are instantiated but not called here.
 921     Wrapper3#(Tuple2#(Bit#(23), Bit#(8)),Tuple2#(Bit#(23), Bit#(8)), Tuple2#(Bit#(23), Bit#(8)),  Tuple3#(Bit#(5),Bit#(5),Bit#(5)))    condFlags32     <- mkUniqueWrapper3(condFlags);
 922     Wrapper3#(Tuple2#(Bit#(52), Bit#(11)),Tuple2#(Bit#(52), Bit#(11)),Tuple2#(Bit#(52), Bit#(11)), Tuple3#(Bit#(5),Bit#(5),Bit#(5)))   condFlags64     <- mkUniqueWrapper3(condFlags);
 923     Wrapper3#(Bit#(32),Bit#(32),Bit#(32),Tuple3#(Bit#(23),Bit#(23),Bit#(23)))                                                          getMant32       <- mkUniqueWrapper3(getMantissa);
 924     Wrapper3#(Bit#(32),Bit#(32),Bit#(32),Tuple3#(Bit#(8),Bit#(8),Bit#(8)))                                                             getExp32        <- mkUniqueWrapper3(getExp);
 925     Wrapper3#(Bit#(64),Bit#(64),Bit#(64),Tuple3#(Bit#(52),Bit#(52),Bit#(52)))                                                          getMant64       <- mkUniqueWrapper3(getMantissa);
 926     Wrapper3#(Bit#(64),Bit#(64),Bit#(64),Tuple3#(Bit#(11),Bit#(11),Bit#(11)))                                                          getExp64        <- mkUniqueWrapper3(getExp);
 927     Wrapper3#(Bit#(64),Bit#(64),Bit#(64),Tuple3#(Bit#(32),Bit#(32),Bit#(32)))                                                          setCanonicalNaN <- mkUniqueWrapper3(setCanNaN);
 928
         // Cycle counter and the fixed stimulus (operand3 is all zeros).
 929     Reg#(Bit#(32)) rg_clock <-mkReg(0);
 930     //Reg#(Bit#(64)) operand1 <- mkReg(64'h17fffffffffff860);
 931     //Reg#(Bit#(64)) operand2 <- mkReg(64'h0000000000000200);
 932     //Reg#(Bit#(64)) operand3 <- mkReg(64'h000000000000005f);
 933     Reg#(Bit#(32)) operand1 <- mkReg(32'h31f36ab4);
 934     Reg#(Bit#(32)) operand2 <- mkReg(32'h08835f4d);
 935     Reg#(Bit#(32)) operand3 <- mkReg(32'h0);
 936
         // Increments the counter every cycle and ends the simulation when it reads 20.
 937     rule rl_count_clock ;
 938         rg_clock<=rg_clock+1;
 939         if(rg_clock=='d20) $finish(0);
 940     endrule
 941
         // At count 1: splits the operands into fields, derives the condition vectors and starts
         // the unit with rounding_mode = 3'b0, operation = 0, _negate = 0, mul = 0, muladd = 1.
 942     rule rl_input1(rg_clock==1);
 943              let {man1,man2,man3}   <- getMant32.func(operand1,operand2, operand3);
 944              let {exp1,exp2,exp3}   <- getExp32.func(operand1,operand2, operand3);
 945              let x <- condFlags32.func(tuple2(man1,exp1),tuple2(man2,exp2),tuple2(man3,exp3));
 946              let sign1 = operand1[31];
 947              let sign2 = operand2[31];
 948              let sign3 = operand3[31];
 949              uut._start(tuple3(sign1,exp1,man1),tuple3(sign2,exp2,man2),tuple3(sign3,exp3,man3),3'b0,1'b0,1'b0,1'b0,1'b1,x);
 950 `ifdef verbose $display("giving inputs at %0d", rg_clock); `endif
 951
 952     endrule
 953
         // Despite its name this rule does not end the simulation: it only reads the result and,
         // when `verbose` is defined, prints its low 32 bits together with the cycle count.
 954     rule rl_finish;
 955         let res = uut.get_result();
 956         `ifdef verbose $display("Output = %h at %0d",res.final_result[31:0], rg_clock); `endif
 957     endrule
 958
 959 endmodule
 960
 961 `ifdef fpu_hierarchical
 962 (*synthesize*)
 963 module mkfpu_fm_add_sub32(Ifc_fpu_fm_add_sub32);
         // Separately synthesized, non-polymorphic wrapper: instantiates the polymorphic unit
         // with (fpinp, fpman, fpexp) = (32, 23, 8) and forwards every interface method to it
         // unchanged.
 964         Ifc_fpu_fm_add_sub#(32,23,8) uut <- mkfpu_fm_add_sub();
 965
 966    method Action _start(Tuple3#(Bit#(1),Bit#(8),Bit#(23)) _operand1, Tuple3#(Bit#(1),Bit#(8),Bit#(23)) _operand2,Tuple3#(Bit#(1),Bit#(8),Bit#(23)) _operand3, Bit#(3) rounding_mode, bit operation, bit _negate, bit mul, bit muladd, Tuple3#(Bit#(5),Bit#(5),Bit#(5)) flags);
 967         uut._start(_operand1,_operand2,_operand3,rounding_mode,operation,_negate,mul,muladd,flags);
 968    endmethod
 969     method Floating_output#(32) get_result();
 970         return uut.get_result();
 971     endmethod
 972     method Action flush;
 973         uut.flush();
 974     endmethod
 975 endmodule
 976
 977 (*synthesize*)
 978 module mkfpu_fm_add_sub64(Ifc_fpu_fm_add_sub64);
         // Separately synthesized, non-polymorphic wrapper: instantiates the polymorphic unit
         // with (fpinp, fpman, fpexp) = (64, 52, 11) and forwards every interface method to it
         // unchanged.
 979         Ifc_fpu_fm_add_sub#(64,52,11) uut <- mkfpu_fm_add_sub();
 980    method Action _start(Tuple3#(Bit#(1),Bit#(11),Bit#(52)) _operand1, Tuple3#(Bit#(1),Bit#(11),Bit#(52)) _operand2,Tuple3#(Bit#(1),Bit#(11),Bit#(52)) _operand3, Bit#(3) rounding_mode, bit operation, bit _negate, bit mul,bit muladd, Tuple3#(Bit#(5),Bit#(5),Bit#(5)) flags);
 981         uut._start(_operand1,_operand2,_operand3,rounding_mode,operation,_negate,mul,muladd, flags);
 982    endmethod
 983     method Floating_output#(64) get_result();
 984         return uut.get_result();
 985     endmethod
 986     method Action flush;
 987         uut.flush();
 988     endmethod
 989 endmodule
 990 `endif
 991
 992 //module mkTb_fpu_fm_add_sub_2 (Empty);
 993 //
 994 ////    RegFile #(Bit #(16), Bit #(100))  input_data <- mkRegFileFullLoad("./testcases/fma_inp_nor.txt");
 995 ////    RegFile #(Bit #(16), Bit #(68))  input_data <- mkRegFileFullLoad("./testcases/mul_denormal_testcases.txt");
 996 //    RegFile #(Bit #(16), Bit #(68))  input_data <- mkRegFileFullLoad("./testcases/Add_normal_testcases.hex");
 997 //      Reg #(Bit #(16)) index <- mkReg(0);
 998 //
 999 //      Ifc_fpu_fm_add_sub#(32,23,8,16) multiplier <- mkfpu_fm_add_sub();
1000 //      Reg #(Bit #(32)) state_clock <- mkReg(1);
1001 //    Reg #(Bit #(1))  rg_state <- mkReg(0);
1002 //
1003 //      Reg#(int) cnt <- mkReg(0);                  //File Variable
1004 //      let fh <- mkReg(InvalidFile) ;                          //File handler
1005 //
1006 //      //rule for file creation
1007 //      rule open (cnt == 0 ) ;
1008 //              File tb_mul_output <- $fopen("tb_madd_output.hex", "w+");
1009 //              fh <= tb_mul_output;
1010 //              cnt <= 1 ;
1011 //      endrule
1012 //
1013 //      rule state_clock_count;
1014 //              state_clock <= state_clock + 1;
1015 //      endrule
1016 //
1017 //      rule take_input_in (rg_state == 0);
1018 //      //      multiplier._start(input_data.sub(index)[99:68],input_data.sub(index)[67:36],input_data.sub(index)[35:4],0,input_data.sub(index)[2:0],0,0);
1019 //      //      multiplier._start(input_data.sub(index)[67:36],input_data.sub(index)[35:4],32'b0,0,input_data.sub(index)[2:0],0,0);
1020 //              multiplier._start(32'h3f800000, input_data.sub(index)[67:36],input_data.sub(index)[35:4],0,input_data.sub(index)[2:0],0,0);
1021 //              index <= index + 1;
1022 //          rg_state <= 1;
1023 //      endrule
1024 //
1025 //      rule display_output (rg_state == 1);
1026 //        let abc = multiplier.get_result();
1027 //              $fwrite(fh, "%h\n", abc.final_result[31:0]);
1028 //              rg_state <= 0;
1029 //      endrule
1030 //
1031 //      rule end_testing (index == 16562);
1032 //              $finish(0);
1033 //      endrule : end_testing
1034 //
1035 //endmodule
1036
1037 endpackage