src/add/nmigen_add_experiment.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat
   6 from nmigen.cli import main
   7
   8
   9 class FPNum:
  10     def __init__(self, width, m_width=None):
  11         self.width = width
  12         if m_width is None:
  13             m_width = width + 3 # extra accuracy bits
  14         self.v = Signal(width)      # Latched copy of value
  15         self.m = Signal(m_width)    # Mantissa
  16         self.e = Signal((10, True)) # Exponent: 10 bits, signed
  17         self.s = Signal()           # Sign bit
  18
  19     def decode(self):
  20         v = self.v
  21         return [self.m.eq(Cat(0, 0, 0, v[0:23])), # mantissa
  22                 self.e.eq(Cat(v[23:31]) - 127),   # exponent (take off bias)
  23                 self.s.eq(Cat(v[31])),            # sign
  24                 ]
  25
  26     def create(self, s, e, m):
  27         return [
  28           self.v[31].eq(s),    # sign
  29           self.v[23:31].eq(e), # exp
  30           self.v[0:23].eq(m)   # mantissa
  31         ]
  32
  33     def nan(self, s):
  34         return self.create(s, 0xff, 1<<22)
  35
  36     def inf(self, s):
  37         return self.create(s, 0xff, 0)
  38
  39     def is_nan(self):
  40         return (self.e == 128) & (self.m != 0)
  41
  42     def is_inf(self):
  43         return (self.e == 128) & (self.m == 0)
  44
  45     def is_zero(self):
  46         return (self.e == -127) & (self.m == 0)
  47
  48
  49 class FPADD:
  50     def __init__(self, width):
  51         self.width = width
  52
  53         self.in_a     = Signal(width)
  54         self.in_a_stb = Signal()
  55         self.in_a_ack = Signal()
  56
  57         self.in_b     = Signal(width)
  58         self.in_b_stb = Signal()
  59         self.in_b_ack = Signal()
  60
  61         self.out_z     = Signal(width)
  62         self.out_z_stb = Signal()
  63         self.out_z_ack = Signal()
  64
  65     def get_fragment(self, platform):
  66         m = Module()
  67
  68         # Latches
  69         a = FPNum(self.width)
  70         b = FPNum(self.width)
  71         z = FPNum(self.width, 24)
  72
  73         # Sign
  74         a_s = Signal()
  75         b_s = Signal()
  76         z_s = Signal()
  77
  78         guard = Signal()
  79         round_bit = Signal()
  80         sticky = Signal()
  81
  82         tot = Signal(28)
  83
  84         with m.FSM() as fsm:
  85
  86             # ******
  87             # gets operand a
  88
  89             with m.State("get_a"):
  90                 with m.If((self.in_a_ack) & (self.in_a_stb)):
  91                     m.next = "get_b"
  92                     m.d.sync += [
  93                         a.v.eq(self.in_a),
  94                         self.in_a_ack.eq(0)
  95                     ]
  96                 with m.Else():
  97                     m.d.sync += self.in_a_ack.eq(1)
  98
  99             # ******
 100             # gets operand b
 101
 102             with m.State("get_b"):
 103                 with m.If((self.in_b_ack) & (self.in_b_stb)):
 104                     m.next = "get_a"
 105                     m.d.sync += [
 106                         b.v.eq(self.in_b),
 107                         self.in_b_ack.eq(0)
 108                     ]
 109                 with m.Else():
 110                     m.d.sync += self.in_b_ack.eq(1)
 111
 112             # ******
 113             # unpacks operands into sign, mantissa and exponent
 114
 115             with m.State("unpack"):
 116                 m.next = "special_cases"
 117                 m.d.sync += a.decode()
 118                 m.d.sync += b.decode()
 119
 120             # ******
 121             # special cases: NaNs, infs, zeros, denormalised
 122
 123             with m.State("special_cases"):
 124
 125                 # if a is NaN or b is NaN return NaN
 126                 with m.If(a.is_nan() | b.is_nan()):
 127                     m.next = "put_z"
 128                     m.d.sync += z.nan(1)
 129
 130                 # if a is inf return inf (or NaN)
 131                 with m.Elif(a.is_inf()):
 132                     m.next = "put_z"
 133                     m.d.sync += z.inf(a.s)
 134                     # if a is inf and signs don't match return NaN
 135                     with m.If((b.e == 128) & (a.s != b.s)):
 136                         m.d.sync += z.nan(b.s)
 137
 138                 # if b is inf return inf
 139                 with m.Elif(b.is_inf()):
 140                     m.next = "put_z"
 141                     m.d.sync += z.inf(b.s)
 142
 143                 # if a is zero and b zero return signed-a/b
 144                 with m.Elif(a.is_zero() & b.is_zero()):
 145                     m.next = "put_z"
 146                     m.d.sync += z.create(a.s & b.s, b.e[0:8] + 127, b.m[3:26])
 147
 148                 # if a is zero return b
 149                 with m.Elif((a.is_zero()):
 150                     m.next = "put_z"
 151                     m.d.sync += z.create(b.s, b.e[0:8] + 127, b.m[3:26])
 152
 153                 # if b is zero return a
 154                 with m.Elif((b.is_zero()):
 155                     m.next = "put_z"
 156                     m.d.sync += z.create(a.s, a.e[0:8] + 127, a.m[3:26])
 157
 158                 # Denormalised Number checks
 159                 with m.Else():
 160                     m.next = "align"
 161                     # denormalise a check
 162                     with m.If(a.e == -127):
 163                         m.d.sync += a.e.eq(-126) # limit a exponent
 164                     with m.Else():
 165                         m.d.sync += a.m[26].eq(1) # set highest mantissa bit
 166                     # denormalise b check
 167                     with m.If(b.e == -127):
 168                         m.d.sync += b.e.eq(-126) # limit b exponent
 169                     with m.Else():
 170                         m.d.sync += b.m[26].eq(1) # set highest mantissa bit
 171
 172             # ******
 173             # align.  NOTE: this does *not* do single-cycle multi-shifting,
 174             #         it *STAYS* in the align state until the exponents match
 175
 176             with m.State("align"):
 177                 # exponent of a greater than b: increment b exp, shift b mant
 178                 with m.If(a.e > b.e):
 179                     m.d.sync += [
 180                       b.e.eq(b.e + 1),
 181                       b.m.eq(b.m >> 1),
 182                       b.m[0].eq(b.m[0] | b.m[1]) # moo??
 183                     ]
 184                 # exponent of b greater than a: increment a exp, shift a mant
 185                 with m.Elif(a.e < b.e):
 186                     m.d.sync += [
 187                       a.e.eq(a.e + 1),
 188                       a.m.eq(a.m >> 1),
 189                       a.m[0].eq(a.m[0] | a.m[1]) # moo??
 190                     ]
 191                 # exponents equal: move to next stage.
 192                 with m.Else():
 193                     m.next = "add_0"
 194
 195             # ******
 196             # First stage of add.  covers same-sign (add) and subtract
 197             # special-casing when mantissas are greater or equal, to
 198             # give greatest accuracy.
 199
 200             with m.State("add_0"):
 201                 m.next = "add_1"
 202                 m.d.sync += z.e.eq(a.e)
 203                 # same-sign (both negative or both positive) add mantissas
 204                 with m.If(a.s == b.s):
 205                     m.d.sync += [
 206                         tot.eq(a.m + b.m),
 207                         z_s.eq(a.s)
 208                     ]
 209                 # a mantissa greater than b, use a
 210                 with m.Elif(a.m >= b.m):
 211                     m.d.sync += [
 212                         tot.eq(a.m - b.m),
 213                         z_s.eq(a.s)
 214                     ]
 215                 # b mantissa greater than a, use b
 216                 with m.Else():
 217                     m.d.sync += [
 218                         tot.eq(b.m - a.m),
 219                         z_s.eq(b.s)
 220                 ]
 221
 222             # ******
 223             # Second stage of add: preparation for normalisation.
 224             # detects when tot sum is too big (tot[27] is kinda a carry bit)
 225
 226             with m.State("add_1"):
 227                 m.next = "normalise_1"
 228                 # tot[27] gets set when the sum overflows. shift result down
 229                 with m.If(tot[27]):
 230                     m.d.sync += [
 231                         z.m.eq(tot[4:28]),
 232                         guard.eq(tot[3]),
 233                         round_bit.eq(tot[2]),
 234                         sticky.eq(tot[1] | tot[0]),
 235                         z.e.eq(z.e + 1)
 236                 ]
 237                 # tot[27] zero case
 238                 with m.Else():
 239                     m.d.sync += [
 240                         z.m.eq(tot[3:27]),
 241                         guard.eq(tot[2]),
 242                         round_bit.eq(tot[1]),
 243                         sticky.eq(tot[0])
 244                 ]
 245
 246             # ******
 247             # First stage of normalisation.
 248             # NOTE: just like "align", this one keeps going round every clock
 249             #       until the result's exponent is within acceptable "range"
 250             # NOTE: the weirdness of reassigning guard and round is due to
 251             #       the extra mantissa bits coming from tot[0..2]
 252
 253             with m.State("normalise_1"):
 254                 with m.If((z.m[23] == 0) & (z.e > -126)):
 255                     m.d.sync +=[
 256                         z.e.eq(z.e - 1),  # DECREASE exponent
 257                         z.m.eq(z.m << 1), # shift mantissa UP
 258                         z.m[0].eq(guard), # steal guard bit (was tot[2])
 259                         guard.eq(round_bit), # steal round_bit (was tot[1])
 260                     ]
 261                 with m.Else():
 262                     m.next = "normalize_2"
 263
 264             # ******
 265             # Second stage of normalisation.
 266             # NOTE: just like "align", this one keeps going round every clock
 267             #       until the result's exponent is within acceptable "range"
 268             # NOTE: the weirdness of reassigning guard and round is due to
 269             #       the extra mantissa bits coming from tot[0..2]
 270
 271             with m.State("normalise_2"):
 272                 with m.If(z.e < -126):
 273                     m.d.sync +=[
 274                         z.e.eq(z.e + 1),  # INCREASE exponent
 275                         z.m.eq(z.m >> 1), # shift mantissa DOWN
 276                         guard.eq(z.m[0]),
 277                         round_bit.eq(guard),
 278                         sticky.eq(sticky | round_bit)
 279                     ]
 280                 with m.Else():
 281                     m.next = "round"
 282
 283             # ******
 284             # rounding stage
 285
 286             with m.State("round"):
 287                 m.next = "pack"
 288                 with m.If(guard & (round_bit | sticky | z.m[0])):
 289                     m.d.sync += z.m.eq(z.m + 1) # mantissa rounds up
 290                     with m.If(z.m == 0xffffff): # all 1s
 291                         m.d.sync += z.e.eq(z.e + 1) # exponent rounds up
 292
 293         return m
 294
 295 """
 296   always @(posedge clk)
 297   begin
 298
 299     case(state)
 300
 301       get_a:
 302       begin
 303         s_in_a_ack <= 1;
 304         if (s_in_a_ack && in_a_stb) begin
 305           a <= in_a;
 306           s_in_a_ack <= 0;
 307           state <= get_b;
 308         end
 309       end
 310
 311       get_b:
 312       begin
 313         s_in_b_ack <= 1;
 314         if (s_in_b_ack && in_b_stb) begin
 315           b <= in_b;
 316           s_in_b_ack <= 0;
 317           state <= unpack;
 318         end
 319       end
 320
 321       unpack:
 322       begin
 323         a_m <= {a[22 : 0], 3'd0};
 324         b_m <= {b[22 : 0], 3'd0};
 325         a_e <= a[30 : 23] - 127;
 326         b_e <= b[30 : 23] - 127;
 327         a_s <= a[31];
 328         b_s <= b[31];
 329         state <= special_cases;
 330       end
 331
 332       special_cases:
 333       begin
 334         //if a is NaN or b is NaN return NaN
 335         if ((a_e == 128 && a_m != 0) || (b_e == 128 && b_m != 0)) begin
 336           z[31] <= 1;
 337           z[30:23] <= 255;
 338           z[22] <= 1;
 339           z[21:0] <= 0;
 340           state <= put_z;
 341         //if a is inf return inf
 342         end else if (a_e == 128) begin
 343           z[31] <= a_s;
 344           z[30:23] <= 255;
 345           z[22:0] <= 0;
 346           //if a is inf and signs don't match return nan
 347           if ((b_e == 128) && (a_s != b_s)) begin
 348               z[31] <= b_s;
 349               z[30:23] <= 255;
 350               z[22] <= 1;
 351               z[21:0] <= 0;
 352           end
 353           state <= put_z;
 354         //if b is inf return inf
 355         end else if (b_e == 128) begin
 356           z[31] <= b_s;
 357           z[30:23] <= 255;
 358           z[22:0] <= 0;
 359           state <= put_z;
 360         //if a is zero and b is zero return signed zero (sign = a_s & b_s)
 361         end else if ((($signed(a_e) == -127) && (a_m == 0)) && (($signed(b_e) == -127) && (b_m == 0))) begin
 362           z[31] <= a_s & b_s;
 363           z[30:23] <= b_e[7:0] + 127;
 364           z[22:0] <= b_m[26:3];
 365           state <= put_z;
 366         //if a is zero return b
 367         end else if (($signed(a_e) == -127) && (a_m == 0)) begin
 368           z[31] <= b_s;
 369           z[30:23] <= b_e[7:0] + 127;
 370           z[22:0] <= b_m[26:3];
 371           state <= put_z;
 372         //if b is zero return a
 373         end else if (($signed(b_e) == -127) && (b_m == 0)) begin
 374           z[31] <= a_s;
 375           z[30:23] <= a_e[7:0] + 127;
 376           z[22:0] <= a_m[26:3];
 377           state <= put_z;
 378         end else begin
 379           //Denormalised Number
 380           if ($signed(a_e) == -127) begin
 381             a_e <= -126;
 382           end else begin
 383             a_m[26] <= 1;
 384           end
 385           //Denormalised Number
 386           if ($signed(b_e) == -127) begin
 387             b_e <= -126;
 388           end else begin
 389             b_m[26] <= 1;
 390           end
 391           state <= align;
 392         end
 393       end
 394
 395       align:
 396       begin
 397         if ($signed(a_e) > $signed(b_e)) begin
 398           b_e <= b_e + 1;
 399           b_m <= b_m >> 1;
 400           b_m[0] <= b_m[0] | b_m[1];
 401         end else if ($signed(a_e) < $signed(b_e)) begin
 402           a_e <= a_e + 1;
 403           a_m <= a_m >> 1;
 404           a_m[0] <= a_m[0] | a_m[1];
 405         end else begin
 406           state <= add_0;
 407         end
 408       end
 409
 410       add_0:
 411       begin
 412         z_e <= a_e;
 413         if (a_s == b_s) begin
 414           tot <= a_m + b_m;
 415           z_s <= a_s;
 416         end else begin
 417           if (a_m >= b_m) begin
 418             tot <= a_m - b_m;
 419             z_s <= a_s;
 420           end else begin
 421             tot <= b_m - a_m;
 422             z_s <= b_s;
 423           end
 424         end
 425         state <= add_1;
 426       end
 427
 428       add_1:
 429       begin
 430         if (tot[27]) begin
 431           z_m <= tot[27:4];
 432           guard <= tot[3];
 433           round_bit <= tot[2];
 434           sticky <= tot[1] | tot[0];
 435           z_e <= z_e + 1;
 436         end else begin
 437           z_m <= tot[26:3];
 438           guard <= tot[2];
 439           round_bit <= tot[1];
 440           sticky <= tot[0];
 441         end
 442         state <= normalise_1;
 443       end
 444
 445       normalise_1:
 446       begin
 447         if (z_m[23] == 0 && $signed(z_e) > -126) begin
 448           z_e <= z_e - 1;
 449           z_m <= z_m << 1;
 450           z_m[0] <= guard;
 451           guard <= round_bit;
 452           round_bit <= 0;
 453         end else begin
 454           state <= normalise_2;
 455         end
 456       end
 457
 458       normalise_2:
 459       begin
 460         if ($signed(z_e) < -126) begin
 461           z_e <= z_e + 1;
 462           z_m <= z_m >> 1;
 463           guard <= z_m[0];
 464           round_bit <= guard;
 465           sticky <= sticky | round_bit;
 466         end else begin
 467           state <= round;
 468         end
 469       end
 470
 471       round:
 472       begin
 473         if (guard && (round_bit | sticky | z_m[0])) begin
 474           z_m <= z_m + 1;
 475           if (z_m == 24'hffffff) begin
 476             z_e <=z_e + 1;
 477           end
 478         end
 479         state <= pack;
 480       end
 481
 482       pack:
 483       begin
 484         z[22 : 0] <= z_m[22:0];
 485         z[30 : 23] <= z_e[7:0] + 127;
 486         z[31] <= z_s;
 487         if ($signed(z_e) == -126 && z_m[23] == 0) begin
 488           z[30 : 23] <= 0;
 489         end
 490         if ($signed(z_e) == -126 && z_m[23:0] == 24'h0) begin
 491           z[31] <= 1'b0; // FIX SIGN BUG: -a + a = +0.
 492         end
 493         //if overflow occurs, return inf
 494         if ($signed(z_e) > 127) begin
 495           z[22 : 0] <= 0;
 496           z[30 : 23] <= 255;
 497           z[31] <= z_s;
 498         end
 499         state <= put_z;
 500       end
 501
 502       put_z:
 503       begin
 504         s_out_z_stb <= 1;
 505         s_out_z <= z;
 506         if (s_out_z_stb && out_z_ack) begin
 507           s_out_z_stb <= 0;
 508           state <= get_a;
 509         end
 510       end
 511
 512     endcase
 513
 514     if (rst == 1) begin
 515       state <= get_a;
 516       s_in_a_ack <= 0;
 517       s_in_b_ack <= 0;
 518       s_out_z_stb <= 0;
 519     end
 520
 521   end
 522   assign in_a_ack = s_in_a_ack;
 523   assign in_b_ack = s_in_b_ack;
 524   assign out_z_stb = s_out_z_stb;
 525   assign out_z = s_out_z;
 526
 527 endmodule
 528 """
 529
 530 if __name__ == "__main__":
 531     alu = FPADD(width=32)
 532     main(alu, ports=[
 533                     alu.in_a, alu.in_a_stb, alu.in_a_ack,
 534                     alu.in_b, alu.in_b_stb, alu.in_b_ack,
 535                     alu.out_z, alu.out_z_stb, alu.out_z_ack,
 536         ])