src/add/nmigen_add_experiment.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat, Mux, Array, Const
   6 from nmigen.lib.coding import PriorityEncoder
   7 from nmigen.cli import main, verilog
   8 from math import log
   9
  10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
  11 from fpbase import MultiShiftRMerge, Trigger
  12 #from fpbase import FPNumShiftMultiRight
  13
  14
  15 class FPState(FPBase):
  16     def __init__(self, state_from):
  17         self.state_from = state_from
  18
  19     def set_inputs(self, inputs):
  20         self.inputs = inputs
  21         for k,v in inputs.items():
  22             setattr(self, k, v)
  23
  24     def set_outputs(self, outputs):
  25         self.outputs = outputs
  26         for k,v in outputs.items():
  27             setattr(self, k, v)
  28
  29
  30 class FPGetSyncOpsMod:
  31     def __init__(self, width, num_ops=2):
  32         self.width = width
  33         self.num_ops = num_ops
  34         inops = []
  35         outops = []
  36         for i in range(num_ops):
  37             inops.append(Signal(width, reset_less=True))
  38             outops.append(Signal(width, reset_less=True))
  39         self.in_op = inops
  40         self.out_op = outops
  41         self.stb = Signal(num_ops)
  42         self.ack = Signal()
  43         self.ready = Signal(reset_less=True)
  44         self.out_decode = Signal(reset_less=True)
  45
  46     def elaborate(self, platform):
  47         m = Module()
  48         m.d.comb += self.ready.eq(self.stb == Const(-1, (self.num_ops, False)))
  49         m.d.comb += self.out_decode.eq(self.ack & self.ready)
  50         with m.If(self.out_decode):
  51             for i in range(self.num_ops):
  52                 m.d.comb += [
  53                         self.out_op[i].eq(self.in_op[i]),
  54                 ]
  55         return m
  56
  57     def ports(self):
  58         return self.in_op + self.out_op + [self.stb, self.ack]
  59
  60
  61 class FPOps(Trigger):
  62     def __init__(self, width, num_ops):
  63         Trigger.__init__(self)
  64         self.width = width
  65         self.num_ops = num_ops
  66
  67         res = []
  68         for i in range(num_ops):
  69             res.append(Signal(width))
  70         self.v  = Array(res)
  71
  72     def ports(self):
  73         res = []
  74         for i in range(self.num_ops):
  75             res.append(self.v[i])
  76         res.append(self.ack)
  77         res.append(self.stb)
  78         return res
  79
  80
  81 class InputGroup:
  82     def __init__(self, width, num_ops=2, num_rows=4):
  83         self.width = width
  84         self.num_ops = num_ops
  85         self.num_rows = num_rows
  86         self.mmax = int(log(self.num_rows) / log(2))
  87         self.rs = []
  88         self.mid = Signal(self.mmax, reset_less=True) # multiplex id
  89         for i in range(num_rows):
  90             self.rs.append(FPGetSyncOpsMod(width, num_ops))
  91         self.rs = Array(self.rs)
  92
  93         self.out_op = FPOps(width, num_ops)
  94
  95     def elaborate(self, platform):
  96         m = Module()
  97
  98         pe = PriorityEncoder(self.num_rows)
  99         m.submodules.selector = pe
 100         m.submodules.out_op = self.out_op
 101         m.submodules.out_op_v = self.out_op.v
 102         m.submodules += self.rs
 103
 104         # connect priority encoder
 105         in_ready = []
 106         for i in range(self.num_rows):
 107             in_ready.append(self.rs[i].ready)
 108         m.d.comb += pe.i.eq(Cat(*in_ready))
 109
 110         active = Signal(reset_less=True)
 111         out_en = Signal(reset_less=True)
 112         m.d.comb += active.eq(~pe.n) # encoder active
 113         m.d.comb += out_en.eq(active & self.out_op.trigger)
 114
 115         # encoder active: ack relevant input, record MID, pass output
 116         with m.If(out_en):
 117             rs = self.rs[pe.o]
 118             m.d.sync += self.mid.eq(pe.o)
 119             m.d.sync += rs.ack.eq(0)
 120             m.d.sync += self.out_op.stb.eq(0)
 121             for j in range(self.num_ops):
 122                 m.d.sync += self.out_op.v[j].eq(rs.out_op[j])
 123         with m.Else():
 124             m.d.sync += self.out_op.stb.eq(1)
 125             # acks all default to zero
 126             for i in range(self.num_rows):
 127                 m.d.sync += self.rs[i].ack.eq(1)
 128
 129         return m
 130
 131     def ports(self):
 132         res = []
 133         for i in range(self.num_rows):
 134             inop = self.rs[i]
 135             res += inop.in_op + [inop.stb]
 136         return self.out_op.ports() + res + [self.mid]
 137
 138
 139 class FPGetOpMod:
 140     def __init__(self, width):
 141         self.in_op = FPOp(width)
 142         self.out_op = Signal(width)
 143         self.out_decode = Signal(reset_less=True)
 144
 145     def elaborate(self, platform):
 146         m = Module()
 147         m.d.comb += self.out_decode.eq((self.in_op.ack) & (self.in_op.stb))
 148         m.submodules.get_op_in = self.in_op
 149         #m.submodules.get_op_out = self.out_op
 150         with m.If(self.out_decode):
 151             m.d.comb += [
 152                 self.out_op.eq(self.in_op.v),
 153             ]
 154         return m
 155
 156
 157 class FPGetOp(FPState):
 158     """ gets operand
 159     """
 160
 161     def __init__(self, in_state, out_state, in_op, width):
 162         FPState.__init__(self, in_state)
 163         self.out_state = out_state
 164         self.mod = FPGetOpMod(width)
 165         self.in_op = in_op
 166         self.out_op = Signal(width)
 167         self.out_decode = Signal(reset_less=True)
 168
 169     def setup(self, m, in_op):
 170         """ links module to inputs and outputs
 171         """
 172         setattr(m.submodules, self.state_from, self.mod)
 173         m.d.comb += self.mod.in_op.copy(in_op)
 174         #m.d.comb += self.out_op.eq(self.mod.out_op)
 175         m.d.comb += self.out_decode.eq(self.mod.out_decode)
 176
 177     def action(self, m):
 178         with m.If(self.out_decode):
 179             m.next = self.out_state
 180             m.d.sync += [
 181                 self.in_op.ack.eq(0),
 182                 self.out_op.eq(self.mod.out_op)
 183             ]
 184         with m.Else():
 185             m.d.sync += self.in_op.ack.eq(1)
 186
 187
 188 class FPGet2OpMod(Trigger):
 189     def __init__(self, width):
 190         Trigger.__init__(self)
 191         self.in_op1 = Signal(width, reset_less=True)
 192         self.in_op2 = Signal(width, reset_less=True)
 193         self.out_op1 = FPNumIn(None, width)
 194         self.out_op2 = FPNumIn(None, width)
 195
 196     def elaborate(self, platform):
 197         m = Trigger.elaborate(self, platform)
 198         #m.submodules.get_op_in = self.in_op
 199         m.submodules.get_op1_out = self.out_op1
 200         m.submodules.get_op2_out = self.out_op2
 201         with m.If(self.trigger):
 202             m.d.comb += [
 203                 self.out_op1.decode(self.in_op1),
 204                 self.out_op2.decode(self.in_op2),
 205             ]
 206         return m
 207
 208
 209 class FPGet2Op(FPState):
 210     """ gets operands
 211     """
 212
 213     def __init__(self, in_state, out_state, in_op1, in_op2, width):
 214         FPState.__init__(self, in_state)
 215         self.out_state = out_state
 216         self.mod = FPGet2OpMod(width)
 217         self.in_op1 = in_op1
 218         self.in_op2 = in_op2
 219         self.out_op1 = FPNumIn(None, width)
 220         self.out_op2 = FPNumIn(None, width)
 221         self.in_stb = Signal(reset_less=True)
 222         self.out_ack = Signal(reset_less=True)
 223         self.out_decode = Signal(reset_less=True)
 224
 225     def setup(self, m, in_op1, in_op2, in_stb, in_ack):
 226         """ links module to inputs and outputs
 227         """
 228         m.submodules.get_ops = self.mod
 229         m.d.comb += self.mod.in_op1.eq(in_op1)
 230         m.d.comb += self.mod.in_op2.eq(in_op2)
 231         m.d.comb += self.mod.stb.eq(in_stb)
 232         m.d.comb += self.out_ack.eq(self.mod.ack)
 233         m.d.comb += self.out_decode.eq(self.mod.trigger)
 234         m.d.comb += in_ack.eq(self.mod.ack)
 235
 236     def action(self, m):
 237         with m.If(self.out_decode):
 238             m.next = self.out_state
 239             m.d.sync += [
 240                 self.mod.ack.eq(0),
 241                 #self.out_op1.v.eq(self.mod.out_op1.v),
 242                 #self.out_op2.v.eq(self.mod.out_op2.v),
 243                 self.out_op1.copy(self.mod.out_op1),
 244                 self.out_op2.copy(self.mod.out_op2)
 245             ]
 246         with m.Else():
 247             m.d.sync += self.mod.ack.eq(1)
 248
 249
 250 class FPAddSpecialCasesMod:
 251     """ special cases: NaNs, infs, zeros, denormalised
 252         NOTE: some of these are unique to add.  see "Special Operations"
 253         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 254     """
 255
 256     def __init__(self, width):
 257         self.in_a = FPNumBase(width)
 258         self.in_b = FPNumBase(width)
 259         self.out_z = FPNumOut(width, False)
 260         self.out_do_z = Signal(reset_less=True)
 261
 262     def setup(self, m, in_a, in_b, out_do_z):
 263         """ links module to inputs and outputs
 264         """
 265         m.submodules.specialcases = self
 266         m.d.comb += self.in_a.copy(in_a)
 267         m.d.comb += self.in_b.copy(in_b)
 268         m.d.comb += out_do_z.eq(self.out_do_z)
 269
 270     def elaborate(self, platform):
 271         m = Module()
 272
 273         m.submodules.sc_in_a = self.in_a
 274         m.submodules.sc_in_b = self.in_b
 275         m.submodules.sc_out_z = self.out_z
 276
 277         s_nomatch = Signal()
 278         m.d.comb += s_nomatch.eq(self.in_a.s != self.in_b.s)
 279
 280         m_match = Signal()
 281         m.d.comb += m_match.eq(self.in_a.m == self.in_b.m)
 282
 283         # if a is NaN or b is NaN return NaN
 284         with m.If(self.in_a.is_nan | self.in_b.is_nan):
 285             m.d.comb += self.out_do_z.eq(1)
 286             m.d.comb += self.out_z.nan(0)
 287
 288         # XXX WEIRDNESS for FP16 non-canonical NaN handling
 289         # under review
 290
 291         ## if a is zero and b is NaN return -b
 292         #with m.If(a.is_zero & (a.s==0) & b.is_nan):
 293         #    m.d.comb += self.out_do_z.eq(1)
 294         #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
 295
 296         ## if b is zero and a is NaN return -a
 297         #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
 298         #    m.d.comb += self.out_do_z.eq(1)
 299         #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
 300
 301         ## if a is -zero and b is NaN return -b
 302         #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
 303         #    m.d.comb += self.out_do_z.eq(1)
 304         #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
 305
 306         ## if b is -zero and a is NaN return -a
 307         #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
 308         #    m.d.comb += self.out_do_z.eq(1)
 309         #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
 310
 311         # if a is inf return inf (or NaN)
 312         with m.Elif(self.in_a.is_inf):
 313             m.d.comb += self.out_do_z.eq(1)
 314             m.d.comb += self.out_z.inf(self.in_a.s)
 315             # if a is inf and signs don't match return NaN
 316             with m.If(self.in_b.exp_128 & s_nomatch):
 317                 m.d.comb += self.out_z.nan(0)
 318
 319         # if b is inf return inf
 320         with m.Elif(self.in_b.is_inf):
 321             m.d.comb += self.out_do_z.eq(1)
 322             m.d.comb += self.out_z.inf(self.in_b.s)
 323
 324         # if a is zero and b zero return signed-a/b
 325         with m.Elif(self.in_a.is_zero & self.in_b.is_zero):
 326             m.d.comb += self.out_do_z.eq(1)
 327             m.d.comb += self.out_z.create(self.in_a.s & self.in_b.s,
 328                                           self.in_b.e,
 329                                           self.in_b.m[3:-1])
 330
 331         # if a is zero return b
 332         with m.Elif(self.in_a.is_zero):
 333             m.d.comb += self.out_do_z.eq(1)
 334             m.d.comb += self.out_z.create(self.in_b.s, self.in_b.e,
 335                                       self.in_b.m[3:-1])
 336
 337         # if b is zero return a
 338         with m.Elif(self.in_b.is_zero):
 339             m.d.comb += self.out_do_z.eq(1)
 340             m.d.comb += self.out_z.create(self.in_a.s, self.in_a.e,
 341                                       self.in_a.m[3:-1])
 342
 343         # if a equal to -b return zero (+ve zero)
 344         with m.Elif(s_nomatch & m_match & (self.in_a.e == self.in_b.e)):
 345             m.d.comb += self.out_do_z.eq(1)
 346             m.d.comb += self.out_z.zero(0)
 347
 348         # Denormalised Number checks
 349         with m.Else():
 350             m.d.comb += self.out_do_z.eq(0)
 351
 352         return m
 353
 354
 355 class FPID:
 356     def __init__(self, id_wid):
 357         self.id_wid = id_wid
 358         if self.id_wid:
 359             self.in_mid = Signal(id_wid, reset_less=True)
 360             self.out_mid = Signal(id_wid, reset_less=True)
 361         else:
 362             self.in_mid = None
 363             self.out_mid = None
 364
 365     def idsync(self, m):
 366         if self.id_wid is not None:
 367             m.d.sync += self.out_mid.eq(self.in_mid)
 368
 369
 370 class FPAddSpecialCases(FPState, FPID):
 371     """ special cases: NaNs, infs, zeros, denormalised
 372         NOTE: some of these are unique to add.  see "Special Operations"
 373         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 374     """
 375
 376     def __init__(self, width, id_wid):
 377         FPState.__init__(self, "special_cases")
 378         FPID.__init__(self, id_wid)
 379         self.mod = FPAddSpecialCasesMod(width)
 380         self.out_z = FPNumOut(width, False)
 381         self.out_do_z = Signal(reset_less=True)
 382
 383     def setup(self, m, in_a, in_b, in_mid):
 384         """ links module to inputs and outputs
 385         """
 386         self.mod.setup(m, in_a, in_b, self.out_do_z)
 387         if self.in_mid is not None:
 388             m.d.comb += self.in_mid.eq(in_mid)
 389
 390     def action(self, m):
 391         self.idsync(m)
 392         with m.If(self.out_do_z):
 393             m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
 394             m.next = "put_z"
 395         with m.Else():
 396             m.next = "denormalise"
 397
 398
 399 class FPAddSpecialCasesDeNorm(FPState, FPID):
 400     """ special cases: NaNs, infs, zeros, denormalised
 401         NOTE: some of these are unique to add.  see "Special Operations"
 402         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 403     """
 404
 405     def __init__(self, width, id_wid):
 406         FPState.__init__(self, "special_cases")
 407         FPID.__init__(self, id_wid)
 408         self.smod = FPAddSpecialCasesMod(width)
 409         self.out_z = FPNumOut(width, False)
 410         self.out_do_z = Signal(reset_less=True)
 411
 412         self.dmod = FPAddDeNormMod(width)
 413         self.out_a = FPNumBase(width)
 414         self.out_b = FPNumBase(width)
 415
 416     def setup(self, m, in_a, in_b, in_mid):
 417         """ links module to inputs and outputs
 418         """
 419         self.smod.setup(m, in_a, in_b, self.out_do_z)
 420         self.dmod.setup(m, in_a, in_b)
 421         if self.in_mid is not None:
 422             m.d.comb += self.in_mid.eq(in_mid)
 423
 424     def action(self, m):
 425         self.idsync(m)
 426         with m.If(self.out_do_z):
 427             m.d.sync += self.out_z.v.eq(self.smod.out_z.v) # only take output
 428             m.next = "put_z"
 429         with m.Else():
 430             m.next = "align"
 431             m.d.sync += self.out_a.copy(self.dmod.out_a)
 432             m.d.sync += self.out_b.copy(self.dmod.out_b)
 433
 434
 435 class FPAddDeNormMod(FPState):
 436
 437     def __init__(self, width):
 438         self.in_a = FPNumBase(width)
 439         self.in_b = FPNumBase(width)
 440         self.out_a = FPNumBase(width)
 441         self.out_b = FPNumBase(width)
 442
 443     def setup(self, m, in_a, in_b):
 444         """ links module to inputs and outputs
 445         """
 446         m.submodules.denormalise = self
 447         m.d.comb += self.in_a.copy(in_a)
 448         m.d.comb += self.in_b.copy(in_b)
 449
 450     def elaborate(self, platform):
 451         m = Module()
 452         m.submodules.denorm_in_a = self.in_a
 453         m.submodules.denorm_in_b = self.in_b
 454         m.submodules.denorm_out_a = self.out_a
 455         m.submodules.denorm_out_b = self.out_b
 456         # hmmm, don't like repeating identical code
 457         m.d.comb += self.out_a.copy(self.in_a)
 458         with m.If(self.in_a.exp_n127):
 459             m.d.comb += self.out_a.e.eq(self.in_a.N126) # limit a exponent
 460         with m.Else():
 461             m.d.comb += self.out_a.m[-1].eq(1) # set top mantissa bit
 462
 463         m.d.comb += self.out_b.copy(self.in_b)
 464         with m.If(self.in_b.exp_n127):
 465             m.d.comb += self.out_b.e.eq(self.in_b.N126) # limit a exponent
 466         with m.Else():
 467             m.d.comb += self.out_b.m[-1].eq(1) # set top mantissa bit
 468
 469         return m
 470
 471
 472 class FPAddDeNorm(FPState, FPID):
 473
 474     def __init__(self, width, id_wid):
 475         FPState.__init__(self, "denormalise")
 476         FPID.__init__(self, id_wid)
 477         self.mod = FPAddDeNormMod(width)
 478         self.out_a = FPNumBase(width)
 479         self.out_b = FPNumBase(width)
 480
 481     def setup(self, m, in_a, in_b, in_mid):
 482         """ links module to inputs and outputs
 483         """
 484         self.mod.setup(m, in_a, in_b)
 485         if self.in_mid is not None:
 486             m.d.comb += self.in_mid.eq(in_mid)
 487
 488     def action(self, m):
 489         self.idsync(m)
 490         # Denormalised Number checks
 491         m.next = "align"
 492         m.d.sync += self.out_a.copy(self.mod.out_a)
 493         m.d.sync += self.out_b.copy(self.mod.out_b)
 494
 495
 496 class FPAddAlignMultiMod(FPState):
 497
 498     def __init__(self, width):
 499         self.in_a = FPNumBase(width)
 500         self.in_b = FPNumBase(width)
 501         self.out_a = FPNumIn(None, width)
 502         self.out_b = FPNumIn(None, width)
 503         self.exp_eq = Signal(reset_less=True)
 504
 505     def elaborate(self, platform):
 506         # This one however (single-cycle) will do the shift
 507         # in one go.
 508
 509         m = Module()
 510
 511         m.submodules.align_in_a = self.in_a
 512         m.submodules.align_in_b = self.in_b
 513         m.submodules.align_out_a = self.out_a
 514         m.submodules.align_out_b = self.out_b
 515
 516         # NOTE: this does *not* do single-cycle multi-shifting,
 517         #       it *STAYS* in the align state until exponents match
 518
 519         # exponent of a greater than b: shift b down
 520         m.d.comb += self.exp_eq.eq(0)
 521         m.d.comb += self.out_a.copy(self.in_a)
 522         m.d.comb += self.out_b.copy(self.in_b)
 523         agtb = Signal(reset_less=True)
 524         altb = Signal(reset_less=True)
 525         m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
 526         m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
 527         with m.If(agtb):
 528             m.d.comb += self.out_b.shift_down(self.in_b)
 529         # exponent of b greater than a: shift a down
 530         with m.Elif(altb):
 531             m.d.comb += self.out_a.shift_down(self.in_a)
 532         # exponents equal: move to next stage.
 533         with m.Else():
 534             m.d.comb += self.exp_eq.eq(1)
 535         return m
 536
 537
 538 class FPAddAlignMulti(FPState, FPID):
 539
 540     def __init__(self, width, id_wid):
 541         FPID.__init__(self, id_wid)
 542         FPState.__init__(self, "align")
 543         self.mod = FPAddAlignMultiMod(width)
 544         self.out_a = FPNumIn(None, width)
 545         self.out_b = FPNumIn(None, width)
 546         self.exp_eq = Signal(reset_less=True)
 547
 548     def setup(self, m, in_a, in_b, in_mid):
 549         """ links module to inputs and outputs
 550         """
 551         m.submodules.align = self.mod
 552         m.d.comb += self.mod.in_a.copy(in_a)
 553         m.d.comb += self.mod.in_b.copy(in_b)
 554         #m.d.comb += self.out_a.copy(self.mod.out_a)
 555         #m.d.comb += self.out_b.copy(self.mod.out_b)
 556         m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
 557         if self.in_mid is not None:
 558             m.d.comb += self.in_mid.eq(in_mid)
 559
 560     def action(self, m):
 561         self.idsync(m)
 562         m.d.sync += self.out_a.copy(self.mod.out_a)
 563         m.d.sync += self.out_b.copy(self.mod.out_b)
 564         with m.If(self.exp_eq):
 565             m.next = "add_0"
 566
 567
 568 class FPAddAlignSingleMod:
 569
 570     def __init__(self, width):
 571         self.width = width
 572         self.in_a = FPNumBase(width)
 573         self.in_b = FPNumBase(width)
 574         self.out_a = FPNumIn(None, width)
 575         self.out_b = FPNumIn(None, width)
 576
 577     def setup(self, m, in_a, in_b):
 578         """ links module to inputs and outputs
 579         """
 580         m.submodules.align = self
 581         m.d.comb += self.in_a.copy(in_a)
 582         m.d.comb += self.in_b.copy(in_b)
 583
 584     def elaborate(self, platform):
 585         """ Aligns A against B or B against A, depending on which has the
 586             greater exponent.  This is done in a *single* cycle using
 587             variable-width bit-shift
 588
 589             the shifter used here is quite expensive in terms of gates.
 590             Mux A or B in (and out) into temporaries, as only one of them
 591             needs to be aligned against the other
 592         """
 593         m = Module()
 594
 595         m.submodules.align_in_a = self.in_a
 596         m.submodules.align_in_b = self.in_b
 597         m.submodules.align_out_a = self.out_a
 598         m.submodules.align_out_b = self.out_b
 599
 600         # temporary (muxed) input and output to be shifted
 601         t_inp = FPNumBase(self.width)
 602         t_out = FPNumIn(None, self.width)
 603         espec = (len(self.in_a.e), True)
 604         msr = MultiShiftRMerge(self.in_a.m_width, espec)
 605         m.submodules.align_t_in = t_inp
 606         m.submodules.align_t_out = t_out
 607         m.submodules.multishift_r = msr
 608
 609         ediff = Signal(espec, reset_less=True)
 610         ediffr = Signal(espec, reset_less=True)
 611         tdiff = Signal(espec, reset_less=True)
 612         elz = Signal(reset_less=True)
 613         egz = Signal(reset_less=True)
 614
 615         # connect multi-shifter to t_inp/out mantissa (and tdiff)
 616         m.d.comb += msr.inp.eq(t_inp.m)
 617         m.d.comb += msr.diff.eq(tdiff)
 618         m.d.comb += t_out.m.eq(msr.m)
 619         m.d.comb += t_out.e.eq(t_inp.e + tdiff)
 620         m.d.comb += t_out.s.eq(t_inp.s)
 621
 622         m.d.comb += ediff.eq(self.in_a.e - self.in_b.e)
 623         m.d.comb += ediffr.eq(self.in_b.e - self.in_a.e)
 624         m.d.comb += elz.eq(self.in_a.e < self.in_b.e)
 625         m.d.comb += egz.eq(self.in_a.e > self.in_b.e)
 626
 627         # default: A-exp == B-exp, A and B untouched (fall through)
 628         m.d.comb += self.out_a.copy(self.in_a)
 629         m.d.comb += self.out_b.copy(self.in_b)
 630         # only one shifter (muxed)
 631         #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
 632         # exponent of a greater than b: shift b down
 633         with m.If(egz):
 634             m.d.comb += [t_inp.copy(self.in_b),
 635                          tdiff.eq(ediff),
 636                          self.out_b.copy(t_out),
 637                          self.out_b.s.eq(self.in_b.s), # whoops forgot sign
 638                         ]
 639         # exponent of b greater than a: shift a down
 640         with m.Elif(elz):
 641             m.d.comb += [t_inp.copy(self.in_a),
 642                          tdiff.eq(ediffr),
 643                          self.out_a.copy(t_out),
 644                          self.out_a.s.eq(self.in_a.s), # whoops forgot sign
 645                         ]
 646         return m
 647
 648
 649 class FPAddAlignSingle(FPState, FPID):
 650
 651     def __init__(self, width, id_wid):
 652         FPState.__init__(self, "align")
 653         FPID.__init__(self, id_wid)
 654         self.mod = FPAddAlignSingleMod(width)
 655         self.out_a = FPNumIn(None, width)
 656         self.out_b = FPNumIn(None, width)
 657
 658     def setup(self, m, in_a, in_b, in_mid):
 659         """ links module to inputs and outputs
 660         """
 661         self.mod.setup(m, in_a, in_b)
 662         if self.in_mid is not None:
 663             m.d.comb += self.in_mid.eq(in_mid)
 664
 665     def action(self, m):
 666         self.idsync(m)
 667         # NOTE: could be done as comb
 668         m.d.sync += self.out_a.copy(self.mod.out_a)
 669         m.d.sync += self.out_b.copy(self.mod.out_b)
 670         m.next = "add_0"
 671
 672
 673 class FPAddAlignSingleAdd(FPState, FPID):
 674
 675     def __init__(self, width, id_wid):
 676         FPState.__init__(self, "align")
 677         FPID.__init__(self, id_wid)
 678         self.mod = FPAddAlignSingleMod(width)
 679         self.out_a = FPNumIn(None, width)
 680         self.out_b = FPNumIn(None, width)
 681
 682         self.a0mod = FPAddStage0Mod(width)
 683         self.a0_out_z = FPNumBase(width, False)
 684         self.out_tot = Signal(self.a0_out_z.m_width + 4, reset_less=True)
 685         self.a0_out_z = FPNumBase(width, False)
 686
 687         self.a1mod = FPAddStage1Mod(width)
 688         self.out_z = FPNumBase(width, False)
 689         self.out_of = Overflow()
 690
 691     def setup(self, m, in_a, in_b, in_mid):
 692         """ links module to inputs and outputs
 693         """
 694         self.mod.setup(m, in_a, in_b)
 695         m.d.comb += self.out_a.copy(self.mod.out_a)
 696         m.d.comb += self.out_b.copy(self.mod.out_b)
 697
 698         self.a0mod.setup(m, self.out_a, self.out_b)
 699         m.d.comb += self.a0_out_z.copy(self.a0mod.out_z)
 700         m.d.comb += self.out_tot.eq(self.a0mod.out_tot)
 701
 702         self.a1mod.setup(m, self.out_tot, self.a0_out_z)
 703
 704         if self.in_mid is not None:
 705             m.d.comb += self.in_mid.eq(in_mid)
 706
 707     def action(self, m):
 708         self.idsync(m)
 709         m.d.sync += self.out_of.copy(self.a1mod.out_of)
 710         m.d.sync += self.out_z.copy(self.a1mod.out_z)
 711         m.next = "normalise_1"
 712
 713
 714 class FPAddStage0Mod:
 715
 716     def __init__(self, width):
 717         self.in_a = FPNumBase(width)
 718         self.in_b = FPNumBase(width)
 719         self.in_z = FPNumBase(width, False)
 720         self.out_z = FPNumBase(width, False)
 721         self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)
 722
 723     def setup(self, m, in_a, in_b):
 724         """ links module to inputs and outputs
 725         """
 726         m.submodules.add0 = self
 727         m.d.comb += self.in_a.copy(in_a)
 728         m.d.comb += self.in_b.copy(in_b)
 729
 730     def elaborate(self, platform):
 731         m = Module()
 732         m.submodules.add0_in_a = self.in_a
 733         m.submodules.add0_in_b = self.in_b
 734         m.submodules.add0_out_z = self.out_z
 735
 736         m.d.comb += self.out_z.e.eq(self.in_a.e)
 737
 738         # store intermediate tests (and zero-extended mantissas)
 739         seq = Signal(reset_less=True)
 740         mge = Signal(reset_less=True)
 741         am0 = Signal(len(self.in_a.m)+1, reset_less=True)
 742         bm0 = Signal(len(self.in_b.m)+1, reset_less=True)
 743         m.d.comb += [seq.eq(self.in_a.s == self.in_b.s),
 744                      mge.eq(self.in_a.m >= self.in_b.m),
 745                      am0.eq(Cat(self.in_a.m, 0)),
 746                      bm0.eq(Cat(self.in_b.m, 0))
 747                     ]
 748         # same-sign (both negative or both positive) add mantissas
 749         with m.If(seq):
 750             m.d.comb += [
 751                 self.out_tot.eq(am0 + bm0),
 752                 self.out_z.s.eq(self.in_a.s)
 753             ]
 754         # a mantissa greater than b, use a
 755         with m.Elif(mge):
 756             m.d.comb += [
 757                 self.out_tot.eq(am0 - bm0),
 758                 self.out_z.s.eq(self.in_a.s)
 759             ]
 760         # b mantissa greater than a, use b
 761         with m.Else():
 762             m.d.comb += [
 763                 self.out_tot.eq(bm0 - am0),
 764                 self.out_z.s.eq(self.in_b.s)
 765         ]
 766         return m
 767
 768
 769 class FPAddStage0(FPState, FPID):
 770     """ First stage of add.  covers same-sign (add) and subtract
 771         special-casing when mantissas are greater or equal, to
 772         give greatest accuracy.
 773     """
 774
 775     def __init__(self, width, id_wid):
 776         FPState.__init__(self, "add_0")
 777         FPID.__init__(self, id_wid)
 778         self.mod = FPAddStage0Mod(width)
 779         self.out_z = FPNumBase(width, False)
 780         self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)
 781
 782     def setup(self, m, in_a, in_b, in_mid):
 783         """ links module to inputs and outputs
 784         """
 785         self.mod.setup(m, in_a, in_b)
 786         if self.in_mid is not None:
 787             m.d.comb += self.in_mid.eq(in_mid)
 788
 789     def action(self, m):
 790         self.idsync(m)
 791         # NOTE: these could be done as combinatorial (merge add0+add1)
 792         m.d.sync += self.out_z.copy(self.mod.out_z)
 793         m.d.sync += self.out_tot.eq(self.mod.out_tot)
 794         m.next = "add_1"
 795
 796
 797 class FPAddStage1Mod(FPState):
 798     """ Second stage of add: preparation for normalisation.
 799         detects when tot sum is too big (tot[27] is kinda a carry bit)
 800     """
 801
 802     def __init__(self, width):
 803         self.out_norm = Signal(reset_less=True)
 804         self.in_z = FPNumBase(width, False)
 805         self.in_tot = Signal(self.in_z.m_width + 4, reset_less=True)
 806         self.out_z = FPNumBase(width, False)
 807         self.out_of = Overflow()
 808
 809     def setup(self, m, in_tot, in_z):
 810         """ links module to inputs and outputs
 811         """
 812         m.submodules.add1 = self
 813         m.submodules.add1_out_overflow = self.out_of
 814
 815         m.d.comb += self.in_z.copy(in_z)
 816         m.d.comb += self.in_tot.eq(in_tot)
 817
 818     def elaborate(self, platform):
 819         m = Module()
 820         #m.submodules.norm1_in_overflow = self.in_of
 821         #m.submodules.norm1_out_overflow = self.out_of
 822         #m.submodules.norm1_in_z = self.in_z
 823         #m.submodules.norm1_out_z = self.out_z
 824         m.d.comb += self.out_z.copy(self.in_z)
 825         # tot[27] gets set when the sum overflows. shift result down
 826         with m.If(self.in_tot[-1]):
 827             m.d.comb += [
 828                 self.out_z.m.eq(self.in_tot[4:]),
 829                 self.out_of.m0.eq(self.in_tot[4]),
 830                 self.out_of.guard.eq(self.in_tot[3]),
 831                 self.out_of.round_bit.eq(self.in_tot[2]),
 832                 self.out_of.sticky.eq(self.in_tot[1] | self.in_tot[0]),
 833                 self.out_z.e.eq(self.in_z.e + 1)
 834         ]
 835         # tot[27] zero case
 836         with m.Else():
 837             m.d.comb += [
 838                 self.out_z.m.eq(self.in_tot[3:]),
 839                 self.out_of.m0.eq(self.in_tot[3]),
 840                 self.out_of.guard.eq(self.in_tot[2]),
 841                 self.out_of.round_bit.eq(self.in_tot[1]),
 842                 self.out_of.sticky.eq(self.in_tot[0])
 843         ]
 844         return m
 845
 846
 847 class FPAddStage1(FPState, FPID):
 848
 849     def __init__(self, width, id_wid):
 850         FPState.__init__(self, "add_1")
 851         FPID.__init__(self, id_wid)
 852         self.mod = FPAddStage1Mod(width)
 853         self.out_z = FPNumBase(width, False)
 854         self.out_of = Overflow()
 855         self.norm_stb = Signal()
 856
 857     def setup(self, m, in_tot, in_z, in_mid):
 858         """ links module to inputs and outputs
 859         """
 860         self.mod.setup(m, in_tot, in_z)
 861
 862         m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state
 863
 864         if self.in_mid is not None:
 865             m.d.comb += self.in_mid.eq(in_mid)
 866
 867     def action(self, m):
 868         self.idsync(m)
 869         m.d.sync += self.out_of.copy(self.mod.out_of)
 870         m.d.sync += self.out_z.copy(self.mod.out_z)
 871         m.d.sync += self.norm_stb.eq(1)
 872         m.next = "normalise_1"
 873
 874
 875 class FPNorm1ModSingle:
 876
 877     def __init__(self, width):
 878         self.width = width
 879         self.out_norm = Signal(reset_less=True)
 880         self.in_z = FPNumBase(width, False)
 881         self.in_of = Overflow()
 882         self.out_z = FPNumBase(width, False)
 883         self.out_of = Overflow()
 884
 885     def setup(self, m, in_z, in_of, out_z):
 886         """ links module to inputs and outputs
 887         """
 888         m.submodules.normalise_1 = self
 889
 890         m.d.comb += self.in_z.copy(in_z)
 891         m.d.comb += self.in_of.copy(in_of)
 892
 893         m.d.comb += out_z.copy(self.out_z)
 894
 895     def elaborate(self, platform):
 896         m = Module()
 897
 898         mwid = self.out_z.m_width+2
 899         pe = PriorityEncoder(mwid)
 900         m.submodules.norm_pe = pe
 901
 902         m.submodules.norm1_out_z = self.out_z
 903         m.submodules.norm1_out_overflow = self.out_of
 904         m.submodules.norm1_in_z = self.in_z
 905         m.submodules.norm1_in_overflow = self.in_of
 906
 907         in_z = FPNumBase(self.width, False)
 908         in_of = Overflow()
 909         m.submodules.norm1_insel_z = in_z
 910         m.submodules.norm1_insel_overflow = in_of
 911
 912         espec = (len(in_z.e), True)
 913         ediff_n126 = Signal(espec, reset_less=True)
 914         msr = MultiShiftRMerge(mwid, espec)
 915         m.submodules.multishift_r = msr
 916
 917         m.d.comb += in_z.copy(self.in_z)
 918         m.d.comb += in_of.copy(self.in_of)
 919         # initialise out from in (overridden below)
 920         m.d.comb += self.out_z.copy(in_z)
 921         m.d.comb += self.out_of.copy(in_of)
 922         # normalisation increase/decrease conditions
 923         decrease = Signal(reset_less=True)
 924         increase = Signal(reset_less=True)
 925         m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
 926         m.d.comb += increase.eq(in_z.exp_lt_n126)
 927         # decrease exponent
 928         with m.If(decrease):
 929             # *sigh* not entirely obvious: count leading zeros (clz)
 930             # with a PriorityEncoder: to find from the MSB
 931             # we reverse the order of the bits.
 932             temp_m = Signal(mwid, reset_less=True)
 933             temp_s = Signal(mwid+1, reset_less=True)
 934             clz = Signal((len(in_z.e), True), reset_less=True)
 935             # make sure that the amount to decrease by does NOT
 936             # go below the minimum non-INF/NaN exponent
 937             limclz = Mux(in_z.exp_sub_n126 > pe.o, pe.o,
 938                          in_z.exp_sub_n126)
 939             m.d.comb += [
 940                 # cat round and guard bits back into the mantissa
 941                 temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
 942                 pe.i.eq(temp_m[::-1]),          # inverted
 943                 clz.eq(limclz),                 # count zeros from MSB down
 944                 temp_s.eq(temp_m << clz),       # shift mantissa UP
 945                 self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
 946                 self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
 947                 self.out_of.m0.eq(temp_s[2]),   # copy of mantissa[0]
 948                 # overflow in bits 0..1: got shifted too (leave sticky)
 949                 self.out_of.guard.eq(temp_s[1]),     # guard
 950                 self.out_of.round_bit.eq(temp_s[0]), # round
 951             ]
 952         # increase exponent
 953         with m.Elif(increase):
 954             temp_m = Signal(mwid+1, reset_less=True)
 955             m.d.comb += [
 956                 temp_m.eq(Cat(in_of.sticky, in_of.round_bit, in_of.guard,
 957                               in_z.m)),
 958                 ediff_n126.eq(in_z.N126 - in_z.e),
 959                 # connect multi-shifter to inp/out mantissa (and ediff)
 960                 msr.inp.eq(temp_m),
 961                 msr.diff.eq(ediff_n126),
 962                 self.out_z.m.eq(msr.m[3:]),
 963                 self.out_of.m0.eq(temp_s[3]),   # copy of mantissa[0]
 964                 # overflow in bits 0..1: got shifted too (leave sticky)
 965                 self.out_of.guard.eq(temp_s[2]),     # guard
 966                 self.out_of.round_bit.eq(temp_s[1]), # round
 967                 self.out_of.sticky.eq(temp_s[0]), # sticky
 968                 self.out_z.e.eq(in_z.e + ediff_n126),
 969             ]
 970
 971         return m
 972
 973
 974 class FPNorm1ModMulti:
 975
 976     def __init__(self, width, single_cycle=True):
 977         self.width = width
 978         self.in_select = Signal(reset_less=True)
 979         self.out_norm = Signal(reset_less=True)
 980         self.in_z = FPNumBase(width, False)
 981         self.in_of = Overflow()
 982         self.temp_z = FPNumBase(width, False)
 983         self.temp_of = Overflow()
 984         self.out_z = FPNumBase(width, False)
 985         self.out_of = Overflow()
 986
 987     def elaborate(self, platform):
 988         m = Module()
 989
 990         m.submodules.norm1_out_z = self.out_z
 991         m.submodules.norm1_out_overflow = self.out_of
 992         m.submodules.norm1_temp_z = self.temp_z
 993         m.submodules.norm1_temp_of = self.temp_of
 994         m.submodules.norm1_in_z = self.in_z
 995         m.submodules.norm1_in_overflow = self.in_of
 996
 997         in_z = FPNumBase(self.width, False)
 998         in_of = Overflow()
 999         m.submodules.norm1_insel_z = in_z
1000         m.submodules.norm1_insel_overflow = in_of
1001
1002         # select which of temp or in z/of to use
1003         with m.If(self.in_select):
1004             m.d.comb += in_z.copy(self.in_z)
1005             m.d.comb += in_of.copy(self.in_of)
1006         with m.Else():
1007             m.d.comb += in_z.copy(self.temp_z)
1008             m.d.comb += in_of.copy(self.temp_of)
1009         # initialise out from in (overridden below)
1010         m.d.comb += self.out_z.copy(in_z)
1011         m.d.comb += self.out_of.copy(in_of)
1012         # normalisation increase/decrease conditions
1013         decrease = Signal(reset_less=True)
1014         increase = Signal(reset_less=True)
1015         m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
1016         m.d.comb += increase.eq(in_z.exp_lt_n126)
1017         m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
1018         # decrease exponent
1019         with m.If(decrease):
1020             m.d.comb += [
1021                 self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
1022                 self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
1023                 self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
1024                 self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
1025                 self.out_of.round_bit.eq(0),        # reset round bit
1026                 self.out_of.m0.eq(in_of.guard),
1027             ]
1028         # increase exponent
1029         with m.Elif(increase):
1030             m.d.comb += [
1031                 self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
1032                 self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
1033                 self.out_of.guard.eq(in_z.m[0]),
1034                 self.out_of.m0.eq(in_z.m[1]),
1035                 self.out_of.round_bit.eq(in_of.guard),
1036                 self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
1037             ]
1038
1039         return m
1040
1041
class FPNorm1Single(FPState, FPID):
    """ FSM state "normalise_1", single-cycle flavour: wraps
        FPNorm1ModSingle and hands over to the "round" state
    """

    def __init__(self, width, id_wid, single_cycle=True):
        # NOTE: single_cycle is accepted but not used here
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModSingle(width)
        self.out_norm = Signal(reset_less=True)
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, in_mid):
        """ links module to inputs and outputs

            self.out_z is passed to self.mod.setup as the module's
            output; in_mid is copied (comb) into self.in_mid unless
            that is None
        """
        self.mod.setup(m, in_z, in_of, self.out_z)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ FSM action: sync the id, latch the round-up flag from the
            module's overflow bits and move to "round"
        """
        self.idsync(m)
        roundz = self.mod.out_of.roundz
        m.d.sync += self.out_roundz.eq(roundz)
        m.next = "round"
1064
1065
class FPNorm1Multi(FPState, FPID):
    """ FSM state "normalise_1", multi-cycle flavour: stays in this state
        for as long as the wrapped module reports (out_norm) that more
        normalisation is required, feeding intermediate results back via
        temp_z/temp_of, then hands over to the "round" state
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb, in_mid):
        """ links module to inputs and outputs

            * in_z, in_of, norm_stb: handed to self.mod.setup, together
              with this state's in_accept, temp_z, temp_of, out_z and
              out_norm
            * norm_stb: also copied (comb) into self.stb
            * in_mid: copied (comb) into self.in_mid unless that is None
        """
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        # default for ack: zero whenever not in the normalise_1 state
        m.d.sync += self.ack.eq(0)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ FSM action: one normalisation step per clock
        """
        self.idsync(m)
        # accept the incoming value on strobe HIGH and ack LOW
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # keep the intermediate result for the next step
        m.d.sync += self.temp_of.copy(self.mod.out_of)
        m.d.sync += self.temp_z.copy(self.out_z)
        with m.If(self.out_norm):
            # more to do: stay in this state
            with m.If(self.in_accept):
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += [
                self.ack.eq(1),
                self.out_roundz.eq(self.mod.out_of.roundz),
            ]
1111
1112
class FPNormToPack(FPState, FPID):
    """ FSM state "normalise_1", compact flavour: chains normalisation,
        rounding, corrections and packing combinatorially, so that one
        state covers all four.  Next state is "pack_put_z".
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.width = width

    def setup(self, m, in_z, in_of, in_mid):
        """ links module to inputs and outputs

            builds the four combinatorial modules, each one fed from
            the output of the one before.  self.pmod and self.out_z
            are created here (not in __init__).
        """
        wid = self.width

        # Normalisation (chained to input in_z+in_of)
        norm = FPNorm1ModSingle(wid)
        n_out_z = FPNumBase(wid)
        n_out_roundz = Signal(reset_less=True)
        norm.setup(m, in_z, in_of, n_out_z)

        # Rounding (chained to normalisation)
        rnd = FPRoundMod(wid)
        r_out_z = FPNumBase(wid)
        rnd.setup(m, n_out_z, n_out_roundz)
        m.d.comb += n_out_roundz.eq(norm.out_of.roundz)
        m.d.comb += r_out_z.copy(rnd.out_z)

        # Corrections (chained to rounding)
        corr = FPCorrectionsMod(wid)
        c_out_z = FPNumBase(wid)
        corr.setup(m, r_out_z)
        m.d.comb += c_out_z.copy(corr.out_z)

        # Pack (chained to corrections)
        self.pmod = FPPackMod(wid)
        self.out_z = FPNumBase(wid)
        self.pmod.setup(m, c_out_z)

        # Multiplex ID
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ FSM action: latch id and packed result, go to "pack_put_z"
        """
        self.idsync(m) # copies incoming ID to outgoing
        packed = self.pmod.out_z.v
        m.d.sync += self.out_z.v.eq(packed) # outputs packed result
        m.next = "pack_put_z"
1156
1157
class FPRoundMod:
    """ Rounding (combinatorial): when in_roundz is set the mantissa is
        incremented, and if the mantissa was all 1s the exponent is
        incremented too.  Otherwise in_z passes through unchanged.
    """

    def __init__(self, width):
        self.in_roundz = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.out_z = FPNumBase(width, False)

    def setup(self, m, in_z, roundz):
        """ adds this module to m as submodule "roundz" and feeds in_z
            and roundz into it (comb)
        """
        m.submodules.roundz = self

        m.d.comb += self.in_roundz.eq(roundz)
        m.d.comb += self.in_z.copy(in_z)

    def elaborate(self, platform):
        """ creates the combinatorial rounding logic
        """
        m = Module()
        zi = self.in_z
        zo = self.out_z
        # default: pass through (m and e overridden below when rounding)
        m.d.comb += zo.copy(zi)
        with m.If(self.in_roundz):
            m.d.comb += zo.m.eq(zi.m + 1) # mantissa rounds up
            with m.If(zi.m == zi.m1s): # all 1s
                m.d.comb += zo.e.eq(zi.e + 1) # exponent up
        return m
1179
1180
class FPRound(FPState, FPID):
    """ FSM state "round": wraps FPRoundMod, latches its result and
        hands over to the "corrections" state
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        FPID.__init__(self, id_wid)
        self.mod = FPRoundMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, roundz, in_mid):
        """ links module to inputs and outputs

            * in_z, roundz: handed on to self.mod.setup
            * in_mid: copied (comb) into self.in_mid unless that is None
        """
        self.mod.setup(m, in_z, roundz)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ FSM action: sync the id, latch the rounded number and move
            to "corrections"
        """
        self.idsync(m)
        rounded = self.mod.out_z
        m.d.sync += self.out_z.copy(rounded)
        m.next = "corrections"
1201
1202
class FPCorrectionsMod:
    """ Corrections (combinatorial): when in_z is flagged as
        denormalised, the output exponent is replaced by in_z.N127.
        Otherwise in_z passes through unchanged.
    """

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ adds this module to m as submodule "corrections" and feeds
            in_z into it (comb)
        """
        m.submodules.corrections = self
        m.d.comb += self.in_z.copy(in_z)

    def elaborate(self, platform):
        """ creates the combinatorial corrections logic
        """
        m = Module()
        zi = self.in_z
        zo = self.out_z
        m.submodules.corr_in_z = zi
        m.submodules.corr_out_z = zo
        # default: pass through (exponent overridden below)
        m.d.comb += zo.copy(zi)
        with m.If(zi.is_denormalised):
            m.d.comb += zo.e.eq(zi.N127)
        return m
1223
1224
class FPCorrections(FPState, FPID):
    """ FSM state "corrections": wraps FPCorrectionsMod, latches its
        result and hands over to the "pack" state
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        FPID.__init__(self, id_wid)
        self.mod = FPCorrectionsMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs

            * in_z: handed on to self.mod.setup
            * in_mid: copied (comb) into self.in_mid unless that is None
        """
        self.mod.setup(m, in_z)
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ FSM action: sync the id, latch the corrected number and
            move to "pack"
        """
        self.idsync(m)
        corrected = self.mod.out_z
        m.d.sync += self.out_z.copy(corrected)
        m.next = "pack"
1244
1245
class FPPackMod:
    """ Packing (combinatorial): out_z is set to infinity (with the sign
        of in_z) when in_z is flagged as overflowed, and is otherwise
        created from the sign, exponent and mantissa of in_z.
    """

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ adds this module to m as submodule "pack" and feeds in_z
            into it (comb)
        """
        m.submodules.pack = self
        m.d.comb += self.in_z.copy(in_z)

    def elaborate(self, platform):
        """ creates the combinatorial packing logic
        """
        m = Module()
        zi = self.in_z
        m.submodules.pack_in_z = zi
        with m.If(zi.is_overflowed):
            m.d.comb += self.out_z.inf(zi.s)
        with m.Else():
            m.d.comb += self.out_z.create(zi.s, zi.e, zi.m)
        return m
1266
1267
class FPPack(FPState, FPID):
    """ FSM state "pack": wraps FPPackMod, latches the packed value and
        hands over to the "pack_put_z" state
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        FPID.__init__(self, id_wid)
        self.mod = FPPackMod(width)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs

            * in_z: handed on to self.mod.setup
            * in_mid: copied (comb) into self.in_mid unless that is None
        """
        self.mod.setup(m, in_z)
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ FSM action: sync the id, latch the packed value (v) and move
            to "pack_put_z"
        """
        self.idsync(m)
        packed = self.mod.out_z.v
        m.d.sync += self.out_z.v.eq(packed)
        m.next = "pack_put_z"
1287
1288
class FPPutZ(FPState):
    """ Output state: presents in_z on out_z with a stb/ack handshake,
        then moves to to_state (default "get_ops")
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ FSM action: copy id (if there is one) and value to the
            output; raise stb until both stb and ack are seen, then
            drop stb and leave the state
        """
        dest = self.out_z
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += dest.v.eq(self.in_z.v)
        with m.If(dest.stb & dest.ack):
            # handshake complete
            m.d.sync += dest.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += dest.stb.eq(1)
1312
1313
class FPPutZIdx(FPState):
    """ Output state with a choice of destinations: in_mid indexes into
        out_zs to pick the output that in_z is presented on (stb/ack
        handshake), then moves to to_state (default "get_ops")
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ FSM action: copy the value to the selected output; raise its
            stb until both stb and ack are seen, then drop stb and leave
            the state
        """
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        # the output selected by the id
        dest = self.out_zs[self.in_mid]
        m.d.comb += outz_stb.eq(dest.stb)
        m.d.comb += outz_ack.eq(dest.ack)
        m.d.sync += dest.v.eq(self.in_z.v)
        with m.If(outz_stb & outz_ack):
            # handshake complete
            m.d.sync += dest.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += dest.stb.eq(1)
1339
1340
class FPADDBaseMod(FPID):
    """ The adder proper: an FSM assembled from a list of states, in
        either a "compact" (fewer states) or a "longer" arrangement
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        FPID.__init__(self, id_wid)
        self.width = width
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.in_a  = Signal(width)
        self.in_b  = Signal(width)
        self.out_z = FPOp(width)

        self.states = []

    def add_state(self, state):
        """ appends state to the list of FSM states; returns it, so that
            creation and registration can be done in one expression
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.out_z
        m.submodules.in_t = self.in_t
        if self.compact:
            build_states = self.get_compact_fragment
        else:
            build_states = self.get_longer_fragment
        build_states(m, platform)

        # one FSM state per registered state object, in order added
        with m.FSM():
            for st in self.states:
                with m.State(st.state_from):
                    st.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ builds the long pipeline: one FSM state per operation
            (get, special cases, denorm, align, add0, add1, normalise,
            round, corrections, pack) plus two output states
        """
        wid, id_wid = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, wid))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(wid, id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(wid, id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: single- or multi-cycle, same connections either way
        if self.single_cycle:
            align_cls = FPAddAlignSingle
        else:
            align_cls = FPAddAlignMulti
        alm = self.add_state(align_cls(wid, id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(wid, id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(wid, id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: the multi-cycle version also needs the strobe
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(wid, id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(wid, id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(wid, id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(wid, id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(wid, id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # output of the normal path (from pack)...
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        # ... and output taken directly from the special-cases state
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ builds the compact pipeline: get, special-cases+denorm,
            align+add, normalise-to-pack, plus two output states
        """
        wid, id_wid = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, wid))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCasesDeNorm(wid, id_wid))
        sc.setup(m, a, b, self.in_mid)

        alm = self.add_state(FPAddAlignSingleAdd(wid, id_wid))
        alm.setup(m, sc.out_a, sc.out_b, sc.in_mid)

        n1 = self.add_state(FPNormToPack(wid, id_wid))
        n1.setup(m, alm.out_z, alm.out_of, alm.in_mid)

        # output of the normal path (from normalise-to-pack)...
        self.add_state(FPPutZ("pack_put_z", n1.out_z, self.out_z,
                              n1.in_mid, self.out_mid))

        # ... and output taken directly from the special-cases state
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              sc.in_mid, self.out_mid))
1457
1458
class FPADDBase(FPState, FPID):
    """ FSM state "fpadd": wraps a complete FPADDBaseMod (itself an FSM)
        as a submodule, starting it when an operand pair is accepted
        and moving to "put_z" once its result is flagged (z_done)
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)

        self.in_t = Trigger()
        self.in_a  = Signal(width)
        self.in_b  = Signal(width)
        # NOTE: out_z and out_mid are not created here: they are handed
        # in through setup()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def setup(self, m, a, b, add_stb, in_mid, out_z, out_mid):
        """ links module to inputs and outputs

            * a, b: operands, passed through in_a/in_b to self.mod
            * add_stb: incoming strobe, registered into self.add_stb
            * in_mid: id, passed through self.in_mid to self.mod
            * out_z, out_mid: outputs, kept on self and driven (comb)
              from self.mod (out_z.ack goes the other way)
        """
        self.out_z = out_z
        self.out_mid = out_mid
        m.d.comb += [self.in_a.eq(a),
                     self.in_b.eq(b),
                     self.mod.in_a.eq(self.in_a),
                     self.mod.in_b.eq(self.in_b),
                     self.in_mid.eq(in_mid),
                     self.mod.in_mid.eq(self.in_mid),
                     self.z_done.eq(self.mod.out_z.trigger),
                     #self.add_stb.eq(add_stb),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.out_mid.eq(self.mod.out_mid),
                     self.out_z.v.eq(self.mod.out_z.v),
                     self.out_z.stb.eq(self.mod.out_z.stb),
                     self.mod.out_z.ack.eq(self.out_z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.out_z.ack.eq(0) # likewise
        #m.d.sync += self.in_t.stb.eq(0)

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ FSM action: handshake with the wrapped adder

            (the statements that used to follow an unconditional return
            at the end of this method could never run, and have been
            removed)
        """

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.out_z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and move on to the output state
            # (id and value are already driven combinatorially by setup)
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
1550
class ResArray:
    """ An Array of result operands (FPOp), named out_z_0, out_z_1 ...,
        together with an incoming operand (in_z) and id (in_mid)
    """

    def __init__(self, width, id_wid, rs_sz=2):
        """ * width: bit-width passed to each FPOp
            * id_wid: bit-width of the in_mid Signal
            * rs_sz: number of result entries to create.  This used to
              be read from an undefined name (NameError); it is now a
              parameter whose default matches that of FPADD.
        """
        self.width = width
        self.id_wid = id_wid
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)
        self.in_z = FPOp(width)
        self.in_mid = Signal(self.id_wid, reset_less=True)

    def setup(self, m, in_z, in_mid):
        """ feeds in_z and in_mid (comb) into this object's in_z/in_mid
        """
        m.d.comb += [self.in_z.copy(in_z),
                     self.in_mid.eq(in_mid)]

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for the result array: a Module
            with in_z and the result entries added as submodules
        """
        m = Module()
        m.submodules.res_in_z = self.in_z
        m.submodules += self.res

        return m

    def ports(self):
        """ returns the ports of all the result entries, as one flat list
        """
        res = []
        for z in self.res:
            res += z.ports()
        return res
1582
1583
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) operand pairs, and of
              results, to create
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        self.ids = FPID(id_wid)

        # operand pairs: in_a_0/in_b_0, in_a_1/in_b_1 ...
        pairs = []
        for idx in range(rs_sz):
            in_a  = FPOp(width)
            in_b  = FPOp(width)
            in_a.name = "in_a_%d" % idx
            in_b.name = "in_b_%d" % idx
            pairs.append((in_a, in_b))
        self.rs = Array(pairs)

        # results: out_z_0, out_z_1 ...
        results = []
        for idx in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % idx
            results.append(out_z)
        self.res = Array(results)

        self.states = []

    def add_state(self, state):
        """ appends state to the list of FSM states; returns it, so that
            creation and registration can be done in one expression
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair is connected up
        in_a = self.rs[0][0]
        in_b = self.rs[0][1]

        out_z = FPOp(self.width)
        out_mid = Signal(self.id_wid, reset_less=True)
        m.submodules.out_z = out_z

        # fetch a, then b ...
        geta = self.add_state(FPGetOp("get_a", "get_b", in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd", in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        # ... add them ...
        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        ab.setup(m, a, b, getb.out_decode, self.ids.in_mid,
                 out_z, out_mid)

        # ... and put the answer in the result selected by the id
        self.add_state(FPPutZIdx("put_z", ab.out_z, self.res,
                                 out_mid, "get_a"))

        # one FSM state per registered state object, in order added
        with m.FSM():
            for st in self.states:
                with m.State(st.state_from):
                    st.action(m)

        return m
1675
1676
if __name__ == "__main__":
    # hand-edited switch: True builds the full FPADD, False FPADDBase
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        port_list = (alu.rs[0][0].ports() +
                     alu.rs[0][1].ports() +
                     alu.res[0].ports() +
                     [alu.ids.in_mid, alu.ids.out_mid])
        main(alu, ports=port_list)
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        port_list = ([alu.in_a, alu.in_b] +
                     alu.in_t.ports() +
                     alu.out_z.ports() +
                     [alu.in_mid, alu.out_mid])
        main(alu, ports=port_list)
1690
1691
1692     # works... but don't use, just do "python fname.py convert -t v"
1693     #print (verilog.convert(alu, ports=[
1694     #                        ports=alu.in_a.ports() + \
1695     #                              alu.in_b.ports() + \
1696     #                              alu.out_z.ports())