X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fadd%2Fnmigen_add_experiment.py;h=92c1b6772c0eefbb0e6bce33a2572d6de505b25b;hb=ad26042b4e313d8f1273ff8bac9bac317440bffc;hp=8a7f00fab6f69327a34f440521a51b4f145c1027;hpb=3597dda29683c1b06bd70edc882f4585f1243350;p=ieee754fpu.git diff --git a/src/add/nmigen_add_experiment.py b/src/add/nmigen_add_experiment.py index 8a7f00fa..92c1b677 100644 --- a/src/add/nmigen_add_experiment.py +++ b/src/add/nmigen_add_experiment.py @@ -7,7 +7,7 @@ from nmigen.lib.coding import PriorityEncoder from nmigen.cli import main, verilog from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase -from fpbase import MultiShiftRMerge +from fpbase import MultiShiftRMerge, Trigger #from fpbase import FPNumShiftMultiRight class FPState(FPBase): @@ -28,17 +28,17 @@ class FPState(FPBase): class FPGetOpMod: def __init__(self, width): self.in_op = FPOp(width) - self.out_op = FPNumIn(self.in_op, width) + self.out_op = Signal(width) self.out_decode = Signal(reset_less=True) def elaborate(self, platform): m = Module() m.d.comb += self.out_decode.eq((self.in_op.ack) & (self.in_op.stb)) - #m.submodules.get_op_in = self.in_op - m.submodules.get_op_out = self.out_op + m.submodules.get_op_in = self.in_op + #m.submodules.get_op_out = self.out_op with m.If(self.out_decode): m.d.comb += [ - self.out_op.decode(self.in_op.v), + self.out_op.eq(self.in_op.v), ] return m @@ -52,7 +52,7 @@ class FPGetOp(FPState): self.out_state = out_state self.mod = FPGetOpMod(width) self.in_op = in_op - self.out_op = FPNumIn(in_op, width) + self.out_op = Signal(width) self.out_decode = Signal(reset_less=True) def setup(self, m, in_op): @@ -60,7 +60,7 @@ class FPGetOp(FPState): """ setattr(m.submodules, self.state_from, self.mod) m.d.comb += self.mod.in_op.copy(in_op) - m.d.comb += self.out_op.v.eq(self.mod.out_op.v) + #m.d.comb += self.out_op.eq(self.mod.out_op) m.d.comb += self.out_decode.eq(self.mod.out_decode) def action(self, m): @@ -68,12 +68,74 @@ class FPGetOp(FPState): m.next = self.out_state m.d.sync += [ self.in_op.ack.eq(0), - self.out_op.copy(self.mod.out_op) + self.out_op.eq(self.mod.out_op) ] with m.Else(): m.d.sync += self.in_op.ack.eq(1) +class FPGet2OpMod(Trigger): + def __init__(self, width): + Trigger.__init__(self) + self.in_op1 = Signal(width, reset_less=True) + self.in_op2 = Signal(width, reset_less=True) + self.out_op1 = FPNumIn(None, width) + self.out_op2 = FPNumIn(None, width) + + def elaborate(self, platform): + m = Trigger.elaborate(self, platform) + #m.submodules.get_op_in = self.in_op + m.submodules.get_op1_out = self.out_op1 + m.submodules.get_op2_out = self.out_op2 + with m.If(self.trigger): + m.d.comb += [ + self.out_op1.decode(self.in_op1), + self.out_op2.decode(self.in_op2), + ] + return m + + +class FPGet2Op(FPState): + """ gets operands + """ + + def __init__(self, in_state, out_state, in_op1, in_op2, width): + FPState.__init__(self, in_state) + self.out_state = out_state + self.mod = FPGet2OpMod(width) + self.in_op1 = in_op1 + self.in_op2 = in_op2 + self.out_op1 = FPNumIn(None, width) + self.out_op2 = FPNumIn(None, width) + self.in_stb = Signal(reset_less=True) + self.out_ack = Signal(reset_less=True) + self.out_decode = Signal(reset_less=True) + + def setup(self, m, in_op1, in_op2, in_stb, in_ack): + """ links module to inputs and outputs + """ + m.submodules.get_ops = self.mod + m.d.comb += self.mod.in_op1.eq(in_op1) + m.d.comb += self.mod.in_op2.eq(in_op2) + m.d.comb += self.mod.stb.eq(in_stb) + m.d.comb += self.out_ack.eq(self.mod.ack) + m.d.comb += self.out_decode.eq(self.mod.trigger) + m.d.comb += in_ack.eq(self.mod.ack) + + def action(self, m): + with m.If(self.out_decode): + m.next = self.out_state + m.d.sync += [ + self.mod.ack.eq(0), + #self.out_op1.v.eq(self.mod.out_op1.v), + #self.out_op2.v.eq(self.mod.out_op2.v), + self.out_op1.copy(self.mod.out_op1), + self.out_op2.copy(self.mod.out_op2) + ] + with m.Else(): + m.d.sync += self.mod.ack.eq(1) + + class FPAddSpecialCasesMod: """ special cases: NaNs, infs, zeros, denormalised NOTE: some of these are unique to add. see "Special Operations" @@ -86,6 +148,14 @@ class FPAddSpecialCasesMod: self.out_z = FPNumOut(width, False) self.out_do_z = Signal(reset_less=True) + def setup(self, m, in_a, in_b, out_do_z): + """ links module to inputs and outputs + """ + m.submodules.specialcases = self + m.d.comb += self.in_a.copy(in_a) + m.d.comb += self.in_b.copy(in_b) + m.d.comb += out_do_z.eq(self.out_do_z) + def elaborate(self, platform): m = Module() @@ -175,14 +245,14 @@ class FPID: def __init__(self, id_wid): self.id_wid = id_wid if self.id_wid: - self.in_mid = Signal(width, reset_less) - self.out_mid = Signal(width, reset_less) + self.in_mid = Signal(id_wid, reset_less=True) + self.out_mid = Signal(id_wid, reset_less=True) else: self.in_mid = None self.out_mid = None def idsync(self, m): - if self.id_wid: + if self.id_wid is not None: m.d.sync += self.out_mid.eq(self.in_mid) @@ -202,12 +272,8 @@ class FPAddSpecialCases(FPState, FPID): def setup(self, m, in_a, in_b, in_mid): """ links module to inputs and outputs """ - m.submodules.specialcases = self.mod - m.d.comb += self.mod.in_a.copy(in_a) - m.d.comb += self.mod.in_b.copy(in_b) - #m.d.comb += self.out_z.v.eq(self.mod.out_z.v) - m.d.comb += self.out_do_z.eq(self.mod.out_do_z) - if self.in_mid: + self.mod.setup(m, in_a, in_b, self.out_do_z) + if self.in_mid is not None: m.d.comb += self.in_mid.eq(in_mid) def action(self, m): @@ -264,7 +330,7 @@ class FPAddDeNorm(FPState, FPID): m.submodules.denormalise = self.mod m.d.comb += self.mod.in_a.copy(in_a) m.d.comb += self.mod.in_b.copy(in_b) - if self.in_mid: + if self.in_mid is not None: m.d.comb += self.in_mid.eq(in_mid) def action(self, m): @@ -317,16 +383,17 @@ class FPAddAlignMultiMod(FPState): return m -class FPAddAlignMulti(FPState): +class FPAddAlignMulti(FPState, FPID): - def __init__(self, width): + def __init__(self, width, id_wid): + FPID.__init__(self, id_wid) FPState.__init__(self, "align") self.mod = FPAddAlignMultiMod(width) self.out_a = FPNumIn(None, width) self.out_b = FPNumIn(None, width) self.exp_eq = Signal(reset_less=True) - def setup(self, m, in_a, in_b): + def setup(self, m, in_a, in_b, in_mid): """ links module to inputs and outputs """ m.submodules.align = self.mod @@ -335,8 +402,11 @@ class FPAddAlignMulti(FPState): #m.d.comb += self.out_a.copy(self.mod.out_a) #m.d.comb += self.out_b.copy(self.mod.out_b) m.d.comb += self.exp_eq.eq(self.mod.exp_eq) + if self.in_mid is not None: + m.d.comb += self.in_mid.eq(in_mid) def action(self, m): + self.idsync(m) m.d.sync += self.out_a.copy(self.mod.out_a) m.d.sync += self.out_b.copy(self.mod.out_b) with m.If(self.exp_eq): @@ -417,22 +487,26 @@ class FPAddAlignSingleMod: return m -class FPAddAlignSingle(FPState): +class FPAddAlignSingle(FPState, FPID): - def __init__(self, width): + def __init__(self, width, id_wid): FPState.__init__(self, "align") + FPID.__init__(self, id_wid) self.mod = FPAddAlignSingleMod(width) self.out_a = FPNumIn(None, width) self.out_b = FPNumIn(None, width) - def setup(self, m, in_a, in_b): + def setup(self, m, in_a, in_b, in_mid): """ links module to inputs and outputs """ m.submodules.align = self.mod m.d.comb += self.mod.in_a.copy(in_a) m.d.comb += self.mod.in_b.copy(in_b) + if self.in_mid is not None: + m.d.comb += self.in_mid.eq(in_mid) def action(self, m): + self.idsync(m) # NOTE: could be done as comb m.d.sync += self.out_a.copy(self.mod.out_a) m.d.sync += self.out_b.copy(self.mod.out_b) @@ -487,31 +561,34 @@ class FPAddStage0Mod: return m -class FPAddStage0(FPState): +class FPAddStage0(FPState, FPID): """ First stage of add. covers same-sign (add) and subtract special-casing when mantissas are greater or equal, to give greatest accuracy. """ - def __init__(self, width): + def __init__(self, width, id_wid): FPState.__init__(self, "add_0") + FPID.__init__(self, id_wid) self.mod = FPAddStage0Mod(width) self.out_z = FPNumBase(width, False) self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True) - def setup(self, m, in_a, in_b): + def setup(self, m, in_a, in_b, in_mid): """ links module to inputs and outputs """ m.submodules.add0 = self.mod - m.d.comb += self.mod.in_a.copy(in_a) m.d.comb += self.mod.in_b.copy(in_b) + if self.in_mid is not None: + m.d.comb += self.in_mid.eq(in_mid) def action(self, m): - m.next = "add_1" + self.idsync(m) # NOTE: these could be done as combinatorial (merge add0+add1) m.d.sync += self.out_z.copy(self.mod.out_z) m.d.sync += self.out_tot.eq(self.mod.out_tot) + m.next = "add_1" class FPAddStage1Mod(FPState): @@ -555,27 +632,32 @@ class FPAddStage1Mod(FPState): return m -class FPAddStage1(FPState): +class FPAddStage1(FPState, FPID): - def __init__(self, width): + def __init__(self, width, id_wid): FPState.__init__(self, "add_1") + FPID.__init__(self, id_wid) self.mod = FPAddStage1Mod(width) self.out_z = FPNumBase(width, False) self.out_of = Overflow() self.norm_stb = Signal() - def setup(self, m, in_tot, in_z): + def setup(self, m, in_tot, in_z, in_mid): """ links module to inputs and outputs """ m.submodules.add1 = self.mod + m.submodules.add1_out_overflow = self.out_of m.d.comb += self.mod.in_z.copy(in_z) m.d.comb += self.mod.in_tot.eq(in_tot) m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state + if self.in_mid is not None: + m.d.comb += self.in_mid.eq(in_mid) + def action(self, m): - m.submodules.add1_out_overflow = self.out_of + self.idsync(m) m.d.sync += self.out_of.copy(self.mod.out_of) m.d.sync += self.out_z.copy(self.mod.out_z) m.d.sync += self.norm_stb.eq(1) @@ -586,15 +668,22 @@ class FPNorm1ModSingle: def __init__(self, width): self.width = width - self.in_select = Signal(reset_less=True) self.out_norm = Signal(reset_less=True) self.in_z = FPNumBase(width, False) self.in_of = Overflow() - self.temp_z = FPNumBase(width, False) - self.temp_of = Overflow() self.out_z = FPNumBase(width, False) self.out_of = Overflow() + def setup(self, m, in_z, in_of, out_z): + """ links module to inputs and outputs + """ + m.submodules.normalise_1 = self + + m.d.comb += self.in_z.copy(in_z) + m.d.comb += self.in_of.copy(in_of) + + m.d.comb += out_z.copy(self.out_z) + def elaborate(self, platform): m = Module() @@ -604,8 +693,6 @@ class FPNorm1ModSingle: m.submodules.norm1_out_z = self.out_z m.submodules.norm1_out_overflow = self.out_of - m.submodules.norm1_temp_z = self.temp_z - m.submodules.norm1_temp_of = self.temp_of m.submodules.norm1_in_z = self.in_z m.submodules.norm1_in_overflow = self.in_of @@ -619,13 +706,8 @@ class FPNorm1ModSingle: msr = MultiShiftRMerge(mwid, espec) m.submodules.multishift_r = msr - # select which of temp or in z/of to use - with m.If(self.in_select): - m.d.comb += in_z.copy(self.in_z) - m.d.comb += in_of.copy(self.in_of) - with m.Else(): - m.d.comb += in_z.copy(self.temp_z) - m.d.comb += in_of.copy(self.temp_of) + m.d.comb += in_z.copy(self.in_z) + m.d.comb += in_of.copy(self.in_of) # initialise out from in (overridden below) m.d.comb += self.out_z.copy(in_z) m.d.comb += self.out_of.copy(in_of) @@ -634,7 +716,6 @@ class FPNorm1ModSingle: increase = Signal(reset_less=True) m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126) m.d.comb += increase.eq(in_z.exp_lt_n126) - m.d.comb += self.out_norm.eq(0) # loop-end condition # decrease exponent with m.If(decrease): # *sigh* not entirely obvious: count leading zeros (clz) @@ -750,14 +831,36 @@ class FPNorm1ModMulti: return m -class FPNorm1(FPState): +class FPNorm1Single(FPState, FPID): - def __init__(self, width, single_cycle=True): + def __init__(self, width, id_wid, single_cycle=True): + FPID.__init__(self, id_wid) FPState.__init__(self, "normalise_1") - if single_cycle: - self.mod = FPNorm1ModSingle(width) - else: - self.mod = FPNorm1ModMulti(width) + self.mod = FPNorm1ModSingle(width) + self.out_norm = Signal(reset_less=True) + self.out_z = FPNumBase(width) + self.out_roundz = Signal(reset_less=True) + + def setup(self, m, in_z, in_of, in_mid): + """ links module to inputs and outputs + """ + self.mod.setup(m, in_z, in_of, self.out_z) + + if self.in_mid is not None: + m.d.comb += self.in_mid.eq(in_mid) + + def action(self, m): + self.idsync(m) + m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz) + m.next = "round" + + +class FPNorm1Multi(FPState, FPID): + + def __init__(self, width, id_wid): + FPID.__init__(self, id_wid) + FPState.__init__(self, "normalise_1") + self.mod = FPNorm1ModMulti(width) self.stb = Signal(reset_less=True) self.ack = Signal(reset=0, reset_less=True) self.out_norm = Signal(reset_less=True) @@ -767,26 +870,21 @@ class FPNorm1(FPState): self.out_z = FPNumBase(width) self.out_roundz = Signal(reset_less=True) - def setup(self, m, in_z, in_of, norm_stb): + def setup(self, m, in_z, in_of, norm_stb, in_mid): """ links module to inputs and outputs """ - m.submodules.normalise_1 = self.mod - - m.d.comb += self.mod.in_z.copy(in_z) - m.d.comb += self.mod.in_of.copy(in_of) - - m.d.comb += self.mod.in_select.eq(self.in_accept) - m.d.comb += self.mod.temp_z.copy(self.temp_z) - m.d.comb += self.mod.temp_of.copy(self.temp_of) - - m.d.comb += self.out_z.copy(self.mod.out_z) - m.d.comb += self.out_norm.eq(self.mod.out_norm) + self.mod.setup(m, in_z, in_of, norm_stb, + self.in_accept, self.temp_z, self.temp_of, + self.out_z, self.out_norm) m.d.comb += self.stb.eq(norm_stb) m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state - def action(self, m): + if self.in_mid is not None: + m.d.comb += self.in_mid.eq(in_mid) + def action(self, m): + self.idsync(m) m.d.comb += self.in_accept.eq((~self.ack) & (self.stb)) m.d.sync += self.temp_of.copy(self.mod.out_of) m.d.sync += self.temp_z.copy(self.out_z) @@ -804,6 +902,51 @@ class FPNorm1(FPState): m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz) +class FPNormToPack(FPState, FPID): + + def __init__(self, width, id_wid): + FPID.__init__(self, id_wid) + FPState.__init__(self, "normalise_1") + self.width = width + + def setup(self, m, in_z, in_of, in_mid): + """ links module to inputs and outputs + """ + + # Normalisation (chained to input in_z+in_of) + nmod = FPNorm1ModSingle(self.width) + n_out_z = FPNumBase(self.width) + n_out_roundz = Signal(reset_less=True) + nmod.setup(m, in_z, in_of, n_out_z) + + # Rounding (chained to normalisation) + rmod = FPRoundMod(self.width) + r_out_z = FPNumBase(self.width) + rmod.setup(m, n_out_z, n_out_roundz) + m.d.comb += n_out_roundz.eq(nmod.out_of.roundz) + m.d.comb += r_out_z.copy(rmod.out_z) + + # Corrections (chained to rounding) + cmod = FPCorrectionsMod(self.width) + c_out_z = FPNumBase(self.width) + cmod.setup(m, r_out_z) + m.d.comb += c_out_z.copy(cmod.out_z) + + # Pack (chained to corrections) + self.pmod = FPPackMod(self.width) + self.out_z = FPNumBase(self.width) + self.pmod.setup(m, c_out_z) + + # Multiplex ID + if self.in_mid is not None: + m.d.comb += self.in_mid.eq(in_mid) + + def action(self, m): + self.idsync(m) # copies incoming ID to outgoing + m.d.sync += self.out_z.v.eq(self.pmod.out_z.v) # outputs packed result + m.next = "pack_put_z" + + class FPRoundMod: def __init__(self, width): @@ -811,6 +954,12 @@ class FPRoundMod: self.in_z = FPNumBase(width, False) self.out_z = FPNumBase(width, False) + def setup(self, m, in_z, roundz): + m.submodules.roundz = self + + m.d.comb += self.in_z.copy(in_z) + m.d.comb += self.in_roundz.eq(roundz) + def elaborate(self, platform): m = Module() m.d.comb += self.out_z.copy(self.in_z) @@ -821,22 +970,24 @@ class FPRoundMod: return m -class FPRound(FPState): +class FPRound(FPState, FPID): - def __init__(self, width): + def __init__(self, width, id_wid): FPState.__init__(self, "round") + FPID.__init__(self, id_wid) self.mod = FPRoundMod(width) self.out_z = FPNumBase(width) - def setup(self, m, in_z, roundz): + def setup(self, m, in_z, roundz, in_mid): """ links module to inputs and outputs """ - m.submodules.roundz = self.mod + self.mod.setup(m, in_z, roundz) - m.d.comb += self.mod.in_z.copy(in_z) - m.d.comb += self.mod.in_roundz.eq(roundz) + if self.in_mid is not None: + m.d.comb += self.in_mid.eq(in_mid) def action(self, m): + self.idsync(m) m.d.sync += self.out_z.copy(self.mod.out_z) m.next = "corrections" @@ -847,6 +998,12 @@ class FPCorrectionsMod: self.in_z = FPNumOut(width, False) self.out_z = FPNumOut(width, False) + def setup(self, m, in_z): + """ links module to inputs and outputs + """ + m.submodules.corrections = self + m.d.comb += self.in_z.copy(in_z) + def elaborate(self, platform): m = Module() m.submodules.corr_in_z = self.in_z @@ -857,20 +1014,23 @@ class FPCorrectionsMod: return m -class FPCorrections(FPState): +class FPCorrections(FPState, FPID): - def __init__(self, width): + def __init__(self, width, id_wid): FPState.__init__(self, "corrections") + FPID.__init__(self, id_wid) self.mod = FPCorrectionsMod(width) self.out_z = FPNumBase(width) - def setup(self, m, in_z): + def setup(self, m, in_z, in_mid): """ links module to inputs and outputs """ - m.submodules.corrections = self.mod - m.d.comb += self.mod.in_z.copy(in_z) + self.mod.setup(m, in_z) + if self.in_mid is not None: + m.d.comb += self.in_mid.eq(in_mid) def action(self, m): + self.idsync(m) m.d.sync += self.out_z.copy(self.mod.out_z) m.next = "pack" @@ -881,6 +1041,12 @@ class FPPackMod: self.in_z = FPNumOut(width, False) self.out_z = FPNumOut(width, False) + def setup(self, m, in_z): + """ links module to inputs and outputs + """ + m.submodules.pack = self + m.d.comb += self.in_z.copy(in_z) + def elaborate(self, platform): m = Module() m.submodules.pack_in_z = self.in_z @@ -891,57 +1057,67 @@ class FPPackMod: return m -class FPPack(FPState): +class FPPack(FPState, FPID): - def __init__(self, width): + def __init__(self, width, id_wid): FPState.__init__(self, "pack") + FPID.__init__(self, id_wid) self.mod = FPPackMod(width) self.out_z = FPNumOut(width, False) - def setup(self, m, in_z): + def setup(self, m, in_z, in_mid): """ links module to inputs and outputs """ - m.submodules.pack = self.mod - m.d.comb += self.mod.in_z.copy(in_z) + self.mod.setup(m, in_z) + if self.in_mid is not None: + m.d.comb += self.in_mid.eq(in_mid) def action(self, m): + self.idsync(m) m.d.sync += self.out_z.v.eq(self.mod.out_z.v) m.next = "pack_put_z" class FPPutZ(FPState): - def __init__(self, state, in_z, out_z): + def __init__(self, state, in_z, out_z, in_mid, out_mid): FPState.__init__(self, state) self.in_z = in_z self.out_z = out_z + self.in_mid = in_mid + self.out_mid = out_mid def action(self, m): + if self.in_mid is not None: + m.d.sync += self.out_mid.eq(self.in_mid) m.d.sync += [ self.out_z.v.eq(self.in_z.v) ] with m.If(self.out_z.stb & self.out_z.ack): m.d.sync += self.out_z.stb.eq(0) - m.next = "get_a" + m.next = "get_ops" with m.Else(): m.d.sync += self.out_z.stb.eq(1) -class FPADD(FPID): +class FPADDBaseMod(FPID): - def __init__(self, width, id_wid=None, single_cycle=False): + def __init__(self, width, id_wid=None, single_cycle=False, compact=True): """ IEEE754 FP Add * width: bit-width of IEEE754. supported: 16, 32, 64 * id_wid: an identifier that is sync-connected to the input * single_cycle: True indicates each stage to complete in 1 clock + * compact: True indicates a reduced number of stages """ FPID.__init__(self, id_wid) self.width = width self.single_cycle = single_cycle + self.compact = compact - self.in_a = FPOp(width) - self.in_b = FPOp(width) + self.in_t = Trigger() + self.in_a = Signal(width) + self.in_b = Signal(width) self.out_z = FPOp(width) self.states = [] @@ -954,19 +1130,28 @@ class FPADD(FPID): """ creates the HDL code-fragment for FPAdd """ m = Module() - m.submodules.in_a = self.in_a - m.submodules.in_b = self.in_b m.submodules.out_z = self.out_z + m.submodules.in_t = self.in_t + if self.compact: + self.get_compact_fragment(m, platform) + else: + self.get_longer_fragment(m, platform) - geta = self.add_state(FPGetOp("get_a", "get_b", - self.in_a, self.width)) - geta.setup(m, self.in_a) - a = geta.out_op + with m.FSM() as fsm: - getb = self.add_state(FPGetOp("get_b", "special_cases", - self.in_b, self.width)) - getb.setup(m, self.in_b) - b = getb.out_op + for state in self.states: + with m.State(state.state_from): + state.action(m) + + return m + + def get_longer_fragment(self, m, platform=None): + + get = self.add_state(FPGet2Op("get_ops", "special_cases", + self.in_a, self.in_b, self.width)) + get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack) + a = get.out_op1 + b = get.out_op2 sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid)) sc.setup(m, a, b, self.in_mid) @@ -975,33 +1160,232 @@ class FPADD(FPID): dn.setup(m, a, b, sc.in_mid) if self.single_cycle: - alm = self.add_state(FPAddAlignSingle(self.width)) - alm.setup(m, dn.out_a, dn.out_b) + alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid)) + alm.setup(m, dn.out_a, dn.out_b, dn.in_mid) else: - alm = self.add_state(FPAddAlignMulti(self.width)) - alm.setup(m, dn.out_a, dn.out_b) + alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid)) + alm.setup(m, dn.out_a, dn.out_b, dn.in_mid) + + add0 = self.add_state(FPAddStage0(self.width, self.id_wid)) + add0.setup(m, alm.out_a, alm.out_b, alm.in_mid) + + add1 = self.add_state(FPAddStage1(self.width, self.id_wid)) + add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid) + + if self.single_cycle: + n1 = self.add_state(FPNorm1Single(self.width, self.id_wid)) + n1.setup(m, add1.out_z, add1.out_of, add0.in_mid) + else: + n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid)) + n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid) + + rn = self.add_state(FPRound(self.width, self.id_wid)) + rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid) - add0 = self.add_state(FPAddStage0(self.width)) - add0.setup(m, alm.out_a, alm.out_b) + cor = self.add_state(FPCorrections(self.width, self.id_wid)) + cor.setup(m, rn.out_z, rn.in_mid) - add1 = self.add_state(FPAddStage1(self.width)) - add1.setup(m, add0.out_tot, add0.out_z) + pa = self.add_state(FPPack(self.width, self.id_wid)) + pa.setup(m, cor.out_z, rn.in_mid) - n1 = self.add_state(FPNorm1(self.width)) - n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb) + ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z, + pa.in_mid, self.out_mid)) - rn = self.add_state(FPRound(self.width)) - rn.setup(m, n1.out_z, n1.out_roundz) + pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z, + pa.in_mid, self.out_mid)) - cor = self.add_state(FPCorrections(self.width)) - cor.setup(m, rn.out_z) + def get_compact_fragment(self, m, platform=None): - pa = self.add_state(FPPack(self.width)) - pa.setup(m, cor.out_z) + get = self.add_state(FPGet2Op("get_ops", "special_cases", + self.in_a, self.in_b, self.width)) + get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack) + a = get.out_op1 + b = get.out_op2 + + sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid)) + sc.setup(m, a, b, self.in_mid) + + dn = self.add_state(FPAddDeNorm(self.width, self.id_wid)) + dn.setup(m, a, b, sc.in_mid) + + if self.single_cycle: + alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid)) + alm.setup(m, dn.out_a, dn.out_b, dn.in_mid) + else: + alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid)) + alm.setup(m, dn.out_a, dn.out_b, dn.in_mid) + + add0 = self.add_state(FPAddStage0(self.width, self.id_wid)) + add0.setup(m, alm.out_a, alm.out_b, alm.in_mid) + + add1 = self.add_state(FPAddStage1(self.width, self.id_wid)) + add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid) + + n1 = self.add_state(FPNormToPack(self.width, self.id_wid)) + n1.setup(m, add1.out_z, add1.out_of, add0.in_mid) + + ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z, self.out_z, + n1.in_mid, self.out_mid)) + + pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z, + sc.in_mid, self.out_mid)) + + +class FPADDBase(FPState, FPID): + + def __init__(self, width, id_wid=None, single_cycle=False): + """ IEEE754 FP Add + + * width: bit-width of IEEE754. supported: 16, 32, 64 + * id_wid: an identifier that is sync-connected to the input + * single_cycle: True indicates each stage to complete in 1 clock + """ + FPID.__init__(self, id_wid) + FPState.__init__(self, "fpadd") + self.width = width + self.single_cycle = single_cycle + self.mod = FPADDBaseMod(width, id_wid, single_cycle) + + self.in_t = Trigger() + self.in_a = Signal(width) + self.in_b = Signal(width) + #self.out_z = FPOp(width) + + self.z_done = Signal(reset_less=True) # connects to out_z Strobe + self.in_accept = Signal(reset_less=True) + self.add_stb = Signal(reset_less=True) + self.add_ack = Signal(reset=0, reset_less=True) + + def setup(self, m, a, b, add_stb, in_mid, out_z, out_mid): + self.out_z = out_z + self.out_mid = out_mid + m.d.comb += [self.in_a.eq(a), + self.in_b.eq(b), + self.mod.in_a.eq(self.in_a), + self.mod.in_b.eq(self.in_b), + self.in_mid.eq(in_mid), + self.mod.in_mid.eq(self.in_mid), + self.z_done.eq(self.mod.out_z.trigger), + #self.add_stb.eq(add_stb), + self.mod.in_t.stb.eq(self.in_t.stb), + self.in_t.ack.eq(self.mod.in_t.ack), + self.out_mid.eq(self.mod.out_mid), + self.out_z.v.eq(self.mod.out_z.v), + self.out_z.stb.eq(self.mod.out_z.stb), + self.mod.out_z.ack.eq(self.out_z.ack), + ] + + m.d.sync += self.add_stb.eq(add_stb) + m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state + #m.d.sync += self.in_t.stb.eq(0) + + m.submodules.fpadd = self.mod + + def action(self, m): + + # in_accept is set on incoming strobe HIGH and ack LOW. + m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb)) + + #with m.If(self.in_t.ack): + # m.d.sync += self.in_t.stb.eq(0) + with m.If(~self.z_done): + # not done: test for accepting an incoming operand pair + with m.If(self.in_accept): + m.d.sync += [ + self.add_ack.eq(1), # acknowledge receipt... + self.in_t.stb.eq(1), # initiate add + ] + with m.Else(): + m.d.sync += [self.add_ack.eq(0), + self.in_t.stb.eq(0), + ] + with m.Else(): + # done: acknowledge, and write out id and value + m.d.sync += [self.add_ack.eq(1), + self.in_t.stb.eq(0) + ] + m.next = "get_a" + + return + + if self.in_mid is not None: + m.d.sync += self.out_mid.eq(self.mod.out_mid) + + m.d.sync += [ + self.out_z.v.eq(self.mod.out_z.v) + ] + # move to output state on detecting z ack + with m.If(self.out_z.trigger): + m.d.sync += self.out_z.stb.eq(0) + m.next = "put_z" + with m.Else(): + m.d.sync += self.out_z.stb.eq(1) + + +class FPADD(FPID): + """ FPADD: stages as follows: + + FPGetOp (a) + | + FPGetOp (b) + | + FPAddBase---> FPAddBaseMod + | | + PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ + + FPAddBase is tricky: it is both a stage and *has* stages. + Connection to FPAddBaseMod therefore requires an in stb/ack + and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp + needs to be the thing that raises the incoming stb. + """ + + def __init__(self, width, id_wid=None, single_cycle=False): + """ IEEE754 FP Add + + * width: bit-width of IEEE754. supported: 16, 32, 64 + * id_wid: an identifier that is sync-connected to the input + * single_cycle: True indicates each stage to complete in 1 clock + """ + FPID.__init__(self, id_wid) + self.width = width + self.id_wid = id_wid + self.single_cycle = single_cycle + + self.in_a = FPOp(width) + self.in_b = FPOp(width) + self.out_z = FPOp(width) + + self.states = [] + + def add_state(self, state): + self.states.append(state) + return state + + def get_fragment(self, platform=None): + """ creates the HDL code-fragment for FPAdd + """ + m = Module() + m.submodules.in_a = self.in_a + m.submodules.in_b = self.in_b + m.submodules.out_z = self.out_z + + geta = self.add_state(FPGetOp("get_a", "get_b", + self.in_a, self.width)) + geta.setup(m, self.in_a) + a = geta.out_op + + getb = self.add_state(FPGetOp("get_b", "fpadd", + self.in_b, self.width)) + getb.setup(m, self.in_b) + b = getb.out_op - ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z)) + ab = FPADDBase(self.width, self.id_wid, self.single_cycle) + ab = self.add_state(ab) + ab.setup(m, a, b, getb.out_decode, self.in_mid, + self.out_z, self.out_mid) - pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z)) + #pz = self.add_state(FPPutZ("put_z", ab.out_z, self.out_z, + # ab.out_mid, self.out_mid)) with m.FSM() as fsm: @@ -1013,8 +1397,18 @@ class FPADD(FPID): if __name__ == "__main__": - alu = FPADD(width=32, single_cycle=True) - main(alu, ports=alu.in_a.ports() + alu.in_b.ports() + alu.out_z.ports()) + if True: + alu = FPADD(width=32, id_wid=5, single_cycle=True) + main(alu, ports=alu.in_a.ports() + \ + alu.in_b.ports() + \ + alu.out_z.ports() + \ + [alu.in_mid, alu.out_mid]) + else: + alu = FPADDBase(width=32, id_wid=5, single_cycle=True) + main(alu, ports=[alu.in_a, alu.in_b] + \ + alu.in_t.ports() + \ + alu.out_z.ports() + \ + [alu.in_mid, alu.out_mid]) # works... but don't use, just do "python fname.py convert -t v"