From 65e9d4ede5860dfea323b709d1372fb23c75c3f9 Mon Sep 17 00:00:00 2001
From: Luke Kenneth Casson Leighton
Date: Thu, 4 Jul 2019 09:34:03 +0100
Subject: [PATCH] add fcvt first version

---
Review notes (ignored by git-am; not part of the commit message):

 * pipeline.py: import FPAddStage1Data (it was used but never imported;
   the module path is presumed, please confirm) and build the output
   spec with out_width rather than in_width.
 * pipeline.py: FPCVTSpecialCasesMod.elaborate used self.width and z1,
   neither of which existed; now uses self.in_width and z1 = self.o.z.
 * pipeline.py: the sticky bit was computed from the mantissa bits that
   are kept, instead of from the bits below the round bit.
 * pipeline.py: FPCVTSpecialCasesDeNorm and FPCVTMuxInOut constructor
   calls now match the signatures of the classes they instantiate.
 * test_fcvt_pipe.py: import FPCVTMuxInOut (FPMULMuxInOut is not defined
   in fcvt.pipeline), pass single_op=True, replace the left-over "mul"
   test body with an fp64 -> fp32 conversion, and call an existing test
   from __main__.
 * fpmux.py hunks are unchanged.
 * the "index" blob ids below predate these edits.

 src/ieee754/fcvt/pipeline.py            | 220 ++++++++++++++++++++++++
 src/ieee754/fcvt/test/test_fcvt_pipe.py |  27 +++
 src/ieee754/fpcommon/test/fpmux.py      |  31 ++--
 3 files changed, 268 insertions(+), 10 deletions(-)
 create mode 100644 src/ieee754/fcvt/pipeline.py
 create mode 100644 src/ieee754/fcvt/test/test_fcvt_pipe.py

diff --git a/src/ieee754/fcvt/pipeline.py b/src/ieee754/fcvt/pipeline.py
new file mode 100644
index 00000000..35b92db9
--- /dev/null
+++ b/src/ieee754/fcvt/pipeline.py
@@ -0,0 +1,220 @@
+# IEEE Floating Point Adder (Single Precision)
+# Copyright (C) Jonathan P Dawson 2013
+# 2013-12-12
+
+from nmigen import Module
+from nmigen.cli import main, verilog
+
+from nmutil.singlepipe import ControlBase
+from nmutil.concurrentunit import ReservationStations, num_bits
+
+from ieee754.fpcommon.getop import FPADDBaseData
+from ieee754.fpcommon.denorm import FPSCData
+from ieee754.fpcommon.pack import FPPackData
+from ieee754.fpcommon.normtopack import FPNormToPack
+# NOTE(review): FPAddStage1Data was used below without being imported;
+# the module path here is presumed - confirm against the tree
+from ieee754.fpcommon.postcalc import FPAddStage1Data
+
+
+from nmigen import Module, Signal, Elaboratable
+from math import log
+
+from ieee754.fpcommon.fpbase import FPNumIn, FPNumOut, FPNumBaseRecord
+from ieee754.fpcommon.fpbase import FPState, FPNumBase
+from ieee754.fpcommon.getop import FPPipeContext
+
+from nmigen import Module, Signal, Cat, Const, Elaboratable
+
+from ieee754.fpcommon.fpbase import FPNumDecode, FPNumBaseRecord
+from nmutil.singlepipe import SimpleHandshake, StageChain
+
+from ieee754.fpcommon.fpbase import FPState, FPID
+from ieee754.fpcommon.getop import FPADDBaseData
+
+
+class FPCVTSpecialCasesMod(Elaboratable):
+    """ special cases: NaNs, infs, zeros, denormalised
+        see "Special Operations"
+        https://steve.hollasch.net/cgindex/coding/ieeefloat.html
+
+        decodes operand a (in_width bits) and drives the result number
+        o.z, which is in the out_width format, plus the guard / round /
+        sticky bits in o.of.  the context is passed through unchanged.
+    """
+
+    def __init__(self, in_width, out_width, pspec):
+        self.in_width = in_width
+        self.out_width = out_width
+        self.pspec = pspec
+        self.i = self.ispec()
+        self.o = self.ospec()
+
+    def ispec(self):
+        return FPADDBaseData(self.in_width, self.pspec)
+
+    def ospec(self):
+        # output format: o.z.rmw is what narrows the mantissa in elaborate
+        return FPAddStage1Data(self.out_width, self.pspec)
+
+    def setup(self, m, i):
+        """ links module to inputs and outputs
+        """
+        m.submodules.specialcases = self
+        m.d.comb += self.i.eq(i)
+
+    def process(self, i):
+        return self.o
+
+    def elaborate(self, platform):
+        m = Module()
+
+        #m.submodules.sc_out_z = self.o.z
+
+        # decode: XXX really should move to separate stage
+        a1 = FPNumBaseRecord(self.in_width, False)
+        m.submodules.sc_decode_a = a1 = FPNumDecode(None, a1)
+        # NOTE(review): confirm that the output spec has an "a" field
+        m.d.comb += [a1.v.eq(self.i.a),
+                     self.o.a.eq(a1),
+                    ]
+
+        # NOTE(review): z1 was undefined; taken to be the output number
+        z1 = self.o.z
+
+        # intermediaries
+        exp_sub_n126 = Signal((a1.e_width, True), reset_less=True)
+        exp_gt127 = Signal(reset_less=True)
+        m.d.comb += exp_sub_n126.eq(a1.e - z1.fp.N126)
+        m.d.comb += exp_gt127.eq(a1.e > z1.fp.P127)
+
+        # if a zero, return zero (signed)
+        with m.If(a1.exp_n127):
+            m.d.comb += self.o.z.zero(a1.s)
+
+        # if a range within z min range (-126)
+        with m.Elif(exp_sub_n126 < 0):
+            m.d.comb += self.o.z.create(a1.s, a1.e, a1.m[-self.o.z.rmw:])
+            m.d.comb += self.o.of.guard.eq(a1.m[-self.o.z.rmw-1])
+            m.d.comb += self.o.of.round.eq(a1.m[-self.o.z.rmw-2])
+            # sticky covers only the bits *below* the round bit
+            m.d.comb += self.o.of.sticky.eq(a1.m[:-self.o.z.rmw-2] != 0)
+
+        # if a is inf return inf
+        with m.Elif(a1.is_inf):
+            m.d.comb += self.o.z.inf(a1.s)
+
+        # if a is NaN return NaN
+        with m.Elif(a1.is_nan):
+            m.d.comb += self.o.z.nan(a1.s)
+
+        # if a exponent greater than 127, return inf
+        with m.Elif(exp_gt127):
+            m.d.comb += self.o.z.inf(a1.s)
+
+        # ok after all that, anything else should fit fine (whew)
+        with m.Else():
+            m.d.comb += self.o.z.create(a1.s, a1.e, a1.m[-self.o.z.rmw:])
+
+        # copy the context (muxid, operator)
+        m.d.comb += self.o.ctx.eq(self.i.ctx)
+
+        return m
+
+
+class FPCVTSpecialCases(FPState):
+    """ special cases: NaNs, infs, zeros, denormalised
+
+        NOTE(review): not used by the pipeline in this file, and its
+        calls into FPCVTSpecialCasesMod (constructor and setup) do not
+        match that class's signatures.  needs rework before use.
+    """
+
+    def __init__(self, width, id_wid):
+        FPState.__init__(self, "special_cases")
+        self.mod = FPCVTSpecialCasesMod(width)
+        self.out_z = self.mod.ospec()
+        self.out_do_z = Signal(reset_less=True)
+
+    def setup(self, m, i):
+        """ links module to inputs and outputs
+        """
+        self.mod.setup(m, i, self.out_do_z)
+        m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
+        m.d.sync += self.out_z.ctx.eq(self.mod.o.ctx) # (and context)
+
+    def action(self, m):
+        self.idsync(m)
+        with m.If(self.out_do_z):
+            m.next = "put_z"
+        with m.Else():
+            m.next = "denormalise"
+
+
+class FPCVTSpecialCasesDeNorm(FPState, SimpleHandshake):
+    """ special cases: NaNs, infs, zeros, denormalised
+
+        wraps FPCVTSpecialCasesMod as a SimpleHandshake pipeline stage
+    """
+
+    def __init__(self, in_width, out_width, pspec):
+        FPState.__init__(self, "special_cases")
+        self.in_width = in_width
+        self.out_width = out_width
+        self.pspec = pspec
+        sc = FPCVTSpecialCasesMod(in_width, out_width, pspec)
+        SimpleHandshake.__init__(self, sc)
+        self.out = self.ospec()
+
+
+class FPCVTBasePipe(ControlBase):
+    """ two-stage conversion pipeline: special-cases then norm-to-pack
+    """
+    def __init__(self, in_width, out_width, in_pspec, out_pspec):
+        ControlBase.__init__(self)
+        self.pipe1 = FPCVTSpecialCasesDeNorm(in_width, out_width, in_pspec)
+        self.pipe2 = FPNormToPack(out_width, out_pspec)
+
+        self._eqs = self.connect([self.pipe1, self.pipe2])
+
+    def elaborate(self, platform):
+        m = ControlBase.elaborate(self, platform)
+        m.submodules.scnorm = self.pipe1
+        m.submodules.normpack = self.pipe2
+        m.d.comb += self._eqs
+        return m
+
+
+class FPCVTMuxInOut(ReservationStations):
+    """ Reservation-Station version of FPCVT pipeline.
+
+        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
+        * 2-stage conversion pipeline
+        * fan-out on outputs (an array of FPPackData: z,mid)
+
+        Fan-in and Fan-out are combinatorial.
+    """
+    def __init__(self, in_width, out_width, num_rows, op_wid=0):
+        self.in_width = in_width
+        self.out_width = out_width
+        self.op_wid = op_wid
+        self.id_wid = num_bits(in_width)
+        self.out_id_wid = num_bits(out_width)
+
+        self.in_pspec = {}
+        self.in_pspec['id_wid'] = self.id_wid
+        self.in_pspec['op_wid'] = self.op_wid
+
+        self.out_pspec = {}
+        self.out_pspec['id_wid'] = self.out_id_wid
+        self.out_pspec['op_wid'] = self.op_wid
+
+        self.alu = FPCVTBasePipe(in_width, out_width,
+                                 self.in_pspec, self.out_pspec)
+        ReservationStations.__init__(self, num_rows)
+
+    def i_specfn(self):
+        return FPADDBaseData(self.in_width, self.in_pspec)
+
+    def o_specfn(self):
+        return FPPackData(self.out_width, self.out_pspec)
diff --git a/src/ieee754/fcvt/test/test_fcvt_pipe.py b/src/ieee754/fcvt/test/test_fcvt_pipe.py
new file mode 100644
index 00000000..667c2136
--- /dev/null
+++ b/src/ieee754/fcvt/test/test_fcvt_pipe.py
@@ -0,0 +1,27 @@
+""" test of FPCVTMuxInOut
+"""
+
+from ieee754.fcvt.pipeline import (FPCVTMuxInOut,)
+from ieee754.fpcommon.test.fpmux import runfp
+
+from sfpy import Float64, Float32, Float16
+
+def fcvt_32_16(x):
+    return Float16(x)
+
+def fcvt_64_32(x):
+    return Float32(x)
+
+def test_pipe_fp32_16():
+    dut = FPCVTMuxInOut(32, 16, 4)
+    runfp(dut, 32, "test_fcvt_pipe_fp32_16", Float32, fcvt_32_16,
+          single_op=True)
+
+def test_pipe_fp64():
+    dut = FPCVTMuxInOut(64, 32, 4)
+    runfp(dut, 64, "test_fcvt_pipe_fp64", Float64, fcvt_64_32,
+          single_op=True)
+
+if __name__ == '__main__':
+    test_pipe_fp32_16()
+
diff --git a/src/ieee754/fpcommon/test/fpmux.py b/src/ieee754/fpcommon/test/fpmux.py
index 239c822d..f32cb65c 100644
--- a/src/ieee754/fpcommon/test/fpmux.py
+++ b/src/ieee754/fpcommon/test/fpmux.py
@@ -11,10 +11,11 @@ from nmigen.cli import verilog, rtlil
 
 
 class InputTest:
-    def __init__(self, dut, width, fpkls, fpop):
+    def __init__(self, dut, width, fpkls, fpop, single_op=False):
         self.dut = dut
         self.fpkls = fpkls
         self.fpop = fpop
+        self.single_op = single_op
         self.di = {}
         self.do = {}
         self.tlen = 10
@@ -35,8 +36,12 @@ class InputTest:
                 #op2 = 0xb4658540 # expect 0x8016147c
                 #op1 = 0x40900000
                 #op2 = 0x40200000
-                res = self.fpop(self.fpkls(op1), self.fpkls(op2))
-                self.di[muxid][i] = (op1, op2)
+                if self.single_op:
+                    res = self.fpop(self.fpkls(op1))
+                    self.di[muxid][i] = (op1, op2)
+                else:
+                    res = self.fpop(self.fpkls(op1), self.fpkls(op2))
+                    self.di[muxid][i] = (op1, op2)
                 self.do[muxid].append(res.bits)
 
     def send(self, muxid):
@@ -53,11 +58,17 @@ class InputTest:
                 yield
                 o_p_ready = yield rs.ready_o
 
-            fop1 = self.fpkls(op1)
-            fop2 = self.fpkls(op2)
-            res = self.fpop(fop1, fop2)
-            print ("send", muxid, i, hex(op1), hex(op2), hex(res.bits),
-                           fop1, fop2, res)
+            if self.single_op:
+                fop1 = self.fpkls(op1)
+                res = self.fpop(fop1)
+                print ("send", muxid, i, hex(op1), hex(res.bits),
+                               fop1, res)
+            else:
+                fop1 = self.fpkls(op1)
+                fop2 = self.fpkls(op2)
+                res = self.fpop(fop1, fop2)
+                print ("send", muxid, i, hex(op1), hex(op2), hex(res.bits),
+                               fop1, fop2, res)
 
             yield rs.valid_i.eq(0)
             # wait random period of time before queueing another value
@@ -114,12 +125,12 @@ class InputTest:
         print ("recv ended", muxid)
 
 
-def runfp(dut, width, name, fpkls, fpop):
+def runfp(dut, width, name, fpkls, fpop, single_op=False):
     vl = rtlil.convert(dut, ports=dut.ports())
     with open("%s.il" % name, "w") as f:
         f.write(vl)
 
-    test = InputTest(dut, width, fpkls, fpop)
+    test = InputTest(dut, width, fpkls, fpop, single_op)
     run_simulation(dut, [test.rcv(1), test.rcv(0),
                          test.rcv(3), test.rcv(2),
                          test.send(0), test.send(1),
-- 
2.30.2