__pycache__
*.v
*.il
+*.il.*
.eggs
*.egg-info
*.gtkw
self.bit_width = bit_width
self.fract_width = fract_width
self.log2_radix = log2_radix
+ print(f"{self}: n_stages={self.n_stages}")
def __repr__(self):
""" Get repr. """
log2_radix = min(log2_radix, current_shift)
assert log2_radix > 0
current_shift -= log2_radix
+ print(f"DivPipeCoreCalc: stage {self.stage_index}"
+ + f" of {self.core_config.n_stages} handling "
+ + f"bits [{current_shift}, {current_shift+log2_radix})"
+ + f" of {self.core_config.bit_width}")
radix = 1 << log2_radix
# trials within this radix range. carried out by Trial module,
DivPipeCoreSetupStage,
DivPipeCoreCalculateStage,
DivPipeCoreFinalStage,
- )
+ )
from ieee754.fpcommon.getop import FPPipeContext
from ieee754.fpcommon.fpbase import FPFormat, FPNumBaseRecord
""" Create a ``DivPipeBaseData`` instance. """
self.pspec = pspec
width = pspec.width
- self.z = FPNumBaseRecord(width, False) # s and e carried: m ignored
+ # s and e carried: m ignored
+ self.z = FPNumBaseRecord(width, False, name="z")
self.out_do_z = Signal(reset_less=True)
self.oz = Signal(width, reset_less=True)
def eq(self, rhs):
""" Assign member signals. """
return DivPipeCoreInputData.eq(self, rhs) + \
- DivPipeBaseData.eq(self, rhs)
+ DivPipeBaseData.eq(self, rhs)
class DivPipeInterstageData(DivPipeCoreInterstageData, DivPipeBaseData):
""" Assign member signals. """
#print (self, rhs)
return DivPipeCoreInterstageData.eq(self, rhs) + \
- DivPipeBaseData.eq(self, rhs)
+ DivPipeBaseData.eq(self, rhs)
class DivPipeOutputData(DivPipeCoreOutputData, DivPipeBaseData):
def eq(self, rhs):
""" Assign member signals. """
return DivPipeCoreOutputData.eq(self, rhs) + \
- DivPipeBaseData.eq(self, rhs)
+ DivPipeBaseData.eq(self, rhs)
class DivPipeBaseStage:
class DivPipeSetupStage(DivPipeBaseStage, DivPipeCoreSetupStage):
+ """ FIXME: add docs. """
def __init__(self, pspec):
self.pspec = pspec
class DivPipeCalculateStage(DivPipeBaseStage, DivPipeCoreCalculateStage):
+ """ FIXME: add docs. """
def __init__(self, pspec, stage_index):
self.pspec = pspec
- DivPipeCoreCalculateStage.__init__(self, pspec.core_config, stage_index)
+ DivPipeCoreCalculateStage.__init__(
+ self, pspec.core_config, stage_index)
def ispec(self):
""" Get the input spec for this pipeline stage."""
class DivPipeFinalStage(DivPipeBaseStage, DivPipeCoreFinalStage):
+ """ FIXME: add docs. """
def __init__(self, pspec):
self.pspec = pspec
m = DivPipeCoreFinalStage.elaborate(self, platform)
self._elaborate(m, platform)
return m
-
# special-cases module(s) and will propagate, along with its
# "bypass" signal out_do_z, through the pipeline, *disabling*
# all processing of all subsequent stages.
- self.a = FPNumBaseRecord(width, m_extra) # operand a
- self.b = FPNumBaseRecord(width, m_extra) # operand b
- self.z = FPNumBaseRecord(width, False) # denormed result
+ self.a = FPNumBaseRecord(width, m_extra, name="a") # operand a
+ self.b = FPNumBaseRecord(width, m_extra, name="b") # operand b
+ self.z = FPNumBaseRecord(width, False, name="z") # denormed result
self.oz = Signal(width, reset_less=True) # "finished" (bypass) result
self.out_do_z = Signal(reset_less=True) # "bypass" enabled
self.ctx = FPPipeContext(pspec)
def eq(self, i):
ret = [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
- self.a.eq(i.a), self.b.eq(i.b), self.ctx.eq(i.ctx)]
+ self.a.eq(i.a), self.b.eq(i.b), self.ctx.eq(i.ctx)]
return ret
# XXX hmmm, don't like repeating identical code
m.d.comb += self.o.a.eq(self.i.a)
with m.If(in_a.exp_n127):
- m.d.comb += self.o.a.e.eq(self.i.a.N126) # limit a exponent
+ m.d.comb += self.o.a.e.eq(self.i.a.N126) # limit a exponent
with m.Else():
- m.d.comb += self.o.a.m[-1].eq(1) # set top mantissa bit
+ m.d.comb += self.o.a.m[-1].eq(1) # set top mantissa bit
m.d.comb += self.o.b.eq(self.i.b)
with m.If(in_b.exp_n127):
- m.d.comb += self.o.b.e.eq(self.i.b.N126) # limit a exponent
+ m.d.comb += self.o.b.e.eq(self.i.b.N126) # limit b exponent
with m.Else():
- m.d.comb += self.o.b.m[-1].eq(1) # set top mantissa bit
+ m.d.comb += self.o.b.m[-1].eq(1) # set top mantissa bit
m.d.comb += self.o.ctx.eq(self.i.ctx)
m.d.comb += self.o.z.eq(self.i.z)
def action(self, m):
# Denormalised Number checks
m.next = "align"
-
-
""" Check for equality. """
if not isinstance(other, FPFormat):
return NotImplemented
- return (self.e_width == other.e_width and
- self.m_width == other.m_width and
- self.has_int_bit == other.has_int_bit and
- self.has_sign == other.has_sign)
+ return (self.e_width == other.e_width
+ and self.m_width == other.m_width
+ and self.has_int_bit == other.has_int_bit
+ and self.has_sign == other.has_sign)
@staticmethod
def standard(width):
""" Floating-point Base Number Class
"""
- def __init__(self, width, m_extra=True, e_extra=False):
+ def __init__(self, width, m_extra=True, e_extra=False, name=None):
+ if name is None:
+ name = ""
+ # assert False, "missing name"
+ else:
+ name += "_"
self.width = width
m_width = {16: 11, 32: 24, 64: 53}[width] # 1 extra bit (overflow)
e_width = {16: 7, 32: 10, 64: 13}[width] # 2 extra bits (overflow)
self.e_start = self.rmw
self.e_end = self.rmw + self.e_width - 2 # for decoding
- self.v = Signal(width, reset_less=True) # Latched copy of value
- self.m = Signal(m_width, reset_less=True) # Mantissa
- self.e = Signal((e_width, True), reset_less=True) # exp+2 bits, signed
- self.s = Signal(reset_less=True) # Sign bit
+ self.v = Signal(width, reset_less=True,
+ name=name+"v") # Latched copy of value
+ self.m = Signal(m_width, reset_less=True, name=name+"m") # Mantissa
+ self.e = Signal((e_width, True),
+ reset_less=True, name=name+"e") # exp+2 bits, signed
+ self.s = Signal(reset_less=True, name=name+"s") # Sign bit
self.fp = self
self.drop_in(self)
def elaborate(self, platform):
m = Module()
- z = FPNumBaseRecord(self.pspec.width, False)
+ z = FPNumBaseRecord(self.pspec.width, False, name="z")
m.submodules.pack_in_z = in_z = FPNumBase(self.i.z)
#m.submodules.pack_out_z = out_z = FPNumOut(z)
m.d.comb += self.o.ctx.eq(self.i.ctx)
from ieee754.fpcommon.fpbase import Overflow, FPNumBaseRecord
from ieee754.fpcommon.getop import FPPipeContext
+
class FPAddStage1Data:
def __init__(self, pspec, e_extra=False):
width = pspec.width
- self.z = FPNumBaseRecord(width, False, e_extra)
+ self.z = FPNumBaseRecord(width, False, e_extra, name="z")
self.out_do_z = Signal(reset_less=True)
self.oz = Signal(width, reset_less=True)
self.of = Overflow()
def __init__(self, pspec):
width = pspec.width
self.roundz = Signal(reset_less=True, name="norm1_roundz")
- self.z = FPNumBaseRecord(width, False)
+ self.z = FPNumBaseRecord(width, False, name="z")
self.out_do_z = Signal(reset_less=True)
self.oz = Signal(width, reset_less=True)
self.ctx = FPPipeContext(pspec)
def eq(self, i):
ret = [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
- self.roundz.eq(i.roundz), self.ctx.eq(i.ctx)]
+ self.roundz.eq(i.roundz), self.ctx.eq(i.ctx)]
return ret
of.m0.eq(msr.m_out[3]), # copy of mantissa[0]
# overflow in bits 0..2: got shifted too (leave sticky)
of.guard.eq(msr.m_out[2]), # guard
- of.round_bit.eq(msr.m_out[1]), # round
+ of.round_bit.eq(msr.m_out[1]), # round
of.sticky.eq(msr.m_out[0]), # sticky
# now exponent
self.o.z.e.eq(msr.e_out),
increase = Signal(reset_less=True)
m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
m.d.comb += increase.eq(in_z.exp_lt_n126)
- m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
+ m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
# decrease exponent
with m.If(decrease):
m.d.comb += [
self.out_z.e.eq(in_z.e - 1), # DECREASE exponent
- self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
- self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
- self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
+ self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
+ self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
+ self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
self.out_of.round_bit.eq(0), # reset round bit
self.out_of.m0.eq(in_of.guard),
]
with m.Elif(increase):
m.d.comb += [
self.out_z.e.eq(in_z.e + 1), # INCREASE exponent
- self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
+ self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
self.out_of.guard.eq(in_z.m[0]),
self.out_of.m0.eq(in_z.m[1]),
self.out_of.round_bit.eq(in_of.guard),
self.out_z, self.out_norm)
m.d.comb += self.stb.eq(norm_stb)
- m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state
+ # sets to zero when not in normalise_1 state
+ m.d.sync += self.ack.eq(0)
def action(self, m):
m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
m.next = "round"
m.d.sync += self.ack.eq(1)
m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
-
-
def __init__(self, pspec):
width = pspec.width
- self.z = FPNumBaseRecord(width, False)
+ self.z = FPNumBaseRecord(width, False, name="z")
self.ctx = FPPipeContext(pspec)
self.muxid = self.ctx.muxid
# pipeline bypass [data comes from specialcases]
if self.single_op:
fop1 = self.fpkls(op1)
res = self.fpop(fop1)
- print ("send", muxid, i, hex(op1), hex(res.bits),
- fop1, res)
+ print("send", muxid, i, hex(op1), hex(res.bits),
+ fop1, res)
else:
fop1 = self.fpkls(op1)
fop2 = self.fpkls(op2)
res = self.fpop(fop1, fop2)
- print ("send", muxid, i, hex(op1), hex(op2), hex(res.bits),
- fop1, fop2, res)
+ print("send", muxid, i, hex(op1), hex(op2), hex(res.bits),
+ fop1, fop2, res)
yield rs.valid_i.eq(0)
# wait random period of time before queueing another value
yield rs.valid_i.eq(0)
yield
- print ("send ended", muxid)
+ print("send ended", muxid)
## wait random period of time before queueing another value
#for i in range(randint(0, 3)):
out_i = 0
- print ("recv", out_muxid, hex(out_z), "expected",
- hex(self.do[muxid][out_i] ))
+ print("recv", out_muxid, hex(out_z), "expected",
+ hex(self.do[muxid][out_i]))
# see if this output has occurred already, delete it if it has
assert muxid == out_muxid, "out_muxid %d not correct %d" % \
# check if there's any more outputs
if len(self.do[muxid]) == 0:
break
- print ("recv ended", muxid)
+ print("recv ended", muxid)
def create_random(num_rows, width, single_op=False, n_vals=10):
for muxid in range(num_rows):
for i in range(n_vals):
if single_op:
- op1 = randint(0, (1<<width)-1)
+ op1 = randint(0, (1 << width)-1)
#op1 = 0x40900000
#op1 = 0x94607b66
#op1 = 0x889cd8c
#op1 = 0x3449f9a9
#op1 = 0x1ba94baa
+ #if i % 2:
+ # op1 = 0x0001
+ #else:
+ # op1 = 0x3C00
+
# FRSQRT
#op1 = 0x3686
#op1 = 0x4400
vals.append((op1,))
else:
- op1 = randint(0, (1<<width)-1)
- op2 = randint(0, (1<<width)-1)
+ op1 = randint(0, (1 << width)-1)
+ op2 = randint(0, (1 << width)-1)
+ # op1 = 0x3F800000 # 1.0f32
+ # op2 = 0x40000000 # 2.0f32
#op2 = 0x4000
#op1 = 0x3c50
#print ("repeat", i, fn, single_op, list(vals))
fmt = "test_pipe_fp%d_%s_cornercases_%d"
runfp(dut, width, fmt % (width, name, i),
- fmod, fpfn, vals=vals, single_op=single_op, opcode=opcode)
+ fmod, fpfn, vals=vals, single_op=single_op, opcode=opcode)
def runfp(dut, width, name, fpkls, fpop, single_op=False, n_vals=10,
Relevant bugreport: http://bugs.libre-riscv.org/show_bug.cgi?id=99
"""
-from nmigen import Module, Signal, Cat, Elaboratable, Const
+from nmigen import Module, Signal, Cat, Elaboratable, Const, Mux
from nmigen.cli import main, verilog
from ieee754.fpcommon.fpbase import (FPNumBaseRecord, Overflow)
from ieee754.fpcommon.denorm import FPSCData
from ieee754.fpcommon.getop import FPPipeContext
from ieee754.div_rem_sqrt_rsqrt.div_pipe import DivPipeInputData
+from ieee754.div_rem_sqrt_rsqrt.core import DivPipeCoreOperation as DPCOp
class FPDivStage0Mod(Elaboratable):
# it is PURELY the *ENTRY* point into the chain, performing
# "preparation" work.
- with m.If(~self.i.out_do_z):
- # do conversion here, of both self.i.a and self.i.b,
- # into DivPipeInputData dividend and divisor.
-
- # XXX *sigh* magic constants...
- if self.pspec.width == 16:
- if self.pspec.log2_radix == 1:
- extra = 2
- elif self.pspec.log2_radix == 3:
- extra = 2
- else:
- extra = 3
- elif self.pspec.width == 32:
- if self.pspec.log2_radix == 1:
- extra = 3
- else:
- extra = 4
- elif self.pspec.width == 64:
- if self.pspec.log2_radix == 1:
- extra = 2
- elif self.pspec.log2_radix == 3:
- extra = 2
- else:
- extra = 3
-
- # the mantissas, having been de-normalised (and containing
- # a "1" in the MSB) represent numbers in the range 0.5 to
- # 0.9999999-recurring. the min and max range of the
- # result is therefore 0.4999999 (0.5/0.99999) and 1.9999998
- # (0.99999/0.5).
+ # mantissas start in the range [1.0, 2.0)
+
+ is_div = Signal(reset_less=True)
+ need_exp_adj = Signal(reset_less=True)
+
+ # ``self.i.a.rmw`` fractional bits and 2 integer bits
+ adj_a_m_fract_width = self.i.a.rmw
+ adj_a_m = Signal(self.i.a.rmw + 2, reset_less=True)
+
+ adj_a_e = Signal((len(self.i.a.e), True), reset_less=True)
+
+ m.d.comb += [is_div.eq(self.i.ctx.op == int(DPCOp.UDivRem)),
+ need_exp_adj.eq(~is_div & self.i.a.e[0]),
+ adj_a_m.eq(self.i.a.m << need_exp_adj),
+ adj_a_e.eq(self.i.a.e - need_exp_adj)]
+
+ # adj_a_m now in the range [1.0, 4.0) for sqrt/rsqrt
+ # and [1.0, 2.0) for div
+
+ dividend_fract_width = self.pspec.core_config.fract_width * 2
+ dividend = Signal(len(self.o.dividend),
+ reset_less=True)
+ divr_rad_fract_width = self.pspec.core_config.fract_width
+ divr_rad = Signal(len(self.o.divisor_radicand),
+ reset_less=True)
+
+ a_m_fract_width = self.i.a.rmw
+ b_m_fract_width = self.i.b.rmw
+
+ m.d.comb += [
+ dividend.eq(self.i.a.m << (
+ dividend_fract_width - a_m_fract_width)),
+ divr_rad.eq(Mux(is_div,
+ self.i.b.m << (
+ divr_rad_fract_width - b_m_fract_width),
+ adj_a_m << (
+ divr_rad_fract_width - adj_a_m_fract_width))),
+ ]
+
+ m.d.comb += [
+ self.o.dividend.eq(dividend),
+ self.o.divisor_radicand.eq(divr_rad),
+ ]
+
+ # set default since it's not always set; non-zero value for debugging
+ m.d.comb += self.o.operation.eq(1)
+
+ with m.If(~self.i.out_do_z):
# DIV
- with m.If(self.i.ctx.op == 0):
- am0 = Signal(len(self.i.a.m)+1, reset_less=True)
- bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
- m.d.comb += [
- am0.eq(Cat(self.i.a.m, 0)),
- bm0.eq(Cat(self.i.b.m, 0)),
- ]
-
- # zero-extend the mantissas (room for sticky/round/guard)
- # plus the extra MSB.
- m.d.comb += [self.o.z.e.eq(self.i.a.e - self.i.b.e + 1),
+ with m.If(self.i.ctx.op == int(DPCOp.UDivRem)):
+ m.d.comb += [self.o.z.e.eq(self.i.a.e - self.i.b.e),
self.o.z.s.eq(self.i.a.s ^ self.i.b.s),
- self.o.dividend[len(self.i.a.m)+extra:].eq(am0),
- self.o.divisor_radicand.eq(bm0),
- self.o.operation.eq(Const(0)) # XXX DIV operation
- ]
+ self.o.operation.eq(int(DPCOp.UDivRem))
+ ]
# SQRT
- with m.Elif(self.i.ctx.op == 1):
- am0 = Signal(len(self.i.a.m)+3, reset_less=True)
- with m.If(self.i.a.e[0]):
- m.d.comb += am0.eq(Cat(self.i.a.m, 0)<<(extra-2))
- m.d.comb += self.o.z.e.eq(((self.i.a.e+1) >> 1)+1)
- with m.Else():
- m.d.comb += am0.eq(Cat(0, self.i.a.m)<<(extra-2))
- m.d.comb += self.o.z.e.eq((self.i.a.e >> 1)+1)
-
- m.d.comb += [self.o.z.s.eq(self.i.a.s),
- self.o.divisor_radicand.eq(am0),
- self.o.operation.eq(Const(1)) # XXX SQRT operation
- ]
+ with m.Elif(self.i.ctx.op == int(DPCOp.SqrtRem)):
+ m.d.comb += [self.o.z.e.eq(adj_a_e >> 1),
+ self.o.z.s.eq(self.i.a.s),
+ self.o.operation.eq(int(DPCOp.SqrtRem))
+ ]
# RSQRT
- with m.Elif(self.i.ctx.op == 2):
- am0 = Signal(len(self.i.a.m)+3, reset_less=True)
- with m.If(self.i.a.e[0]):
- m.d.comb += am0.eq(Cat(self.i.a.m, 0)<<(extra-3))
- m.d.comb += self.o.z.e.eq(-((self.i.a.e+1) >> 1)+4)
- with m.Else():
- m.d.comb += am0.eq(Cat(self.i.a.m)<<(extra-2))
- m.d.comb += self.o.z.e.eq(-(self.i.a.e >> 1)+4)
-
- m.d.comb += [self.o.z.s.eq(self.i.a.s),
- self.o.divisor_radicand.eq(am0),
- self.o.operation.eq(Const(2)) # XXX RSQRT operation
- ]
+ with m.Elif(self.i.ctx.op == int(DPCOp.RSqrtRem)):
+ m.d.comb += [self.o.z.e.eq(-(adj_a_e >> 1)),
+ self.o.z.s.eq(self.i.a.s),
+ self.o.operation.eq(int(DPCOp.RSqrtRem))
+ ]
# these are required and must not be touched
m.d.comb += self.o.oz.eq(self.i.oz)
self.o = self.ospec()
def ispec(self):
- return DivPipeOutputData(self.pspec) # Q/Rem in...
+ return DivPipeOutputData(self.pspec) # Q/Rem in...
def ospec(self):
# XXX REQUIRED. MUST NOT BE CHANGED. this is the format
# required for ongoing processing (normalisation, correction etc.)
- return FPAddStage1Data(self.pspec) # out to post-process
+ return FPAddStage1Data(self.pspec) # out to post-process
def process(self, i):
return self.o
def elaborate(self, platform):
m = Module()
- # copies sign and exponent and mantissa (mantissa to be overridden
- # below)
+ # copies sign and exponent and mantissa (mantissa and exponent to be
+ # overridden below)
m.d.comb += self.o.z.eq(self.i.z)
# TODO: this is "phase 3" of divide (the very end of the pipeline)
# NOTE: this phase does NOT do ACTUAL DIV processing, it ONLY
# does "conversion" *out* of the Q/REM last stage
+ # Operations and input/output mantissa ranges:
+ # fdiv:
+ # dividend [1.0, 2.0)
+ # divisor [1.0, 2.0)
+ # result (0.5, 2.0)
+ #
+ # fsqrt:
+ # radicand [1.0, 4.0)
+ # result [1.0, 2.0)
+ #
+ # frsqrt:
+ # radicand [1.0, 4.0)
+ # result (0.5, 1.0]
+
+ # following section partially normalizes result to the range [1.0, 2.0)
+
+ qr_int_part = Signal(2, reset_less=True)
+ m.d.comb += qr_int_part.eq(
+ self.i.quotient_root[self.pspec.core_config.fract_width:][:2])
+
+ need_shift = Signal(reset_less=True)
+
+ # shift left when result is less than 2.0 since result_m has 1 more
+ # fraction bit, making assigning to it the equivalent of dividing by 2.
+ # this all comes out to:
+ # if quotient_root < 2.0:
+ # # div by 2 from assign; mul by 2 from shift left
+ # result = (quotient_root * 2) / 2
+ # else:
+ # # div by 2 from assign
+ # result = quotient_root / 2
+ m.d.comb += need_shift.eq(qr_int_part < 2)
+
+ # one extra fraction bit to accommodate the result when not shifting
+ # and for effective div by 2
+ result_m_fract_width = self.pspec.core_config.fract_width + 1
+ # 1 integer bit since the numbers are less than 2.0
+ result_m = Signal(1 + result_m_fract_width, reset_less=True)
+ result_e = Signal(len(self.i.z.e), reset_less=True)
+
+ m.d.comb += [
+ result_m.eq(self.i.quotient_root << need_shift),
+ result_e.eq(self.i.z.e + (1 - need_shift))
+ ]
+
+ # result_m is now in the range [1.0, 2.0)
+
+ # FIXME: below comment block out of date
# NOTE: see FPDivStage0Mod comment. the quotient is assumed
# to be in the range 0.499999-recurring to 1.999998. normalisation
# will take care of that, *however*, it *might* be necessary to
# mantissa to compensate. this is pretty much exactly what's
# done in FPMUL, due to 0.5-0.9999 * 0.5-0.9999 also producing
# values within the range 0.5 to 1.999998
+ # FIXME: above comment block out of date
- with m.If(~self.i.out_do_z):
- mw = self.o.z.m_width
- # TODO: compensate for answer being in range 0.49999 to 1.99998
- pl = len(self.i.quotient_root) + 1
- pt = Signal(pl, reset_less=True)
- m.d.comb += pt.eq(Cat(0, self.i.quotient_root))
- p = Signal(pl-1, reset_less=True) # drop top bit
- with m.If(self.i.quotient_root[-1]):
- m.d.comb += p.eq(pt[1:])
- with m.Else():
- # get 1 bit of extra accuracy if the mantissa top bit is zero
- m.d.comb += p.eq(pt)
- m.d.comb += self.o.z.e.eq(self.i.z.e-1)
-
- # TODO: use p here instead of quotient_root, direct.
- # XXX what to do about remainder? shift that as well?
- # hmm, how about concatenate remainder and quotient...
+ with m.If(~self.i.out_do_z): # FIXME: does this need to be conditional?
m.d.comb += [
- self.o.z.m.eq(p[-mw:]),
- self.o.of.m0.eq(p[-mw]), # copy of LSB
- self.o.of.guard.eq(p[-mw-1]),
- self.o.of.round_bit.eq(p[-mw-2]),
- self.o.of.sticky.eq(p[:-mw-2].bool() | self.i.remainder.bool())
+ self.o.z.m.eq(result_m[3:]),
+ self.o.of.m0.eq(result_m[3]), # copy of LSB
+ self.o.of.guard.eq(result_m[2]),
+ self.o.of.round_bit.eq(result_m[1]),
+ self.o.of.sticky.eq(result_m[0] | self.i.remainder.bool()),
+ self.o.z.e.eq(result_e),
]
m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
"""
self.mod.setup(m, i)
- m.d.sync += self.norm_stb.eq(0) # sets to zero when not in div1 state
+ m.d.sync += self.norm_stb.eq(0) # sets to zero when not in div1 state
m.d.sync += self.out_of.eq(self.mod.out_of)
m.d.sync += self.out_z.eq(self.mod.out_z)
def action(self, m):
m.next = "normalise_1"
-
# get number of stages, set up loop.
n_stages = pspec.core_config.n_stages
max_n_comb_stages = self.pspec.n_comb_stages
- print ("n_stages", n_stages)
+ print("n_stages", n_stages)
stage_idx = 0
end = False
# needs to convert input from pipestart ospec
if stage_idx == 0:
n_comb_stages -= 1
- kls = FPDivStagesSetup # does n_comb_stages-1 calcs as well
+ kls = FPDivStagesSetup # does n_comb_stages-1 calcs as well
# needs to convert output to pipeend ispec
elif stage_idx + n_comb_stages >= n_stages:
- kls = FPDivStagesFinal # does n_comb_stages-1 calcs as well
+ kls = FPDivStagesFinal # does n_comb_stages-1 calcs as well
end = True
n_comb_stages = n_stages - stage_idx
# intermediary stage
else:
- kls = FPDivStagesIntermediate # does n_comb_stages calcs
+ kls = FPDivStagesIntermediate # does n_comb_stages calcs
# create (in each pipe) a StageChain n_comb_stages in length
pipechain.append(kls(self.pspec, n_comb_stages, stage_idx))
- stage_idx += n_comb_stages # increment so that each CalcStage
- # gets a (correct) unique index
+ stage_idx += n_comb_stages # increment so that each CalcStage
+ # gets a (correct) unique index
self.pipechain = pipechain
return m
+
def roundup(x, mod):
return x if x % mod == 0 else x + mod - x % mod
# get the standard mantissa width, store in the pspec HOWEVER...
fmt = FPFormat.standard(width)
log2_radix = 3 # tested options so far: 1, 2 and 3.
- n_comb_stages = 3 # TODO (depends on how many RS's we want)
-
- # ...5 extra bits on the mantissa: MSB is zero, MSB-1 is 1
- # then there is guard, round and sticky at the LSB end.
- # also: round up to nearest radix
- if width == 16:
- extra = 5
- elif width == 32:
- extra = 6
- elif width == 64:
- extra = 5
- fmt.m_width = roundup(fmt.m_width + extra, log2_radix)
- print ("width", fmt.m_width)
-
- cfg = DivPipeCoreConfig(fmt.m_width, fmt.fraction_width, log2_radix)
+
+ # TODO (depends on how many RS's we want)
+ #n_comb_stages = width // (2 * log2_radix) # 2 compute steps per stage
+ n_comb_stages = 2 # FIXME: switch back
+
+ fraction_width = fmt.fraction_width
+
+ # extra bits needed: guard + round
+ fraction_width += 2
+
+ # rounding width to a multiple of log2_radix is not needed,
+ # DivPipeCoreCalculateStage just internally reduces log2_radix on
+ # the last stage
+ cfg = DivPipeCoreConfig(fmt.width, fraction_width, log2_radix)
self.pspec.fpformat = fmt
- self.pspec.log2_radix = log2_radix
self.pspec.n_comb_stages = n_comb_stages
self.pspec.core_config = cfg
-# IEEE Floating Point Multiplier
+# IEEE Floating Point Multiplier
from nmigen import Module, Signal, Cat, Const, Elaboratable
from nmigen.cli import main, verilog
#m.submodules.sc_out_z = self.o.z
# decode: XXX really should move to separate stage
- a1 = FPNumBaseRecord(self.pspec.width, False)
- b1 = FPNumBaseRecord(self.pspec.width, False)
+ a1 = FPNumBaseRecord(self.pspec.width, False, name="a1")
+ b1 = FPNumBaseRecord(self.pspec.width, False, name="b1")
m.submodules.sc_decode_a = a1 = FPNumDecode(None, a1)
m.submodules.sc_decode_b = b1 = FPNumDecode(None, b1)
m.d.comb += [a1.v.eq(self.i.a),
b1.v.eq(self.i.b),
self.o.a.eq(a1),
self.o.b.eq(b1)
- ]
+ ]
sabx = Signal(reset_less=True) # sign a xor b (sabx, get it?)
m.d.comb += sabx.eq(a1.s ^ b1.s)
abinf = Signal(reset_less=True)
m.d.comb += abinf.eq(a1.is_inf & b1.is_inf)
- with m.If(self.i.ctx.op == 0): # DIV
+ with m.If(self.i.ctx.op == 0): # DIV
# if a is NaN or b is NaN return NaN
with m.If(abnan):
m.d.comb += self.o.out_do_z.eq(1)
with m.Else():
m.d.comb += self.o.out_do_z.eq(0)
- with m.If(self.i.ctx.op == 1): # SQRT
+ with m.If(self.i.ctx.op == 1): # SQRT
# if a is zero return zero
with m.If(a1.is_zero):
with m.Else():
m.d.comb += self.o.out_do_z.eq(0)
- with m.If(self.i.ctx.op == 2): # RSQRT
+ with m.If(self.i.ctx.op == 2): # RSQRT
# if a is zero return NaN
with m.If(a1.is_zero):
with m.Else():
m.d.comb += self.o.out_do_z.eq(0)
-
m.d.comb += self.o.oz.eq(self.o.z.v)
m.d.comb += self.o.ctx.eq(self.i.ctx)
""" links module to inputs and outputs
"""
self.mod.setup(m, i, self.out_do_z)
- m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
+ m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
m.d.sync += self.out_z.mid.eq(self.mod.o.mid) # (and mid)
def action(self, m):
def __init__(self, pspec):
FPState.__init__(self, "special_cases")
self.pspec = pspec
- SimpleHandshake.__init__(self, self) # pipe is its own stage
+ SimpleHandshake.__init__(self, self) # pipe is its own stage
self.out = self.ospec()
def ispec(self):
- return FPADDBaseData(self.pspec) # SpecialCases ispec
+ return FPADDBaseData(self.pspec) # SpecialCases ispec
def ospec(self):
- return FPSCData(self.pspec, False) # Align ospec
+ return FPSCData(self.pspec, False) # Align ospec
def setup(self, m, i):
""" links module to inputs and outputs
#with m.Else():
m.d.sync += self.out.eq(self.process(None))
m.next = "align"
-
-
from ieee754.fpdiv.pipeline import (FPDIVMuxInOut,)
from ieee754.fpcommon.test.fpmux import runfp
+from ieee754.div_rem_sqrt_rsqrt.core import DivPipeCoreOperation
import unittest
from sfpy import Float64, Float32, Float16
class TestDivPipe(unittest.TestCase):
def test_pipe_div_fp16(self):
dut = FPDIVMuxInOut(16, 4)
- runfp(dut, 16, "test_fpdiv_pipe_fp16", Float16, div)
+ # don't forget to initialize opcode; don't use magic numbers
+ opcode = int(DivPipeCoreOperation.UDivRem)
+ runfp(dut, 16, "test_fpdiv_pipe_fp16", Float16, div,
+ opcode=opcode)
def test_pipe_div_fp32(self):
dut = FPDIVMuxInOut(32, 4)
- runfp(dut, 32, "test_fpdiv_pipe_fp32", Float32, div)
+ # don't forget to initialize opcode; don't use magic numbers
+ opcode = int(DivPipeCoreOperation.UDivRem)
+ runfp(dut, 32, "test_fpdiv_pipe_fp32", Float32, div,
+ opcode=opcode)
def test_pipe_div_fp64(self):
dut = FPDIVMuxInOut(64, 4)
- runfp(dut, 64, "test_fpdiv_pipe_fp64", Float64, div)
+ # don't forget to initialize opcode; don't use magic numbers
+ opcode = int(DivPipeCoreOperation.UDivRem)
+ runfp(dut, 64, "test_fpdiv_pipe_fp64", Float64, div,
+ opcode=opcode)
if __name__ == '__main__':
from ieee754.fpcommon.test.case_gen import run_pipe_fp
from ieee754.fpcommon.test import unit_test_half
from ieee754.fpdiv.test.div_data16 import regressions
+from ieee754.div_rem_sqrt_rsqrt.core import DivPipeCoreOperation
import unittest
from sfpy import Float16
class TestDivPipe(unittest.TestCase):
def test_pipe_fp16(self):
dut = FPDIVMuxInOut(16, 4)
+ # don't forget to initialize opcode; don't use magic numbers
+ opcode = int(DivPipeCoreOperation.UDivRem)
run_pipe_fp(dut, 16, "div16", unit_test_half, Float16,
- regressions, div, 10)
+ regressions, div, 10, opcode=opcode)
if __name__ == '__main__':
from ieee754.fpcommon.test.case_gen import run_pipe_fp
from ieee754.fpcommon.test import unit_test_single
from ieee754.fpdiv.test.div_data32 import regressions
+from ieee754.div_rem_sqrt_rsqrt.core import DivPipeCoreOperation
import unittest
from sfpy import Float32
class TestDivPipe(unittest.TestCase):
def test_pipe_fp32(self):
dut = FPDIVMuxInOut(32, 4)
+ # don't forget to initialize opcode; don't use magic numbers
+ opcode = int(DivPipeCoreOperation.UDivRem)
run_pipe_fp(dut, 32, "div32", unit_test_single, Float32,
- regressions, div, 10)
+ regressions, div, 10, opcode=opcode)
if __name__ == '__main__':
from ieee754.fpdiv.pipeline import (FPDIVMuxInOut,)
from ieee754.fpcommon.test.fpmux import runfp
+from ieee754.div_rem_sqrt_rsqrt.core import DivPipeCoreOperation
import unittest
from sfpy import Float64, Float32, Float16
def rsqrt(x):
- # FIXME: switch to correct implementation (rounding once)
- return x.__class__(1.0) / x.sqrt()
+ # FIXME: switch to correct implementation
+ # needs to use exact arithmetic and rounding only once at the end
+ return x.__class__(float(Float64(1.0) / x.to_f64().sqrt()))
class TestDivPipe(unittest.TestCase):
def test_pipe_rsqrt_fp16(self):
dut = FPDIVMuxInOut(16, 4)
+ # don't forget to initialize opcode; don't use magic numbers
+ opcode = int(DivPipeCoreOperation.RSqrtRem)
runfp(dut, 16, "test_fprsqrt_pipe_fp16", Float16, rsqrt,
- single_op=True, opcode=2, n_vals=100)
+ single_op=True, opcode=opcode, n_vals=100)
def test_pipe_rsqrt_fp32(self):
dut = FPDIVMuxInOut(32, 4)
+ # don't forget to initialize opcode; don't use magic numbers
+ opcode = int(DivPipeCoreOperation.RSqrtRem)
runfp(dut, 32, "test_fprsqrt_pipe_fp32", Float32, rsqrt,
- single_op=True, opcode=2, n_vals=100)
+ single_op=True, opcode=opcode, n_vals=100)
+ @unittest.skip("rsqrt not implemented for fp64")
def test_pipe_rsqrt_fp64(self):
dut = FPDIVMuxInOut(64, 4)
+ # don't forget to initialize opcode; don't use magic numbers
+ opcode = int(DivPipeCoreOperation.RSqrtRem)
runfp(dut, 64, "test_fprsqrt_pipe_fp64", Float64, rsqrt,
- single_op=True, opcode=2, n_vals=100)
+ single_op=True, opcode=opcode, n_vals=100)
if __name__ == '__main__':
from ieee754.fpdiv.pipeline import (FPDIVMuxInOut,)
from ieee754.fpcommon.test.fpmux import runfp
+from ieee754.div_rem_sqrt_rsqrt.core import DivPipeCoreOperation
import unittest
from sfpy import Float64, Float32, Float16
class TestDivPipe(unittest.TestCase):
def test_pipe_sqrt_fp16(self):
dut = FPDIVMuxInOut(16, 4)
+ # don't forget to initialize opcode; don't use magic numbers
+ opcode = int(DivPipeCoreOperation.SqrtRem)
runfp(dut, 16, "test_fpsqrt_pipe_fp16", Float16, sqrt,
- single_op=True, opcode=1, n_vals=100)
+ single_op=True, opcode=opcode, n_vals=100)
def test_pipe_sqrt_fp32(self):
dut = FPDIVMuxInOut(32, 4)
+ # don't forget to initialize opcode; don't use magic numbers
+ opcode = int(DivPipeCoreOperation.SqrtRem)
runfp(dut, 32, "test_fpsqrt_pipe_fp32", Float32, sqrt,
- single_op=True, opcode=1, n_vals=100)
+ single_op=True, opcode=opcode, n_vals=100)
def test_pipe_sqrt_fp64(self):
dut = FPDIVMuxInOut(64, 4)
+ # don't forget to initialize opcode; don't use magic numbers
+ opcode = int(DivPipeCoreOperation.SqrtRem)
runfp(dut, 64, "test_fpsqrt_pipe_fp64", Float64, sqrt,
- single_op=True, opcode=1, n_vals=100)
+ single_op=True, opcode=opcode, n_vals=100)
if __name__ == '__main__':
self.id_wid = id_width
self.op_wid = op_wid
self.opkls = opkls
+ self.core_config = None
+ self.fpformat = None
+ self.n_comb_stages = None