from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array)
from nmutil.pipemodbase import PipeModBase
-from soc.fu.logical.pipe_data import LogicalInputData
-from soc.fu.alu.pipe_data import ALUOutputData
from ieee754.part.partsig import PartitionedSignal
from soc.decoder.power_enums import InternalOp
from soc.decoder.power_fields import DecodeFields
from soc.decoder.power_fieldsn import SignalBitRange
-from soc.fu.div.pipe_data import CoreInputData, CoreInterstageData, CoreOutputData
+from soc.fu.div.pipe_data import (CoreInputData,
+ CoreInterstageData,
+ CoreOutputData)
from ieee754.div_rem_sqrt_rsqrt.core import (DivPipeCoreSetupStage,
DivPipeCoreCalculateStage,
DivPipeCoreFinalStage)
# simply over-ride ALUInputStage ispec / ospec
class DivMulInputStage(ALUInputStage):
+ # the constructor only forwards pspec to the ALUInputStage constructor
+ def __init__(self, pspec):
+ super().__init__(pspec)
+
+ # both the input and the output spec of this stage are DIVInputData
def ispec(self): return DIVInputData(self.pspec)
def ospec(self): return DIVInputData(self.pspec)
-# This stage is intended to adjust the input data before sending it to
-# the actual ALU. Things like handling inverting the input, xer_ca
-# generation for subtraction, and handling of immediates should happen
-# in the base class (CommonOutputStage.elaborate).
-from soc.fu.alu.output_stage import ALUOutputStage
+# This stage is the output stage that converts the results produced by
+# DivPipeCore (CoreOutputData) into the DIV pipeline's DivMulOutputData
+
+from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array)
+from nmutil.pipemodbase import PipeModBase
+from soc.fu.logical.pipe_data import LogicalInputData
from soc.fu.div.pipe_data import DivMulOutputData
+from ieee754.part.partsig import PartitionedSignal
+from soc.decoder.power_enums import InternalOp
+
+from soc.decoder.power_fields import DecodeFields
+from soc.decoder.power_fieldsn import SignalBitRange
+from soc.fu.div.pipe_data import CoreOutputData
+
+
+class DivOutputStage(PipeModBase):
+ # Takes CoreOutputData (see ispec) and produces DivMulOutputData (see
+ # ospec): applies the result sign, computes XER.OV and selects the
+ # quotient or remainder as the integer output.
+ def __init__(self, pspec):
+ super().__init__(pspec, "output_stage")
+ # decode-field specs built over the instruction held in the op context
+ self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
+ self.fields.create_specs()
+ # 1-bit sign flags and 64-bit signed-corrected results, all driven
+ # combinatorially in elaborate()
+ self.quotient_neg = Signal()
+ self.remainder_neg = Signal()
+ self.quotient_64 = Signal(64)
+ self.remainder_64 = Signal(64)
+
+ def ispec(self):
+ return CoreOutputData(self.pspec)
+
+ def ospec(self):
+ return DivMulOutputData(self.pspec)
+
+ def elaborate(self, platform):
+ m = Module()
+ comb = m.d.comb
+ op = self.i.ctx.op
+ abs_quotient = self.i.core.quotient_root
+ fract_width = self.pspec.core_config.fract_width
+ # fract width of `DivPipeCoreOutputData.remainder`
+ remainder_fract_width = fract_width * 3
+ # fract width of `DivPipeCoreInputData.dividend`
+ dividend_fract_width = fract_width * 2
+ # difference of the two fract widths above, i.e. fract_width; the
+ # 64-bit remainder is sliced out of the core remainder from this bit
+ rem_start = remainder_fract_width - dividend_fract_width
+ abs_remainder = self.i.core.remainder[rem_start:rem_start+64]
+ dividend_neg = self.i.dividend_neg
+ divisor_neg = self.i.divisor_neg
+ quotient_64 = self.quotient_64
+ remainder_64 = self.remainder_64
+
+ # quotient is negative when exactly one operand is negative
+ comb += self.quotient_neg.eq(dividend_neg ^ divisor_neg)
+ # follows rules for truncating division
+ comb += self.remainder_neg.eq(dividend_neg)
+
+ # negation of a 64-bit value produces the same lower 32-bit
+ # result as negation of just the lower 32-bits, so we don't
+ # need to do anything special before negating
+ comb += [
+ quotient_64.eq(Mux(self.quotient_neg,
+ -abs_quotient, abs_quotient)),
+ remainder_64.eq(Mux(self.remainder_neg,
+ -abs_remainder, abs_remainder))
+ ]
+
+ xer_ov = self.o.xer_ov.data
+
+ # Drives xer_ov. dive_abs_overflow is the width-specific overflow
+ # flag from the input; sign_bit_mask is the value of the sign bit
+ # (2**31 or 2**63) for the operation width.
+ def calc_overflow(dive_abs_overflow, sign_bit_mask):
+ # `comb += ...` is an augmented assignment, which would make comb
+ # a local of this function without the nonlocal declaration
+ nonlocal comb
+ overflow = dive_abs_overflow | self.i.div_by_zero
+ with m.If(op.is_signed):
+ # signed: additionally overflows when the magnitude exceeds
+ # sign_bit_mask, or equals it while the quotient is not negative
+ # (that magnitude is only representable as a negative value)
+ comb += xer_ov.eq(overflow
+ | (abs_quotient > sign_bit_mask)
+ | ((abs_quotient == sign_bit_mask)
+ & ~self.quotient_neg))
+ with m.Else():
+ comb += xer_ov.eq(overflow)
+
+ with m.If(op.is_32bit):
+ calc_overflow(self.i.dive_abs_ov32, 0x80000000)
+ with m.Else():
+ calc_overflow(self.i.dive_abs_ov64, 0x8000000000000000)
+
+ ##########################
+ # main switch for DIV
+
+ o = self.o.o.data
+
+ # only OP_DIVE, OP_DIV and OP_MOD assign o in this switch
+ with m.Switch(op.insn_type):
+ with m.Case(InternalOp.OP_DIVE):
+ with m.If(op.is_32bit):
+ # signed and unsigned branches assign the same expression:
+ # the low 32 bits of the quotient, treated as unsigned
+ with m.If(op.is_signed):
+ # matches POWER9's divweo behavior
+ comb += o.eq(quotient_64[0:32].as_unsigned())
+ with m.Else():
+ comb += o.eq(quotient_64[0:32].as_unsigned())
+ with m.Else():
+ comb += o.eq(quotient_64)
+ with m.Case(InternalOp.OP_DIV):
+ with m.If(op.is_32bit):
+ # as for OP_DIVE, both branches assign the same expression
+ with m.If(op.is_signed):
+ # matches POWER9's divwo behavior
+ comb += o.eq(quotient_64[0:32].as_unsigned())
+ with m.Else():
+ comb += o.eq(quotient_64[0:32].as_unsigned())
+ with m.Else():
+ comb += o.eq(quotient_64)
+ with m.Case(InternalOp.OP_MOD):
+ with m.If(op.is_32bit):
+ # here the signed case differs: the low 32 bits are treated as
+ # signed (presumably sign-extended into o if o is wider than
+ # 32 bits -- TODO confirm width of o)
+ with m.If(op.is_signed):
+ # matches POWER9's modsw behavior
+ comb += o.eq(remainder_64[0:32].as_signed())
+ with m.Else():
+ comb += o.eq(remainder_64[0:32].as_unsigned())
+ with m.Else():
+ comb += o.eq(remainder_64)
+
+ ###### sticky overflow and context, both pass-through #####
-# simply over-ride ALUOutputStage ispec / ospec
-class DivMulOutputStage(ALUOutputStage):
- def ispec(self): return DivMulOutputData(self.pspec)
- def ospec(self): return DivMulOutputData(self.pspec)
+ comb += self.o.xer_so.data.eq(self.i.xer_so)
+ comb += self.o.ctx.eq(self.i.ctx)
+ return m
from nmutil.singlepipe import ControlBase
from nmutil.pipemodbase import PipeModBaseChain
-from soc.fu.alu.input_stage import ALUInputStage
-from soc.fu.alu.output_stage import ALUOutputStage
+from soc.fu.mul.output_stage import DivMulOutputStage
+from soc.fu.div.input_stage import DivMulInputStage
+from soc.fu.div.output_stage import DivOutputStage
from soc.fu.div.setup_stage import DivSetupStage
from soc.fu.div.core_stages import (DivCoreSetupStage, DivCoreCalculateStage,
DivCoreFinalStage)
-from soc.fu.div.output_stage import DivOutputStage
class DivStagesStart(PipeModBaseChain):
+ # returns the stages of this chain in order: the DivMul input stage,
+ # the div setup stage, then the core setup stage, all built from pspec
def get_chain(self):
- alu_input = ALUInputStage(self.pspec)
+ alu_input = DivMulInputStage(self.pspec)
div_setup = DivSetupStage(self.pspec)
core_setup = DivCoreSetupStage(self.pspec)
return [alu_input, div_setup, core_setup]
def get_chain(self):
+ # stages in order: core final stage, div output stage, then the
+ # DivMul output stage, all built from pspec
core_final = DivCoreFinalStage(self.pspec)
div_out = DivOutputStage(self.pspec)
- alu_out = ALUOutputStage(self.pspec)
+ alu_out = DivMulOutputStage(self.pspec)
return [core_final, div_out, alu_out]
from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array)
from nmutil.pipemodbase import PipeModBase
from soc.fu.div.pipe_data import DIVInputData
-from soc.fu.alu.pipe_data import ALUOutputData
from ieee754.part.partsig import PartitionedSignal
from soc.decoder.power_enums import InternalOp
yield from ALUHelpers.get_sim_int_ra(res, sim, dec2) # RA
yield from ALUHelpers.get_sim_int_rb(res, sim, dec2) # RB
- yield from ALUHelpers.get_rd_sim_xer_ca(res, sim, dec2) # XER.ca
yield from ALUHelpers.get_sim_xer_so(res, sim, dec2) # XER.so
print ("alu get_cu_inputs", res)
yield from ALUHelpers.set_int_ra(alu, dec2, inp)
yield from ALUHelpers.set_int_rb(alu, dec2, inp)
- yield from ALUHelpers.set_xer_ca(alu, dec2, inp)
yield from ALUHelpers.set_xer_so(alu, dec2, inp)
yield from ALUHelpers.get_cr_a(res, alu, dec2)
yield from ALUHelpers.get_xer_ov(res, alu, dec2)
- yield from ALUHelpers.get_xer_ca(res, alu, dec2)
yield from ALUHelpers.get_int_o(res, alu, dec2)
yield from ALUHelpers.get_xer_so(res, alu, dec2)
yield from ALUHelpers.get_sim_int_o(sim_o, sim, dec2)
yield from ALUHelpers.get_wr_sim_cr_a(sim_o, sim, dec2)
yield from ALUHelpers.get_sim_xer_ov(sim_o, sim, dec2)
- yield from ALUHelpers.get_wr_sim_xer_ca(sim_o, sim, dec2)
yield from ALUHelpers.get_sim_xer_so(sim_o, sim, dec2)
ALUHelpers.check_cr_a(self, res, sim_o, "CR%d %s" % (cridx, code))
ALUHelpers.check_xer_ov(self, res, sim_o, code)
- ALUHelpers.check_xer_ca(self, res, sim_o, code)
ALUHelpers.check_int_o(self, res, sim_o, code)
ALUHelpers.check_xer_so(self, res, sim_o, code)
--- /dev/null
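+# This stage is intended to adjust the input data before sending it to
+# the actual ALU. Things like handling inverting the input, xer_ca
+# generation for subtraction, and handling of immediates should happen
+# in the base class (CommonOutputStage.elaborate).
+# NOTE(review): the text above talks about adjusting *input* data, but
+# the class defined below subclasses ALUOutputStage -- confirm wording.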
+from soc.fu.alu.output_stage import ALUOutputStage
+from soc.fu.div.pipe_data import DivMulOutputData
+
+# simply over-ride ALUOutputStage ispec / ospec
+class DivMulOutputStage(ALUOutputStage):
+ # both the input and the output spec of this stage are DivMulOutputData;
+ # everything else is inherited from ALUOutputStage
+ def ispec(self): return DivMulOutputData(self.pspec)
+ def ospec(self): return DivMulOutputData(self.pspec)
+
from nmutil.singlepipe import ControlBase
from nmutil.pipemodbase import PipeModBaseChain
from soc.fu.div.input_stage import DivMulInputStage
-from soc.fu.div.output_stage import DivMulOutputStage
+from soc.fu.mul.output_stage import DivMulOutputStage
from soc.fu.mul.pre_stage import MulMainStage1
from soc.fu.mul.main_stage import MulMainStage2
from soc.fu.mul.post_stage import MulMainStage3
def eq(self, i):
+ # returns a list of assignments copying i into self: the ctx
+ # assignment first, then one assignment per entry of data, in order
eqs = [self.ctx.eq(i.ctx)]
+ # both sides must carry the same number of data entries
+ assert len(self.data) == len(i.data), \
+ "length of %s mismatch against %s: %s %s" % \
+ (repr(self), repr(i), repr(self.data), repr(i.data))
for j in range(len(self.data)):
+ # entries are paired by position and must be of the same type
- assert type(self.data[j]) == type(i.data[j])
+ assert type(self.data[j]) == type(i.data[j]), \
+ "type mismatch in IntegerData %s %s" % \
+ (repr(self.data[j]), repr(i.data[j]))
eqs.append(self.data[j].eq(i.data[j]))
return eqs