From c25d3e23de63d0cab676a76a1bb3e497640cc2a6 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Thu, 9 Jul 2020 11:49:51 +0100 Subject: [PATCH] remove xer_ca from DIV pipeline (took a bit of messing about) --- src/soc/fu/div/core_stages.py | 6 +- src/soc/fu/div/input_stage.py | 3 + src/soc/fu/div/output_stage.py | 126 ++++++++++++++++++++++-- src/soc/fu/div/pipeline.py | 10 +- src/soc/fu/div/setup_stage.py | 1 - src/soc/fu/div/test/test_pipe_caller.py | 5 - src/soc/fu/mul/output_stage.py | 12 +++ src/soc/fu/mul/pipeline.py | 2 +- src/soc/fu/pipe_data.py | 7 +- 9 files changed, 147 insertions(+), 25 deletions(-) create mode 100644 src/soc/fu/mul/output_stage.py diff --git a/src/soc/fu/div/core_stages.py b/src/soc/fu/div/core_stages.py index 3bbde7db..fdbe8659 100644 --- a/src/soc/fu/div/core_stages.py +++ b/src/soc/fu/div/core_stages.py @@ -3,14 +3,14 @@ from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array) from nmutil.pipemodbase import PipeModBase -from soc.fu.logical.pipe_data import LogicalInputData -from soc.fu.alu.pipe_data import ALUOutputData from ieee754.part.partsig import PartitionedSignal from soc.decoder.power_enums import InternalOp from soc.decoder.power_fields import DecodeFields from soc.decoder.power_fieldsn import SignalBitRange -from soc.fu.div.pipe_data import CoreInputData, CoreInterstageData, CoreOutputData +from soc.fu.div.pipe_data import (CoreInputData, + CoreInterstageData, + CoreOutputData) from ieee754.div_rem_sqrt_rsqrt.core import (DivPipeCoreSetupStage, DivPipeCoreCalculateStage, DivPipeCoreFinalStage) diff --git a/src/soc/fu/div/input_stage.py b/src/soc/fu/div/input_stage.py index 0849aded..a9ad6652 100644 --- a/src/soc/fu/div/input_stage.py +++ b/src/soc/fu/div/input_stage.py @@ -7,6 +7,9 @@ from soc.fu.div.pipe_data import DIVInputData # simply over-ride ALUInputStage ispec / ospec class DivMulInputStage(ALUInputStage): + def __init__(self, pspec): + super().__init__(pspec) + def ispec(self): return DIVInputData(self.pspec) def ospec(self): return DIVInputData(self.pspec) diff --git a/src/soc/fu/div/output_stage.py b/src/soc/fu/div/output_stage.py index 67848dbe..9eb16f6c 100644 --- a/src/soc/fu/div/output_stage.py +++ b/src/soc/fu/div/output_stage.py @@ -1,12 +1,120 @@ -# This stage is intended to adjust the input data before sending it to -# the actual ALU. Things like handling inverting the input, xer_ca -# generation for subtraction, and handling of immediates should happen -# in the base class (CommonOutputStage.elaborate). -from soc.fu.alu.output_stage import ALUOutputStage +# This stage is the setup stage that converts the inputs +# into the values expected by DivPipeCore + +from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array) +from nmutil.pipemodbase import PipeModBase +from soc.fu.logical.pipe_data import LogicalInputData from soc.fu.div.pipe_data import DivMulOutputData +from ieee754.part.partsig import PartitionedSignal +from soc.decoder.power_enums import InternalOp + +from soc.decoder.power_fields import DecodeFields +from soc.decoder.power_fieldsn import SignalBitRange +from soc.fu.div.pipe_data import CoreOutputData + + +class DivOutputStage(PipeModBase): + def __init__(self, pspec): + super().__init__(pspec, "output_stage") + self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn]) + self.fields.create_specs() + self.quotient_neg = Signal() + self.remainder_neg = Signal() + self.quotient_64 = Signal(64) + self.remainder_64 = Signal(64) + + def ispec(self): + return CoreOutputData(self.pspec) + + def ospec(self): + return DivMulOutputData(self.pspec) + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + op = self.i.ctx.op + abs_quotient = self.i.core.quotient_root + fract_width = self.pspec.core_config.fract_width + # fract width of `DivPipeCoreOutputData.remainder` + remainder_fract_width = fract_width * 3 + # fract width of `DivPipeCoreInputData.dividend` + dividend_fract_width = fract_width * 2 + rem_start = remainder_fract_width - dividend_fract_width + abs_remainder = self.i.core.remainder[rem_start:rem_start+64] + dividend_neg = self.i.dividend_neg + divisor_neg = self.i.divisor_neg + quotient_64 = self.quotient_64 + remainder_64 = self.remainder_64 + + comb += self.quotient_neg.eq(dividend_neg ^ divisor_neg) + # follows rules for truncating division + comb += self.remainder_neg.eq(dividend_neg) + + # negation of a 64-bit value produces the same lower 32-bit + # result as negation of just the lower 32-bits, so we don't + # need to do anything special before negating + comb += [ + quotient_64.eq(Mux(self.quotient_neg, + -abs_quotient, abs_quotient)), + remainder_64.eq(Mux(self.remainder_neg, + -abs_remainder, abs_remainder)) + ] + + xer_ov = self.o.xer_ov.data + + def calc_overflow(dive_abs_overflow, sign_bit_mask): + nonlocal comb + overflow = dive_abs_overflow | self.i.div_by_zero + with m.If(op.is_signed): + comb += xer_ov.eq(overflow + | (abs_quotient > sign_bit_mask) + | ((abs_quotient == sign_bit_mask) + & ~self.quotient_neg)) + with m.Else(): + comb += xer_ov.eq(overflow) + + with m.If(op.is_32bit): + calc_overflow(self.i.dive_abs_ov32, 0x80000000) + with m.Else(): + calc_overflow(self.i.dive_abs_ov64, 0x8000000000000000) + + ########################## + # main switch for DIV + + o = self.o.o.data + + with m.Switch(op.insn_type): + with m.Case(InternalOp.OP_DIVE): + with m.If(op.is_32bit): + with m.If(op.is_signed): + # matches POWER9's divweo behavior + comb += o.eq(quotient_64[0:32].as_unsigned()) + with m.Else(): + comb += o.eq(quotient_64[0:32].as_unsigned()) + with m.Else(): + comb += o.eq(quotient_64) + with m.Case(InternalOp.OP_DIV): + with m.If(op.is_32bit): + with m.If(op.is_signed): + # matches POWER9's divwo behavior + comb += o.eq(quotient_64[0:32].as_unsigned()) + with m.Else(): + comb += o.eq(quotient_64[0:32].as_unsigned()) + with m.Else(): + comb += o.eq(quotient_64) + with m.Case(InternalOp.OP_MOD): + with m.If(op.is_32bit): + with m.If(op.is_signed): + # matches POWER9's modsw behavior + comb += o.eq(remainder_64[0:32].as_signed()) + with m.Else(): + comb += o.eq(remainder_64[0:32].as_unsigned()) + with m.Else(): + comb += o.eq(remainder_64) + + ###### sticky overflow and context, both pass-through ##### -# simply over-ride ALUOutputStage ispec / ospec -class DivMulOutputStage(ALUOutputStage): - def ispec(self): return DivMulOutputData(self.pspec) - def ospec(self): return DivMulOutputData(self.pspec) + comb += self.o.xer_so.data.eq(self.i.xer_so) + comb += self.o.ctx.eq(self.i.ctx) + return m diff --git a/src/soc/fu/div/pipeline.py b/src/soc/fu/div/pipeline.py index a7355dd5..d72083e5 100644 --- a/src/soc/fu/div/pipeline.py +++ b/src/soc/fu/div/pipeline.py @@ -1,16 +1,16 @@ from nmutil.singlepipe import ControlBase from nmutil.pipemodbase import PipeModBaseChain -from soc.fu.alu.input_stage import ALUInputStage -from soc.fu.alu.output_stage import ALUOutputStage +from soc.fu.mul.output_stage import DivMulOutputStage +from soc.fu.div.input_stage import DivMulInputStage +from soc.fu.div.output_stage import DivOutputStage from soc.fu.div.setup_stage import DivSetupStage from soc.fu.div.core_stages import (DivCoreSetupStage, DivCoreCalculateStage, DivCoreFinalStage) -from soc.fu.div.output_stage import DivOutputStage class DivStagesStart(PipeModBaseChain): def get_chain(self): - alu_input = ALUInputStage(self.pspec) + alu_input = DivMulInputStage(self.pspec) div_setup = DivSetupStage(self.pspec) core_setup = DivCoreSetupStage(self.pspec) return [alu_input, div_setup, core_setup] @@ -33,7 +33,7 @@ class DivStagesEnd(PipeModBaseChain): def get_chain(self): core_final = DivCoreFinalStage(self.pspec) div_out = DivOutputStage(self.pspec) - alu_out = ALUOutputStage(self.pspec) + alu_out = DivMulOutputStage(self.pspec) return [core_final, div_out, alu_out] diff --git a/src/soc/fu/div/setup_stage.py b/src/soc/fu/div/setup_stage.py index 9b0455be..25daa201 100644 --- a/src/soc/fu/div/setup_stage.py +++ b/src/soc/fu/div/setup_stage.py @@ -4,7 +4,6 @@ from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array) from nmutil.pipemodbase import PipeModBase from soc.fu.div.pipe_data import DIVInputData -from soc.fu.alu.pipe_data import ALUOutputData from ieee754.part.partsig import PartitionedSignal from soc.decoder.power_enums import InternalOp diff --git a/src/soc/fu/div/test/test_pipe_caller.py b/src/soc/fu/div/test/test_pipe_caller.py index 8ae19d5a..fb6a4025 100644 --- a/src/soc/fu/div/test/test_pipe_caller.py +++ b/src/soc/fu/div/test/test_pipe_caller.py @@ -25,7 +25,6 @@ def get_cu_inputs(dec2, sim): yield from ALUHelpers.get_sim_int_ra(res, sim, dec2) # RA yield from ALUHelpers.get_sim_int_rb(res, sim, dec2) # RB - yield from ALUHelpers.get_rd_sim_xer_ca(res, sim, dec2) # XER.ca yield from ALUHelpers.get_sim_xer_so(res, sim, dec2) # XER.so print ("alu get_cu_inputs", res) @@ -43,7 +42,6 @@ def set_alu_inputs(alu, dec2, sim): yield from ALUHelpers.set_int_ra(alu, dec2, inp) yield from ALUHelpers.set_int_rb(alu, dec2, inp) - yield from ALUHelpers.set_xer_ca(alu, dec2, inp) yield from ALUHelpers.set_xer_so(alu, dec2, inp) @@ -190,19 +188,16 @@ class TestRunner(FHDLTestCase): yield from ALUHelpers.get_cr_a(res, alu, dec2) yield from ALUHelpers.get_xer_ov(res, alu, dec2) - yield from ALUHelpers.get_xer_ca(res, alu, dec2) yield from ALUHelpers.get_int_o(res, alu, dec2) yield from ALUHelpers.get_xer_so(res, alu, dec2) yield from ALUHelpers.get_sim_int_o(sim_o, sim, dec2) yield from ALUHelpers.get_wr_sim_cr_a(sim_o, sim, dec2) yield from ALUHelpers.get_sim_xer_ov(sim_o, sim, dec2) - yield from ALUHelpers.get_wr_sim_xer_ca(sim_o, sim, dec2) yield from ALUHelpers.get_sim_xer_so(sim_o, sim, dec2) ALUHelpers.check_cr_a(self, res, sim_o, "CR%d %s" % (cridx, code)) ALUHelpers.check_xer_ov(self, res, sim_o, code) - ALUHelpers.check_xer_ca(self, res, sim_o, code) ALUHelpers.check_int_o(self, res, sim_o, code) ALUHelpers.check_xer_so(self, res, sim_o, code) diff --git a/src/soc/fu/mul/output_stage.py b/src/soc/fu/mul/output_stage.py new file mode 100644 index 00000000..67848dbe --- /dev/null +++ b/src/soc/fu/mul/output_stage.py @@ -0,0 +1,12 @@ +# This stage is intended to adjust the input data before sending it to +# the actual ALU. Things like handling inverting the input, xer_ca +# generation for subtraction, and handling of immediates should happen +# in the base class (CommonOutputStage.elaborate). +from soc.fu.alu.output_stage import ALUOutputStage +from soc.fu.div.pipe_data import DivMulOutputData + +# simply over-ride ALUOutputStage ispec / ospec +class DivMulOutputStage(ALUOutputStage): + def ispec(self): return DivMulOutputData(self.pspec) + def ospec(self): return DivMulOutputData(self.pspec) + diff --git a/src/soc/fu/mul/pipeline.py b/src/soc/fu/mul/pipeline.py index 3816435d..f5a0f069 100644 --- a/src/soc/fu/mul/pipeline.py +++ b/src/soc/fu/mul/pipeline.py @@ -1,7 +1,7 @@ from nmutil.singlepipe import ControlBase from nmutil.pipemodbase import PipeModBaseChain from soc.fu.div.input_stage import DivMulInputStage -from soc.fu.div.output_stage import DivMulOutputStage +from soc.fu.mul.output_stage import DivMulOutputStage from soc.fu.mul.pre_stage import MulMainStage1 from soc.fu.mul.main_stage import MulMainStage2 from soc.fu.mul.post_stage import MulMainStage3 diff --git a/src/soc/fu/pipe_data.py b/src/soc/fu/pipe_data.py index 4201d400..7d6ac539 100644 --- a/src/soc/fu/pipe_data.py +++ b/src/soc/fu/pipe_data.py @@ -26,8 +26,13 @@ class IntegerData: def eq(self, i): eqs = [self.ctx.eq(i.ctx)] + assert len(self.data) == len(i.data), \ + "length of %s mismatch against %s: %s %s" % \ + (repr(self), repr(i), repr(self.data), repr(i.data)) for j in range(len(self.data)): - assert type(self.data[j]) == type(i.data[j]) + assert type(self.data[j]) == type(i.data[j]), \ + "type mismatch in IntegerData %s %s" % \ + (repr(self.data[j]), repr(i.data[j])) eqs.append(self.data[j].eq(i.data[j])) return eqs -- 2.30.2