From: Luke Kenneth Casson Leighton Date: Thu, 9 Jul 2020 09:52:46 +0000 (+0100) Subject: add new stages etc. to get multiply working without xer_ca X-Git-Tag: div_pipeline~140 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=512e2d72912ba57913ab1b1297a085d5fae67181;p=soc.git add new stages etc. to get multiply working without xer_ca --- diff --git a/src/soc/fu/div/input_stage.py b/src/soc/fu/div/input_stage.py new file mode 100644 index 00000000..0849aded --- /dev/null +++ b/src/soc/fu/div/input_stage.py @@ -0,0 +1,12 @@ +# This stage is intended to adjust the input data before sending it to +# the actual ALU. Things like handling inverting the input, xer_ca +# generation for subtraction, and handling of immediates should happen +# in the base class (CommonInputStage.elaborate). +from soc.fu.alu.input_stage import ALUInputStage +from soc.fu.div.pipe_data import DIVInputData + +# simply over-ride ALUInputStage ispec / ospec +class DivMulInputStage(ALUInputStage): + def ispec(self): return DIVInputData(self.pspec) + def ospec(self): return DIVInputData(self.pspec) + diff --git a/src/soc/fu/div/output_stage.py b/src/soc/fu/div/output_stage.py index 1db0bbb2..67848dbe 100644 --- a/src/soc/fu/div/output_stage.py +++ b/src/soc/fu/div/output_stage.py @@ -1,120 +1,12 @@ -# This stage is the setup stage that converts the inputs -# into the values expected by DivPipeCore +# This stage is intended to adjust the output data after the main +# pipeline stages have run. Things like handling CR, XER and +# inverting the output (out-invert) should happen +# in the base class (CommonOutputStage.elaborate). 
+from soc.fu.alu.output_stage import ALUOutputStage +from soc.fu.div.pipe_data import DivMulOutputData + +# simply over-ride ALUOutputStage ispec / ospec +class DivMulOutputStage(ALUOutputStage): + def ispec(self): return DivMulOutputData(self.pspec) + def ospec(self): return DivMulOutputData(self.pspec) -from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array) -from nmutil.pipemodbase import PipeModBase -from soc.fu.logical.pipe_data import LogicalInputData -from soc.fu.alu.pipe_data import ALUOutputData -from ieee754.part.partsig import PartitionedSignal -from soc.decoder.power_enums import InternalOp - -from soc.decoder.power_fields import DecodeFields -from soc.decoder.power_fieldsn import SignalBitRange -from soc.fu.div.pipe_data import CoreOutputData - - -class DivOutputStage(PipeModBase): - def __init__(self, pspec): - super().__init__(pspec, "output_stage") - self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn]) - self.fields.create_specs() - self.quotient_neg = Signal() - self.remainder_neg = Signal() - self.quotient_64 = Signal(64) - self.remainder_64 = Signal(64) - - def ispec(self): - return CoreOutputData(self.pspec) - - def ospec(self): - return ALUOutputData(self.pspec) - - def elaborate(self, platform): - m = Module() - comb = m.d.comb - op = self.i.ctx.op - abs_quotient = self.i.core.quotient_root - fract_width = self.pspec.core_config.fract_width - # fract width of `DivPipeCoreOutputData.remainder` - remainder_fract_width = fract_width * 3 - # fract width of `DivPipeCoreInputData.dividend` - dividend_fract_width = fract_width * 2 - rem_start = remainder_fract_width - dividend_fract_width - abs_remainder = self.i.core.remainder[rem_start:rem_start+64] - dividend_neg = self.i.dividend_neg - divisor_neg = self.i.divisor_neg - quotient_64 = self.quotient_64 - remainder_64 = self.remainder_64 - - comb += self.quotient_neg.eq(dividend_neg ^ divisor_neg) - # follows rules for truncating division - comb += 
self.remainder_neg.eq(dividend_neg) - - # negation of a 64-bit value produces the same lower 32-bit - # result as negation of just the lower 32-bits, so we don't - # need to do anything special before negating - comb += [ - quotient_64.eq(Mux(self.quotient_neg, - -abs_quotient, abs_quotient)), - remainder_64.eq(Mux(self.remainder_neg, - -abs_remainder, abs_remainder)) - ] - - xer_ov = self.o.xer_ov.data - - def calc_overflow(dive_abs_overflow, sign_bit_mask): - nonlocal comb - overflow = dive_abs_overflow | self.i.div_by_zero - with m.If(op.is_signed): - comb += xer_ov.eq(overflow - | (abs_quotient > sign_bit_mask) - | ((abs_quotient == sign_bit_mask) - & ~self.quotient_neg)) - with m.Else(): - comb += xer_ov.eq(overflow) - - with m.If(op.is_32bit): - calc_overflow(self.i.dive_abs_ov32, 0x80000000) - with m.Else(): - calc_overflow(self.i.dive_abs_ov64, 0x8000000000000000) - - ########################## - # main switch for DIV - - o = self.o.o.data - - with m.Switch(op.insn_type): - with m.Case(InternalOp.OP_DIVE): - with m.If(op.is_32bit): - with m.If(op.is_signed): - # matches POWER9's divweo behavior - comb += o.eq(quotient_64[0:32].as_unsigned()) - with m.Else(): - comb += o.eq(quotient_64[0:32].as_unsigned()) - with m.Else(): - comb += o.eq(quotient_64) - with m.Case(InternalOp.OP_DIV): - with m.If(op.is_32bit): - with m.If(op.is_signed): - # matches POWER9's divwo behavior - comb += o.eq(quotient_64[0:32].as_unsigned()) - with m.Else(): - comb += o.eq(quotient_64[0:32].as_unsigned()) - with m.Else(): - comb += o.eq(quotient_64) - with m.Case(InternalOp.OP_MOD): - with m.If(op.is_32bit): - with m.If(op.is_signed): - # matches POWER9's modsw behavior - comb += o.eq(remainder_64[0:32].as_signed()) - with m.Else(): - comb += o.eq(remainder_64[0:32].as_unsigned()) - with m.Else(): - comb += o.eq(remainder_64) - - ###### sticky overflow and context, both pass-through ##### - - comb += self.o.xer_so.data.eq(self.i.xer_so) - comb += self.o.ctx.eq(self.i.ctx) - - 
return m diff --git a/src/soc/fu/mul/main_stage.py b/src/soc/fu/mul/main_stage.py index ccdd0d35..3d620367 100644 --- a/src/soc/fu/mul/main_stage.py +++ b/src/soc/fu/mul/main_stage.py @@ -28,7 +28,6 @@ class MulMainStage2(PipeModBase): ###### xer and context, all pass-through ##### - comb += self.o.xer_ca.eq(self.i.xer_ca) comb += self.o.neg_res.eq(self.i.neg_res) comb += self.o.neg_res32.eq(self.i.neg_res32) comb += self.o.xer_so.eq(self.i.xer_so) diff --git a/src/soc/fu/mul/mul_input_record.py b/src/soc/fu/mul/mul_input_record.py index 8554c536..51e7352e 100644 --- a/src/soc/fu/mul/mul_input_record.py +++ b/src/soc/fu/mul/mul_input_record.py @@ -20,8 +20,6 @@ class CompMULOpSubset(Record): ('zero_a', 1), ('invert_out', 1), ('write_cr0', 1), - ('input_carry', CryIn), - ('output_carry', 1), ('is_32bit', 1), ('is_signed', 1), ('insn', 32), @@ -35,8 +33,6 @@ class CompMULOpSubset(Record): self.zero_a.reset_less = True self.invert_a.reset_less = True self.invert_out.reset_less = True - self.input_carry.reset_less = True - self.output_carry.reset_less = True self.is_32bit.reset_less = True self.is_signed.reset_less = True @@ -53,8 +49,6 @@ class CompMULOpSubset(Record): return [self.insn_type, self.invert_a, self.invert_out, - self.input_carry, - self.output_carry, self.is_32bit, self.is_signed, ] diff --git a/src/soc/fu/mul/pipe_data.py b/src/soc/fu/mul/pipe_data.py index 38741f61..eef6cd83 100644 --- a/src/soc/fu/mul/pipe_data.py +++ b/src/soc/fu/mul/pipe_data.py @@ -1,10 +1,10 @@ from soc.fu.mul.mul_input_record import CompMULOpSubset from soc.fu.pipe_data import IntegerData, CommonPipeSpec -from soc.fu.alu.pipe_data import ALUOutputData, ALUInputData +from soc.fu.div.pipe_data import DIVInputData, DivMulOutputData from nmigen import Signal -class MulIntermediateData(ALUInputData): +class MulIntermediateData(DIVInputData): def __init__(self, pspec): super().__init__(pspec) @@ -28,5 +28,5 @@ class MulOutputData(IntegerData): class MulPipeSpec(CommonPipeSpec): - 
regspec = (ALUInputData.regspec, ALUOutputData.regspec) + regspec = (DIVInputData.regspec, DivMulOutputData.regspec) opsubsetkls = CompMULOpSubset diff --git a/src/soc/fu/mul/pipeline.py b/src/soc/fu/mul/pipeline.py index a557c90e..3816435d 100644 --- a/src/soc/fu/mul/pipeline.py +++ b/src/soc/fu/mul/pipeline.py @@ -1,7 +1,7 @@ from nmutil.singlepipe import ControlBase from nmutil.pipemodbase import PipeModBaseChain -from soc.fu.alu.input_stage import ALUInputStage -from soc.fu.alu.output_stage import ALUOutputStage +from soc.fu.div.input_stage import DivMulInputStage +from soc.fu.div.output_stage import DivMulOutputStage from soc.fu.mul.pre_stage import MulMainStage1 from soc.fu.mul.main_stage import MulMainStage2 from soc.fu.mul.post_stage import MulMainStage3 @@ -9,7 +9,7 @@ from soc.fu.mul.post_stage import MulMainStage3 class MulStages1(PipeModBaseChain): def get_chain(self): - inp = ALUInputStage(self.pspec) # a-invert, carry etc + inp = DivMulInputStage(self.pspec) # a-invert (no carry) main = MulMainStage1(self.pspec) # detect signed/32-bit return [inp, main] @@ -23,7 +23,7 @@ class MulStages2(PipeModBaseChain): class MulStages3(PipeModBaseChain): def get_chain(self): main3 = MulMainStage3(self.pspec) # select output bits, invert, set ov - out = ALUOutputStage(self.pspec) # do CR, XER and out-invert etc. + out = DivMulOutputStage(self.pspec) # do CR, XER and out-invert etc. 
return [main3, out] diff --git a/src/soc/fu/mul/post_stage.py b/src/soc/fu/mul/post_stage.py index bdee2ec5..b200aa8f 100644 --- a/src/soc/fu/mul/post_stage.py +++ b/src/soc/fu/mul/post_stage.py @@ -2,7 +2,7 @@ from nmigen import (Module, Signal, Cat, Repl, Mux, signed) from nmutil.pipemodbase import PipeModBase -from soc.fu.alu.pipe_data import ALUOutputData +from soc.fu.div.pipe_data import DivMulOutputData from soc.fu.mul.pipe_data import MulOutputData from ieee754.part.partsig import PartitionedSignal from soc.decoder.power_enums import InternalOp @@ -16,16 +16,15 @@ class MulMainStage3(PipeModBase): return MulOutputData(self.pspec) # pipeline stage output format def ospec(self): - return ALUOutputData(self.pspec) # defines pipeline stage output format + return DivMulOutputData(self.pspec) # defines stage output format def elaborate(self, platform): m = Module() comb = m.d.comb # convenience variables - cry_o, o, cr0 = self.o.xer_ca, self.o.o, self.o.cr0 - ov_o = self.o.xer_ov - o_i, cry_i, op = self.i.o, self.i.xer_ca, self.i.ctx.op + o, cr0 = self.o.o, self.o.cr0 + ov_o, o_i, op = self.o.xer_ov, self.i.o, self.i.ctx.op # check if op is 32-bit, and get sign bit from operand a is_32bit = Signal(reset_less=True) @@ -64,13 +63,6 @@ class MulMainStage3(PipeModBase): comb += ov_o.data.eq(ov) comb += ov_o.ok.eq(1) - # https://bugs.libre-soc.org/show_bug.cgi?id=319#c5 - ca = Signal(2, reset_less=True) - comb += ca[0].eq(mul_o[-1]) # XER.CA - XXX more? 
- comb += ca[1].eq(mul_o[32] ^ (self.i.neg_res32)) # XER.CA32 - comb += cry_o.data.eq(ca) - comb += cry_o.ok.eq(1) - ###### sticky overflow and context, both pass-through ##### comb += self.o.xer_so.data.eq(self.i.xer_so) diff --git a/src/soc/fu/mul/pre_stage.py b/src/soc/fu/mul/pre_stage.py index 84363090..94563874 100644 --- a/src/soc/fu/mul/pre_stage.py +++ b/src/soc/fu/mul/pre_stage.py @@ -2,7 +2,7 @@ from nmigen import (Module, Signal, Mux) from nmutil.pipemodbase import PipeModBase -from soc.fu.alu.pipe_data import ALUInputData +from soc.fu.div.pipe_data import DIVInputData from soc.fu.mul.pipe_data import MulIntermediateData from ieee754.part.partsig import PartitionedSignal from nmutil.util import eq32 @@ -12,7 +12,7 @@ class MulMainStage1(PipeModBase): super().__init__(pspec, "mul1") def ispec(self): - return ALUInputData(self.pspec) # defines pipeline stage input format + return DIVInputData(self.pspec) # defines pipeline stage input format def ospec(self): return MulIntermediateData(self.pspec) # pipeline stage output format @@ -58,7 +58,6 @@ class MulMainStage1(PipeModBase): ###### XER and context, both pass-through ##### - comb += self.o.xer_ca.eq(self.i.xer_ca) comb += self.o.xer_so.eq(self.i.xer_so) comb += self.o.ctx.eq(self.i.ctx) diff --git a/src/soc/fu/mul/test/test_pipe_caller.py b/src/soc/fu/mul/test/test_pipe_caller.py index cd93e129..cda81076 100644 --- a/src/soc/fu/mul/test/test_pipe_caller.py +++ b/src/soc/fu/mul/test/test_pipe_caller.py @@ -25,7 +25,6 @@ def get_cu_inputs(dec2, sim): yield from ALUHelpers.get_sim_int_ra(res, sim, dec2) # RA yield from ALUHelpers.get_sim_int_rb(res, sim, dec2) # RB - yield from ALUHelpers.get_rd_sim_xer_ca(res, sim, dec2) # XER.ca yield from ALUHelpers.get_sim_xer_so(res, sim, dec2) # XER.so print ("alu get_cu_inputs", res) @@ -44,7 +43,6 @@ def set_alu_inputs(alu, dec2, sim): yield from ALUHelpers.set_int_ra(alu, dec2, inp) yield from ALUHelpers.set_int_rb(alu, dec2, inp) - yield from 
ALUHelpers.set_xer_ca(alu, dec2, inp) yield from ALUHelpers.set_xer_so(alu, dec2, inp) @@ -245,19 +243,16 @@ class TestRunner(FHDLTestCase): yield from ALUHelpers.get_cr_a(res, alu, dec2) yield from ALUHelpers.get_xer_ov(res, alu, dec2) - yield from ALUHelpers.get_xer_ca(res, alu, dec2) yield from ALUHelpers.get_int_o(res, alu, dec2) yield from ALUHelpers.get_xer_so(res, alu, dec2) yield from ALUHelpers.get_sim_int_o(sim_o, sim, dec2) yield from ALUHelpers.get_wr_sim_cr_a(sim_o, sim, dec2) yield from ALUHelpers.get_sim_xer_ov(sim_o, sim, dec2) - yield from ALUHelpers.get_wr_sim_xer_ca(sim_o, sim, dec2) yield from ALUHelpers.get_sim_xer_so(sim_o, sim, dec2) ALUHelpers.check_int_o(self, res, sim_o, code) ALUHelpers.check_xer_ov(self, res, sim_o, code) - ALUHelpers.check_xer_ca(self, res, sim_o, code) ALUHelpers.check_xer_so(self, res, sim_o, code) ALUHelpers.check_cr_a(self, res, sim_o, "CR%d %s" % (cridx, code))