X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fsoc%2Ffu%2Falu%2Fmain_stage.py;h=4d5fe2313bb9184e63fff9b3f81422650d1d9ba4;hb=HEAD;hp=16950629c743a301c62b7712420aeb436cce0e76;hpb=b3d3ce90ac56e1d6b708c503cd37a1739b591428;p=soc.git diff --git a/src/soc/fu/alu/main_stage.py b/src/soc/fu/alu/main_stage.py index 16950629..1f17943c 100644 --- a/src/soc/fu/alu/main_stage.py +++ b/src/soc/fu/alu/main_stage.py @@ -4,16 +4,20 @@ # however should not gate the carry or overflow, that's up to the # output stage +# License: LGPLv3+ +# Copyright (C) 2020 Luke Kenneth Casson Leighton # Copyright (C) 2020 Michael Nolan +# (michael: note that there are multiple copyright holders) + from nmigen import (Module, Signal, Cat, Repl, Mux, Const) from nmutil.pipemodbase import PipeModBase -from nmutil.extend import exts +from nmutil.extend import exts, extz from soc.fu.alu.pipe_data import ALUInputData, ALUOutputData -from ieee754.part.partsig import PartitionedSignal -from soc.decoder.power_enums import MicrOp +from ieee754.part.partsig import SimdSignal +from openpower.decoder.power_enums import MicrOp -from soc.decoder.power_fields import DecodeFields -from soc.decoder.power_fieldsn import SignalBitRange +from openpower.decoder.power_fields import DecodeFields +from openpower.decoder.power_fieldsn import SignalBitRange # microwatt calc_ov function. @@ -34,12 +38,13 @@ class ALUMainStage(PipeModBase): return ALUOutputData(self.pspec) # defines pipeline stage output format def elaborate(self, platform): + XLEN = self.pspec.XLEN m = Module() comb = m.d.comb # convenience variables cry_o, o, cr0 = self.o.xer_ca, self.o.o, self.o.cr0 - ov_o = self.o.xer_ov + xer_so_i, ov_o = self.i.xer_so, self.o.xer_ov a, b, cry_i, op = self.i.a, self.i.b, self.i.xer_ca, self.i.ctx.op # get L-field for OP_CMP @@ -60,9 +65,16 @@ class ALUMainStage(PipeModBase): a_i = Signal.like(a) b_i = Signal.like(b) - with m.If(is_32bit): - comb += a_i.eq(exts(a, 32, 64)) - comb += b_i.eq(exts(b, 32, 64)) + with m.If(op.insn_type == MicrOp.OP_CMP): # another temporary hack + comb += a_i.eq(a) # reaaaally need to move CMP + comb += b_i.eq(b) # into trap pipeline + with m.Elif(is_32bit): + with m.If(op.is_signed): + comb += a_i.eq(exts(a, 32, XLEN)) + comb += b_i.eq(exts(b, 32, XLEN)) + with m.Else(): + comb += a_i.eq(extz(a, 32, XLEN)) + comb += b_i.eq(extz(b, 32, XLEN)) with m.Else(): comb += a_i.eq(a) comb += b_i.eq(b) @@ -83,12 +95,51 @@ class ALUMainStage(PipeModBase): #### CMP, CMPL v3.0B p85-86 with m.Case(MicrOp.OP_CMP): + a_n = Signal(XLEN) # temporary - inverted a + tval = Signal(5) + a_lt = Signal() + carry_32 = Signal() + carry_64 = Signal() + zerolo = Signal() + zerohi = Signal() + msb_a = Signal() + msb_b = Signal() + newcrf = Signal(4) + # this is supposed to be inverted (b-a, not a-b) - # however we have a trick: instead of adding either 2x 64-bit - # MUXes to invert a and b, or messing with a 64-bit output, - # swap +ve and -ve test in the *output* stage using an XOR gate - comb += o.data.eq(add_o[1:-1]) - comb += o.ok.eq(0) # use o.data but do *not* actually output + comb += a_n.eq(~a) # sigh a gets inverted + if XLEN == 64: + comb += carry_32.eq(add_o[33] ^ a[32] ^ b[32]) + else: + comb += carry_32.eq(add_o[XLEN+1]) + comb += carry_64.eq(add_o[XLEN+1]) + + comb += zerolo.eq(~((a_n[0:32] ^ b[0:32]).bool())) + comb += zerohi.eq(~((a_n[32:XLEN] ^ b[32:XLEN]).bool())) + + with m.If(zerolo & (is_32bit | zerohi)): + # values are equal + comb += tval[2].eq(1) + with m.Else(): + comb += msb_a.eq(Mux(is_32bit, a_n[31], a_n[XLEN-1])) + comb += msb_b.eq(Mux(is_32bit, b[31], b[XLEN-1])) + C0 = Const(0, 1) + with m.If(msb_a != msb_b): + # Subtraction might overflow, but + # comparison is clear from MSB difference. + # for signed, 0 is greater; for unsigned, 1 is greater + comb += tval.eq(Cat(msb_a, msb_b, C0, msb_b, msb_a)) + with m.Else(): + # Subtraction cannot overflow since MSBs are equal. + # carry = 1 indicates RA is smaller (signed or unsigned) + comb += a_lt.eq(Mux(is_32bit, carry_32, carry_64)) + comb += tval.eq(Cat(~a_lt, a_lt, C0, ~a_lt, a_lt)) + comb += cr0.data[0:2].eq(Cat(xer_so_i[0], tval[2])) + with m.If(op.is_signed): + comb += cr0.data[2:4].eq(tval[3:5]) + with m.Else(): + comb += cr0.data[2:4].eq(tval[0:2]) + comb += cr0.ok.eq(1) ################### #### add v3.0B p67, p69-72 @@ -102,13 +153,21 @@ class ALUMainStage(PipeModBase): # https://bugs.libre-soc.org/show_bug.cgi?id=319#c5 ca = Signal(2, reset_less=True) comb += ca[0].eq(add_o[-1]) # XER.CA - comb += ca[1].eq(add_o[33] ^ (a_i[32] ^ b_i[32])) # XER.CA32 + if XLEN == 64: + comb += ca[1].eq(add_o[33] ^ (a_i[32] ^ b_i[32])) # XER.CA32 + else: + comb += ca[1].eq(add_o[-1]) # XER.CA32 comb += cry_o.data.eq(ca) comb += cry_o.ok.eq(1) # 32-bit (ov[1]) and 64-bit (ov[0]) overflow ov = Signal(2, reset_less=True) comb += ov[0].eq(calc_ov(a_i[-1], b_i[-1], ca[0], add_o[-2])) - comb += ov[1].eq(calc_ov(a_i[31], b_i[31], ca[1], add_o[32])) + if XLEN == 64: + comb += ov[1].eq(calc_ov(a_i[31], b_i[31], ca[1], + add_o[32])) + else: + comb += ov[1].eq(calc_ov(a_i[-1], b_i[-1], ca[0], + add_o[-2])) comb += ov_o.data.eq(ov) comb += ov_o.ok.eq(1) @@ -117,11 +176,11 @@ class ALUMainStage(PipeModBase): with m.Case(MicrOp.OP_EXTS): with m.If(op.data_len == 1): - comb += o.data.eq(exts(a, 8, 64)) + comb += o.data.eq(exts(a, 8, XLEN)) with m.If(op.data_len == 2): - comb += o.data.eq(exts(a, 16, 64)) + comb += o.data.eq(exts(a, 16, XLEN)) with m.If(op.data_len == 4): - comb += o.data.eq(exts(a, 32, 64)) + comb += o.data.eq(exts(a, 32, XLEN)) comb += o.ok.eq(1) # output register ################### @@ -140,7 +199,7 @@ class ALUMainStage(PipeModBase): ###### sticky overflow and context, both pass-through ##### - comb += self.o.xer_so.data.eq(self.i.xer_so) + comb += self.o.xer_so.data.eq(xer_so_i) comb += self.o.ctx.eq(self.i.ctx) return m