From 9da097211552c19fc99c0947c732ed9df8f2cdb6 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Sun, 30 Aug 2020 20:49:58 +0100 Subject: [PATCH] redo OP_CMP based on microwatt. L=1 had been ignored --- src/soc/fu/alu/main_stage.py | 60 ++++++++++++++++++++----- src/soc/fu/alu/test/test_pipe_caller.py | 18 ++++++++ src/soc/fu/common_output_stage.py | 16 ++----- src/soc/litex/florent/sim.py | 2 +- 4 files changed, 73 insertions(+), 23 deletions(-) diff --git a/src/soc/fu/alu/main_stage.py b/src/soc/fu/alu/main_stage.py index 16950629..f2e751d6 100644 --- a/src/soc/fu/alu/main_stage.py +++ b/src/soc/fu/alu/main_stage.py @@ -7,7 +7,7 @@ # Copyright (C) 2020 Michael Nolan from nmigen import (Module, Signal, Cat, Repl, Mux, Const) from nmutil.pipemodbase import PipeModBase -from nmutil.extend import exts +from nmutil.extend import exts, extz from soc.fu.alu.pipe_data import ALUInputData, ALUOutputData from ieee754.part.partsig import PartitionedSignal from soc.decoder.power_enums import MicrOp @@ -39,7 +39,7 @@ class ALUMainStage(PipeModBase): # convenience variables cry_o, o, cr0 = self.o.xer_ca, self.o.o, self.o.cr0 - ov_o = self.o.xer_ov + xer_so_i, ov_o = self.i.xer_so, self.o.xer_ov a, b, cry_i, op = self.i.a, self.i.b, self.i.xer_ca, self.i.ctx.op # get L-field for OP_CMP @@ -61,8 +61,12 @@ class ALUMainStage(PipeModBase): a_i = Signal.like(a) b_i = Signal.like(b) with m.If(is_32bit): - comb += a_i.eq(exts(a, 32, 64)) - comb += b_i.eq(exts(b, 32, 64)) + with m.If(op.is_signed): + comb += a_i.eq(exts(a, 32, 64)) + comb += b_i.eq(exts(b, 32, 64)) + with m.Else(): + comb += a_i.eq(extz(a, 32, 64)) + comb += b_i.eq(extz(b, 32, 64)) with m.Else(): comb += a_i.eq(a) comb += b_i.eq(b) @@ -83,12 +87,48 @@ class ALUMainStage(PipeModBase): #### CMP, CMPL v3.0B p85-86 with m.Case(MicrOp.OP_CMP): + a_n = Signal(64) # temporary - inverted a + tval = Signal(5) + a_lt = Signal() + carry_32 = Signal() + carry_64 = Signal() + zerolo = Signal() + zerohi = Signal() + msb_a = Signal() + msb_b = Signal() + newcrf = Signal(4) + # this is supposed to be inverted (b-a, not a-b) - # however we have a trick: instead of adding either 2x 64-bit - # MUXes to invert a and b, or messing with a 64-bit output, - # swap +ve and -ve test in the *output* stage using an XOR gate - comb += o.data.eq(add_o[1:-1]) - comb += o.ok.eq(0) # use o.data but do *not* actually output + comb += a_n.eq(~a) # sigh a gets inverted + comb += carry_32.eq(add_o[33] ^ a_n[32] ^ b[32]) + comb += carry_64.eq(add_o[65]) + + comb += zerolo.eq(~((a_n[0:32] ^ b[0:32]).bool())) + comb += zerohi.eq(~((a_n[32:64] ^ b[32:64]).bool())) + + with m.If(zerolo & (is_32bit | zerohi)): + # values are equal + comb += tval[2].eq(1) + with m.Else(): + comb += msb_a.eq(Mux(is_32bit, a_n[31], a_n[63])) + comb += msb_b.eq(Mux(is_32bit, b[31], b[63])) + C0 = Const(0, 1) + with m.If(msb_a != msb_b): + # Subtraction might overflow, but + # comparison is clear from MSB difference. + # for signed, 0 is greater; for unsigned, 1 is greater + comb += tval.eq(Cat(msb_a, msb_b, C0, msb_b, msb_a)) + with m.Else(): + # Subtraction cannot overflow since MSBs are equal. + # carry = 1 indicates RA is smaller (signed or unsigned) + comb += a_lt.eq(Mux(is_32bit, carry_32, carry_64)) + comb += tval.eq(Cat(~a_lt, a_lt, C0, ~a_lt, a_lt)) + comb += cr0.data[0:2].eq(Cat(xer_so_i[0], tval[2])) + with m.If(op.is_signed): + comb += cr0.data[2:4].eq(tval[3:5]) + with m.Else(): + comb += cr0.data[2:4].eq(tval[0:2]) + comb += cr0.ok.eq(1) ################### #### add v3.0B p67, p69-72 @@ -140,7 +180,7 @@ class ALUMainStage(PipeModBase): ###### sticky overflow and context, both pass-through ##### - comb += self.o.xer_so.data.eq(self.i.xer_so) + comb += self.o.xer_so.data.eq(xer_so_i) comb += self.o.ctx.eq(self.i.ctx) return m diff --git a/src/soc/fu/alu/test/test_pipe_caller.py b/src/soc/fu/alu/test/test_pipe_caller.py index 60a1343f..d059c70d 100644 --- a/src/soc/fu/alu/test/test_pipe_caller.py +++ b/src/soc/fu/alu/test/test_pipe_caller.py @@ -227,6 +227,24 @@ class ALUTestCase(TestAccumulatorBase): initial_regs[5] = 0xffffffffaaaaaaaa self.add_case(Program(lst, bigendian), initial_regs, {}) + def case_cmplw_microwatt_1(self): + """microwatt 1.bin: + 10d94: 40 20 96 7c cmplw cr1,r22,r4 + gpr: 00000000ffff6dc1 <- r4 + gpr: 0000000000000000 <- r22 + """ + + lst = ["cmpl 1, 0, 22, 4"] + initial_regs = [0] * 32 + initial_regs[4] = 0xffff6dc1 + initial_regs[22] = 0 + XER = 0xe00c0000 + CR = 0x50759999 + + self.add_case(Program(lst, bigendian), initial_regs, + initial_sprs = {'XER': XER}, + initial_cr = CR) + def case_extsb(self): insns = ["extsb", "extsh", "extsw"] for i in range(10): diff --git a/src/soc/fu/common_output_stage.py b/src/soc/fu/common_output_stage.py index adb795c5..401660c0 100644 --- a/src/soc/fu/common_output_stage.py +++ b/src/soc/fu/common_output_stage.py @@ -76,21 +76,13 @@ class CommonOutputStage(PipeModBase): comb += is_cmp.eq(op.insn_type == MicrOp.OP_CMP) comb += is_cmpeqb.eq(op.insn_type == MicrOp.OP_CMPEQB) - # nope - if *processor* mode is 32-bit - #with m.If(op.is_32bit): - # comb += msb_test.eq(target[-1] ^ is_cmp) # 64-bit MSB - #with m.Else(): - # comb += msb_test.eq(target[31] ^ is_cmp) # 32-bit MSB + comb += msb_test.eq(target[-1]) # 64-bit MSB comb += is_nzero.eq(target.bool()) - with m.If(is_cmp): # invert pos/neg tests - comb += is_positive.eq(msb_test) - comb += is_negative.eq(is_nzero & ~msb_test) - with m.Else(): - comb += is_negative.eq(msb_test) - comb += is_positive.eq(is_nzero & ~msb_test) + comb += is_negative.eq(msb_test) + comb += is_positive.eq(is_nzero & ~msb_test) - with m.If(is_cmpeqb): + with m.If(is_cmpeqb | is_cmp): comb += cr0.eq(self.i.cr0.data) with m.Else(): comb += cr0.eq(Cat(so, ~is_nzero, is_positive, is_negative)) diff --git a/src/soc/litex/florent/sim.py b/src/soc/litex/florent/sim.py index 6d6e18a0..d3c79017 100755 --- a/src/soc/litex/florent/sim.py +++ b/src/soc/litex/florent/sim.py @@ -308,7 +308,7 @@ class LibreSoCSim(SoCSDRAM): ) if cpu == "libresoc": - self.comb += active_dbg_cr.eq((0x10300 <= pc) & (pc <= 0x1094c)) + self.comb += active_dbg_cr.eq((0x10300 <= pc) & (pc <= 0x10e00)) #self.comb += active_dbg_cr.eq(1) # get the CR -- 2.30.2