Merge branch 'master' of ssh://git.libre-riscv.org:922/soc
author Jacob Lifshay <programmerjake@gmail.com>
Wed, 8 Jul 2020 02:39:36 +0000 (19:39 -0700)
committer Jacob Lifshay <programmerjake@gmail.com>
Wed, 8 Jul 2020 02:39:36 +0000 (19:39 -0700)
73 files changed:
libreriscv
src/soc/bus/test/test_minerva.py
src/soc/config/test/test_fetch.py
src/soc/consts.py [new file with mode: 0644]
src/soc/decoder/decode2execute1.py
src/soc/decoder/formal/proof_decoder2.py
src/soc/decoder/helpers.py
src/soc/decoder/isa/caller.py
src/soc/decoder/power_decoder.py
src/soc/decoder/power_decoder2.py
src/soc/decoder/power_enums.py
src/soc/decoder/power_regspec_map.py
src/soc/decoder/selectable_int.py
src/soc/experiment/alu_hier.py
src/soc/experiment/formal/.gitignore [new file with mode: 0644]
src/soc/fu/alu/alu_input_record.py
src/soc/fu/alu/test/test_pipe_caller.py
src/soc/fu/branch/br_input_record.py
src/soc/fu/branch/formal/proof_main_stage.py
src/soc/fu/branch/main_stage.py
src/soc/fu/branch/pipe_data.py
src/soc/fu/branch/test/test_pipe_caller.py
src/soc/fu/common_output_stage.py
src/soc/fu/compunits/compunits.py
src/soc/fu/compunits/test/test_alu_compunit.py
src/soc/fu/compunits/test/test_branch_compunit.py
src/soc/fu/compunits/test/test_compunit.py
src/soc/fu/compunits/test/test_cr_compunit.py
src/soc/fu/compunits/test/test_ldst_compunit.py
src/soc/fu/compunits/test/test_logical_compunit.py
src/soc/fu/compunits/test/test_shiftrot_compunit.py
src/soc/fu/compunits/test/test_spr_compunit.py [new file with mode: 0644]
src/soc/fu/compunits/test/test_trap_compunit.py [new file with mode: 0644]
src/soc/fu/cr/cr_input_record.py
src/soc/fu/cr/test/test_pipe_caller.py
src/soc/fu/div/pipeline.py
src/soc/fu/div/setup_stage.py
src/soc/fu/div/test/test_pipe_caller.py
src/soc/fu/ldst/ldst_input_record.py
src/soc/fu/logical/logical_input_record.py
src/soc/fu/logical/test/test_pipe_caller.py
src/soc/fu/mul/main_stage.py
src/soc/fu/mul/mul_input_record.py [new file with mode: 0644]
src/soc/fu/mul/pipe_data.py
src/soc/fu/mul/pipeline.py
src/soc/fu/mul/post_stage.py [new file with mode: 0644]
src/soc/fu/mul/pre_stage.py [new file with mode: 0644]
src/soc/fu/mul/test/test_pipe_caller.py
src/soc/fu/pipe_data.py
src/soc/fu/shift_rot/sr_input_record.py
src/soc/fu/shift_rot/test/test_pipe_caller.py
src/soc/fu/spr/main_stage.py [new file with mode: 0644]
src/soc/fu/spr/pipe_data.py
src/soc/fu/spr/pipeline.py [new file with mode: 0644]
src/soc/fu/spr/spr_input_record.py [new file with mode: 0644]
src/soc/fu/spr/test/test_pipe_caller.py [new file with mode: 0644]
src/soc/fu/test/common.py
src/soc/fu/trap/main_stage.py
src/soc/fu/trap/pipe_data.py
src/soc/fu/trap/test/test_pipe_caller.py
src/soc/fu/trap/trap_input_record.py
src/soc/regfile/formal/.gitignore [new file with mode: 0644]
src/soc/regfile/regfiles.py
src/soc/regfile/util.py
src/soc/simple/core.py
src/soc/simple/issuer.py
src/soc/simple/test/test_core.py
src/soc/simple/test/test_issuer.py
src/soc/simple/test/test_microwatt.py [new file with mode: 0644]
src/soc/simulator/program.py
src/soc/simulator/qemu.py
src/soc/simulator/test_mul_sim.py [new file with mode: 0644]
src/soc/simulator/test_sim.py

index 1d09d2455985e602a5799e1fafac5cea6b1cb72d..09d89525805d989982838a01193ab0bdc54fb662 160000 (submodule)
@@ -1 +1 @@
-Subproject commit 1d09d2455985e602a5799e1fafac5cea6b1cb72d
+Subproject commit 09d89525805d989982838a01193ab0bdc54fb662
index 02c83281612c992227a4b792db9524e6b2265cb4..2bb920a317a763f52740a423f86cd33d4ea433e4 100644 (file)
@@ -7,13 +7,20 @@ from soc.minerva.units.fetch import BareFetchUnit, CachedFetchUnit
 class TestSRAMBareLoadStoreUnit(BareLoadStoreUnit):
     def __init__(self, pspec):
         super().__init__(pspec)
+        # default: small 32-entry Memory
+        if (hasattr(pspec, "dmem_test_depth") and
+            isinstance(pspec.dmem_test_depth, int)):
+            depth = pspec.dmem_test_depth
+        else:
+            depth = 32
+        print ("TestSRAMBareLoadStoreUnit depth", depth)
+
+        self.mem = Memory(width=self.data_wid, depth=depth)
 
     def elaborate(self, platform):
         m = super().elaborate(platform)
         comb = m.d.comb
-        # small 16-entry Memory
-        self.mem = memory = Memory(width=self.data_wid, depth=32)
-        m.submodules.sram = sram = SRAM(memory=memory, granularity=8,
+        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                         features={'cti', 'bte', 'err'})
         dbus = self.dbus
 
@@ -37,8 +44,14 @@ class TestSRAMBareLoadStoreUnit(BareLoadStoreUnit):
 class TestSRAMBareFetchUnit(BareFetchUnit):
     def __init__(self, pspec):
         super().__init__(pspec)
-        # small 16-entry Memory
-        self.mem = Memory(width=self.data_wid, depth=32)
+        # default: small 32-entry Memory
+        if (hasattr(pspec, "imem_test_depth") and
+            isinstance(pspec.imem_test_depth, int)):
+            depth = pspec.imem_test_depth
+        else:
+            depth = 32
+        print ("TestSRAMBareFetchUnit depth", depth)
+        self.mem = Memory(width=self.data_wid, depth=depth)
 
     def _get_memory(self):
         return self.mem
index 00154dfde5ed2b49e9a32b56db5b02edcc9e05eb..f6f0901a2efc95df35beb3a20affb47f89874655 100644 (file)
@@ -7,8 +7,10 @@ from soc.config.ifetch import ConfigFetchUnit
 from collections import namedtuple
 from nmigen.cli import rtlil
 
-from soc.config.test.test_loadstore import TestMemPspec 
+from soc.config.test.test_loadstore import TestMemPspec
 
+import sys
+sys.setrecursionlimit(10**6)
 
 def read_from_addr(dut, addr):
     yield dut.a_pc_i.eq(addr)
@@ -29,12 +31,13 @@ def read_from_addr(dut, addr):
     return res
 
 
-def tst_lsmemtype(ifacetype):
+def tst_lsmemtype(ifacetype, sram_depth=32):
     m = Module()
-    pspec = TestMemPspec(ldst_ifacetype=ifacetype, 
+    pspec = TestMemPspec(ldst_ifacetype=ifacetype,
                          imem_ifacetype=ifacetype, addr_wid=64,
                                                    mask_wid=4,
-                                                   reg_wid=32)
+                                                   reg_wid=32,
+                         imem_test_depth=sram_depth)
     dut = ConfigFetchUnit(pspec).fu
     vl = rtlil.convert(dut, ports=[]) # TODOdut.ports())
     with open("test_fetch_%s.il" % ifacetype, "w") as f:
@@ -64,5 +67,5 @@ def tst_lsmemtype(ifacetype):
         sim.run()
 
 if __name__ == '__main__':
-    tst_lsmemtype('test_bare_wb')
+    tst_lsmemtype('test_bare_wb', sram_depth=32768)
     tst_lsmemtype('testmem')
diff --git a/src/soc/consts.py b/src/soc/consts.py
new file mode 100644 (file)
index 0000000..f29ddcf
--- /dev/null
@@ -0,0 +1,29 @@
+# Listed in V3.0B Book III Chap 4.2.1
+# MSR bit numbers
+
+class MSR:
+    SF  = (63 - 0)     # Sixty-Four bit mode
+    HV  = (63 - 3)     # Hypervisor state
+    S   = (63 - 41)    # Secure state
+    EE  = (63 - 48)    # External interrupt Enable
+    PR  = (63 - 49)    # PRoblem state
+    FP  = (63 - 50)    # FP available
+    ME  = (63 - 51)    # Machine Check int enable
+    IR  = (63 - 58)    # Instruction Relocation
+    DR  = (63 - 59)    # Data Relocation
+    PMM = (63 - 60)    # Performance Monitor Mark
+    RI  = (63 - 62)    # Recoverable Interrupt
+    LE  = (63 - 63)    # Little Endian
+
+# Listed in V3.0B Book III 7.5.9 "Program Interrupt"
+
+# note that these correspond to trap_input_record.traptype bits 0,1,2,3
+# (TODO: add more?)
+
+class PI:
+    FP    = (63 - 43)    # 1 if FP exception
+    ILLEG = (63 - 44)    # 1 if illegal instruction (not doing hypervisor)
+    PRIV  = (63 - 45)    # 1 if privileged interrupt
+    TRAP  = (63 - 46)    # 1 if exception is "trap" type
+    ADR   = (63 - 47)    # 0 if SRR0 = address of instruction causing exception
+
index 966def93ae580fe48ee9a9ad4851dc7845fa4dd8..b4f75200bcb12467846076b74a2c5227d29491a5 100644 (file)
@@ -5,7 +5,7 @@ based on Anton Blanchard microwatt decode2.vhdl
 """
 from nmigen import Signal, Record
 from nmutil.iocontrol import RecordObject
-from soc.decoder.power_enums import InternalOp, CryIn, Function
+from soc.decoder.power_enums import InternalOp, CryIn, Function, SPR
 
 
 class Data(Record):
@@ -23,49 +23,26 @@ class Data(Record):
         return [self.data, self.ok]
 
 
-class Decode2ToExecute1Type(RecordObject):
+class Decode2ToOperand(RecordObject):
 
-    def __init__(self, name=None, asmcode=True):
+    def __init__(self, name=None):
 
         RecordObject.__init__(self, name=name)
 
-        self.valid = Signal(reset_less=True)
         self.insn_type = Signal(InternalOp, reset_less=True)
         self.fn_unit = Signal(Function, reset_less=True)
-        if asmcode:
-            self.asmcode = Signal(8, reset_less=True) # only for simulator
-        self.nia = Signal(64, reset_less=True)
-        self.write_reg = Data(5, name="rego")
-        self.write_ea = Data(5, name="ea") # for LD/ST in update mode
-        self.read_reg1 = Data(5, name="reg1")
-        self.read_reg2 = Data(5, name="reg2")
-        self.read_reg3 = Data(5, name="reg3")
         self.imm_data = Data(64, name="imm")
-        self.write_spr = Data(10, name="spro")
-        self.read_spr1 = Data(10, name="spr1")
-        self.read_spr2 = Data(10, name="spr2")
-
-        self.read_fast1 = Data(3, name="fast1")
-        self.read_fast2 = Data(3, name="fast2")
-        self.write_fast1 = Data(3, name="fasto1")
-        self.write_fast2 = Data(3, name="fasto2")
 
-        self.read_cr1 = Data(3, name="cr_in1")
-        self.read_cr2 = Data(3, name="cr_in2")
-        self.read_cr3 = Data(3, name="cr_in2")
-        self.read_cr_whole = Signal(reset_less=True)
-        self.write_cr = Data(3, name="cr_out")
-        self.write_cr_whole = Signal(reset_less=True)
         self.lk = Signal(reset_less=True)
         self.rc = Data(1, "rc")
         self.oe = Data(1, "oe")
         self.invert_a = Signal(reset_less=True)
         self.zero_a = Signal(reset_less=True)
-        self.invert_out = Signal(reset_less=True)
         self.input_carry = Signal(CryIn, reset_less=True)
         self.output_carry = Signal(reset_less=True)
         self.input_cr = Signal(reset_less=True)  # instr. has a CR as input
         self.output_cr = Signal(reset_less=True) # instr. has a CR as output
+        self.invert_out = Signal(reset_less=True)
         self.is_32bit = Signal(reset_less=True)
         self.is_signed = Signal(reset_less=True)
         self.insn = Signal(32, reset_less=True)
@@ -73,6 +50,43 @@ class Decode2ToExecute1Type(RecordObject):
         self.byte_reverse  = Signal(reset_less=True)
         self.sign_extend  = Signal(reset_less=True)# do we need this?
         self.update  = Signal(reset_less=True) # LD/ST is "update" variant
-        self.traptype  = Signal(4, reset_less=True) # see trap main_stage.py
+        self.traptype  = Signal(5, reset_less=True) # see trap main_stage.py
         self.trapaddr  = Signal(13, reset_less=True)
+        self.read_cr_whole = Signal(reset_less=True)
+        self.write_cr_whole = Signal(reset_less=True)
+        self.write_cr0 = Signal(reset_less=True)
+
+
+class Decode2ToExecute1Type(RecordObject):
+
+    def __init__(self, name=None, asmcode=True):
+
+        RecordObject.__init__(self, name=name)
+
+        if asmcode:
+            self.asmcode = Signal(8, reset_less=True) # only for simulator
+        self.nia = Signal(64, reset_less=True)
+        self.write_reg = Data(5, name="rego")
+        self.write_ea = Data(5, name="ea") # for LD/ST in update mode
+        self.read_reg1 = Data(5, name="reg1")
+        self.read_reg2 = Data(5, name="reg2")
+        self.read_reg3 = Data(5, name="reg3")
+        self.write_spr = Data(SPR, name="spro")
+        self.read_spr1 = Data(SPR, name="spr1")
+        #self.read_spr2 = Data(SPR, name="spr2") # only one needed
+
+        self.xer_in = Signal(reset_less=True)   # xer might be read
+        self.xer_out = Signal(reset_less=True)  # xer might be written
+
+        self.read_fast1 = Data(3, name="fast1")
+        self.read_fast2 = Data(3, name="fast2")
+        self.write_fast1 = Data(3, name="fasto1")
+        self.write_fast2 = Data(3, name="fasto2")
+
+        self.read_cr1 = Data(3, name="cr_in1")
+        self.read_cr2 = Data(3, name="cr_in2")
+        self.read_cr3 = Data(3, name="cr_in2")
+        self.write_cr = Data(3, name="cr_out")
 
+        # decode operand data
+        self.do = Decode2ToOperand(name)
index d36ec447dadb966a004bc628b21ba82538ed0427..b19588f6bca30fe729712c2a4d590811a1251de2 100644 (file)
@@ -60,8 +60,7 @@ class Driver(Elaboratable):
                 comb += Assert(pdecode2.e.read_spr1.ok == 1)
         with m.If((op == InternalOp.OP_MFSPR) |
                   (op == InternalOp.OP_MTSPR)):
-            comb += Assert(pdecode2.e.read_spr1.data ==
-                           self.instr_bits(11, 20))
+            comb += Assert(pdecode2.e.read_spr1.data == self.instr_bits(11, 20))
             comb += Assert(pdecode2.e.read_spr1.ok == 1)
 
     def test_in2(self):
@@ -71,8 +70,7 @@ class Driver(Elaboratable):
         dec = pdecode2.dec
         with m.If(dec.op.in2_sel == In2Sel.RB):
             comb += Assert(pdecode2.e.read_reg2.ok == 1)
-            comb += Assert(pdecode2.e.read_reg2.data ==
-                           dec.RB)
+            comb += Assert(pdecode2.e.read_reg2.data == dec.RB)
         with m.Elif(dec.op.in2_sel == In2Sel.NONE):
             comb += Assert(pdecode2.e.imm_data.ok == 0)
             comb += Assert(pdecode2.e.read_reg2.ok == 0)
@@ -93,20 +91,16 @@ class Driver(Elaboratable):
                     comb += Assert(pdecode2.e.imm_data.data ==
                                    self.exts(dec.SI, 16, 64))
                 with m.Case(In2Sel.CONST_UI_HI):
-                    comb += Assert(pdecode2.e.imm_data.data ==
-                                   (dec.UI << 16))
+                    comb += Assert(pdecode2.e.imm_data.data == (dec.UI << 16))
                 with m.Case(In2Sel.CONST_SI_HI):
                     comb += Assert(pdecode2.e.imm_data.data ==
                                    self.exts(dec.SI << 16, 32, 64))
                 with m.Case(In2Sel.CONST_LI):
-                    comb += Assert(pdecode2.e.imm_data.data ==
-                                   (dec.LI << 2))
+                    comb += Assert(pdecode2.e.imm_data.data == (dec.LI << 2))
                 with m.Case(In2Sel.CONST_BD):
-                    comb += Assert(pdecode2.e.imm_data.data ==
-                                   (dec.BD << 2))
+                    comb += Assert(pdecode2.e.imm_data.data == (dec.BD << 2))
                 with m.Case(In2Sel.CONST_DS):
-                    comb += Assert(pdecode2.e.imm_data.data ==
-                                   (dec.DS << 2))
+                    comb += Assert(pdecode2.e.imm_data.data == (dec.DS << 2))
                 with m.Case(In2Sel.CONST_M1):
                     comb += Assert(pdecode2.e.imm_data.data == ~0)
                 with m.Case(In2Sel.CONST_SH):
@@ -143,8 +137,7 @@ class Driver(Elaboratable):
         pdecode2 = m.submodules.pdecode2
         with m.If(pdecode2.dec.op.in3_sel == In3Sel.RS):
             comb += Assert(pdecode2.e.read_reg3.ok == 1)
-            comb += Assert(pdecode2.e.read_reg3.data ==
-                           self.instr_bits(6,10))
+            comb += Assert(pdecode2.e.read_reg3.data == self.instr_bits(6,10))
 
     def test_out(self):
         m = self.m
@@ -165,8 +158,7 @@ class Driver(Elaboratable):
             with m.If(sel == OutSel.RT):
                 comb += Assert(data == self.instr_bits(6, 10))
             with m.If(sel == OutSel.RA):
-                comb += Assert(data ==
-                               self.instr_bits(11, 15))
+                comb += Assert(data == self.instr_bits(11, 15))
 
     def test_rc(self):
         m = self.m
@@ -180,11 +172,9 @@ class Driver(Elaboratable):
         with m.If(sel == RC.ONE):
             comb += Assert(pdecode2.e.rc.data == 1)
         with m.If(sel == RC.RC):
-            comb += Assert(pdecode2.e.rc.data ==
-                           dec.Rc)
+            comb += Assert(pdecode2.e.rc.data == dec.Rc)
             comb += Assert(pdecode2.e.oe.ok == 1)
-            comb += Assert(pdecode2.e.oe.data ==
-                           dec.OE)
+            comb += Assert(pdecode2.e.oe.data == dec.OE)
 
     def test_single_bits(self):
         m = self.m
index 53edaca2cc6927bdd8e103069054788dba8ec69d..8cab8d50176856c733567a2522d553f44e1aef52 100644 (file)
@@ -27,6 +27,17 @@ def EXTS64(value):
     return SelectableInt(exts(value.value, value.bits) & ((1 << 64)-1), 64)
 
 
+# signed version of MUL
+def MULS(a, b):
+    a_s = a.value & (1<<(a.bits-1)) != 0
+    b_s = b.value & (1<<(b.bits-1)) != 0
+    result = abs(a) * abs(b)
+    print ("MULS", result, a_s, b_s)
+    if a_s == b_s:
+        return result
+    return -result
+
+
 # XXX should this explicitly extend from 32 to 64?
 def EXTZ64(value):
     if isinstance(value, SelectableInt):
index d093ad1b5c74b85a3f09b3d88033673c43897d82..133d4b0928143f0da35a6244bc3766f4dfe8c4c6 100644 (file)
@@ -12,6 +12,8 @@ from soc.decoder.selectable_int import (FieldSelectableInt, SelectableInt,
 from soc.decoder.power_enums import (spr_dict, spr_byname, XER_bits,
                                      insns, InternalOp)
 from soc.decoder.helpers import exts, trunc_div, trunc_rem
+from soc.consts import PI, MSR
+
 from collections import namedtuple
 import math
 import sys
@@ -200,21 +202,34 @@ class SPR(dict):
             self[key] = v
 
     def __getitem__(self, key):
+        print ("get spr", key)
+        print ("dict", self.items())
         # if key in special_sprs get the special spr, otherwise return key
         if isinstance(key, SelectableInt):
             key = key.value
+        if isinstance(key, int):
+            key = spr_dict[key].SPR
         key = special_sprs.get(key, key)
         if key in self:
-            return dict.__getitem__(self, key)
+            res = dict.__getitem__(self, key)
         else:
-            info = spr_dict[key]
+            if isinstance(key, int):
+                info = spr_dict[key]
+            else:
+                info = spr_byname[key]
             dict.__setitem__(self, key, SelectableInt(0, info.length))
-            return dict.__getitem__(self, key)
+            res = dict.__getitem__(self, key)
+        print ("spr returning", key, res)
+        return res
 
     def __setitem__(self, key, value):
         if isinstance(key, SelectableInt):
             key = key.value
+        if isinstance(key, int):
+            key = spr_dict[key].SPR
+            print ("spr key", key)
         key = special_sprs.get(key, key)
+        print ("setting spr", key, value)
         dict.__setitem__(self, key, value)
 
     def __call__(self, ridx):
@@ -229,8 +244,10 @@ class ISACaller:
     def __init__(self, decoder2, regfile, initial_sprs=None, initial_cr=0,
                        initial_mem=None, initial_msr=0,
                        initial_insns=None, respect_pc=False,
-                       disassembly=None):
+                       disassembly=None,
+                       initial_pc=0):
 
+        self.halted = False
         self.respect_pc = respect_pc
         if initial_sprs is None:
             initial_sprs = {}
@@ -244,15 +261,19 @@ class ISACaller:
 
         # "fake program counter" mode (for unit testing)
         self.fake_pc = 0
+        disasm_start = 0
         if not respect_pc:
             if isinstance(initial_mem, tuple):
                 self.fake_pc = initial_mem[0]
+                disasm_start = self.fake_pc
+        else:
+            disasm_start = initial_pc
 
         # disassembly: we need this for now (not given from the decoder)
         self.disassembly = {}
         if disassembly:
             for i, code in enumerate(disassembly):
-                self.disassembly[i*4 + self.fake_pc] = code
+                self.disassembly[i*4 + disasm_start] = code
 
         # set up registers, instruction memory, data memory, PC, SPRs, MSR
         self.gpr = GPR(decoder2, regfile)
@@ -297,6 +318,8 @@ class ISACaller:
                           'SO': XER_bits['SO']
                           })
 
+        # update pc to requested start point
+        self.set_pc(initial_pc)
 
         # field-selectable versions of Condition Register TODO check bitranges?
         self.crl = []
@@ -309,14 +332,14 @@ class ISACaller:
         self.decoder = decoder2.dec
         self.dec2 = decoder2
 
-    def TRAP(self, trap_addr=0x700):
-        print ("TRAP: TODO")
-        #self.namespace['NIA'] = trap_addr
-        #self.SRR0 = self.namespace['CIA'] + 4
-        #self.SRR1 = self.namespace['MSR']
-        #self.namespace['MSR'][45] = 1
+    def TRAP(self, trap_addr=0x700, trap_bit=PI.TRAP):
+        print ("TRAP:", hex(trap_addr))
         # store CIA(+4?) in SRR0, set NIA to 0x700
         # store MSR in SRR1, set MSR to um errr something, have to check spec
+        self.spr['SRR0'] = self.pc.CIA
+        self.spr['SRR1'] = self.namespace['MSR']
+        self.trap_nia = SelectableInt(trap_addr, 64)
+        self.namespace['MSR'][63-trap_bit] = 1
 
     def memassign(self, ea, sz, val):
         self.mem.memassign(ea, sz, val)
@@ -344,13 +367,13 @@ class ISACaller:
         self.namespace['CA32'] = self.spr['XER'][XER_bits['CA32']].value
 
     def handle_carry_(self, inputs, outputs, already_done):
-        inv_a = yield self.dec2.e.invert_a
+        inv_a = yield self.dec2.e.do.invert_a
         if inv_a:
             inputs[0] = ~inputs[0]
 
-        imm_ok = yield self.dec2.e.imm_data.ok
+        imm_ok = yield self.dec2.e.do.imm_data.ok
         if imm_ok:
-            imm = yield self.dec2.e.imm_data.data
+            imm = yield self.dec2.e.do.imm_data.data
             inputs.append(SelectableInt(imm, 64))
         assert len(outputs) >= 1
         print ("outputs", repr(outputs))
@@ -380,13 +403,13 @@ class ISACaller:
             self.spr['XER'][XER_bits['CA32']] = cy32
 
     def handle_overflow(self, inputs, outputs, div_overflow):
-        inv_a = yield self.dec2.e.invert_a
+        inv_a = yield self.dec2.e.do.invert_a
         if inv_a:
             inputs[0] = ~inputs[0]
 
-        imm_ok = yield self.dec2.e.imm_data.ok
+        imm_ok = yield self.dec2.e.do.imm_data.ok
         if imm_ok:
-            imm = yield self.dec2.e.imm_data.data
+            imm = yield self.dec2.e.do.imm_data.data
             inputs.append(SelectableInt(imm, 64))
         assert len(outputs) >= 1
         print ("handle_overflow", inputs, outputs, div_overflow)
@@ -445,7 +468,7 @@ class ISACaller:
         if ins is None:
             raise KeyError("no instruction at 0x%x" % pc)
         print("setup: 0x%x 0x%x %s" % (pc, ins & 0xffffffff, bin(ins)))
-        print ("NIA, CIA", self.pc.CIA.value, self.pc.NIA.value)
+        print ("CIA NIA", self.respect_pc, self.pc.CIA.value, self.pc.NIA.value)
 
         yield self.dec2.dec.raw_opcode_in.eq(ins & 0xffffffff)
         yield self.dec2.dec.bigendian.eq(0)  # little / big?
@@ -461,7 +484,7 @@ class ISACaller:
 
         if not self.respect_pc:
             self.fake_pc += 4
-        print ("NIA, CIA", self.pc.CIA.value, self.pc.NIA.value)
+        print ("execute one, CIA NIA", self.pc.CIA.value, self.pc.NIA.value)
 
     def get_assembly_name(self):
         # TODO, asmregs is from the spec, e.g. add RT,RA,RB
@@ -470,11 +493,11 @@ class ISACaller:
         asmop = insns.get(asmcode, None)
 
         # sigh reconstruct the assembly instruction name
-        ov_en = yield self.dec2.e.oe.oe
-        ov_ok = yield self.dec2.e.oe.ok
+        ov_en = yield self.dec2.e.do.oe.oe
+        ov_ok = yield self.dec2.e.do.oe.ok
         if ov_en & ov_ok:
             asmop += "."
-        lk = yield self.dec2.e.lk
+        lk = yield self.dec2.e.do.lk
         if lk:
             asmop += "l"
         int_op = yield self.dec2.dec.op.internal_op
@@ -485,7 +508,7 @@ class ISACaller:
             if AA:
                 asmop += "a"
         if int_op == InternalOp.OP_MFCR.value:
-            dec_insn = yield self.dec2.e.insn
+            dec_insn = yield self.dec2.e.do.insn
             if dec_insn & (1<<20) != 0: # sigh
                 asmop = 'mfocrf'
             else:
@@ -493,7 +516,7 @@ class ISACaller:
         # XXX TODO: for whatever weird reason this doesn't work
         # https://bugs.libre-soc.org/show_bug.cgi?id=390
         if int_op == InternalOp.OP_MTCRF.value:
-            dec_insn = yield self.dec2.e.insn
+            dec_insn = yield self.dec2.e.do.insn
             if dec_insn & (1<<20) != 0: # sigh
                 asmop = 'mtocrf'
             else:
@@ -501,18 +524,38 @@ class ISACaller:
         return asmop
 
     def call(self, name):
+        if self.halted:
+            print ("halted - not executing", name)
+            return
+
         # TODO, asmregs is from the spec, e.g. add RT,RA,RB
         # see http://bugs.libre-riscv.org/show_bug.cgi?id=282
         asmop = yield from self.get_assembly_name()
         print  ("call", name, asmop)
+
+        # check halted condition
+        if name == 'attn':
+            self.halted = True
+            return
+
+        # check illegal instruction
+        illegal = False
         if name not in ['mtcrf', 'mtocrf']:
-            assert name == asmop, "name %s != %s" % (name, asmop)
+            illegal = name != asmop
+
+        if illegal:
+            print ("name %s != %s - calling ILLEGAL trap" % (name, asmop))
+            self.TRAP(0x700, PI.ILLEG)
+            self.namespace['NIA'] = self.trap_nia
+            self.pc.update(self.namespace)
+            return
 
         info = self.instrs[name]
         yield from self.prep_namespace(info.form, info.op_fields)
 
         # preserve order of register names
-        input_names = create_args(list(info.read_regs) + list(info.uninit_regs))
+        input_names = create_args(list(info.read_regs) +
+                                  list(info.uninit_regs))
         print(input_names)
 
         # main registers (RT, RA ...)
@@ -531,10 +574,20 @@ class ISACaller:
             else:
                 inputs.append(self.namespace[special])
 
+        # clear the trap NIA before executing the instruction
+        self.trap_nia = None
+
         print(inputs)
         results = info.func(self, *inputs)
         print(results)
 
+        # "inject" decorator takes namespace from function locals: after a
+        # trap we need to re-apply the trap NIA, which gets overwritten (sigh)
+        if self.trap_nia is not None:
+            self.namespace['NIA'] = self.trap_nia
+
+        print ("after func", self.namespace['CIA'], self.namespace['NIA'])
+
         # detect if CA/CA32 already in outputs (sra*, basically)
         already_done = 0
         if info.write_regs:
@@ -546,7 +599,7 @@ class ISACaller:
                     already_done |= 2
 
         print ("carry already done?", bin(already_done))
-        carry_en = yield self.dec2.e.output_carry
+        carry_en = yield self.dec2.e.do.output_carry
         if carry_en:
             yield from self.handle_carry_(inputs, results, already_done)
 
@@ -557,13 +610,13 @@ class ISACaller:
                 if name == 'overflow':
                     overflow = output
 
-        ov_en = yield self.dec2.e.oe.oe
-        ov_ok = yield self.dec2.e.oe.ok
+        ov_en = yield self.dec2.e.do.oe.oe
+        ov_ok = yield self.dec2.e.do.oe.ok
         print ("internal overflow", overflow)
         if ov_en & ov_ok:
             yield from self.handle_overflow(inputs, results, overflow)
 
-        rc_en = yield self.dec2.e.rc.data
+        rc_en = yield self.dec2.e.do.rc.data
         if rc_en:
             self.handle_comparison(results)
 
@@ -595,7 +648,8 @@ class ISACaller:
                         output = SelectableInt(output.value, 64)
                     self.gpr[regnum] = output
 
-        # update program counter
+        print ("end of call", self.namespace['CIA'], self.namespace['NIA'])
+        # UPDATE program counter
         self.pc.update(self.namespace)
 
 
@@ -623,6 +677,9 @@ def inject():
             saved_values = func_globals.copy()  # Shallow copy of dict.
             func_globals.update(context)
             result = func(*args, **kwargs)
+            print ("globals after", func_globals['CIA'], func_globals['NIA'])
+            print ("args[0]", args[0].namespace['CIA'],
+                              args[0].namespace['NIA'])
             args[0].namespace = func_globals
             #exec (func.__code__, func_globals)
 
index ea714632803ada2d90b3d000d1b98c6f1456711c..1be082671092f1dd3cfdd95f62dcc2da285b3d62 100644 (file)
@@ -140,13 +140,14 @@ class PowerOp:
         # TODO: this conversion process from a dict to an object
         # should really be done using e.g. namedtuple and then
         # call eq not _eq
-        if row['CR in'] == '1':
-            import pdb; pdb.set_trace()
+        if False: # debugging
+            if row['CR in'] == '1':
+                import pdb; pdb.set_trace()
+                print(row)
+            if row['CR out'] == '0':
+                import pdb; pdb.set_trace()
+                print(row)
             print(row)
-        if row['CR out'] == '0':
-            import pdb; pdb.set_trace()
-            print(row)
-        print(row)
         res = [self.function_unit.eq(Function[row['unit']]),
                self.form.eq(Form[row['form']]),
                self.internal_op.eq(InternalOp[row['internal op']]),
@@ -160,7 +161,8 @@ class PowerOp:
                self.rc_sel.eq(RC[row['rc']]),
                self.cry_in.eq(CryIn[row['cry in']]),
                ]
-        print (row.keys())
+        if False:
+            print (row.keys())
         asmcode = row['comment']
         if hasattr(self, "asmcode") and asmcode in asmidx:
             res.append(self.asmcode.eq(asmidx[asmcode]))
index e6ee584035d77db781e75a4ddd5b119496f573fb..20c40075617bae978113b3fd8259552c3800afc0 100644 (file)
@@ -2,7 +2,10 @@
 
 based on Anton Blanchard microwatt decode2.vhdl
 
+Note: OP_TRAP is used for exceptions and interrupts (micro-code style) by
+overriding the internal opcode when an exception is needed.
 """
+
 from nmigen import Module, Elaboratable, Signal, Mux, Const, Cat, Repl, Record
 from nmigen.cli import rtlil
 
@@ -26,6 +29,10 @@ TT_FP = 1<<0
 TT_PRIV = 1<<1
 TT_TRAP = 1<<2
 TT_ADDR = 1<<3
+TT_ILLEG = 1<<4
+
+def decode_spr_num(spr):
+    return Cat(spr[5:10], spr[0:5])
 
 
 def instr_is_priv(m, op, insn):
@@ -37,6 +44,7 @@ def instr_is_priv(m, op, insn):
         with m.Case(InternalOp.OP_ATTN)  : comb += is_priv_insn.eq(1)
         with m.Case(InternalOp.OP_MFMSR) : comb += is_priv_insn.eq(1)
         with m.Case(InternalOp.OP_MTMSRD): comb += is_priv_insn.eq(1)
+        with m.Case(InternalOp.OP_MTMSR): comb += is_priv_insn.eq(1)
         with m.Case(InternalOp.OP_RFID)  : comb += is_priv_insn.eq(1)
         with m.Case(InternalOp.OP_TLBIE) : comb += is_priv_insn.eq(1)
     with m.If(op == OP_MFSPR | op == OP_MTSPR):
@@ -45,6 +53,22 @@ def instr_is_priv(m, op, insn):
     return is_priv_insn
 
 
+class SPRMap(Elaboratable):
+    """SPRMap: maps POWER9 SPR numbers to internal enum values
+    """
+    def __init__(self):
+        self.spr_i = Signal(10, reset_less=True)
+        self.spr_o = Signal(SPR, reset_less=True)
+
+    def elaborate(self, platform):
+        m = Module()
+        with m.Switch(self.spr_i):
+            for i, x in enumerate(SPR):
+                with m.Case(x.value):
+                    m.d.comb += self.spr_o.eq(i)
+        return m
+
+
 class DecodeA(Elaboratable):
     """DecodeA from instruction
 
@@ -58,12 +82,13 @@ class DecodeA(Elaboratable):
         self.insn_in = Signal(32, reset_less=True)
         self.reg_out = Data(5, name="reg_a")
         self.immz_out = Signal(reset_less=True)
-        self.spr_out = Data(10, "spr_a")
+        self.spr_out = Data(SPR, "spr_a")
         self.fast_out = Data(3, "fast_a")
 
     def elaborate(self, platform):
         m = Module()
         comb = m.d.comb
+        m.submodules.sprmap = sprmap = SPRMap()
 
         # select Register A field
         ra = Signal(5, reset_less=True)
@@ -100,9 +125,34 @@ class DecodeA(Elaboratable):
 
         # MFSPR move from SPRs
         with m.If(op.internal_op == InternalOp.OP_MFSPR):
-            # XXX TODO: fast/slow SPR decoding and mapping
-            comb += self.spr_out.data.eq(self.dec.SPR) # SPR field, XFX
-            comb += self.spr_out.ok.eq(1)
+            spr = Signal(10, reset_less=True)
+            comb += spr.eq(decode_spr_num(self.dec.SPR)) # from XFX
+            with m.Switch(spr):
+                # fast SPRs
+                with m.Case(SPR.CTR.value):
+                    comb += self.fast_out.data.eq(FastRegs.CTR)
+                    comb += self.fast_out.ok.eq(1)
+                with m.Case(SPR.LR.value):
+                    comb += self.fast_out.data.eq(FastRegs.LR)
+                    comb += self.fast_out.ok.eq(1)
+                with m.Case(SPR.TAR.value):
+                    comb += self.fast_out.data.eq(FastRegs.TAR)
+                    comb += self.fast_out.ok.eq(1)
+                with m.Case(SPR.SRR0.value):
+                    comb += self.fast_out.data.eq(FastRegs.SRR0)
+                    comb += self.fast_out.ok.eq(1)
+                with m.Case(SPR.SRR1.value):
+                    comb += self.fast_out.data.eq(FastRegs.SRR1)
+                    comb += self.fast_out.ok.eq(1)
+                with m.Case(SPR.XER.value):
+                    pass # do nothing
+                # XXX TODO: map to internal SPR numbers
+                # XXX TODO: dec and tb not to go through mapping.
+                with m.Default():
+                    comb += sprmap.spr_i.eq(spr)
+                    comb += self.spr_out.data.eq(sprmap.spr_o)
+                    comb += self.spr_out.ok.eq(1)
+
 
         return m
 
@@ -226,12 +276,13 @@ class DecodeOut(Elaboratable):
         self.sel_in = Signal(OutSel, reset_less=True)
         self.insn_in = Signal(32, reset_less=True)
         self.reg_out = Data(5, "reg_o")
-        self.spr_out = Data(10, "spr_o")
+        self.spr_out = Data(SPR, "spr_o")
         self.fast_out = Data(3, "fast_o")
 
     def elaborate(self, platform):
         m = Module()
         comb = m.d.comb
+        m.submodules.sprmap = sprmap = SPRMap()
         op = self.dec.op
 
         # select Register out field
@@ -243,26 +294,35 @@ class DecodeOut(Elaboratable):
                 comb += self.reg_out.data.eq(self.dec.RA)
                 comb += self.reg_out.ok.eq(1)
             with m.Case(OutSel.SPR):
-                comb += self.spr_out.data.eq(self.dec.SPR) # from XFX
-                comb += self.spr_out.ok.eq(1)
+                spr = Signal(10, reset_less=True)
+                comb += spr.eq(decode_spr_num(self.dec.SPR)) # from XFX
                 # TODO MTSPR 1st spr (fast)
                 with m.If(op.internal_op == InternalOp.OP_MTSPR):
-                    pass
-                    """
-                    sprn := decode_spr_num(f_in.insn);
-                    v.ispr1 := fast_spr_num(sprn);
-                    -- Make slow SPRs single issue
-                    if is_fast_spr(v.ispr1) = '0' then
-                        v.decode.sgl_pipe := '1';
-                        -- send MMU-related SPRs to loadstore1
-                        case sprn is
-                        when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PRTBL =>
-                            v.decode.unit := LDST;
-                        when others =>
-                        end case;
-                    end if;
-                    """
-
+                    with m.Switch(spr):
+                        # fast SPRs
+                        with m.Case(SPR.CTR.value):
+                            comb += self.fast_out.data.eq(FastRegs.CTR)
+                            comb += self.fast_out.ok.eq(1)
+                        with m.Case(SPR.LR.value):
+                            comb += self.fast_out.data.eq(FastRegs.LR)
+                            comb += self.fast_out.ok.eq(1)
+                        with m.Case(SPR.TAR.value):
+                            comb += self.fast_out.data.eq(FastRegs.TAR)
+                            comb += self.fast_out.ok.eq(1)
+                        with m.Case(SPR.SRR0.value):
+                            comb += self.fast_out.data.eq(FastRegs.SRR0)
+                            comb += self.fast_out.ok.eq(1)
+                        with m.Case(SPR.SRR1.value):
+                            comb += self.fast_out.data.eq(FastRegs.SRR1)
+                            comb += self.fast_out.ok.eq(1)
+                        with m.Case(SPR.XER.value):
+                            pass # do nothing
+                        # XXX TODO: map to internal SPR numbers
+                        # XXX TODO: dec and tb not to go through mapping.
+                        with m.Default():
+                            comb += sprmap.spr_i.eq(spr)
+                            comb += self.spr_out.data.eq(sprmap.spr_o)
+                            comb += self.spr_out.ok.eq(1)
 
         # BC or BCREG: potential implicit register (CTR) NOTE: same in DecodeA
         op = self.dec.op
@@ -484,6 +544,7 @@ class PowerDecode2(Elaboratable):
 
         self.dec = dec
         self.e = Decode2ToExecute1Type()
+        self.valid = Signal() # sync signal
 
     def ports(self):
         return self.dec.ports() + self.e.ports()
@@ -491,7 +552,7 @@ class PowerDecode2(Elaboratable):
     def elaborate(self, platform):
         m = Module()
         comb = m.d.comb
-        e, op = self.e, self.dec.op
+        e, op, do = self.e, self.dec.op, self.e.do
 
         # set up submodule decoders
         m.submodules.dec = self.dec
@@ -506,7 +567,7 @@ class PowerDecode2(Elaboratable):
         m.submodules.dec_cr_out = dec_cr_out = DecodeCROut(self.dec)
 
         # copy instruction through...
-        for i in [e.insn, dec_a.insn_in, dec_b.insn_in,
+        for i in [do.insn, dec_a.insn_in, dec_b.insn_in,
                   dec_c.insn_in, dec_o.insn_in, dec_o2.insn_in, dec_rc.insn_in,
                   dec_oe.insn_in, dec_cr_in.insn_in, dec_cr_out.insn_in]:
             comb += i.eq(self.dec.opcode_in)
@@ -517,19 +578,17 @@ class PowerDecode2(Elaboratable):
         comb += dec_c.sel_in.eq(op.in3_sel)
         comb += dec_o.sel_in.eq(op.out_sel)
         comb += dec_o2.sel_in.eq(op.out_sel)
-        comb += dec_o2.lk.eq(e.lk)
+        comb += dec_o2.lk.eq(do.lk)
         comb += dec_rc.sel_in.eq(op.rc_sel)
         comb += dec_oe.sel_in.eq(op.rc_sel) # XXX should be OE sel
         comb += dec_cr_in.sel_in.eq(op.cr_in)
         comb += dec_cr_out.sel_in.eq(op.cr_out)
         comb += dec_cr_out.rc_in.eq(dec_rc.rc_out.data)
 
-
+        # set up instruction, pick fn unit
         comb += e.nia.eq(0)    # XXX TODO (or remove? not sure yet)
-        fu = op.function_unit
-        itype = Mux(fu == Function.NONE, InternalOp.OP_ILLEGAL, op.internal_op)
-        comb += e.insn_type.eq(itype)
-        comb += e.fn_unit.eq(fu)
+        comb += do.insn_type.eq(op.internal_op) # no op: defaults to OP_ILLEGAL
+        comb += do.fn_unit.eq(op.function_unit)
 
         # registers a, b, c and out and out2 (LD/ST EA)
         comb += e.read_reg1.eq(dec_a.reg_out)
@@ -537,12 +596,12 @@ class PowerDecode2(Elaboratable):
         comb += e.read_reg3.eq(dec_c.reg_out)
         comb += e.write_reg.eq(dec_o.reg_out)
         comb += e.write_ea.eq(dec_o2.reg_out)
-        comb += e.imm_data.eq(dec_b.imm_out) # immediate in RB (usually)
-        comb += e.zero_a.eq(dec_a.immz_out)  # RA==0 detected
+        comb += do.imm_data.eq(dec_b.imm_out) # immediate in RB (usually)
+        comb += do.zero_a.eq(dec_a.immz_out)  # RA==0 detected
 
         # rc and oe out
-        comb += e.rc.eq(dec_rc.rc_out)
-        comb += e.oe.eq(dec_oe.oe_out)
+        comb += do.rc.eq(dec_rc.rc_out)
+        comb += do.oe.eq(dec_oe.oe_out)
 
         # SPRs out
         comb += e.read_spr1.eq(dec_a.spr_out)
@@ -554,54 +613,96 @@ class PowerDecode2(Elaboratable):
         comb += e.write_fast1.eq(dec_o.fast_out)
         comb += e.write_fast2.eq(dec_o2.fast_out)
 
+        # condition registers (CR)
         comb += e.read_cr1.eq(dec_cr_in.cr_bitfield)
         comb += e.read_cr2.eq(dec_cr_in.cr_bitfield_b)
         comb += e.read_cr3.eq(dec_cr_in.cr_bitfield_o)
-        comb += e.read_cr_whole.eq(dec_cr_in.whole_reg)
-
         comb += e.write_cr.eq(dec_cr_out.cr_bitfield)
-        comb += e.write_cr_whole.eq(dec_cr_out.whole_reg)
+
+        comb += do.read_cr_whole.eq(dec_cr_in.whole_reg)
+        comb += do.write_cr_whole.eq(dec_cr_out.whole_reg)
+        comb += do.write_cr0.eq(dec_cr_out.cr_bitfield.ok)
 
         # decoded/selected instruction flags
-        comb += e.data_len.eq(op.ldst_len)
-        comb += e.invert_a.eq(op.inv_a)
-        comb += e.invert_out.eq(op.inv_out)
-        comb += e.input_carry.eq(op.cry_in)   # carry comes in
-        comb += e.output_carry.eq(op.cry_out) # carry goes out
-        comb += e.is_32bit.eq(op.is_32b)
-        comb += e.is_signed.eq(op.sgn)
+        comb += do.data_len.eq(op.ldst_len)
+        comb += do.invert_a.eq(op.inv_a)
+        comb += do.invert_out.eq(op.inv_out)
+        comb += do.input_carry.eq(op.cry_in)   # carry comes in
+        comb += do.output_carry.eq(op.cry_out) # carry goes out
+        comb += do.is_32bit.eq(op.is_32b)
+        comb += do.is_signed.eq(op.sgn)
         with m.If(op.lk):
-            comb += e.lk.eq(self.dec.LK) # XXX TODO: accessor
-
-        comb += e.byte_reverse.eq(op.br)
-        comb += e.sign_extend.eq(op.sgn_ext)
-        comb += e.update.eq(op.upd) # LD/ST "update" mode.
+            comb += do.lk.eq(self.dec.LK) # XXX TODO: accessor
 
+        comb += do.byte_reverse.eq(op.br)
+        comb += do.sign_extend.eq(op.sgn_ext)
+        comb += do.update.eq(op.upd) # LD/ST "update" mode.
 
         # These should be removed eventually
-        comb += e.input_cr.eq(op.cr_in)   # condition reg comes in
-        comb += e.output_cr.eq(op.cr_out) # condition reg goes in
+        comb += do.input_cr.eq(op.cr_in)   # condition reg comes in
+        comb += do.output_cr.eq(op.cr_out) # condition reg goes out
+
+        # sigh: this is exactly the sort of special case that the
+        # decoder is designed not to need.  MTSPR, MFSPR and others need
+        # access to the XER bits.  however setting e.oe is not appropriate
+        with m.If(op.internal_op == InternalOp.OP_MFSPR):
+            comb += e.xer_in.eq(1)
+        with m.If(op.internal_op == InternalOp.OP_MTSPR):
+            comb += e.xer_out.eq(1)
 
         # set the trapaddr to 0x700 for a td/tw/tdi/twi operation
         with m.If(op.internal_op == InternalOp.OP_TRAP):
-            comb += e.trapaddr.eq(0x70)    # addr=0x700 (strip first nibble)
+            comb += do.trapaddr.eq(0x70)    # addr=0x700 (strip first nibble)
+
+        # illegal instruction must redirect to trap. this is done by
+        # *overwriting* the decoded instruction and starting again.
+        # (note: the same goes for interrupts and for privileged operations,
+        # just with different trapaddr and traptype)
+        with m.If(op.internal_op == InternalOp.OP_ILLEGAL):
+            # illegal instruction trap
+            self.trap(m, TT_ILLEG, 0x700)
+
+        # trap (note do.insn_type: this includes OP_ILLEGAL): set up fast regs
+        # Note: OP_SC could actually be modified to just be a trap
+        with m.If((do.insn_type == InternalOp.OP_TRAP) |
+                  (do.insn_type == InternalOp.OP_SC)):
+            # TRAP write fast1 = SRR0
+            comb += e.write_fast1.data.eq(FastRegs.SRR0) # constant: SRR0
+            comb += e.write_fast1.ok.eq(1)
+            # TRAP write fast2 = SRR1
+            comb += e.write_fast2.data.eq(FastRegs.SRR1) # constant: SRR1
+            comb += e.write_fast2.ok.eq(1)
+
+        # RFID: needs to read SRR0/1
+        with m.If(do.insn_type == InternalOp.OP_RFID):
+            # RFID read fast1 = SRR0
+            comb += e.read_fast1.data.eq(FastRegs.SRR0) # constant: SRR0
+            comb += e.read_fast1.ok.eq(1)
+            # RFID read fast2 = SRR1
+            comb += e.read_fast2.data.eq(FastRegs.SRR1) # constant: SRR1
+            comb += e.read_fast2.ok.eq(1)
 
         return m
 
-        # privileged instruction
+        # TODO: get msr, then can do privileged instruction
         with m.If(instr_is_priv(m, op.internal_op, e.insn) & msr[MSR_PR]):
-            # don't request registers RA/RT
-            comb += e.read_reg1.eq(0)
-            comb += e.read_reg2.eq(0)
-            comb += e.read_reg3.eq(0)
-            comb += e.write_reg.eq(0)
-            comb += e.write_ea.eq(0)
             # privileged instruction trap
-            comb += op.internal_op.eq(InternalOp.OP_TRAP)
-            comb += e.traptype.eq(TT_PRIV) # request privileged instruction
-            comb += e.trapaddr.eq(0x70)    # addr=0x700 (strip first nibble)
+            self.trap(m, TT_PRIV, 0x700)
         return m
 
+    def trap(self, m, traptype, trapaddr):
+        """trap: this basically "rewrites" the decoded instruction as a trap
+        """
+        comb = m.d.comb
+        e, op, do = self.e, self.dec.op, self.e.do
+        comb += e.eq(0) # reset eeeeeverything
+        # start again
+        comb += do.insn.eq(self.dec.opcode_in)
+        comb += do.insn_type.eq(InternalOp.OP_TRAP)
+        comb += do.fn_unit.eq(Function.TRAP)
+        comb += do.trapaddr.eq(trapaddr >> 4) # cut bottom 4 bits
+        comb += do.traptype.eq(traptype) # request type
+
     def regspecmap_read(self, regfile, regname):
         """regspecmap_read: provides PowerDecode2 with an encoding relationship
         to Function Unit port regfiles (read-enable, read regnum, write regnum)
index 931101ecb0aea1c5237ea4b34da5ae01164ecadd..fd928baf47e4c5bf8552914edfbd346054e66296 100644 (file)
@@ -55,6 +55,7 @@ class Function(Enum):
     TRAP = 1<<7
     MUL = 1<<8
     DIV = 1<<9
+    SPR = 1<<10
 
 
 @unique
@@ -107,7 +108,7 @@ _insns = [
     "lhbrx", "lhz", "lhzu", "lhzux", "lhzx", "lwa", "lwarx", "lwaux",
     "lwax", "lwbrx", "lwz", "lwzu", "lwzux", "lwzx", "mcrf", "mcrxr",
     "mcrxrx", "mfcr/mfocrf", "mfmsr", "mfspr", "modsd", "modsw", "modud",
-    "moduw", "mtcrf/mtocrf", "mtmsrd", "mtspr", "mulhd", "mulhdu",
+    "moduw", "mtcrf/mtocrf", "mtmsr", "mtmsrd", "mtspr", "mulhd", "mulhdu",
     "mulhw", "mulhwu", "mulld", "mulldo", "mulli", "mullw", "mullwo",
     "nand", "neg", "nego", "nop", "nor", "or", "orc", "ori", "oris",
     "popcntb", "popcntd", "popcntw", "prtyd", "prtyw", "rfid", "rldcl",
@@ -202,6 +203,7 @@ class InternalOp(Enum):
     OP_MFMSR = 71
     OP_MTMSRD = 72
     OP_SC = 73
+    OP_MTMSR = 74
 
 
 @unique
@@ -292,12 +294,13 @@ class CROutSel(Enum):
 # http://bugs.libre-riscv.org/show_bug.cgi?id=261
 
 spr_csv = get_csv("sprs.csv")
-spr_info = namedtuple('spr_info', 'SPR priv_mtspr priv_mfspr length')
+spr_info = namedtuple('spr_info', 'SPR priv_mtspr priv_mfspr length idx')
 spr_dict = {}
 spr_byname = {}
 for row in spr_csv:
     info = spr_info(SPR=row['SPR'], priv_mtspr=row['priv_mtspr'],
-                    priv_mfspr=row['priv_mfspr'], length=int(row['len']))
+                    priv_mfspr=row['priv_mfspr'], length=int(row['len']),
+                    idx=int(row['Idx']))
     spr_dict[int(row['Idx'])] = info
     spr_byname[row['SPR']] = info
 fields = [(row['SPR'], int(row['Idx'])) for row in spr_csv]
index a88375d0dcc1d02ffec2df6b855a4eedc9b9fa94..7413cdd68edc4563e02eed48d1f31b8a7fc7d0fd 100644 (file)
@@ -20,16 +20,18 @@ some point that 8-bit mask from the instruction could actually be passed
 directly through to full_cr (TODO).
 
 For the INT and CR numbering, these are expressed in binary in the
-instruction (note however that XFX in MTCR is unary-masked!)
+instruction and need to be converted to unary (1<<read_reg1.data).
+Note however that XFX in MTCR is unary-masked!
 
-XER is implicitly-encoded based on whether the operation has carry or
-overflow.
+XER regs are implicitly-encoded (hard-coded) based on whether the
+operation has carry or overflow.
 
 FAST regfile is, again, implicitly encoded, back in PowerDecode2, based
-on the type of operation (see DecodeB for an example).
+on the type of operation (see DecodeB for an example, where fast_out
+is set, then carried into read_fast2 in PowerDecode2).
 
 The SPR regfile on the other hand is *binary*-encoded, and, furthermore,
-has to be "remapped".
+has to be "remapped" to internal SPR Enum indices (see SPRMap in PowerDecode2)
 see https://libre-soc.org/3d_gpu/architecture/regfile/ section on regspecs
 """
 from nmigen import Const
@@ -41,6 +43,8 @@ def regspec_decode_read(e, regfile, name):
     """regspec_decode_read
     """
 
+    # INT regfile
+
     if regfile == 'INT':
         # Int register numbering is *unary* encoded
         if name == 'ra': # RA
@@ -50,11 +54,13 @@ def regspec_decode_read(e, regfile, name):
         if name == 'rc': # RS
             return e.read_reg3.ok, 1<<e.read_reg3.data
 
+    # CR regfile
+
     if regfile == 'CR':
         # CRRegs register numbering is *unary* encoded
         # *sigh*.  numbering inverted on part-CRs.  because POWER.
         if name == 'full_cr': # full CR
-            return e.read_cr_whole, 0b11111111
+            return e.do.read_cr_whole, 0b11111111
         if name == 'cr_a': # CR A
             return e.read_cr1.ok, 1<<(7-e.read_cr1.data)
         if name == 'cr_b': # CR B
@@ -62,17 +68,21 @@ def regspec_decode_read(e, regfile, name):
         if name == 'cr_c': # CR C
             return e.read_cr3.ok, 1<<(7-e.read_cr3.data)
 
+    # XER regfile
+
     if regfile == 'XER':
         # XERRegs register numbering is *unary* encoded
         SO = 1<<XERRegs.SO
         CA = 1<<XERRegs.CA
         OV = 1<<XERRegs.OV
         if name == 'xer_so':
-            return e.oe.oe[0] & e.oe.oe_ok, SO
+            return (e.do.oe.oe[0] & e.do.oe.oe_ok) | e.xer_in, SO
         if name == 'xer_ov':
-            return e.oe.oe[0] & e.oe.oe_ok, OV
+            return (e.do.oe.oe[0] & e.do.oe.oe_ok) | e.xer_in, OV
         if name == 'xer_ca':
-            return (e.input_carry == CryIn.CA.value), CA
+            return (e.do.input_carry == CryIn.CA.value) | e.xer_in, CA
+
+    # FAST regfile
 
     if regfile == 'FAST':
         # FAST register numbering is *unary* encoded
@@ -88,13 +98,18 @@ def regspec_decode_read(e, regfile, name):
         if name == 'msr':
             return Const(1), MSR # TODO: detect read-conditions
         # TODO: remap the SPR numbers to FAST regs
-        if name == 'spr1':
+        if name == 'fast1':
             return e.read_fast1.ok, 1<<e.read_fast1.data
-        if name == 'spr2':
+        if name == 'fast2':
             return e.read_fast2.ok, 1<<e.read_fast2.data
 
+    # SPR regfile
+
     if regfile == 'SPR':
-        assert False, "regfile TODO %s %s" % (regfile, name)
+        # SPR register numbering is *binary* encoded
+        if name == 'spr1':
+            return e.read_spr1.ok, e.read_spr1.data
+
     assert False, "regspec not found %s %s" % (regfile, name)
 
 
@@ -102,6 +117,8 @@ def regspec_decode_write(e, regfile, name):
     """regspec_decode_write
     """
 
+    # INT regfile
+
     if regfile == 'INT':
         # Int register numbering is *unary* encoded
         if name == 'o': # RT
@@ -109,25 +126,31 @@ def regspec_decode_write(e, regfile, name):
         if name == 'o1': # RA (update mode: LD/ST EA)
             return e.write_ea, 1<<e.write_ea.data
 
+    # CR regfile
+
     if regfile == 'CR':
         # CRRegs register numbering is *unary* encoded
         # *sigh*.  numbering inverted on part-CRs.  because POWER.
         if name == 'full_cr': # full CR
-            return e.write_cr_whole, 0b11111111
+            return e.do.write_cr_whole, 0b11111111
         if name == 'cr_a': # CR A
             return e.write_cr, 1<<(7-e.write_cr.data)
 
+    # XER regfile
+
     if regfile == 'XER':
         # XERRegs register numbering is *unary* encoded
         SO = 1<<XERRegs.SO
         CA = 1<<XERRegs.CA
         OV = 1<<XERRegs.OV
         if name == 'xer_so':
-            return None, SO # hmmm
+            return e.xer_out, SO # hmmm
         if name == 'xer_ov':
-            return None, OV # hmmm
+            return e.xer_out, OV # hmmm
         if name == 'xer_ca':
-            return None, CA # hmmm
+            return e.xer_out, CA # hmmm
+
+    # FAST regfile
 
     if regfile == 'FAST':
         # FAST register numbering is *unary* encoded
@@ -143,12 +166,17 @@ def regspec_decode_write(e, regfile, name):
         if name == 'msr':
             return None, MSR # hmmm
         # TODO: remap the SPR numbers to FAST regs
-        if name == 'spr1':
+        if name == 'fast1':
             return e.write_fast1, 1<<e.write_fast1.data
-        if name == 'spr2':
+        if name == 'fast2':
             return e.write_fast2, 1<<e.write_fast2.data
 
+    # SPR regfile
+
     if regfile == 'SPR':
-        assert False, "regfile TODO %s %s" % (regfile, name)
+        # SPR register numbering is *binary* encoded
+        if name == 'spr1': # SPR1
+            return e.write_spr, e.write_spr.data
+
     assert False, "regspec not found %s %s" % (regfile, name)
 
index 0c5e560c3f54462b406aef7864dacb5537d2c7a7..359afc97e48fe42316690449f7e1991aaf4f5231 100644 (file)
@@ -178,7 +178,12 @@ class SelectableInt:
     def __sub__(self, b):
         return self._op(sub, b)
     def __mul__(self, b):
-        return self._op(mul, b)
+        # different case: mul result needs to fit the total bitsize 
+        if isinstance(b, int):
+            b = SelectableInt(b, self.bits)
+        print ("SelectableInt mul", hex(self.value), hex(b.value),
+                                    self.bits, b.bits)
+        return SelectableInt(self.value * b.value, self.bits + b.bits)
     def __floordiv__(self, b):
         return self._op(floordiv, b)
     def __truediv__(self, b):
@@ -192,7 +197,10 @@ class SelectableInt:
     def __xor__(self, b):
         return self._op(xor, b)
     def __abs__(self):
-        return SelectableInt(0, self.bits) - self
+        print ("abs", self.value & (1<<(self.bits-1)))
+        if self.value & (1<<(self.bits-1)) != 0:
+            return -self
+        return self
 
     def __rsub__(self, b):
         if isinstance(b, int):
index 5dcf958ce320e05051b92fd6a0d9baf55d1d097c..c1ca6fa315ed0f603ce9ded499c0e76f1eb25860 100644 (file)
@@ -14,6 +14,7 @@ from nmigen.hdl.rec import Record, Layout
 from nmigen.cli import main
 from nmigen.cli import verilog, rtlil
 from nmigen.compat.sim import run_simulation
+from nmigen.back.pysim import Simulator, Settle
 
 from soc.decoder.power_enums import InternalOp, Function, CryIn
 
@@ -385,56 +386,32 @@ class BranchALU(Elaboratable):
         return list(self)
 
 def run_op(dut, a, b, op, inv_a=0):
-    from nmigen.back.pysim import Settle
     yield dut.a.eq(a)
     yield dut.b.eq(b)
     yield dut.op.insn_type.eq(op)
     yield dut.op.invert_a.eq(inv_a)
     yield dut.n.ready_i.eq(0)
     yield dut.p.valid_i.eq(1)
-
-    # if valid_o rose on the very first cycle, it is a
-    # zero-delay ALU
-    yield Settle()
-    vld = yield dut.n.valid_o
-    if vld:
-        # special case for zero-delay ALU
-        # we must raise ready_i first, since the combinatorial ALU doesn't
-        # have any storage, and doesn't dare to assert ready_o back to us
-        # until we accepted the output data
-        yield dut.n.ready_i.eq(1)
-        result = yield dut.o
-        yield
-        yield dut.p.valid_i.eq(0)
-        yield dut.n.ready_i.eq(0)
-        yield
-        return result
-
+    yield dut.n.ready_i.eq(1)
     yield
 
     # wait for the ALU to accept our input data
-    while True:
-        rdy = yield dut.p.ready_o
-        if rdy:
-            break
+    while not (yield dut.p.ready_o):
         yield
 
     yield dut.p.valid_i.eq(0)
+    yield dut.a.eq(0)
+    yield dut.b.eq(0)
+    yield dut.op.insn_type.eq(0)
+    yield dut.op.invert_a.eq(0)
 
     # wait for the ALU to present the output data
-    while True:
-        yield Settle()
-        vld = yield dut.n.valid_o
-        if vld:
-            break
+    while not (yield dut.n.valid_o):
         yield
 
     # latch the result and lower read_i
-    yield dut.n.ready_i.eq(1)
     result = yield dut.o
-    yield
     yield dut.n.ready_i.eq(0)
-    yield
 
     return result
 
@@ -472,8 +449,111 @@ def test_alu():
         f.write(vl)
 
 
+def test_alu_parallel():
+    # Compare with the sequential test implementation, above.
+    m = Module()
+    m.submodules.alu = dut = ALU(width=16)
+    sim = Simulator(m)
+    sim.add_clock(1e-6)
+
+    def send(a, b, op, inv_a=0):
+        # present input data and assert valid_i
+        yield dut.a.eq(a)
+        yield dut.b.eq(b)
+        yield dut.op.insn_type.eq(op)
+        yield dut.op.invert_a.eq(inv_a)
+        yield dut.p.valid_i.eq(1)
+        yield
+        # wait for ready_o to be asserted
+        while not (yield dut.p.ready_o):
+            yield
+        # clear input data and negate valid_i
+        # if send is called again immediately afterwards, there will be no
+        # visible transition (they will not be negated, after all)
+        yield dut.p.valid_i.eq(0)
+        yield dut.a.eq(0)
+        yield dut.b.eq(0)
+        yield dut.op.insn_type.eq(0)
+        yield dut.op.invert_a.eq(0)
+
+    def receive():
+        # signal readiness to receive data
+        yield dut.n.ready_i.eq(1)
+        yield
+        # wait for valid_o to be asserted
+        while not (yield dut.n.valid_o):
+            yield
+        # read result
+        result = yield dut.o
+        # negate ready_i
+        # if receive is called again immediately afterwards, there will be no
+        # visible transition (it will not be negated, after all)
+        yield dut.n.ready_i.eq(0)
+        return result
+
+    def producer():
+        # send a few test cases, interspersed with wait states
+        # note that, for this test, we do not wait for the result to be ready,
+        # before presenting the next input
+        # 5 + 3
+        yield from send(5, 3, InternalOp.OP_ADD)
+        yield
+        yield
+        # 2 * 3
+        yield from send(2, 3, InternalOp.OP_MUL_L64)
+        # (-5) + 3
+        yield from send(5, 3, InternalOp.OP_ADD, inv_a=1)
+        yield
+        # 5 - 3
+        # note that this is a zero-delay operation
+        yield from send(5, 3, InternalOp.OP_NOP)
+        yield
+        yield
+        # 13 >> 2
+        yield from send(13, 2, InternalOp.OP_SHR)
+
+    def consumer():
+        # receive and check results, interspersed with wait states
+        # the consumer is not in step with the producer, but the
+        # order of the results is preserved
+        yield
+        # 5 + 3 = 8
+        result = yield from receive()
+        assert (result == 8)
+        # 2 * 3 = 6
+        result = yield from receive()
+        assert (result == 6)
+        yield
+        yield
+        # (-5) + 3 = -2
+        result = yield from receive()
+        assert (result == 65533)  # unsigned equivalent to -2
+        # 5 - 3 = 2
+        # note that this is a zero-delay operation
+        # this, and the previous result, will be received back-to-back
+        # (check the output waveform to see this)
+        result = yield from receive()
+        assert (result == 2)
+        yield
+        yield
+        # 13 >> 2 = 3
+        result = yield from receive()
+        assert (result == 3)
+
+    sim.add_sync_process(producer)
+    sim.add_sync_process(consumer)
+    sim_writer = sim.write_vcd(
+        "test_alu_parallel.vcd",
+        "test_alu_parallel.gtkw",
+        traces=dut.ports()
+    )
+    with sim_writer:
+        sim.run()
+
+
 if __name__ == "__main__":
     test_alu()
+    test_alu_parallel()
 
     # alu = BranchALU(width=16)
     # vl = rtlil.convert(alu, ports=alu.ports())
diff --git a/src/soc/experiment/formal/.gitignore b/src/soc/experiment/formal/.gitignore
new file mode 100644 (file)
index 0000000..a51e080
--- /dev/null
@@ -0,0 +1 @@
+proof*
index 472eaaf4c988635cd532e1de00a1d6e3cac6c516..70e4a9c273a5c1718a177b470d525257b00c4d29 100644 (file)
@@ -14,64 +14,50 @@ class CompALUOpSubset(Record):
         layout = (('insn_type', InternalOp),
                   ('fn_unit', Function),
                   ('imm_data', Layout((("imm", 64), ("imm_ok", 1)))),
-                  ('lk', 1),
                   ('rc', Layout((("rc", 1), ("rc_ok", 1)))), # Data
                   ('oe', Layout((("oe", 1), ("oe_ok", 1)))), # Data
                   ('invert_a', 1),
                   ('zero_a', 1),
                   ('invert_out', 1),
-                  ('write_cr', Layout((("data", 3), ("ok", 1)))), # Data
+                  ('write_cr0', 1),
                   ('input_carry', CryIn),
                   ('output_carry', 1),
-                  ('input_cr', 1),
-                  ('output_cr', 1),
                   ('is_32bit', 1),
                   ('is_signed', 1),
                   ('data_len', 4), # actually used by ALU, in OP_EXTS
                   ('insn', 32),
-                  ('byte_reverse', 1),
-                  ('sign_extend', 1))
+                  )
 
         Record.__init__(self, Layout(layout), name=name)
 
         # grrr.  Record does not have kwargs
         self.insn_type.reset_less = True
         self.fn_unit.reset_less = True
-        self.lk.reset_less = True
         self.zero_a.reset_less = True
         self.invert_a.reset_less = True
         self.invert_out.reset_less = True
         self.input_carry.reset_less = True
         self.output_carry.reset_less = True
-        self.input_cr.reset_less = True
-        self.output_cr.reset_less = True
         self.is_32bit.reset_less = True
         self.is_signed.reset_less = True
         self.data_len.reset_less = True
-        self.byte_reverse.reset_less = True
-        self.sign_extend.reset_less = True
 
     def eq_from_execute1(self, other):
         """ use this to copy in from Decode2Execute1Type
         """
         res = []
         for fname, sig in self.fields.items():
-            eqfrom = other.fields[fname]
+            eqfrom = other.do.fields[fname]
             res.append(sig.eq(eqfrom))
         return res
 
     def ports(self):
         return [self.insn_type,
-                self.lk,
                 self.invert_a,
                 self.invert_out,
                 self.input_carry,
                 self.output_carry,
-                self.input_cr,
-                self.output_cr,
                 self.is_32bit,
                 self.is_signed,
                 self.data_len,
-                self.byte_reverse,
-                self.sign_extend,
         ]
index 83377e9f4db4bc3329cd53848fa7e216ff720ada..c5d250ddf314297d647dc34a2de3f219de522eed 100644 (file)
@@ -224,7 +224,7 @@ class TestRunner(FHDLTestCase):
                     yield pdecode2.dec.bigendian.eq(0)  # little / big?
                     yield instruction.eq(ins)          # raw binary instr.
                     yield Settle()
-                    fn_unit = yield pdecode2.e.fn_unit
+                    fn_unit = yield pdecode2.e.do.fn_unit
                     self.assertEqual(fn_unit, Function.ALU.value)
                     yield from set_alu_inputs(alu, pdecode2, sim)
                     yield
@@ -247,7 +247,7 @@ class TestRunner(FHDLTestCase):
 
     def check_alu_outputs(self, alu, dec2, sim, code):
 
-        rc = yield dec2.e.rc.data
+        rc = yield dec2.e.do.rc.data
         cridx_ok = yield dec2.e.write_cr.ok
         cridx = yield dec2.e.write_cr.data
 
@@ -255,8 +255,8 @@ class TestRunner(FHDLTestCase):
         if rc:
             self.assertEqual(cridx, 0, code)
 
-        oe = yield dec2.e.oe.oe
-        oe_ok = yield dec2.e.oe.ok
+        oe = yield dec2.e.do.oe.oe
+        oe_ok = yield dec2.e.do.oe.ok
         if not oe or not oe_ok:
             # if OE not enabled, XER SO and OV must correspondingly be false
             so_ok = yield alu.n.data_o.xer_so.ok
index c7df72ca8bcab3eb0fe50f5a8371e88094e12f35..b3679e38a3ddb28075684e3743c7528da504f9d3 100644 (file)
@@ -37,7 +37,7 @@ class CompBROpSubset(Record):
         """
         res = []
         for fname, sig in self.fields.items():
-            eqfrom = other.fields[fname]
+            eqfrom = other.do.fields[fname]
             res.append(sig.eq(eqfrom))
         return res
 
index 7a94c1d3e594bdf5b016cd4af5efce8ee951c7a2..394c43469d62d72c70d99c8185381e6bc9f35deb 100644 (file)
@@ -57,11 +57,11 @@ class Driver(Elaboratable):
         cr_arr = Array([cr[(7-i)*4:(7-i)*4+4] for i in range(8)])
         cr_bit_arr = Array([cr[31-i] for i in range(32)])
 
-        cia, cr_in, spr1, spr2 = dut.i.cia, dut.i.cr, dut.i.spr1, dut.i.spr2
-        ctr = spr1
+        cia, cr_in, fast1, fast2 = dut.i.cia, dut.i.cr, dut.i.fast1, dut.i.fast2
+        ctr = fast1
         lr_o, nia_o = dut.o.lr, dut.o.nia
 
-        comb += [spr2.eq(AnyConst(64)),
+        comb += [fast2.eq(AnyConst(64)),
                  ctr.eq(AnyConst(64)),
                  cia.eq(AnyConst(64))]
 
@@ -157,7 +157,7 @@ class Driver(Elaboratable):
 
                 with m.If(nia_o.ok):
                     # make sure we branch to the spr input
-                    comb += Assert(nia_o.data == spr1)
+                    comb += Assert(nia_o.data == fast1)
 
                     # make sure branch+link works
                     comb += Assert(lr_o.ok == rec.lk)
index 27076a56e763e9ec6ad2f58bab941d7aeda79e5a..9d7c2a543d8eeb85480679bef826fd4f3c1f3397 100644 (file)
@@ -57,8 +57,8 @@ class BranchMainStage(PipeModBase):
         comb = m.d.comb
         op = self.i.ctx.op
         lk = op.lk # see PowerDecode2 as to why this is done
-        cr, cia, ctr, spr1 = self.i.cr, self.i.cia, self.i.ctr, self.i.spr1
-        spr2 = self.i.spr2
+        cr, cia, ctr, fast1 = self.i.cr, self.i.cia, self.i.ctr, self.i.fast1
+        fast2 = self.i.fast2
         nia_o, lr_o, ctr_o = self.o.nia, self.o.lr, self.o.ctr
 
         # obtain relevant instruction field AA, "Absolute Address" mode
@@ -135,9 +135,9 @@ class BranchMainStage(PipeModBase):
             with m.Case(InternalOp.OP_BCREG):
                 xo = self.fields.FormXL.XO[0:-1]
                 with m.If(xo[9] & ~xo[5]):
-                    comb += br_imm_addr.eq(Cat(Const(0, 2), spr1[2:]))
+                    comb += br_imm_addr.eq(Cat(Const(0, 2), fast1[2:]))
                 with m.Else():
-                    comb += br_imm_addr.eq(Cat(Const(0, 2), spr2[2:]))
+                    comb += br_imm_addr.eq(Cat(Const(0, 2), fast2[2:]))
                 comb += br_taken.eq(bc_taken)
                 comb += ctr_o.ok.eq(ctr_write)
 
index 1ebfc05bfbdab5ed85b0a6a91b455562fb8b9d57..fb01775d65eb7c0ce56e162b42800e72f8eaeb17 100644 (file)
@@ -31,29 +31,29 @@ class BranchInputData(IntegerData):
     # Note: for OP_BCREG, SPR1 will either be CTR, LR, or TAR
     # this involves the *decode* unit selecting the register, based
     # on detecting the operand being bcctr, bclr or bctar
-    regspec = [('FAST', 'spr1', '0:63'), # see table above, SPR1
-               ('FAST', 'spr2', '0:63'), # see table above, SPR2
+    regspec = [('FAST', 'fast1', '0:63'), # see table above, SPR1
+               ('FAST', 'fast2', '0:63'), # see table above, SPR2
                ('CR', 'cr_a', '0:3'),    # Condition Register(s) CR0-7
                ('FAST', 'cia', '0:63')]  # Current Instruction Address
     def __init__(self, pspec):
         super().__init__(pspec, False)
 
         # convenience variables.  not all of these are used at once
-        self.ctr = self.spr1
-        self.lr = self.tar = self.spr2
+        self.ctr = self.fast1
+        self.lr = self.tar = self.fast2
         self.cr = self.cr_a
 
 
 class BranchOutputData(IntegerData):
-    regspec = [('FAST', 'spr1', '0:63'),
-               ('FAST', 'spr2', '0:63'),
+    regspec = [('FAST', 'fast1', '0:63'),
+               ('FAST', 'fast2', '0:63'),
                ('FAST', 'nia', '0:63')]
     def __init__(self, pspec):
         super().__init__(pspec, True)
 
         # convenience variables.
-        self.ctr = self.spr1
-        self.lr = self.tar = self.spr2
+        self.ctr = self.fast1
+        self.lr = self.tar = self.fast2
 
 
 class BranchPipeSpec(CommonPipeSpec):
index 2c1e5b0d19019a0546f9988d77cfa8c1054b47b1..f9582323faf3bc8658998f278632580e792123a3 100644 (file)
@@ -191,7 +191,7 @@ class TestRunner(FHDLTestCase):
                     # then additional op-decoding is required, accordingly
                     yield Settle()
                     yield from self.set_inputs(branch, pdecode2, simulator)
-                    fn_unit = yield pdecode2.e.fn_unit
+                    fn_unit = yield pdecode2.e.do.fn_unit
                     self.assertEqual(fn_unit, Function.BRANCH.value, code)
                     yield
                     yield
@@ -220,7 +220,7 @@ class TestRunner(FHDLTestCase):
         # TODO: check write_fast1 as well (should contain CTR)
 
         # TODO: this should be checking write_fast2
-        lk = yield dec2.e.lk
+        lk = yield dec2.e.do.lk
         branch_lk = yield branch.n.data_o.lr.ok
         self.assertEqual(lk, branch_lk, code)
         if lk:
index 0232aa66ead191ea89788ea28579aef9fe564ac6..96de016b495486cfe0607cc970c082e4cc063af1 100644 (file)
@@ -67,7 +67,7 @@ class CommonOutputStage(PipeModBase):
         comb += self.o.o.ok.eq(self.i.o.ok)
         # CR0 to be set
         comb += self.o.cr0.data.eq(cr0)
-        comb += self.o.cr0.ok.eq(op.write_cr.ok)
+        comb += self.o.cr0.ok.eq(op.write_cr0)
         # context
         comb += self.o.ctx.eq(self.i.ctx)
 
index 6de63d059dff17e07058c0493547af6897212448..55210173ea0e335603eae0c1d9208c7d081bc67c 100644 (file)
@@ -16,6 +16,7 @@ Two types exist:
   - CR: not so many needed (perhaps)
   - Branch: one or two of these (depending on speculation run-ahead)
   - Trap: yeah really only one of these
+  - SPR: again, only one.
   - ShiftRot (perhaps not too many of these)
 
 * Multi-cycle (and FSM) Function Units.  these are FUs that can only
@@ -64,12 +65,18 @@ from soc.fu.branch.pipe_data import BranchPipeSpec
 from soc.fu.shift_rot.pipeline import ShiftRotBasePipe
 from soc.fu.shift_rot.pipe_data import ShiftRotPipeSpec
 
+from soc.fu.spr.pipeline import SPRBasePipe
+from soc.fu.spr.pipe_data import SPRPipeSpec
+
 from soc.fu.trap.pipeline import TrapBasePipe
 from soc.fu.trap.pipe_data import TrapPipeSpec
 
 from soc.fu.div.pipeline import DIVBasePipe
 from soc.fu.div.pipe_data import DIVPipeSpec
 
+from soc.fu.mul.pipeline import MulBasePipe
+from soc.fu.mul.pipe_data import MulPipeSpec
+
 from soc.fu.ldst.pipe_data import LDSTPipeSpec
 from soc.experiment.compldst_multi import LDSTCompUnit # special-case
 
@@ -145,11 +152,21 @@ class DIVFunctionUnit(FunctionUnitBaseSingle):
     def __init__(self, idx):
         super().__init__(DIVPipeSpec, DIVBasePipe, idx)
 
+class MulFunctionUnit(FunctionUnitBaseSingle):
+    fnunit = Function.MUL
+    def __init__(self, idx):
+        super().__init__(MulPipeSpec, MulBasePipe, idx)
+
 class TrapFunctionUnit(FunctionUnitBaseSingle):
     fnunit = Function.TRAP
     def __init__(self, idx):
         super().__init__(TrapPipeSpec, TrapBasePipe, idx)
 
+class SPRFunctionUnit(FunctionUnitBaseSingle):
+    fnunit = Function.SPR
+    def __init__(self, idx):
+        super().__init__(SPRPipeSpec, SPRBasePipe, idx)
+
 # special-case
 class LDSTFunctionUnit(LDSTCompUnit):
     fnunit = Function.LDST
@@ -182,13 +199,18 @@ class AllFunctionUnits(Elaboratable):
         addrwid = pspec.addr_wid
         units = pspec.units
         if not isinstance(units, dict):
-            units = {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
+            units = {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1,
+                     'spr': 1,
+                     'logical': 1,
+                     'mul': 1,
                      'div': 1, 'shiftrot': 1}
         alus = {'alu': ALUFunctionUnit,
                  'cr': CRFunctionUnit,
                  'branch': BranchFunctionUnit,
                  'trap': TrapFunctionUnit,
+                 'spr': SPRFunctionUnit,
                  'div': DIVFunctionUnit,
+                 'mul': MulFunctionUnit,
                  'logical': LogicalFunctionUnit,
                  'shiftrot': ShiftRotFunctionUnit,
                 }
@@ -221,6 +243,8 @@ def tst_single_fus_il():
                         ('cr', CRFunctionUnit),
                         ('branch', BranchFunctionUnit),
                         ('trap', TrapFunctionUnit),
+                        ('spr', SPRFunctionUnit),
+                        ('mul', MulFunctionUnit),
                         ('logical', LogicalFunctionUnit),
                         ('shiftrot', ShiftRotFunctionUnit)):
         fu = kls(0)
index 9ad8a46c4c0a98ebdb0d9fec8a7f5856a4c8ed76..d58dc61ee6c9bc815a3e81272989f3c6e68456bf 100644 (file)
@@ -20,12 +20,12 @@ class ALUTestRunner(TestRunner):
         res = yield from get_cu_inputs(dec2, sim)
         return res
 
-    def check_cu_outputs(self, res, dec2, sim, code):
+    def check_cu_outputs(self, res, dec2, sim, alu, code):
         """naming (res) must conform to ALUFunctionUnit output regspec
         """
 
-        rc = yield dec2.e.rc.data
-        op = yield dec2.e.insn_type
+        rc = yield dec2.e.do.rc.data
+        op = yield dec2.e.do.insn_type
         cridx_ok = yield dec2.e.write_cr.ok
         cridx = yield dec2.e.write_cr.data
 
@@ -39,9 +39,9 @@ class ALUTestRunner(TestRunner):
 
         yield from ALUHelpers.get_sim_int_o(sim_o, sim, dec2)
         yield from ALUHelpers.get_wr_sim_cr_a(sim_o, sim, dec2)
-        yield from ALUHelpers.get_sim_xer_ov(sim_o, sim, dec2)
+        yield from ALUHelpers.get_wr_sim_xer_ov(sim_o, sim, alu, dec2)
         yield from ALUHelpers.get_wr_sim_xer_ca(sim_o, sim, dec2)
-        yield from ALUHelpers.get_sim_xer_so(sim_o, sim, dec2)
+        yield from ALUHelpers.get_wr_sim_xer_so(sim_o, sim, alu, dec2)
 
         ALUHelpers.check_cr_a(self, res, sim_o, "CR%d %s" % (cridx, code))
         ALUHelpers.check_xer_ov(self, res, sim_o, code)
index dbcd0f656df8b026b2927e9db8ea4c42394d44a5..31e664c5a5bb13b001ea1150287853ebd8957c70 100644 (file)
@@ -25,7 +25,7 @@ class BranchTestRunner(TestRunner):
         res = yield from get_cu_inputs(dec2, sim)
         return res
 
-    def check_cu_outputs(self, res, dec2, sim, code):
+    def check_cu_outputs(self, res, dec2, sim, alu, code):
         """naming (res) must conform to BranchFunctionUnit output regspec
         """
 
@@ -41,17 +41,17 @@ class BranchTestRunner(TestRunner):
             self.assertEqual(branch_addr, sim.pc.CIA.value, code)
 
         # Link SPR
-        lk = yield dec2.e.lk
-        branch_lk = 'spr2' in res
+        lk = yield dec2.e.do.lk
+        branch_lk = 'fast2' in res
         self.assertEqual(lk, branch_lk, code)
         if lk:
-            branch_lr = res['spr2']
+            branch_lr = res['fast2']
             self.assertEqual(sim.spr['LR'], branch_lr, code)
 
         # CTR SPR
-        ctr_ok = 'spr1' in res
+        ctr_ok = 'fast1' in res
         if ctr_ok:
-            ctr = res['spr1']
+            ctr = res['fast1']
             self.assertEqual(sim.spr['CTR'], ctr, code)
 
 
index 1d319ba6e9538bc0f7fcf45f74503867eceb4438..8b21029fe2e4bab80058dbaeb195ec2b17fbcf1f 100644 (file)
@@ -9,7 +9,7 @@ from soc.decoder.power_enums import Function
 from soc.decoder.isa.all import ISA
 
 from soc.experiment.compalu_multi import find_ok # hack
-
+from soc.config.test.test_loadstore import TestMemPspec
 
 def set_cu_input(cu, idx, data):
     rdop = cu.get_in_name(idx)
@@ -61,6 +61,7 @@ def get_cu_output(cu, idx, code):
 
 
 def set_cu_inputs(cu, inp):
+    print ("set_cu_inputs", inp)
     for idx, data in inp.items():
         yield from set_cu_input(cu, idx, data)
 
@@ -168,15 +169,17 @@ class TestRunner(FHDLTestCase):
 
         if self.funit == Function.LDST:
             from soc.experiment.l0_cache import TstL0CacheBuffer
-            m.submodules.l0 = l0 = TstL0CacheBuffer(n_units=1, regwid=64,
-                                                    addrwid=3,
-                                                    ifacetype='test_bare_wb')
+            pspec = TestMemPspec(ldst_ifacetype='test_bare_wb',
+                                 addr_wid=48,
+                                 mask_wid=8,
+                                 reg_wid=64)
+            m.submodules.l0 = l0 = TstL0CacheBuffer(pspec, n_units=1)
             pi = l0.l0.dports[0]
-            m.submodules.cu = cu = self.fukls(pi, awid=3)
+            m.submodules.cu = cu = self.fukls(pi, idx=0, awid=3)
             m.d.comb += cu.ad.go.eq(cu.ad.rel) # link addr-go direct to rel
             m.d.comb += cu.st.go.eq(cu.st.rel) # link store-go direct to rel
         else:
-            m.submodules.cu = cu = self.fukls()
+            m.submodules.cu = cu = self.fukls(0)
 
         comb += pdecode2.dec.raw_opcode_in.eq(instruction)
         sim = Simulator(m)
@@ -218,12 +221,13 @@ class TestRunner(FHDLTestCase):
                     yield pdecode2.dec.bigendian.eq(0)  # little / big?
                     yield instruction.eq(ins)          # raw binary instr.
                     yield Settle()
-                    fn_unit = yield pdecode2.e.fn_unit
+                    fn_unit = yield pdecode2.e.do.fn_unit
                     fuval = self.funit.value
                     self.assertEqual(fn_unit & fuval, fuval)
 
                     # set operand and get inputs
                     yield from set_operand(cu, pdecode2, sim)
+                    yield Settle()
                     iname = yield from self.iodef.get_cu_inputs(pdecode2, sim)
                     inp = get_inp_indexed(cu, iname)
 
@@ -282,7 +286,7 @@ class TestRunner(FHDLTestCase):
                         yield from dump_sim_memory(self, l0, sim, code)
 
                     yield from self.iodef.check_cu_outputs(res, pdecode2,
-                                                            sim, code)
+                                                            sim, cu.alu, code)
 
                     # sigh.  hard-coded.  test memory
                     if self.funit == Function.LDST:
index 1312fb174dd7335be4c89b9ad6605f621fbef51b..9d05d3f72b05ef60b34c6e470872cd7dda8447d2 100644 (file)
@@ -20,14 +20,14 @@ class CRTestRunner(TestRunner):
         res = yield from get_cu_inputs(dec2, sim)
         return res
 
-    def check_cu_outputs(self, res, dec2, sim, code):
+    def check_cu_outputs(self, res, dec2, sim, alu, code):
         """naming (res) must conform to CRFunctionUnit output regspec
         """
 
         print ("check extra output", repr(code), res)
 
         # full CR
-        whole_reg = yield dec2.e.write_cr_whole
+        whole_reg = yield dec2.e.do.write_cr_whole
         cr_en = yield dec2.e.write_cr.ok
         if whole_reg:
             full_cr = res['full_cr']
index 67fe0516703eca35f2934c553e3975d514a43ec1..921a4d286ee2d2bee7e81a477a3201e74925c27e 100644 (file)
@@ -19,7 +19,7 @@ class LDSTTestRunner(TestRunner):
         res = yield from get_cu_inputs(dec2, sim)
         return res
 
-    def check_cu_outputs(self, res, dec2, sim, code):
+    def check_cu_outputs(self, res, dec2, sim, alu, code):
         """naming (res) must conform to LDSTFunctionUnit output regspec
         """
 
index e9a201e34047287c577069f43939b60927196c7e..439901a53c081d848dc1b82b97b6e39b5aae44c2 100644 (file)
@@ -19,12 +19,12 @@ class LogicalTestRunner(TestRunner):
         res = yield from get_cu_inputs(dec2, sim)
         return res
 
-    def check_cu_outputs(self, res, dec2, sim, code):
+    def check_cu_outputs(self, res, dec2, sim, alu, code):
         """naming (res) must conform to LogicalFunctionUnit output regspec
         """
 
-        rc = yield dec2.e.rc.data
-        op = yield dec2.e.insn_type
+        rc = yield dec2.e.do.rc.data
+        op = yield dec2.e.do.insn_type
         cridx_ok = yield dec2.e.write_cr.ok
         cridx = yield dec2.e.write_cr.data
 
index 81f7b86215fd7fe7bbfbe7b23a01f49ae0f7bcd7..1b8bf770f99bfcdc96eb52d6f1cebb57fc328755 100644 (file)
@@ -20,7 +20,7 @@ class ShiftRotTestRunner(TestRunner):
         res = yield from get_cu_inputs(dec2, sim)
         return res
 
-    def check_cu_outputs(self, res, dec2, sim, code):
+    def check_cu_outputs(self, res, dec2, sim, alu, code):
         """naming (res) must conform to ShiftRotFunctionUnit output regspec
         """
 
@@ -35,8 +35,8 @@ class ShiftRotTestRunner(TestRunner):
             print(f"expected {expected:x}, actual: {cu_out:x}")
             self.assertEqual(expected, cu_out, code)
 
-        rc = yield dec2.e.rc.data
-        op = yield dec2.e.insn_type
+        rc = yield dec2.e.do.rc.data
+        op = yield dec2.e.do.insn_type
         cridx_ok = yield dec2.e.write_cr.ok
         cridx = yield dec2.e.write_cr.data
 
@@ -54,7 +54,7 @@ class ShiftRotTestRunner(TestRunner):
             self.assertEqual(cr_expected, cr_actual, "CR%d %s" % (cridx, code))
 
         # XER.ca
-        cry_out = yield dec2.e.output_carry
+        cry_out = yield dec2.e.do.output_carry
         if cry_out:
             expected_carry = 1 if sim.spr['XER'][XER_bits['CA']] else 0
             xer_ca = res['xer_ca']
diff --git a/src/soc/fu/compunits/test/test_spr_compunit.py b/src/soc/fu/compunits/test/test_spr_compunit.py
new file mode 100644 (file)
index 0000000..087fc50
--- /dev/null
@@ -0,0 +1,72 @@
+import unittest
+from soc.decoder.power_enums import (XER_bits, Function)
+
+from soc.fu.spr.test.test_pipe_caller import get_cu_inputs
+from soc.fu.spr.test.test_pipe_caller import SPRTestCase # creates the tests
+
+from soc.fu.test.common import ALUHelpers
+from soc.fu.compunits.compunits import SPRFunctionUnit
+from soc.fu.compunits.test.test_compunit import TestRunner
+
+
+class SPRTestRunner(TestRunner):
+    def __init__(self, test_data):
+        super().__init__(test_data, SPRFunctionUnit, self,
+                         Function.SPR)
+
+    def get_cu_inputs(self, dec2, sim):
+        """naming (res) must conform to SPRFunctionUnit input regspec
+        """
+        res = yield from get_cu_inputs(dec2, sim)
+        return res
+
+    def check_cu_outputs(self, res, dec2, sim, alu, code):
+        """naming (res) must conform to SPRFunctionUnit output regspec
+        """
+
+        rc = yield dec2.e.do.rc.data
+        op = yield dec2.e.do.insn_type
+        cridx_ok = yield dec2.e.write_cr.ok
+        cridx = yield dec2.e.write_cr.data
+
+        print ("check extra output", repr(code), cridx_ok, cridx)
+
+        if rc:
+            self.assertEqual(cridx_ok, 1, code)
+            self.assertEqual(cridx, 0, code)
+
+        sim_o = {}
+
+        yield from ALUHelpers.get_int_o(res, alu, dec2)
+        yield from ALUHelpers.get_fast_spr1(res, alu, dec2)
+        yield from ALUHelpers.get_slow_spr1(res, alu, dec2)
+        yield from ALUHelpers.get_xer_ov(res, alu, dec2)
+        yield from ALUHelpers.get_xer_ca(res, alu, dec2)
+        yield from ALUHelpers.get_xer_so(res, alu, dec2)
+
+        print ("output", res)
+
+        yield from ALUHelpers.get_sim_int_o(sim_o, sim, dec2)
+        yield from ALUHelpers.get_wr_sim_xer_so(sim_o, sim, alu, dec2)
+        yield from ALUHelpers.get_wr_sim_xer_ov(sim_o, sim, alu, dec2)
+        yield from ALUHelpers.get_wr_sim_xer_ca(sim_o, sim, dec2)
+        yield from ALUHelpers.get_wr_fast_spr1(sim_o, sim, dec2)
+        yield from ALUHelpers.get_wr_slow_spr1(sim_o, sim, dec2)
+
+        print ("sim output", sim_o)
+
+        ALUHelpers.check_xer_ov(self, res, sim_o, code)
+        ALUHelpers.check_xer_ca(self, res, sim_o, code)
+        ALUHelpers.check_xer_so(self, res, sim_o, code)
+        ALUHelpers.check_int_o(self, res, sim_o, code)
+        ALUHelpers.check_fast_spr1(self, res, sim_o, code)
+        ALUHelpers.check_slow_spr1(self, res, sim_o, code)
+
+
+if __name__ == "__main__":
+    unittest.main(exit=False)
+    suite = unittest.TestSuite()
+    suite.addTest(SPRTestRunner(SPRTestCase.test_data))
+
+    runner = unittest.TextTestRunner()
+    runner.run(suite)
diff --git a/src/soc/fu/compunits/test/test_trap_compunit.py b/src/soc/fu/compunits/test/test_trap_compunit.py
new file mode 100644 (file)
index 0000000..86ea947
--- /dev/null
@@ -0,0 +1,50 @@
+import unittest
+from soc.decoder.power_enums import (XER_bits, Function)
+
+from soc.fu.trap.test.test_pipe_caller import get_cu_inputs
+from soc.fu.trap.test.test_pipe_caller import TrapTestCase # creates the tests
+
+from soc.fu.test.common import ALUHelpers
+from soc.fu.compunits.compunits import TrapFunctionUnit
+from soc.fu.compunits.test.test_compunit import TestRunner
+
+
+class TrapTestRunner(TestRunner):
+    def __init__(self, test_data):
+        super().__init__(test_data, TrapFunctionUnit, self,
+                         Function.TRAP)
+
+    def get_cu_inputs(self, dec2, sim):
+        """naming (res) must conform to TrapFunctionUnit input regspec
+        """
+        res = yield from get_cu_inputs(dec2, sim)
+        return res
+
+    def check_cu_outputs(self, res, dec2, sim, alu, code):
+        """naming (res) must conform to TrapFunctionUnit output regspec
+        """
+
+        sim_o = {}
+
+        yield from ALUHelpers.get_sim_int_o(sim_o, sim, dec2)
+        yield from ALUHelpers.get_wr_fast_spr1(sim_o, sim, dec2)
+        yield from ALUHelpers.get_wr_fast_spr2(sim_o, sim, dec2)
+        ALUHelpers.get_sim_nia(sim_o, sim, dec2)
+        ALUHelpers.get_sim_msr(sim_o, sim, dec2)
+
+        print ("sim output", sim_o)
+
+        ALUHelpers.check_int_o(self, res, sim_o, code)
+        ALUHelpers.check_fast_spr1(self, res, sim_o, code)
+        ALUHelpers.check_fast_spr2(self, res, sim_o, code)
+        ALUHelpers.check_nia(self, res, sim_o, code)
+        ALUHelpers.check_msr(self, res, sim_o, code)
+
+
+if __name__ == "__main__":
+    unittest.main(exit=False)
+    suite = unittest.TestSuite()
+    suite.addTest(TrapTestRunner(TrapTestCase.test_data))
+
+    runner = unittest.TextTestRunner()
+    runner.run(suite)
index d5ffe87e2a527e5d54ce4df1b34b81dd526f350a..11e85d1568bcdf666b6ce93f59e501a0fbea4804 100644 (file)
@@ -32,7 +32,7 @@ class CompCROpSubset(Record):
         """
         res = []
         for fname, sig in self.fields.items():
-            eqfrom = other.fields[fname]
+            eqfrom = other.do.fields[fname]
             res.append(sig.eq(eqfrom))
         return res
 
index 50bb6903bcb79d16b3b3e122f13fa74f0635e0dd..e6e38bcad4a15688e555facb16eeeb4a03448db7 100644 (file)
@@ -148,7 +148,7 @@ def get_cu_inputs(dec2, sim):
     """naming (res) must conform to CRFunctionUnit input regspec
     """
     res = {}
-    full_reg = yield dec2.e.read_cr_whole
+    full_reg = yield dec2.e.do.read_cr_whole
 
     # full CR
     print(sim.cr.get_range().value)
@@ -202,7 +202,7 @@ class TestRunner(FHDLTestCase):
         yield from ALUHelpers.set_int_rb(alu, dec2, inp)
 
     def assert_outputs(self, alu, dec2, simulator, code):
-        whole_reg = yield dec2.e.write_cr_whole
+        whole_reg = yield dec2.e.do.write_cr_whole
         cr_en = yield dec2.e.write_cr.ok
         if whole_reg:
             full_cr = yield alu.n.data_o.full_cr.data
@@ -266,7 +266,7 @@ class TestRunner(FHDLTestCase):
                     yield Settle()
                     yield from self.set_inputs(alu, pdecode2, sim)
                     yield alu.p.valid_i.eq(1)
-                    fn_unit = yield pdecode2.e.fn_unit
+                    fn_unit = yield pdecode2.e.do.fn_unit
                     self.assertEqual(fn_unit, Function.CR.value, code)
                     yield
                     opname = code.split(' ')[0]
index 0c55071033d19a8f7c3e80c345ea91519b503b63..a7355dd5e9ec4d04772890d11b107f2a4f293bde 100644 (file)
@@ -38,7 +38,7 @@ class DivStagesEnd(PipeModBaseChain):
 
 
 class DIVBasePipe(ControlBase):
-    def __init__(self, pspec, compute_steps_per_stage=8):
+    def __init__(self, pspec, compute_steps_per_stage=4):
         ControlBase.__init__(self)
         self.pipe_start = DivStagesStart(pspec)
         compute_steps = pspec.core_config.n_stages
index a0ea42edf914527a1936f4bec85d772258cc2e1e..9b0455bef3951a15fd277c33f771d07c2c573f5a 100644 (file)
@@ -12,10 +12,7 @@ from soc.decoder.power_fields import DecodeFields
 from soc.decoder.power_fieldsn import SignalBitRange
 from soc.fu.div.pipe_data import CoreInputData
 from ieee754.div_rem_sqrt_rsqrt.core import DivPipeCoreOperation
-
-def eq32(is_32bit, dest, src):
-    return [dest[0:32].eq(src[0:32]),
-            dest[32:64].eq(Mux(is_32bit, 0, src[32:64]))]
+from nmutil.util import eq32
 
 
 class DivSetupStage(PipeModBase):
index 25dedf8b1052d6a34179c2026af02efe24987f3b..8ae19d5a7ab8408bda790dbcc80f3186eb380452 100644 (file)
@@ -145,7 +145,7 @@ class TestRunner(FHDLTestCase):
                     yield pdecode2.dec.bigendian.eq(0)  # little / big?
                     yield instruction.eq(ins)          # raw binary instr.
                     yield Settle()
-                    fn_unit = yield pdecode2.e.fn_unit
+                    fn_unit = yield pdecode2.e.do.fn_unit
                     self.assertEqual(fn_unit, Function.DIV.value)
                     yield from set_alu_inputs(alu, pdecode2, sim)
                     yield
@@ -168,7 +168,7 @@ class TestRunner(FHDLTestCase):
 
     def check_alu_outputs(self, alu, dec2, sim, code):
 
-        rc = yield dec2.e.rc.data
+        rc = yield dec2.e.do.rc.data
         cridx_ok = yield dec2.e.write_cr.ok
         cridx = yield dec2.e.write_cr.data
 
@@ -176,8 +176,8 @@ class TestRunner(FHDLTestCase):
         if rc:
             self.assertEqual(cridx, 0, code)
 
-        oe = yield dec2.e.oe.oe
-        oe_ok = yield dec2.e.oe.ok
+        oe = yield dec2.e.do.oe.oe
+        oe_ok = yield dec2.e.do.oe.ok
         if not oe or not oe_ok:
             # if OE not enabled, XER SO and OV must correspondingly be false
             so_ok = yield alu.n.data_o.xer_so.ok
index 3958009a2804af58da5874c343eab16379eb2fab..58e8f100f41c1eaac24037e69207b8cc3ea673b1 100644 (file)
@@ -38,7 +38,7 @@ class CompLDSTOpSubset(Record):
         """
         res = []
         for fname, sig in self.fields.items():
-            eqfrom = other.fields[fname]
+            eqfrom = other.do.fields[fname]
             res.append(sig.eq(eqfrom))
         return res
 
index 34d1dd917bc26749fb7c3112ae367b0057d38558..92de6388e0ff7f8f17ed736f69adb64083e8b31f 100644 (file)
@@ -14,14 +14,13 @@ class CompLogicalOpSubset(Record):
         layout = (('insn_type', InternalOp),
                   ('fn_unit', Function),
                   ('imm_data', Layout((("imm", 64), ("imm_ok", 1)))),
-                  ('lk', 1),
                   ('rc', Layout((("rc", 1), ("rc_ok", 1)))),
                   ('oe', Layout((("oe", 1), ("oe_ok", 1)))),
                   ('invert_a', 1),
                   ('zero_a', 1),
                   ('input_carry', CryIn),
                   ('invert_out', 1),
-                  ('write_cr', Layout((("data", 3), ("ok", 1)))), # Data
+                  ('write_cr0', 1),
                   ('output_carry', 1),
                   ('is_32bit', 1),
                   ('is_signed', 1),
@@ -34,7 +33,6 @@ class CompLogicalOpSubset(Record):
         # grrr.  Record does not have kwargs
         self.insn_type.reset_less = True
         self.fn_unit.reset_less = True
-        self.lk.reset_less = True
         self.zero_a.reset_less = True
         self.invert_a.reset_less = True
         self.invert_out.reset_less = True
@@ -49,14 +47,13 @@ class CompLogicalOpSubset(Record):
         """
         res = []
         for fname, sig in self.fields.items():
-            eqfrom = other.fields[fname]
+            eqfrom = other.do.fields[fname]
             res.append(sig.eq(eqfrom))
         return res
 
     def ports(self):
         return [self.insn_type,
                 self.fn_unit,
-                self.lk,
                 self.invert_a,
                 self.invert_out,
                 self.input_carry,
index 1d9dfb5067f5dbd9be923c724c8c298d9f6ad1e9..03c81949fcee8929c9f896ae428aa76aa9d462d6 100644 (file)
@@ -195,7 +195,7 @@ class TestRunner(FHDLTestCase):
                     yield pdecode2.dec.bigendian.eq(0)  # little / big?
                     yield instruction.eq(ins)          # raw binary instr.
                     yield Settle()
-                    fn_unit = yield pdecode2.e.fn_unit
+                    fn_unit = yield pdecode2.e.do.fn_unit
                     self.assertEqual(fn_unit, Function.LOGICAL.value, code)
                     yield from set_alu_inputs(alu, pdecode2, simulator)
                     yield
@@ -219,7 +219,7 @@ class TestRunner(FHDLTestCase):
 
     def check_alu_outputs(self, alu, dec2, sim, code):
 
-        rc = yield dec2.e.rc.data
+        rc = yield dec2.e.do.rc.data
         cridx_ok = yield dec2.e.write_cr.ok
         cridx = yield dec2.e.write_cr.data
 
index ea40da35a37f67c846a233c7c7279928aa71886f..ccdd0d35a4397c4d08949ad453b67599b9d85579 100644 (file)
@@ -1,79 +1,38 @@
-# This stage is intended to do most of the work of executing multiply
-# instructions, as well as carry and overflow generation. This module
-# however should not gate the carry or overflow, that's up to the
-# output stage
-from nmigen import (Module, Signal, Cat, Repl, Mux, Const)
+# This stage is intended to do the main work of an actual multiply
+
+from nmigen import Module
 from nmutil.pipemodbase import PipeModBase
-from soc.fu.alu.pipe_data import ALUOutputData
-from soc.fu.mul.pipe_data import MulInputData
+from soc.fu.mul.pipe_data import MulIntermediateData, MulOutputData
 from ieee754.part.partsig import PartitionedSignal
-from soc.decoder.power_enums import InternalOp
-from soc.fu.shift_rot.rotator import Rotator
-
-from soc.decoder.power_fields import DecodeFields
-from soc.decoder.power_fieldsn import SignalBitRange
 
 
-class ShiftRotMainStage(PipeModBase):
+class MulMainStage2(PipeModBase):
     def __init__(self, pspec):
-        super().__init__(pspec, "main")
-        self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
-        self.fields.create_specs()
+        super().__init__(pspec, "mul2")
 
     def ispec(self):
-        return MulInputData(self.pspec)
+        return MulIntermediateData(self.pspec) # pipeline stage input format
 
     def ospec(self):
-        return ALUOutputData(self.pspec)
+        return MulOutputData(self.pspec) # pipeline stage output format
 
     def elaborate(self, platform):
         m = Module()
         comb = m.d.comb
 
-        # obtain me and mb fields from instruction.
-        m_fields = self.fields.instrs['M']
-        md_fields = self.fields.instrs['MD']
-        mb = Signal(m_fields['MB'][0:-1].shape())
-        me = Signal(m_fields['ME'][0:-1].shape())
-        mb_extra = Signal(1, reset_less=True)
-        comb += mb.eq(m_fields['MB'][0:-1])
-        comb += me.eq(m_fields['ME'][0:-1])
-        comb += mb_extra.eq(md_fields['mb'][0:-1][0])
-
-        # set up microwatt rotator module
-        m.submodules.rotator = rotator = Rotator()
-        comb += [
-            rotator.me.eq(me),
-            rotator.mb.eq(mb),
-            rotator.mb_extra.eq(mb_extra),
-            rotator.rs.eq(self.i.rs),
-            rotator.ra.eq(self.i.ra),
-            rotator.shift.eq(self.i.rb),
-            rotator.is_32bit.eq(self.i.ctx.op.is_32bit),
-            rotator.arith.eq(self.i.ctx.op.is_signed),
-        ]
+        # convenience variables
+        a, b, o = self.i.a, self.i.b, self.o.o
 
-        # instruction rotate type
-        mode = Signal(3, reset_less=True)
-        with m.Switch(self.i.ctx.op.insn_type):
-            with m.Case(InternalOp.OP_SHL):  comb += mode.eq(0b000)
-            with m.Case(InternalOp.OP_SHR):  comb += mode.eq(0b001) # R-shift
-            with m.Case(InternalOp.OP_RLC):  comb += mode.eq(0b110) # clear LR
-            with m.Case(InternalOp.OP_RLCL): comb += mode.eq(0b010) # clear L
-            with m.Case(InternalOp.OP_RLCR): comb += mode.eq(0b100) # clear R
+        # actual multiply (TODO: split into stages)
+        comb += o.eq(a * b)
 
-        comb += Cat(rotator.right_shift,
-                    rotator.clear_left,
-                    rotator.clear_right).eq(mode)
-                
-        # outputs from the microwatt rotator module
-        # XXX TODO: carry32
-        comb += [self.o.o.eq(rotator.result_o),
-                 self.o.xer_ca[0].eq(rotator.carry_out_o)]
+        ###### xer and context, all pass-through #####
 
-        ###### sticky overflow and context, both pass-through #####
-
-        comb += self.o.xer_so.data.eq(self.i.xer_so)
+        comb += self.o.xer_ca.eq(self.i.xer_ca)
+        comb += self.o.neg_res.eq(self.i.neg_res)
+        comb += self.o.neg_res32.eq(self.i.neg_res32)
+        comb += self.o.xer_so.eq(self.i.xer_so)
         comb += self.o.ctx.eq(self.i.ctx)
 
         return m
+
diff --git a/src/soc/fu/mul/mul_input_record.py b/src/soc/fu/mul/mul_input_record.py
new file mode 100644 (file)
index 0000000..8554c53
--- /dev/null
@@ -0,0 +1,60 @@
+from nmigen.hdl.rec import Record, Layout
+
+from soc.decoder.power_enums import InternalOp, Function, CryIn
+
+
+class CompMULOpSubset(Record):
+    """CompMULOpSubset
+
+    a copy of the relevant subset information from Decode2Execute1Type
+    needed for MUL operations.  use eq_from_execute1 (below) to copy the
+    same-named fields across from the decoder record's "do" sub-record.
+    """
+    def __init__(self, name=None):
+        layout = (('insn_type', InternalOp),
+                  ('fn_unit', Function),
+                  ('imm_data', Layout((("imm", 64), ("imm_ok", 1)))),
+                  ('rc', Layout((("rc", 1), ("rc_ok", 1)))), # Data
+                  ('oe', Layout((("oe", 1), ("oe_ok", 1)))), # Data
+                  ('invert_a', 1),
+                  ('zero_a', 1),
+                  ('invert_out', 1),
+                  ('write_cr0', 1),
+                  ('input_carry', CryIn),
+                  ('output_carry', 1),
+                  ('is_32bit', 1),
+                  ('is_signed', 1),
+                  ('insn', 32),
+                  )
+
+        Record.__init__(self, Layout(layout), name=name)
+
+        # grrr.  Record does not have kwargs, so set reset_less field-by-field
+        self.insn_type.reset_less = True
+        self.fn_unit.reset_less = True
+        self.zero_a.reset_less = True
+        self.invert_a.reset_less = True
+        self.invert_out.reset_less = True
+        self.input_carry.reset_less = True
+        self.output_carry.reset_less = True
+        self.is_32bit.reset_less = True
+        self.is_signed.reset_less = True
+
+    def eq_from_execute1(self, other):
+        """ copy in from Decode2Execute1Type: returns list of per-field eq()s
+        """
+        res = []
+        for fname, sig in self.fields.items():
+            eqfrom = other.do.fields[fname]  # same-named field in other.do
+            res.append(sig.eq(eqfrom))
+        return res
+
+    def ports(self):  # note: lists only some of the fields in the layout
+        return [self.insn_type,
+                self.invert_a,
+                self.invert_out,
+                self.input_carry,
+                self.output_carry,
+                self.is_32bit,
+                self.is_signed,
+        ]
index 495d503b4ff1bffffb02e251b19cb64a2c92dd80..38741f61bab416c2d2bd10388aa58b47280a7424 100644 (file)
@@ -1,10 +1,32 @@
-from soc.fu.alu.alu_input_record import CompALUOpSubset
+from soc.fu.mul.mul_input_record import CompMULOpSubset
 from soc.fu.pipe_data import IntegerData, CommonPipeSpec
-from soc.fu.alu.pipe_data import ALUOutputData
-from soc.fu.shift_rot.pipe_data import ShoftRotInputData
+from soc.fu.alu.pipe_data import ALUOutputData, ALUInputData
+from nmigen import Signal
 
 
-# TODO: replace CompALUOpSubset with CompShiftRotOpSubset
-class ShiftRotPipeSpec(CommonPipeSpec):
-    regspec = (ShiftRotInputData.regspec, ALUOutputData.regspec)
-    opsubsetkls = CompALUOpSubset
+class MulIntermediateData(ALUInputData):  # ALU inputs plus two negate flags
+    def __init__(self, pspec):
+        super().__init__(pspec)
+
+        self.neg_res = Signal(reset_less=True)    # MulMainStage1: sign_a^sign_b
+        self.neg_res32 = Signal(reset_less=True)  # ditto from the bit-31 signs
+        self.data.append(self.neg_res)    # both flags are appended to the
+        self.data.append(self.neg_res32)  # inherited self.data list
+
+
+class MulOutputData(IntegerData):  # product plus flags, fed to MulMainStage3
+    regspec = [('INT', 'o', '0:128'),
+               ('XER', 'xer_so', '32'), # XER bit 32: SO
+               ('XER', 'xer_ca', '34,45')] # XER bit 34/45: CA/CA32
+    def __init__(self, pspec):
+        super().__init__(pspec, False) # still input style
+
+        self.neg_res = Signal(reset_less=True)    # read by MulMainStage3
+        self.neg_res32 = Signal(reset_less=True)  # read by MulMainStage3 (CA32)
+        self.data.append(self.neg_res)    # both flags are appended to the
+        self.data.append(self.neg_res32)  # inherited self.data list
+
+
+class MulPipeSpec(CommonPipeSpec):
+    regspec = (ALUInputData.regspec, ALUOutputData.regspec)
+    opsubsetkls = CompMULOpSubset
index e726d170d79cf01c5af88454998f14ef60e46d6e..a557c90ea0b971f8bca53a2ab9f894bf62844b1d 100644 (file)
@@ -1,32 +1,45 @@
 from nmutil.singlepipe import ControlBase
 from nmutil.pipemodbase import PipeModBaseChain
-from soc.fu.shift_rot.input_stage import ShiftRotInputStage
-from soc.fu.shift_rot.main_stage import ShiftRotMainStage
+from soc.fu.alu.input_stage import ALUInputStage
 from soc.fu.alu.output_stage import ALUOutputStage
+from soc.fu.mul.pre_stage import MulMainStage1
+from soc.fu.mul.main_stage import MulMainStage2
+from soc.fu.mul.post_stage import MulMainStage3
+
 
 class MulStages1(PipeModBaseChain):
     def get_chain(self):
-        inp = ALUInputStage(self.pspec)
-        main = MulMainStage1(self.pspec)
+        inp = ALUInputStage(self.pspec)   # a-invert, carry etc
+        main = MulMainStage1(self.pspec)  # detect signed/32-bit
         return [inp, main]
 
+
 class MulStages2(PipeModBaseChain):
     def get_chain(self):
-        main2 = MulMainStage2(self.pspec)
-        out = ALUOutputStage(self.pspec)
-        return [main2, out]
+        main2 = MulMainStage2(self.pspec) # actual multiply
+        return [main2]
+
+
+class MulStages3(PipeModBaseChain):  # last chain connected by MulBasePipe
+    def get_chain(self):
+        main3 = MulMainStage3(self.pspec) # select output bits, invert, set ov
+        out = ALUOutputStage(self.pspec)  # do CR, XER and out-invert etc.
+        return [main3, out]
 
 
-class ShiftRotBasePipe(ControlBase):
+class MulBasePipe(ControlBase):
     def __init__(self, pspec):
         ControlBase.__init__(self)
         self.pspec = pspec
         self.pipe1 = MulStages1(pspec)
         self.pipe2 = MulStages2(pspec)
-        self._eqs = self.connect([self.pipe1, self.pipe2])
+        self.pipe3 = MulStages3(pspec)
+        self._eqs = self.connect([self.pipe1, self.pipe2, self.pipe3])
 
     def elaborate(self, platform):
         m = ControlBase.elaborate(self, platform)
-        m.submodules.pipe = self.pipe1
+        m.submodules.mul_pipe1 = self.pipe1
+        m.submodules.mul_pipe2 = self.pipe2
+        m.submodules.mul_pipe3 = self.pipe3
         m.d.comb += self._eqs
         return m
diff --git a/src/soc/fu/mul/post_stage.py b/src/soc/fu/mul/post_stage.py
new file mode 100644 (file)
index 0000000..bdee2ec
--- /dev/null
@@ -0,0 +1,80 @@
+# This stage is intended to do most of the work of analysing the multiply result
+
+from nmigen import (Module, Signal, Cat, Repl, Mux, signed)
+from nmutil.pipemodbase import PipeModBase
+from soc.fu.alu.pipe_data import ALUOutputData
+from soc.fu.mul.pipe_data import MulOutputData
+from ieee754.part.partsig import PartitionedSignal
+from soc.decoder.power_enums import InternalOp
+
+
+class MulMainStage3(PipeModBase):
+    def __init__(self, pspec):
+        super().__init__(pspec, "mul3")
+
+    def ispec(self):
+        return MulOutputData(self.pspec) # defines pipeline stage input format
+
+    def ospec(self):
+        return ALUOutputData(self.pspec) # defines pipeline stage output format
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+
+        # convenience variables (cr0 and cry_i are bound but not used below)
+        cry_o, o, cr0 = self.o.xer_ca, self.o.o, self.o.cr0
+        ov_o = self.o.xer_ov
+        o_i, cry_i, op = self.i.o, self.i.xer_ca, self.i.ctx.op
+
+        # local copy of the op's 32-bit flag
+        is_32bit = Signal(reset_less=True)
+        comb += is_32bit.eq(op.is_32bit)
+
+        # negate the incoming product when neg_res is set, else pass it on
+        mul_o = Signal(o_i.width, reset_less=True)
+        comb += mul_o.eq(Mux(self.i.neg_res, -o_i, o_i))
+        comb += o.ok.eq(1)
+
+        # OP_MUL_nnn - select hi32/hi64/lo64 from result
+        with m.Switch(op.insn_type):
+            # hi-32 replicated twice
+            with m.Case(InternalOp.OP_MUL_H32):
+                comb += o.data.eq(Repl(mul_o[32:64], 2))
+            # hi-64
+            with m.Case(InternalOp.OP_MUL_H64):
+                comb += o.data.eq(mul_o[64:128])
+            # lo-64 - overflow (ov_o is only driven inside this branch)
+            with m.Default():
+                comb += o.data.eq(mul_o[0:64])
+
+                # overflow: upper bits are neither all-zeros nor all-ones
+                mul_ov = Signal(reset_less=True)
+                with m.If(is_32bit):
+                    m32 = mul_o[32:64]
+                    comb += mul_ov.eq(m32.bool() & ~m32.all())
+                with m.Else():
+                    m64 = mul_o[64:128]
+                    comb += mul_ov.eq(m64.bool() & ~m64.all())
+
+                # 32-bit (ov[1]) and 64-bit (ov[0]) overflow: same mul_ov in both
+                ov = Signal(2, reset_less=True)
+                comb += ov[0].eq(mul_ov)
+                comb += ov[1].eq(mul_ov)
+                comb += ov_o.data.eq(ov)
+                comb += ov_o.ok.eq(1)
+
+        # https://bugs.libre-soc.org/show_bug.cgi?id=319#c5
+        ca = Signal(2, reset_less=True)
+        comb += ca[0].eq(mul_o[-1])                      # XER.CA - XXX more?
+        comb += ca[1].eq(mul_o[32] ^ (self.i.neg_res32)) # XER.CA32
+        comb += cry_o.data.eq(ca)
+        comb += cry_o.ok.eq(1)
+
+        ###### sticky overflow and context, both pass-through #####
+
+        comb += self.o.xer_so.data.eq(self.i.xer_so)  # only .data driven here
+        comb += self.o.ctx.eq(self.i.ctx)
+
+        return m
+
diff --git a/src/soc/fu/mul/pre_stage.py b/src/soc/fu/mul/pre_stage.py
new file mode 100644 (file)
index 0000000..8436309
--- /dev/null
@@ -0,0 +1,66 @@
+# This stage is intended to prepare the multiplication operands
+
+from nmigen import (Module, Signal, Mux)
+from nmutil.pipemodbase import PipeModBase
+from soc.fu.alu.pipe_data import ALUInputData
+from soc.fu.mul.pipe_data import MulIntermediateData
+from ieee754.part.partsig import PartitionedSignal
+from nmutil.util import eq32
+
+class MulMainStage1(PipeModBase):
+    def __init__(self, pspec):
+        super().__init__(pspec, "mul1")
+
+    def ispec(self):
+        return ALUInputData(self.pspec) # defines pipeline stage input format
+
+    def ospec(self):
+        return MulIntermediateData(self.pspec) # pipeline stage output format
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+
+        # convenience variables (neg_res_o is bound twice, to the same signal)
+        a, b, op = self.i.a, self.i.b, self.i.ctx.op
+        a_o, b_o, neg_res_o = self.o.a, self.o.b, self.o.neg_res
+        neg_res_o, neg_res32_o = self.o.neg_res, self.o.neg_res32
+
+        # local 32-bit flag, plus sign flags for both operands
+        is_32bit = Signal(reset_less=True)
+        sign_a = Signal(reset_less=True)
+        sign_b = Signal(reset_less=True)
+        sign32_a = Signal(reset_less=True)
+        sign32_b = Signal(reset_less=True)
+        comb += is_32bit.eq(op.is_32bit)
+
+        # work out if a/b are negative (check 32-bit / signed)
+        comb += sign_a.eq(Mux(op.is_32bit, a[31], a[63]) & op.is_signed)
+        comb += sign_b.eq(Mux(op.is_32bit, b[31], b[63]) & op.is_signed)
+        comb += sign32_a.eq(a[31] & op.is_signed)  # always taken from bit 31
+        comb += sign32_b.eq(b[31] & op.is_signed)  # always taken from bit 31
+
+        # work out if result is negative sign
+        comb += neg_res_o.eq(sign_a ^ sign_b)
+        comb += neg_res32_o.eq(sign32_a ^ sign32_b) # pass through for OV32
+
+        # negation of a 64-bit value produces the same lower 32-bit
+        # result as negation of just the lower 32-bits, so we don't
+        # need to do anything special before negating
+        abs_a = Signal(64, reset_less=True)
+        abs_b = Signal(64, reset_less=True)
+        comb += abs_a.eq(Mux(sign_a, -a, a))
+        comb += abs_b.eq(Mux(sign_b, -b, b))
+
+        # set up 32/64 bit inputs
+        comb += eq32(is_32bit, a_o, abs_a)
+        comb += eq32(is_32bit, b_o, abs_b)
+
+        ###### XER and context, both pass-through #####
+
+        comb += self.o.xer_ca.eq(self.i.xer_ca)
+        comb += self.o.xer_so.eq(self.i.xer_so)
+        comb += self.o.ctx.eq(self.i.ctx)
+
+        return m
+
index 88ac54999d207114ac5fc344785ad50506b8dbd2..cd93e1290117dc388a75b958f1f7e04ebdff68e4 100644 (file)
@@ -6,66 +6,47 @@ import unittest
 from soc.decoder.isa.caller import ISACaller, special_sprs
 from soc.decoder.power_decoder import (create_pdecode)
 from soc.decoder.power_decoder2 import (PowerDecode2)
-from soc.decoder.power_enums import (XER_bits, Function)
+from soc.decoder.power_enums import (XER_bits, Function, InternalOp, CryIn)
 from soc.decoder.selectable_int import SelectableInt
 from soc.simulator.program import Program
 from soc.decoder.isa.all import ISA
 
+
+from soc.fu.test.common import (TestCase, ALUHelpers)
 from soc.fu.mul.pipeline import MulBasePipe
-from soc.fu.alu.alu_input_record import CompALUOpSubset
 from soc.fu.mul.pipe_data import MulPipeSpec
 import random
 
 
-class TestCase:
-    def __init__(self, program, regs, sprs, name):
-        self.program = program
-        self.regs = regs
-        self.sprs = sprs
-        self.name = name
+def get_cu_inputs(dec2, sim):  # generator: use "yield from" to obtain res
+    """naming (res) must conform to MulFunctionUnit input regspec
+    """
+    res = {}  # handed to each ALUHelpers call below
+
+    yield from ALUHelpers.get_sim_int_ra(res, sim, dec2) # RA
+    yield from ALUHelpers.get_sim_int_rb(res, sim, dec2) # RB
+    yield from ALUHelpers.get_rd_sim_xer_ca(res, sim, dec2) # XER.ca
+    yield from ALUHelpers.get_sim_xer_so(res, sim, dec2) # XER.so
+
+    print ("alu get_cu_inputs", res)
+
+    return res
+
 
 
 def set_alu_inputs(alu, dec2, sim):
-    inputs = []
     # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
     # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok))
     # and place it into data_i.b
 
-    reg3_ok = yield dec2.e.read_reg3.ok
-    if reg3_ok:
-        reg3_sel = yield dec2.e.read_reg3.data
-        data3 = sim.gpr(reg3_sel).value
-    else:
-        data3 = 0
-    reg1_ok = yield dec2.e.read_reg1.ok
-    if reg1_ok:
-        reg1_sel = yield dec2.e.read_reg1.data
-        data1 = sim.gpr(reg1_sel).value
-    else:
-        data1 = 0
-    reg2_ok = yield dec2.e.read_reg2.ok
-    imm_ok = yield dec2.e.imm_data.ok
-    if reg2_ok:
-        reg2_sel = yield dec2.e.read_reg2.data
-        data2 = sim.gpr(reg2_sel).value
-    elif imm_ok:
-        data2 = yield dec2.e.imm_data.imm
-    else:
-        data2 = 0
-
-    yield alu.p.data_i.ra.eq(data1)
-    yield alu.p.data_i.rb.eq(data2)
-    yield alu.p.data_i.rs.eq(data3)
-
-
-def set_extra_alu_inputs(alu, dec2, sim):
-    carry = 1 if sim.spr['XER'][XER_bits['CA']] else 0
-    carry32 = 1 if sim.spr['XER'][XER_bits['CA32']] else 0
-    yield alu.p.data_i.xer_ca[0].eq(carry)
-    yield alu.p.data_i.xer_ca[1].eq(carry32)
-    so = 1 if sim.spr['XER'][XER_bits['SO']] else 0
-    yield alu.p.data_i.xer_so.eq(so)
-    
+    inp = yield from get_cu_inputs(dec2, sim)
+    print ("set alu inputs", inp)
+    yield from ALUHelpers.set_int_ra(alu, dec2, inp)
+    yield from ALUHelpers.set_int_rb(alu, dec2, inp)
+
+    yield from ALUHelpers.set_xer_ca(alu, dec2, inp)
+    yield from ALUHelpers.set_xer_so(alu, dec2, inp)
+
 
 # This test bench is a bit different than is usual. Initially when I
 # was writing it, I had all of the tests call a function to create a
@@ -85,92 +66,73 @@ def set_extra_alu_inputs(alu, dec2, sim):
 # massively. Before, it took around 1 minute on my computer, now it
 # takes around 3 seconds
 
-test_data = []
-
 
 class MulTestCase(FHDLTestCase):
+    test_data = []
+
     def __init__(self, name):
         super().__init__(name)
         self.test_name = name
-    def run_tst_program(self, prog, initial_regs=[0] * 32, initial_sprs={}):
-        tc = TestCase(prog, initial_regs, initial_sprs, self.test_name)
-        test_data.append(tc)
 
+    def run_tst_program(self, prog, initial_regs=None, initial_sprs=None):
+        tc = TestCase(prog, self.test_name, initial_regs, initial_sprs)
+        self.test_data.append(tc)
 
-    def test_shift(self):
-        insns = ["slw", "sld", "srw", "srd", "sraw", "srad"]
-        for i in range(20):
-            choice = random.choice(insns)
-            lst = [f"{choice} 3, 1, 2"]
-            initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
-            initial_regs[2] = random.randint(0, 63)
-            print(initial_regs[1], initial_regs[2])
-            self.run_tst_program(Program(lst), initial_regs)
-
-
-    def test_shift_arith(self):
-        lst = ["sraw 3, 1, 2"]
+    def tst_0_mullw(self):
+        lst = [f"mullw 3, 1, 2"]
         initial_regs = [0] * 32
-        initial_regs[1] = random.randint(0, (1<<64)-1)
-        initial_regs[2] = random.randint(0, 63)
-        print(initial_regs[1], initial_regs[2])
+        #initial_regs[1] = 0xffffffffffffffff
+        #initial_regs[2] = 0xffffffffffffffff
+        initial_regs[1] = 0x2ffffffff
+        initial_regs[2] = 0x2
         self.run_tst_program(Program(lst), initial_regs)
 
-    def test_shift_once(self):
-        lst = ["slw 3, 1, 4",
-               "slw 3, 1, 2"]
+    def tst_1_mullwo_(self):
+        lst = [f"mullwo. 3, 1, 2"]
         initial_regs = [0] * 32
-        initial_regs[1] = 0x80000000
-        initial_regs[2] = 0x40
-        initial_regs[4] = 0x00
+        initial_regs[1] = 0x3b34b06f
+        initial_regs[2] = 0xfdeba998
         self.run_tst_program(Program(lst), initial_regs)
 
-    def test_rlwinm(self):
-        for i in range(10):
-            mb = random.randint(0,31)
-            me = random.randint(0,31)
-            sh = random.randint(0,31)
-            lst = [f"rlwinm 3, 1, {mb}, {me}, {sh}"]
-            initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
-            self.run_tst_program(Program(lst), initial_regs)
-
-    def test_rlwimi(self):
-        lst = ["rlwimi 3, 1, 5, 20, 6"]
+    def tst_2_mullwo(self):
+        lst = [f"mullwo 3, 1, 2"]
         initial_regs = [0] * 32
-        initial_regs[1] = 0xdeadbeef
-        initial_regs[3] = 0x12345678
+        initial_regs[1] = 0xffffffffffffa988 # -5678
+        initial_regs[2] = 0xffffffffffffedcc # -1234
         self.run_tst_program(Program(lst), initial_regs)
 
-    def test_rlwnm(self):
-        lst = ["rlwnm 3, 1, 2, 20, 6"]
+    def tst_3_mullw(self):
+        lst = ["mullw 3, 1, 2",
+               "mullw 3, 1, 2"]
         initial_regs = [0] * 32
-        initial_regs[1] = random.randint(0, (1<<64)-1)
-        initial_regs[2] = random.randint(0, 63)
+        initial_regs[1] = 0x6
+        initial_regs[2] = 0xe
         self.run_tst_program(Program(lst), initial_regs)
 
-    def test_rldicl(self):
-        lst = ["rldicl 3, 1, 5, 20"]
-        initial_regs = [0] * 32
-        initial_regs[1] = random.randint(0, (1<<64)-1)
-        self.run_tst_program(Program(lst), initial_regs)
+    def test_4_mullw_rand(self):
+        for i in range(40):
+            lst = ["mullw 3, 1, 2"]
+            initial_regs = [0] * 32
+            initial_regs[1] = random.randint(0, (1<<64)-1)
+            initial_regs[2] = random.randint(0, (1<<64)-1)
+            self.run_tst_program(Program(lst), initial_regs)
 
-    def test_rldicr(self):
-        lst = ["rldicr 3, 1, 5, 20"]
-        initial_regs = [0] * 32
-        initial_regs[1] = random.randint(0, (1<<64)-1)
-        self.run_tst_program(Program(lst), initial_regs)
+    def test_4_mullw_nonrand(self):
+        for i in range(40):
+            lst = ["mullw 3, 1, 2"]
+            initial_regs = [0] * 32
+            initial_regs[1] = i+1
+            initial_regs[2] = i+20
+            self.run_tst_program(Program(lst), initial_regs)
 
-    def test_rlc(self):
-        insns = ["rldic", "rldicl", "rldicr"]
-        for i in range(20):
+    def tst_rand_mullw(self):
+        insns = ["mullw", "mullw.", "mullwo", "mullwo."]
+        for i in range(40):
             choice = random.choice(insns)
-            sh = random.randint(0, 63)
-            m = random.randint(0, 63)
-            lst = [f"{choice} 3, 1, {sh}, {m}"]
+            lst = [f"{choice} 3, 1, 2"]
             initial_regs = [0] * 32
             initial_regs[1] = random.randint(0, (1<<64)-1)
+            initial_regs[2] = random.randint(0, (1<<64)-1)
             self.run_tst_program(Program(lst), initial_regs)
 
     def test_ilang(self):
@@ -199,7 +161,6 @@ class TestRunner(FHDLTestCase):
         m.submodules.alu = alu = MulBasePipe(pspec)
 
         comb += alu.p.data_i.ctx.op.eq_from_execute1(pdecode2.e)
-        comb += alu.p.valid_i.eq(1)
         comb += alu.n.ready_i.eq(1)
         comb += pdecode2.dec.raw_opcode_in.eq(instruction)
         sim = Simulator(m)
@@ -210,61 +171,101 @@ class TestRunner(FHDLTestCase):
                 print(test.name)
                 program = test.program
                 self.subTest(test.name)
-                simulator = ISA(pdecode2, test.regs, test.sprs, 0)
+                sim = ISA(pdecode2, test.regs, test.sprs, test.cr,
+                                test.mem, test.msr)
                 gen = program.generate_instructions()
                 instructions = list(zip(gen, program.assembly.splitlines()))
+                yield Settle()
 
-                index = simulator.pc.CIA.value//4
+                index = sim.pc.CIA.value//4
                 while index < len(instructions):
                     ins, code = instructions[index]
 
-                    print("0x{:X}".format(ins & 0xffffffff))
+                    print("instruction: 0x{:X}".format(ins & 0xffffffff))
                     print(code)
+                    if 'XER' in sim.spr:
+                        so = 1 if sim.spr['XER'][XER_bits['SO']] else 0
+                        ov = 1 if sim.spr['XER'][XER_bits['OV']] else 0
+                        ov32 = 1 if sim.spr['XER'][XER_bits['OV32']] else 0
+                        print ("before: so/ov/32", so, ov, ov32)
 
                     # ask the decoder to decode this binary data (endian'd)
                     yield pdecode2.dec.bigendian.eq(0)  # little / big?
                     yield instruction.eq(ins)          # raw binary instr.
                     yield Settle()
-                    fn_unit = yield pdecode2.e.fn_unit
-                    self.assertEqual(fn_unit, Function.SHIFT_ROT.value)
-                    yield from set_alu_inputs(alu, pdecode2, simulator)
-                    yield from set_extra_alu_inputs(alu, pdecode2, simulator)
-                    yield 
+                    fn_unit = yield pdecode2.e.do.fn_unit
+                    self.assertEqual(fn_unit, Function.MUL.value)
+                    yield from set_alu_inputs(alu, pdecode2, sim)
+
+                    # set valid for one cycle, propagate through pipeline...
+                    yield alu.p.valid_i.eq(1)
+                    yield
+                    yield alu.p.valid_i.eq(0)
+
                     opname = code.split(' ')[0]
-                    yield from simulator.call(opname)
-                    index = simulator.pc.CIA.value//4
+                    yield from sim.call(opname)
+                    index = sim.pc.CIA.value//4
 
+                    # ...wait for valid to pop out the end
                     vld = yield alu.n.valid_o
                     while not vld:
                         yield
                         vld = yield alu.n.valid_o
                     yield
-                    alu_out = yield alu.n.data_o.o
-                    out_reg_valid = yield pdecode2.e.write_reg.ok
-                    if out_reg_valid:
-                        write_reg_idx = yield pdecode2.e.write_reg.data
-                        expected = simulator.gpr(write_reg_idx).value
-                        msg = f"expected {expected:x}, actual: {alu_out:x}"
-                        self.assertEqual(expected, alu_out, msg)
-                    yield from self.check_extra_alu_outputs(alu, pdecode2,
-                                                            simulator)
+
+                    yield from self.check_alu_outputs(alu, pdecode2, sim, code)
+                    yield Settle()
 
         sim.add_sync_process(process)
-        with sim.write_vcd("simulator.vcd", "simulator.gtkw",
+        with sim.write_vcd("div_simulator.vcd", "div_simulator.gtkw",
                             traces=[]):
             sim.run()
-    def check_extra_alu_outputs(self, alu, dec2, sim):
-        rc = yield dec2.e.rc.data
+
+    def check_alu_outputs(self, alu, dec2, sim, code):
+
+        rc = yield dec2.e.do.rc.data
+        cridx_ok = yield dec2.e.write_cr.ok
+        cridx = yield dec2.e.write_cr.data
+
+        print ("check extra output", repr(code), cridx_ok, cridx)
         if rc:
-            cr_expected = sim.crl[0].get_range().value
-            cr_actual = yield alu.n.data_o.cr0
-            self.assertEqual(cr_expected, cr_actual)
+            self.assertEqual(cridx, 0, code)
+
+        oe = yield dec2.e.do.oe.oe
+        oe_ok = yield dec2.e.do.oe.ok
+        if not oe or not oe_ok:
+            # if OE not enabled, XER SO and OV must correspondingly be false
+            so_ok = yield alu.n.data_o.xer_so.ok
+            ov_ok = yield alu.n.data_o.xer_ov.ok
+            self.assertEqual(so_ok, False, code)
+            self.assertEqual(ov_ok, False, code)
+
+        sim_o = {}
+        res = {}
+
+        yield from ALUHelpers.get_cr_a(res, alu, dec2)
+        yield from ALUHelpers.get_xer_ov(res, alu, dec2)
+        yield from ALUHelpers.get_xer_ca(res, alu, dec2)
+        yield from ALUHelpers.get_int_o(res, alu, dec2)
+        yield from ALUHelpers.get_xer_so(res, alu, dec2)
+
+        yield from ALUHelpers.get_sim_int_o(sim_o, sim, dec2)
+        yield from ALUHelpers.get_wr_sim_cr_a(sim_o, sim, dec2)
+        yield from ALUHelpers.get_sim_xer_ov(sim_o, sim, dec2)
+        yield from ALUHelpers.get_wr_sim_xer_ca(sim_o, sim, dec2)
+        yield from ALUHelpers.get_sim_xer_so(sim_o, sim, dec2)
+
+        ALUHelpers.check_int_o(self, res, sim_o, code)
+        ALUHelpers.check_xer_ov(self, res, sim_o, code)
+        ALUHelpers.check_xer_ca(self, res, sim_o, code)
+        ALUHelpers.check_xer_so(self, res, sim_o, code)
+        ALUHelpers.check_cr_a(self, res, sim_o, "CR%d %s" % (cridx, code))
 
 
 if __name__ == "__main__":
     unittest.main(exit=False)
     suite = unittest.TestSuite()
-    suite.addTest(TestRunner(test_data))
+    suite.addTest(TestRunner(MulTestCase.test_data))
 
     runner = unittest.TextTestRunner()
     runner.run(suite)
index 6d3b5aa533082ab4636d91e6489b6a2ec3d367d8..4201d4008eb4993e9a11ad9d0a79e9ddd9ccafba 100644 (file)
@@ -27,6 +27,7 @@ class IntegerData:
     def eq(self, i):
         eqs = [self.ctx.eq(i.ctx)]
         for j in range(len(self.data)):
+            assert type(self.data[j]) == type(i.data[j])
             eqs.append(self.data[j].eq(i.data[j]))
         return eqs
 
index ce99144e8af2e07d05b9c737277023350c55d691..da055538e5db1704ac9f31b11677c8a29d6d41de 100644 (file)
@@ -16,7 +16,7 @@ class CompSROpSubset(Record):
                   ('imm_data', Layout((("imm", 64), ("imm_ok", 1)))),
                   ('rc', Layout((("rc", 1), ("rc_ok", 1)))),
                   ('oe', Layout((("oe", 1), ("oe_ok", 1)))),
-                  ('write_cr', Layout((("data", 3), ("ok", 1)))), # Data
+                  ('write_cr0', 0),
                   ('input_carry', CryIn),
                   ('output_carry', 1),
                   ('input_cr', 1),
@@ -43,7 +43,7 @@ class CompSROpSubset(Record):
         """
         res = []
         for fname, sig in self.fields.items():
-            eqfrom = other.fields[fname]
+            eqfrom = other.do.fields[fname]
             res.append(sig.eq(eqfrom))
         return res
 
index 51adba3829ccd81995e177c657aa636d30d0712d..62f0d5a5d2ba86fd569fd0b0ea30ffc8da78d3a8 100644 (file)
@@ -205,7 +205,7 @@ class TestRunner(FHDLTestCase):
                     yield pdecode2.dec.bigendian.eq(0)  # little / big?
                     yield instruction.eq(ins)          # raw binary instr.
                     yield Settle()
-                    fn_unit = yield pdecode2.e.fn_unit
+                    fn_unit = yield pdecode2.e.do.fn_unit
                     self.assertEqual(fn_unit, Function.SHIFT_ROT.value)
                     yield from set_alu_inputs(alu, pdecode2, simulator)
                     yield
@@ -231,7 +231,7 @@ class TestRunner(FHDLTestCase):
 
     def check_alu_outputs(self, alu, dec2, sim, code):
 
-        rc = yield dec2.e.rc.data
+        rc = yield dec2.e.do.rc.data
         cridx_ok = yield dec2.e.write_cr.ok
         cridx = yield dec2.e.write_cr.data
 
diff --git a/src/soc/fu/spr/main_stage.py b/src/soc/fu/spr/main_stage.py
new file mode 100644 (file)
index 0000000..e2234e6
--- /dev/null
@@ -0,0 +1,90 @@
+"""SPR Pipeline
+
+* https://bugs.libre-soc.org/show_bug.cgi?id=348
+* https://libre-soc.org/openpower/isa/sprset/
+"""
+
+from nmigen import (Module, Signal, Cat)
+from nmutil.pipemodbase import PipeModBase
+from soc.fu.spr.pipe_data import SPRInputData, SPROutputData
+from soc.decoder.power_enums import InternalOp, SPR, XER_bits
+
+from soc.decoder.power_fields import DecodeFields
+from soc.decoder.power_fieldsn import SignalBitRange
+from soc.decoder.power_decoder2 import decode_spr_num
+
+
+class SPRMainStage(PipeModBase):  # handles OP_MTSPR / OP_MFSPR (see elaborate)
+    def __init__(self, pspec):
+        super().__init__(pspec, "spr_main")
+        self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
+        self.fields.create_specs()
+
+    def ispec(self):
+        return SPRInputData(self.pspec)
+
+    def ospec(self):
+        return SPROutputData(self.pspec)
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+        op = self.i.ctx.op
+
+        # convenience variables (spr1_i / spr1_o unused: slow SPRs are TODO)
+        a_i, spr1_i, fast1_i = self.i.a, self.i.spr1, self.i.fast1
+        so_i, ov_i, ca_i = self.i.xer_so, self.i.xer_ov, self.i.xer_ca
+        so_o, ov_o, ca_o = self.o.xer_so, self.o.xer_ov, self.o.xer_ca
+        o, spr1_o, fast1_o = self.o.o, self.o.spr1, self.o.fast1
+
+        # take copy of XFX-Form SPR field, passed through decode_spr_num
+        x_fields = self.fields.FormXFX
+        spr = Signal(len(x_fields.SPR))
+        comb += spr.eq(decode_spr_num(x_fields.SPR))
+
+        # TODO: some #defines for the bits n stuff.
+        with m.Switch(op.insn_type):
+            #### MTSPR ####
+            with m.Case(InternalOp.OP_MTSPR):
+                with m.Switch(spr):
+                    # fast SPRs first
+                    with m.Case(SPR.CTR, SPR.LR, SPR.TAR, SPR.SRR0, SPR.SRR1):
+                        comb += fast1_o.data.eq(a_i)
+                        comb += fast1_o.ok.eq(1)
+                    # XER is constructed
+                    with m.Case(SPR.XER):
+                        # sticky
+                        comb += so_o.data.eq(a_i[63-XER_bits['SO']])
+                        comb += so_o.ok.eq(1)
+                        # overflow
+                        comb += ov_o.data[0].eq(a_i[63-XER_bits['OV']])
+                        comb += ov_o.data[1].eq(a_i[63-XER_bits['OV32']])
+                        comb += ov_o.ok.eq(1)
+                        # carry
+                        comb += ca_o.data[0].eq(a_i[63-XER_bits['CA']])
+                        comb += ca_o.data[1].eq(a_i[63-XER_bits['CA32']])
+                        comb += ca_o.ok.eq(1)
+                    # slow SPRs TODO
+
+            # move from SPRs
+            with m.Case(InternalOp.OP_MFSPR):
+                comb += o.ok.eq(1)
+                with m.Switch(spr):
+                    # fast SPRs first
+                    with m.Case(SPR.CTR, SPR.LR, SPR.TAR, SPR.SRR0, SPR.SRR1):
+                        comb += o.data.eq(fast1_i)
+                    # XER is constructed (NOTE(review): o[..] not o.data[..]?)
+                    with m.Case(SPR.XER):
+                        # sticky
+                        comb += o[63-XER_bits['SO']].eq(so_i)
+                        # overflow
+                        comb += o[63-XER_bits['OV']].eq(ov_i[0])
+                        comb += o[63-XER_bits['OV32']].eq(ov_i[1])
+                        # carry
+                        comb += o[63-XER_bits['CA']].eq(ca_i[0])
+                        comb += o[63-XER_bits['CA32']].eq(ca_i[1])
+                    # slow SPRs TODO
+
+        comb += self.o.ctx.eq(self.i.ctx)  # context is passed through
+
+        return m
index 48439200b33e1170a8840c19e646e222010dd845..f4b62aefc84e6fdf84d5f2d6d90863412b79ba11 100644 (file)
@@ -11,12 +11,13 @@ Links:
 
 from soc.fu.pipe_data import IntegerData
 from soc.fu.spr.spr_input_record import CompSPROpSubset
+from soc.fu.alu.pipe_data import CommonPipeSpec
 
 
 class SPRInputData(IntegerData):
     regspec = [('INT', 'ra', '0:63'),        # RA
                ('SPR', 'spr1', '0:63'),      # SPR (slow)
-               ('FAST', 'spr2', '0:63'),     # SPR (fast: MSR, LR, CTR etc)
+               ('FAST', 'fast1', '0:63'),    # SPR (fast: MSR, LR, CTR etc)
                ('XER', 'xer_so', '32'),      # XER bit 32: SO
                ('XER', 'xer_ov', '33,44'),   # XER bit 33/44: OV/OV32
                ('XER', 'xer_ca', '34,45')]   # XER bit 34/45: CA/CA32
@@ -29,7 +30,7 @@ class SPRInputData(IntegerData):
 class SPROutputData(IntegerData):
     regspec = [('INT', 'o', '0:63'),        # RT
                ('SPR', 'spr1', '0:63'),     # SPR (slow)
-               ('FAST', 'spr2', '0:63'),    # SPR (fast: MSR, LR, CTR etc)
+               ('FAST', 'fast1', '0:63'),   # SPR (fast: MSR, LR, CTR etc)
                ('XER', 'xer_so', '32'),     # XER bit 32: SO
                ('XER', 'xer_ov', '33,44'),  # XER bit 33/44: OV/OV32
                ('XER', 'xer_ca', '34,45')]  # XER bit 34/45: CA/CA32
@@ -37,6 +38,6 @@ class SPROutputData(IntegerData):
         super().__init__(pspec, True)
 
 
-class SPRPipeSpec:
+class SPRPipeSpec(CommonPipeSpec):
     regspec = (SPRInputData.regspec, SPROutputData.regspec)
     opsubsetkls = CompSPROpSubset
diff --git a/src/soc/fu/spr/pipeline.py b/src/soc/fu/spr/pipeline.py
new file mode 100644 (file)
index 0000000..fc76c22
--- /dev/null
@@ -0,0 +1,22 @@
+from nmutil.singlepipe import ControlBase
+from nmutil.pipemodbase import PipeModBaseChain
+from soc.fu.spr.main_stage import SPRMainStage
+
+class SPRStages(PipeModBaseChain):
+    """Stage chain of the SPR pipeline."""
+
+    def get_chain(self):
+        """return the list of stages: one SPRMainStage built from pspec"""
+        return [SPRMainStage(self.pspec)]
+
+
+class SPRBasePipe(ControlBase):
+    """ControlBase wrapper around the SPR stage chain."""
+
+    def __init__(self, pspec):
+        super().__init__()
+        self.pspec = pspec
+        stages = SPRStages(pspec)
+        self.pipe1 = stages
+        # keep the statements returned by connect(); elaborate() adds them
+        self._eqs = self.connect([stages])
+
+    def elaborate(self, platform):
+        """elaborate ControlBase, register the stage chain as submodule
+        "pipe" and add the statements saved from connect() to comb
+        """
+        module = super().elaborate(platform)
+        module.submodules.pipe = self.pipe1
+        module.d.comb += self._eqs
+        return module
diff --git a/src/soc/fu/spr/spr_input_record.py b/src/soc/fu/spr/spr_input_record.py
new file mode 100644 (file)
index 0000000..8dfce15
--- /dev/null
@@ -0,0 +1,42 @@
+from nmigen.hdl.rec import Record, Layout
+
+from soc.decoder.power_enums import (InternalOp, Function)
+
+
+class CompSPROpSubset(Record):
+    """CompSPROpSubset
+
+    a copy of the relevant subset information from Decode2Execute1Type
+    needed for SPR operations.  use with eq_from_execute1 (below) to
+    grab subsets.
+    """
+    def __init__(self, name=None):
+        fields = (('insn_type', InternalOp),
+                  ('fn_unit', Function),
+                  ('insn', 32),
+                  ('is_32bit', 1),
+                  )
+
+        Record.__init__(self, Layout(fields), name=name)
+
+        # grrr.  Record does not have kwargs, so reset_less has to be
+        # set on each field after construction
+        for sig in (self.insn_type, self.insn, self.fn_unit, self.is_32bit):
+            sig.reset_less = True
+
+    def eq_from_execute1(self, other):
+        """ use this to copy in from Decode2Execute1Type: returns a list
+        assigning each field of this record from the same-named field
+        of other.do
+        """
+        return [sig.eq(other.do.fields[fname])
+                for fname, sig in self.fields.items()]
+
+    def ports(self):
+        """list the record's four signals"""
+        return [self.insn_type,
+                self.insn,
+                self.fn_unit,
+                self.is_32bit,
+        ]
diff --git a/src/soc/fu/spr/test/test_pipe_caller.py b/src/soc/fu/spr/test/test_pipe_caller.py
new file mode 100644 (file)
index 0000000..be147ca
--- /dev/null
@@ -0,0 +1,245 @@
+from nmigen import Module, Signal
+from nmigen.back.pysim import Simulator, Delay, Settle
+from nmutil.formaltest import FHDLTestCase
+from nmigen.cli import rtlil
+import unittest
+from soc.decoder.isa.caller import ISACaller, special_sprs
+from soc.decoder.power_decoder import (create_pdecode)
+from soc.decoder.power_decoder2 import (PowerDecode2)
+from soc.decoder.power_enums import (XER_bits, Function, InternalOp, CryIn)
+from soc.decoder.selectable_int import SelectableInt
+from soc.simulator.program import Program
+from soc.decoder.isa.all import ISA
+
+
+from soc.fu.test.common import (TestCase, ALUHelpers)
+from soc.fu.spr.pipeline import SPRBasePipe
+from soc.fu.spr.pipe_data import SPRPipeSpec
+import random
+
+
+def get_cu_inputs(dec2, sim):
+    """collect the pipeline inputs from the ISA simulator into a dict.
+
+    naming (res) must conform to SPRFunctionUnit input regspec
+    """
+    res = {}
+
+    # each helper adds its entry to res only when it applies
+    readers = (ALUHelpers.get_sim_int_ra,     # RA
+               ALUHelpers.get_sim_int_rb,     # RB
+               ALUHelpers.get_sim_slow_spr1,  # SPR1
+               ALUHelpers.get_sim_fast_spr1,  # FAST1
+               ALUHelpers.get_rd_sim_xer_ca,  # XER.ca
+               ALUHelpers.get_sim_xer_ov,     # XER.ov
+               ALUHelpers.get_sim_xer_so,     # XER.so
+               )
+    for reader in readers:
+        yield from reader(res, sim, dec2)
+
+    print ("spr get_cu_inputs", res)
+
+    return res
+
+
+
+def set_alu_inputs(alu, dec2, sim):
+    """fetch the inputs with get_cu_inputs and hand them to the
+    ALUHelpers.set_* helpers for the pipeline under test
+    """
+    # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
+    # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok))
+    # and place it into data_i.b
+
+    inp = yield from get_cu_inputs(dec2, sim)
+    writers = (ALUHelpers.set_int_ra,
+               ALUHelpers.set_xer_ca,
+               ALUHelpers.set_xer_ov,
+               ALUHelpers.set_xer_so,
+               ALUHelpers.set_fast_spr1,
+               ALUHelpers.set_slow_spr1,
+               )
+    for writer in writers:
+        yield from writer(alu, dec2, inp)
+
+
+# This test bench is a bit different than is usual. Initially when I
+# was writing it, I had all of the tests call a function to create a
+# device under test and simulator, initialize the dut, run the
+# simulation for ~2 cycles, and assert that the dut output what it
+# should have. However, this was really slow, since it needed to
+# create and tear down the dut and simulator for every test case.
+
+# Now, instead of doing that, every test case in SPRTestCase puts some
+# data into the test_data list below, describing the instructions to
+# be tested and the initial state. Once all the tests have been run,
+# test_data gets passed to TestRunner which then sets up the DUT and
+# simulator once, runs all the data through it, and asserts that the
+# results match the pseudocode sim at every cycle.
+
+# By doing this, I've reduced the time it takes to run the test suite
+# massively. Before, it took around 1 minute on my computer, now it
+# takes around 3 seconds
+
+
+class SPRTestCase(FHDLTestCase):
+    """collects SPR pipeline test programs.
+
+    the test_* methods that call run_tst_program only append a TestCase
+    to the class-level test_data list; the simulation itself is done
+    later by TestRunner.
+    """
+    test_data = []  # class attribute: shared by every instance
+
+    def __init__(self, name):
+        super().__init__(name)
+        self.test_name = name
+
+    def run_tst_program(self, prog, initial_regs=None, initial_sprs=None):
+        """queue prog, with its initial GPR and SPR values, in test_data"""
+        tc = TestCase(prog, self.test_name, initial_regs, initial_sprs)
+        self.test_data.append(tc)
+
+    def test_1_mfspr(self):
+        lst = ["mfspr 1, 26", # SRR0
+               "mfspr 2, 27",  # SRR1
+               "mfspr 3, 8",  # LR
+               "mfspr 4, 1",] # XER
+        initial_regs = [0] * 32
+        initial_sprs = {'SRR0': 0x12345678, 'SRR1': 0x5678, 'LR': 0x1234,
+                        'XER': 0xe00c0000}
+        self.run_tst_program(Program(lst), initial_regs, initial_sprs)
+
+    def test_1_mtspr(self):
+        lst = ["mtspr 26, 1", # SRR0
+               "mtspr 27, 2", # SRR1
+               "mtspr 1, 3",] # XER
+        initial_regs = [0] * 32
+        initial_regs[1] = 0x129518230011feed
+        initial_regs[2] = 0x129518230011feed
+        initial_regs[3] = 0xe00c0000
+        initial_sprs = {'SRR0': 0x12345678, 'SRR1': 0x5678, 'LR': 0x1234,
+                        'XER': 0x0}
+        self.run_tst_program(Program(lst), initial_regs, initial_sprs)
+
+    def test_ilang(self):
+        """convert the SPR pipeline to RTLIL and write it to a file"""
+        pspec = SPRPipeSpec(id_wid=2)
+        alu = SPRBasePipe(pspec)
+        vl = rtlil.convert(alu, ports=alu.ports())
+        # this is the SPR pipeline: the previous name, trap_pipeline.il,
+        # looks like a copy-paste leftover (NOTE(review): confirm nothing
+        # depends on the old file name)
+        with open("spr_pipeline.il", "w") as f:
+            f.write(vl)
+
+
+class TestRunner(FHDLTestCase):
+    """runs every TestCase in test_data through a single SPRBasePipe
+    simulation and checks the pipeline outputs against the ISA simulator
+    """
+    def __init__(self, test_data):
+        super().__init__("run_all")
+        self.test_data = test_data
+
+    def run_all(self):
+        """build decoder + SPR pipeline once, then simulate all test_data"""
+        m = Module()
+        comb = m.d.comb
+        instruction = Signal(32)
+
+        pdecode = create_pdecode()
+
+        m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)
+
+        pspec = SPRPipeSpec(id_wid=2)
+        m.submodules.alu = alu = SPRBasePipe(pspec)
+
+        comb += alu.p.data_i.ctx.op.eq_from_execute1(pdecode2.e)
+        comb += alu.p.valid_i.eq(1)
+        comb += alu.n.ready_i.eq(1)
+        comb += pdecode2.dec.raw_opcode_in.eq(instruction)
+        sim = Simulator(m)
+
+        sim.add_clock(1e-6)
+        def process():
+            for test in self.test_data:
+                print("test", test.name)
+                print ("sprs", test.sprs)
+                program = test.program
+                # NOTE: subTest() is not entered as a context manager
+                # here, so this call has no effect on test reporting
+                self.subTest(test.name)
+                # inside process(), "sim" is the ISA simulator; it shadows
+                # the nmigen Simulator of the enclosing run_all()
+                sim = ISA(pdecode2, test.regs, test.sprs, test.cr,
+                                test.mem, test.msr)
+                gen = program.generate_instructions()
+                instructions = list(zip(gen, program.assembly.splitlines()))
+
+                pc = sim.pc.CIA.value
+                index = pc//4
+                while index < len(instructions):
+                    ins, code = instructions[index]
+
+                    print("pc %08x instr: %08x" % (pc, ins & 0xffffffff))
+                    print(code)
+
+                    if 'XER' in sim.spr:
+                        so = 1 if sim.spr['XER'][XER_bits['SO']] else 0
+                        ov = 1 if sim.spr['XER'][XER_bits['OV']] else 0
+                        ov32 = 1 if sim.spr['XER'][XER_bits['OV32']] else 0
+                        print ("before: so/ov/32", so, ov, ov32)
+
+                    # ask the decoder to decode this binary data (endian'd)
+                    yield pdecode2.dec.bigendian.eq(0)  # little / big?
+                    yield instruction.eq(ins)          # raw binary instr.
+                    yield Settle()
+
+                    fast_in = yield pdecode2.e.read_fast1.data
+                    spr_in = yield pdecode2.e.read_spr1.data
+                    print ("dec2 spr/fast in", fast_in, spr_in)
+
+                    # these are the decoder's *write* ports, so label
+                    # them "out" (the label used to repeat "in")
+                    fast_out = yield pdecode2.e.write_fast1.data
+                    spr_out = yield pdecode2.e.write_spr.data
+                    print ("dec2 spr/fast out", fast_out, spr_out)
+
+                    fn_unit = yield pdecode2.e.do.fn_unit
+                    self.assertEqual(fn_unit, Function.SPR.value)
+                    yield from set_alu_inputs(alu, pdecode2, sim)
+                    yield
+                    # execute the same instruction in the ISA simulator
+                    opname = code.split(' ')[0]
+                    yield from sim.call(opname)
+                    pc = sim.pc.CIA.value
+                    index = pc//4
+                    print("pc after %08x" % (pc))
+
+                    # wait until the pipeline flags its output as valid
+                    vld = yield alu.n.valid_o
+                    while not vld:
+                        yield
+                        vld = yield alu.n.valid_o
+                    yield
+
+                    yield from self.check_alu_outputs(alu, pdecode2, sim, code)
+
+        sim.add_sync_process(process)
+        with sim.write_vcd("alu_simulator.vcd", "simulator.gtkw",
+                            traces=[]):
+            sim.run()
+
+    def check_alu_outputs(self, alu, dec2, sim, code):
+        """compare the pipeline outputs (res) with the ISA simulator state
+        (sim_o); code is the assembly text, used as the assertion message
+        """
+
+        rc = yield dec2.e.do.rc.data
+        cridx_ok = yield dec2.e.write_cr.ok
+        cridx = yield dec2.e.write_cr.data
+
+        print ("check extra output", repr(code), cridx_ok, cridx)
+        if rc:
+            self.assertEqual(cridx, 0, code)
+
+        sim_o = {}
+        res = {}
+
+        # outputs as produced by the pipeline
+        yield from ALUHelpers.get_int_o(res, alu, dec2)
+        yield from ALUHelpers.get_fast_spr1(res, alu, dec2)
+        yield from ALUHelpers.get_slow_spr1(res, alu, dec2)
+        yield from ALUHelpers.get_xer_ov(res, alu, dec2)
+        yield from ALUHelpers.get_xer_ca(res, alu, dec2)
+        yield from ALUHelpers.get_xer_so(res, alu, dec2)
+
+        print ("output", res)
+
+        # the same values as read back from the ISA simulator
+        yield from ALUHelpers.get_sim_int_o(sim_o, sim, dec2)
+        yield from ALUHelpers.get_wr_sim_xer_so(sim_o, sim, alu, dec2)
+        yield from ALUHelpers.get_wr_sim_xer_ov(sim_o, sim, alu, dec2)
+        yield from ALUHelpers.get_wr_sim_xer_ca(sim_o, sim, dec2)
+        yield from ALUHelpers.get_wr_fast_spr1(sim_o, sim, dec2)
+        yield from ALUHelpers.get_wr_slow_spr1(sim_o, sim, dec2)
+
+        print ("sim output", sim_o)
+
+        ALUHelpers.check_xer_ov(self, res, sim_o, code)
+        ALUHelpers.check_xer_ca(self, res, sim_o, code)
+        ALUHelpers.check_xer_so(self, res, sim_o, code)
+        ALUHelpers.check_int_o(self, res, sim_o, code)
+        ALUHelpers.check_fast_spr1(self, res, sim_o, code)
+        ALUHelpers.check_slow_spr1(self, res, sim_o, code)
+
+
+if __name__ == "__main__":
+    # first pass: the SPRTestCase tests queue their programs in test_data
+    unittest.main(exit=False)
+    # second pass: simulate everything that was queued in one TestRunner
+    suite = unittest.TestSuite([TestRunner(SPRTestCase.test_data)])
+    unittest.TextTestRunner().run(suite)
index 4b43b47ea72514f2cb776d2b8f8a3a01f79c8cb1..39a1fd90e26cfbc1bb7679f97297ae8285ab99ce 100644 (file)
@@ -10,7 +10,8 @@ from soc.regfile.regfiles import FastRegs
 
 class TestCase:
     def __init__(self, program, name, regs=None, sprs=None, cr=0, mem=None,
-                       msr=0):
+                       msr=0,
+                       do_sim=True):
 
         self.program = program
         self.name = name
@@ -26,6 +27,8 @@ class TestCase:
         self.cr = cr
         self.mem = mem
         self.msr = msr
+        self.do_sim = do_sim
+
 
 class ALUHelpers:
 
@@ -37,16 +40,27 @@ class ALUHelpers:
     def get_sim_cia(res, sim, dec2):
         res['cia'] = sim.pc.CIA.value
 
+    # use this *after* the simulation has run a step (it returns CIA)
+    def get_sim_nia(res, sim, dec2):
+        res['nia'] = sim.pc.CIA.value
+
     def get_sim_msr(res, sim, dec2):
         res['msr'] = sim.msr.value
 
+    def get_sim_slow_spr1(res, sim, dec2):
+        spr1_en = yield dec2.e.read_spr1.ok
+        if spr1_en:
+            spr1_sel = yield dec2.e.read_spr1.data
+            spr1_data = sim.spr[spr1_sel].value
+            res['spr1'] = spr1_data
+
     def get_sim_fast_spr1(res, sim, dec2):
         fast1_en = yield dec2.e.read_fast1.ok
         if fast1_en:
             fast1_sel = yield dec2.e.read_fast1.data
             spr1_sel = fast_reg_to_spr(fast1_sel)
             spr1_data = sim.spr[spr1_sel].value
-            res['spr1'] = spr1_data
+            res['fast1'] = spr1_data
 
     def get_sim_fast_spr2(res, sim, dec2):
         fast2_en = yield dec2.e.read_fast2.ok
@@ -54,7 +68,7 @@ class ALUHelpers:
             fast2_sel = yield dec2.e.read_fast2.data
             spr2_sel = fast_reg_to_spr(fast2_sel)
             spr2_data = sim.spr[spr2_sel].value
-            res['spr2'] = spr2_data
+            res['fast2'] = spr2_data
 
     def get_sim_cr_a(res, sim, dec2):
         cridx_ok = yield dec2.e.read_cr1.ok
@@ -82,8 +96,9 @@ class ALUHelpers:
             res['rc'] = sim.gpr(data).value
 
     def get_rd_sim_xer_ca(res, sim, dec2):
-        cry_in = yield dec2.e.input_carry
-        if cry_in == CryIn.CA.value:
+        cry_in = yield dec2.e.do.input_carry
+        xer_in = yield dec2.e.xer_in
+        if xer_in or cry_in == CryIn.CA.value:
             expected_carry = 1 if sim.spr['XER'][XER_bits['CA']] else 0
             expected_carry32 = 1 if sim.spr['XER'][XER_bits['CA32']] else 0
             res['xer_ca'] = expected_carry | (expected_carry32 << 1)
@@ -100,9 +115,9 @@ class ALUHelpers:
         if 'rb' in inp:
             yield alu.p.data_i.rb.eq(inp['rb'])
         # If there's an immediate, set the B operand to that
-        imm_ok = yield dec2.e.imm_data.imm_ok
+        imm_ok = yield dec2.e.do.imm_data.imm_ok
         if imm_ok:
-            data2 = yield dec2.e.imm_data.imm
+            data2 = yield dec2.e.do.imm_data.imm
             yield alu.p.data_i.rb.eq(data2)
 
     def set_int_rc(alu, dec2, inp):
@@ -116,6 +131,11 @@ class ALUHelpers:
             yield alu.p.data_i.xer_ca.eq(inp['xer_ca'])
             print ("extra inputs: CA/32", bin(inp['xer_ca']))
 
+    def set_xer_ov(alu, dec2, inp):
+        if 'xer_ov' in inp:
+            yield alu.p.data_i.xer_ov.eq(inp['xer_ov'])
+            print ("extra inputs: OV/32", bin(inp['xer_ov']))
+
     def set_xer_so(alu, dec2, inp):
         if 'xer_so' in inp:
             so = inp['xer_so']
@@ -130,14 +150,22 @@ class ALUHelpers:
         if 'cia' in inp:
             yield alu.p.data_i.cia.eq(inp['cia'])
 
-    def set_fast_spr1(alu, dec2, inp):
+    def set_slow_spr1(alu, dec2, inp):
         if 'spr1' in inp:
             yield alu.p.data_i.spr1.eq(inp['spr1'])
 
-    def set_fast_spr2(alu, dec2, inp):
+    def set_slow_spr2(alu, dec2, inp):
         if 'spr2' in inp:
             yield alu.p.data_i.spr2.eq(inp['spr2'])
 
+    def set_fast_spr1(alu, dec2, inp):
+        if 'fast1' in inp:
+            yield alu.p.data_i.fast1.eq(inp['fast1'])
+
+    def set_fast_spr2(alu, dec2, inp):
+        if 'fast2' in inp:
+            yield alu.p.data_i.fast2.eq(inp['fast2'])
+
     def set_cr_a(alu, dec2, inp):
         if 'cr_a' in inp:
             yield alu.p.data_i.cr_a.eq(inp['cr_a'])
@@ -156,16 +184,26 @@ class ALUHelpers:
         else:
             yield alu.p.data_i.full_cr.eq(0)
 
-    def get_fast_spr1(res, alu, dec2):
+    def get_slow_spr1(res, alu, dec2):
         spr1_valid = yield alu.n.data_o.spr1.ok
         if spr1_valid:
             res['spr1'] = yield alu.n.data_o.spr1.data
 
-    def get_fast_spr2(res, alu, dec2):
+    def get_slow_spr2(res, alu, dec2):
         spr2_valid = yield alu.n.data_o.spr2.ok
         if spr2_valid:
             res['spr2'] = yield alu.n.data_o.spr2.data
 
+    def get_fast_spr1(res, alu, dec2):
+        spr1_valid = yield alu.n.data_o.fast1.ok
+        if spr1_valid:
+            res['fast1'] = yield alu.n.data_o.fast1.data
+
+    def get_fast_spr2(res, alu, dec2):
+        spr2_valid = yield alu.n.data_o.fast2.ok
+        if spr2_valid:
+            res['fast2'] = yield alu.n.data_o.fast2.data
+
     def get_cia(res, alu, dec2):
         res['cia'] = yield alu.p.data_i.cia
 
@@ -195,20 +233,29 @@ class ALUHelpers:
             res['cr_a'] = yield alu.n.data_o.cr0.data
 
     def get_xer_so(res, alu, dec2):
-        oe = yield dec2.e.oe.oe
-        oe_ok = yield dec2.e.oe.ok
-        if oe and oe_ok:
+        oe = yield dec2.e.do.oe.oe
+        oe_ok = yield dec2.e.do.oe.ok
+        xer_out = yield dec2.e.xer_out
+        if not (yield alu.n.data_o.xer_so.ok):
+            return
+        if xer_out or (oe and oe_ok):
             res['xer_so'] = yield alu.n.data_o.xer_so.data[0]
 
     def get_xer_ov(res, alu, dec2):
-        oe = yield dec2.e.oe.oe
-        oe_ok = yield dec2.e.oe.ok
-        if oe and oe_ok:
+        oe = yield dec2.e.do.oe.oe
+        oe_ok = yield dec2.e.do.oe.ok
+        xer_out = yield dec2.e.xer_out
+        if not (yield alu.n.data_o.xer_ov.ok):
+            return
+        if xer_out or (oe and oe_ok):
             res['xer_ov'] = yield alu.n.data_o.xer_ov.data
 
     def get_xer_ca(res, alu, dec2):
-        cry_out = yield dec2.e.output_carry
-        if cry_out:
+        cry_out = yield dec2.e.do.output_carry
+        xer_out = yield dec2.e.xer_out
+        if not (yield alu.n.data_o.xer_ca.ok):
+            return
+        if xer_out or (cry_out):
             res['xer_ca'] = yield alu.n.data_o.xer_ca.data
 
     def get_sim_int_o(res, sim, dec2):
@@ -233,37 +280,94 @@ class ALUHelpers:
         ok = yield dec2.e.write_fast2.ok
         if ok:
             spr_num = yield dec2.e.write_fast2.data
-            spr_name = spr_dict[spr_num]
-            res['spr2'] = sim.spr[spr_name]
+            spr_num = fast_reg_to_spr(spr_num)
+            spr_name = spr_dict[spr_num].SPR
+            res['fast2'] = sim.spr[spr_name].value
 
     def get_wr_fast_spr1(res, sim, dec2):
         ok = yield dec2.e.write_fast1.ok
         if ok:
             spr_num = yield dec2.e.write_fast1.data
-            spr_name = spr_dict[spr_num]
-            res['spr1'] = sim.spr[spr_name]
+            spr_num = fast_reg_to_spr(spr_num)
+            spr_name = spr_dict[spr_num].SPR
+            res['fast1'] = sim.spr[spr_name].value
+
+    def get_wr_slow_spr1(res, sim, dec2):
+        ok = yield dec2.e.write_spr.ok
+        if ok:
+            spr_num = yield dec2.e.write_spr.data
+            spr_name = spr_dict[spr_num].SPR
+            res['spr1'] = sim.spr[spr_name].value
 
     def get_wr_sim_xer_ca(res, sim, dec2):
-        cry_out = yield dec2.e.output_carry
-        if cry_out:
+        #if not (yield alu.n.data_o.xer_ca.ok):
+        #    return
+        cry_out = yield dec2.e.do.output_carry
+        xer_out = yield dec2.e.xer_out
+        if cry_out or xer_out:
             expected_carry = 1 if sim.spr['XER'][XER_bits['CA']] else 0
             expected_carry32 = 1 if sim.spr['XER'][XER_bits['CA32']] else 0
             res['xer_ca'] = expected_carry | (expected_carry32 << 1)
 
+    def get_wr_sim_xer_ov(res, sim, alu, dec2):
+        oe = yield dec2.e.do.oe.oe
+        oe_ok = yield dec2.e.do.oe.ok
+        xer_out = yield dec2.e.xer_out
+        print ("get_wr_sim_xer_ov", xer_out)
+        if not (yield alu.n.data_o.xer_ov.ok):
+            return
+        if xer_out or (oe and oe_ok):
+            expected_ov = 1 if sim.spr['XER'][XER_bits['OV']] else 0
+            expected_ov32 = 1 if sim.spr['XER'][XER_bits['OV32']] else 0
+            res['xer_ov'] = expected_ov | (expected_ov32 << 1)
+
+    def get_wr_sim_xer_so(res, sim, alu, dec2):
+        oe = yield dec2.e.do.oe.oe
+        oe_ok = yield dec2.e.do.oe.ok
+        xer_out = yield dec2.e.xer_out
+        if not (yield alu.n.data_o.xer_so.ok):
+            return
+        if xer_out or (oe and oe_ok):
+            res['xer_so'] = 1 if sim.spr['XER'][XER_bits['SO']] else 0
+
     def get_sim_xer_ov(res, sim, dec2):
-        oe = yield dec2.e.oe.oe
-        oe_ok = yield dec2.e.oe.ok
-        if oe and oe_ok:
+        oe = yield dec2.e.do.oe.oe
+        oe_ok = yield dec2.e.do.oe.ok
+        xer_in = yield dec2.e.xer_in
+        print ("get_sim_xer_ov", xer_in)
+        if xer_in or (oe and oe_ok):
             expected_ov = 1 if sim.spr['XER'][XER_bits['OV']] else 0
             expected_ov32 = 1 if sim.spr['XER'][XER_bits['OV32']] else 0
             res['xer_ov'] = expected_ov | (expected_ov32 << 1)
 
     def get_sim_xer_so(res, sim, dec2):
-        oe = yield dec2.e.oe.oe
-        oe_ok = yield dec2.e.oe.ok
-        if oe and oe_ok:
+        oe = yield dec2.e.do.oe.oe
+        oe_ok = yield dec2.e.do.oe.ok
+        xer_in = yield dec2.e.xer_in
+        if xer_in or (oe and oe_ok):
             res['xer_so'] = 1 if sim.spr['XER'][XER_bits['SO']] else 0
 
+    def check_slow_spr1(dut, res, sim_o, msg):
+        if 'spr1' in res:
+            expected = sim_o['spr1']
+            alu_out = res['spr1']
+            print(f"expected {expected:x}, actual: {alu_out:x}")
+            dut.assertEqual(expected, alu_out, msg)
+
+    def check_fast_spr1(dut, res, sim_o, msg):
+        if 'fast1' in res:
+            expected = sim_o['fast1']
+            alu_out = res['fast1']
+            print(f"expected {expected:x}, actual: {alu_out:x}")
+            dut.assertEqual(expected, alu_out, msg)
+
+    def check_fast_spr2(dut, res, sim_o, msg):
+        if 'fast2' in res:
+            expected = sim_o['fast2']
+            alu_out = res['fast2']
+            print(f"expected {expected:x}, actual: {alu_out:x}")
+            dut.assertEqual(expected, alu_out, msg)
+
     def check_int_o1(dut, res, sim_o, msg):
         if 'o1' in res:
             expected = sim_o['o1']
@@ -275,6 +379,20 @@ class ALUHelpers:
         if 'o' in res:
             expected = sim_o['o']
             alu_out = res['o']
+            print(f"expected int sim {expected:x}, actual: {alu_out:x}")
+            dut.assertEqual(expected, alu_out, msg)
+
+    def check_msr(dut, res, sim_o, msg):
+        if 'msr' in res:
+            expected = sim_o['msr']
+            alu_out = res['msr']
+            print(f"expected {expected:x}, actual: {alu_out:x}")
+            dut.assertEqual(expected, alu_out, msg)
+
+    def check_nia(dut, res, sim_o, msg):
+        if 'nia' in res:
+            expected = sim_o['nia']
+            alu_out = res['nia']
             print(f"expected {expected:x}, actual: {alu_out:x}")
             dut.assertEqual(expected, alu_out, msg)
 
index 1af54f2cf682539d9f268504e464113e16758eb8..0ddb2823f13243ba8a54aa501b2c641514092980 100644 (file)
@@ -1,5 +1,9 @@
 """Trap Pipeline
 
+Deals with td/tw/tdi/twi as well as mfmsr/mtmsr, sc and rfid. addpcis TODO.
+Also used generally for interrupts (as a micro-coding mechanism) by
+actually modifying the decoded instruction in PowerDecode2.
+
 * https://bugs.libre-soc.org/show_bug.cgi?id=325
 * https://bugs.libre-soc.org/show_bug.cgi?id=344
 * https://libre-soc.org/openpower/isa/fixedtrap/
@@ -15,50 +19,21 @@ from soc.decoder.power_enums import InternalOp
 from soc.decoder.power_fields import DecodeFields
 from soc.decoder.power_fieldsn import SignalBitRange
 
-from soc.decoder.power_decoder2 import (TT_FP, TT_PRIV, TT_TRAP, TT_ADDR)
-
-# Listed in V3.0B Book III Chap 4.2.1
-# MSR bit numbers
-MSR_SF  = (63 - 0)     # Sixty-Four bit mode
-MSR_HV  = (63 - 3)     # Hypervisor state
-MSR_S   = (63 - 41)    # Secure state
-MSR_EE  = (63 - 48)    # External interrupt Enable
-MSR_PR  = (63 - 49)    # PRoblem state
-MSR_FP  = (63 - 50)    # FP available
-MSR_ME  = (63 - 51)    # Machine Check int enable
-MSR_IR  = (63 - 58)    # Instruction Relocation
-MSR_DR  = (63 - 59)    # Data Relocation
-MSR_PMM = (63 - 60)    # Performance Monitor Mark
-MSR_RI  = (63 - 62)    # Recoverable Interrupt
-MSR_LE  = (63 - 63)    # Little Endian
-
-# Listed in V3.0B Book III 7.5.9 "Program Interrupt"
-
-# note that these correspond to trap_input_record.traptype bits 0,1,2,3
-# (TODO: add more?)
-
-PI_FP   = (63 - 43)    # 1 if FP exception
-PI_PRIV = (63 - 45)    # 1 if privileged interrupt
-PI_TRAP = (63 - 46)    # 1 if exception is "trap" type
-PI_ADR  = (63 - 47)    # 0 if SRR0 = address of instruction causing exception
+from soc.decoder.power_decoder2 import (TT_FP, TT_PRIV, TT_TRAP, TT_ADDR,
+                                        TT_ILLEG)
+from soc.consts import MSR, PI
 
 
 def msr_copy(msr_o, msr_i, zero_me=True):
-    """
-    -- ISA says this:
-    --  Defined MSR bits are classified as either full func-
-    --  tion or partial function. Full function MSR bits are
-    --  saved in SRR1 or HSRR1 when an interrupt other
-    --  than a System Call Vectored interrupt occurs and
-    --  restored by rfscv, rfid, or hrfid, while partial func-
-    --  tion MSR bits are not saved or restored.
-    --  Full function MSR bits lie in the range 0:32, 37:41, and
-    --  48:63, and partial function MSR bits lie in the range
-    --  33:36 and 42:47. (Note this is IBM bit numbering).
-    msr_out := (others => '0');
-    msr_out(63 downto 31) := msr(63 downto 31);
-    msr_out(26 downto 22) := msr(26 downto 22);
-    msr_out(15 downto 0)  := msr(15 downto 0);
+    """msr_copy
+    ISA says this:
+    Defined MSR bits are classified as either full function or partial
+    function. Full function MSR bits are saved in SRR1 or HSRR1 when
+    an interrupt other than a System Call Vectored interrupt occurs and
+    restored by rfscv, rfid, or hrfid, while partial function MSR bits
+    are not saved or restored.  Full function MSR bits lie in the range
+    0:32, 37:41, and 48:63, and partial function MSR bits lie in the
+    range 33:36 and 42:47. (Note this is IBM bit numbering).
     """
     l = []
     if zero_me:
@@ -72,10 +47,10 @@ def msr_check_pr(m, msr):
     """msr_check_pr: checks "problem state"
     """
     comb = m.d.comb
-    with m.If(msr[MSR_PR]):
-        comb += msr[MSR_EE].eq(1) # set external interrupt bit
-        comb += msr[MSR_IR].eq(1) # set instruction relocation bit
-        comb += msr[MSR_DR].eq(1) # set data relocation bit
+    with m.If(msr[MSR.PR]):
+        comb += msr[MSR.EE].eq(1) # set external interrupt bit
+        comb += msr[MSR.IR].eq(1) # set instruction relocation bit
+        comb += msr[MSR.DR].eq(1) # set data relocation bit
 
 
 class TrapMainStage(PipeModBase):
@@ -85,7 +60,8 @@ class TrapMainStage(PipeModBase):
         self.fields.create_specs()
 
     def trap(self, m, trap_addr, return_addr):
-        """trap """ # TODO add descriptive docstring
+        """trap.  sets new PC, stores MSR and old PC in SRR1 and SRR0
+        """
         comb  = m.d.comb
         msr_i = self.i.msr
         nia_o, srr0_o, srr1_o = self.o.nia, self.o.srr0, self.o.srr1
@@ -177,21 +153,25 @@ class TrapMainStage(PipeModBase):
                     self.trap(m, trapaddr<<4, cia_i)
                     with m.If(traptype == 0):
                         # say trap occurred (see 3.0B Book III 7.5.9)
-                        comb += srr1_o.data[PI_TRAP].eq(1)
+                        comb += srr1_o.data[PI.TRAP].eq(1)
                     with m.If(traptype & TT_PRIV):
-                        comb += srr1_o.data[PI_PRIV].eq(1)
+                        comb += srr1_o.data[PI.PRIV].eq(1)
                     with m.If(traptype & TT_FP):
-                        comb += srr1_o.data[PI_FP].eq(1)
+                        comb += srr1_o.data[PI.FP].eq(1)
                     with m.If(traptype & TT_ADDR):
-                        comb += srr1_o.data[PI_ADR].eq(1)
+                        comb += srr1_o.data[PI.ADR].eq(1)
+                    with m.If(traptype & TT_ILLEG):
+                        comb += srr1_o.data[PI.ILLEG].eq(1)
 
             # move to MSR
-            with m.Case(InternalOp.OP_MTMSRD):
+            with m.Case(InternalOp.OP_MTMSRD, InternalOp.OP_MTMSR):
                 L = self.fields.FormX.L[0:-1] # X-Form field L
+                # start with copy of msr
+                comb += msr_o.eq(msr_i)
                 with m.If(L):
-                    # just update EE and RI
-                    comb += msr_o.data[MSR_EE].eq(a_i[MSR_EE])
-                    comb += msr_o.data[MSR_RI].eq(a_i[MSR_RI])
+                    # just update RI..EE
+                    comb += msr_o.data[MSR.RI].eq(a_i[MSR.RI])
+                    comb += msr_o.data[MSR.EE].eq(a_i[MSR.EE])
                 with m.Else():
                     # Architecture says to leave out bits 3 (HV), 51 (ME)
                     # and 63 (LE) (IBM bit numbering)
@@ -207,8 +187,8 @@ class TrapMainStage(PipeModBase):
                 comb += o.ok.eq(1)
 
             with m.Case(InternalOp.OP_RFID):
-                # XXX f_out.virt_mode <= b_in(MSR_IR) or b_in(MSR_PR);
-                # XXX f_out.priv_mode <= not b_in(MSR_PR);
+                # XXX f_out.virt_mode <= b_in(MSR.IR) or b_in(MSR.PR);
+                # XXX f_out.priv_mode <= not b_in(MSR.PR);
 
                 # return addr was in srr0
                 comb += nia_o.data.eq(br_ext(srr0_i[2:]))
@@ -216,15 +196,26 @@ class TrapMainStage(PipeModBase):
                 # MSR was in srr1
                 comb += msr_copy(msr_o.data, srr1_i, zero_me=False) # don't zero
                 msr_check_pr(m, msr_o.data)
+
+                # hypervisor stuff
+                comb += msr_o.data[MSR.HV].eq(msr_i[MSR.HV] & srr1_i[MSR.HV])
+                comb += msr_o.data[MSR.ME].eq((msr_i[MSR.HV] & srr1_i[MSR.HV]) |
+                                             (~msr_i[MSR.HV] & srr1_i[MSR.HV]))
+                # don't understand but it's in the spec
+                with m.If((msr_i[63-31:63-29] != Const(0b010, 3)) |
+                          (srr1_i[63-31:63-29] != Const(0b000, 3))):
+                    comb += msr_o.data[63-31:63-29].eq(srr1_i[63-31:63-29])
+                with m.Else():
+                    comb += msr_o.data[63-31:63-29].eq(msr_i[63-31:63-29])
                 comb += msr_o.ok.eq(1)
 
-            # TODO (later) - add OP_SC
-            #with m.Case(InternalOp.OP_SC):
-            #    # TODO: scv must generate illegal instruction.  this is
-            #    # the decoder's job, not ours, here.
-            #
-            #    # jump to the trap address, return at cia+4
-            #    self.trap(m, 0xc00, cia_i+4)
+            # OP_SC
+            with m.Case(InternalOp.OP_SC):
+                # TODO: scv must generate illegal instruction.  this is
+                # the decoder's job, not ours, here.
+
+                # jump to the trap address, return at cia+4
+                self.trap(m, 0xc00, cia_i+4)
 
             # TODO (later)
             #with m.Case(InternalOp.OP_ADDPCIS):
index 3b336e2ba6049fb2f32ec7b2d6eeae25cc3d39d9..a43c214c65031f0c8ba99526a75bb95dee462a35 100644 (file)
@@ -5,27 +5,27 @@ from soc.fu.trap.trap_input_record import CompTrapOpSubset
 class TrapInputData(IntegerData):
     regspec = [('INT', 'ra', '0:63'),  # RA
                ('INT', 'rb', '0:63'),  # RB/immediate
-               ('FAST', 'spr1', '0:63'), # SRR0
-               ('FAST', 'spr2', '0:63'), # SRR1
+               ('FAST', 'fast1', '0:63'), # SRR0
+               ('FAST', 'fast2', '0:63'), # SRR1
                ('FAST', 'cia', '0:63'),  # Program counter (current)
                ('FAST', 'msr', '0:63')]  # MSR
     def __init__(self, pspec):
         super().__init__(pspec, False)
         # convenience
-        self.srr0, self.srr1 = self.spr1, self.spr2
+        self.srr0, self.srr1 = self.fast1, self.fast2
         self.a, self.b = self.ra, self.rb
 
 
 class TrapOutputData(IntegerData):
     regspec = [('INT', 'o', '0:63'),     # RA
-               ('FAST', 'spr1', '0:63'), # SRR0 SPR
-               ('FAST', 'spr2', '0:63'), # SRR1 SPR
+               ('FAST', 'fast1', '0:63'), # SRR0 SPR
+               ('FAST', 'fast2', '0:63'), # SRR1 SPR
                ('FAST', 'nia', '0:63'),  # NIA (Next PC)
                ('FAST', 'msr', '0:63')]  # MSR
     def __init__(self, pspec):
         super().__init__(pspec, True)
         # convenience
-        self.srr0, self.srr1 = self.spr1, self.spr2
+        self.srr0, self.srr1 = self.fast1, self.fast2
 
 
 
index 5ab41ee9ead831f4e19036f9a0c451bd36841235..25514730c75c5cba8fafa48aa76a38ddbccd9d0f 100644 (file)
@@ -26,6 +26,7 @@ def get_cu_inputs(dec2, sim):
     yield from ALUHelpers.get_sim_int_ra(res, sim, dec2) # RA
     yield from ALUHelpers.get_sim_int_rb(res, sim, dec2) # RB
     yield from ALUHelpers.get_sim_fast_spr1(res, sim, dec2) # SPR1
+    yield from ALUHelpers.get_sim_fast_spr2(res, sim, dec2) # SPR2
     ALUHelpers.get_sim_cia(res, sim, dec2) # PC
     ALUHelpers.get_sim_msr(res, sim, dec2) # MSR
 
@@ -43,6 +44,8 @@ def set_alu_inputs(alu, dec2, sim):
     inp = yield from get_cu_inputs(dec2, sim)
     yield from ALUHelpers.set_int_ra(alu, dec2, inp)
     yield from ALUHelpers.set_int_rb(alu, dec2, inp)
+    yield from ALUHelpers.set_fast_spr1(alu, dec2, inp) # SPR1
+    yield from ALUHelpers.set_fast_spr2(alu, dec2, inp) # SPR2
 
     yield from ALUHelpers.set_cia(alu, dec2, inp)
     yield from ALUHelpers.set_msr(alu, dec2, inp)
@@ -104,6 +107,25 @@ class TrapTestCase(FHDLTestCase):
             initial_regs[2] = 1
             self.run_tst_program(Program(lst), initial_regs)
 
+    def test_3_mtmsr_0(self):
+        lst = ["mtmsr 1,0"]
+        initial_regs = [0] * 32
+        initial_regs[1] = 0xffffffffffffffff
+        self.run_tst_program(Program(lst), initial_regs)
+
+    def test_3_mtmsr_1(self):
+        lst = ["mtmsr 1,1"]
+        initial_regs = [0] * 32
+        initial_regs[1] = 0xffffffffffffffff
+        self.run_tst_program(Program(lst), initial_regs)
+
+    def test_999_illegal(self):
+        # ok, um this is a bit of a cheat: use an instruction we know
+        # is not implemented by either ISACaller or the core
+        lst = ["tbegin."]
+        initial_regs = [0] * 32
+        self.run_tst_program(Program(lst), initial_regs)
+
     def test_ilang(self):
         pspec = TrapPipeSpec(id_wid=2)
         alu = TrapBasePipe(pspec)
@@ -163,7 +185,7 @@ class TestRunner(FHDLTestCase):
                     yield pdecode2.dec.bigendian.eq(0)  # little / big?
                     yield instruction.eq(ins)          # raw binary instr.
                     yield Settle()
-                    fn_unit = yield pdecode2.e.fn_unit
+                    fn_unit = yield pdecode2.e.do.fn_unit
                     self.assertEqual(fn_unit, Function.TRAP.value)
                     yield from set_alu_inputs(alu, pdecode2, sim)
                     yield
@@ -188,7 +210,7 @@ class TestRunner(FHDLTestCase):
 
     def check_alu_outputs(self, alu, dec2, sim, code):
 
-        rc = yield dec2.e.rc.data
+        rc = yield dec2.e.do.rc.data
         cridx_ok = yield dec2.e.write_cr.ok
         cridx = yield dec2.e.write_cr.data
 
@@ -208,17 +230,18 @@ class TestRunner(FHDLTestCase):
         print ("output", res)
 
         yield from ALUHelpers.get_sim_int_o(sim_o, sim, dec2)
-        yield from ALUHelpers.get_wr_sim_cr_a(sim_o, sim, dec2)
-        yield from ALUHelpers.get_sim_xer_ov(sim_o, sim, dec2)
-        yield from ALUHelpers.get_wr_sim_xer_ca(sim_o, sim, dec2)
-        ALUHelpers.get_sim_cia(sim_o, sim, dec2)
+        yield from ALUHelpers.get_wr_fast_spr1(sim_o, sim, dec2)
+        yield from ALUHelpers.get_wr_fast_spr2(sim_o, sim, dec2)
+        ALUHelpers.get_sim_nia(sim_o, sim, dec2)
         ALUHelpers.get_sim_msr(sim_o, sim, dec2)
 
-        ALUHelpers.check_cr_a(self, res, sim_o, "CR%d %s" % (cridx, code))
-        ALUHelpers.check_xer_ov(self, res, sim_o, code)
-        ALUHelpers.check_xer_ca(self, res, sim_o, code)
+        print ("sim output", sim_o)
+
         ALUHelpers.check_int_o(self, res, sim_o, code)
-        ALUHelpers.check_xer_so(self, res, sim_o, code)
+        ALUHelpers.check_fast_spr1(self, res, sim_o, code)
+        ALUHelpers.check_fast_spr2(self, res, sim_o, code)
+        ALUHelpers.check_nia(self, res, sim_o, code)
+        ALUHelpers.check_msr(self, res, sim_o, code)
 
 
 if __name__ == "__main__":
index d15dca7eb7a7ea2f1fd1f2ece577df26818574b3..9c9a53c4bc1ab9157dc3da13719bf5566f64082b 100644 (file)
@@ -15,7 +15,7 @@ class CompTrapOpSubset(Record):
                   ('fn_unit', Function),
                   ('insn', 32),
                   ('is_32bit', 1),
-                  ('traptype', 4), # see trap main_stage.py and PowerDecoder2
+                  ('traptype', 5), # see trap main_stage.py and PowerDecoder2
                   ('trapaddr', 13),
                   )
 
@@ -34,7 +34,7 @@ class CompTrapOpSubset(Record):
         """
         res = []
         for fname, sig in self.fields.items():
-            eqfrom = other.fields[fname]
+            eqfrom = other.do.fields[fname]
             res.append(sig.eq(eqfrom))
         return res
 
diff --git a/src/soc/regfile/formal/.gitignore b/src/soc/regfile/formal/.gitignore
new file mode 100644 (file)
index 0000000..a51e080
--- /dev/null
@@ -0,0 +1 @@
+proof*
index 7ed51a8a11f54cfdeefdc2157d2dbdb4ff51dc90..74c283f3e036c8c79545795a49bc15b37088af3f 100644 (file)
@@ -70,13 +70,13 @@ class FastRegs(RegFileArray):
         super().__init__(64, 8)
         self.w_ports = {'nia': self.write_port("nia"),
                         'msr': self.write_port("dest2"),
-                        'spr1': self.write_port("dest3"),
-                        'spr2': self.write_port("dest4"),
+                        'fast1': self.write_port("dest3"),
+                        'fast2': self.write_port("dest4"),
                         'd_wr1': self.write_port("d_wr1")}
         self.r_ports = {'cia': self.read_port("src1"),
                         'msr': self.read_port("src2"),
-                        'spr1': self.read_port("src3"),
-                        'spr2': self.read_port("src4"),
+                        'fast1': self.read_port("src3"),
+                        'fast2': self.read_port("src4"),
                         'd_rd1': self.read_port("d_rd1")}
 
 
@@ -136,8 +136,8 @@ class SPRRegs(RegFile):
     def __init__(self):
         n_sprs = len(SPR)
         super().__init__(64, n_sprs)
-        self.w_ports = {'spr': self.write_port(name="dest")}
-        self.r_ports = {'spr': self.read_port("src")}
+        self.w_ports = {'spr1': self.write_port(name="dest")}
+        self.r_ports = {'spr1': self.read_port("src")}
 
 
 # class containing all regfiles: int, cr, xer, fast, spr
index f27d711e417e0fd990cd42decbd5988497c979f3..1587e550d0a445406bee600cb2215af16044b5b9 100644 (file)
@@ -8,3 +8,7 @@ def fast_reg_to_spr(spr_num):
         return SPR.LR.value
     elif spr_num == FastRegs.TAR:
         return SPR.TAR.value
+    elif spr_num == FastRegs.SRR0:
+        return SPR.SRR0.value
+    elif spr_num == FastRegs.SRR1:
+        return SPR.SRR1.value
index 8e4ff93fd281867f23ad46f2414d32b534f2098d..f5dd58e352d336285c516d559d6bb7e0fd087083 100644 (file)
@@ -32,6 +32,7 @@ from soc.decoder.power_decoder2 import PowerDecode2
 from soc.decoder.decode2execute1 import Data
 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
 from soc.config.test.test_loadstore import TestMemPspec
+from soc.decoder.power_enums import InternalOp
 import operator
 
 
@@ -70,7 +71,7 @@ class NonProductionCore(Elaboratable):
         self.pdecode2 = PowerDecode2(pdecode)   # instruction decoder
 
         # issue/valid/busy signalling
-        self.ivalid_i = self.pdecode2.e.valid   # instruction is valid
+        self.ivalid_i = self.pdecode2.valid   # instruction is valid
         self.issue_i = Signal(reset_less=True)
         self.busy_o = Signal(name="corebusy_o", reset_less=True)
 
@@ -78,6 +79,11 @@ class NonProductionCore(Elaboratable):
         self.bigendian_i = self.pdecode2.dec.bigendian
         self.raw_opcode_i = self.pdecode2.dec.raw_opcode_in
 
+        # start/stop and terminated signalling
+        self.core_start_i = Signal(reset_less=True)
+        self.core_stop_i = Signal(reset_less=True)
+        self.core_terminated_o = Signal(reset=1) # indicates stopped
+
     def elaborate(self, platform):
         m = Module()
 
@@ -88,13 +94,24 @@ class NonProductionCore(Elaboratable):
         regs = self.regs
         fus = self.fus.fus
 
-        fu_bitdict = self.connect_instruction(m)
+        # core start/stopped state
+        core_stopped = Signal(reset=1) # begins in stopped state
+
+        # start/stop signalling
+        with m.If(self.core_start_i):
+            m.d.sync += core_stopped.eq(0)
+        with m.If(self.core_stop_i):
+            m.d.sync += core_stopped.eq(1)
+        m.d.comb += self.core_terminated_o.eq(core_stopped)
+
+        # connect up Function Units, then read/write ports
+        fu_bitdict = self.connect_instruction(m, core_stopped)
         self.connect_rdports(m, fu_bitdict)
         self.connect_wrports(m, fu_bitdict)
 
         return m
 
-    def connect_instruction(self, m):
+    def connect_instruction(self, m, core_stopped):
         comb, sync = m.d.comb, m.d.sync
         fus = self.fus.fus
         dec2 = self.pdecode2
@@ -105,18 +122,41 @@ class NonProductionCore(Elaboratable):
         for i, funame in enumerate(fus.keys()):
             fu_bitdict[funame] = fu_enable[i]
 
-        # connect up instructions.  only one is enabled at any given time
-        for funame, fu in fus.items():
-            fnunit = fu.fnunit.value
-            enable = Signal(name="en_%s" % funame, reset_less=True)
-            comb += enable.eq(self.ivalid_i & (dec2.e.fn_unit & fnunit).bool())
-            with m.If(enable):
-                comb += fu.oper_i.eq_from_execute1(dec2.e)
-                comb += fu.issue_i.eq(self.issue_i)
-                comb += self.busy_o.eq(fu.busy_o)
-                rdmask = dec2.rdflags(fu)
-                comb += fu.rdmaskn.eq(~rdmask)
-            comb += fu_bitdict[funame].eq(enable)
+        # only run when allowed and when instruction is valid
+        can_run = Signal(reset_less=True)
+        comb += can_run.eq(self.ivalid_i & ~core_stopped)
+
+        # sigh - need a NOP counter
+        counter = Signal(2)
+        with m.If(counter != 0):
+            sync += counter.eq(counter - 1)
+        comb += self.busy_o.eq(counter != 0)
+
+        # check for ATTN: halt if true
+        with m.If(self.ivalid_i & (dec2.e.do.insn_type == InternalOp.OP_ATTN)):
+            m.d.sync += core_stopped.eq(1)
+
+        with m.Elif(self.ivalid_i & (dec2.e.do.insn_type == InternalOp.OP_NOP)):
+            sync += counter.eq(2)
+            comb += self.busy_o.eq(1)
+
+        with m.Else():
+            # connect up instructions.  only one is enabled at any given time
+            for funame, fu in fus.items():
+                fnunit = fu.fnunit.value
+                enable = Signal(name="en_%s" % funame, reset_less=True)
+                comb += enable.eq((dec2.e.do.fn_unit & fnunit).bool() & can_run)
+
+                # run this FunctionUnit if enabled, except if the instruction
+                # is "attn" in which case we HALT.
+                with m.If(enable):
+                    # route operand, issue, busy, read flags and mask to FU
+                    comb += fu.oper_i.eq_from_execute1(dec2.e)
+                    comb += fu.issue_i.eq(self.issue_i)
+                    comb += self.busy_o.eq(fu.busy_o)
+                    rdmask = dec2.rdflags(fu)
+                    comb += fu.rdmaskn.eq(~rdmask)
+                    comb += fu_bitdict[funame].eq(enable)
 
         return fu_bitdict
 
index 0ba749970e089fb0e82a9782b31fd33674b70967..fc0bbd0bb894e28098b3d74d9df974f8c76ce829 100644 (file)
@@ -24,6 +24,7 @@ from soc.regfile.regfiles import FastRegs
 from soc.simple.core import NonProductionCore
 from soc.config.test.test_loadstore import TestMemPspec
 from soc.config.ifetch import ConfigFetchUnit
+from soc.decoder.power_enums import InternalOp
 
 
 class TestIssuer(Elaboratable):
@@ -70,14 +71,14 @@ class TestIssuer(Elaboratable):
 
         # PC and instruction from I-Memory
         current_insn = Signal(32) # current fetched instruction (note sync)
-        current_pc = Signal(64) # current PC (note it is reset/sync)
+        cur_pc = Signal(64) # current PC (note it is reset/sync)
         pc_changed = Signal() # note write to PC
-        comb += self.pc_o.eq(current_pc)
+        comb += self.pc_o.eq(cur_pc)
         ilatch = Signal(32)
 
         # next instruction (+4 on current)
         nia = Signal(64, reset_less=True)
-        comb += nia.eq(current_pc + 4)
+        comb += nia.eq(cur_pc + 4)
 
         # temporaries
         core_busy_o = core.busy_o         # core is busy
@@ -86,65 +87,67 @@ class TestIssuer(Elaboratable):
         core_be_i = core.bigendian_i      # bigendian mode
         core_opcode_i = core.raw_opcode_i # raw opcode
 
-        # actually use a nmigen FSM for the first time (w00t)
-        with m.FSM() as fsm:
-
-            # waiting (zzz)
-            with m.State("IDLE"):
-                sync += pc_changed.eq(0)
-                with m.If(self.go_insn_i):
-                    # instruction allowed to go: start by reading the PC
-                    pc = Signal(64, reset_less=True)
-                    with m.If(self.pc_i.ok):
-                        # incoming override (start from pc_i)
-                        comb += pc.eq(self.pc_i.data)
+        insn_type = core.pdecode2.e.do.insn_type
+
+        # only run if not in halted state
+        with m.If(~core.core_terminated_o):
+
+            # actually use a nmigen FSM for the first time (w00t)
+            with m.FSM() as fsm:
+
+                # waiting (zzz)
+                with m.State("IDLE"):
+                    sync += pc_changed.eq(0)
+                    with m.If(self.go_insn_i):
+                        # instruction allowed to go: start by reading the PC
+                        pc = Signal(64, reset_less=True)
+                        with m.If(self.pc_i.ok):
+                            # incoming override (start from pc_i)
+                            comb += pc.eq(self.pc_i.data)
+                        with m.Else():
+                            # otherwise read FastRegs regfile for PC
+                            comb += self.fast_rd1.ren.eq(1<<FastRegs.PC)
+                            comb += pc.eq(self.fast_rd1.data_o)
+                        # capture the PC and also drop it into Insn Memory
+                        # we have joined a pair of combinatorial memory
+                        # lookups together.  this is Generally Bad.
+                        comb += self.imem.a_pc_i.eq(pc)
+                        comb += self.imem.a_valid_i.eq(1)
+                        comb += self.imem.f_valid_i.eq(1)
+                        sync += cur_pc.eq(pc)
+                        m.next = "INSN_READ" # move to "wait for bus" phase
+
+                # waiting for instruction bus (stays there until not busy)
+                with m.State("INSN_READ"):
+                    with m.If(self.imem.f_busy_o): # zzz...
+                        # busy: stay in wait-read
+                        comb += self.imem.a_valid_i.eq(1)
+                        comb += self.imem.f_valid_i.eq(1)
                     with m.Else():
-                        # otherwise read FastRegs regfile for PC
-                        comb += self.fast_rd1.ren.eq(1<<FastRegs.PC)
-                        comb += pc.eq(self.fast_rd1.data_o)
-                    # capture the PC and also drop it into Insn Memory
-                    # we have joined a pair of combinatorial memory
-                    # lookups together.  this is Generally Bad.
-                    comb += self.imem.a_pc_i.eq(pc)
-                    comb += self.imem.a_valid_i.eq(1)
-                    comb += self.imem.f_valid_i.eq(1)
-                    sync += current_pc.eq(pc)
-                    m.next = "INSN_READ" # move to "wait for bus" phase
-
-            # waiting for instruction bus (stays there until not busy)
-            with m.State("INSN_READ"):
-                with m.If(self.imem.f_busy_o): # zzz...
-                    # busy: stay in wait-read
-                    comb += self.imem.a_valid_i.eq(1)
-                    comb += self.imem.f_valid_i.eq(1)
-                with m.Else():
-                    # not busy: instruction fetched
-                    insn = self.imem.f_instr_o.word_select(current_pc[2], 32)
-                    comb += current_insn.eq(insn)
-                    comb += core_ivalid_i.eq(1) # say instruction is valid
-                    comb += core_issue_i.eq(1)  # and issued (ivalid redundant)
-                    comb += core_be_i.eq(0)     # little-endian mode
-                    comb += core_opcode_i.eq(current_insn) # actual opcode
-                    sync += ilatch.eq(current_insn)
-                    m.next = "INSN_ACTIVE" # move to "wait for completion" phase
-
-            # instruction started: must wait till it finishes
-            with m.State("INSN_ACTIVE"):
-                comb += core_ivalid_i.eq(1) # say instruction is valid
-                comb += core_opcode_i.eq(ilatch) # actual opcode
-                #sync += core_issue_i.eq(0) # issue raises for only one cycle
-                with m.If(self.fast_nia.wen):
-                    sync += pc_changed.eq(1)
-                with m.If(~core_busy_o): # instruction done!
-                    #sync += core_ivalid_i.eq(0) # say instruction is invalid
-                    #sync += core_opcode_i.eq(0) # clear out (no good reason)
-                    # ok here we are not reading the branch unit.  TODO
-                    # this just blithely overwrites whatever pipeline updated
-                    # the PC
-                    with m.If(~pc_changed):
-                        comb += self.fast_wr1.wen.eq(1<<FastRegs.PC)
-                        comb += self.fast_wr1.data_i.eq(nia)
-                    m.next = "IDLE" # back to idle
+                        # not busy: instruction fetched
+                        insn = self.imem.f_instr_o.word_select(cur_pc[2], 32)
+                        comb += current_insn.eq(insn)
+                        comb += core_ivalid_i.eq(1) # instruction is valid
+                        comb += core_issue_i.eq(1)  # and issued 
+                        comb += core_opcode_i.eq(current_insn) # actual opcode
+                        sync += ilatch.eq(current_insn)
+                        m.next = "INSN_ACTIVE" # move to "wait completion" 
+
+                # instruction started: must wait till it finishes
+                with m.State("INSN_ACTIVE"):
+                    with m.If(insn_type != InternalOp.OP_NOP):
+                        comb += core_ivalid_i.eq(1) # say instruction is valid
+                    comb += core_opcode_i.eq(ilatch) # actual opcode
+                    with m.If(self.fast_nia.wen):
+                        sync += pc_changed.eq(1)
+                    with m.If(~core_busy_o): # instruction done!
+                        # ok here we are not reading the branch unit.  TODO
+                        # this just blithely overwrites whatever pipeline
+                        # updated the PC
+                        with m.If(~pc_changed):
+                            comb += self.fast_wr1.wen.eq(1<<FastRegs.PC)
+                            comb += self.fast_wr1.data_i.eq(nia)
+                        m.next = "IDLE" # back to idle
 
         return m
 
@@ -162,6 +165,8 @@ class TestIssuer(Elaboratable):
 
 if __name__ == '__main__':
     units = {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
+             'spr': 1,
+             'mul': 1,
              'shiftrot': 1}
     pspec = TestMemPspec(ldst_ifacetype='bare_wb',
                          imem_ifacetype='bare_wb',
index 322661e7ca4ba21c798b971f44da7f6f58151f67..a341cfe5fdbf5ab1940e81b63a8434a3f91d0f9e 100644 (file)
@@ -14,7 +14,7 @@ from soc.decoder.power_decoder import create_pdecode
 from soc.decoder.power_decoder2 import PowerDecode2
 from soc.decoder.isa.all import ISA
 from soc.decoder.power_enums import Function, XER_bits
-
+from soc.config.test.test_loadstore import TestMemPspec
 
 from soc.simple.core import NonProductionCore
 from soc.experiment.compalu_multi import find_ok # hack
@@ -74,8 +74,8 @@ def setup_regs(core, test):
     so = yield xregs.regs[xregs.SO].reg
     ov = yield xregs.regs[xregs.OV].reg
     ca = yield xregs.regs[xregs.CA].reg
-    oe = yield pdecode2.e.oe.oe
-    oe_ok = yield pdecode2.e.oe.oe_ok
+    oe = yield pdecode2.e.do.oe.oe
+    oe_ok = yield pdecode2.e.do.oe.oe_ok
 
     print ("before: so/ov-32/ca-32", so, bin(ov), bin(ca))
     print ("oe:", oe, oe_ok)
@@ -132,9 +132,11 @@ def check_regs(dut, sim, core, test, code):
 def wait_for_busy_hi(cu):
     while True:
         busy_o = yield cu.busy_o
-        if busy_o:
+        terminated_o = yield cu.core_terminated_o
+        if busy_o or terminated_o:
+            print("busy/terminated:", busy_o, terminated_o)
             break
-        print("!busy",)
+        print("!busy", busy_o, terminated_o)
         yield
 
 def set_issue(core, dec2, sim):
@@ -147,7 +149,9 @@ def set_issue(core, dec2, sim):
 def wait_for_busy_clear(cu):
     while True:
         busy_o = yield cu.busy_o
-        if not busy_o:
+        terminated_o = yield cu.core_terminated_o
+        if not busy_o or terminated_o:
+            print("busy/terminated:", busy_o, terminated_o)
             break
         print("busy",)
         yield
@@ -164,7 +168,13 @@ class TestRunner(FHDLTestCase):
         instruction = Signal(32)
         ivalid_i = Signal()
 
-        m.submodules.core = core = NonProductionCore()
+        pspec = TestMemPspec(ldst_ifacetype='testpi',
+                             imem_ifacetype='',
+                             addr_wid=48,
+                             mask_wid=8,
+                             reg_wid=64)
+
+        m.submodules.core = core = NonProductionCore(pspec)
         pdecode2 = core.pdecode2
         l0 = core.l0
 
index 028872accfbc3bc1d5548ec944d623adf19c7c2c..e0c404faa968746dd4c05916950c698580497ba1 100644 (file)
@@ -30,28 +30,37 @@ from soc.fu.logical.test.test_pipe_caller import LogicalTestCase
 from soc.fu.shift_rot.test.test_pipe_caller import ShiftRotTestCase
 from soc.fu.cr.test.test_pipe_caller import CRTestCase
 from soc.fu.branch.test.test_pipe_caller import BranchTestCase
+from soc.fu.spr.test.test_pipe_caller import SPRTestCase
 from soc.fu.ldst.test.test_pipe_caller import LDSTTestCase
-from soc.simulator.test_sim import GeneralTestCases
+from soc.simulator.test_sim import (GeneralTestCases, AttnTestCase)
 
 
 def setup_i_memory(imem, startaddr, instructions):
     mem = imem
-    print ("insn before, init mem", mem.depth, mem.width, mem)
+    print ("insn before, init mem", mem.depth, mem.width, mem,
+                                    len(instructions))
     for i in range(mem.depth):
         yield mem._array[i].eq(0)
     yield Settle()
     startaddr //= 4 # instructions are 32-bit
     mask = ((1<<64)-1)
-    for insn, code in instructions:
+    for ins in instructions:
+        if isinstance(ins, tuple):
+            insn, code = ins
+        else:
+            insn, code = ins, ''
         msbs = (startaddr>>1) & mask
         val = yield mem._array[msbs]
-        print ("before set", hex(startaddr), hex(msbs), hex(val))
+        if insn != 0:
+            print ("before set", hex(4*startaddr),
+                                 hex(msbs), hex(val), hex(insn))
         lsb = 1 if (startaddr & 1) else 0
         val = (val | (insn << (lsb*32))) & mask
         yield mem._array[msbs].eq(val)
         yield Settle()
-        print ("after  set", hex(startaddr), hex(msbs), hex(val))
-        print ("instr: %06x 0x%x %s %08x" % (4*startaddr, insn, code, val))
+        if insn != 0:
+            print ("after  set", hex(4*startaddr), hex(msbs), hex(val))
+            print ("instr: %06x 0x%x %s %08x" % (4*startaddr, insn, code, val))
         startaddr += 1
         startaddr = startaddr & mask
 
@@ -88,6 +97,13 @@ class TestRunner(FHDLTestCase):
         def process():
 
             for test in self.test_data:
+
+                # get core going
+                yield core.core_start_i.eq(1)
+                yield
+                yield core.core_start_i.eq(0)
+                yield Settle()
+
                 print(test.name)
                 program = test.program
                 self.subTest(test.name)
@@ -126,11 +142,16 @@ class TestRunner(FHDLTestCase):
                     yield
                     yield issuer.pc_i.ok.eq(0) # don't change PC from now on
                     yield go_insn_i.eq(0)      # and don't issue a new insn
+                    yield Settle()
 
                     # wait until executed
-                    yield from wait_for_busy_hi(core)
+                    #yield from wait_for_busy_hi(core)
+                    yield
                     yield from wait_for_busy_clear(core)
 
+                    terminated = yield core.core_terminated_o
+                    print ("terminated", terminated)
+
                     print ("sim", code)
                     # call simulated operation
                     opname = code.split(' ')[0]
@@ -144,6 +165,10 @@ class TestRunner(FHDLTestCase):
                     # Memory check
                     yield from check_sim_memory(self, l0, sim, code)
 
+                    terminated = yield core.core_terminated_o
+                    if terminated:
+                        break
+
         sim.add_sync_process(process)
         with sim.write_vcd("issuer_simulator.vcd",
                             traces=[]):
@@ -153,6 +178,7 @@ class TestRunner(FHDLTestCase):
 if __name__ == "__main__":
     unittest.main(exit=False)
     suite = unittest.TestSuite()
+    suite.addTest(TestRunner(AttnTestCase.test_data))
     suite.addTest(TestRunner(GeneralTestCases.test_data))
     suite.addTest(TestRunner(LDSTTestCase.test_data))
     suite.addTest(TestRunner(CRTestCase.test_data))
@@ -160,6 +186,7 @@ if __name__ == "__main__":
     suite.addTest(TestRunner(LogicalTestCase.test_data))
     suite.addTest(TestRunner(ALUTestCase.test_data))
     suite.addTest(TestRunner(BranchTestCase.test_data))
+    suite.addTest(TestRunner(SPRTestCase.test_data))
 
     runner = unittest.TextTestRunner()
     runner.run(suite)
diff --git a/src/soc/simple/test/test_microwatt.py b/src/soc/simple/test/test_microwatt.py
new file mode 100644 (file)
index 0000000..a737489
--- /dev/null
@@ -0,0 +1,155 @@
+from soc.simulator.program import Program
+from soc.fu.test.common import TestCase
+
+import unittest
+
+from nmigen import Module, Signal
+from nmigen.back.pysim import Simulator, Delay, Settle
+from nmutil.formaltest import FHDLTestCase
+
+from soc.simple.issuer import TestIssuer
+
+from soc.config.test.test_loadstore import TestMemPspec
+from soc.simple.test.test_core import (setup_regs, check_regs,
+                                       wait_for_busy_clear,
+                                       wait_for_busy_hi)
+from soc.fu.compunits.test.test_compunit import (setup_test_memory,
+                                                 check_sim_memory)
+
+from soc.simple.test.test_issuer import setup_i_memory
+
+import sys
+sys.setrecursionlimit(10**6)
+
+
+class BinaryTestCase(FHDLTestCase):
+    test_data = []
+
+    def __init__(self, name="general"):
+        super().__init__(name)
+        self.test_name = name
+
+    @unittest.skip("a bit big")
+    def test_binary(self):
+        with Program("1.bin") as program:
+            self.run_tst_program(program)
+
+    def test_binary(self):
+        with Program("hello_world.bin") as program:
+            self.run_tst_program(program)
+
+    def run_tst_program(self, prog):
+        initial_regs = [0] * 32
+        tc = TestCase(prog, self.test_name, initial_regs, None, 0,
+                                            None, 0,
+                      do_sim=False)
+        self.test_data.append(tc)
+
+
+class TestRunner(FHDLTestCase):
+    def __init__(self, tst_data):
+        super().__init__("binary_runner")
+        self.test_data = tst_data
+
+    def binary_runner(self):
+        m = Module()
+        comb = m.d.comb
+        go_insn_i = Signal()
+        pc_i = Signal(32)
+
+        pspec = TestMemPspec(ldst_ifacetype='test_bare_wb',
+                             imem_ifacetype='test_bare_wb',
+                             addr_wid=48,
+                             mask_wid=8,
+                             reg_wid=64,
+                             imem_test_depth=32768,
+                             dmem_test_depth=32768)
+        m.submodules.issuer = issuer = TestIssuer(pspec)
+        imem = issuer.imem._get_memory()
+        core = issuer.core
+        pdecode2 = core.pdecode2
+        l0 = core.l0
+
+        comb += issuer.pc_i.data.eq(pc_i)
+        comb += issuer.go_insn_i.eq(go_insn_i)
+
+        # nmigen Simulation
+        sim = Simulator(m)
+        sim.add_clock(1e-6)
+
+        def process():
+
+            for test in self.test_data:
+
+                # get core going
+                yield core.bigendian_i.eq(1)
+                yield core.core_start_i.eq(1)
+                yield
+                yield core.core_start_i.eq(0)
+                yield Settle()
+
+                print(test.name)
+                program = test.program
+                self.subTest(test.name)
+                print ("regs", test.regs)
+                print ("sprs", test.sprs)
+                print ("cr", test.cr)
+                print ("mem", test.mem)
+                print ("msr", test.msr)
+                print ("assem", program.assembly)
+                instructions = list(program.generate_instructions())
+
+                print ("instructions", len(instructions))
+
+                pc = 0 # start of memory
+
+                yield from setup_i_memory(imem, pc, instructions)
+                #yield from setup_test_memory(l0, sim)
+                yield from setup_regs(core, test)
+
+                yield pc_i.eq(pc)
+                yield issuer.pc_i.ok.eq(1)
+
+                while True:
+
+                    # start the instruction
+                    yield go_insn_i.eq(1)
+                    yield
+                    yield issuer.pc_i.ok.eq(0) # don't change PC from now on
+                    yield go_insn_i.eq(0)      # and don't issue a new insn
+                    yield from wait_for_busy_hi(core)
+                    yield Settle()
+
+                    # wait until executed
+                    ins = yield core.raw_opcode_i
+                    pc = yield issuer.pc_o
+                    print("instruction: 0x%x @ %x" % (ins & 0xffffffff, pc))
+                    yield from wait_for_busy_clear(core)
+
+                    terminated = yield core.core_terminated_o
+                    print ("terminated", terminated)
+
+                    terminated = yield core.core_terminated_o
+                    if terminated:
+                        break
+
+            # register check
+            #yield from check_regs(self, sim, core, test, code)
+
+            # Memory check
+            #yield from check_sim_memory(self, l0, sim, code)
+
+        sim.add_sync_process(process)
+        with sim.write_vcd("binary_issuer_simulator.vcd",
+                            traces=[]):
+            sim.run()
+
+
+if __name__ == "__main__":
+    unittest.main(exit=False)
+    suite = unittest.TestSuite()
+    suite.addTest(TestRunner(BinaryTestCase.test_data))
+
+    runner = unittest.TextTestRunner()
+    runner.run(suite)
+
index fdcea64201580ab7432f767f6a16c1c6d9580e7a..4cf9b9ffd52778e2a971178121f8979679b690b6 100644 (file)
@@ -20,10 +20,15 @@ obj_fmt = "-be"
 
 class Program:
     def __init__(self, instructions):
-        if isinstance(instructions, list):
-            instructions = '\n'.join(instructions)
-        self.assembly = instructions + '\n' # plus final newline
-        self._assemble()
+        if isinstance(instructions, str): # a str is a path to a binary file
+            self.binfile = open(instructions, "rb")  # not closed in this method
+            self.assembly = '' # binary input: no assembly text, _assemble() skipped
+            print ("program", self.binfile)
+        else:  # anything else is treated as assembly source
+            if isinstance(instructions, list):
+                instructions = '\n'.join(instructions)
+            self.assembly = instructions + '\n' # plus final newline
+            self._assemble()
         self._instructions = list(self._get_instructions())
 
     def __enter__(self):
index c265a77ffa2b16ec30487a04b86cd1fc3b26457b..b39ada05d07d1bb4879ec3f93bbdba9b400e1c79 100644 (file)
@@ -112,6 +112,8 @@ def run_program(program, initial_mem=None):
     q.delete_breakpoint()
     # run to completion
     q.break_address(0x20000000 + program.size())
+    # or to trap
+    q.break_address(0x700)
     q.gdb_continue()
     return q
 
diff --git a/src/soc/simulator/test_mul_sim.py b/src/soc/simulator/test_mul_sim.py
new file mode 100644 (file)
index 0000000..a3db0f1
--- /dev/null
@@ -0,0 +1,54 @@
+from nmigen import Module, Signal
+from nmigen.back.pysim import Simulator, Delay, Settle
+from nmigen.test.utils import FHDLTestCase
+import unittest
+from soc.decoder.power_decoder import (create_pdecode)
+from soc.decoder.power_enums import (Function, InternalOp,
+                                     In1Sel, In2Sel, In3Sel,
+                                     OutSel, RC, LdstLen, CryIn,
+                                     single_bit_flags, Form, SPR,
+                                     get_signal_name, get_csv)
+from soc.decoder.power_decoder2 import (PowerDecode2)
+from soc.simulator.program import Program
+from soc.simulator.qemu import run_program
+from soc.decoder.isa.all import ISA
+from soc.fu.test.common import TestCase
+from soc.simulator.test_sim import DecoderBase
+
+
+
+class MulTestCases(FHDLTestCase):  # multiply programs, recorded as TestCase objects
+    test_data = []  # class attribute: one list shared by all instances
+
+    def __init__(self, name="div"):  # NOTE(review): "div" default in a mul test - copy/paste?
+        super().__init__(name)
+        self.test_name = name  # label passed to TestCase in run_tst_program
+
+    def tst_mullw(self):  # "tst_" prefix: not collected by unittest's default "test" prefix
+        lst = ["addi 1, 0, 0x5678",
+               "addi 2, 0, 0x1234",
+               "mullw 3, 1, 2"]
+        self.run_tst_program(Program(lst), [3])
+
+    def test_mullwo(self):  # mullwo on two negated operands
+        lst = ["addi 1, 0, 0x5678",
+               "neg 1, 1",
+               "addi 2, 0, 0x1234",
+               "neg 2, 2",
+               "mullwo 3, 1, 2"]
+        self.run_tst_program(Program(lst), [3])
+
+    def run_tst_program(self, prog, initial_regs=None, initial_sprs=None,
+                                    initial_mem=None):
+        initial_regs = [0] * 32  # caller's initial_regs is discarded: always 32 zeros
+        tc = TestCase(prog, self.test_name, initial_regs, initial_sprs, 0,
+                                            initial_mem, 0)
+        self.test_data.append(tc)  # appends to the shared class-level list
+
+
+class MulDecoderTestCase(DecoderBase, MulTestCases):
+    pass  # DecoderBase is first in the MRO, so its methods win over MulTestCases'
+
+
+if __name__ == "__main__":
+    unittest.main()  # run this module's tests when executed as a script
index de4193bc5f3bacd7311a984682e728c52484b6a4..f1265a3a17cc5be272cd4ec2d464ee09da00ff14 100644 (file)
@@ -15,9 +15,30 @@ from soc.decoder.isa.all import ISA
 from soc.fu.test.common import TestCase
 
 
-class Register:
-    def __init__(self, num):
-        self.num = num
+class AttnTestCase(FHDLTestCase):  # attn program, recorded as a TestCase object
+    test_data = []  # class attribute: one list shared by all instances
+
+    def __init__(self, name="general"):
+        super().__init__(name)
+        self.test_name = name  # label passed to TestCase in run_tst_program
+
+    def test_0_attn(self):
+        """Simple test of attn: the program is 4 instructions long and
+        should halt at the 2nd one (the attn)."""
+        lst = ["addi 6, 0, 0x10",
+               "attn",
+               "subf. 1, 6, 7",
+               "cmp cr2, 1, 6, 7",
+               ]
+        with Program(lst) as program:
+            self.run_tst_program(program, [1])  # the [1] is overwritten below
+
+    def run_tst_program(self, prog, initial_regs=None, initial_sprs=None,
+                                    initial_mem=None):
+        initial_regs = [0] * 32  # caller's initial_regs is discarded: always 32 zeros
+        tc = TestCase(prog, self.test_name, initial_regs, initial_sprs, 0,
+                                            initial_mem, 0)
+        self.test_data.append(tc)  # appends to the shared class-level list
 
 
 class GeneralTestCases(FHDLTestCase):
@@ -209,7 +230,7 @@ class GeneralTestCases(FHDLTestCase):
 
 class DecoderBase:
 
-    def run_tst(self, generator, initial_mem=None):
+    def run_tst(self, generator, initial_mem=None, initial_pc=0):
         m = Module()
         comb = m.d.comb
 
@@ -220,9 +241,13 @@ class DecoderBase:
         pdecode = create_pdecode()
         m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)
 
+        # place program at requested address
+        gen = (initial_pc, gen)
+
         simulator = ISA(pdecode2, [0] * 32, {}, 0, initial_mem, 0,
                         initial_insns=gen, respect_pc=True,
-                        disassembly=insn_code)
+                        disassembly=insn_code,
+                        initial_pc=initial_pc)
 
         sim = Simulator(m)
 
@@ -246,11 +271,12 @@ class DecoderBase:
 
     def run_tst_program(self, prog, reglist, initial_mem=None):
         import sys
-        simulator = self.run_tst(prog, initial_mem=initial_mem)
+        simulator = self.run_tst(prog, initial_mem=initial_mem,
+                                 initial_pc=0x20000000)
         prog.reset()
         with run_program(prog, initial_mem) as q:
             self.qemu_register_compare(simulator, q, reglist)
-            self.qemu_mem_compare(simulator, q, reglist)
+            self.qemu_mem_compare(simulator, q, True)
         print(simulator.gpr.dump())
 
     def qemu_mem_compare(self, sim, qemu, check=True):
@@ -280,14 +306,17 @@ class DecoderBase:
         print("qemu pc", hex(qpc))
         print("qemu cr", hex(qcr))
         print("qemu xer", bin(qxer))
+        print("sim nia", hex(sim.pc.NIA.value))
         print("sim pc", hex(sim.pc.CIA.value))
         print("sim cr", hex(sim_cr))
         print("sim xer", hex(sim_xer))
-        self.assertEqual(qcr, sim_cr)
+        self.assertEqual(qpc, sim_pc)
         for reg in regs:
             qemu_val = qemu.get_register(reg)
             sim_val = sim.gpr(reg).value
-            self.assertEqual(qemu_val, sim_val)
+            self.assertEqual(qemu_val, sim_val,
+                             "expect %x got %x" % (qemu_val, sim_val))
+        self.assertEqual(qcr, sim_cr)
 
 
 class DecoderTestCase(DecoderBase, GeneralTestCases):