From: Luke Kenneth Casson Leighton Date: Sun, 14 Jun 2020 14:00:12 +0000 (+0100) Subject: reasonably certain that the careful and slow use of little-endian data read/write X-Git-Tag: div_pipeline~386 X-Git-Url: https://git.libre-soc.org/?p=soc.git;a=commitdiff_plain;h=f11148ec9133fb151d8e83e06d61bf2fc1cb7d7b reasonably certain that the careful and slow use of little-endian data read/write and explicit endian-ness swapping is correct, when comparing the simulator against qemu --- diff --git a/src/soc/decoder/isa/caller.py b/src/soc/decoder/isa/caller.py index 4c2cfd83..1ed3cc90 100644 --- a/src/soc/decoder/isa/caller.py +++ b/src/soc/decoder/isa/caller.py @@ -26,6 +26,12 @@ special_sprs = { 'VRSAVE': 256} +def swap_order(x, nbytes): + x = x.to_bytes(nbytes, byteorder='little') + x = int.from_bytes(x, byteorder='big', signed=False) + return x + + def create_args(reglist, extra=None): args = OrderedSet() for reg in reglist: @@ -42,24 +48,25 @@ class Mem: self.mem = {} self.bytes_per_word = bytes_per_word self.word_log2 = math.ceil(math.log2(bytes_per_word)) + print ("Sim-Mem", initial_mem, self.bytes_per_word, self.word_log2) if not initial_mem: return - print ("Sim-Mem", initial_mem, self.bytes_per_word) for addr, (val, width) in initial_mem.items(): - self.st(addr, val, width) + #val = swap_order(val, width) + self.st(addr, val, width, swap=False) def _get_shifter_mask(self, wid, remainder): shifter = ((self.bytes_per_word - wid) - remainder) * \ 8 # bits per byte # XXX https://bugs.libre-soc.org/show_bug.cgi?id=377 # BE/LE mode? 
- # shifter = remainder * 8 + shifter = remainder * 8 mask = (1 << (wid * 8)) - 1 print ("width,rem,shift,mask", wid, remainder, hex(shifter), hex(mask)) return shifter, mask # TODO: Implement ld/st of lesser width - def ld(self, address, width=8): + def ld(self, address, width=8, swap=True): print("ld from addr 0x{:x} width {:d}".format(address, width)) remainder = address & (self.bytes_per_word - 1) address = address >> self.word_log2 @@ -75,16 +82,20 @@ class Mem: print ("masking", hex(val), hex(mask<<shifter), shifter) val = val & (mask << shifter) val >>= shifter + if swap: + val = swap_order(val, width) print("Read 0x{:x} from addr 0x{:x}".format(val, address)) return val - def st(self, addr, v, width=8): + def st(self, addr, v, width=8, swap=True): staddr = addr remainder = addr & (self.bytes_per_word - 1) addr = addr >> self.word_log2 print("Writing 0x{:x} to ST 0x{:x} memaddr 0x{:x}/{:x}".format(v, - staddr, addr, remainder)) + staddr, addr, remainder, swap)) assert remainder & (width - 1) == 0, "Unaligned access unsupported!" + if swap: + v = swap_order(v, width) if width != self.bytes_per_word: if addr in self.mem: val = self.mem[addr] diff --git a/src/soc/fu/compunits/test/test_compunit.py b/src/soc/fu/compunits/test/test_compunit.py index 739f95fd..554b6ded 100644 --- a/src/soc/fu/compunits/test/test_compunit.py +++ b/src/soc/fu/compunits/test/test_compunit.py @@ -123,7 +123,7 @@ class TestRunner(FHDLTestCase): if self.funit == Function.LDST: from soc.experiment.l0_cache import TstL0CacheBuffer m.submodules.l0 = l0 = TstL0CacheBuffer(n_units=1, regwid=64, - addrwid=4) + addrwid=3) pi = l0.l0.dports[0].pi m.submodules.cu = cu = self.fukls(pi, awid=3) m.d.comb += cu.ad.go.eq(cu.ad.rel) # link addr-go direct to rel @@ -155,7 +155,7 @@ class TestRunner(FHDLTestCase): mem = l0.mem.mem print ("before, init mem", mem.depth, mem.width, mem) for i in range(mem.depth): - data = sim.mem.ld(i*8, 8) + data = sim.mem.ld(i*8, 8, False) print ("init ", i, hex(data)) yield mem._array[i].eq(data) yield Settle() @@ -254,7 
+254,7 @@ class TestRunner(FHDLTestCase): print (" %6i %016x" % (i, actual_mem)) for i in range(mem.depth): - expected_mem = sim.mem.ld(i*8, 8) + expected_mem = sim.mem.ld(i*8, 8, False) actual_mem = yield mem._array[i] self.assertEqual(expected_mem, actual_mem, "%s %d %x %x" % (code, i, diff --git a/src/soc/fu/ldst/test/test_pipe_caller.py b/src/soc/fu/ldst/test/test_pipe_caller.py index 2c943c7e..25e72c02 100644 --- a/src/soc/fu/ldst/test/test_pipe_caller.py +++ b/src/soc/fu/ldst/test/test_pipe_caller.py @@ -94,12 +94,12 @@ class LDSTTestCase(FHDLTestCase): lst = ["sth 4, 0(2)", "lhz 4, 0(2)"] initial_regs = [0] * 32 - initial_regs[1] = 0x0004 - initial_regs[2] = 0x0002 + initial_regs[1] = 0x1004 + initial_regs[2] = 0x1002 initial_regs[3] = 0x15eb - initial_mem = {0x0000: (0x5432123412345678, 8), - 0x0008: (0xabcdef0187654321, 8), - 0x0020: (0x1828384822324252, 8), + initial_mem = {0x1000: (0x5432123412345678, 8), + 0x1008: (0xabcdef0187654321, 8), + 0x1020: (0x1828384822324252, 8), } self.run_tst_program(Program(lst), initial_regs, initial_mem=initial_mem) diff --git a/src/soc/simulator/qemu.py b/src/soc/simulator/qemu.py index 4130f863..c265a77f 100644 --- a/src/soc/simulator/qemu.py +++ b/src/soc/simulator/qemu.py @@ -34,12 +34,25 @@ class QemuController: breakstring = f' {breakpoint}' return self.gdb.write('-break-delete' + breakstring) + def set_byte(self, addr, v): + print ("qemu set byte", hex(addr), hex(v)) + faddr = '&{int}0x%x' % addr + res = self.gdb.write('-data-write-memory-bytes %s "%02x"' % (faddr, v)) + print ("confirm", self.get_mem(addr, 1)) + def get_mem(self, addr, nbytes): - res = self.gdb.write("-data-read-memory %d u 8 1 %d" % (addr, nbytes)) - print ("get_mem", res) + res = self.gdb.write("-data-read-memory %d u 1 1 %d" % (addr, 8*nbytes)) + #print ("get_mem", res) for x in res: if(x["type"]=="result"): - return x['payload']['memory'][0]['data'] + l = list(map(int, x['payload']['memory'][0]['data'])) + res = [] + for j in range(0, len(l), 
8): + b = 0 + for i, v in enumerate(l[j:j+8]): + b += v << (i*8) + res.append(b) + return res return None def get_registers(self): @@ -83,11 +96,15 @@ class QemuController: self.qemu_popen.stdin.close() -def run_program(program): +def run_program(program, initial_mem=None): q = QemuController(program.binfile.name) q.connect() # Run to the start of the program q.break_address(0x20000000) + if initial_mem: + for addr, (v, wid) in initial_mem.items(): + for i in range(wid): + q.set_byte(addr+i, (v>>i*8) & 0xff) q.gdb_continue() # set the CR to 0, matching the simulator q.gdb_eval('$cr=0') diff --git a/src/soc/simulator/test_sim.py b/src/soc/simulator/test_sim.py index 76fc17f6..1fc2a46d 100644 --- a/src/soc/simulator/test_sim.py +++ b/src/soc/simulator/test_sim.py @@ -21,7 +21,7 @@ class Register: class DecoderTestCase(FHDLTestCase): - def run_tst(self, generator): + def run_tst(self, generator, initial_mem=None): m = Module() comb = m.d.comb instruction = Signal(32) @@ -30,7 +30,7 @@ class DecoderTestCase(FHDLTestCase): m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode) - simulator = ISA(pdecode2, [0] * 32, {}, 0, {}, 0) + simulator = ISA(pdecode2, [0] * 32, {}, 0, initial_mem, 0) comb += pdecode2.dec.raw_opcode_in.eq(instruction) comb += pdecode2.dec.bigendian.eq(0) gen = generator.generate_instructions() @@ -61,7 +61,7 @@ class DecoderTestCase(FHDLTestCase): return simulator - def test_0_cmp(self): + def _tst0_cmp(self): lst = ["addi 6, 0, 0x10", "addi 7, 0, 0x05", "subf. 
1, 6, 7", @@ -70,32 +70,38 @@ class DecoderTestCase(FHDLTestCase): with Program(lst) as program: self.run_tst_program(program, [1]) - def test_example(self): - lst = ["addi 1, 0, 0x1234", - "addi 2, 0, 0x5678", + def _tstexample(self): + lst = ["addi 1, 0, 0x5678", + "addi 2, 0, 0x1234", "add 3, 1, 2", "and 4, 1, 2"] with Program(lst) as program: self.run_tst_program(program, [1, 2, 3, 4]) - def test_ldst(self): - lst = ["addi 1, 0, 0x1234", - "addi 2, 0, 0x5678", + def _tstldst(self): + lst = ["addi 1, 0, 0x5678", + "addi 2, 0, 0x1234", "stw 1, 0(2)", - "lwz 3, 0(2)"] + "lwz 3, 0(2)" + ] + initial_mem = {0x1230: (0x5432123412345678, 8), + 0x1238: (0xabcdef0187654321, 8), + } with Program(lst) as program: - self.run_tst_program(program, [1, 2, 3]) + self.run_tst_program(program, + [1, 2, 3], + initial_mem) - def test_ldst_extended(self): - lst = ["addi 1, 0, 0x1234", - "addi 2, 0, 0x5678", + def _tstldst_extended(self): + lst = ["addi 1, 0, 0x5678", + "addi 2, 0, 0x1234", "addi 4, 0, 0x40", "stw 1, 0x40(2)", "lwzx 3, 4, 2"] with Program(lst) as program: self.run_tst_program(program, [1, 2, 3]) - def test_0_ldst_widths(self): + def _tst0_ldst_widths(self): lst = ["addis 1, 0, 0xdead", "ori 1, 1, 0xbeef", "addi 2, 0, 0x1000", @@ -109,7 +115,7 @@ class DecoderTestCase(FHDLTestCase): with Program(lst) as program: self.run_tst_program(program, [1, 2, 3, 4, 5]) - def test_sub(self): + def _tstsub(self): lst = ["addi 1, 0, 0x1234", "addi 2, 0, 0x5678", "subf 3, 1, 2", @@ -118,7 +124,7 @@ class DecoderTestCase(FHDLTestCase): with Program(lst) as program: self.run_tst_program(program, [1, 2, 3, 4, 5]) - def test_add_with_carry(self): + def _tstadd_with_carry(self): lst = ["addi 1, 0, 5", "neg 1, 1", "addi 2, 0, 7", @@ -129,7 +135,7 @@ class DecoderTestCase(FHDLTestCase): with Program(lst) as program: self.run_tst_program(program, [1, 2, 3]) - def test_addis(self): + def _tstaddis(self): lst = ["addi 1, 0, 0x0FFF", "addis 1, 1, 0x0F" ] @@ -137,32 +143,57 @@ class 
DecoderTestCase(FHDLTestCase): self.run_tst_program(program, [1]) @unittest.skip("broken") - def test_mulli(self): + def _tstmulli(self): lst = ["addi 1, 0, 3", "mulli 1, 1, 2" ] with Program(lst) as program: self.run_tst_program(program, [1]) - def run_tst_program(self, prog, reglist): + def test_3_load_store(self): + lst = ["addi 1, 0, 0x1004", + "addi 2, 0, 0x1002", + "addi 3, 0, 0x15eb", + "sth 4, 0(2)", + "lhz 4, 0(2)"] + initial_regs = [0] * 32 + initial_regs[1] = 0x0004 + initial_regs[2] = 0x0002 + initial_regs[3] = 0x15eb + initial_mem = {0x1000: (0x5432123412345678, 8), + 0x1008: (0xabcdef0187654321, 8), + 0x1020: (0x1828384822324252, 8), + } + with Program(lst) as program: + self.run_tst_program(program, [1,2,3,4], initial_mem) + + def run_tst_program(self, prog, reglist, initial_mem=None): import sys - simulator = self.run_tst(prog) + simulator = self.run_tst(prog, initial_mem=initial_mem) prog.reset() - with run_program(prog) as q: + with run_program(prog, initial_mem) as q: self.qemu_register_compare(simulator, q, reglist) self.qemu_mem_compare(simulator, q, reglist) print(simulator.gpr.dump()) - def qemu_mem_compare(self, sim, qemu, regs): - addr = 0x1000 - qmemdump = qemu.get_mem(addr, 16) - for i in range(len(qmemdump)): - s = hex(int(qmemdump[i])) - print ("qemu mem %06x %s" % (addr+i*8, s)) + def qemu_mem_compare(self, sim, qemu, check=True): + if False: # disable convenient large interesting debugging memory dump + addr = 0x0 + qmemdump = qemu.get_mem(addr, 2048) + for i in range(len(qmemdump)): + s = hex(int(qmemdump[i])) + print ("qemu mem %06x %s" % (addr+i*8, s)) + for k, v in sim.mem.mem.items(): + qmemdump = qemu.get_mem(k*8, 8) + s = hex(int(qmemdump[0]))[2:] + print ("qemu mem %06x %16s" % (k*8, s)) for k, v in sim.mem.mem.items(): - print ("sim %06x %016x" % (k, v)) + print ("sim mem %06x %016x" % (k*8, v)) + if not check: + return for k, v in sim.mem.mem.items(): - self.assertEqual(int(qmemdump[(k-0x200)//8]), v) # magic constant?? 
+ qmemdump = qemu.get_mem(k*8, 1) + self.assertEqual(int(qmemdump[0]), v) def qemu_register_compare(self, sim, qemu, regs): qpc, qxer, qcr = qemu.get_pc(), qemu.get_xer(), qemu.get_cr()