add optional instruction memory
[soc.git] / src / soc / decoder / isa / caller.py
index 6e1b895c684e226b7c4bc96aea862266d36a5e7a..6f444339fafa2dc99f9837270febaaee800072d9 100644 (file)
@@ -1,3 +1,10 @@
+"""core of the python-based POWER9 simulator
+
+this is part of a cycle-accurate POWER9 simulator.  its primary purpose is
+not speed: it is intended for learning and educational purposes, as well
+as a means of verifying the HDL.
+"""
+
 from functools import wraps
 from soc.decoder.orderedset import OrderedSet
 from soc.decoder.selectable_int import (FieldSelectableInt, SelectableInt,
@@ -19,6 +26,12 @@ special_sprs = {
     'VRSAVE': 256}
 
 
+def swap_order(x, nbytes):
+    x = x.to_bytes(nbytes, byteorder='little')
+    x = int.from_bytes(x, byteorder='big', signed=False)
+    return x
+
+
 def create_args(reglist, extra=None):
     args = OrderedSet()
     for reg in reglist:
@@ -31,25 +44,39 @@ def create_args(reglist, extra=None):
 
 class Mem:
 
-    def __init__(self, bytes_per_word=8, initial_mem=None):
+    def __init__(self, row_bytes=8, initial_mem=None):
         self.mem = {}
-        self.bytes_per_word = bytes_per_word
-        self.word_log2 = math.ceil(math.log2(bytes_per_word))
+        self.bytes_per_word = row_bytes
+        self.word_log2 = math.ceil(math.log2(row_bytes))
+        print ("Sim-Mem", initial_mem, self.bytes_per_word, self.word_log2)
         if not initial_mem:
             return
-        print ("Sim-Mem", initial_mem, self.bytes_per_word)
+
+        # different types of memory data structures recognised (for convenience)
+        if isinstance(initial_mem, list):
+            initial_mem = (0, initial_mem)
+        if isinstance(initial_mem, tuple):
+            startaddr, mem = initial_mem
+            initial_mem = {}
+            for i, val in enumerate(mem):
+                initial_mem[startaddr + row_bytes*i] = (val, row_bytes)
+
         for addr, (val, width) in initial_mem.items():
-            self.st(addr, val, width)
+            #val = swap_order(val, width)
+            self.st(addr, val, width, swap=False)
 
     def _get_shifter_mask(self, wid, remainder):
         shifter = ((self.bytes_per_word - wid) - remainder) * \
             8  # bits per byte
+        # XXX https://bugs.libre-soc.org/show_bug.cgi?id=377
+        # BE/LE mode?
+        shifter = remainder * 8
         mask = (1 << (wid * 8)) - 1
         print ("width,rem,shift,mask", wid, remainder, hex(shifter), hex(mask))
         return shifter, mask
 
     # TODO: Implement ld/st of lesser width
-    def ld(self, address, width=8):
+    def ld(self, address, width=8, swap=True):
         print("ld from addr 0x{:x} width {:d}".format(address, width))
         remainder = address & (self.bytes_per_word - 1)
         address = address >> self.word_log2
@@ -65,14 +92,20 @@ class Mem:
             print ("masking", hex(val), hex(mask<<shifter), shifter)
             val = val & (mask << shifter)
             val >>= shifter
+        if swap:
+            val = swap_order(val, width)
         print("Read 0x{:x} from addr 0x{:x}".format(val, address))
         return val
 
-    def st(self, addr, v, width=8):
+    def st(self, addr, v, width=8, swap=True):
+        staddr = addr
         remainder = addr & (self.bytes_per_word - 1)
         addr = addr >> self.word_log2
-        print("Writing 0x{:x} to addr 0x{:x}/{:x}".format(v, addr, remainder))
+        print("Writing 0x{:x} to ST 0x{:x} memaddr 0x{:x}/{:x}".format(v,
+                        staddr, addr, remainder, swap))
         assert remainder & (width - 1) == 0, "Unaligned access unsupported!"
+        if swap:
+            v = swap_order(v, width)
         if width != self.bytes_per_word:
             if addr in self.mem:
                 val = self.mem[addr]
@@ -172,20 +205,29 @@ class SPR(dict):
 
     def __call__(self, ridx):
         return self[ridx]
-        
-        
+
 
 class ISACaller:
     # decoder2 - an instance of power_decoder2
     # regfile - a list of initial values for the registers
+    # initial_{etc} - initial values for SPRs, Condition Register, Mem, MSR
     def __init__(self, decoder2, regfile, initial_sprs=None, initial_cr=0,
-                       initial_mem=None, initial_msr=0):
+                       initial_mem=None, initial_msr=0,
+                       initial_insns=None):
         if initial_sprs is None:
             initial_sprs = {}
         if initial_mem is None:
             initial_mem = {}
+        if initial_insns is None:
+            initial_insns = {}
+            self.respect_pc = False
+        else:
+            # setup batch of instructions: we want to respect (follow) the PC
+            self.respect_pc = True
+
         self.gpr = GPR(decoder2, regfile)
-        self.mem = Mem(initial_mem=initial_mem)
+        self.mem = Mem(row_bytes=8, initial_mem=initial_mem)
+        self.insns = Mem(row_bytes=4, initial_mem=initial_insns)
         self.pc = PC()
         self.spr = SPR(decoder2, initial_sprs)
         self.msr = SelectableInt(initial_msr, 64) # underlying reg
@@ -235,8 +277,8 @@ class ISACaller:
 
     def TRAP(self, trap_addr=0x700):
         print ("TRAP: TODO")
-        # store PC in SRR0, set PC to 0x700
-        # store MSR in SRR1, set MSR to um errr something
+        # store CIA(+4?) in SRR0, set NIA to 0x700
+        # store MSR in SRR1, set MSR to a value still to be determined (check spec)
 
     def memassign(self, ea, sz, val):
         self.mem.memassign(ea, sz, val)
@@ -261,8 +303,9 @@ class ISACaller:
 
         self.namespace['XER'] = self.spr['XER']
         self.namespace['CA'] = self.spr['XER'][XER_bits['CA']].value
+        self.namespace['CA32'] = self.spr['XER'][XER_bits['CA32']].value
 
-    def handle_carry_(self, inputs, outputs):
+    def handle_carry_(self, inputs, outputs, already_done):
         inv_a = yield self.dec2.e.invert_a
         if inv_a:
             inputs[0] = ~inputs[0]
@@ -276,14 +319,16 @@ class ISACaller:
         gts = [(x > output) for x in inputs]
         print(gts)
         cy = 1 if any(gts) else 0
-        self.spr['XER'][XER_bits['CA']] = cy
-
+        if not (1 & already_done):
+            self.spr['XER'][XER_bits['CA']] = cy
 
+        print ("inputs", inputs)
         # 32 bit carry
         gts = [(x[32:64] > output[32:64]) == SelectableInt(1, 1)
                for x in inputs]
         cy32 = 1 if any(gts) else 0
-        self.spr['XER'][XER_bits['CA32']] = cy32
+        if not (2 & already_done):
+            self.spr['XER'][XER_bits['CA32']] = cy32
 
     def handle_overflow(self, inputs, outputs):
         inv_a = yield self.dec2.e.invert_a
@@ -295,14 +340,24 @@ class ISACaller:
             imm = yield self.dec2.e.imm_data.data
             inputs.append(SelectableInt(imm, 64))
         assert len(outputs) >= 1
+        print ("handle_overflow", inputs, outputs)
         if len(inputs) >= 2:
             output = outputs[0]
+
+            # OV (64-bit)
             input_sgn = [exts(x.value, x.bits) < 0 for x in inputs]
             output_sgn = exts(output.value, output.bits) < 0
             ov = 1 if input_sgn[0] == input_sgn[1] and \
                 output_sgn != input_sgn[0] else 0
 
+            # OV (32-bit)
+            input32_sgn = [exts(x.value, 32) < 0 for x in inputs]
+            output32_sgn = exts(output.value, 32) < 0
+            ov32 = 1 if input32_sgn[0] == input32_sgn[1] and \
+                output32_sgn != input32_sgn[0] else 0
+
             self.spr['XER'][XER_bits['OV']] = ov
+            self.spr['XER'][XER_bits['OV32']] = ov32
             so = self.spr['XER'][XER_bits['SO']]
             so = so | ov
             self.spr['XER'][XER_bits['SO']] = so
@@ -322,7 +377,6 @@ class ISACaller:
     def set_pc(self, pc_val):
         self.namespace['NIA'] = SelectableInt(pc_val, 64)
         self.pc.update(self.namespace)
-        
 
     def call(self, name):
         # TODO, asmregs is from the spec, e.g. add RT,RA,RB
@@ -354,11 +408,23 @@ class ISACaller:
         results = info.func(self, *inputs)
         print(results)
 
+        # detect if CA/CA32 already in outputs (sra*, basically)
+        already_done = 0
+        if info.write_regs:
+            output_names = create_args(info.write_regs)
+            for name in output_names:
+                if name == 'CA':
+                    already_done |= 1
+                if name == 'CA32':
+                    already_done |= 2
+
+        print ("carry already done?", bin(already_done))
         carry_en = yield self.dec2.e.output_carry
         if carry_en:
-            yield from self.handle_carry_(inputs, results)
-        ov_en = yield self.dec2.e.oe
-        if ov_en:
+            yield from self.handle_carry_(inputs, results, already_done)
+        ov_en = yield self.dec2.e.oe.oe
+        ov_ok = yield self.dec2.e.oe.ok
+        if ov_en & ov_ok:
             yield from self.handle_overflow(inputs, results)
         rc_en = yield self.dec2.e.rc.data
         if rc_en:
@@ -366,12 +432,17 @@ class ISACaller:
 
         # any modified return results?
         if info.write_regs:
-            output_names = create_args(info.write_regs)
             for name, output in zip(output_names, results):
                 if isinstance(output, int):
                     output = SelectableInt(output, 256)
-                if name in info.special_regs:
-                    print('writing special %s' % name, output)
+                if name in ['CA', 'CA32']:
+                    if carry_en:
+                        print ("writing %s to XER" % name, output)
+                        self.spr['XER'][XER_bits[name]] = output.value
+                    else:
+                        print ("NOT writing %s to XER" % name, output)
+                elif name in info.special_regs:
+                    print('writing special %s' % name, output, special_sprs)
                     if name in special_sprs:
                         self.spr[name] = output
                     else: